Use concurrent sharing mode for image if compute and graphics queue family indices differ
Use semaphores to sync compute and graphics work Fixes #667
This commit is contained in:
parent
5d594acb02
commit
b9f4b1adf1
1 changed files with 58 additions and 60 deletions
|
|
@ -36,6 +36,7 @@ public:
|
||||||
VkDescriptorSet descriptorSetPostCompute; // Image display shader bindings after compute shader image manipulation
|
VkDescriptorSet descriptorSetPostCompute; // Image display shader bindings after compute shader image manipulation
|
||||||
VkPipeline pipeline; // Image display pipeline
|
VkPipeline pipeline; // Image display pipeline
|
||||||
VkPipelineLayout pipelineLayout; // Layout of the graphics pipeline
|
VkPipelineLayout pipelineLayout; // Layout of the graphics pipeline
|
||||||
|
VkSemaphore semaphore; // Execution dependency between compute & graphic submission
|
||||||
} graphics;
|
} graphics;
|
||||||
|
|
||||||
// Resources for the compute part of the example
|
// Resources for the compute part of the example
|
||||||
|
|
@ -43,13 +44,12 @@ public:
|
||||||
VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics)
|
VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics)
|
||||||
VkCommandPool commandPool; // Use a separate command pool (queue family may differ from the one used for graphics)
|
VkCommandPool commandPool; // Use a separate command pool (queue family may differ from the one used for graphics)
|
||||||
VkCommandBuffer commandBuffer; // Command buffer storing the dispatch commands and barriers
|
VkCommandBuffer commandBuffer; // Command buffer storing the dispatch commands and barriers
|
||||||
VkFence fence; // Synchronization fence to avoid rewriting compute CB if still in use
|
VkSemaphore semaphore; // Execution dependency between compute & graphic submission
|
||||||
VkDescriptorSetLayout descriptorSetLayout; // Compute shader binding layout
|
VkDescriptorSetLayout descriptorSetLayout; // Compute shader binding layout
|
||||||
VkDescriptorSet descriptorSet; // Compute shader bindings
|
VkDescriptorSet descriptorSet; // Compute shader bindings
|
||||||
VkPipelineLayout pipelineLayout; // Layout of the compute pipeline
|
VkPipelineLayout pipelineLayout; // Layout of the compute pipeline
|
||||||
std::vector<VkPipeline> pipelines; // Compute pipelines for image filters
|
std::vector<VkPipeline> pipelines; // Compute pipelines for image filters
|
||||||
int32_t pipelineIndex = 0; // Current image filtering compute pipeline index
|
int32_t pipelineIndex = 0; // Current image filtering compute pipeline index
|
||||||
uint32_t queueFamilyIndex; // Family index of the graphics queue, used for barriers
|
|
||||||
} compute;
|
} compute;
|
||||||
|
|
||||||
vks::Buffer vertexBuffer;
|
vks::Buffer vertexBuffer;
|
||||||
|
|
@ -83,6 +83,7 @@ public:
|
||||||
vkDestroyPipeline(device, graphics.pipeline, nullptr);
|
vkDestroyPipeline(device, graphics.pipeline, nullptr);
|
||||||
vkDestroyPipelineLayout(device, graphics.pipelineLayout, nullptr);
|
vkDestroyPipelineLayout(device, graphics.pipelineLayout, nullptr);
|
||||||
vkDestroyDescriptorSetLayout(device, graphics.descriptorSetLayout, nullptr);
|
vkDestroyDescriptorSetLayout(device, graphics.descriptorSetLayout, nullptr);
|
||||||
|
vkDestroySemaphore(device, graphics.semaphore, nullptr);
|
||||||
|
|
||||||
// Compute
|
// Compute
|
||||||
for (auto& pipeline : compute.pipelines)
|
for (auto& pipeline : compute.pipelines)
|
||||||
|
|
@ -91,7 +92,7 @@ public:
|
||||||
}
|
}
|
||||||
vkDestroyPipelineLayout(device, compute.pipelineLayout, nullptr);
|
vkDestroyPipelineLayout(device, compute.pipelineLayout, nullptr);
|
||||||
vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr);
|
vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr);
|
||||||
vkDestroyFence(device, compute.fence, nullptr);
|
vkDestroySemaphore(device, compute.semaphore, nullptr);
|
||||||
vkDestroyCommandPool(device, compute.commandPool, nullptr);
|
vkDestroyCommandPool(device, compute.commandPool, nullptr);
|
||||||
|
|
||||||
vertexBuffer.destroy();
|
vertexBuffer.destroy();
|
||||||
|
|
@ -127,8 +128,18 @@ public:
|
||||||
// Image will be sampled in the fragment shader and used as storage target in the compute shader
|
// Image will be sampled in the fragment shader and used as storage target in the compute shader
|
||||||
imageCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
|
imageCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
|
||||||
imageCreateInfo.flags = 0;
|
imageCreateInfo.flags = 0;
|
||||||
// Sharing mode exclusive means that ownership of the image does not need to be explicitly transferred between the compute and graphics queue
|
// If compute and graphics queue family indices differ, we create an image that can be shared between them
|
||||||
imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
// This can result in worse performance than exclusive sharing mode, but save some synchronization to keep the sample simple
|
||||||
|
std::vector<uint32_t> queueFamilyIndices;
|
||||||
|
if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) {
|
||||||
|
queueFamilyIndices = {
|
||||||
|
vulkanDevice->queueFamilyIndices.graphics,
|
||||||
|
vulkanDevice->queueFamilyIndices.compute
|
||||||
|
};
|
||||||
|
imageCreateInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
|
||||||
|
imageCreateInfo.queueFamilyIndexCount = 2;
|
||||||
|
imageCreateInfo.pQueueFamilyIndices = queueFamilyIndices.data();
|
||||||
|
}
|
||||||
|
|
||||||
VkMemoryAllocateInfo memAllocInfo = vks::initializers::memoryAllocateInfo();
|
VkMemoryAllocateInfo memAllocInfo = vks::initializers::memoryAllocateInfo();
|
||||||
VkMemoryRequirements memReqs;
|
VkMemoryRequirements memReqs;
|
||||||
|
|
@ -224,6 +235,8 @@ public:
|
||||||
imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
|
imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
|
||||||
imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||||
imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
||||||
|
imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
|
imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
vkCmdPipelineBarrier(
|
vkCmdPipelineBarrier(
|
||||||
drawCmdBuffers[i],
|
drawCmdBuffers[i],
|
||||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
|
|
@ -472,51 +485,17 @@ public:
|
||||||
VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &graphics.pipeline));
|
VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &graphics.pipeline));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find and create a compute capable device queue
|
void prepareGraphics()
|
||||||
void getComputeQueue()
|
|
||||||
{
|
{
|
||||||
uint32_t queueFamilyCount;
|
// Semaphore for compute & graphics sync
|
||||||
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, NULL);
|
VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
|
||||||
assert(queueFamilyCount >= 1);
|
VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &graphics.semaphore));
|
||||||
|
|
||||||
std::vector<VkQueueFamilyProperties> queueFamilyProperties;
|
|
||||||
queueFamilyProperties.resize(queueFamilyCount);
|
|
||||||
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueFamilyProperties.data());
|
|
||||||
|
|
||||||
// Some devices have dedicated compute queues, so we first try to find a queue that supports compute and not graphics
|
|
||||||
bool computeQueueFound = false;
|
|
||||||
for (uint32_t i = 0; i < static_cast<uint32_t>(queueFamilyProperties.size()); i++)
|
|
||||||
{
|
|
||||||
if ((queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) && ((queueFamilyProperties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0))
|
|
||||||
{
|
|
||||||
compute.queueFamilyIndex = i;
|
|
||||||
computeQueueFound = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If there is no dedicated compute queue, just find the first queue family that supports compute
|
|
||||||
if (!computeQueueFound)
|
|
||||||
{
|
|
||||||
for (uint32_t i = 0; i < static_cast<uint32_t>(queueFamilyProperties.size()); i++)
|
|
||||||
{
|
|
||||||
if (queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT)
|
|
||||||
{
|
|
||||||
compute.queueFamilyIndex = i;
|
|
||||||
computeQueueFound = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute is mandatory in Vulkan, so there must be at least one queue family that supports compute
|
|
||||||
assert(computeQueueFound);
|
|
||||||
// Get a compute queue from the device
|
|
||||||
vkGetDeviceQueue(device, compute.queueFamilyIndex, 0, &compute.queue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void prepareCompute()
|
void prepareCompute()
|
||||||
{
|
{
|
||||||
getComputeQueue();
|
// Get a compute queue from the device
|
||||||
|
vkGetDeviceQueue(device, vulkanDevice->queueFamilyIndices.compute, 0, &compute.queue);
|
||||||
|
|
||||||
// Create compute pipeline
|
// Create compute pipeline
|
||||||
// Compute pipelines are created separate from graphics pipelines even if they use the same queue
|
// Compute pipelines are created separate from graphics pipelines even if they use the same queue
|
||||||
|
|
@ -563,7 +542,7 @@ public:
|
||||||
// Separate command pool as queue family for compute may be different than graphics
|
// Separate command pool as queue family for compute may be different than graphics
|
||||||
VkCommandPoolCreateInfo cmdPoolInfo = {};
|
VkCommandPoolCreateInfo cmdPoolInfo = {};
|
||||||
cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
||||||
cmdPoolInfo.queueFamilyIndex = compute.queueFamilyIndex;
|
cmdPoolInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
|
||||||
cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
|
cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
|
||||||
VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &compute.commandPool));
|
VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &compute.commandPool));
|
||||||
|
|
||||||
|
|
@ -576,9 +555,16 @@ public:
|
||||||
|
|
||||||
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffer));
|
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffer));
|
||||||
|
|
||||||
// Fence for compute CB sync
|
// Semaphore for compute & graphics sync
|
||||||
VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT);
|
VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
|
||||||
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &compute.fence));
|
VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphore));
|
||||||
|
|
||||||
|
// Signal the semaphore
|
||||||
|
VkSubmitInfo submitInfo = vks::initializers::submitInfo();
|
||||||
|
submitInfo.signalSemaphoreCount = 1;
|
||||||
|
submitInfo.pSignalSemaphores = &compute.semaphore;
|
||||||
|
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
|
||||||
|
VK_CHECK_RESULT(vkQueueWaitIdle(queue));
|
||||||
|
|
||||||
// Build a single command buffer containing the compute dispatch commands
|
// Build a single command buffer containing the compute dispatch commands
|
||||||
buildComputeCommandBuffer();
|
buildComputeCommandBuffer();
|
||||||
|
|
@ -611,22 +597,35 @@ public:
|
||||||
{
|
{
|
||||||
VulkanExampleBase::prepareFrame();
|
VulkanExampleBase::prepareFrame();
|
||||||
|
|
||||||
|
VkPipelineStageFlags graphicsWaitStageMasks[] = { VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
|
||||||
|
VkSemaphore graphicsWaitSemaphores[] = { compute.semaphore, semaphores.presentComplete };
|
||||||
|
VkSemaphore graphicsSignalSemaphores[] = { graphics.semaphore, semaphores.renderComplete };
|
||||||
|
|
||||||
|
// Submit graphics commands
|
||||||
submitInfo.commandBufferCount = 1;
|
submitInfo.commandBufferCount = 1;
|
||||||
submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer];
|
submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer];
|
||||||
|
submitInfo.waitSemaphoreCount = 2;
|
||||||
|
submitInfo.pWaitSemaphores = graphicsWaitSemaphores;
|
||||||
|
submitInfo.pWaitDstStageMask = graphicsWaitStageMasks;
|
||||||
|
submitInfo.signalSemaphoreCount = 2;
|
||||||
|
submitInfo.pSignalSemaphores = graphicsSignalSemaphores;
|
||||||
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
|
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
|
||||||
|
|
||||||
VulkanExampleBase::submitFrame();
|
VulkanExampleBase::submitFrame();
|
||||||
|
|
||||||
// Submit compute commands
|
// Wait for rendering finished
|
||||||
// Use a fence to ensure that the compute command buffer has finished executing before using it again
|
VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
|
||||||
vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX);
|
|
||||||
vkResetFences(device, 1, &compute.fence);
|
|
||||||
|
|
||||||
|
// Submit compute commands
|
||||||
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
|
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
|
||||||
computeSubmitInfo.commandBufferCount = 1;
|
computeSubmitInfo.commandBufferCount = 1;
|
||||||
computeSubmitInfo.pCommandBuffers = &compute.commandBuffer;
|
computeSubmitInfo.pCommandBuffers = &compute.commandBuffer;
|
||||||
|
computeSubmitInfo.waitSemaphoreCount = 1;
|
||||||
VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence));
|
computeSubmitInfo.pWaitSemaphores = &graphics.semaphore;
|
||||||
|
computeSubmitInfo.pWaitDstStageMask = &waitStageMask;
|
||||||
|
computeSubmitInfo.signalSemaphoreCount = 1;
|
||||||
|
computeSubmitInfo.pSignalSemaphores = &compute.semaphore;
|
||||||
|
VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, VK_NULL_HANDLE));
|
||||||
}
|
}
|
||||||
|
|
||||||
void prepare()
|
void prepare()
|
||||||
|
|
@ -641,6 +640,7 @@ public:
|
||||||
preparePipelines();
|
preparePipelines();
|
||||||
setupDescriptorPool();
|
setupDescriptorPool();
|
||||||
setupDescriptorSet();
|
setupDescriptorSet();
|
||||||
|
prepareGraphics();
|
||||||
prepareCompute();
|
prepareCompute();
|
||||||
buildCommandBuffers();
|
buildCommandBuffers();
|
||||||
prepared = true;
|
prepared = true;
|
||||||
|
|
@ -651,12 +651,10 @@ public:
|
||||||
if (!prepared)
|
if (!prepared)
|
||||||
return;
|
return;
|
||||||
draw();
|
draw();
|
||||||
}
|
if (camera.updated) {
|
||||||
|
|
||||||
virtual void viewChanged()
|
|
||||||
{
|
|
||||||
updateUniformBuffers();
|
updateUniformBuffers();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
virtual void OnUpdateUIOverlay(vks::UIOverlay *overlay)
|
virtual void OnUpdateUIOverlay(vks::UIOverlay *overlay)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue