diff --git a/examples/computeshader/computeshader.cpp b/examples/computeshader/computeshader.cpp index 630a76e2..dec255c4 100644 --- a/examples/computeshader/computeshader.cpp +++ b/examples/computeshader/computeshader.cpp @@ -36,6 +36,7 @@ public: VkDescriptorSet descriptorSetPostCompute; // Image display shader bindings after compute shader image manipulation VkPipeline pipeline; // Image display pipeline VkPipelineLayout pipelineLayout; // Layout of the graphics pipeline + VkSemaphore semaphore; // Execution dependency between compute & graphic submission } graphics; // Resources for the compute part of the example @@ -43,13 +44,12 @@ public: VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics) VkCommandPool commandPool; // Use a separate command pool (queue family may differ from the one used for graphics) VkCommandBuffer commandBuffer; // Command buffer storing the dispatch commands and barriers - VkFence fence; // Synchronization fence to avoid rewriting compute CB if still in use + VkSemaphore semaphore; // Execution dependency between compute & graphic submission VkDescriptorSetLayout descriptorSetLayout; // Compute shader binding layout VkDescriptorSet descriptorSet; // Compute shader bindings VkPipelineLayout pipelineLayout; // Layout of the compute pipeline std::vector pipelines; // Compute pipelines for image filters int32_t pipelineIndex = 0; // Current image filtering compute pipeline index - uint32_t queueFamilyIndex; // Family index of the graphics queue, used for barriers } compute; vks::Buffer vertexBuffer; @@ -83,6 +83,7 @@ public: vkDestroyPipeline(device, graphics.pipeline, nullptr); vkDestroyPipelineLayout(device, graphics.pipelineLayout, nullptr); vkDestroyDescriptorSetLayout(device, graphics.descriptorSetLayout, nullptr); + vkDestroySemaphore(device, graphics.semaphore, nullptr); // Compute for (auto& pipeline : compute.pipelines) @@ -91,7 +92,7 @@ public: } vkDestroyPipelineLayout(device, compute.pipelineLayout, nullptr); vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr); - vkDestroyFence(device, compute.fence, nullptr); + vkDestroySemaphore(device, compute.semaphore, nullptr); vkDestroyCommandPool(device, compute.commandPool, nullptr); vertexBuffer.destroy(); @@ -127,8 +128,18 @@ public: // Image will be sampled in the fragment shader and used as storage target in the compute shader imageCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT; imageCreateInfo.flags = 0; - // Sharing mode exclusive means that ownership of the image does not need to be explicitly transferred between the compute and graphics queue - imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + // If compute and graphics queue family indices differ, we create an image that can be shared between them + // This can result in worse performance than exclusive sharing mode, but save some synchronization to keep the sample simple + std::vector queueFamilyIndices; + if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) { + queueFamilyIndices = { + vulkanDevice->queueFamilyIndices.graphics, + vulkanDevice->queueFamilyIndices.compute + }; + imageCreateInfo.sharingMode = VK_SHARING_MODE_CONCURRENT; + imageCreateInfo.queueFamilyIndexCount = 2; + imageCreateInfo.pQueueFamilyIndices = queueFamilyIndices.data(); + } VkMemoryAllocateInfo memAllocInfo = vks::initializers::memoryAllocateInfo(); VkMemoryRequirements memReqs; @@ -224,6 +235,8 @@ public: imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vkCmdPipelineBarrier( drawCmdBuffers[i], VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, @@ -472,51 +485,17 @@ public: VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &graphics.pipeline)); } - // Find and create a compute capable device queue - void getComputeQueue() + void prepareGraphics() { - uint32_t queueFamilyCount; - vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, NULL); - assert(queueFamilyCount >= 1); - - std::vector queueFamilyProperties; - queueFamilyProperties.resize(queueFamilyCount); - vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueFamilyProperties.data()); - - // Some devices have dedicated compute queues, so we first try to find a queue that supports compute and not graphics - bool computeQueueFound = false; - for (uint32_t i = 0; i < static_cast(queueFamilyProperties.size()); i++) - { - if ((queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) && ((queueFamilyProperties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)) - { - compute.queueFamilyIndex = i; - computeQueueFound = true; - break; - } - } - // If there is no dedicated compute queue, just find the first queue family that supports compute - if (!computeQueueFound) - { - for (uint32_t i = 0; i < static_cast(queueFamilyProperties.size()); i++) - { - if (queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) - { - compute.queueFamilyIndex = i; - computeQueueFound = true; - break; - } - } - } - - // Compute is mandatory in Vulkan, so there must be at least one queue family that supports compute - assert(computeQueueFound); - // Get a compute queue from the device - vkGetDeviceQueue(device, compute.queueFamilyIndex, 0, &compute.queue); + // Semaphore for compute & graphics sync + VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo(); + VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &graphics.semaphore)); } void prepareCompute() { - getComputeQueue(); + // Get a compute queue from the device + vkGetDeviceQueue(device, vulkanDevice->queueFamilyIndices.compute, 0, &compute.queue); // Create compute pipeline // Compute pipelines are created separate from graphics pipelines even if they use the same queue @@ -563,7 +542,7 @@ public: // Separate command pool as queue family for compute may be different than graphics VkCommandPoolCreateInfo cmdPoolInfo = {}; cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - cmdPoolInfo.queueFamilyIndex = compute.queueFamilyIndex; + cmdPoolInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &compute.commandPool)); @@ -576,9 +555,16 @@ public: VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffer)); - // Fence for compute CB sync - VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT); - VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &compute.fence)); + // Semaphore for compute & graphics sync + VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo(); + VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphore)); + + // Signal the semaphore + VkSubmitInfo submitInfo = vks::initializers::submitInfo(); + submitInfo.signalSemaphoreCount = 1; + submitInfo.pSignalSemaphores = &compute.semaphore; + VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE)); + VK_CHECK_RESULT(vkQueueWaitIdle(queue)); // Build a single command buffer containing the compute dispatch commands buildComputeCommandBuffer(); @@ -611,22 +597,35 @@ public: { VulkanExampleBase::prepareFrame(); + VkPipelineStageFlags graphicsWaitStageMasks[] = { VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT }; + VkSemaphore graphicsWaitSemaphores[] = { compute.semaphore, semaphores.presentComplete }; + VkSemaphore graphicsSignalSemaphores[] = { graphics.semaphore, semaphores.renderComplete }; + + // Submit graphics commands submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer]; + submitInfo.waitSemaphoreCount = 2; + submitInfo.pWaitSemaphores = graphicsWaitSemaphores; + submitInfo.pWaitDstStageMask = graphicsWaitStageMasks; + submitInfo.signalSemaphoreCount = 2; + submitInfo.pSignalSemaphores = graphicsSignalSemaphores; VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE)); VulkanExampleBase::submitFrame(); - // Submit compute commands - // Use a fence to ensure that the compute command buffer has finished executing before using it again - vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX); - vkResetFences(device, 1, &compute.fence); + // Wait for rendering finished + VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + // Submit compute commands VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo(); computeSubmitInfo.commandBufferCount = 1; computeSubmitInfo.pCommandBuffers = &compute.commandBuffer; - - VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence)); + computeSubmitInfo.waitSemaphoreCount = 1; + computeSubmitInfo.pWaitSemaphores = &graphics.semaphore; + computeSubmitInfo.pWaitDstStageMask = &waitStageMask; + computeSubmitInfo.signalSemaphoreCount = 1; + computeSubmitInfo.pSignalSemaphores = &compute.semaphore; + VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, VK_NULL_HANDLE)); } void prepare() @@ -641,6 +640,7 @@ public: preparePipelines(); setupDescriptorPool(); setupDescriptorSet(); + prepareGraphics(); prepareCompute(); buildCommandBuffers(); prepared = true; @@ -651,11 +651,9 @@ public: if (!prepared) return; draw(); - } - - virtual void viewChanged() - { - updateUniformBuffers(); + if (camera.updated) { + updateUniformBuffers(); + } } virtual void OnUpdateUIOverlay(vks::UIOverlay *overlay)