From b2f501dc98c967ec5f49d2e47d4f4975753b2a48 Mon Sep 17 00:00:00 2001 From: Stephen Saunders Date: Mon, 13 Jun 2022 22:53:48 -0400 Subject: [PATCH] Fix queue family transfer operations between graphics <-> compute queues, generalize getQueueFamilyIndex() to support VkQueueFlags vs. VkQueueFlagBits, computecloth deltaT now based on frameTimer --- base/VulkanDevice.cpp | 15 +- base/VulkanDevice.h | 2 +- examples/computecloth/computecloth.cpp | 39 ++-- .../computecullandlod/computecullandlod.cpp | 171 ++++++++++++++---- examples/computenbody/computenbody.cpp | 18 +- .../computeparticles/computeparticles.cpp | 13 +- .../computeraytracing/computeraytracing.cpp | 128 +++++++++++-- 7 files changed, 306 insertions(+), 80 deletions(-) diff --git a/base/VulkanDevice.cpp b/base/VulkanDevice.cpp index 6835a3dd..9208be7b 100644 --- a/base/VulkanDevice.cpp +++ b/base/VulkanDevice.cpp @@ -120,6 +120,7 @@ namespace vks /** * Get the index of a queue family that supports the requested queue flags + * SRS - support VkQueueFlags parameter for requesting multiple flags vs. 
VkQueueFlagBits for a single flag only * * @param queueFlags Queue flags to find a queue family index for * @@ -127,15 +128,15 @@ namespace vks * * @throw Throws an exception if no queue family index could be found that supports the requested flags */ - uint32_t VulkanDevice::getQueueFamilyIndex(VkQueueFlagBits queueFlags) const + uint32_t VulkanDevice::getQueueFamilyIndex(VkQueueFlags queueFlags) const { // Dedicated queue for compute // Try to find a queue family index that supports compute but not graphics - if (queueFlags & VK_QUEUE_COMPUTE_BIT) + if ((queueFlags & VK_QUEUE_COMPUTE_BIT) == queueFlags) { for (uint32_t i = 0; i < static_cast(queueFamilyProperties.size()); i++) { - if ((queueFamilyProperties[i].queueFlags & queueFlags) && ((queueFamilyProperties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)) + if ((queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) && ((queueFamilyProperties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)) { return i; } @@ -144,11 +145,11 @@ namespace vks // Dedicated queue for transfer // Try to find a queue family index that supports transfer but not graphics and compute - if (queueFlags & VK_QUEUE_TRANSFER_BIT) + if ((queueFlags & VK_QUEUE_TRANSFER_BIT) == queueFlags) { for (uint32_t i = 0; i < static_cast(queueFamilyProperties.size()); i++) { - if ((queueFamilyProperties[i].queueFlags & queueFlags) && ((queueFamilyProperties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0) && ((queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) == 0)) + if ((queueFamilyProperties[i].queueFlags & VK_QUEUE_TRANSFER_BIT) && ((queueFamilyProperties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0) && ((queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) == 0)) { return i; } @@ -158,7 +159,7 @@ namespace vks // For other queue types or if no separate compute queue is present, return the first one to support the requested flags for (uint32_t i = 0; i < static_cast(queueFamilyProperties.size()); i++) { - if 
(queueFamilyProperties[i].queueFlags & queueFlags) + if ((queueFamilyProperties[i].queueFlags & queueFlags) == queueFlags) { return i; } @@ -233,7 +234,7 @@ namespace vks queueFamilyIndices.transfer = getQueueFamilyIndex(VK_QUEUE_TRANSFER_BIT); if ((queueFamilyIndices.transfer != queueFamilyIndices.graphics) && (queueFamilyIndices.transfer != queueFamilyIndices.compute)) { - // If compute family index differs, we need an additional queue create info for the compute queue + // If transfer family index differs, we need an additional queue create info for the transfer queue VkDeviceQueueCreateInfo queueInfo{}; queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queueInfo.queueFamilyIndex = queueFamilyIndices.transfer; diff --git a/base/VulkanDevice.h b/base/VulkanDevice.h index 0bb49879..fc41506a 100644 --- a/base/VulkanDevice.h +++ b/base/VulkanDevice.h @@ -55,7 +55,7 @@ struct VulkanDevice explicit VulkanDevice(VkPhysicalDevice physicalDevice); ~VulkanDevice(); uint32_t getMemoryType(uint32_t typeBits, VkMemoryPropertyFlags properties, VkBool32 *memTypeFound = nullptr) const; - uint32_t getQueueFamilyIndex(VkQueueFlagBits queueFlags) const; + uint32_t getQueueFamilyIndex(VkQueueFlags queueFlags) const; VkResult createLogicalDevice(VkPhysicalDeviceFeatures enabledFeatures, std::vector enabledExtensions, void *pNextChain, bool useSwapChain = true, VkQueueFlags requestedQueueTypes = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT); VkResult createBuffer(VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags memoryPropertyFlags, VkDeviceSize size, VkBuffer *buffer, VkDeviceMemory *memory, void *data = nullptr); VkResult createBuffer(VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags memoryPropertyFlags, vks::Buffer *buffer, VkDeviceSize size, void *data = nullptr); diff --git a/examples/computecloth/computecloth.cpp b/examples/computecloth/computecloth.cpp index d81567d7..5d4ae92a 100644 --- a/examples/computecloth/computecloth.cpp +++ 
b/examples/computecloth/computecloth.cpp @@ -136,12 +136,12 @@ public: textureCloth.loadFromFile(getAssetPath() + "textures/vulkan_cloth_rgba.ktx", VK_FORMAT_R8G8B8A8_UNORM, vulkanDevice, queue); } - void addGraphicsToComputeBarriers(VkCommandBuffer commandBuffer) + void addGraphicsToComputeBarriers(VkCommandBuffer commandBuffer, VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask) { if (specializedComputeQueue) { VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); - bufferBarrier.srcAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + bufferBarrier.srcAccessMask = srcAccessMask; + bufferBarrier.dstAccessMask = dstAccessMask; bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; bufferBarrier.size = VK_WHOLE_SIZE; @@ -152,8 +152,8 @@ public: bufferBarrier.buffer = compute.storageBuffers.output.buffer; bufferBarriers.push_back(bufferBarrier); vkCmdPipelineBarrier(commandBuffer, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + srcStageMask, + dstStageMask, VK_FLAGS_NONE, 0, nullptr, static_cast(bufferBarriers.size()), bufferBarriers.data(), @@ -166,8 +166,8 @@ public: VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; - bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; bufferBarrier.size = VK_WHOLE_SIZE; std::vector bufferBarriers; bufferBarrier.buffer = compute.storageBuffers.input.buffer; @@ -184,12 
+184,12 @@ public: 0, nullptr); } - void addComputeToGraphicsBarriers(VkCommandBuffer commandBuffer) + void addComputeToGraphicsBarriers(VkCommandBuffer commandBuffer, VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask) { if (specializedComputeQueue) { VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); - bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - bufferBarrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + bufferBarrier.srcAccessMask = srcAccessMask; + bufferBarrier.dstAccessMask = dstAccessMask; bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; bufferBarrier.size = VK_WHOLE_SIZE; @@ -200,8 +200,8 @@ public: bufferBarriers.push_back(bufferBarrier); vkCmdPipelineBarrier( commandBuffer, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + srcStageMask, + dstStageMask, VK_FLAGS_NONE, 0, nullptr, static_cast(bufferBarriers.size()), bufferBarriers.data(), @@ -234,7 +234,7 @@ public: VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo)); // Acquire storage buffers from compute queue - addComputeToGraphicsBarriers(drawCmdBuffers[i]); + addComputeToGraphicsBarriers(drawCmdBuffers[i], 0, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT); // Draw the particle system using the update vertex buffer @@ -267,7 +267,7 @@ public: vkCmdEndRenderPass(drawCmdBuffers[i]); // release the storage buffers to the compute queue - addGraphicsToComputeBarriers(drawCmdBuffers[i]); + addGraphicsToComputeBarriers(drawCmdBuffers[i], VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, 0, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i])); } @@ -285,7 +285,7 @@ public: 
VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffers[i], &cmdBufInfo)); // Acquire the storage buffers from the graphics queue - addGraphicsToComputeBarriers(compute.commandBuffers[i]); + addGraphicsToComputeBarriers(compute.commandBuffers[i], 0, VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); vkCmdBindPipeline(compute.commandBuffers[i], VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline); @@ -313,7 +313,7 @@ public: } // release the storage buffers back to the graphics queue - addComputeToGraphicsBarriers(compute.commandBuffers[i]); + addComputeToGraphicsBarriers(compute.commandBuffers[i], VK_ACCESS_SHADER_WRITE_BIT, 0, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); vkEndCommandBuffer(compute.commandBuffers[i]); } } @@ -396,7 +396,7 @@ public: // Add an initial release barrier to the graphics queue, // so that when the compute command buffer executes for the first time // it doesn't complain about a lack of a corresponding "release" to its "acquire" - addGraphicsToComputeBarriers(copyCmd); + addGraphicsToComputeBarriers(copyCmd, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, 0, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); vulkanDevice->flushCommandBuffer(copyCmd, queue, true); stagingBuffer.destroy(); @@ -673,11 +673,12 @@ public: if (!paused) { //compute.ubo.deltaT = 0.000005f; // todo: base on frametime - compute.ubo.deltaT = frameTimer * 0.0015f; + // SRS - Clamp frameTimer to max 20ms refresh period (e.g. if blocked on resize), otherwise image breakup can occur + compute.ubo.deltaT = fmin(frameTimer, 0.02) * 0.0025f; if (simulateWind) { std::default_random_engine rndEngine(benchmark.active ? 
0 : (unsigned)time(nullptr)); - std::uniform_real_distribution rd(1.0f, 30.0f); + std::uniform_real_distribution rd(1.0f, 12.0f); compute.ubo.gravity.x = cos(glm::radians(-timer * 360.0f)) * (rd(rndEngine) - rd(rndEngine)); compute.ubo.gravity.z = sin(glm::radians(timer * 360.0f)) * (rd(rndEngine) - rd(rndEngine)); } diff --git a/examples/computecullandlod/computecullandlod.cpp b/examples/computecullandlod/computecullandlod.cpp index c1888659..5b5296d5 100644 --- a/examples/computecullandlod/computecullandlod.cpp +++ b/examples/computecullandlod/computecullandlod.cpp @@ -149,6 +149,32 @@ public: VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo)); + // Acquire barrier + if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) + { + VkBufferMemoryBarrier buffer_barrier = + { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + nullptr, + 0, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT, + vulkanDevice->queueFamilyIndices.compute, + vulkanDevice->queueFamilyIndices.graphics, + indirectCommandsBuffer.buffer, + 0, + indirectCommandsBuffer.descriptor.range + }; + + vkCmdPipelineBarrier( + drawCmdBuffers[i], + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, + 0, + 0, nullptr, + 1, &buffer_barrier, + 0, nullptr); + } + vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE); VkViewport viewport = vks::initializers::viewport((float)width, (float)height, 0.0f, 1.0f); @@ -169,7 +195,7 @@ public: if (vulkanDevice->features.multiDrawIndirect) { - vkCmdDrawIndexedIndirect(drawCmdBuffers[i], indirectCommandsBuffer.buffer, 0, indirectCommands.size(), sizeof(VkDrawIndexedIndirectCommand)); + vkCmdDrawIndexedIndirect(drawCmdBuffers[i], indirectCommandsBuffer.buffer, 0, static_cast(indirectCommands.size()), sizeof(VkDrawIndexedIndirectCommand)); } else { @@ -184,6 +210,32 @@ public: vkCmdEndRenderPass(drawCmdBuffers[i]); + // Release barrier + if (vulkanDevice->queueFamilyIndices.graphics 
!= vulkanDevice->queueFamilyIndices.compute) + { + VkBufferMemoryBarrier buffer_barrier = + { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + nullptr, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT, + 0, + vulkanDevice->queueFamilyIndices.graphics, + vulkanDevice->queueFamilyIndices.compute, + indirectCommandsBuffer.buffer, + 0, + indirectCommandsBuffer.descriptor.range + }; + + vkCmdPipelineBarrier( + drawCmdBuffers[i], + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + 0, + 0, nullptr, + 1, &buffer_barrier, + 0, nullptr); + } + VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i])); } } @@ -200,23 +252,32 @@ public: VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffer, &cmdBufInfo)); + // Acquire barrier // Add memory barrier to ensure that the indirect commands have been consumed before the compute shader updates them - VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); - bufferBarrier.buffer = indirectCommandsBuffer.buffer; - bufferBarrier.size = indirectCommandsBuffer.descriptor.range; - bufferBarrier.srcAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; - bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; - bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) + { + VkBufferMemoryBarrier buffer_barrier = + { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + nullptr, + 0, + VK_ACCESS_SHADER_WRITE_BIT, + vulkanDevice->queueFamilyIndices.graphics, + vulkanDevice->queueFamilyIndices.compute, + indirectCommandsBuffer.buffer, + 0, + indirectCommandsBuffer.descriptor.range + }; - vkCmdPipelineBarrier( - compute.commandBuffer, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_FLAGS_NONE, - 0, nullptr, - 1, &bufferBarrier, - 0, nullptr); + vkCmdPipelineBarrier( + compute.commandBuffer, + 
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_FLAGS_NONE, + 0, nullptr, + 1, &buffer_barrier, + 0, nullptr); + } vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline); vkCmdBindDescriptorSets(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineLayout, 0, 1, &compute.descriptorSet, 0, 0); @@ -226,22 +287,32 @@ public: // It also determines the lod to use depending on distance to the viewer. vkCmdDispatch(compute.commandBuffer, objectCount / 16, 1, 1); + // Release barrier // Add memory barrier to ensure that the compute shader has finished writing the indirect command buffer before it's consumed - bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - bufferBarrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; - bufferBarrier.buffer = indirectCommandsBuffer.buffer; - bufferBarrier.size = indirectCommandsBuffer.descriptor.range; - bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; - bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; + if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) + { + VkBufferMemoryBarrier buffer_barrier = + { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + nullptr, + VK_ACCESS_SHADER_WRITE_BIT, + 0, + vulkanDevice->queueFamilyIndices.compute, + vulkanDevice->queueFamilyIndices.graphics, + indirectCommandsBuffer.buffer, + 0, + indirectCommandsBuffer.descriptor.range + }; - vkCmdPipelineBarrier( - compute.commandBuffer, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, - VK_FLAGS_NONE, - 0, nullptr, - 1, &bufferBarrier, - 0, nullptr); + vkCmdPipelineBarrier( + compute.commandBuffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_FLAGS_NONE, + 0, nullptr, + 1, &buffer_barrier, + 0, nullptr); + } // todo: barrier for indirect stats buffer? 
@@ -424,7 +495,38 @@ public: &instanceBuffer, stagingBuffer.size)); - vulkanDevice->copyBuffer(&stagingBuffer, &instanceBuffer, queue); + // Copy from staging buffer to instance buffer + VkCommandBuffer copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + VkBufferCopy copyRegion = {}; + copyRegion.size = stagingBuffer.size; + vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, instanceBuffer.buffer, 1, &copyRegion); + // Add an initial release barrier to the graphics queue, + // so that when the compute command buffer executes for the first time + // it doesn't complain about a lack of a corresponding "release" to its "acquire" + if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) + { VkBufferMemoryBarrier buffer_barrier = + { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + nullptr, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT, + 0, + vulkanDevice->queueFamilyIndices.graphics, + vulkanDevice->queueFamilyIndices.compute, + indirectCommandsBuffer.buffer, + 0, + indirectCommandsBuffer.descriptor.range + }; + + vkCmdPipelineBarrier( + copyCmd, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + 0, + 0, nullptr, + 1, &buffer_barrier, + 0, nullptr); + } + vulkanDevice->flushCommandBuffer(copyCmd, queue, true); stagingBuffer.destroy(); @@ -710,6 +812,11 @@ public: } } + virtual void viewChanged() + { + updateUniformBuffer(true); + } + virtual void OnUpdateUIOverlay(vks::UIOverlay *overlay) { if (overlay->header("Settings")) { @@ -726,4 +833,4 @@ public: } }; -VULKAN_EXAMPLE_MAIN() \ No newline at end of file +VULKAN_EXAMPLE_MAIN() diff --git a/examples/computenbody/computenbody.cpp b/examples/computenbody/computenbody.cpp index 9dd01594..dec0a00f 100644 --- a/examples/computenbody/computenbody.cpp +++ b/examples/computenbody/computenbody.cpp @@ -160,7 +160,7 @@ public: vkCmdPipelineBarrier( drawCmdBuffers[i], - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, @@ -207,7 +207,7 @@ public: vkCmdPipelineBarrier( drawCmdBuffers[i], VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 1, &buffer_barrier, @@ -243,7 +243,7 @@ public: vkCmdPipelineBarrier( compute.commandBuffer, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, @@ -300,7 +300,7 @@ public: vkCmdPipelineBarrier( compute.commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 1, &buffer_barrier, @@ -417,7 +417,7 @@ public: vkCmdPipelineBarrier( copyCmd, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 1, &buffer_barrier, @@ -746,7 +746,9 @@ public: // Build a single command buffer containing the compute dispatch commands buildComputeCommandBuffer(); + // SRS - By reordering compute and graphics within draw(), the following code is no longer needed: // If graphics and compute queue family indices differ, acquire and immediately release the storage buffer, so that the initial acquire from the graphics command buffers are matched up properly + /* if (graphics.queueFamilyIndex != compute.queueFamilyIndex) { // Create a transient command buffer for setting up the initial buffer transfer state @@ -796,6 +798,7 @@ public: vulkanDevice->flushCommandBuffer(transferCmd, compute.queue, compute.commandPool); } + */ } // Prepare and initialize uniform buffer containing shader uniforms @@ -899,6 +902,11 @@ public: updateGraphicsUniformBuffers(); } } + + virtual void viewChanged() + { + updateGraphicsUniformBuffers(); + } }; VULKAN_EXAMPLE_MAIN() diff --git a/examples/computeparticles/computeparticles.cpp b/examples/computeparticles/computeparticles.cpp index 7c5b79c8..87f91a6d 100644 --- 
a/examples/computeparticles/computeparticles.cpp +++ b/examples/computeparticles/computeparticles.cpp @@ -149,7 +149,7 @@ public: vkCmdPipelineBarrier( drawCmdBuffers[i], - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, @@ -196,7 +196,7 @@ public: vkCmdPipelineBarrier( drawCmdBuffers[i], VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 1, &buffer_barrier, @@ -234,7 +234,7 @@ public: vkCmdPipelineBarrier( compute.commandBuffer, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, @@ -267,7 +267,7 @@ public: vkCmdPipelineBarrier( compute.commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 1, &buffer_barrier, @@ -336,7 +336,7 @@ public: vkCmdPipelineBarrier( copyCmd, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 1, &buffer_barrier, @@ -647,7 +647,9 @@ public: // Build a single command buffer containing the compute dispatch commands buildComputeCommandBuffer(); + // SRS - By reordering compute and graphics within draw(), the following code is no longer needed: // If graphics and compute queue family indices differ, acquire and immediately release the storage buffer, so that the initial acquire from the graphics command buffers are matched up properly + /* if (graphics.queueFamilyIndex != compute.queueFamilyIndex) { // Create a transient command buffer for setting up the initial buffer transfer state @@ -697,6 +699,7 @@ public: vulkanDevice->flushCommandBuffer(transferCmd, compute.queue, compute.commandPool); } + */ } // Prepare and initialize uniform buffer containing shader uniforms diff --git a/examples/computeraytracing/computeraytracing.cpp 
b/examples/computeraytracing/computeraytracing.cpp index 6a9036c2..e857d1cd 100644 --- a/examples/computeraytracing/computeraytracing.cpp +++ b/examples/computeraytracing/computeraytracing.cpp @@ -94,6 +94,9 @@ public: ~VulkanExample() { + // SRS - Ensure all operations on the device have finished before destroying resources + vkDeviceWaitIdle(device); + // Graphics vkDestroyPipeline(device, graphics.pipeline, nullptr); vkDestroyPipelineLayout(device, graphics.pipelineLayout, nullptr); @@ -223,16 +226,38 @@ public: imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; imageMemoryBarrier.image = textureComputeTarget.image; imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; - imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - vkCmdPipelineBarrier( - drawCmdBuffers[i], - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - VK_FLAGS_NONE, - 0, nullptr, - 0, nullptr, - 1, &imageMemoryBarrier); + if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) + { + // Acquire barrier for graphics queue + imageMemoryBarrier.srcAccessMask = 0; + imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + imageMemoryBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + imageMemoryBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; + vkCmdPipelineBarrier( + drawCmdBuffers[i], + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_FLAGS_NONE, + 0, nullptr, + 0, nullptr, + 1, &imageMemoryBarrier); + } + else + { + // Combined barrier on single queue family + imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vkCmdPipelineBarrier( + 
drawCmdBuffers[i], + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_FLAGS_NONE, + 0, nullptr, + 0, nullptr, + 1, &imageMemoryBarrier); + } vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE); @@ -252,6 +277,23 @@ public: vkCmdEndRenderPass(drawCmdBuffers[i]); + if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) + { + // Release barrier from graphics queue + imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + imageMemoryBarrier.dstAccessMask = 0; + imageMemoryBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; + imageMemoryBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + vkCmdPipelineBarrier( + drawCmdBuffers[i], + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_FLAGS_NONE, + 0, nullptr, + 0, nullptr, + 1, &imageMemoryBarrier); + } + VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i])); } @@ -262,12 +304,52 @@ public: VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo(); VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffer, &cmdBufInfo)); + + VkImageMemoryBarrier imageMemoryBarrier = {}; + imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemoryBarrier.image = textureComputeTarget.image; + imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; + if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) + { + // Acquire barrier for compute queue + imageMemoryBarrier.srcAccessMask = 0; + imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + imageMemoryBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; + imageMemoryBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + 
vkCmdPipelineBarrier( + compute.commandBuffer, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_FLAGS_NONE, + 0, nullptr, + 0, nullptr, + 1, &imageMemoryBarrier); + } vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline); vkCmdBindDescriptorSets(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineLayout, 0, 1, &compute.descriptorSet, 0, 0); vkCmdDispatch(compute.commandBuffer, textureComputeTarget.width / 16, textureComputeTarget.height / 16, 1); + if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) + { + // Release barrier from compute queue + imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + imageMemoryBarrier.dstAccessMask = 0; + imageMemoryBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + imageMemoryBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; + vkCmdPipelineBarrier( + compute.commandBuffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_FLAGS_NONE, + 0, nullptr, + 0, nullptr, + 1, &imageMemoryBarrier); + } + vkEndCommandBuffer(compute.commandBuffer); } @@ -361,6 +443,30 @@ public: copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); copyRegion.size = storageBufferSize; vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffers.planes.buffer, 1, &copyRegion); + // Add an initial release barrier to the graphics queue, + // so that when the compute command buffer executes for the first time + // it doesn't complain about a lack of a corresponding "release" to its "acquire" + if (vulkanDevice->queueFamilyIndices.graphics != vulkanDevice->queueFamilyIndices.compute) + { + VkImageMemoryBarrier imageMemoryBarrier = {}; + imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+ imageMemoryBarrier.image = textureComputeTarget.image; + imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; + imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + imageMemoryBarrier.dstAccessMask = 0; + imageMemoryBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; + imageMemoryBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + vkCmdPipelineBarrier( + copyCmd, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_FLAGS_NONE, + 0, nullptr, + 0, nullptr, + 1, &imageMemoryBarrier); + } vulkanDevice->flushCommandBuffer(copyCmd, queue, true); stagingBuffer.destroy(); @@ -689,9 +795,9 @@ public: void prepare() { VulkanExampleBase::prepare(); + prepareTextureTarget(&textureComputeTarget, TEX_DIM, TEX_DIM, VK_FORMAT_R8G8B8A8_UNORM); prepareStorageBuffers(); prepareUniformBuffers(); - prepareTextureTarget(&textureComputeTarget, TEX_DIM, TEX_DIM, VK_FORMAT_R8G8B8A8_UNORM); setupDescriptorSetLayout(); preparePipelines(); setupDescriptorPool();