Vulkan examples: synchronize compute and graphics submissions with semaphores

computenbody:
* Build the semaphore-signalling submit info via vks::initializers::submitInfo().

computeparticles:
* Replace the compute fence with two semaphores (graphics.semaphore and
  compute.semaphore) that chain the graphics and compute queue submissions.
* Record acquire/release buffer barriers for the particle storage buffer, but
  only when the graphics and compute queue family indices differ.
* Move the graphics-side setup calls into a new prepareGraphics().
* Replace the "animate" toggle with its inverse, "attachToCursor".
* Destroy compute.semaphore and graphics.semaphore next to the single
  vkDestroyCommandPool call for the compute command pool.

diff --git a/examples/computenbody/computenbody.cpp b/examples/computenbody/computenbody.cpp
index 54d88c85..5dd81add 100644
--- a/examples/computenbody/computenbody.cpp
+++ b/examples/computenbody/computenbody.cpp
@@ -759,7 +759,7 @@ public:
 		VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphore));
 
 		// Signal the semaphore
-		VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
+		VkSubmitInfo submitInfo = vks::initializers::submitInfo();
 		submitInfo.signalSemaphoreCount = 1;
 		submitInfo.pSignalSemaphores = &compute.semaphore;
 		VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
diff --git a/examples/computeparticles/computeparticles.cpp b/examples/computeparticles/computeparticles.cpp
index 115e9b06..771d6405 100644
--- a/examples/computeparticles/computeparticles.cpp
+++ b/examples/computeparticles/computeparticles.cpp
@@ -38,7 +38,7 @@ class VulkanExample : public VulkanExampleBase
 public:
 	float timer = 0.0f;
 	float animStart = 20.0f;
-	bool animate = true;
+	bool attachToCursor = false;
 
 	struct {
 		vks::Texture2D particle;
@@ -53,20 +53,23 @@ public:
 
 	// Resources for the graphics part of the example
 	struct {
+		uint32_t queueFamilyIndex;					// Used to check if compute and graphics queue families differ and require additional barriers
 		VkDescriptorSetLayout descriptorSetLayout;	// Particle system rendering shader binding layout
 		VkDescriptorSet descriptorSet;				// Particle system rendering shader bindings
 		VkPipelineLayout pipelineLayout;			// Layout of the graphics pipeline
 		VkPipeline pipeline;						// Particle rendering pipeline
+		VkSemaphore semaphore;						// Execution dependency between compute & graphic submission
 	} graphics;
 
 	// Resources for the compute part of the example
 	struct {
+		uint32_t queueFamilyIndex;					// Used to check if compute and graphics queue families differ and require additional barriers
 		vks::Buffer storageBuffer;					// (Shader) storage buffer object containing the particles
 		vks::Buffer uniformBuffer;					// Uniform buffer object containing particle system parameters
 		VkQueue queue;								// Separate queue for compute commands (queue family may differ from the one used for graphics)
 		VkCommandPool commandPool;					// Use a separate command pool (queue family may differ from the one used for graphics)
 		VkCommandBuffer commandBuffer;				// Command buffer storing the dispatch commands and barriers
-		VkFence fence;								// Synchronization fence to avoid rewriting compute CB if still in use
+		VkSemaphore semaphore;						// Execution dependency between compute & graphic submission
 		VkDescriptorSetLayout descriptorSetLayout;	// Compute shader binding layout
 		VkDescriptorSet descriptorSet;				// Compute shader bindings
 		VkPipelineLayout pipelineLayout;			// Layout of the compute pipeline
@@ -105,7 +108,8 @@ public:
 		vkDestroyPipelineLayout(device, compute.pipelineLayout, nullptr);
 		vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr);
 		vkDestroyPipeline(device, compute.pipeline, nullptr);
-		vkDestroyFence(device, compute.fence, nullptr);
+		vkDestroySemaphore(device, compute.semaphore, nullptr);
+		vkDestroySemaphore(device, graphics.semaphore, nullptr);
 		vkDestroyCommandPool(device, compute.commandPool, nullptr);
 
 		textures.particle.destroy();
@@ -120,13 +124,6 @@ public:
 
 	void buildCommandBuffers()
 	{
-		// Destroy command buffers if already present
-		if (!checkCommandBuffers())
-		{
-			destroyCommandBuffers();
-			createCommandBuffers();
-		}
-
 		VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo();
 
 		VkClearValue clearValues[2];
@@ -149,8 +146,33 @@ public:
 
 			VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo));
 
-			// Draw the particle system using the update vertex buffer
+			// Acquire barrier
+			if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
+			{
+				VkBufferMemoryBarrier buffer_barrier =
+				{
+					VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+					nullptr,
+					0,
+					VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+					compute.queueFamilyIndex,
+					graphics.queueFamilyIndex,
+					compute.storageBuffer.buffer,
+					0,
+					compute.storageBuffer.size
+				};
+				vkCmdPipelineBarrier(
+					drawCmdBuffers[i],
+					VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+					VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+					0,
+					0, nullptr,
+					1, &buffer_barrier,
+					0, nullptr);
+			}
+
+			// Draw the particle system using the update vertex buffer
 
 			vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
 
 			VkViewport viewport = vks::initializers::viewport((float)width, (float)height, 0.0f, 1.0f);
@@ -170,6 +192,32 @@ public:
 
 			vkCmdEndRenderPass(drawCmdBuffers[i]);
 
+			// Release barrier
+			if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
+			{
+				VkBufferMemoryBarrier buffer_barrier =
+				{
+					VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+					nullptr,
+					VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+					0,
+					graphics.queueFamilyIndex,
+					compute.queueFamilyIndex,
+					compute.storageBuffer.buffer,
+					0,
+					compute.storageBuffer.size
+				};
+
+				vkCmdPipelineBarrier(
+					drawCmdBuffers[i],
+					VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+					VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+					0,
+					0, nullptr,
+					1, &buffer_barrier,
+					0, nullptr);
+			}
+
 			VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i]));
 		}
 
@@ -184,50 +232,62 @@ public:
 		// Compute particle movement
 
 		// Add memory barrier to ensure that the (graphics) vertex shader has fetched attributes before compute starts to write to the buffer
-		VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier();
-		bufferBarrier.buffer = compute.storageBuffer.buffer;
-		bufferBarrier.size = compute.storageBuffer.descriptor.range;
-		bufferBarrier.srcAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;						// Vertex shader invocations have finished reading from the buffer
-		bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;								// Compute shader wants to write to the buffer
-		// Compute and graphics queue may have different queue families (see VulkanDevice::createLogicalDevice)
-		// For the barrier to work across different queues, we need to set their family indices
-		bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;			// Required as compute and graphics queue may have different families
-		bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;			// Required as compute and graphics queue may have different families
+		if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
+		{
+			VkBufferMemoryBarrier buffer_barrier =
+			{
+				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+				nullptr,
+				0,
+				VK_ACCESS_SHADER_WRITE_BIT,
+				graphics.queueFamilyIndex,
+				compute.queueFamilyIndex,
+				compute.storageBuffer.buffer,
+				0,
+				compute.storageBuffer.size
+			};
 
-		vkCmdPipelineBarrier(
-			compute.commandBuffer,
-			VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
-			VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-			VK_FLAGS_NONE,
-			0, nullptr,
-			1, &bufferBarrier,
-			0, nullptr);
-
-		vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline);
-		vkCmdBindDescriptorSets(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineLayout, 0, 1, &compute.descriptorSet, 0, 0);
+			vkCmdPipelineBarrier(
+				compute.commandBuffer,
+				VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+				0,
+				0, nullptr,
+				1, &buffer_barrier,
+				0, nullptr);
+		}
 
 		// Dispatch the compute job
+		vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline);
+		vkCmdBindDescriptorSets(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineLayout, 0, 1, &compute.descriptorSet, 0, 0);
 		vkCmdDispatch(compute.commandBuffer, PARTICLE_COUNT / 256, 1, 1);
 
-		// Add memory barrier to ensure that compute shader has finished writing to the buffer
+		// Add barrier to ensure that compute shader has finished writing to the buffer
 		// Without this the (rendering) vertex shader may display incomplete results (partial data from last frame)
-		bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;								// Compute shader has finished writes to the buffer
-		bufferBarrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;						// Vertex shader invocations want to read from the buffer
-		bufferBarrier.buffer = compute.storageBuffer.buffer;
-		bufferBarrier.size = compute.storageBuffer.descriptor.range;
-		// Compute and graphics queue may have different queue families (see VulkanDevice::createLogicalDevice)
-		// For the barrier to work across different queues, we need to set their family indices
-		bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;			// Required as compute and graphics queue may have different families
-		bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;			// Required as compute and graphics queue may have different families
+		if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
+		{
+			VkBufferMemoryBarrier buffer_barrier =
+			{
+				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+				nullptr,
+				VK_ACCESS_SHADER_WRITE_BIT,
+				0,
+				compute.queueFamilyIndex,
+				graphics.queueFamilyIndex,
+				compute.storageBuffer.buffer,
+				0,
+				compute.storageBuffer.size
+			};
 
-		vkCmdPipelineBarrier(
-			compute.commandBuffer,
-			VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-			VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
-			VK_FLAGS_NONE,
-			0, nullptr,
-			1, &bufferBarrier,
-			0, nullptr);
+			vkCmdPipelineBarrier(
+				compute.commandBuffer,
+				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+				VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+				0,
+				0, nullptr,
+				1, &buffer_barrier,
+				0, nullptr);
+		}
 
 		vkEndCommandBuffer(compute.commandBuffer);
 	}
@@ -267,11 +327,36 @@ public:
 			&compute.storageBuffer,
 			storageBufferSize);
 
-		// Copy to staging buffer
+		// Copy from staging buffer to storage buffer
 		VkCommandBuffer copyCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
 		VkBufferCopy copyRegion = {};
 		copyRegion.size = storageBufferSize;
 		vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffer.buffer, 1, &copyRegion);
+		// Execute a transfer barrier to the compute queue, if necessary
+		if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
+		{
+			VkBufferMemoryBarrier buffer_barrier =
+			{
+				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+				nullptr,
+				VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+				0,
+				graphics.queueFamilyIndex,
+				compute.queueFamilyIndex,
+				compute.storageBuffer.buffer,
+				0,
+				compute.storageBuffer.size
+			};
+
+			vkCmdPipelineBarrier(
+				copyCmd,
+				VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+				0,
+				0, nullptr,
+				1, &buffer_barrier,
+				0, nullptr);
+		}
 		VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true);
 
 		stagingBuffer.destroy();
@@ -471,13 +556,26 @@ public:
 		VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &graphics.pipeline));
 	}
 
+	void prepareGraphics()
+	{
+		prepareStorageBuffers();
+		prepareUniformBuffers();
+		setupDescriptorSetLayout();
+		preparePipelines();
+		setupDescriptorSet();
+
+		// Semaphore for compute & graphics sync
+		VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
+		VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &graphics.semaphore));
+	}
+
 	void prepareCompute()
 	{
 		// Create a compute capable device queue
 		// The VulkanDevice::createLogicalDevice functions finds a compute capable queue and prefers queue families that only support compute
 		// Depending on the implementation this may result in different queue family indices for graphics and computes,
-		// requiring proper synchronization (see the memory barriers in buildComputeCommandBuffer)
-		vkGetDeviceQueue(device, vulkanDevice->queueFamilyIndices.compute, 0, &compute.queue);
+		// requiring proper synchronization (see the memory and pipeline barriers)
+		vkGetDeviceQueue(device, compute.queueFamilyIndex, 0, &compute.queue);
 
 		// Create compute pipeline
 		// Compute pipelines are created separate from graphics pipelines even if they use the same queue (family index)
@@ -543,25 +641,77 @@ public:
 		// Separate command pool as queue family for compute may be different than graphics
 		VkCommandPoolCreateInfo cmdPoolInfo = {};
 		cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
-		cmdPoolInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
+		cmdPoolInfo.queueFamilyIndex = compute.queueFamilyIndex;
 		cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
 		VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &compute.commandPool));
 
 		// Create a command buffer for compute operations
-		VkCommandBufferAllocateInfo cmdBufAllocateInfo =
-			vks::initializers::commandBufferAllocateInfo(
-				compute.commandPool,
-				VK_COMMAND_BUFFER_LEVEL_PRIMARY,
-				1);
+		compute.commandBuffer = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, compute.commandPool);
 
-		VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffer));
+		// Semaphore for compute & graphics sync
+		VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
+		VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphore));
 
-		// Fence for compute CB sync
-		VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT);
-		VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &compute.fence));
+		// Signal the semaphore
+		VkSubmitInfo submitInfo = vks::initializers::submitInfo();
+		submitInfo.signalSemaphoreCount = 1;
+		submitInfo.pSignalSemaphores = &compute.semaphore;
+		VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
+		VK_CHECK_RESULT(vkQueueWaitIdle(queue));
 
 		// Build a single command buffer containing the compute dispatch commands
 		buildComputeCommandBuffer();
+
+		// If graphics and compute queue family indices differ, acquire and immediately release the storage buffer, so that the initial acquire from the graphics command buffers are matched up properly
+		if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
+		{
+			// Create a transient command buffer for setting up the initial buffer transfer state
+			VkCommandBuffer transferCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, compute.commandPool, true);
+
+			VkBufferMemoryBarrier acquire_buffer_barrier =
+			{
+				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+				nullptr,
+				0,
+				VK_ACCESS_SHADER_WRITE_BIT,
+				graphics.queueFamilyIndex,
+				compute.queueFamilyIndex,
+				compute.storageBuffer.buffer,
+				0,
+				compute.storageBuffer.size
+			};
+			vkCmdPipelineBarrier(
+				transferCmd,
+				VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+				0,
+				0, nullptr,
+				1, &acquire_buffer_barrier,
+				0, nullptr);
+
+			VkBufferMemoryBarrier release_buffer_barrier =
+			{
+				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+				nullptr,
+				VK_ACCESS_SHADER_WRITE_BIT,
+				0,
+				compute.queueFamilyIndex,
+				graphics.queueFamilyIndex,
+				compute.storageBuffer.buffer,
+				0,
+				compute.storageBuffer.size
+			};
+			vkCmdPipelineBarrier(
+				transferCmd,
+				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+				VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+				0,
+				0, nullptr,
+				1, &release_buffer_barrier,
+				0, nullptr);
+
+			vulkanDevice->flushCommandBuffer(transferCmd, compute.queue, compute.commandPool);
+		}
 	}
 
 	// Prepare and initialize uniform buffer containing shader uniforms
@@ -583,7 +733,7 @@ public:
 	void updateUniformBuffers()
 	{
 		compute.ubo.deltaT = frameTimer * 2.5f;
-		if (animate)
+		if (!attachToCursor)
 		{
 			compute.ubo.destX = sin(glm::radians(timer * 360.0f)) * 0.75f;
 			compute.ubo.destY = 0.0f;
@@ -601,36 +751,50 @@ public:
 
 	void draw()
 	{
-		VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
-		computeSubmitInfo.commandBufferCount = 1;
-		computeSubmitInfo.pCommandBuffers = &compute.commandBuffer;
-
-		VK_CHECK_RESULT( vkQueueSubmit( compute.queue, 1, &computeSubmitInfo, compute.fence ) );
-
-		// Submit graphics commands
 		VulkanExampleBase::prepareFrame();
 
+		VkPipelineStageFlags graphicsWaitStageMasks[] = { VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
+		VkSemaphore graphicsWaitSemaphores[] = { compute.semaphore, semaphores.presentComplete };
+		VkSemaphore graphicsSignalSemaphores[] = { graphics.semaphore, semaphores.renderComplete };
+
+		// Submit graphics commands
 		submitInfo.commandBufferCount = 1;
 		submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer];
+		submitInfo.waitSemaphoreCount = 2;
+		submitInfo.pWaitSemaphores = graphicsWaitSemaphores;
+		submitInfo.pWaitDstStageMask = graphicsWaitStageMasks;
+		submitInfo.signalSemaphoreCount = 2;
+		submitInfo.pSignalSemaphores = graphicsSignalSemaphores;
 		VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
 
 		VulkanExampleBase::submitFrame();
 
+		// Wait for rendering finished
+		VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+
 		// Submit compute commands
-		vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX);
-		vkResetFences(device, 1, &compute.fence);
+		VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
+		computeSubmitInfo.commandBufferCount = 1;
+		computeSubmitInfo.pCommandBuffers = &compute.commandBuffer;
+		computeSubmitInfo.waitSemaphoreCount = 1;
+		computeSubmitInfo.pWaitSemaphores = &graphics.semaphore;
+		computeSubmitInfo.pWaitDstStageMask = &waitStageMask;
+		computeSubmitInfo.signalSemaphoreCount = 1;
+		computeSubmitInfo.pSignalSemaphores = &compute.semaphore;
+		VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, VK_NULL_HANDLE));
+
 	}
 
 	void prepare()
-	{
+	{
 		VulkanExampleBase::prepare();
+		// We will be using the queue family indices to check if graphics and compute queue families differ
+		// If that's the case, we need additional barriers for acquiring and releasing resources
+		graphics.queueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
+		compute.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
 		loadAssets();
-		prepareStorageBuffers();
-		prepareUniformBuffers();
-		setupDescriptorSetLayout();
-		preparePipelines();
 		setupDescriptorPool();
-		setupDescriptorSet();
+		prepareGraphics();
 		prepareCompute();
 		buildCommandBuffers();
 		prepared = true;
@@ -642,7 +806,7 @@ public:
 			return;
 		draw();
 
-		if (animate)
+		if (!attachToCursor)
 		{
 			if (animStart > 0.0f)
 			{
@@ -662,7 +826,7 @@ public:
 	virtual void OnUpdateUIOverlay(vks::UIOverlay *overlay)
 	{
 		if (overlay->header("Settings")) {
-			overlay->checkBox("Moving attractor", &animate);
+			overlay->checkBox("Attach attractor to cursor", &attachToCursor);
 		}
 	}
 };