diff --git a/data/shaders/computenbody/particle_calculate.comp b/data/shaders/computenbody/particle_calculate.comp index f119e505..9ae3d5b2 100644 --- a/data/shaders/computenbody/particle_calculate.comp +++ b/data/shaders/computenbody/particle_calculate.comp @@ -62,6 +62,9 @@ void main() vec3 len = other.xyz - position.xyz; acceleration.xyz += GRAVITY * len * other.w / pow(dot(len, len) + SOFTEN, POWER); } + + memoryBarrierShared(); + barrier(); } particles[index].vel.xyz += ubo.deltaT * acceleration.xyz; diff --git a/data/shaders/computenbody/particle_calculate.comp.spv b/data/shaders/computenbody/particle_calculate.comp.spv index 290f4710..37ba965f 100644 Binary files a/data/shaders/computenbody/particle_calculate.comp.spv and b/data/shaders/computenbody/particle_calculate.comp.spv differ diff --git a/examples/computenbody/computenbody.cpp b/examples/computenbody/computenbody.cpp index f6519fa9..3d714a80 100644 --- a/examples/computenbody/computenbody.cpp +++ b/examples/computenbody/computenbody.cpp @@ -54,6 +54,7 @@ public: VkDescriptorSet descriptorSet; // Particle system rendering shader bindings VkPipelineLayout pipelineLayout; // Layout of the graphics pipeline VkPipeline pipeline; // Particle rendering pipeline + VkSemaphore semaphore; // Execution dependency between compute & graphic submission struct { glm::mat4 projection; glm::mat4 view; @@ -68,7 +69,7 @@ public: VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics) VkCommandPool commandPool; // Use a separate command pool (queue family may differ from the one used for graphics) VkCommandBuffer commandBuffer; // Command buffer storing the dispatch commands and barriers - VkFence fence; // Synchronization fence to avoid rewriting compute CB if still in use + VkSemaphore semaphore; // Execution dependency between compute & graphic submission VkDescriptorSetLayout descriptorSetLayout; // Compute shader binding layout VkDescriptorSet descriptorSet; // Compute shader bindings VkPipelineLayout pipelineLayout; // Layout of the compute pipeline @@ -110,6 +111,7 @@ public: vkDestroyPipeline(device, graphics.pipeline, nullptr); vkDestroyPipelineLayout(device, graphics.pipelineLayout, nullptr); vkDestroyDescriptorSetLayout(device, graphics.descriptorSetLayout, nullptr); + vkDestroySemaphore(device, graphics.semaphore, nullptr); // Compute compute.storageBuffer.destroy(); @@ -118,7 +120,7 @@ public: vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr); vkDestroyPipeline(device, compute.pipelineCalculate, nullptr); vkDestroyPipeline(device, compute.pipelineIntegrate, nullptr); - vkDestroyFence(device, compute.fence, nullptr); + vkDestroySemaphore(device, compute.semaphore, nullptr); vkDestroyCommandPool(device, compute.commandPool, nullptr); textures.particle.destroy(); @@ -244,7 +246,6 @@ public: // ------------------------------------------------------------------------------------------------------- vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineIntegrate); vkCmdDispatch(compute.commandBuffer, numParticles / 256, 1, 1); - // Add memory barrier to ensure that compute shader has finished writing to the buffer // Without this the (rendering) vertex shader may display incomplete results (partial data from last frame) bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; // Compute shader has finished writes to the buffer @@ -541,6 +542,19 @@ public: VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &graphics.pipeline)); } + void prepareGraphics() + { + prepareStorageBuffers(); + prepareUniformBuffers(); + setupDescriptorSetLayout(); + preparePipelines(); + setupDescriptorSet(); + + // Semaphore for compute & graphics sync + VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo(); + VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &graphics.semaphore)); + } + void prepareCompute() { // Create a compute capable device queue @@ -662,9 +676,16 @@ public: VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffer)); - // Fence for compute CB sync - VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT); - VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &compute.fence)); + // Semaphore for compute & graphics sync + VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo(); + VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphore)); + + // Signal the semaphore + VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO }; + submitInfo.signalSemaphoreCount = 1; + submitInfo.pSignalSemaphores = &compute.semaphore; + VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE)); + VK_CHECK_RESULT(vkQueueWaitIdle(queue)); // Build a single command buffer containing the compute dispatch commands buildComputeCommandBuffer(); @@ -715,36 +736,45 @@ public: void draw() { - // Submit graphics commands VulkanExampleBase::prepareFrame(); + VkPipelineStageFlags graphicsWaitStageMasks[] = { VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT }; + VkSemaphore graphicsWaitSemaphores[] = { compute.semaphore, semaphores.presentComplete }; + VkSemaphore graphicsSignalSemaphores[] = { graphics.semaphore, semaphores.renderComplete }; + + // Submit graphics commands submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer]; + submitInfo.waitSemaphoreCount = 2; + submitInfo.pWaitSemaphores = graphicsWaitSemaphores; + submitInfo.pWaitDstStageMask = graphicsWaitStageMasks; + submitInfo.signalSemaphoreCount = 2; + submitInfo.pSignalSemaphores = graphicsSignalSemaphores; VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE)); VulkanExampleBase::submitFrame(); - // Submit compute commands - vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX); - vkResetFences(device, 1, &compute.fence); + // Wait for rendering finished + VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + // Submit compute commands VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo(); computeSubmitInfo.commandBufferCount = 1; computeSubmitInfo.pCommandBuffers = &compute.commandBuffer; - - VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence)); + computeSubmitInfo.waitSemaphoreCount = 1; + computeSubmitInfo.pWaitSemaphores = &graphics.semaphore; + computeSubmitInfo.pWaitDstStageMask = &waitStageMask; + computeSubmitInfo.signalSemaphoreCount = 1; + computeSubmitInfo.pSignalSemaphores = &compute.semaphore; + VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, VK_NULL_HANDLE)); } void prepare() { VulkanExampleBase::prepare(); loadAssets(); - prepareStorageBuffers(); - prepareUniformBuffers(); - setupDescriptorSetLayout(); - preparePipelines(); setupDescriptorPool(); - setupDescriptorSet(); + prepareGraphics(); prepareCompute(); buildCommandBuffers(); prepared = true;