Fix some synchronization flaws

- shader: add barrier at the end of each iteration to avoid writing before the reading finished
- cpp: add synchronization between compute and graphics, the image barrier is not enough for the data visibility between different submission

	modified:   data/shaders/computenbody/particle_calculate.comp
	modified:   data/shaders/computenbody/particle_calculate.comp.spv
	modified:   examples/computenbody/computenbody.cpp
This commit is contained in:
JC Liang 2019-06-17 19:31:00 +08:00
parent b2add91d2c
commit b575a1ff1a
3 changed files with 50 additions and 17 deletions

View file

@ -54,6 +54,7 @@ public:
VkDescriptorSet descriptorSet; // Particle system rendering shader bindings
VkPipelineLayout pipelineLayout; // Layout of the graphics pipeline
VkPipeline pipeline; // Particle rendering pipeline
VkSemaphore semaphore; // Execution dependency between compute & graphic submission
struct {
glm::mat4 projection;
glm::mat4 view;
@ -68,7 +69,7 @@ public:
VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics)
VkCommandPool commandPool; // Use a separate command pool (queue family may differ from the one used for graphics)
VkCommandBuffer commandBuffer; // Command buffer storing the dispatch commands and barriers
VkFence fence; // Synchronization fence to avoid rewriting compute CB if still in use
VkSemaphore semaphore; // Execution dependency between compute & graphic submission
VkDescriptorSetLayout descriptorSetLayout; // Compute shader binding layout
VkDescriptorSet descriptorSet; // Compute shader bindings
VkPipelineLayout pipelineLayout; // Layout of the compute pipeline
@ -110,6 +111,7 @@ public:
vkDestroyPipeline(device, graphics.pipeline, nullptr);
vkDestroyPipelineLayout(device, graphics.pipelineLayout, nullptr);
vkDestroyDescriptorSetLayout(device, graphics.descriptorSetLayout, nullptr);
vkDestroySemaphore(device, graphics.semaphore, nullptr);
// Compute
compute.storageBuffer.destroy();
@ -118,7 +120,7 @@ public:
vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr);
vkDestroyPipeline(device, compute.pipelineCalculate, nullptr);
vkDestroyPipeline(device, compute.pipelineIntegrate, nullptr);
vkDestroyFence(device, compute.fence, nullptr);
vkDestroySemaphore(device, compute.semaphore, nullptr);
vkDestroyCommandPool(device, compute.commandPool, nullptr);
textures.particle.destroy();
@ -244,7 +246,6 @@ public:
// -------------------------------------------------------------------------------------------------------
vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineIntegrate);
vkCmdDispatch(compute.commandBuffer, numParticles / 256, 1, 1);
// Add memory barrier to ensure that compute shader has finished writing to the buffer
// Without this the (rendering) vertex shader may display incomplete results (partial data from last frame)
bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; // Compute shader has finished writes to the buffer
@ -541,6 +542,19 @@ public:
VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &graphics.pipeline));
}
void prepareGraphics()
{
prepareStorageBuffers();
prepareUniformBuffers();
setupDescriptorSetLayout();
preparePipelines();
setupDescriptorSet();
// Semaphore for compute & graphics sync
VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &graphics.semaphore));
}
void prepareCompute()
{
// Create a compute capable device queue
@ -662,9 +676,16 @@ public:
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffer));
// Fence for compute CB sync
VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT);
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &compute.fence));
// Semaphore for compute & graphics sync
VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphore));
// Signal the semaphore
VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = &compute.semaphore;
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
VK_CHECK_RESULT(vkQueueWaitIdle(queue));
// Build a single command buffer containing the compute dispatch commands
buildComputeCommandBuffer();
@ -715,36 +736,45 @@ public:
void draw()
{
// Submit graphics commands
VulkanExampleBase::prepareFrame();
VkPipelineStageFlags graphicsWaitStageMasks[] = { VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
VkSemaphore graphicsWaitSemaphores[] = { compute.semaphore, semaphores.presentComplete };
VkSemaphore graphicsSignalSemaphores[] = { graphics.semaphore, semaphores.renderComplete };
// Submit graphics commands
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer];
submitInfo.waitSemaphoreCount = 2;
submitInfo.pWaitSemaphores = graphicsWaitSemaphores;
submitInfo.pWaitDstStageMask = graphicsWaitStageMasks;
submitInfo.signalSemaphoreCount = 2;
submitInfo.pSignalSemaphores = graphicsSignalSemaphores;
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
VulkanExampleBase::submitFrame();
// Submit compute commands
vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX);
vkResetFences(device, 1, &compute.fence);
// Wait for rendering finished
VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
// Submit compute commands
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
computeSubmitInfo.commandBufferCount = 1;
computeSubmitInfo.pCommandBuffers = &compute.commandBuffer;
VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence));
computeSubmitInfo.waitSemaphoreCount = 1;
computeSubmitInfo.pWaitSemaphores = &graphics.semaphore;
computeSubmitInfo.pWaitDstStageMask = &waitStageMask;
computeSubmitInfo.signalSemaphoreCount = 1;
computeSubmitInfo.pSignalSemaphores = &compute.semaphore;
VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, VK_NULL_HANDLE));
}
void prepare()
{
VulkanExampleBase::prepare();
loadAssets();
prepareStorageBuffers();
prepareUniformBuffers();
setupDescriptorSetLayout();
preparePipelines();
setupDescriptorPool();
setupDescriptorSet();
prepareGraphics();
prepareCompute();
buildCommandBuffers();
prepared = true;