Merge pull request #673 from SaschaWillems/compute_sync

Compute sync
This commit is contained in:
Sascha Willems 2020-02-22 12:42:59 +01:00 committed by GitHub
commit 9041ae3da7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 210 additions and 31 deletions

View file

@ -493,38 +493,42 @@ namespace vks
* Allocate a command buffer from the command pool * Allocate a command buffer from the command pool
* *
* @param level Level of the new command buffer (primary or secondary) * @param level Level of the new command buffer (primary or secondary)
* @param pool Command pool from which the command buffer will be allocated
* @param (Optional) begin If true, recording on the new command buffer will be started (vkBeginCommandBuffer) (Defaults to false) * @param (Optional) begin If true, recording on the new command buffer will be started (vkBeginCommandBuffer) (Defaults to false)
* *
* @return A handle to the allocated command buffer * @return A handle to the allocated command buffer
*/ */
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level, bool begin = false) VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level, VkCommandPool pool, bool begin = false)
{ {
VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(commandPool, level, 1); VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(pool, level, 1);
VkCommandBuffer cmdBuffer; VkCommandBuffer cmdBuffer;
VK_CHECK_RESULT(vkAllocateCommandBuffers(logicalDevice, &cmdBufAllocateInfo, &cmdBuffer)); VK_CHECK_RESULT(vkAllocateCommandBuffers(logicalDevice, &cmdBufAllocateInfo, &cmdBuffer));
// If requested, also start recording for the new command buffer // If requested, also start recording for the new command buffer
if (begin) if (begin)
{ {
VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo(); VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo();
VK_CHECK_RESULT(vkBeginCommandBuffer(cmdBuffer, &cmdBufInfo)); VK_CHECK_RESULT(vkBeginCommandBuffer(cmdBuffer, &cmdBufInfo));
} }
return cmdBuffer; return cmdBuffer;
} }
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level, bool begin = false)
{
return createCommandBuffer(level, commandPool, begin);
}
/** /**
* Finish command buffer recording and submit it to a queue * Finish command buffer recording and submit it to a queue
* *
* @param commandBuffer Command buffer to flush * @param commandBuffer Command buffer to flush
* @param queue Queue to submit the command buffer to * @param queue Queue to submit the command buffer to
* @param pool Command pool on which the command buffer has been created
* @param free (Optional) Free the command buffer once it has been submitted (Defaults to true) * @param free (Optional) Free the command buffer once it has been submitted (Defaults to true)
* *
* @note The queue that the command buffer is submitted to must be from the same family index as the pool it was allocated from * @note The queue that the command buffer is submitted to must be from the same family index as the pool it was allocated from
* @note Uses a fence to ensure command buffer has finished executing * @note Uses a fence to ensure command buffer has finished executing
*/ */
void flushCommandBuffer(VkCommandBuffer commandBuffer, VkQueue queue, bool free = true) void flushCommandBuffer(VkCommandBuffer commandBuffer, VkQueue queue, VkCommandPool pool, bool free = true)
{ {
if (commandBuffer == VK_NULL_HANDLE) if (commandBuffer == VK_NULL_HANDLE)
{ {
@ -536,25 +540,26 @@ namespace vks
VkSubmitInfo submitInfo = vks::initializers::submitInfo(); VkSubmitInfo submitInfo = vks::initializers::submitInfo();
submitInfo.commandBufferCount = 1; submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &commandBuffer; submitInfo.pCommandBuffers = &commandBuffer;
// Create fence to ensure that the command buffer has finished executing // Create fence to ensure that the command buffer has finished executing
VkFenceCreateInfo fenceInfo = vks::initializers::fenceCreateInfo(VK_FLAGS_NONE); VkFenceCreateInfo fenceInfo = vks::initializers::fenceCreateInfo(VK_FLAGS_NONE);
VkFence fence; VkFence fence;
VK_CHECK_RESULT(vkCreateFence(logicalDevice, &fenceInfo, nullptr, &fence)); VK_CHECK_RESULT(vkCreateFence(logicalDevice, &fenceInfo, nullptr, &fence));
// Submit to the queue // Submit to the queue
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence)); VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
// Wait for the fence to signal that command buffer has finished executing // Wait for the fence to signal that command buffer has finished executing
VK_CHECK_RESULT(vkWaitForFences(logicalDevice, 1, &fence, VK_TRUE, DEFAULT_FENCE_TIMEOUT)); VK_CHECK_RESULT(vkWaitForFences(logicalDevice, 1, &fence, VK_TRUE, DEFAULT_FENCE_TIMEOUT));
vkDestroyFence(logicalDevice, fence, nullptr); vkDestroyFence(logicalDevice, fence, nullptr);
if (free) if (free)
{ {
vkFreeCommandBuffers(logicalDevice, commandPool, 1, &commandBuffer); vkFreeCommandBuffers(logicalDevice, pool, 1, &commandBuffer);
} }
} }
void flushCommandBuffer(VkCommandBuffer commandBuffer, VkQueue queue, bool free = true)
{
return flushCommandBuffer(commandBuffer, queue, commandPool, free);
}
/** /**
* Check if an extension is supported by the (physical device) * Check if an extension is supported by the (physical device)
* *

View file

@ -49,6 +49,7 @@ public:
// Resources for the graphics part of the example // Resources for the graphics part of the example
struct { struct {
uint32_t queueFamilyIndex; // Used to check if compute and graphics queue families differ and require additional barriers
vks::Buffer uniformBuffer; // Contains scene matrices vks::Buffer uniformBuffer; // Contains scene matrices
VkDescriptorSetLayout descriptorSetLayout; // Particle system rendering shader binding layout VkDescriptorSetLayout descriptorSetLayout; // Particle system rendering shader binding layout
VkDescriptorSet descriptorSet; // Particle system rendering shader bindings VkDescriptorSet descriptorSet; // Particle system rendering shader bindings
@ -64,6 +65,7 @@ public:
// Resources for the compute part of the example // Resources for the compute part of the example
struct { struct {
uint32_t queueFamilyIndex; // Used to check if compute and graphics queue families differ and require additional barriers
vks::Buffer storageBuffer; // (Shader) storage buffer object containing the particles vks::Buffer storageBuffer; // (Shader) storage buffer object containing the particles
vks::Buffer uniformBuffer; // Uniform buffer object containing particle system parameters vks::Buffer uniformBuffer; // Uniform buffer object containing particle system parameters
VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics) VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics)
@ -162,8 +164,33 @@ public:
VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo)); VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo));
// Draw the particle system using the update vertex buffer // Acquire barrier
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
VkBufferMemoryBarrier buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
0,
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
compute.queueFamilyIndex,
graphics.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
drawCmdBuffers[i],
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
0,
0, nullptr,
1, &buffer_barrier,
0, nullptr);
}
// Draw the particle system using the updated vertex buffer
vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE); vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
VkViewport viewport = vks::initializers::viewport((float)width, (float)height, 0.0f, 1.0f); VkViewport viewport = vks::initializers::viewport((float)width, (float)height, 0.0f, 1.0f);
@ -173,7 +200,7 @@ public:
vkCmdSetScissor(drawCmdBuffers[i], 0, 1, &scissor); vkCmdSetScissor(drawCmdBuffers[i], 0, 1, &scissor);
vkCmdBindPipeline(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipeline); vkCmdBindPipeline(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipeline);
vkCmdBindDescriptorSets(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipelineLayout, 0, 1, &graphics.descriptorSet, 0, NULL); vkCmdBindDescriptorSets(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipelineLayout, 0, 1, &graphics.descriptorSet, 0, nullptr);
VkDeviceSize offsets[1] = { 0 }; VkDeviceSize offsets[1] = { 0 };
vkCmdBindVertexBuffers(drawCmdBuffers[i], VERTEX_BUFFER_BIND_ID, 1, &compute.storageBuffer.buffer, offsets); vkCmdBindVertexBuffers(drawCmdBuffers[i], VERTEX_BUFFER_BIND_ID, 1, &compute.storageBuffer.buffer, offsets);
@ -183,6 +210,32 @@ public:
vkCmdEndRenderPass(drawCmdBuffers[i]); vkCmdEndRenderPass(drawCmdBuffers[i]);
// Release barrier
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
VkBufferMemoryBarrier buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
0,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
drawCmdBuffers[i],
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0,
0, nullptr,
1, &buffer_barrier,
0, nullptr);
}
VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i])); VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i]));
} }
@ -194,6 +247,32 @@ public:
VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffer, &cmdBufInfo)); VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffer, &cmdBufInfo));
// Acquire barrier
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
VkBufferMemoryBarrier buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
0,
VK_ACCESS_SHADER_WRITE_BIT,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
compute.commandBuffer,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0,
0, nullptr,
1, &buffer_barrier,
0, nullptr);
}
// First pass: Calculate particle movement // First pass: Calculate particle movement
// ------------------------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------------------------
vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineCalculate); vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineCalculate);
@ -224,6 +303,32 @@ public:
vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineIntegrate); vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineIntegrate);
vkCmdDispatch(compute.commandBuffer, numParticles / 256, 1, 1); vkCmdDispatch(compute.commandBuffer, numParticles / 256, 1, 1);
// Release barrier
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
VkBufferMemoryBarrier buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_SHADER_WRITE_BIT,
0,
compute.queueFamilyIndex,
graphics.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
compute.commandBuffer,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
0,
0, nullptr,
1, &buffer_barrier,
0, nullptr);
}
vkEndCommandBuffer(compute.commandBuffer); vkEndCommandBuffer(compute.commandBuffer);
} }
@ -310,11 +415,36 @@ public:
&compute.storageBuffer, &compute.storageBuffer,
storageBufferSize); storageBufferSize);
// Copy to staging buffer // Copy from staging buffer to storage buffer
VkCommandBuffer copyCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); VkCommandBuffer copyCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
VkBufferCopy copyRegion = {}; VkBufferCopy copyRegion = {};
copyRegion.size = storageBufferSize; copyRegion.size = storageBufferSize;
vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffer.buffer, 1, &copyRegion); vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffer.buffer, 1, &copyRegion);
// Execute a transfer barrier to the compute queue, if necessary
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
VkBufferMemoryBarrier buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
0,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
copyCmd,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0,
0, nullptr,
1, &buffer_barrier,
0, nullptr);
}
VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true); VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true);
stagingBuffer.destroy(); stagingBuffer.destroy();
@ -411,7 +541,7 @@ public:
vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, &textures.gradient.descriptor), vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, &textures.gradient.descriptor),
vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, &graphics.uniformBuffer.descriptor), vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, &graphics.uniformBuffer.descriptor),
}; };
vkUpdateDescriptorSets(device, static_cast<uint32_t>(writeDescriptorSets.size()), writeDescriptorSets.data(), 0, NULL); vkUpdateDescriptorSets(device, static_cast<uint32_t>(writeDescriptorSets.size()), writeDescriptorSets.data(), 0, nullptr);
} }
void preparePipelines() void preparePipelines()
@ -520,12 +650,7 @@ public:
// The VulkanDevice::createLogicalDevice function finds a compute capable queue and prefers queue families that only support compute // The VulkanDevice::createLogicalDevice function finds a compute capable queue and prefers queue families that only support compute
// Depending on the implementation this may result in different queue family indices for graphics and compute, // Depending on the implementation this may result in different queue family indices for graphics and compute,
// requiring proper synchronization (see the memory barriers in buildComputeCommandBuffer) // requiring proper synchronization (see the memory barriers in buildComputeCommandBuffer)
VkDeviceQueueCreateInfo queueCreateInfo = {}; vkGetDeviceQueue(device, compute.queueFamilyIndex, 0, &compute.queue);
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.pNext = NULL;
queueCreateInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
queueCreateInfo.queueCount = 1;
vkGetDeviceQueue(device, vulkanDevice->queueFamilyIndices.compute, 0, &compute.queue);
// Create compute pipeline // Create compute pipeline
// Compute pipelines are created separate from graphics pipelines even if they use the same queue (family index) // Compute pipelines are created separate from graphics pipelines even if they use the same queue (family index)
@ -581,7 +706,7 @@ public:
&compute.uniformBuffer.descriptor) &compute.uniformBuffer.descriptor)
}; };
vkUpdateDescriptorSets(device, static_cast<uint32_t>(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, NULL); vkUpdateDescriptorSets(device, static_cast<uint32_t>(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, nullptr);
// Create pipelines // Create pipelines
VkComputePipelineCreateInfo computePipelineCreateInfo = vks::initializers::computePipelineCreateInfo(compute.pipelineLayout, 0); VkComputePipelineCreateInfo computePipelineCreateInfo = vks::initializers::computePipelineCreateInfo(compute.pipelineLayout, 0);
@ -622,18 +747,12 @@ public:
// Separate command pool as queue family for compute may be different than graphics // Separate command pool as queue family for compute may be different than graphics
VkCommandPoolCreateInfo cmdPoolInfo = {}; VkCommandPoolCreateInfo cmdPoolInfo = {};
cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cmdPoolInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; cmdPoolInfo.queueFamilyIndex = compute.queueFamilyIndex;
cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &compute.commandPool)); VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &compute.commandPool));
// Create a command buffer for compute operations // Create a command buffer for compute operations
VkCommandBufferAllocateInfo cmdBufAllocateInfo = compute.commandBuffer = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, compute.commandPool);
vks::initializers::commandBufferAllocateInfo(
compute.commandPool,
VK_COMMAND_BUFFER_LEVEL_PRIMARY,
1);
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffer));
// Semaphore for compute & graphics sync // Semaphore for compute & graphics sync
VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo(); VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
@ -648,6 +767,57 @@ public:
// Build a single command buffer containing the compute dispatch commands // Build a single command buffer containing the compute dispatch commands
buildComputeCommandBuffer(); buildComputeCommandBuffer();
// If graphics and compute queue family indices differ, acquire and immediately release the storage buffer, so that the initial acquire from the graphics command buffers is matched up properly
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
// Create a transient command buffer for setting up the initial buffer transfer state
VkCommandBuffer transferCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, compute.commandPool, true);
VkBufferMemoryBarrier acquire_buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
0,
VK_ACCESS_SHADER_WRITE_BIT,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
transferCmd,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0,
0, nullptr,
1, &acquire_buffer_barrier,
0, nullptr);
VkBufferMemoryBarrier release_buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_SHADER_WRITE_BIT,
0,
compute.queueFamilyIndex,
graphics.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
transferCmd,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
0,
0, nullptr,
1, &release_buffer_barrier,
0, nullptr);
vulkanDevice->flushCommandBuffer(transferCmd, compute.queue, compute.commandPool);
}
} }
// Prepare and initialize uniform buffer containing shader uniforms // Prepare and initialize uniform buffer containing shader uniforms
@ -729,6 +899,10 @@ public:
void prepare() void prepare()
{ {
VulkanExampleBase::prepare(); VulkanExampleBase::prepare();
// We will be using the queue family indices to check if graphics and compute queue families differ
// If that's the case, we need additional barriers for acquiring and releasing resources
graphics.queueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
compute.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
loadAssets(); loadAssets();
setupDescriptorPool(); setupDescriptorPool();
prepareGraphics(); prepareGraphics();