Merge pull request #673 from SaschaWillems/compute_sync

Compute sync
This commit is contained in:
Sascha Willems 2020-02-22 12:42:59 +01:00 committed by GitHub
commit 9041ae3da7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 210 additions and 31 deletions

View file

@ -493,38 +493,42 @@ namespace vks
* Allocate a command buffer from the command pool
*
* @param level Level of the new command buffer (primary or secondary)
* @param pool Command pool from which the command buffer will be allocated
* @param (Optional) begin If true, recording on the new command buffer will be started (vkBeginCommandBuffer) (Defaults to false)
*
* @return A handle to the allocated command buffer
*/
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level, bool begin = false)
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level, VkCommandPool pool, bool begin = false)
{
VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(commandPool, level, 1);
VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(pool, level, 1);
VkCommandBuffer cmdBuffer;
VK_CHECK_RESULT(vkAllocateCommandBuffers(logicalDevice, &cmdBufAllocateInfo, &cmdBuffer));
// If requested, also start recording for the new command buffer
if (begin)
{
VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo();
VK_CHECK_RESULT(vkBeginCommandBuffer(cmdBuffer, &cmdBufInfo));
}
return cmdBuffer;
}
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level, bool begin = false)
{
return createCommandBuffer(level, commandPool, begin);
}
/**
* Finish command buffer recording and submit it to a queue
*
* @param commandBuffer Command buffer to flush
* @param queue Queue to submit the command buffer to
* @param pool Command pool on which the command buffer has been created
* @param free (Optional) Free the command buffer once it has been submitted (Defaults to true)
*
* @note The queue that the command buffer is submitted to must be from the same family index as the pool it was allocated from
* @note Uses a fence to ensure command buffer has finished executing
*/
void flushCommandBuffer(VkCommandBuffer commandBuffer, VkQueue queue, bool free = true)
void flushCommandBuffer(VkCommandBuffer commandBuffer, VkQueue queue, VkCommandPool pool, bool free = true)
{
if (commandBuffer == VK_NULL_HANDLE)
{
@ -536,25 +540,26 @@ namespace vks
VkSubmitInfo submitInfo = vks::initializers::submitInfo();
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &commandBuffer;
// Create fence to ensure that the command buffer has finished executing
VkFenceCreateInfo fenceInfo = vks::initializers::fenceCreateInfo(VK_FLAGS_NONE);
VkFence fence;
VK_CHECK_RESULT(vkCreateFence(logicalDevice, &fenceInfo, nullptr, &fence));
// Submit to the queue
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
// Wait for the fence to signal that command buffer has finished executing
VK_CHECK_RESULT(vkWaitForFences(logicalDevice, 1, &fence, VK_TRUE, DEFAULT_FENCE_TIMEOUT));
vkDestroyFence(logicalDevice, fence, nullptr);
if (free)
{
vkFreeCommandBuffers(logicalDevice, commandPool, 1, &commandBuffer);
vkFreeCommandBuffers(logicalDevice, pool, 1, &commandBuffer);
}
}
void flushCommandBuffer(VkCommandBuffer commandBuffer, VkQueue queue, bool free = true)
{
return flushCommandBuffer(commandBuffer, queue, commandPool, free);
}
/**
* Check if an extension is supported by the (physical device)
*

View file

@ -49,6 +49,7 @@ public:
// Resources for the graphics part of the example
struct {
uint32_t queueFamilyIndex; // Used to check if compute and graphics queue families differ and require additional barriers
vks::Buffer uniformBuffer; // Contains scene matrices
VkDescriptorSetLayout descriptorSetLayout; // Particle system rendering shader binding layout
VkDescriptorSet descriptorSet; // Particle system rendering shader bindings
@ -64,6 +65,7 @@ public:
// Resources for the compute part of the example
struct {
uint32_t queueFamilyIndex; // Used to check if compute and graphics queue families differ and require additional barriers
vks::Buffer storageBuffer; // (Shader) storage buffer object containing the particles
vks::Buffer uniformBuffer; // Uniform buffer object containing particle system parameters
VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics)
@ -162,8 +164,33 @@ public:
VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo));
// Draw the particle system using the update vertex buffer
// Acquire barrier
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
VkBufferMemoryBarrier buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
0,
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
compute.queueFamilyIndex,
graphics.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
drawCmdBuffers[i],
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
0,
0, nullptr,
1, &buffer_barrier,
0, nullptr);
}
// Draw the particle system using the update vertex buffer
vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
VkViewport viewport = vks::initializers::viewport((float)width, (float)height, 0.0f, 1.0f);
@ -173,7 +200,7 @@ public:
vkCmdSetScissor(drawCmdBuffers[i], 0, 1, &scissor);
vkCmdBindPipeline(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipeline);
vkCmdBindDescriptorSets(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipelineLayout, 0, 1, &graphics.descriptorSet, 0, NULL);
vkCmdBindDescriptorSets(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipelineLayout, 0, 1, &graphics.descriptorSet, 0, nullptr);
VkDeviceSize offsets[1] = { 0 };
vkCmdBindVertexBuffers(drawCmdBuffers[i], VERTEX_BUFFER_BIND_ID, 1, &compute.storageBuffer.buffer, offsets);
@ -183,6 +210,32 @@ public:
vkCmdEndRenderPass(drawCmdBuffers[i]);
// Release barrier
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
VkBufferMemoryBarrier buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
0,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
drawCmdBuffers[i],
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0,
0, nullptr,
1, &buffer_barrier,
0, nullptr);
}
VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i]));
}
@ -194,6 +247,32 @@ public:
VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffer, &cmdBufInfo));
// Acquire barrier
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
VkBufferMemoryBarrier buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
0,
VK_ACCESS_SHADER_WRITE_BIT,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
compute.commandBuffer,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0,
0, nullptr,
1, &buffer_barrier,
0, nullptr);
}
// First pass: Calculate particle movement
// -------------------------------------------------------------------------------------------------------
vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineCalculate);
@ -224,6 +303,32 @@ public:
vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineIntegrate);
vkCmdDispatch(compute.commandBuffer, numParticles / 256, 1, 1);
// Release barrier
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
VkBufferMemoryBarrier buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_SHADER_WRITE_BIT,
0,
compute.queueFamilyIndex,
graphics.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
compute.commandBuffer,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
0,
0, nullptr,
1, &buffer_barrier,
0, nullptr);
}
vkEndCommandBuffer(compute.commandBuffer);
}
@ -310,11 +415,36 @@ public:
&compute.storageBuffer,
storageBufferSize);
// Copy to staging buffer
// Copy from staging buffer to storage buffer
VkCommandBuffer copyCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
VkBufferCopy copyRegion = {};
copyRegion.size = storageBufferSize;
vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffer.buffer, 1, &copyRegion);
// Execute a transfer barrier to the compute queue, if necessary
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
VkBufferMemoryBarrier buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
0,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
copyCmd,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0,
0, nullptr,
1, &buffer_barrier,
0, nullptr);
}
VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true);
stagingBuffer.destroy();
@ -411,7 +541,7 @@ public:
vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, &textures.gradient.descriptor),
vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, &graphics.uniformBuffer.descriptor),
};
vkUpdateDescriptorSets(device, static_cast<uint32_t>(writeDescriptorSets.size()), writeDescriptorSets.data(), 0, NULL);
vkUpdateDescriptorSets(device, static_cast<uint32_t>(writeDescriptorSets.size()), writeDescriptorSets.data(), 0, nullptr);
}
void preparePipelines()
@ -520,12 +650,7 @@ public:
// The VulkanDevice::createLogicalDevice functions finds a compute capable queue and prefers queue families that only support compute
// Depending on the implementation this may result in different queue family indices for graphics and computes,
// requiring proper synchronization (see the memory barriers in buildComputeCommandBuffer)
VkDeviceQueueCreateInfo queueCreateInfo = {};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.pNext = NULL;
queueCreateInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
queueCreateInfo.queueCount = 1;
vkGetDeviceQueue(device, vulkanDevice->queueFamilyIndices.compute, 0, &compute.queue);
vkGetDeviceQueue(device, compute.queueFamilyIndex, 0, &compute.queue);
// Create compute pipeline
// Compute pipelines are created separate from graphics pipelines even if they use the same queue (family index)
@ -581,7 +706,7 @@ public:
&compute.uniformBuffer.descriptor)
};
vkUpdateDescriptorSets(device, static_cast<uint32_t>(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, NULL);
vkUpdateDescriptorSets(device, static_cast<uint32_t>(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, nullptr);
// Create pipelines
VkComputePipelineCreateInfo computePipelineCreateInfo = vks::initializers::computePipelineCreateInfo(compute.pipelineLayout, 0);
@ -622,18 +747,12 @@ public:
// Separate command pool as queue family for compute may be different than graphics
VkCommandPoolCreateInfo cmdPoolInfo = {};
cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cmdPoolInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
cmdPoolInfo.queueFamilyIndex = compute.queueFamilyIndex;
cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &compute.commandPool));
// Create a command buffer for compute operations
VkCommandBufferAllocateInfo cmdBufAllocateInfo =
vks::initializers::commandBufferAllocateInfo(
compute.commandPool,
VK_COMMAND_BUFFER_LEVEL_PRIMARY,
1);
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffer));
compute.commandBuffer = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, compute.commandPool);
// Semaphore for compute & graphics sync
VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
@ -648,6 +767,57 @@ public:
// Build a single command buffer containing the compute dispatch commands
buildComputeCommandBuffer();
// If graphics and compute queue family indices differ, acquire and immediately release the storage buffer, so that the initial acquire from the graphics command buffers are matched up properly
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
// Create a transient command buffer for setting up the initial buffer transfer state
VkCommandBuffer transferCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, compute.commandPool, true);
VkBufferMemoryBarrier acquire_buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
0,
VK_ACCESS_SHADER_WRITE_BIT,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
transferCmd,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0,
0, nullptr,
1, &acquire_buffer_barrier,
0, nullptr);
VkBufferMemoryBarrier release_buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_SHADER_WRITE_BIT,
0,
compute.queueFamilyIndex,
graphics.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
transferCmd,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
0,
0, nullptr,
1, &release_buffer_barrier,
0, nullptr);
vulkanDevice->flushCommandBuffer(transferCmd, compute.queue, compute.commandPool);
}
}
// Prepare and initialize uniform buffer containing shader uniforms
@ -729,6 +899,10 @@ public:
void prepare()
{
VulkanExampleBase::prepare();
// We will be using the queue family indices to check if graphics and compute queue families differ
// If that's the case, we need additional barriers for acquiring and releasing resources
graphics.queueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
compute.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
loadAssets();
setupDescriptorPool();
prepareGraphics();