Fix validation and improve sync for computecloth

This commit is contained in:
Brad Davis 2020-01-02 13:04:03 -08:00
parent e18d428249
commit 055d90ea30

View file

@ -66,11 +66,14 @@ public:
vks::Buffer input;
vks::Buffer output;
} storageBuffers;
struct Semaphores {
VkSemaphore ready{ nullptr };
VkSemaphore complete{ nullptr };
} semaphores;
vks::Buffer uniformBuffer;
VkQueue queue;
VkCommandPool commandPool;
std::array<VkCommandBuffer,2> commandBuffers;
VkFence fence;
VkDescriptorSetLayout descriptorSetLayout;
std::array<VkDescriptorSet,2> descriptorSets;
VkPipelineLayout pipelineLayout;
@ -133,7 +136,8 @@ public:
vkDestroyPipelineLayout(device, compute.pipelineLayout, nullptr);
vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr);
vkDestroyPipeline(device, compute.pipeline, nullptr);
vkDestroyFence(device, compute.fence, nullptr);
vkDestroySemaphore(device, compute.semaphores.ready, nullptr);
vkDestroySemaphore(device, compute.semaphores.complete, nullptr);
vkDestroyCommandPool(device, compute.commandPool, nullptr);
}
@ -151,7 +155,76 @@ public:
modelSphere.loadFromFile(getAssetPath() + "models/geosphere.obj", vertexLayout, compute.ubo.sphereRadius * 0.05f, vulkanDevice, queue);
}
void buildCommandBuffers()
void addGraphicsToComputeBarriers(VkCommandBuffer commandBuffer)
{
VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier();
bufferBarrier.srcAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
bufferBarrier.size = VK_WHOLE_SIZE;
std::vector<VkBufferMemoryBarrier> bufferBarriers;
bufferBarrier.buffer = compute.storageBuffers.input.buffer;
bufferBarriers.push_back(bufferBarrier);
bufferBarrier.buffer = compute.storageBuffers.output.buffer;
bufferBarriers.push_back(bufferBarrier);
vkCmdPipelineBarrier(commandBuffer,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_FLAGS_NONE,
0, nullptr,
static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(),
0, nullptr);
}
void addComputeToComputeBarriers(VkCommandBuffer commandBuffer)
{
VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier();
bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
bufferBarrier.size = VK_WHOLE_SIZE;
std::vector<VkBufferMemoryBarrier> bufferBarriers;
bufferBarrier.buffer = compute.storageBuffers.input.buffer;
bufferBarriers.push_back(bufferBarrier);
bufferBarrier.buffer = compute.storageBuffers.output.buffer;
bufferBarriers.push_back(bufferBarrier);
vkCmdPipelineBarrier(
commandBuffer,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_FLAGS_NONE,
0, nullptr,
static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(),
0, nullptr);
}
void addComputeToGraphicsBarriers(VkCommandBuffer commandBuffer)
{
VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier();
bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
bufferBarrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
bufferBarrier.size = VK_WHOLE_SIZE;
std::vector<VkBufferMemoryBarrier> bufferBarriers;
bufferBarrier.buffer = compute.storageBuffers.input.buffer;
bufferBarriers.push_back(bufferBarrier);
bufferBarrier.buffer = compute.storageBuffers.output.buffer;
bufferBarriers.push_back(bufferBarrier);
vkCmdPipelineBarrier(
commandBuffer,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_FLAGS_NONE,
0, nullptr,
static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(),
0, nullptr);
}
void buildCommandBuffers()
{
// Destroy command buffers if already present
if (!checkCommandBuffers())
@ -182,6 +255,9 @@ public:
VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo));
// Acquire storage buffers from compute queue
addComputeToGraphicsBarriers(drawCmdBuffers[i]);
// Draw the particle system using the update vertex buffer
vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
@ -214,6 +290,9 @@ public:
vkCmdEndRenderPass(drawCmdBuffers[i]);
// release the storage buffers to the compute queue
addGraphicsToComputeBarriers(drawCmdBuffers[i]);
VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i]));
}
@ -223,33 +302,16 @@ public:
void buildComputeCommandBuffer()
{
VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo();
cmdBufInfo.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
for (uint32_t i = 0; i < 2; i++) {
VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffers[i], &cmdBufInfo));
VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier();
bufferBarrier.srcAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
bufferBarrier.size = VK_WHOLE_SIZE;
// Acquire the storage buffers from the graphics queue
addGraphicsToComputeBarriers(compute.commandBuffers[i]);
std::vector<VkBufferMemoryBarrier> bufferBarriers;
bufferBarrier.buffer = compute.storageBuffers.input.buffer;
bufferBarriers.push_back(bufferBarrier);
bufferBarrier.buffer = compute.storageBuffers.output.buffer;
bufferBarriers.push_back(bufferBarrier);
vkCmdPipelineBarrier(compute.commandBuffers[i],
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_FLAGS_NONE,
0, nullptr,
static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(),
0, nullptr);
vkCmdBindPipeline(compute.commandBuffers[i], VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline);
vkCmdBindPipeline(compute.commandBuffers[i], VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline);
uint32_t calculateNormals = 0;
vkCmdPushConstants(compute.commandBuffers[i], compute.pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &calculateNormals);
@ -267,40 +329,15 @@ public:
vkCmdDispatch(compute.commandBuffers[i], cloth.gridsize.x / 10, cloth.gridsize.y / 10, 1);
for (auto &barrier : bufferBarriers) {
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
barrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
}
vkCmdPipelineBarrier(
compute.commandBuffers[i],
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_FLAGS_NONE,
0, nullptr,
static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(),
0, nullptr);
// Don't add a barrier on the last iteration of the loop, since we'll have an explicit release to the graphics queue
if (j != iterations - 1) {
addComputeToComputeBarriers(compute.commandBuffers[i]);
}
}
for (auto &barrier : bufferBarriers) {
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
barrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
barrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
}
vkCmdPipelineBarrier(
compute.commandBuffers[i],
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_FLAGS_NONE,
0, nullptr,
static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(),
0, nullptr);
// release the storage buffers back to the graphics queue
addComputeToGraphicsBarriers(compute.commandBuffers[i]);
vkEndCommandBuffer(compute.commandBuffers[i]);
}
}
@ -380,6 +417,10 @@ public:
copyRegion.size = storageBufferSize;
vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffers.input.buffer, 1, &copyRegion);
vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffers.output.buffer, 1, &copyRegion);
// Add an initial release barrier to the graphics queue,
// so that when the compute command buffer executes for the first time
// it doesn't complain about a lack of a corresponding "release" to it's "acquire"
addGraphicsToComputeBarriers(copyCmd);
VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true);
stagingBuffer.destroy();
@ -549,6 +590,7 @@ public:
};
inputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(inputAttributes.size());
inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
inputAssemblyState.primitiveRestartEnable = VK_FALSE;
rasterizationState.polygonMode = VK_POLYGON_MODE_FILL;
shaderStages[0] = loadShader(getAssetPath() + "shaders/computecloth/sphere.vert.spv", VK_SHADER_STAGE_VERTEX_BIT);
shaderStages[1] = loadShader(getAssetPath() + "shaders/computecloth/sphere.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT);
@ -620,9 +662,10 @@ public:
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffers[0]));
// Fence for compute CB sync
VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT);
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &compute.fence));
// Semaphores for graphics / compute synchronization
VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphores.ready));
VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphores.complete));
// Build a single command buffer containing the compute dispatch commands
buildComputeCommandBuffer();
@ -694,23 +737,47 @@ public:
void draw()
{
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
computeSubmitInfo.commandBufferCount = 1;
computeSubmitInfo.pCommandBuffers = &compute.commandBuffers[readSet];
static bool firstDraw = true;
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
// FIXME find a better way to do this (without using fences, which is much slower)
VkPipelineStageFlags computeWaitDstStageMask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
if (!firstDraw) {
computeSubmitInfo.waitSemaphoreCount = 1;
computeSubmitInfo.pWaitSemaphores = &compute.semaphores.ready;
computeSubmitInfo.pWaitDstStageMask = &computeWaitDstStageMask;
} else {
firstDraw = false;
}
computeSubmitInfo.signalSemaphoreCount = 1;
computeSubmitInfo.pSignalSemaphores = &compute.semaphores.complete;
computeSubmitInfo.commandBufferCount = 1;
computeSubmitInfo.pCommandBuffers = &compute.commandBuffers[readSet];
VK_CHECK_RESULT( vkQueueSubmit( compute.queue, 1, &computeSubmitInfo, compute.fence ) );
VK_CHECK_RESULT( vkQueueSubmit( compute.queue, 1, &computeSubmitInfo, VK_NULL_HANDLE) );
// Submit graphics commands
VulkanExampleBase::prepareFrame();
VkPipelineStageFlags waitDstStageMask[2] = {
submitPipelineStages, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
};
VkSemaphore waitSemaphores[2] = {
semaphores.presentComplete, compute.semaphores.complete
};
VkSemaphore signalSemaphores[2] = {
semaphores.renderComplete, compute.semaphores.ready
};
submitInfo.waitSemaphoreCount = 2;
submitInfo.pWaitDstStageMask = waitDstStageMask;
submitInfo.pWaitSemaphores = waitSemaphores;
submitInfo.signalSemaphoreCount = 2;
submitInfo.pSignalSemaphores = signalSemaphores;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer];
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
VulkanExampleBase::submitFrame();
vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX);
vkResetFences(device, 1, &compute.fence);
}
void prepare()