diff --git a/examples/computecloth/computecloth.cpp b/examples/computecloth/computecloth.cpp index 7f5e3c28..4017b52b 100644 --- a/examples/computecloth/computecloth.cpp +++ b/examples/computecloth/computecloth.cpp @@ -66,11 +66,14 @@ public: vks::Buffer input; vks::Buffer output; } storageBuffers; + struct Semaphores { + VkSemaphore ready{ nullptr }; + VkSemaphore complete{ nullptr }; + } semaphores; vks::Buffer uniformBuffer; VkQueue queue; VkCommandPool commandPool; std::array<VkCommandBuffer, 2> commandBuffers; - VkFence fence; VkDescriptorSetLayout descriptorSetLayout; std::array<VkDescriptorSet, 2> descriptorSets; VkPipelineLayout pipelineLayout; @@ -133,7 +136,8 @@ public: vkDestroyPipelineLayout(device, compute.pipelineLayout, nullptr); vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr); vkDestroyPipeline(device, compute.pipeline, nullptr); - vkDestroyFence(device, compute.fence, nullptr); + vkDestroySemaphore(device, compute.semaphores.ready, nullptr); + vkDestroySemaphore(device, compute.semaphores.complete, nullptr); vkDestroyCommandPool(device, compute.commandPool, nullptr); } @@ -151,7 +155,76 @@ public: modelSphere.loadFromFile(getAssetPath() + "models/geosphere.obj", vertexLayout, compute.ubo.sphereRadius * 0.05f, vulkanDevice, queue); } - void buildCommandBuffers() + void addGraphicsToComputeBarriers(VkCommandBuffer commandBuffer) + { + VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); + bufferBarrier.srcAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; + bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + bufferBarrier.size = VK_WHOLE_SIZE; + + std::vector<VkBufferMemoryBarrier> bufferBarriers; + bufferBarrier.buffer = compute.storageBuffers.input.buffer; + bufferBarriers.push_back(bufferBarrier); + bufferBarrier.buffer = compute.storageBuffers.output.buffer; + 
bufferBarriers.push_back(bufferBarrier); + vkCmdPipelineBarrier(commandBuffer, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_FLAGS_NONE, + 0, nullptr, + static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(), + 0, nullptr); + } + + void addComputeToComputeBarriers(VkCommandBuffer commandBuffer) + { + VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); + bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + bufferBarrier.size = VK_WHOLE_SIZE; + std::vector<VkBufferMemoryBarrier> bufferBarriers; + bufferBarrier.buffer = compute.storageBuffers.input.buffer; + bufferBarriers.push_back(bufferBarrier); + bufferBarrier.buffer = compute.storageBuffers.output.buffer; + bufferBarriers.push_back(bufferBarrier); + vkCmdPipelineBarrier( + commandBuffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_FLAGS_NONE, + 0, nullptr, + static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(), + 0, nullptr); + } + + void addComputeToGraphicsBarriers(VkCommandBuffer commandBuffer) + { + VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); + bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + bufferBarrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; + bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; + bufferBarrier.size = VK_WHOLE_SIZE; + std::vector<VkBufferMemoryBarrier> bufferBarriers; + bufferBarrier.buffer = compute.storageBuffers.input.buffer; + bufferBarriers.push_back(bufferBarrier); + bufferBarrier.buffer = compute.storageBuffers.output.buffer; + bufferBarriers.push_back(bufferBarrier); + vkCmdPipelineBarrier( + commandBuffer, + 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_FLAGS_NONE, + 0, nullptr, + static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(), + 0, nullptr); + } + + void buildCommandBuffers() { // Destroy command buffers if already present if (!checkCommandBuffers()) @@ -182,6 +255,9 @@ public: VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo)); + // Acquire storage buffers from compute queue + addComputeToGraphicsBarriers(drawCmdBuffers[i]); + // Draw the particle system using the update vertex buffer vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE); @@ -214,6 +290,9 @@ public: vkCmdEndRenderPass(drawCmdBuffers[i]); + // release the storage buffers to the compute queue + addGraphicsToComputeBarriers(drawCmdBuffers[i]); + VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i])); } @@ -223,33 +302,16 @@ public: void buildComputeCommandBuffer() { VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo(); + cmdBufInfo.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; for (uint32_t i = 0; i < 2; i++) { VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffers[i], &cmdBufInfo)); - VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); - bufferBarrier.srcAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; - bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; - bufferBarrier.size = VK_WHOLE_SIZE; + // Acquire the storage buffers from the graphics queue + addGraphicsToComputeBarriers(compute.commandBuffers[i]); - std::vector<VkBufferMemoryBarrier> bufferBarriers; - bufferBarrier.buffer = compute.storageBuffers.input.buffer; - bufferBarriers.push_back(bufferBarrier); - bufferBarrier.buffer = compute.storageBuffers.output.buffer; - bufferBarriers.push_back(bufferBarrier); - - 
vkCmdPipelineBarrier(compute.commandBuffers[i], - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_FLAGS_NONE, - 0, nullptr, - static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(), - 0, nullptr); - - vkCmdBindPipeline(compute.commandBuffers[i], VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline); + vkCmdBindPipeline(compute.commandBuffers[i], VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline); uint32_t calculateNormals = 0; vkCmdPushConstants(compute.commandBuffers[i], compute.pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &calculateNormals); @@ -267,40 +329,15 @@ public: vkCmdDispatch(compute.commandBuffers[i], cloth.gridsize.x / 10, cloth.gridsize.y / 10, 1); - for (auto &barrier : bufferBarriers) { - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; - barrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; - } - - vkCmdPipelineBarrier( - compute.commandBuffers[i], - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_FLAGS_NONE, - 0, nullptr, - static_cast<uint32_t>(bufferBarriers.size()), bufferBarriers.data(), - 0, nullptr); + // Don't add a barrier on the last iteration of the loop, since we'll have an explicit release to the graphics queue + if (j != iterations - 1) { + addComputeToComputeBarriers(compute.commandBuffers[i]); + } } - for (auto &barrier : bufferBarriers) { - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - barrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; - barrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; - } - - vkCmdPipelineBarrier( - compute.commandBuffers[i], - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_FLAGS_NONE, - 0, nullptr, - static_cast<uint32_t>(bufferBarriers.size()), 
bufferBarriers.data(), - 0, nullptr); - + // release the storage buffers back to the graphics queue + addComputeToGraphicsBarriers(compute.commandBuffers[i]); vkEndCommandBuffer(compute.commandBuffers[i]); } } @@ -380,6 +417,10 @@ public: copyRegion.size = storageBufferSize; vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffers.input.buffer, 1, &copyRegion); vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffers.output.buffer, 1, &copyRegion); + // Add an initial release barrier to the graphics queue, + // so that when the compute command buffer executes for the first time + // it doesn't complain about a lack of a corresponding "release" to its "acquire" + addGraphicsToComputeBarriers(copyCmd); VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true); stagingBuffer.destroy(); @@ -549,6 +590,7 @@ public: }; inputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(inputAttributes.size()); inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + inputAssemblyState.primitiveRestartEnable = VK_FALSE; rasterizationState.polygonMode = VK_POLYGON_MODE_FILL; shaderStages[0] = loadShader(getAssetPath() + "shaders/computecloth/sphere.vert.spv", VK_SHADER_STAGE_VERTEX_BIT); shaderStages[1] = loadShader(getAssetPath() + "shaders/computecloth/sphere.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT); @@ -620,9 +662,10 @@ public: VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffers[0])); - // Fence for compute CB sync - VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT); - VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &compute.fence)); + // Semaphores for graphics / compute synchronization + VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo(); + VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &compute.semaphores.ready)); + VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, 
nullptr, &compute.semaphores.complete)); // Build a single command buffer containing the compute dispatch commands buildComputeCommandBuffer(); @@ -694,23 +737,47 @@ public: void draw() { - VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo(); - computeSubmitInfo.commandBufferCount = 1; - computeSubmitInfo.pCommandBuffers = &compute.commandBuffers[readSet]; + static bool firstDraw = true; + VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo(); + // FIXME find a better way to do this (without using fences, which is much slower) + VkPipelineStageFlags computeWaitDstStageMask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + if (!firstDraw) { + computeSubmitInfo.waitSemaphoreCount = 1; + computeSubmitInfo.pWaitSemaphores = &compute.semaphores.ready; + computeSubmitInfo.pWaitDstStageMask = &computeWaitDstStageMask; + } else { + firstDraw = false; + } + computeSubmitInfo.signalSemaphoreCount = 1; + computeSubmitInfo.pSignalSemaphores = &compute.semaphores.complete; + computeSubmitInfo.commandBufferCount = 1; + computeSubmitInfo.pCommandBuffers = &compute.commandBuffers[readSet]; - VK_CHECK_RESULT( vkQueueSubmit( compute.queue, 1, &computeSubmitInfo, compute.fence ) ); + VK_CHECK_RESULT( vkQueueSubmit( compute.queue, 1, &computeSubmitInfo, VK_NULL_HANDLE) ); // Submit graphics commands VulkanExampleBase::prepareFrame(); + VkPipelineStageFlags waitDstStageMask[2] = { + submitPipelineStages, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT + }; + VkSemaphore waitSemaphores[2] = { + semaphores.presentComplete, compute.semaphores.complete + }; + VkSemaphore signalSemaphores[2] = { + semaphores.renderComplete, compute.semaphores.ready + }; + + submitInfo.waitSemaphoreCount = 2; + submitInfo.pWaitDstStageMask = waitDstStageMask; + submitInfo.pWaitSemaphores = waitSemaphores; + submitInfo.signalSemaphoreCount = 2; + submitInfo.pSignalSemaphores = signalSemaphores; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer]; 
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE)); VulkanExampleBase::submitFrame(); - - vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX); - vkResetFences(device, 1, &compute.fence); } void prepare()