Proper barriers for buffer writes and reads between host and device (non-coherent)

This commit is contained in:
saschawillems 2017-09-15 20:43:40 +02:00
parent 66a87bbf21
commit 3862b4bcc2

View file

@ -247,7 +247,8 @@ public:
// Submit to the queue // Submit to the queue
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence)); VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, DEFAULT_FENCE_TIMEOUT)); VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX));
vkDestroyFence(device, fence, nullptr); vkDestroyFence(device, fence, nullptr);
vkFreeCommandBuffers(device, commandPool, 1, &copyCmd); vkFreeCommandBuffers(device, commandPool, 1, &copyCmd);
} }
@ -279,9 +280,7 @@ public:
vks::initializers::descriptorSetAllocateInfo(descriptorPool, &descriptorSetLayout, 1); vks::initializers::descriptorSetAllocateInfo(descriptorPool, &descriptorSetLayout, 1);
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet)); VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet));
VkDescriptorBufferInfo bufferDescriptor = {}; VkDescriptorBufferInfo bufferDescriptor = { deviceBuffer, 0, VK_WHOLE_SIZE };
bufferDescriptor.buffer = deviceBuffer;
bufferDescriptor.range = VK_WHOLE_SIZE;
std::vector<VkWriteDescriptorSet> computeWriteDescriptorSets = { std::vector<VkWriteDescriptorSet> computeWriteDescriptorSets = {
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, &bufferDescriptor), vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, &bufferDescriptor),
}; };
@ -325,17 +324,18 @@ public:
VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &cmdBufInfo)); VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &cmdBufInfo));
// Barrier to ensure that input buffer transfer is finished before compute shader reads from it
VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier();
bufferBarrier.buffer = deviceBuffer; bufferBarrier.buffer = deviceBuffer;
bufferBarrier.size = VK_WHOLE_SIZE; bufferBarrier.size = VK_WHOLE_SIZE;
bufferBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; bufferBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vkCmdPipelineBarrier( vkCmdPipelineBarrier(
commandBuffer, commandBuffer,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_FLAGS_NONE, VK_FLAGS_NONE,
0, nullptr, 0, nullptr,
@ -348,9 +348,8 @@ public:
vkCmdDispatch(commandBuffer, BUFFER_ELEMENTS, 1, 1); vkCmdDispatch(commandBuffer, BUFFER_ELEMENTS, 1, 1);
// Barrier to ensure that shader writes are finished before buffer is read back from GPU // Barrier to ensure that shader writes are finished before buffer is read back from GPU
bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
bufferBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; bufferBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
bufferBarrier.buffer = deviceBuffer; bufferBarrier.buffer = deviceBuffer;
bufferBarrier.size = VK_WHOLE_SIZE; bufferBarrier.size = VK_WHOLE_SIZE;
bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@ -370,19 +369,34 @@ public:
copyRegion.size = bufferSize; copyRegion.size = bufferSize;
vkCmdCopyBuffer(commandBuffer, deviceBuffer, hostBuffer, 1, &copyRegion); vkCmdCopyBuffer(commandBuffer, deviceBuffer, hostBuffer, 1, &copyRegion);
// Barrier to ensure that the buffer copy is finished before the host reads from it
bufferBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
bufferBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
bufferBarrier.buffer = hostBuffer;
bufferBarrier.size = VK_WHOLE_SIZE;
bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vkCmdPipelineBarrier(
commandBuffer,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_HOST_BIT,
VK_FLAGS_NONE,
0, nullptr,
1, &bufferBarrier,
0, nullptr);
VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer)); VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer));
// Submit compute work // Submit compute work
vkResetFences(device, 1, &fence); vkResetFences(device, 1, &fence);
const VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo(); VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
computeSubmitInfo.pWaitDstStageMask = &waitStageMask;
computeSubmitInfo.commandBufferCount = 1; computeSubmitInfo.commandBufferCount = 1;
computeSubmitInfo.pCommandBuffers = &commandBuffer; computeSubmitInfo.pCommandBuffers = &commandBuffer;
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &computeSubmitInfo, fence)); VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &computeSubmitInfo, fence));
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX));
// Wait for fence
vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
// Make device writes visible to the host // Make device writes visible to the host
void *mapped; void *mapped;
@ -393,11 +407,13 @@ public:
mappedRange.size = VK_WHOLE_SIZE; mappedRange.size = VK_WHOLE_SIZE;
vkInvalidateMappedMemoryRanges(device, 1, &mappedRange); vkInvalidateMappedMemoryRanges(device, 1, &mappedRange);
void* mapped; // Copy to output
vkMapMemory(device, hostMemory, 0, VK_WHOLE_SIZE, 0, &mapped);
memcpy(computeOutput.data(), mapped, bufferSize); memcpy(computeOutput.data(), mapped, bufferSize);
vkUnmapMemory(device, hostMemory);
} }
vkQueueWaitIdle(queue);
// Output buffer contents // Output buffer contents
std::cout << "Compute input: " << std::endl; std::cout << "Compute input: " << std::endl;
for (auto v : computeInput) { for (auto v : computeInput) {