Proper barriers for buffer writes and reads between host and device (non-coherent)
This commit is contained in:
parent
66a87bbf21
commit
3862b4bcc2
1 changed file with 32 additions and 16 deletions
|
|
@@ -247,7 +247,8 @@ public:
|
||||||
|
|
||||||
// Submit to the queue
|
// Submit to the queue
|
||||||
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
|
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
|
||||||
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, DEFAULT_FENCE_TIMEOUT));
|
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX));
|
||||||
|
|
||||||
vkDestroyFence(device, fence, nullptr);
|
vkDestroyFence(device, fence, nullptr);
|
||||||
vkFreeCommandBuffers(device, commandPool, 1, &copyCmd);
|
vkFreeCommandBuffers(device, commandPool, 1, &copyCmd);
|
||||||
}
|
}
|
||||||
|
|
@@ -279,9 +280,7 @@ public:
|
||||||
vks::initializers::descriptorSetAllocateInfo(descriptorPool, &descriptorSetLayout, 1);
|
vks::initializers::descriptorSetAllocateInfo(descriptorPool, &descriptorSetLayout, 1);
|
||||||
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet));
|
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet));
|
||||||
|
|
||||||
VkDescriptorBufferInfo bufferDescriptor = {};
|
VkDescriptorBufferInfo bufferDescriptor = { deviceBuffer, 0, VK_WHOLE_SIZE };
|
||||||
bufferDescriptor.buffer = deviceBuffer;
|
|
||||||
bufferDescriptor.range = VK_WHOLE_SIZE;
|
|
||||||
std::vector<VkWriteDescriptorSet> computeWriteDescriptorSets = {
|
std::vector<VkWriteDescriptorSet> computeWriteDescriptorSets = {
|
||||||
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, &bufferDescriptor),
|
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, &bufferDescriptor),
|
||||||
};
|
};
|
||||||
|
|
@@ -325,17 +324,18 @@ public:
|
||||||
|
|
||||||
VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &cmdBufInfo));
|
VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &cmdBufInfo));
|
||||||
|
|
||||||
|
// Barrier to ensure that input buffer transfer is finished before compute shader reads from it
|
||||||
VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier();
|
VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier();
|
||||||
bufferBarrier.buffer = deviceBuffer;
|
bufferBarrier.buffer = deviceBuffer;
|
||||||
bufferBarrier.size = VK_WHOLE_SIZE;
|
bufferBarrier.size = VK_WHOLE_SIZE;
|
||||||
bufferBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
|
bufferBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||||
bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
||||||
bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
|
|
||||||
vkCmdPipelineBarrier(
|
vkCmdPipelineBarrier(
|
||||||
commandBuffer,
|
commandBuffer,
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
VK_PIPELINE_STAGE_HOST_BIT,
|
||||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
VK_FLAGS_NONE,
|
VK_FLAGS_NONE,
|
||||||
0, nullptr,
|
0, nullptr,
|
||||||
|
|
@@ -348,9 +348,8 @@ public:
|
||||||
vkCmdDispatch(commandBuffer, BUFFER_ELEMENTS, 1, 1);
|
vkCmdDispatch(commandBuffer, BUFFER_ELEMENTS, 1, 1);
|
||||||
|
|
||||||
// Barrier to ensure that shader writes are finished before buffer is read back from GPU
|
// Barrier to ensure that shader writes are finished before buffer is read back from GPU
|
||||||
|
|
||||||
bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||||
bufferBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
|
bufferBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
||||||
bufferBarrier.buffer = deviceBuffer;
|
bufferBarrier.buffer = deviceBuffer;
|
||||||
bufferBarrier.size = VK_WHOLE_SIZE;
|
bufferBarrier.size = VK_WHOLE_SIZE;
|
||||||
bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
|
|
@@ -370,19 +369,34 @@ public:
|
||||||
copyRegion.size = bufferSize;
|
copyRegion.size = bufferSize;
|
||||||
vkCmdCopyBuffer(commandBuffer, deviceBuffer, hostBuffer, 1, &copyRegion);
|
vkCmdCopyBuffer(commandBuffer, deviceBuffer, hostBuffer, 1, &copyRegion);
|
||||||
|
|
||||||
|
// Barrier to ensure that buffer copy is finished before host reading from it
|
||||||
|
bufferBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||||
|
bufferBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
|
||||||
|
bufferBarrier.buffer = hostBuffer;
|
||||||
|
bufferBarrier.size = VK_WHOLE_SIZE;
|
||||||
|
bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
|
bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
|
|
||||||
|
vkCmdPipelineBarrier(
|
||||||
|
commandBuffer,
|
||||||
|
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
|
VK_PIPELINE_STAGE_HOST_BIT,
|
||||||
|
VK_FLAGS_NONE,
|
||||||
|
0, nullptr,
|
||||||
|
1, &bufferBarrier,
|
||||||
|
0, nullptr);
|
||||||
|
|
||||||
VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer));
|
VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer));
|
||||||
|
|
||||||
// Submit compute work
|
// Submit compute work
|
||||||
vkResetFences(device, 1, &fence);
|
vkResetFences(device, 1, &fence);
|
||||||
|
const VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
|
||||||
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
|
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
|
||||||
|
computeSubmitInfo.pWaitDstStageMask = &waitStageMask;
|
||||||
computeSubmitInfo.commandBufferCount = 1;
|
computeSubmitInfo.commandBufferCount = 1;
|
||||||
computeSubmitInfo.pCommandBuffers = &commandBuffer;
|
computeSubmitInfo.pCommandBuffers = &commandBuffer;
|
||||||
|
|
||||||
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &computeSubmitInfo, fence));
|
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &computeSubmitInfo, fence));
|
||||||
|
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX));
|
||||||
// Wait for fence
|
|
||||||
vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
|
|
||||||
|
|
||||||
// Make device writes visible to the host
|
// Make device writes visible to the host
|
||||||
void *mapped;
|
void *mapped;
|
||||||
|
|
@@ -393,11 +407,13 @@ public:
|
||||||
mappedRange.size = VK_WHOLE_SIZE;
|
mappedRange.size = VK_WHOLE_SIZE;
|
||||||
vkInvalidateMappedMemoryRanges(device, 1, &mappedRange);
|
vkInvalidateMappedMemoryRanges(device, 1, &mappedRange);
|
||||||
|
|
||||||
void* mapped;
|
// Copy to output
|
||||||
vkMapMemory(device, hostMemory, 0, VK_WHOLE_SIZE, 0, &mapped);
|
|
||||||
memcpy(computeOutput.data(), mapped, bufferSize);
|
memcpy(computeOutput.data(), mapped, bufferSize);
|
||||||
|
vkUnmapMemory(device, hostMemory);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vkQueueWaitIdle(queue);
|
||||||
|
|
||||||
// Output buffer contents
|
// Output buffer contents
|
||||||
std::cout << "Compute input: " << std::endl;
|
std::cout << "Compute input: " << std::endl;
|
||||||
for (auto v : computeInput) {
|
for (auto v : computeInput) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue