Use compute queue (and separate CB) for compute dispatch (Refs #211), heavy refactor, moved graphics and compute into two structs to make things clearer

This commit is contained in:
saschawillems 2016-08-16 23:06:27 +02:00
parent 364c36e270
commit 04dffbadf3

View file

@ -27,7 +27,7 @@
#define ENABLE_VALIDATION false #define ENABLE_VALIDATION false
#if defined(__ANDROID__) #if defined(__ANDROID__)
// Lower particle count on Android for performance reasons // Lower particle count on Android for performance reasons
#define PARTICLE_COUNT 64 * 1024 #define PARTICLE_COUNT 128 * 1024
#else #else
#define PARTICLE_COUNT 256 * 1024 #define PARTICLE_COUNT 256 * 1024
#endif #endif
@ -50,44 +50,41 @@ public:
std::vector<VkVertexInputAttributeDescription> attributeDescriptions; std::vector<VkVertexInputAttributeDescription> attributeDescriptions;
} vertices; } vertices;
// Resources for the graphics part of the example
struct { struct {
VkPipeline postCompute; VkDescriptorSetLayout descriptorSetLayout; // Particle system rendering shader binding layout
// Compute pipelines are separated from VkDescriptorSet descriptorSet; // Particle system rendering shader bindings
// graphics pipelines in Vulkan VkPipelineLayout pipelineLayout; // Layout of the graphics pipeline
VkPipeline compute; VkPipeline pipeline; // Particle rendering pipeline
} pipelines; } graphics;
VkQueue computeQueue;
//VkCommandBuffer computeCmdBuffer;
VkPipelineLayout computePipelineLayout;
VkDescriptorSet computeDescriptorSet;
VkDescriptorSetLayout computeDescriptorSetLayout;
vkTools::UniformData computeStorageBuffer;
// Resources for the compute part of the example
struct { struct {
vk::Buffer storageBuffer; // (Shader) storage buffer object containing the particles
vk::Buffer uniformBuffer; // Uniform buffer object containing particle system parameters
VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics)
VkCommandPool commandPool; // Use a separate command pool (queue family may differ from the one used for graphics)
VkCommandBuffer commandBuffer; // Command buffer storing the dispatch commands and barriers
VkFence fence; // Synchronization fence to avoid rewriting compute CB if still in use
VkDescriptorSetLayout descriptorSetLayout; // Compute shader binding layout
VkDescriptorSet descriptorSet; // Compute shader bindings
VkPipelineLayout pipelineLayout; // Layout of the compute pipeline
VkPipeline pipeline; // Compute pipeline for updating particle positions
} compute;
struct ComputeUBO {
float deltaT; float deltaT;
float destX; float destX;
float destY; float destY;
int32_t particleCount = PARTICLE_COUNT; int32_t particleCount = PARTICLE_COUNT;
} computeUbo; } computeUbo;
struct {
struct {
vkTools::UniformData ubo;
} computeShader;
} uniformData;
struct Particle { struct Particle {
glm::vec2 pos; glm::vec2 pos;
glm::vec2 vel; glm::vec2 vel;
glm::vec4 gradientPos; glm::vec4 gradientPos;
}; };
VkPipelineLayout pipelineLayout;
VkDescriptorSet descriptorSetPostCompute;
VkDescriptorSetLayout descriptorSetLayout;
VulkanExample() : VulkanExampleBase(ENABLE_VALIDATION) VulkanExample() : VulkanExampleBase(ENABLE_VALIDATION)
{ {
enableTextOverlay = true; enableTextOverlay = true;
@ -99,18 +96,19 @@ public:
// Clean up used Vulkan resources // Clean up used Vulkan resources
// Note : Inherited destructor cleans up resources stored in base class // Note : Inherited destructor cleans up resources stored in base class
vkDestroyPipeline(device, pipelines.postCompute, nullptr); // Graphics
vkDestroyPipeline(device, graphics.pipeline, nullptr);
vkDestroyPipelineLayout(device, graphics.pipelineLayout, nullptr);
vkDestroyDescriptorSetLayout(device, graphics.descriptorSetLayout, nullptr);
vkDestroyPipelineLayout(device, pipelineLayout, nullptr); // Compute
vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr); compute.storageBuffer.destroy();
vkDestroyBuffer(device, computeStorageBuffer.buffer, nullptr); compute.uniformBuffer.destroy();
vkFreeMemory(device, computeStorageBuffer.memory, nullptr); vkDestroyPipelineLayout(device, compute.pipelineLayout, nullptr);
vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr);
vkTools::destroyUniformData(device, &uniformData.computeShader.ubo); vkDestroyPipeline(device, compute.pipeline, nullptr);
vkDestroyFence(device, compute.fence, nullptr);
vkDestroyPipelineLayout(device, computePipelineLayout, nullptr); vkDestroyCommandPool(device, compute.commandPool, nullptr);
vkDestroyDescriptorSetLayout(device, computeDescriptorSetLayout, nullptr);
vkDestroyPipeline(device, pipelines.compute, nullptr);
textureLoader->destroyTexture(textures.particle); textureLoader->destroyTexture(textures.particle);
textureLoader->destroyTexture(textures.gradient); textureLoader->destroyTexture(textures.gradient);
@ -153,55 +151,6 @@ public:
VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo)); VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo));
// Compute particle movement
// Add memory barrier to ensure that the (rendering) vertex shader operations have finished
// Required as the compute shader will overwrite the vertex buffer data
VkBufferMemoryBarrier bufferBarrier = vkTools::initializers::bufferMemoryBarrier();
// Vertex shader invocations have finished reading from the buffer
bufferBarrier.srcAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
// Compute shader buffer read and write
bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT;
bufferBarrier.buffer = computeStorageBuffer.buffer;
bufferBarrier.size = computeStorageBuffer.descriptor.range;
bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vkCmdPipelineBarrier(
drawCmdBuffers[i],
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_FLAGS_NONE,
0, nullptr,
1, &bufferBarrier,
0, nullptr);
vkCmdBindPipeline(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_COMPUTE, pipelines.compute);
vkCmdBindDescriptorSets(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_COMPUTE, computePipelineLayout, 0, 1, &computeDescriptorSet, 0, 0);
// Dispatch the compute job
vkCmdDispatch(drawCmdBuffers[i], PARTICLE_COUNT / 16, 1, 1);
// Add memory barrier to ensure that compute shader has finished writing to the buffer
// Without this the (rendering) vertex shader may display incomplete results (partial data from last frame)
// Compute shader has finished writes to the buffer
bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
// Vertex shader access (attribute binding)
bufferBarrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
bufferBarrier.buffer = computeStorageBuffer.buffer;
bufferBarrier.size = computeStorageBuffer.descriptor.range;
bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vkCmdPipelineBarrier(
drawCmdBuffers[i],
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
VK_FLAGS_NONE,
0, nullptr,
1, &bufferBarrier,
0, nullptr);
// Draw the particle system using the update vertex buffer // Draw the particle system using the update vertex buffer
vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE); vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
@ -212,11 +161,11 @@ public:
VkRect2D scissor = vkTools::initializers::rect2D(width, height, 0, 0); VkRect2D scissor = vkTools::initializers::rect2D(width, height, 0, 0);
vkCmdSetScissor(drawCmdBuffers[i], 0, 1, &scissor); vkCmdSetScissor(drawCmdBuffers[i], 0, 1, &scissor);
vkCmdBindPipeline(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipelines.postCompute); vkCmdBindPipeline(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipeline);
vkCmdBindDescriptorSets(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &descriptorSetPostCompute, 0, NULL); vkCmdBindDescriptorSets(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipelineLayout, 0, 1, &graphics.descriptorSet, 0, NULL);
VkDeviceSize offsets[1] = { 0 }; VkDeviceSize offsets[1] = { 0 };
vkCmdBindVertexBuffers(drawCmdBuffers[i], VERTEX_BUFFER_BIND_ID, 1, &computeStorageBuffer.buffer, offsets); vkCmdBindVertexBuffers(drawCmdBuffers[i], VERTEX_BUFFER_BIND_ID, 1, &compute.storageBuffer.buffer, offsets);
vkCmdDraw(drawCmdBuffers[i], PARTICLE_COUNT, 1, 0, 0); vkCmdDraw(drawCmdBuffers[i], PARTICLE_COUNT, 1, 0, 0);
vkCmdEndRenderPass(drawCmdBuffers[i]); vkCmdEndRenderPass(drawCmdBuffers[i]);
@ -226,6 +175,62 @@ public:
} }
void buildComputeCommandBuffer()
{
VkCommandBufferBeginInfo cmdBufInfo = vkTools::initializers::commandBufferBeginInfo();
VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffer, &cmdBufInfo));
// Compute particle movement
// Add memory barrier to ensure that the (rendering) vertex shader operations have finished
// Required as the compute shader will overwrite the vertex buffer data
VkBufferMemoryBarrier bufferBarrier = vkTools::initializers::bufferMemoryBarrier();
bufferBarrier.buffer = compute.storageBuffer.buffer;
bufferBarrier.size = compute.storageBuffer.descriptor.range;
bufferBarrier.srcAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; // Vertex shader invocations have finished reading from the buffer
bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; // Compute shader has finished buffer writes
bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; // Required as compute and graphics queue may have different families
bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; // Required as compute and graphics queue may have different families
vkCmdPipelineBarrier(
compute.commandBuffer,
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_FLAGS_NONE,
0, nullptr,
1, &bufferBarrier,
0, nullptr);
vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline);
vkCmdBindDescriptorSets(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineLayout, 0, 1, &compute.descriptorSet, 0, 0);
// Dispatch the compute job
vkCmdDispatch(compute.commandBuffer, PARTICLE_COUNT / 16, 1, 1);
// Add memory barrier to ensure that compute shader has finished writing to the buffer
// Without this the (rendering) vertex shader may display incomplete results (partial data from last frame)
// Compute shader has finished writes to the buffer
bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
// Vertex shader access (attribute binding)
bufferBarrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
bufferBarrier.buffer = compute.storageBuffer.buffer;
bufferBarrier.size = compute.storageBuffer.descriptor.range;
bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.compute; // Required as compute and graphics queue may have different families
bufferBarrier.dstQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics; // Required as compute and graphics queue may have different families
vkCmdPipelineBarrier(
compute.commandBuffer,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
VK_FLAGS_NONE,
0, nullptr,
1, &bufferBarrier,
0, nullptr);
vkEndCommandBuffer(compute.commandBuffer);
}
// Setup and fill the compute shader storage buffers for // Setup and fill the compute shader storage buffers for
// vertex positions and velocities // vertex positions and velocities
void prepareStorageBuffers() void prepareStorageBuffers()
@ -246,29 +251,23 @@ public:
uint32_t storageBufferSize = particleBuffer.size() * sizeof(Particle); uint32_t storageBufferSize = particleBuffer.size() * sizeof(Particle);
// Staging // Staging
// SSBO is static, copy to device local memory // SSBO won't be changed on the host after upload so copy to device local memory
// This results in better performance
struct { vk::Buffer stagingBuffer;
VkDeviceMemory memory;
VkBuffer buffer;
} stagingBuffer;
VulkanExampleBase::createBuffer( vulkanDevice->createBuffer(
VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&stagingBuffer,
storageBufferSize, storageBufferSize,
particleBuffer.data(), particleBuffer.data());
&stagingBuffer.buffer,
&stagingBuffer.memory);
VulkanExampleBase::createBuffer( vulkanDevice->createBuffer(
// The SSBO will be used as a storage buffer for the compute pipeline and as a vertex buffer in the graphics pipeline
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
storageBufferSize, &compute.storageBuffer,
nullptr, storageBufferSize);
&computeStorageBuffer.buffer,
&computeStorageBuffer.memory);
// Copy to staging buffer // Copy to staging buffer
VkCommandBuffer copyCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); VkCommandBuffer copyCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
@ -278,18 +277,13 @@ public:
vkCmdCopyBuffer( vkCmdCopyBuffer(
copyCmd, copyCmd,
stagingBuffer.buffer, stagingBuffer.buffer,
computeStorageBuffer.buffer, compute.storageBuffer.buffer,
1, 1,
&copyRegion); &copyRegion);
VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true); VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true);
vkFreeMemory(device, stagingBuffer.memory, nullptr); stagingBuffer.destroy();
vkDestroyBuffer(device, stagingBuffer.buffer, nullptr);
computeStorageBuffer.descriptor.range = storageBufferSize;
computeStorageBuffer.descriptor.buffer = computeStorageBuffer.buffer;
computeStorageBuffer.descriptor.offset = 0;
// Binding description // Binding description
vertices.bindingDescriptions.resize(1); vertices.bindingDescriptions.resize(1);
@ -308,14 +302,14 @@ public:
VERTEX_BUFFER_BIND_ID, VERTEX_BUFFER_BIND_ID,
0, 0,
VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
0); offsetof(Particle, pos));
// Location 1 : Gradient position // Location 1 : Gradient position
vertices.attributeDescriptions[1] = vertices.attributeDescriptions[1] =
vkTools::initializers::vertexInputAttributeDescription( vkTools::initializers::vertexInputAttributeDescription(
VERTEX_BUFFER_BIND_ID, VERTEX_BUFFER_BIND_ID,
1, 1,
VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
4 * sizeof(float)); offsetof(Particle, gradientPos));
// Assign to vertex buffer // Assign to vertex buffer
vertices.inputState = vkTools::initializers::pipelineVertexInputStateCreateInfo(); vertices.inputState = vkTools::initializers::pipelineVertexInputStateCreateInfo();
@ -362,14 +356,14 @@ public:
setLayoutBindings.data(), setLayoutBindings.data(),
setLayoutBindings.size()); setLayoutBindings.size());
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &descriptorSetLayout)); VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &graphics.descriptorSetLayout));
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
vkTools::initializers::pipelineLayoutCreateInfo( vkTools::initializers::pipelineLayoutCreateInfo(
&descriptorSetLayout, &graphics.descriptorSetLayout,
1); 1);
VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &pipelineLayout)); VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &graphics.pipelineLayout));
} }
void setupDescriptorSet() void setupDescriptorSet()
@ -377,10 +371,10 @@ public:
VkDescriptorSetAllocateInfo allocInfo = VkDescriptorSetAllocateInfo allocInfo =
vkTools::initializers::descriptorSetAllocateInfo( vkTools::initializers::descriptorSetAllocateInfo(
descriptorPool, descriptorPool,
&descriptorSetLayout, &graphics.descriptorSetLayout,
1); 1);
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSetPostCompute)); VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &graphics.descriptorSet));
// Image descriptor for the color map texture // Image descriptor for the color map texture
std::vector<VkDescriptorImageInfo> texDescriptors; std::vector<VkDescriptorImageInfo> texDescriptors;
@ -396,13 +390,13 @@ public:
std::vector<VkWriteDescriptorSet> writeDescriptorSets; std::vector<VkWriteDescriptorSet> writeDescriptorSets;
// Binding 0 : Particle color map // Binding 0 : Particle color map
writeDescriptorSets.push_back(vkTools::initializers::writeDescriptorSet( writeDescriptorSets.push_back(vkTools::initializers::writeDescriptorSet(
descriptorSetPostCompute, graphics.descriptorSet,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
0, 0,
&texDescriptors[0])); &texDescriptors[0]));
// Binding 1 : Particle gradient ramp // Binding 1 : Particle gradient ramp
writeDescriptorSets.push_back(vkTools::initializers::writeDescriptorSet( writeDescriptorSets.push_back(vkTools::initializers::writeDescriptorSet(
descriptorSetPostCompute, graphics.descriptorSet,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
1, 1,
&texDescriptors[1])); &texDescriptors[1]));
@ -468,7 +462,7 @@ public:
VkGraphicsPipelineCreateInfo pipelineCreateInfo = VkGraphicsPipelineCreateInfo pipelineCreateInfo =
vkTools::initializers::pipelineCreateInfo( vkTools::initializers::pipelineCreateInfo(
pipelineLayout, graphics.pipelineLayout,
renderPass, renderPass,
0); 0);
@ -494,14 +488,22 @@ public:
blendAttachmentState.srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; blendAttachmentState.srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA;
blendAttachmentState.dstAlphaBlendFactor = VK_BLEND_FACTOR_DST_ALPHA; blendAttachmentState.dstAlphaBlendFactor = VK_BLEND_FACTOR_DST_ALPHA;
VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &pipelines.postCompute)); VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &graphics.pipeline));
} }
void prepareCompute() void prepareCompute()
{ {
// Create a compute capable device queue
// todo: comment (queue families, etc.)
VkDeviceQueueCreateInfo queueCreateInfo = {};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.pNext = NULL;
queueCreateInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
queueCreateInfo.queueCount = 1;
vkGetDeviceQueue(device, vulkanDevice->queueFamilyIndices.compute, 0, &compute.queue);
// Create compute pipeline // Create compute pipeline
// Compute pipelines are created separate from graphics pipelines // Compute pipelines are created separate from graphics pipelines even if they use the same queue (family index)
// even if they use the same queue
std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = { std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = {
// Binding 0 : Particle position storage buffer // Binding 0 : Particle position storage buffer
@ -521,38 +523,37 @@ public:
setLayoutBindings.data(), setLayoutBindings.data(),
setLayoutBindings.size()); setLayoutBindings.size());
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &computeDescriptorSetLayout)); VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &compute.descriptorSetLayout));
VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo =
vkTools::initializers::pipelineLayoutCreateInfo( vkTools::initializers::pipelineLayoutCreateInfo(
&computeDescriptorSetLayout, &compute.descriptorSetLayout,
1); 1);
VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pPipelineLayoutCreateInfo, nullptr, &computePipelineLayout)); VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pPipelineLayoutCreateInfo, nullptr, &compute.pipelineLayout));
VkDescriptorSetAllocateInfo allocInfo = VkDescriptorSetAllocateInfo allocInfo =
vkTools::initializers::descriptorSetAllocateInfo( vkTools::initializers::descriptorSetAllocateInfo(
descriptorPool, descriptorPool,
&computeDescriptorSetLayout, &compute.descriptorSetLayout,
1); 1);
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &computeDescriptorSet)); VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &compute.descriptorSet));
std::vector<VkWriteDescriptorSet> computeWriteDescriptorSets = std::vector<VkWriteDescriptorSet> computeWriteDescriptorSets =
{ {
// Binding 0 : Particle position storage buffer // Binding 0 : Particle position storage buffer
vkTools::initializers::writeDescriptorSet( vkTools::initializers::writeDescriptorSet(
computeDescriptorSet, compute.descriptorSet,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
0, 0,
&computeStorageBuffer.descriptor), &compute.storageBuffer.descriptor),
// Binding 1 : Uniform buffer // Binding 1 : Uniform buffer
vkTools::initializers::writeDescriptorSet( vkTools::initializers::writeDescriptorSet(
computeDescriptorSet, compute.descriptorSet,
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1, 1,
&uniformData.computeShader.ubo.descriptor) &compute.uniformBuffer.descriptor)
}; };
vkUpdateDescriptorSets(device, computeWriteDescriptorSets.size(), computeWriteDescriptorSets.data(), 0, NULL); vkUpdateDescriptorSets(device, computeWriteDescriptorSets.size(), computeWriteDescriptorSets.data(), 0, NULL);
@ -560,27 +561,48 @@ public:
// Create pipeline // Create pipeline
VkComputePipelineCreateInfo computePipelineCreateInfo = VkComputePipelineCreateInfo computePipelineCreateInfo =
vkTools::initializers::computePipelineCreateInfo( vkTools::initializers::computePipelineCreateInfo(
computePipelineLayout, compute.pipelineLayout,
0); 0);
computePipelineCreateInfo.stage = loadShader(getAssetPath() + "shaders/computeparticles/particle.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT); computePipelineCreateInfo.stage = loadShader(getAssetPath() + "shaders/computeparticles/particle.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT);
VK_CHECK_RESULT(vkCreateComputePipelines(device, pipelineCache, 1, &computePipelineCreateInfo, nullptr, &pipelines.compute)); VK_CHECK_RESULT(vkCreateComputePipelines(device, pipelineCache, 1, &computePipelineCreateInfo, nullptr, &compute.pipeline));
// Separate command pool as queue family for compute may be different than graphics
VkCommandPoolCreateInfo cmdPoolInfo = {};
cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cmdPoolInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &compute.commandPool));
// Create command buffer for compute operations
// todo: differing indices? separate cmd pool?
VkCommandBufferAllocateInfo cmdBufAllocateInfo =
vkTools::initializers::commandBufferAllocateInfo(
compute.commandPool,
VK_COMMAND_BUFFER_LEVEL_PRIMARY,
1);
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffer));
// Fence for compute CB sync
VkFenceCreateInfo fenceCreateInfo = vkTools::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT);
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &compute.fence));
//todo: comment
buildComputeCommandBuffer();
} }
// Prepare and initialize uniform buffer containing shader uniforms // Prepare and initialize uniform buffer containing shader uniforms
void prepareUniformBuffers() void prepareUniformBuffers()
{ {
// Compute shader uniform buffer block // Compute shader uniform buffer block
createBuffer( vulkanDevice->createBuffer(
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
sizeof(computeUbo), &compute.uniformBuffer,
nullptr, sizeof(computeUbo));
&uniformData.computeShader.ubo.buffer,
&uniformData.computeShader.ubo.memory,
&uniformData.computeShader.ubo.descriptor);
// Map for host access // Map for host access
VK_CHECK_RESULT(vkMapMemory(device, uniformData.computeShader.ubo.memory, 0, sizeof(computeUbo), 0, (void **)&uniformData.computeShader.ubo.mapped)); VK_CHECK_RESULT(compute.uniformBuffer.map());
updateUniformBuffers(); updateUniformBuffers();
} }
@ -601,34 +623,7 @@ public:
computeUbo.destY = normalizedMy; computeUbo.destY = normalizedMy;
} }
memcpy(uniformData.computeShader.ubo.mapped, &computeUbo, sizeof(computeUbo)); memcpy(compute.uniformBuffer.mapped, &computeUbo, sizeof(computeUbo));
}
// Find and create a compute capable device queue
void getComputeQueue()
{
uint32_t queueIndex = 0;
uint32_t queueCount;
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueCount, NULL);
assert(queueCount >= 1);
std::vector<VkQueueFamilyProperties> queueProps;
queueProps.resize(queueCount);
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueCount, queueProps.data());
for (queueIndex = 0; queueIndex < queueCount; queueIndex++)
{
if (queueProps[queueIndex].queueFlags & VK_QUEUE_COMPUTE_BIT)
break;
}
assert(queueIndex < queueCount);
VkDeviceQueueCreateInfo queueCreateInfo = {};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.pNext = NULL;
queueCreateInfo.queueFamilyIndex = queueIndex;
queueCreateInfo.queueCount = 1;
vkGetDeviceQueue(device, queueIndex, 0, &computeQueue);
} }
void draw() void draw()
@ -640,13 +635,23 @@ public:
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE)); VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
VulkanExampleBase::submitFrame(); VulkanExampleBase::submitFrame();
// Submit compute
// todo: async compute
vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX);
vkResetFences(device, 1, &compute.fence);
VkSubmitInfo computeSubmitInfo = vkTools::initializers::submitInfo();
computeSubmitInfo.commandBufferCount = 1;
computeSubmitInfo.pCommandBuffers = &compute.commandBuffer;
VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence));
} }
void prepare() void prepare()
{ {
VulkanExampleBase::prepare(); VulkanExampleBase::prepare();
loadTextures(); loadTextures();
getComputeQueue();
prepareStorageBuffers(); prepareStorageBuffers();
prepareUniformBuffers(); prepareUniformBuffers();
setupDescriptorSetLayout(); setupDescriptorSetLayout();
@ -685,74 +690,17 @@ public:
{ {
animate = !animate; animate = !animate;
} }
};
VulkanExample *vulkanExample; virtual void keyPressed(uint32_t keyCode)
#if defined(_WIN32)
LRESULT CALLBACK WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam)
{
if (vulkanExample != NULL)
{ {
vulkanExample->handleMessages(hWnd, uMsg, wParam, lParam); switch (keyCode)
if (uMsg == WM_KEYDOWN)
{ {
switch (wParam) case KEY_A:
{ case GAMEPAD_BUTTON_A:
case 0x41: toggleAnimation();
vulkanExample->toggleAnimation();
break; break;
} }
} }
} };
return (DefWindowProc(hWnd, uMsg, wParam, lParam));
}
#elif defined(__linux__) && !defined(__ANDROID__)
// XCB event callback: hands the event to the global example instance.
// Does nothing while the global instance pointer is null.
static void handleEvent(const xcb_generic_event_t *event)
{
	if (vulkanExample == nullptr)
	{
		return;
	}

	vulkanExample->handleEvent(event);
}
#endif
// Main entry point VULKAN_EXAMPLE_MAIN()
// Program entry point.
// The function signature is selected per platform by the preprocessor; the body
// below is shared. It creates the example instance, performs the platform-specific
// window / application setup, runs the render loop and finally deletes the instance.
// Note: no signature is emitted if none of the three platform branches matches.
#if defined(_WIN32)
// Windows entry point
int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR pCmdLine, int nCmdShow)
#elif defined(__ANDROID__)
// Android entry point
void android_main(android_app* state)
#elif defined(__linux__)
// Linux entry point
int main(const int argc, const char *argv[])
#endif
{
#if defined(__ANDROID__)
// Removing this may cause the compiler to omit the main entry point
// which would make the application crash at start
app_dummy();
#endif
// The instance is stored in the file-level global pointer that the platform callbacks also read
vulkanExample = new VulkanExample();
#if defined(_WIN32)
// Windows: create the window and register WndProc as its message handler
vulkanExample->setupWindow(hInstance, WndProc);
#elif defined(__ANDROID__)
// Attach vulkan example to global android application state
state->userData = vulkanExample;
state->onAppCmd = VulkanExample::handleAppCommand;
state->onInputEvent = VulkanExample::handleAppInput;
vulkanExample->androidApp = state;
#elif defined(__linux__)
vulkanExample->setupWindow();
#endif
#if !defined(__ANDROID__)
// Swapchain and example resources are initialized here on non-Android platforms only
// NOTE(review): on Android this is presumably triggered from the app command handler - confirm
vulkanExample->initSwapchain();
vulkanExample->prepare();
#endif
vulkanExample->renderLoop();
delete(vulkanExample);
#if !defined(__ANDROID__)
// android_main is declared void, so only the other entry points return a status code
return 0;
#endif
}