Code cleanup, restructuring. code comments

This commit is contained in:
Sascha Willems 2024-01-08 19:04:03 +01:00
parent 6740aae4d9
commit 11bef52edc
5 changed files with 176 additions and 404 deletions

View file

@ -1,14 +1,18 @@
/*
* Vulkan Example - Compute shader N-body simulation using two passes and shared compute shader memory
*
* Copyright (C) by Sascha Willems - www.saschawillems.de
* This sample shows how to combine compute and graphics for doing N-body particle simulaton
* It calculates the particle system movement using two separate compute passes: calculating particle positions and integrating particles
* For that a shader storage buffer is used which is then used as a vertex buffer for drawing the particle system with a graphics pipeline
* To optimize performance, the compute shaders use shared memory
*
* Copyright (C) 2016-2023 by Sascha Willems - www.saschawillems.de
*
* This code is licensed under the MIT license (MIT) (http://opensource.org/licenses/MIT)
*/
#include "vulkanexamplebase.h"
#define VERTEX_BUFFER_BIND_ID 0
#if defined(__ANDROID__)
// Lower particle count on Android for performance reasons
#define PARTICLES_PER_ATTRACTOR 3 * 1024
@ -19,40 +23,41 @@
class VulkanExample : public VulkanExampleBase
{
public:
uint32_t numParticles;
struct {
struct Textures {
vks::Texture2D particle;
vks::Texture2D gradient;
} textures;
} textures{};
struct {
VkPipelineVertexInputStateCreateInfo inputState;
std::vector<VkVertexInputBindingDescription> bindingDescriptions;
std::vector<VkVertexInputAttributeDescription> attributeDescriptions;
} vertices;
// Particle Definition
struct Particle {
glm::vec4 pos; // xyz = position, w = mass
glm::vec4 vel; // xyz = velocity, w = gradient texture position
};
uint32_t numParticles{ 0 };
// We use a shader storage buffer object to store the particlces
// This is updated by the compute pipeline and displayed as a vertex buffer by the graphics pipeline
vks::Buffer storageBuffer;
// Resources for the graphics part of the example
struct {
struct Graphics {
uint32_t queueFamilyIndex; // Used to check if compute and graphics queue families differ and require additional barriers
vks::Buffer uniformBuffer; // Contains scene matrices
VkDescriptorSetLayout descriptorSetLayout; // Particle system rendering shader binding layout
VkDescriptorSet descriptorSet; // Particle system rendering shader bindings
VkPipelineLayout pipelineLayout; // Layout of the graphics pipeline
VkPipeline pipeline; // Particle rendering pipeline
VkSemaphore semaphore; // Execution dependency between compute & graphic submission
struct {
struct UniformData {
glm::mat4 projection;
glm::mat4 view;
glm::vec2 screenDim;
} ubo;
} uniformData;
vks::Buffer uniformBuffer; // Contains scene matrices
} graphics;
// Resources for the compute part of the example
struct {
struct Compute {
uint32_t queueFamilyIndex; // Used to check if compute and graphics queue families differ and require additional barriers
vks::Buffer storageBuffer; // (Shader) storage buffer object containing the particles
vks::Buffer uniformBuffer; // Uniform buffer object containing particle system parameters
VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics)
VkCommandPool commandPool; // Use a separate command pool (queue family may differ from the one used for graphics)
VkCommandBuffer commandBuffer; // Command buffer storing the dispatch commands and barriers
@ -62,22 +67,17 @@ public:
VkPipelineLayout pipelineLayout; // Layout of the compute pipeline
VkPipeline pipelineCalculate; // Compute pipeline for N-Body velocity calculation (1st pass)
VkPipeline pipelineIntegrate; // Compute pipeline for euler integration (2nd pass)
VkPipeline blur;
VkPipelineLayout pipelineLayoutBlur;
VkDescriptorSetLayout descriptorSetLayoutBlur;
VkDescriptorSet descriptorSetBlur;
struct computeUBO { // Compute shader uniform block object
float deltaT; // Frame delta time
int32_t particleCount;
} ubo;
struct UniformData { // Compute shader uniform block object
float deltaT{ 0.0f }; // Frame delta time
int32_t particleCount{ 0 };
// Parameters used to control the behaviour of the particle system
float gravity{ 0.002f };
float power{ 0.75f };
float soften{ 0.05f };
} uniformData;
vks::Buffer uniformBuffer; // Uniform buffer object containing particle system parameters
} compute;
// SSBO particle declaration
struct Particle {
glm::vec4 pos; // xyz = position, w = mass
glm::vec4 vel; // xyz = velocity, w = gradient texture position
};
VulkanExample() : VulkanExampleBase()
{
title = "Compute shader N-body system";
@ -90,25 +90,28 @@ public:
~VulkanExample()
{
// Graphics
graphics.uniformBuffer.destroy();
vkDestroyPipeline(device, graphics.pipeline, nullptr);
vkDestroyPipelineLayout(device, graphics.pipelineLayout, nullptr);
vkDestroyDescriptorSetLayout(device, graphics.descriptorSetLayout, nullptr);
vkDestroySemaphore(device, graphics.semaphore, nullptr);
if (device) {
// Graphics
graphics.uniformBuffer.destroy();
vkDestroyPipeline(device, graphics.pipeline, nullptr);
vkDestroyPipelineLayout(device, graphics.pipelineLayout, nullptr);
vkDestroyDescriptorSetLayout(device, graphics.descriptorSetLayout, nullptr);
vkDestroySemaphore(device, graphics.semaphore, nullptr);
// Compute
compute.storageBuffer.destroy();
compute.uniformBuffer.destroy();
vkDestroyPipelineLayout(device, compute.pipelineLayout, nullptr);
vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr);
vkDestroyPipeline(device, compute.pipelineCalculate, nullptr);
vkDestroyPipeline(device, compute.pipelineIntegrate, nullptr);
vkDestroySemaphore(device, compute.semaphore, nullptr);
vkDestroyCommandPool(device, compute.commandPool, nullptr);
// Compute
compute.uniformBuffer.destroy();
vkDestroyPipelineLayout(device, compute.pipelineLayout, nullptr);
vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr);
vkDestroyPipeline(device, compute.pipelineCalculate, nullptr);
vkDestroyPipeline(device, compute.pipelineIntegrate, nullptr);
vkDestroySemaphore(device, compute.semaphore, nullptr);
vkDestroyCommandPool(device, compute.commandPool, nullptr);
textures.particle.destroy();
textures.gradient.destroy();
storageBuffer.destroy();
textures.particle.destroy();
textures.gradient.destroy();
}
}
void loadAssets()
@ -152,9 +155,9 @@ public:
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
compute.queueFamilyIndex,
graphics.queueFamilyIndex,
compute.storageBuffer.buffer,
storageBuffer.buffer,
0,
compute.storageBuffer.size
storageBuffer.size
};
vkCmdPipelineBarrier(
@ -180,7 +183,7 @@ public:
vkCmdBindDescriptorSets(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipelineLayout, 0, 1, &graphics.descriptorSet, 0, nullptr);
VkDeviceSize offsets[1] = { 0 };
vkCmdBindVertexBuffers(drawCmdBuffers[i], VERTEX_BUFFER_BIND_ID, 1, &compute.storageBuffer.buffer, offsets);
vkCmdBindVertexBuffers(drawCmdBuffers[i], 0, 1, &storageBuffer.buffer, offsets);
vkCmdDraw(drawCmdBuffers[i], numParticles, 1, 0, 0);
drawUI(drawCmdBuffers[i]);
@ -198,9 +201,9 @@ public:
0,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
storageBuffer.buffer,
0,
compute.storageBuffer.size
storageBuffer.size
};
vkCmdPipelineBarrier(
@ -235,9 +238,9 @@ public:
VK_ACCESS_SHADER_WRITE_BIT,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
storageBuffer.buffer,
0,
compute.storageBuffer.size
storageBuffer.size
};
vkCmdPipelineBarrier(
@ -258,8 +261,8 @@ public:
// Add memory barrier to ensure that the computer shader has finished writing to the buffer
VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier();
bufferBarrier.buffer = compute.storageBuffer.buffer;
bufferBarrier.size = compute.storageBuffer.descriptor.range;
bufferBarrier.buffer = storageBuffer.buffer;
bufferBarrier.size = storageBuffer.descriptor.range;
bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
// Transfer ownership if compute and graphics queue family indices differ
@ -291,9 +294,9 @@ public:
0,
compute.queueFamilyIndex,
graphics.queueFamilyIndex,
compute.storageBuffer.buffer,
storageBuffer.buffer,
0,
compute.storageBuffer.size
storageBuffer.size
};
vkCmdPipelineBarrier(
@ -312,12 +315,7 @@ public:
// Setup and fill the compute shader storage buffers containing the particles
void prepareStorageBuffers()
{
#if 0
std::vector<glm::vec3> attractors = {
glm::vec3(2.5f, 1.5f, 0.0f),
glm::vec3(-2.5f, -1.5f, 0.0f),
};
#else
// We mark a few particles as attractors that move along a given path, these will pull in the other particles
std::vector<glm::vec3> attractors = {
glm::vec3(5.0f, 0.0f, 0.0f),
glm::vec3(-5.0f, 0.0f, 0.0f),
@ -326,7 +324,6 @@ public:
glm::vec3(0.0f, 4.0f, 0.0f),
glm::vec3(0.0f, -8.0f, 0.0f),
};
#endif
numParticles = static_cast<uint32_t>(attractors.size()) * PARTICLES_PER_ATTRACTOR;
@ -340,7 +337,7 @@ public:
{
for (uint32_t j = 0; j < PARTICLES_PER_ATTRACTOR; j++)
{
Particle &particle = particleBuffer[i * PARTICLES_PER_ATTRACTOR + j];
Particle& particle = particleBuffer[i * PARTICLES_PER_ATTRACTOR + j];
// First particle in group as heavy center of gravity
if (j == 0)
@ -369,7 +366,7 @@ public:
}
}
compute.ubo.particleCount = numParticles;
compute.uniformData.particleCount = numParticles;
VkDeviceSize storageBufferSize = particleBuffer.size() * sizeof(Particle);
@ -378,25 +375,15 @@ public:
vks::Buffer stagingBuffer;
vulkanDevice->createBuffer(
VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&stagingBuffer,
storageBufferSize,
particleBuffer.data());
vulkanDevice->createBuffer(
// The SSBO will be used as a storage buffer for the compute pipeline and as a vertex buffer in the graphics pipeline
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
&compute.storageBuffer,
storageBufferSize);
vulkanDevice->createBuffer(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, &stagingBuffer, storageBufferSize, particleBuffer.data());
// The SSBO will be used as a storage buffer for the compute pipeline and as a vertex buffer in the graphics pipeline
vulkanDevice->createBuffer(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &storageBuffer, storageBufferSize);
// Copy from staging buffer to storage buffer
VkCommandBuffer copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
VkBufferCopy copyRegion = {};
copyRegion.size = storageBufferSize;
vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffer.buffer, 1, &copyRegion);
vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, storageBuffer.buffer, 1, &copyRegion);
// Execute a transfer barrier to the compute queue, if necessary
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
@ -408,9 +395,9 @@ public:
0,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
storageBuffer.buffer,
0,
compute.storageBuffer.size
storageBuffer.size
};
vkCmdPipelineBarrier(
@ -425,61 +412,24 @@ public:
vulkanDevice->flushCommandBuffer(copyCmd, queue, true);
stagingBuffer.destroy();
// Binding description
vertices.bindingDescriptions.resize(1);
vertices.bindingDescriptions[0] =
vks::initializers::vertexInputBindingDescription(
VERTEX_BUFFER_BIND_ID,
sizeof(Particle),
VK_VERTEX_INPUT_RATE_VERTEX);
// Attribute descriptions
// Describes memory layout and shader positions
vertices.attributeDescriptions.resize(2);
// Location 0 : Position
vertices.attributeDescriptions[0] =
vks::initializers::vertexInputAttributeDescription(
VERTEX_BUFFER_BIND_ID,
0,
VK_FORMAT_R32G32B32A32_SFLOAT,
offsetof(Particle, pos));
// Location 1 : Velocity (used for gradient lookup)
vertices.attributeDescriptions[1] =
vks::initializers::vertexInputAttributeDescription(
VERTEX_BUFFER_BIND_ID,
1,
VK_FORMAT_R32G32B32A32_SFLOAT,
offsetof(Particle, vel));
// Assign to vertex buffer
vertices.inputState = vks::initializers::pipelineVertexInputStateCreateInfo();
vertices.inputState.vertexBindingDescriptionCount = static_cast<uint32_t>(vertices.bindingDescriptions.size());
vertices.inputState.pVertexBindingDescriptions = vertices.bindingDescriptions.data();
vertices.inputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(vertices.attributeDescriptions.size());
vertices.inputState.pVertexAttributeDescriptions = vertices.attributeDescriptions.data();
}
void setupDescriptorPool()
void prepareGraphics()
{
std::vector<VkDescriptorPoolSize> poolSizes =
{
// Vertex shader uniform buffer block
vulkanDevice->createBuffer(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, &graphics.uniformBuffer, sizeof(Graphics::UniformData));
VK_CHECK_RESULT(graphics.uniformBuffer.map());
// Descriptor pool
std::vector<VkDescriptorPoolSize> poolSizes = {
vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2),
vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1),
vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 2)
};
VkDescriptorPoolCreateInfo descriptorPoolInfo =
vks::initializers::descriptorPoolCreateInfo(
static_cast<uint32_t>(poolSizes.size()),
poolSizes.data(),
2);
VkDescriptorPoolCreateInfo descriptorPoolInfo = vks::initializers::descriptorPoolCreateInfo(poolSizes, 2);
VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolInfo, nullptr, &descriptorPool));
}
void setupDescriptorSetLayout()
{
// Descriptor layout
std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings;
setLayoutBindings = {
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_FRAGMENT_BIT, 0),
@ -487,103 +437,58 @@ public:
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_VERTEX_BIT, 2),
};
VkDescriptorSetLayoutCreateInfo descriptorLayout =
vks::initializers::descriptorSetLayoutCreateInfo(
setLayoutBindings.data(),
static_cast<uint32_t>(setLayoutBindings.size()));
VkDescriptorSetLayoutCreateInfo descriptorLayout = vks::initializers::descriptorSetLayoutCreateInfo(setLayoutBindings);
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &graphics.descriptorSetLayout));
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
vks::initializers::pipelineLayoutCreateInfo(
&graphics.descriptorSetLayout,
1);
VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &graphics.pipelineLayout));
}
void setupDescriptorSet()
{
VkDescriptorSetAllocateInfo allocInfo =
vks::initializers::descriptorSetAllocateInfo(
descriptorPool,
&graphics.descriptorSetLayout,
1);
// Descriptor set
VkDescriptorSetAllocateInfo allocInfo = vks::initializers::descriptorSetAllocateInfo(descriptorPool, &graphics.descriptorSetLayout, 1);
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &graphics.descriptorSet));
std::vector<VkWriteDescriptorSet> writeDescriptorSets;
writeDescriptorSets = {
vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 0, &textures.particle.descriptor),
vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, &textures.gradient.descriptor),
vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, &graphics.uniformBuffer.descriptor),
std::vector<VkWriteDescriptorSet> writeDescriptorSets = {
vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 0, &textures.particle.descriptor),
vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, &textures.gradient.descriptor),
vks::initializers::writeDescriptorSet(graphics.descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, &graphics.uniformBuffer.descriptor),
};
vkUpdateDescriptorSets(device, static_cast<uint32_t>(writeDescriptorSets.size()), writeDescriptorSets.data(), 0, nullptr);
}
void preparePipelines()
{
VkPipelineInputAssemblyStateCreateInfo inputAssemblyState =
vks::initializers::pipelineInputAssemblyStateCreateInfo(
VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
0,
VK_FALSE);
// Pipeline layout
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = vks::initializers::pipelineLayoutCreateInfo(&graphics.descriptorSetLayout, 1);
VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &graphics.pipelineLayout));
VkPipelineRasterizationStateCreateInfo rasterizationState =
vks::initializers::pipelineRasterizationStateCreateInfo(
VK_POLYGON_MODE_FILL,
VK_CULL_MODE_NONE,
VK_FRONT_FACE_COUNTER_CLOCKWISE,
0);
// Pipeline
VkPipelineInputAssemblyStateCreateInfo inputAssemblyState = vks::initializers::pipelineInputAssemblyStateCreateInfo(VK_PRIMITIVE_TOPOLOGY_POINT_LIST, 0, VK_FALSE);
VkPipelineRasterizationStateCreateInfo rasterizationState = vks::initializers::pipelineRasterizationStateCreateInfo(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_COUNTER_CLOCKWISE, 0);
VkPipelineColorBlendAttachmentState blendAttachmentState = vks::initializers::pipelineColorBlendAttachmentState(0xf, VK_FALSE);
VkPipelineColorBlendStateCreateInfo colorBlendState = vks::initializers::pipelineColorBlendStateCreateInfo(1, &blendAttachmentState);
VkPipelineDepthStencilStateCreateInfo depthStencilState = vks::initializers::pipelineDepthStencilStateCreateInfo(VK_FALSE, VK_FALSE, VK_COMPARE_OP_ALWAYS);
VkPipelineViewportStateCreateInfo viewportState = vks::initializers::pipelineViewportStateCreateInfo(1, 1, 0);
VkPipelineMultisampleStateCreateInfo multisampleState = vks::initializers::pipelineMultisampleStateCreateInfo(VK_SAMPLE_COUNT_1_BIT, 0);
std::vector<VkDynamicState> dynamicStateEnables = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR };
VkPipelineDynamicStateCreateInfo dynamicState = vks::initializers::pipelineDynamicStateCreateInfo(dynamicStateEnables);
std::array<VkPipelineShaderStageCreateInfo, 2> shaderStages;
VkPipelineColorBlendAttachmentState blendAttachmentState =
vks::initializers::pipelineColorBlendAttachmentState(
0xf,
VK_FALSE);
VkPipelineColorBlendStateCreateInfo colorBlendState =
vks::initializers::pipelineColorBlendStateCreateInfo(
1,
&blendAttachmentState);
VkPipelineDepthStencilStateCreateInfo depthStencilState =
vks::initializers::pipelineDepthStencilStateCreateInfo(
VK_FALSE,
VK_FALSE,
VK_COMPARE_OP_ALWAYS);
VkPipelineViewportStateCreateInfo viewportState =
vks::initializers::pipelineViewportStateCreateInfo(1, 1, 0);
VkPipelineMultisampleStateCreateInfo multisampleState =
vks::initializers::pipelineMultisampleStateCreateInfo(
VK_SAMPLE_COUNT_1_BIT,
0);
std::vector<VkDynamicState> dynamicStateEnables = {
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR
// Vertex Input state
std::vector<VkVertexInputBindingDescription> inputBindings = {
vks::initializers::vertexInputBindingDescription(0, sizeof(Particle), VK_VERTEX_INPUT_RATE_VERTEX)
};
VkPipelineDynamicStateCreateInfo dynamicState =
vks::initializers::pipelineDynamicStateCreateInfo(
dynamicStateEnables.data(),
static_cast<uint32_t>(dynamicStateEnables.size()),
0);
// Rendering pipeline
// Load shaders
std::array<VkPipelineShaderStageCreateInfo,2> shaderStages;
std::vector<VkVertexInputAttributeDescription> inputAttributes = {
// Location 0 : Position
vks::initializers::vertexInputAttributeDescription(0, 0, VK_FORMAT_R32G32B32A32_SFLOAT, offsetof(Particle, pos)),
// Location 1 : Velocity (used for color gradient lookup)
vks::initializers::vertexInputAttributeDescription(0, 1, VK_FORMAT_R32G32B32A32_SFLOAT, offsetof(Particle, vel)),
};
VkPipelineVertexInputStateCreateInfo vertexInputState = vks::initializers::pipelineVertexInputStateCreateInfo();
vertexInputState.vertexBindingDescriptionCount = static_cast<uint32_t>(inputBindings.size());
vertexInputState.pVertexBindingDescriptions = inputBindings.data();
vertexInputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(inputAttributes.size());
vertexInputState.pVertexAttributeDescriptions = inputAttributes.data();
// Shaders
shaderStages[0] = loadShader(getShadersPath() + "computenbody/particle.vert.spv", VK_SHADER_STAGE_VERTEX_BIT);
shaderStages[1] = loadShader(getShadersPath() + "computenbody/particle.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT);
VkGraphicsPipelineCreateInfo pipelineCreateInfo =
vks::initializers::pipelineCreateInfo(
graphics.pipelineLayout,
renderPass,
0);
pipelineCreateInfo.pVertexInputState = &vertices.inputState;
VkGraphicsPipelineCreateInfo pipelineCreateInfo = vks::initializers::pipelineCreateInfo(graphics.pipelineLayout, renderPass, 0);
pipelineCreateInfo.pVertexInputState = &vertexInputState;
pipelineCreateInfo.pInputAssemblyState = &inputAssemblyState;
pipelineCreateInfo.pRasterizationState = &rasterizationState;
pipelineCreateInfo.pColorBlendState = &colorBlendState;
@ -606,26 +511,19 @@ public:
blendAttachmentState.dstAlphaBlendFactor = VK_BLEND_FACTOR_DST_ALPHA;
VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &graphics.pipeline));
}
void prepareGraphics()
{
prepareStorageBuffers();
prepareUniformBuffers();
setupDescriptorSetLayout();
preparePipelines();
setupDescriptorSet();
// Semaphore for compute & graphics sync
// We use a semaphore to synchronize compute and graphics
VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo();
VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &graphics.semaphore));
// Signal the semaphore
// Signal the semaphore for the first run
VkSubmitInfo submitInfo = vks::initializers::submitInfo();
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = &graphics.semaphore;
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
VK_CHECK_RESULT(vkQueueWaitIdle(queue));
buildCommandBuffers();
}
void prepareCompute()
@ -636,90 +534,47 @@ public:
// requiring proper synchronization (see the memory barriers in buildComputeCommandBuffer)
vkGetDeviceQueue(device, compute.queueFamilyIndex, 0, &compute.queue);
// Compute shader uniform buffer block
vulkanDevice->createBuffer(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, &compute.uniformBuffer, sizeof(Compute::UniformData));
VK_CHECK_RESULT(compute.uniformBuffer.map());
// Create compute pipeline
// Compute pipelines are created separate from graphics pipelines even if they use the same queue (family index)
std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = {
// Binding 0 : Particle position storage buffer
vks::initializers::descriptorSetLayoutBinding(
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
VK_SHADER_STAGE_COMPUTE_BIT,
0),
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 0),
// Binding 1 : Uniform buffer
vks::initializers::descriptorSetLayoutBinding(
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
VK_SHADER_STAGE_COMPUTE_BIT,
1),
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 1),
};
VkDescriptorSetLayoutCreateInfo descriptorLayout =
vks::initializers::descriptorSetLayoutCreateInfo(
setLayoutBindings.data(),
static_cast<uint32_t>(setLayoutBindings.size()));
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &compute.descriptorSetLayout));
VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo =
vks::initializers::pipelineLayoutCreateInfo(
&compute.descriptorSetLayout,
1);
VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pPipelineLayoutCreateInfo, nullptr, &compute.pipelineLayout));
VkDescriptorSetAllocateInfo allocInfo =
vks::initializers::descriptorSetAllocateInfo(
descriptorPool,
&compute.descriptorSetLayout,
1);
VkDescriptorSetLayoutCreateInfo descriptorLayout = vks::initializers::descriptorSetLayoutCreateInfo(setLayoutBindings);
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &compute.descriptorSetLayout));
VkDescriptorSetAllocateInfo allocInfo = vks::initializers::descriptorSetAllocateInfo(descriptorPool, &compute.descriptorSetLayout, 1);
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &compute.descriptorSet));
std::vector<VkWriteDescriptorSet> computeWriteDescriptorSets =
{
std::vector<VkWriteDescriptorSet> computeWriteDescriptorSets = {
// Binding 0 : Particle position storage buffer
vks::initializers::writeDescriptorSet(
compute.descriptorSet,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
0,
&compute.storageBuffer.descriptor),
vks::initializers::writeDescriptorSet(compute.descriptorSet, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, &storageBuffer.descriptor),
// Binding 1 : Uniform buffer
vks::initializers::writeDescriptorSet(
compute.descriptorSet,
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1,
&compute.uniformBuffer.descriptor)
vks::initializers::writeDescriptorSet(compute.descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,1,&compute.uniformBuffer.descriptor)
};
vkUpdateDescriptorSets(device, static_cast<uint32_t>(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, nullptr);
// Create pipelines
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = vks::initializers::pipelineLayoutCreateInfo(&compute.descriptorSetLayout, 1);
VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &compute.pipelineLayout));
VkComputePipelineCreateInfo computePipelineCreateInfo = vks::initializers::computePipelineCreateInfo(compute.pipelineLayout, 0);
// 1st pass
computePipelineCreateInfo.stage = loadShader(getShadersPath() + "computenbody/particle_calculate.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT);
// Set shader parameters via specialization constants
struct SpecializationData {
uint32_t sharedDataSize;
float gravity;
float power;
float soften;
} specializationData;
std::vector<VkSpecializationMapEntry> specializationMapEntries;
specializationMapEntries.push_back(vks::initializers::specializationMapEntry(0, offsetof(SpecializationData, sharedDataSize), sizeof(uint32_t)));
specializationMapEntries.push_back(vks::initializers::specializationMapEntry(1, offsetof(SpecializationData, gravity), sizeof(float)));
specializationMapEntries.push_back(vks::initializers::specializationMapEntry(2, offsetof(SpecializationData, power), sizeof(float)));
specializationMapEntries.push_back(vks::initializers::specializationMapEntry(3, offsetof(SpecializationData, soften), sizeof(float)));
specializationData.sharedDataSize = std::min((uint32_t)1024, (uint32_t)(vulkanDevice->properties.limits.maxComputeSharedMemorySize / sizeof(glm::vec4)));
specializationData.gravity = 0.002f;
specializationData.power = 0.75f;
specializationData.soften = 0.05f;
VkSpecializationInfo specializationInfo =
vks::initializers::specializationInfo(static_cast<uint32_t>(specializationMapEntries.size()), specializationMapEntries.data(), sizeof(specializationData), &specializationData);
// We want to use as much shared memory for the compute shader invocations as available, so we calculate it based on the device limits and pass it to the shader via specialization constants
uint32_t sharedDataSize = std::min((uint32_t)1024, (uint32_t)(vulkanDevice->properties.limits.maxComputeSharedMemorySize / sizeof(glm::vec4)));
VkSpecializationMapEntry specializationMapEntry = vks::initializers::specializationMapEntry(0, 0, sizeof(uint32_t));
VkSpecializationInfo specializationInfo = vks::initializers::specializationInfo(1, &specializationMapEntry, sizeof(int32_t), &sharedDataSize);
computePipelineCreateInfo.stage.pSpecializationInfo = &specializationInfo;
VK_CHECK_RESULT(vkCreateComputePipelines(device, pipelineCache, 1, &computePipelineCreateInfo, nullptr, &compute.pipelineCalculate));
@ -744,101 +599,34 @@ public:
// Build a single command buffer containing the compute dispatch commands
buildComputeCommandBuffer();
// SRS - By reordering compute and graphics within draw(), the following code is no longer needed:
// If graphics and compute queue family indices differ, acquire and immediately release the storage buffer, so that the initial acquire from the graphics command buffers are matched up properly
/*
if (graphics.queueFamilyIndex != compute.queueFamilyIndex)
{
// Create a transient command buffer for setting up the initial buffer transfer state
VkCommandBuffer transferCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, compute.commandPool, true);
VkBufferMemoryBarrier acquire_buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
0,
VK_ACCESS_SHADER_WRITE_BIT,
graphics.queueFamilyIndex,
compute.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
transferCmd,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0,
0, nullptr,
1, &acquire_buffer_barrier,
0, nullptr);
VkBufferMemoryBarrier release_buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_SHADER_WRITE_BIT,
0,
compute.queueFamilyIndex,
graphics.queueFamilyIndex,
compute.storageBuffer.buffer,
0,
compute.storageBuffer.size
};
vkCmdPipelineBarrier(
transferCmd,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
0,
0, nullptr,
1, &release_buffer_barrier,
0, nullptr);
vulkanDevice->flushCommandBuffer(transferCmd, compute.queue, compute.commandPool);
}
*/
}
// Prepare and initialize uniform buffer containing shader uniforms
void prepareUniformBuffers()
{
// Compute shader uniform buffer block
vulkanDevice->createBuffer(
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&compute.uniformBuffer,
sizeof(compute.ubo));
// Map for host access
VK_CHECK_RESULT(compute.uniformBuffer.map());
// Vertex shader uniform buffer block
vulkanDevice->createBuffer(
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&graphics.uniformBuffer,
sizeof(graphics.ubo));
// Map for host access
VK_CHECK_RESULT(graphics.uniformBuffer.map());
updateComputeUniformBuffers();
updateGraphicsUniformBuffers();
}
void updateComputeUniformBuffers()
{
compute.ubo.deltaT = paused ? 0.0f : frameTimer * 0.05f;
memcpy(compute.uniformBuffer.mapped, &compute.ubo, sizeof(compute.ubo));
compute.uniformData.deltaT = paused ? 0.0f : frameTimer * 0.05f;
memcpy(compute.uniformBuffer.mapped, &compute.uniformData, sizeof(Compute::UniformData));
}
void updateGraphicsUniformBuffers()
{
graphics.ubo.projection = camera.matrices.perspective;
graphics.ubo.view = camera.matrices.view;
graphics.ubo.screenDim = glm::vec2((float)width, (float)height);
memcpy(graphics.uniformBuffer.mapped, &graphics.ubo, sizeof(graphics.ubo));
graphics.uniformData.projection = camera.matrices.perspective;
graphics.uniformData.view = camera.matrices.view;
graphics.uniformData.screenDim = glm::vec2((float)width, (float)height);
memcpy(graphics.uniformBuffer.mapped, &graphics.uniformData, sizeof(Graphics::UniformData));
}
void prepare()
{
VulkanExampleBase::prepare();
// We will be using the queue family indices to check if graphics and compute queue families differ
// If that's the case, we need additional barriers for acquiring and releasing resources
graphics.queueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
compute.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
loadAssets();
prepareStorageBuffers();
prepareGraphics();
prepareCompute();
prepared = true;
}
void draw()
@ -876,35 +664,13 @@ public:
VulkanExampleBase::submitFrame();
}
void prepare()
{
VulkanExampleBase::prepare();
// We will be using the queue family indices to check if graphics and compute queue families differ
// If that's the case, we need additional barriers for acquiring and releasing resources
graphics.queueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;
compute.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
loadAssets();
setupDescriptorPool();
prepareGraphics();
prepareCompute();
buildCommandBuffers();
prepared = true;
}
virtual void render()
{
if (!prepared)
return;
draw();
updateComputeUniformBuffers();
if (camera.updated) {
updateGraphicsUniformBuffers();
}
}
virtual void viewChanged()
{
updateGraphicsUniformBuffers();
draw();
}
};

View file

@ -19,12 +19,12 @@ layout (binding = 1) uniform UBO
{
float deltaT;
int particleCount;
float gravity;
float power;
float soften;
} ubo;
layout (constant_id = 0) const int SHARED_DATA_SIZE = 512;
layout (constant_id = 1) const float GRAVITY = 0.002;
layout (constant_id = 2) const float POWER = 0.75;
layout (constant_id = 3) const float SOFTEN = 0.0075;
// Share data between computer shader invocations to speed up caluclations
shared vec4 sharedData[SHARED_DATA_SIZE];
@ -58,7 +58,7 @@ void main()
{
vec4 other = sharedData[j];
vec3 len = other.xyz - position.xyz;
acceleration.xyz += GRAVITY * len * other.w / pow(dot(len, len) + SOFTEN, POWER);
acceleration.xyz += ubo.gravity * len * other.w / pow(dot(len, len) + ubo.soften, ubo.power);
}
memoryBarrierShared();
@ -69,6 +69,7 @@ void main()
// Gradient texture position
particles[index].vel.w += 0.1 * ubo.deltaT;
if (particles[index].vel.w > 1.0)
if (particles[index].vel.w > 1.0) {
particles[index].vel.w -= 1.0;
}
}

View file

@ -1,4 +1,5 @@
// Copyright 2020 Google LLC
// Copyright 2023 Sascha Willems
struct Particle
{
@ -13,6 +14,9 @@ struct UBO
{
float deltaT;
int particleCount;
float gravity;
float power;
float soften;
};
cbuffer ubo : register(b1) { UBO ubo; }
@ -55,7 +59,7 @@ void main(uint3 GlobalInvocationID : SV_DispatchThreadID, uint3 LocalInvocationI
{
float4 other = sharedData[j];
float3 len = other.xyz - position.xyz;
acceleration.xyz += GRAVITY * len * other.w / pow(dot(len, len) + SOFTEN, POWER);
acceleration.xyz += ubo.gravity * len * other.w / pow(dot(len, len) + ubo.soften, ubo.power);
}
GroupMemoryBarrierWithGroupSync();
@ -65,6 +69,7 @@ void main(uint3 GlobalInvocationID : SV_DispatchThreadID, uint3 LocalInvocationI
// Gradient texture position
particles[index].vel.w += 0.1 * ubo.deltaT;
if (particles[index].vel.w > 1.0)
if (particles[index].vel.w > 1.0) {
particles[index].vel.w -= 1.0;
}
}