Related to #876. On the AMD RX 5700 XT, memory types with VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD are present. Because the memory type selection doesn't break early, the code ends up selecting one of the 'late' memory types, which carry more property bits than required, including this one, which causes a validation error. Breaking early should solve this on all GPUs, as the Vulkan spec defines the order in which memory types are declared:

```
For each pair of elements X and Y returned in memoryTypes, X must be placed at a lower index position than Y if:
- the set of bit flags returned in the propertyFlags member of X is a strict subset of the set of bit flags returned in the propertyFlags member of Y;
```

So if my understanding is correct, by breaking early we always select the most basic memory type that meets the sample's needs.

Signed-off-by: Nathan Gauër <nathan@gauer.org>
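For illustration, here is a minimal standalone sketch of the selection logic this change results in. The helper name `findMemoryTypeIndex` and the `std::optional` return type are illustration only, not part of the sample:

```
#include <cstdint>
#include <optional>
#include <vulkan/vulkan.h>

// Returns the lowest memory type index whose propertyFlags contain all requested
// flags. Since the spec orders memoryTypes so that a type whose flags are a strict
// subset of another's comes first, returning on the first match picks the most
// basic type that satisfies the request.
std::optional<uint32_t> findMemoryTypeIndex(VkPhysicalDevice physicalDevice, uint32_t memoryTypeBits, VkMemoryPropertyFlags requiredFlags)
{
	VkPhysicalDeviceMemoryProperties memProps;
	vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProps);
	for (uint32_t i = 0; i < memProps.memoryTypeCount; i++) {
		const bool typeAllowed = (memoryTypeBits & (1u << i)) != 0;
		const bool flagsMatch = (memProps.memoryTypes[i].propertyFlags & requiredFlags) == requiredFlags;
		if (typeAllowed && flagsMatch) {
			return i; // break early: the first match is the most basic type
		}
	}
	return std::nullopt;
}
```

The sample's `createBuffer` below implements the same break-early logic, shifting `memoryTypeBits` each iteration instead of testing individual bits.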
/*
 * Vulkan Example - Minimal headless compute example
 *
 * Copyright (C) 2017-2022 by Sascha Willems - www.saschawillems.de
 *
 * This code is licensed under the MIT license (MIT) (http://opensource.org/licenses/MIT)
 */

#if defined(_WIN32)
#pragma comment(linker, "/subsystem:console")
#elif defined(VK_USE_PLATFORM_ANDROID_KHR)
#include <android/native_activity.h>
#include <android/asset_manager.h>
#include <android_native_app_glue.h>
#include <android/log.h>
#include "VulkanAndroid.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <vector>
#include <iostream>
#include <algorithm>

#if defined(VK_USE_PLATFORM_MACOS_MVK)
#define VK_ENABLE_BETA_EXTENSIONS
#endif
#include <vulkan/vulkan.h>
#include "VulkanTools.h"
#include "CommandLineParser.hpp"

#if defined(VK_USE_PLATFORM_ANDROID_KHR)
android_app* androidapp;
#endif

#define DEBUG (!NDEBUG)

#define BUFFER_ELEMENTS 32

#if defined(VK_USE_PLATFORM_ANDROID_KHR)
#define LOG(...) ((void)__android_log_print(ANDROID_LOG_INFO, "vulkanExample", __VA_ARGS__))
#else
#define LOG(...) printf(__VA_ARGS__)
#endif

static VKAPI_ATTR VkBool32 VKAPI_CALL debugMessageCallback(
	VkDebugReportFlagsEXT flags,
	VkDebugReportObjectTypeEXT objectType,
	uint64_t object,
	size_t location,
	int32_t messageCode,
	const char* pLayerPrefix,
	const char* pMessage,
	void* pUserData)
{
	LOG("[VALIDATION]: %s - %s\n", pLayerPrefix, pMessage);
	return VK_FALSE;
}

CommandLineParser commandLineParser;

class VulkanExample
{
public:
	VkInstance instance;
	VkPhysicalDevice physicalDevice;
	VkDevice device;
	uint32_t queueFamilyIndex;
	VkPipelineCache pipelineCache;
	VkQueue queue;
	VkCommandPool commandPool;
	VkCommandBuffer commandBuffer;
	VkFence fence;
	VkDescriptorPool descriptorPool;
	VkDescriptorSetLayout descriptorSetLayout;
	VkDescriptorSet descriptorSet;
	VkPipelineLayout pipelineLayout;
	VkPipeline pipeline;
	VkShaderModule shaderModule;

	VkDebugReportCallbackEXT debugReportCallback{};

	VkResult createBuffer(VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags memoryPropertyFlags, VkBuffer *buffer, VkDeviceMemory *memory, VkDeviceSize size, void *data = nullptr)
	{
		// Create the buffer handle
		VkBufferCreateInfo bufferCreateInfo = vks::initializers::bufferCreateInfo(usageFlags, size);
		bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
		VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, nullptr, buffer));

		// Create the memory backing up the buffer handle
		VkPhysicalDeviceMemoryProperties deviceMemoryProperties;
		vkGetPhysicalDeviceMemoryProperties(physicalDevice, &deviceMemoryProperties);
		VkMemoryRequirements memReqs;
		VkMemoryAllocateInfo memAlloc = vks::initializers::memoryAllocateInfo();
		vkGetBufferMemoryRequirements(device, *buffer, &memReqs);
		memAlloc.allocationSize = memReqs.size;
		// Find a memory type index that fits the properties of the buffer
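		// Memory types are ordered by the spec so that a type whose property flags
		// are a strict subset of another's is placed at a lower index; breaking on
		// the first match therefore selects the most basic type that satisfies the
		// request (see #876)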
		bool memTypeFound = false;
		for (uint32_t i = 0; i < deviceMemoryProperties.memoryTypeCount; i++) {
			if ((memReqs.memoryTypeBits & 1) == 1) {
				if ((deviceMemoryProperties.memoryTypes[i].propertyFlags & memoryPropertyFlags) == memoryPropertyFlags) {
					memAlloc.memoryTypeIndex = i;
					memTypeFound = true;
					break;
				}
			}
			memReqs.memoryTypeBits >>= 1;
		}
		assert(memTypeFound);
		VK_CHECK_RESULT(vkAllocateMemory(device, &memAlloc, nullptr, memory));

		if (data != nullptr) {
			void *mapped;
			VK_CHECK_RESULT(vkMapMemory(device, *memory, 0, size, 0, &mapped));
			memcpy(mapped, data, size);
			vkUnmapMemory(device, *memory);
		}

		VK_CHECK_RESULT(vkBindBufferMemory(device, *buffer, *memory, 0));

		return VK_SUCCESS;
	}

	VulkanExample()
	{
		LOG("Running headless compute example\n");

#if defined(VK_USE_PLATFORM_ANDROID_KHR)
		LOG("loading vulkan lib");
		vks::android::loadVulkanLibrary();
#endif

		VkApplicationInfo appInfo = {};
		appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
		appInfo.pApplicationName = "Vulkan headless example";
		appInfo.pEngineName = "VulkanExample";
		appInfo.apiVersion = VK_API_VERSION_1_0;

		/*
			Vulkan instance creation (without surface extensions)
		*/
		VkInstanceCreateInfo instanceCreateInfo = {};
		instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
		instanceCreateInfo.pApplicationInfo = &appInfo;

		uint32_t layerCount = 1;
		const char* validationLayers[] = { "VK_LAYER_KHRONOS_validation" };

		std::vector<const char*> instanceExtensions = {};
#if DEBUG
		// Check if layers are available
		uint32_t instanceLayerCount;
		vkEnumerateInstanceLayerProperties(&instanceLayerCount, nullptr);
		std::vector<VkLayerProperties> instanceLayers(instanceLayerCount);
		vkEnumerateInstanceLayerProperties(&instanceLayerCount, instanceLayers.data());

		bool layersAvailable = true;
		for (auto layerName : validationLayers) {
			bool layerAvailable = false;
			for (auto instanceLayer : instanceLayers) {
				if (strcmp(instanceLayer.layerName, layerName) == 0) {
					layerAvailable = true;
					break;
				}
			}
			if (!layerAvailable) {
				layersAvailable = false;
				break;
			}
		}

		if (layersAvailable) {
			instanceExtensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
			instanceCreateInfo.ppEnabledLayerNames = validationLayers;
			instanceCreateInfo.enabledLayerCount = layerCount;
		}
#endif
#if defined(VK_USE_PLATFORM_MACOS_MVK)
		// SRS - When running on macOS with MoltenVK, enable VK_KHR_get_physical_device_properties2 (required by VK_KHR_portability_subset)
		instanceExtensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
#if defined(VK_KHR_portability_enumeration)
		// SRS - When running on macOS with MoltenVK and VK_KHR_portability_enumeration is defined and supported by the instance, enable the extension and the flag
		uint32_t instanceExtCount = 0;
		vkEnumerateInstanceExtensionProperties(nullptr, &instanceExtCount, nullptr);
		if (instanceExtCount > 0)
		{
			std::vector<VkExtensionProperties> extensions(instanceExtCount);
			if (vkEnumerateInstanceExtensionProperties(nullptr, &instanceExtCount, &extensions.front()) == VK_SUCCESS)
			{
				for (VkExtensionProperties extension : extensions)
				{
					if (strcmp(extension.extensionName, VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME) == 0)
					{
						instanceExtensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME);
						instanceCreateInfo.flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
						break;
					}
				}
			}
		}
#endif
#endif
		instanceCreateInfo.enabledExtensionCount = (uint32_t)instanceExtensions.size();
		instanceCreateInfo.ppEnabledExtensionNames = instanceExtensions.data();
		VK_CHECK_RESULT(vkCreateInstance(&instanceCreateInfo, nullptr, &instance));

#if defined(VK_USE_PLATFORM_ANDROID_KHR)
		vks::android::loadVulkanFunctions(instance);
#endif
#if DEBUG
		if (layersAvailable) {
			VkDebugReportCallbackCreateInfoEXT debugReportCreateInfo = {};
			debugReportCreateInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT;
			debugReportCreateInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT;
			debugReportCreateInfo.pfnCallback = (PFN_vkDebugReportCallbackEXT)debugMessageCallback;

			// We have to explicitly load this function.
			PFN_vkCreateDebugReportCallbackEXT vkCreateDebugReportCallbackEXT = reinterpret_cast<PFN_vkCreateDebugReportCallbackEXT>(vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT"));
			assert(vkCreateDebugReportCallbackEXT);
			VK_CHECK_RESULT(vkCreateDebugReportCallbackEXT(instance, &debugReportCreateInfo, nullptr, &debugReportCallback));
		}
#endif

		/*
			Vulkan device creation
		*/
		// Physical device (always use first)
		uint32_t deviceCount = 0;
		VK_CHECK_RESULT(vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr));
		std::vector<VkPhysicalDevice> physicalDevices(deviceCount);
		VK_CHECK_RESULT(vkEnumeratePhysicalDevices(instance, &deviceCount, physicalDevices.data()));
		physicalDevice = physicalDevices[0];

		VkPhysicalDeviceProperties deviceProperties;
		vkGetPhysicalDeviceProperties(physicalDevice, &deviceProperties);
		LOG("GPU: %s\n", deviceProperties.deviceName);

		// Request a single compute queue
		const float defaultQueuePriority(0.0f);
		VkDeviceQueueCreateInfo queueCreateInfo = {};
		uint32_t queueFamilyCount;
		vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, nullptr);
		std::vector<VkQueueFamilyProperties> queueFamilyProperties(queueFamilyCount);
		vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueFamilyProperties.data());
		for (uint32_t i = 0; i < static_cast<uint32_t>(queueFamilyProperties.size()); i++) {
			if (queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) {
				queueFamilyIndex = i;
				queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
				queueCreateInfo.queueFamilyIndex = i;
				queueCreateInfo.queueCount = 1;
				queueCreateInfo.pQueuePriorities = &defaultQueuePriority;
				break;
			}
		}
		// Create logical device
		VkDeviceCreateInfo deviceCreateInfo = {};
		deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
		deviceCreateInfo.queueCreateInfoCount = 1;
		deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
		std::vector<const char*> deviceExtensions = {};
#if defined(VK_USE_PLATFORM_MACOS_MVK) && defined(VK_KHR_portability_subset)
		// SRS - When running on macOS with MoltenVK and VK_KHR_portability_subset is defined and supported by the device, enable the extension
		uint32_t deviceExtCount = 0;
		vkEnumerateDeviceExtensionProperties(physicalDevice, nullptr, &deviceExtCount, nullptr);
		if (deviceExtCount > 0)
		{
			std::vector<VkExtensionProperties> extensions(deviceExtCount);
			if (vkEnumerateDeviceExtensionProperties(physicalDevice, nullptr, &deviceExtCount, &extensions.front()) == VK_SUCCESS)
			{
				for (VkExtensionProperties extension : extensions)
				{
					if (strcmp(extension.extensionName, VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME) == 0)
					{
						deviceExtensions.push_back(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME);
						break;
					}
				}
			}
		}
#endif
		deviceCreateInfo.enabledExtensionCount = (uint32_t)deviceExtensions.size();
		deviceCreateInfo.ppEnabledExtensionNames = deviceExtensions.data();
		VK_CHECK_RESULT(vkCreateDevice(physicalDevice, &deviceCreateInfo, nullptr, &device));

		// Get a compute queue
		vkGetDeviceQueue(device, queueFamilyIndex, 0, &queue);

		// Compute command pool
		VkCommandPoolCreateInfo cmdPoolInfo = {};
		cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
		cmdPoolInfo.queueFamilyIndex = queueFamilyIndex;
		cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
		VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &commandPool));

		/*
			Prepare storage buffers
		*/
		std::vector<uint32_t> computeInput(BUFFER_ELEMENTS);
		std::vector<uint32_t> computeOutput(BUFFER_ELEMENTS);

		// Fill input data
		uint32_t n = 0;
		std::generate(computeInput.begin(), computeInput.end(), [&n] { return n++; });

		const VkDeviceSize bufferSize = BUFFER_ELEMENTS * sizeof(uint32_t);

		VkBuffer deviceBuffer, hostBuffer;
		VkDeviceMemory deviceMemory, hostMemory;

		// Copy input data to VRAM using a staging buffer
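		// The input values are written to a host visible staging buffer first and
		// then transferred into a device local buffer, which offers the fastest
		// access for the compute shader on discrete GPUs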
		{
			createBuffer(
				VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
				VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
				&hostBuffer,
				&hostMemory,
				bufferSize,
				computeInput.data());

			// Flush writes to host visible buffer
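			// Only VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT was requested above, so the
			// selected memory type may be non-coherent and the host writes have to
			// be flushed explicitly to become available to the device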
			void* mapped;
			vkMapMemory(device, hostMemory, 0, VK_WHOLE_SIZE, 0, &mapped);
			VkMappedMemoryRange mappedRange = vks::initializers::mappedMemoryRange();
			mappedRange.memory = hostMemory;
			mappedRange.offset = 0;
			mappedRange.size = VK_WHOLE_SIZE;
			vkFlushMappedMemoryRanges(device, 1, &mappedRange);
			vkUnmapMemory(device, hostMemory);

			createBuffer(
				VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
				VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
				&deviceBuffer,
				&deviceMemory,
				bufferSize);

			// Copy input data from the staging buffer to the device local buffer
			VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1);
			VkCommandBuffer copyCmd;
			VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &copyCmd));
			VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo();
			VK_CHECK_RESULT(vkBeginCommandBuffer(copyCmd, &cmdBufInfo));

			VkBufferCopy copyRegion = {};
			copyRegion.size = bufferSize;
			vkCmdCopyBuffer(copyCmd, hostBuffer, deviceBuffer, 1, &copyRegion);
			VK_CHECK_RESULT(vkEndCommandBuffer(copyCmd));

			VkSubmitInfo submitInfo = vks::initializers::submitInfo();
			submitInfo.commandBufferCount = 1;
			submitInfo.pCommandBuffers = &copyCmd;
			VkFenceCreateInfo fenceInfo = vks::initializers::fenceCreateInfo(VK_FLAGS_NONE);
			VkFence fence;
			VK_CHECK_RESULT(vkCreateFence(device, &fenceInfo, nullptr, &fence));

			// Submit to the queue
			VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
			VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX));

			vkDestroyFence(device, fence, nullptr);
			vkFreeCommandBuffers(device, commandPool, 1, &copyCmd);
		}

		/*
			Prepare compute pipeline
		*/
		{
			std::vector<VkDescriptorPoolSize> poolSizes = {
				vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1),
			};

			VkDescriptorPoolCreateInfo descriptorPoolInfo =
				vks::initializers::descriptorPoolCreateInfo(static_cast<uint32_t>(poolSizes.size()), poolSizes.data(), 1);
			VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolInfo, nullptr, &descriptorPool));

			std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = {
				vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 0),
			};
			VkDescriptorSetLayoutCreateInfo descriptorLayout =
				vks::initializers::descriptorSetLayoutCreateInfo(setLayoutBindings);
			VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &descriptorSetLayout));

			VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
				vks::initializers::pipelineLayoutCreateInfo(&descriptorSetLayout, 1);
			VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &pipelineLayout));

			VkDescriptorSetAllocateInfo allocInfo =
				vks::initializers::descriptorSetAllocateInfo(descriptorPool, &descriptorSetLayout, 1);
			VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet));

			VkDescriptorBufferInfo bufferDescriptor = { deviceBuffer, 0, VK_WHOLE_SIZE };
			std::vector<VkWriteDescriptorSet> computeWriteDescriptorSets = {
				vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, &bufferDescriptor),
			};
			vkUpdateDescriptorSets(device, static_cast<uint32_t>(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, NULL);

			VkPipelineCacheCreateInfo pipelineCacheCreateInfo = {};
			pipelineCacheCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
			VK_CHECK_RESULT(vkCreatePipelineCache(device, &pipelineCacheCreateInfo, nullptr, &pipelineCache));

			// Create pipeline
			VkComputePipelineCreateInfo computePipelineCreateInfo = vks::initializers::computePipelineCreateInfo(pipelineLayout, 0);

			// Pass SSBO size via specialization constant
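			// Baking the element count into the pipeline via a specialization
			// constant avoids hard-coding the SSBO array size in the shader source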
			struct SpecializationData {
				uint32_t BUFFER_ELEMENT_COUNT = BUFFER_ELEMENTS;
			} specializationData;
			VkSpecializationMapEntry specializationMapEntry = vks::initializers::specializationMapEntry(0, 0, sizeof(uint32_t));
			VkSpecializationInfo specializationInfo = vks::initializers::specializationInfo(1, &specializationMapEntry, sizeof(SpecializationData), &specializationData);

			std::string shaderDir = "glsl";
			if (commandLineParser.isSet("shaders")) {
				shaderDir = commandLineParser.getValueAsString("shaders", "glsl");
			}
			const std::string shadersPath = getAssetPath() + "shaders/" + shaderDir + "/computeheadless/";

			VkPipelineShaderStageCreateInfo shaderStage = {};
			shaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
			shaderStage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
			shaderStage.module = vks::tools::loadShader(androidapp->activity->assetManager, (shadersPath + "headless.comp.spv").c_str(), device);
#else
			shaderStage.module = vks::tools::loadShader((shadersPath + "headless.comp.spv").c_str(), device);
#endif
			shaderStage.pName = "main";
			shaderStage.pSpecializationInfo = &specializationInfo;
			shaderModule = shaderStage.module;

			assert(shaderStage.module != VK_NULL_HANDLE);
			computePipelineCreateInfo.stage = shaderStage;
			VK_CHECK_RESULT(vkCreateComputePipelines(device, pipelineCache, 1, &computePipelineCreateInfo, nullptr, &pipeline));

			// Create a command buffer for compute operations
			VkCommandBufferAllocateInfo cmdBufAllocateInfo =
				vks::initializers::commandBufferAllocateInfo(commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1);
			VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &commandBuffer));

			// Fence for compute CB sync
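			// Created in the signaled state so it can simply be reset before the
			// submission below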
			VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT);
			VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &fence));
		}

		/*
			Command buffer creation (for compute work submission)
		*/
		{
			VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo();

			VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &cmdBufInfo));

			// Barrier to ensure that input buffer transfer is finished before compute shader reads from it
			VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier();
			bufferBarrier.buffer = deviceBuffer;
			bufferBarrier.size = VK_WHOLE_SIZE;
			bufferBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
			bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
			bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

			vkCmdPipelineBarrier(
				commandBuffer,
				VK_PIPELINE_STAGE_HOST_BIT,
				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
				VK_FLAGS_NONE,
				0, nullptr,
				1, &bufferBarrier,
				0, nullptr);

			vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
			vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &descriptorSet, 0, 0);
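
			// One workgroup per buffer element along x; the workgroup size itself
			// is defined in the compute shader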
			vkCmdDispatch(commandBuffer, BUFFER_ELEMENTS, 1, 1);

			// Barrier to ensure that shader writes are finished before buffer is read back from GPU
			bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
			bufferBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
			bufferBarrier.buffer = deviceBuffer;
			bufferBarrier.size = VK_WHOLE_SIZE;
			bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

			vkCmdPipelineBarrier(
				commandBuffer,
				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
				VK_PIPELINE_STAGE_TRANSFER_BIT,
				VK_FLAGS_NONE,
				0, nullptr,
				1, &bufferBarrier,
				0, nullptr);

			// Read back to host visible buffer
			VkBufferCopy copyRegion = {};
			copyRegion.size = bufferSize;
			vkCmdCopyBuffer(commandBuffer, deviceBuffer, hostBuffer, 1, &copyRegion);

			// Barrier to ensure that buffer copy is finished before host reading from it
			bufferBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
			bufferBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
			bufferBarrier.buffer = hostBuffer;
			bufferBarrier.size = VK_WHOLE_SIZE;
			bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

			vkCmdPipelineBarrier(
				commandBuffer,
				VK_PIPELINE_STAGE_TRANSFER_BIT,
				VK_PIPELINE_STAGE_HOST_BIT,
				VK_FLAGS_NONE,
				0, nullptr,
				1, &bufferBarrier,
				0, nullptr);

			VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer));

			// Submit compute work
			vkResetFences(device, 1, &fence);
			const VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
			VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
			computeSubmitInfo.pWaitDstStageMask = &waitStageMask;
			computeSubmitInfo.commandBufferCount = 1;
			computeSubmitInfo.pCommandBuffers = &commandBuffer;
			VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &computeSubmitInfo, fence));
			VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX));

			// Make device writes visible to the host
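			// Counterpart to the flush above: if the host visible memory is
			// non-coherent, it must be invalidated before the host reads the
			// results written by the GPU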
			void *mapped;
			vkMapMemory(device, hostMemory, 0, VK_WHOLE_SIZE, 0, &mapped);
			VkMappedMemoryRange mappedRange = vks::initializers::mappedMemoryRange();
			mappedRange.memory = hostMemory;
			mappedRange.offset = 0;
			mappedRange.size = VK_WHOLE_SIZE;
			vkInvalidateMappedMemoryRanges(device, 1, &mappedRange);

			// Copy to output
			memcpy(computeOutput.data(), mapped, bufferSize);
			vkUnmapMemory(device, hostMemory);
		}

		vkQueueWaitIdle(queue);

		// Output buffer contents
		LOG("Compute input:\n");
		for (auto v : computeInput) {
			LOG("%u \t", v);
		}
		std::cout << std::endl;

		LOG("Compute output:\n");
		for (auto v : computeOutput) {
			LOG("%u \t", v);
		}
		std::cout << std::endl;

		// Clean up
		vkDestroyBuffer(device, deviceBuffer, nullptr);
		vkFreeMemory(device, deviceMemory, nullptr);
		vkDestroyBuffer(device, hostBuffer, nullptr);
		vkFreeMemory(device, hostMemory, nullptr);
	}

	~VulkanExample()
	{
		vkDestroyPipelineLayout(device, pipelineLayout, nullptr);
		vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr);
		vkDestroyDescriptorPool(device, descriptorPool, nullptr);
		vkDestroyPipeline(device, pipeline, nullptr);
		vkDestroyPipelineCache(device, pipelineCache, nullptr);
		vkDestroyFence(device, fence, nullptr);
		vkDestroyCommandPool(device, commandPool, nullptr);
		vkDestroyShaderModule(device, shaderModule, nullptr);
		vkDestroyDevice(device, nullptr);
#if DEBUG
		if (debugReportCallback) {
			PFN_vkDestroyDebugReportCallbackEXT vkDestroyDebugReportCallback = reinterpret_cast<PFN_vkDestroyDebugReportCallbackEXT>(vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT"));
			assert(vkDestroyDebugReportCallback);
			vkDestroyDebugReportCallback(instance, debugReportCallback, nullptr);
		}
#endif
		vkDestroyInstance(instance, nullptr);
	}
};

#if defined(VK_USE_PLATFORM_ANDROID_KHR)
void handleAppCommand(android_app * app, int32_t cmd) {
	if (cmd == APP_CMD_INIT_WINDOW) {
		VulkanExample *vulkanExample = new VulkanExample();
		delete(vulkanExample);
		ANativeActivity_finish(app->activity);
	}
}
void android_main(android_app* state) {
	androidapp = state;
	androidapp->onAppCmd = handleAppCommand;
	int ident, events;
	struct android_poll_source* source;
	while ((ident = ALooper_pollAll(-1, NULL, &events, (void**)&source)) >= 0) {
		if (source != NULL) {
			source->process(androidapp, source);
		}
		if (androidapp->destroyRequested != 0) {
			break;
		}
	}
}
#else
int main(int argc, char* argv[]) {
	commandLineParser.add("help", { "--help" }, 0, "Show help");
	commandLineParser.add("shaders", { "-s", "--shaders" }, 1, "Select shader type to use (glsl or hlsl)");
	commandLineParser.parse(argc, argv);
	if (commandLineParser.isSet("help")) {
		commandLineParser.printHelp();
		std::cin.get();
		return 0;
	}
	VulkanExample *vulkanExample = new VulkanExample();
	std::cout << "Finished. Press enter to terminate...";
	std::cin.get();
	delete(vulkanExample);
	return 0;
}
#endif