/* * Vulkan Example - Minimal headless compute example * * Copyright (C) 2017 by Sascha Willems - www.saschawillems.de * * This code is licensed under the MIT license (MIT) (http://opensource.org/licenses/MIT) */ // TODO: separate transfer queue (if not supported by compute queue) including buffer ownership transfer #if defined(_WIN32) #pragma comment(linker, "/subsystem:console") #endif #include #include #include #include #include #include #include #include #include "VulkanTools.h" #define DEBUG (!NDEBUG) #define BUFFER_ELEMENTS 32 static VKAPI_ATTR VkBool32 VKAPI_CALL debugMessageCallback( VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location, int32_t messageCode, const char* pLayerPrefix, const char* pMessage, void* pUserData) { std::cout << "[VALIDATION]:" << pLayerPrefix << ":" << pMessage << std::endl; return VK_FALSE; } class VulkanExample { public: VkInstance instance; VkPhysicalDevice physicalDevice; VkDevice device; uint32_t queueFamilyIndex; VkPipelineCache pipelineCache; VkQueue queue; VkCommandPool commandPool; VkCommandBuffer commandBuffer; VkFence fence; VkDescriptorPool descriptorPool; VkDescriptorSetLayout descriptorSetLayout; VkDescriptorSet descriptorSet; VkPipelineLayout pipelineLayout; VkPipeline pipeline; VkDebugReportCallbackEXT debugReportCallback; VkResult createBuffer(VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags memoryPropertyFlags, VkBuffer *buffer, VkDeviceMemory *memory, VkDeviceSize size, void *data = nullptr) { // Create the buffer handle VkBufferCreateInfo bufferCreateInfo = vks::initializers::bufferCreateInfo(usageFlags, size); bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, nullptr, buffer)); // Create the memory backing up the buffer handle VkPhysicalDeviceMemoryProperties deviceMemoryProperties; vkGetPhysicalDeviceMemoryProperties(physicalDevice, &deviceMemoryProperties); VkMemoryRequirements memReqs; VkMemoryAllocateInfo memAlloc = vks::initializers::memoryAllocateInfo(); vkGetBufferMemoryRequirements(device, *buffer, &memReqs); memAlloc.allocationSize = memReqs.size; // Find a memory type index that fits the properties of the buffer bool memTypeFound = false; for (uint32_t i = 0; i < deviceMemoryProperties.memoryTypeCount; i++) { if ((memReqs.memoryTypeBits & 1) == 1) { if ((deviceMemoryProperties.memoryTypes[i].propertyFlags & memoryPropertyFlags) == memoryPropertyFlags) { memAlloc.memoryTypeIndex = i; memTypeFound = true; } } memReqs.memoryTypeBits >>= 1; } assert(memTypeFound); VK_CHECK_RESULT(vkAllocateMemory(device, &memAlloc, nullptr, memory)); if (data != nullptr) { void *mapped; VK_CHECK_RESULT(vkMapMemory(device, *memory, 0, size, 0, &mapped)); memcpy(mapped, data, size); vkUnmapMemory(device, *memory); } VK_CHECK_RESULT(vkBindBufferMemory(device, *buffer, *memory, 0)); return VK_SUCCESS; } VulkanExample() { std::cout << "Running headless example" << std::endl; VkApplicationInfo appInfo = {}; appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; appInfo.pApplicationName = "Vulkan headless example"; appInfo.pEngineName = "VulkanExample"; appInfo.apiVersion = VK_API_VERSION_1_0; /* Vulkan instance creation (without surface extensions) */ VkInstanceCreateInfo instanceCreateInfo = {}; instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; instanceCreateInfo.pApplicationInfo = &appInfo; #if defined(DEBUG) // TODO: Explicit layer names on Android const char* validationlayers[] = { "VK_LAYER_LUNARG_standard_validation" }; const char* validationExt = VK_EXT_DEBUG_REPORT_EXTENSION_NAME; instanceCreateInfo.enabledLayerCount = 1; instanceCreateInfo.ppEnabledLayerNames = validationlayers; instanceCreateInfo.enabledExtensionCount = 1; instanceCreateInfo.ppEnabledExtensionNames = &validationExt; #endif VK_CHECK_RESULT(vkCreateInstance(&instanceCreateInfo, nullptr, &instance)); #if defined(DEBUG) VkDebugReportCallbackCreateInfoEXT debugReportCreateInfo = {}; debugReportCreateInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; debugReportCreateInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT; debugReportCreateInfo.pfnCallback = &debugMessageCallback; // We have to explicitly load this function. PFN_vkCreateDebugReportCallbackEXT vkCreateDebugReportCallbackEXT = reinterpret_cast(vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT")); assert(vkCreateDebugReportCallbackEXT); VK_CHECK_RESULT(vkCreateDebugReportCallbackEXT(instance, &debugReportCreateInfo, nullptr, &debugReportCallback)); #endif /* Vulkan device creation */ // Physical device (always use first) uint32_t deviceCount = 0; VK_CHECK_RESULT(vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr)); std::vector physicalDevices(deviceCount); VK_CHECK_RESULT(vkEnumeratePhysicalDevices(instance, &deviceCount, physicalDevices.data())); physicalDevice = physicalDevices[0]; // Request a single compute queue const float defaultQueuePriority(0.0f); VkDeviceQueueCreateInfo queueCreateInfo = {}; uint32_t queueFamilyCount; vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, nullptr); std::vector queueFamilyProperties(queueFamilyCount); vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueFamilyProperties.data()); for (uint32_t i = 0; i < static_cast(queueFamilyProperties.size()); i++) { if (queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) { queueFamilyIndex = i; queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queueCreateInfo.queueFamilyIndex = i; queueCreateInfo.queueCount = 1; queueCreateInfo.pQueuePriorities = &defaultQueuePriority; break; } } // Create logical device VkDeviceCreateInfo deviceCreateInfo = {}; deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; deviceCreateInfo.queueCreateInfoCount = 1; deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo; VK_CHECK_RESULT(vkCreateDevice(physicalDevice, &deviceCreateInfo, nullptr, &device)); // Get a compute queue vkGetDeviceQueue(device, queueFamilyIndex, 0, &queue); // Compute command pool VkCommandPoolCreateInfo cmdPoolInfo = {}; cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; cmdPoolInfo.queueFamilyIndex = queueFamilyIndex; cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &commandPool)); /* Prepare storage buffers */ std::vector computeInput(BUFFER_ELEMENTS); std::vector computeOutput(BUFFER_ELEMENTS); // Fill input data uint32_t n = 0; std::generate(computeInput.begin(), computeInput.end(), [&n] { return n++; }); const VkDeviceSize bufferSize = BUFFER_ELEMENTS * sizeof(uint32_t); VkBuffer deviceBuffer, hostBuffer; VkDeviceMemory deviceMemory, hostMemory; // Copy input data to VRAM using a staging buffer { createBuffer( VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &hostBuffer, &hostMemory, bufferSize, computeInput.data()); // Flush writes to host visible buffer VkMappedMemoryRange mappedRange = vks::initializers::mappedMemoryRange(); mappedRange.memory = hostMemory; mappedRange.offset = 0; mappedRange.size = VK_WHOLE_SIZE; vkFlushMappedMemoryRanges(device, 1, &mappedRange); createBuffer( VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &deviceBuffer, &deviceMemory, bufferSize); // Copy to staging buffer VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1); VkCommandBuffer copyCmd; VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, ©Cmd)); VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo(); VK_CHECK_RESULT(vkBeginCommandBuffer(copyCmd, &cmdBufInfo)); VkBufferCopy copyRegion = {}; copyRegion.size = bufferSize; vkCmdCopyBuffer(copyCmd, hostBuffer, deviceBuffer, 1, ©Region); VK_CHECK_RESULT(vkEndCommandBuffer(copyCmd)); VkSubmitInfo submitInfo = vks::initializers::submitInfo(); submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = ©Cmd; VkFenceCreateInfo fenceInfo = vks::initializers::fenceCreateInfo(VK_FLAGS_NONE); VkFence fence; VK_CHECK_RESULT(vkCreateFence(device, &fenceInfo, nullptr, &fence)); // Submit to the queue VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence)); VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, DEFAULT_FENCE_TIMEOUT)); vkDestroyFence(device, fence, nullptr); vkFreeCommandBuffers(device, commandPool, 1, ©Cmd); } /* Prepare compute pipeline */ { std::vector poolSizes = { vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1), }; VkDescriptorPoolCreateInfo descriptorPoolInfo = vks::initializers::descriptorPoolCreateInfo(static_cast(poolSizes.size()), poolSizes.data(), 1); VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolInfo, nullptr, &descriptorPool)); std::vector setLayoutBindings = { vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 0), }; VkDescriptorSetLayoutCreateInfo descriptorLayout = vks::initializers::descriptorSetLayoutCreateInfo(setLayoutBindings); VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &descriptorSetLayout)); VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = vks::initializers::pipelineLayoutCreateInfo(&descriptorSetLayout, 1); VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &pipelineLayout)); VkDescriptorSetAllocateInfo allocInfo = vks::initializers::descriptorSetAllocateInfo(descriptorPool, &descriptorSetLayout, 1); VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet)); VkDescriptorBufferInfo bufferDescriptor = {}; bufferDescriptor.buffer = deviceBuffer; bufferDescriptor.range = VK_WHOLE_SIZE; std::vector computeWriteDescriptorSets = { vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, &bufferDescriptor), }; vkUpdateDescriptorSets(device, static_cast(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, NULL); VkPipelineCacheCreateInfo pipelineCacheCreateInfo = {}; pipelineCacheCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; VK_CHECK_RESULT(vkCreatePipelineCache(device, &pipelineCacheCreateInfo, nullptr, &pipelineCache)); // Create pipeline VkComputePipelineCreateInfo computePipelineCreateInfo = vks::initializers::computePipelineCreateInfo(pipelineLayout, 0); VkPipelineShaderStageCreateInfo shaderStage = {}; shaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shaderStage.stage = VK_SHADER_STAGE_COMPUTE_BIT; #if defined(VK_USE_PLATFORM_ANDROID_KHR) shaderStage.module = vks::tools::loadShader(androidApp->activity->assetManager, fileName.c_str(), device); #else shaderStage.module = vks::tools::loadShader(ASSET_PATH "shaders/computeheadless/headless.comp.spv", device); #endif shaderStage.pName = "main"; assert(shaderStage.module != VK_NULL_HANDLE); computePipelineCreateInfo.stage = shaderStage; VK_CHECK_RESULT(vkCreateComputePipelines(device, pipelineCache, 1, &computePipelineCreateInfo, nullptr, &pipeline)); // Create a command buffer for compute operations VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1); VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &commandBuffer)); // Fence for compute CB sync VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT); VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &fence)); } /* Command buffer creation (for compute work submission) */ { VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo(); VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &cmdBufInfo)); VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); bufferBarrier.buffer = deviceBuffer; bufferBarrier.size = VK_WHOLE_SIZE; bufferBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vkCmdPipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_FLAGS_NONE, 0, nullptr, 1, &bufferBarrier, 0, nullptr); vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &descriptorSet, 0, 0); vkCmdDispatch(commandBuffer, BUFFER_ELEMENTS, 1, 1); // Barrier to ensure that shader writes are finished before buffer is read back from GPU bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; bufferBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; bufferBarrier.buffer = deviceBuffer; bufferBarrier.size = VK_WHOLE_SIZE; bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vkCmdPipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_FLAGS_NONE, 0, nullptr, 1, &bufferBarrier, 0, nullptr); // Read back to host visible buffer VkBufferCopy copyRegion = {}; copyRegion.size = bufferSize; vkCmdCopyBuffer(commandBuffer, deviceBuffer, hostBuffer, 1, ©Region); VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer)); // Submit compute work vkResetFences(device, 1, &fence); VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo(); computeSubmitInfo.commandBufferCount = 1; computeSubmitInfo.pCommandBuffers = &commandBuffer; VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &computeSubmitInfo, fence)); // Wait for fence vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX); // Flush writes to host visible buffer VkMappedMemoryRange mappedRange = vks::initializers::mappedMemoryRange(); mappedRange.memory = hostMemory; mappedRange.offset = 0; mappedRange.size = VK_WHOLE_SIZE; vkFlushMappedMemoryRanges(device, 1, &mappedRange); void* mapped; vkMapMemory(device, hostMemory, 0, VK_WHOLE_SIZE, 0, &mapped); memcpy(computeOutput.data(), mapped, bufferSize); } // Output buffer contents std::cout << "Compute input: " << std::endl; for (auto v : computeInput) { std::cout << v << "\t"; } std::cout << std::endl; std::cout << "Compute output: " << std::endl; for (auto v : computeOutput) { std::cout << v << "\t"; } std::cout << std::endl; // Clean up vkDestroyBuffer(device, deviceBuffer, nullptr); vkFreeMemory(device, deviceMemory, nullptr); vkDestroyBuffer(device, hostBuffer, nullptr); vkFreeMemory(device, hostMemory, nullptr); #if defined(DEBUG) PFN_vkDestroyDebugReportCallbackEXT vkDestroyDebugReportCallback = reinterpret_cast(vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT")); assert(vkDestroyDebugReportCallback); vkDestroyDebugReportCallback(instance, debugReportCallback, nullptr); #endif } ~VulkanExample() { // todo: all other stuff vkDestroyPipelineLayout(device, pipelineLayout, nullptr); vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr); vkDestroyPipeline(device, pipeline, nullptr); vkDestroyFence(device, fence, nullptr); vkDestroyCommandPool(device, commandPool, nullptr); } }; #if defined(__ANDROID__) // Android entry point // A note on app_dummy(): This is required as the compiler may otherwise remove the main entry point of the application VulkanExample *vulkanExample; void android_main(android_app* state) { app_dummy(); VulkanExample *vulkanExample = new VulkanExample(); state->userData = vulkanExample; delete(vulkanExample); } #else int main() { VulkanExample *vulkanExample = new VulkanExample(); std::cout << "Finished. Press enter to terminate..."; getchar(); delete(vulkanExample); return 0; } #endif