/* * Vulkan Example - Minimal headless compute example * * Copyright (C) 2017 by Sascha Willems - www.saschawillems.de * * This code is licensed under the MIT license (MIT) (http://opensource.org/licenses/MIT) */ // TODO: separate transfer queue (if not supported by compute queue) including buffer ownership transfer #if defined(_WIN32) #pragma comment(linker, "/subsystem:console") #elif defined(VK_USE_PLATFORM_ANDROID_KHR) #include #include #include #include #include "VulkanAndroid.h" #endif #include #include #include #include #include #include #include #include #include "VulkanTools.h" #if defined(VK_USE_PLATFORM_ANDROID_KHR) android_app* androidapp; #endif #define DEBUG (!NDEBUG) #define BUFFER_ELEMENTS 32 #if defined(VK_USE_PLATFORM_ANDROID_KHR) #define LOG(...) ((void)__android_log_print(ANDROID_LOG_INFO, "vulkanExample", __VA_ARGS__)) #else #define LOG(...) printf(__VA_ARGS__) #endif static VKAPI_ATTR VkBool32 VKAPI_CALL debugMessageCallback( VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location, int32_t messageCode, const char* pLayerPrefix, const char* pMessage, void* pUserData) { LOG("[VALIDATION]: %s - %s\n", pLayerPrefix, pMessage); return VK_FALSE; } class VulkanExample { public: VkInstance instance; VkPhysicalDevice physicalDevice; VkDevice device; uint32_t queueFamilyIndex; VkPipelineCache pipelineCache; VkQueue queue; VkCommandPool commandPool; VkCommandBuffer commandBuffer; VkFence fence; VkDescriptorPool descriptorPool; VkDescriptorSetLayout descriptorSetLayout; VkDescriptorSet descriptorSet; VkPipelineLayout pipelineLayout; VkPipeline pipeline; VkDebugReportCallbackEXT debugReportCallback; VkResult createBuffer(VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags memoryPropertyFlags, VkBuffer *buffer, VkDeviceMemory *memory, VkDeviceSize size, void *data = nullptr) { // Create the buffer handle VkBufferCreateInfo bufferCreateInfo = vks::initializers::bufferCreateInfo(usageFlags, size); bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, nullptr, buffer)); // Create the memory backing up the buffer handle VkPhysicalDeviceMemoryProperties deviceMemoryProperties; vkGetPhysicalDeviceMemoryProperties(physicalDevice, &deviceMemoryProperties); VkMemoryRequirements memReqs; VkMemoryAllocateInfo memAlloc = vks::initializers::memoryAllocateInfo(); vkGetBufferMemoryRequirements(device, *buffer, &memReqs); memAlloc.allocationSize = memReqs.size; // Find a memory type index that fits the properties of the buffer bool memTypeFound = false; for (uint32_t i = 0; i < deviceMemoryProperties.memoryTypeCount; i++) { if ((memReqs.memoryTypeBits & 1) == 1) { if ((deviceMemoryProperties.memoryTypes[i].propertyFlags & memoryPropertyFlags) == memoryPropertyFlags) { memAlloc.memoryTypeIndex = i; memTypeFound = true; } } memReqs.memoryTypeBits >>= 1; } assert(memTypeFound); VK_CHECK_RESULT(vkAllocateMemory(device, &memAlloc, nullptr, memory)); if (data != nullptr) { void *mapped; VK_CHECK_RESULT(vkMapMemory(device, *memory, 0, size, 0, &mapped)); memcpy(mapped, data, size); vkUnmapMemory(device, *memory); } VK_CHECK_RESULT(vkBindBufferMemory(device, *buffer, *memory, 0)); return VK_SUCCESS; } VulkanExample() { LOG("Running headless compute example\n"); #if defined(VK_USE_PLATFORM_ANDROID_KHR) LOG("loading vulkan lib"); vks::android::loadVulkanLibrary(); #endif VkApplicationInfo appInfo = {}; appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; appInfo.pApplicationName = "Vulkan headless example"; appInfo.pEngineName = "VulkanExample"; appInfo.apiVersion = VK_API_VERSION_1_0; /* Vulkan instance creation (without surface extensions) */ VkInstanceCreateInfo instanceCreateInfo = {}; instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; instanceCreateInfo.pApplicationInfo = &appInfo; uint32_t layerCount = 0; #if defined(VK_USE_PLATFORM_ANDROID_KHR) const char* validationlayers[] = { "VK_LAYER_GOOGLE_threading", "VK_LAYER_LUNARG_parameter_validation", "VK_LAYER_LUNARG_object_tracker","VK_LAYER_LUNARG_core_validation", "VK_LAYER_LUNARG_swapchain", "VK_LAYER_GOOGLE_unique_objects" }; layerCount = 6; #else const char* validationlayers[] = { "VK_LAYER_LUNARG_standard_validation" }; layerCount = 1; #endif #if DEBUG instanceCreateInfo.ppEnabledLayerNames = validationlayers; const char* validationExt = VK_EXT_DEBUG_REPORT_EXTENSION_NAME; instanceCreateInfo.enabledLayerCount = layerCount; instanceCreateInfo.enabledExtensionCount = 1; instanceCreateInfo.ppEnabledExtensionNames = &validationExt; #endif VK_CHECK_RESULT(vkCreateInstance(&instanceCreateInfo, nullptr, &instance)); #if defined(VK_USE_PLATFORM_ANDROID_KHR) vks::android::loadVulkanFunctions(instance); #endif #if DEBUG VkDebugReportCallbackCreateInfoEXT debugReportCreateInfo = {}; debugReportCreateInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; debugReportCreateInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT; debugReportCreateInfo.pfnCallback = (PFN_vkDebugReportCallbackEXT)debugMessageCallback; // We have to explicitly load this function. PFN_vkCreateDebugReportCallbackEXT vkCreateDebugReportCallbackEXT = reinterpret_cast(vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT")); assert(vkCreateDebugReportCallbackEXT); VK_CHECK_RESULT(vkCreateDebugReportCallbackEXT(instance, &debugReportCreateInfo, nullptr, &debugReportCallback)); #endif /* Vulkan device creation */ // Physical device (always use first) uint32_t deviceCount = 0; VK_CHECK_RESULT(vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr)); std::vector physicalDevices(deviceCount); VK_CHECK_RESULT(vkEnumeratePhysicalDevices(instance, &deviceCount, physicalDevices.data())); physicalDevice = physicalDevices[0]; VkPhysicalDeviceProperties deviceProperties; vkGetPhysicalDeviceProperties(physicalDevice, &deviceProperties); LOG("GPU: %s\n", deviceProperties.deviceName); // Request a single compute queue const float defaultQueuePriority(0.0f); VkDeviceQueueCreateInfo queueCreateInfo = {}; uint32_t queueFamilyCount; vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, nullptr); std::vector queueFamilyProperties(queueFamilyCount); vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueFamilyProperties.data()); for (uint32_t i = 0; i < static_cast(queueFamilyProperties.size()); i++) { if (queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) { queueFamilyIndex = i; queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queueCreateInfo.queueFamilyIndex = i; queueCreateInfo.queueCount = 1; queueCreateInfo.pQueuePriorities = &defaultQueuePriority; break; } } // Create logical device VkDeviceCreateInfo deviceCreateInfo = {}; deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; deviceCreateInfo.queueCreateInfoCount = 1; deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo; VK_CHECK_RESULT(vkCreateDevice(physicalDevice, &deviceCreateInfo, nullptr, &device)); // Get a compute queue vkGetDeviceQueue(device, queueFamilyIndex, 0, &queue); // Compute command pool VkCommandPoolCreateInfo cmdPoolInfo = {}; cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; cmdPoolInfo.queueFamilyIndex = queueFamilyIndex; cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &commandPool)); /* Prepare storage buffers */ std::vector computeInput(BUFFER_ELEMENTS); std::vector computeOutput(BUFFER_ELEMENTS); // Fill input data uint32_t n = 0; std::generate(computeInput.begin(), computeInput.end(), [&n] { return n++; }); const VkDeviceSize bufferSize = BUFFER_ELEMENTS * sizeof(uint32_t); VkBuffer deviceBuffer, hostBuffer; VkDeviceMemory deviceMemory, hostMemory; // Copy input data to VRAM using a staging buffer { createBuffer( VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &hostBuffer, &hostMemory, bufferSize, computeInput.data()); // Flush writes to host visible buffer void* mapped; vkMapMemory(device, hostMemory, 0, VK_WHOLE_SIZE, 0, &mapped); VkMappedMemoryRange mappedRange = vks::initializers::mappedMemoryRange(); mappedRange.memory = hostMemory; mappedRange.offset = 0; mappedRange.size = VK_WHOLE_SIZE; vkFlushMappedMemoryRanges(device, 1, &mappedRange); vkUnmapMemory(device, hostMemory); createBuffer( VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &deviceBuffer, &deviceMemory, bufferSize); // Copy to staging buffer VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1); VkCommandBuffer copyCmd; VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, ©Cmd)); VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo(); VK_CHECK_RESULT(vkBeginCommandBuffer(copyCmd, &cmdBufInfo)); VkBufferCopy copyRegion = {}; copyRegion.size = bufferSize; vkCmdCopyBuffer(copyCmd, hostBuffer, deviceBuffer, 1, ©Region); VK_CHECK_RESULT(vkEndCommandBuffer(copyCmd)); VkSubmitInfo submitInfo = vks::initializers::submitInfo(); submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = ©Cmd; VkFenceCreateInfo fenceInfo = vks::initializers::fenceCreateInfo(VK_FLAGS_NONE); VkFence fence; VK_CHECK_RESULT(vkCreateFence(device, &fenceInfo, nullptr, &fence)); // Submit to the queue VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence)); VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX)); vkDestroyFence(device, fence, nullptr); vkFreeCommandBuffers(device, commandPool, 1, ©Cmd); } /* Prepare compute pipeline */ { std::vector poolSizes = { vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1), }; VkDescriptorPoolCreateInfo descriptorPoolInfo = vks::initializers::descriptorPoolCreateInfo(static_cast(poolSizes.size()), poolSizes.data(), 1); VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolInfo, nullptr, &descriptorPool)); std::vector setLayoutBindings = { vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 0), }; VkDescriptorSetLayoutCreateInfo descriptorLayout = vks::initializers::descriptorSetLayoutCreateInfo(setLayoutBindings); VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &descriptorSetLayout)); VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = vks::initializers::pipelineLayoutCreateInfo(&descriptorSetLayout, 1); VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &pipelineLayout)); VkDescriptorSetAllocateInfo allocInfo = vks::initializers::descriptorSetAllocateInfo(descriptorPool, &descriptorSetLayout, 1); VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet)); VkDescriptorBufferInfo bufferDescriptor = { deviceBuffer, 0, VK_WHOLE_SIZE }; std::vector computeWriteDescriptorSets = { vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0, &bufferDescriptor), }; vkUpdateDescriptorSets(device, static_cast(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, NULL); VkPipelineCacheCreateInfo pipelineCacheCreateInfo = {}; pipelineCacheCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; VK_CHECK_RESULT(vkCreatePipelineCache(device, &pipelineCacheCreateInfo, nullptr, &pipelineCache)); // Create pipeline VkComputePipelineCreateInfo computePipelineCreateInfo = vks::initializers::computePipelineCreateInfo(pipelineLayout, 0); // Create specialization constant holding struct struct SpecializationHolding { uint32_t BUFFER_ELEMENT_COUNT = BUFFER_ELEMENTS; } specialization; // create a specialization map entry VkSpecializationMapEntry specializationMapEntry = vks::initializers::specializationMapEntry(0, 0, sizeof(uint32_t)); VkSpecializationInfo specializationInfo = vks::initializers::specializationInfo(1, &specializationMapEntry, sizeof(SpecializationHolding), &specialization); VkPipelineShaderStageCreateInfo shaderStage = {}; shaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shaderStage.stage = VK_SHADER_STAGE_COMPUTE_BIT; #if defined(VK_USE_PLATFORM_ANDROID_KHR) shaderStage.module = vks::tools::loadShader(androidapp->activity->assetManager, ASSET_PATH "shaders/computeheadless/headless.comp.spv", device); #else shaderStage.module = vks::tools::loadShader(ASSET_PATH "shaders/computeheadless/headless.comp.spv", device); #endif shaderStage.pName = "main"; shaderStage.pSpecializationInfo = &specializationInfo; assert(shaderStage.module != VK_NULL_HANDLE); computePipelineCreateInfo.stage = shaderStage; VK_CHECK_RESULT(vkCreateComputePipelines(device, pipelineCache, 1, &computePipelineCreateInfo, nullptr, &pipeline)); // Create a command buffer for compute operations VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo(commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1); VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &commandBuffer)); // Fence for compute CB sync VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT); VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &fence)); } /* Command buffer creation (for compute work submission) */ { VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo(); VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &cmdBufInfo)); // Barrier to ensure that input buffer transfer is finished before compute shader reads from it VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier(); bufferBarrier.buffer = deviceBuffer; bufferBarrier.size = VK_WHOLE_SIZE; bufferBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vkCmdPipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_FLAGS_NONE, 0, nullptr, 1, &bufferBarrier, 0, nullptr); vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &descriptorSet, 0, 0); vkCmdDispatch(commandBuffer, BUFFER_ELEMENTS, 1, 1); // Barrier to ensure that shader writes are finished before buffer is read back from GPU bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; bufferBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; bufferBarrier.buffer = deviceBuffer; bufferBarrier.size = VK_WHOLE_SIZE; bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vkCmdPipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_FLAGS_NONE, 0, nullptr, 1, &bufferBarrier, 0, nullptr); // Read back to host visible buffer VkBufferCopy copyRegion = {}; copyRegion.size = bufferSize; vkCmdCopyBuffer(commandBuffer, deviceBuffer, hostBuffer, 1, ©Region); // Barrier to ensure that buffer copy is finished before host reading from it bufferBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; bufferBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; bufferBarrier.buffer = hostBuffer; bufferBarrier.size = VK_WHOLE_SIZE; bufferBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vkCmdPipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, VK_FLAGS_NONE, 0, nullptr, 1, &bufferBarrier, 0, nullptr); VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer)); // Submit compute work vkResetFences(device, 1, &fence); const VkPipelineStageFlags waitStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo(); computeSubmitInfo.pWaitDstStageMask = &waitStageMask; computeSubmitInfo.commandBufferCount = 1; computeSubmitInfo.pCommandBuffers = &commandBuffer; VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &computeSubmitInfo, fence)); VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX)); // Make device writes visible to the host void *mapped; vkMapMemory(device, hostMemory, 0, VK_WHOLE_SIZE, 0, &mapped); VkMappedMemoryRange mappedRange = vks::initializers::mappedMemoryRange(); mappedRange.memory = hostMemory; mappedRange.offset = 0; mappedRange.size = VK_WHOLE_SIZE; vkInvalidateMappedMemoryRanges(device, 1, &mappedRange); // Copy to output memcpy(computeOutput.data(), mapped, bufferSize); vkUnmapMemory(device, hostMemory); } vkQueueWaitIdle(queue); // Output buffer contents LOG("Compute input:\n"); for (auto v : computeInput) { LOG("%d \t", v); } std::cout << std::endl; LOG("Compute output:\n"); for (auto v : computeOutput) { LOG("%d \t", v); } std::cout << std::endl; // Clean up vkDestroyBuffer(device, deviceBuffer, nullptr); vkFreeMemory(device, deviceMemory, nullptr); vkDestroyBuffer(device, hostBuffer, nullptr); vkFreeMemory(device, hostMemory, nullptr); #if DEBUG PFN_vkDestroyDebugReportCallbackEXT vkDestroyDebugReportCallback = reinterpret_cast(vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT")); assert(vkDestroyDebugReportCallback); vkDestroyDebugReportCallback(instance, debugReportCallback, nullptr); #endif } ~VulkanExample() { // todo: all other stuff vkDestroyPipelineLayout(device, pipelineLayout, nullptr); vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr); vkDestroyPipeline(device, pipeline, nullptr); vkDestroyFence(device, fence, nullptr); vkDestroyCommandPool(device, commandPool, nullptr); } }; #if defined(VK_USE_PLATFORM_ANDROID_KHR) void handleAppCommand(android_app * app, int32_t cmd) { if (cmd == APP_CMD_INIT_WINDOW) { VulkanExample *vulkanExample = new VulkanExample(); delete(vulkanExample); ANativeActivity_finish(app->activity); } } void android_main(android_app* state) { app_dummy(); androidapp = state; androidapp->onAppCmd = handleAppCommand; int ident, events; struct android_poll_source* source; while ((ident = ALooper_pollAll(-1, NULL, &events, (void**)&source)) >= 0) { if (source != NULL) { source->process(androidapp, source); } if (androidapp->destroyRequested != 0) { break; } } } #else int main() { VulkanExample *vulkanExample = new VulkanExample(); std::cout << "Finished. Press enter to terminate..."; getchar(); delete(vulkanExample); return 0; } #endif