NVScaler.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. // The MIT License(MIT)
  2. //
  3. // Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  4. //
  5. // Permission is hereby granted, free of charge, to any person obtaining a copy of
  6. // this software and associated documentation files(the "Software"), to deal in
  7. // the Software without restriction, including without limitation the rights to
  8. // use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies of
  9. // the Software, and to permit persons to whom the Software is furnished to do so,
  10. // subject to the following conditions :
  11. //
  12. // The above copyright notice and this permission notice shall be included in all
  13. // copies or substantial portions of the Software.
  14. //
  15. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
  17. // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR
  18. // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
  19. // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  20. // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21. #include "NVScaler.h"
  22. #include <iostream>
  23. #include <array>
  24. #include "VKUtilities.h"
  25. #include "DeviceResources.h"
  26. #include "Utilities.h"
  27. NVScaler::NVScaler(DeviceResources& deviceResources, const std::vector<std::string>& shaderPaths, bool glsl)
  28. : m_deviceResources(deviceResources)
  29. , m_outputWidth(1)
  30. , m_outputHeight(1)
  31. {
  32. NISOptimizer opt(true, NISGPUArchitecture::NVIDIA_Generic);
  33. m_blockWidth = opt.GetOptimalBlockWidth();
  34. m_blockHeight = opt.GetOptimalBlockHeight();
  35. uint32_t threadGroupSize = opt.GetOptimalThreadGroupSize();
  36. // Shader
  37. {
  38. std::string shaderName = glsl ? "/nis_scaler_glsl.spv" : "/nis_scaler.spv";
  39. std::string shaderPath;
  40. for (auto& e : shaderPaths)
  41. {
  42. if (std::filesystem::exists(e + "/" + shaderName))
  43. {
  44. shaderPath = e + "/" + shaderName;
  45. break;
  46. }
  47. }
  48. if (shaderPath.empty())
  49. throw std::runtime_error("Shader file not found" + shaderName);
  50. auto shaderBytes = readBytes(shaderPath);
  51. VkShaderModuleCreateInfo createInfo{};
  52. createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
  53. createInfo.codeSize = shaderBytes.size();
  54. createInfo.pCode = reinterpret_cast<uint32_t*>(shaderBytes.data());
  55. VK_OK(vkCreateShaderModule(m_deviceResources.logicalDevice(), &createInfo, nullptr, &m_shaderModule));
  56. }
  57. // Descriptor set
  58. {
  59. std::array<VkDescriptorSetLayoutBinding, 6> bindLayout{ {
  60. VK_COMMON_DESC_LAYOUT(m_deviceResources.sampler()),
  61. {COEF_SCALAR_BINDING, IN_TEX_DESC_TYPE, 1, VK_SHADER_STAGE_COMPUTE_BIT},
  62. {COEF_USM_BINDING, IN_TEX_DESC_TYPE, 1, VK_SHADER_STAGE_COMPUTE_BIT},
  63. } };
  64. VkDescriptorSetLayoutCreateInfo info{};
  65. info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
  66. info.bindingCount = (uint32_t)bindLayout.size();
  67. info.pBindings = bindLayout.data();
  68. VK_OK(vkCreateDescriptorSetLayout(m_deviceResources.logicalDevice(), &info, nullptr, &m_descriptorSetLayout));
  69. }
  70. {
  71. VkDescriptorSetAllocateInfo info{};
  72. info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
  73. info.descriptorPool = deviceResources.descriptorPool();
  74. info.descriptorSetCount = 1;
  75. info.pSetLayouts = &m_descriptorSetLayout;
  76. VK_OK(vkAllocateDescriptorSets(m_deviceResources.logicalDevice(), &info, &m_descriptorSet));
  77. }
  78. // Constant buffer
  79. {
  80. m_deviceResources.createConstBuffer(&m_config, sizeof(NISConfig), &m_buffer, &m_constantBufferDeviceMemory, &m_constantBufferStride);
  81. VK_OK(vkMapMemory(m_deviceResources.logicalDevice(), m_constantBufferDeviceMemory, 0, m_constantBufferStride, 0, (void**)&m_constantMemory));
  82. VkDescriptorBufferInfo descBuffInfo{};
  83. descBuffInfo.buffer = m_buffer;
  84. descBuffInfo.offset = 0;
  85. descBuffInfo.range = sizeof(NISConfig);
  86. VkWriteDescriptorSet writeDescSet{};
  87. writeDescSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  88. writeDescSet.dstSet = m_descriptorSet;
  89. writeDescSet.dstBinding = CB_BINDING;
  90. writeDescSet.descriptorCount = 1;
  91. writeDescSet.descriptorType = CB_DESC_TYPE;
  92. writeDescSet.dstArrayElement = 0;
  93. writeDescSet.pBufferInfo = &descBuffInfo;
  94. vkUpdateDescriptorSets(m_deviceResources.logicalDevice(), 1, &writeDescSet, 0, nullptr);
  95. }
  96. // Pipeline layout
  97. {
  98. VkPushConstantRange pushConstRange{};
  99. pushConstRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
  100. pushConstRange.size = sizeof(m_config);
  101. VkPipelineLayoutCreateInfo info{};
  102. info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
  103. info.setLayoutCount = 1;
  104. info.pSetLayouts = &m_descriptorSetLayout;
  105. info.pushConstantRangeCount = 1;
  106. info.pPushConstantRanges = &pushConstRange;
  107. VK_OK(vkCreatePipelineLayout(m_deviceResources.logicalDevice(), &info, nullptr, &m_pipelineLayout));
  108. }
  109. // Compute pipeline
  110. {
  111. VkPipelineShaderStageCreateInfo pipeShaderStageCreateInfo{};
  112. pipeShaderStageCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  113. pipeShaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT;
  114. pipeShaderStageCreateInfo.module = m_shaderModule;
  115. pipeShaderStageCreateInfo.pName = "main";
  116. VkComputePipelineCreateInfo info{};
  117. info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
  118. info.stage = pipeShaderStageCreateInfo;
  119. info.layout = m_pipelineLayout;
  120. VK_OK(vkCreateComputePipelines(m_deviceResources.logicalDevice(), VK_NULL_HANDLE, 1, &info, nullptr, &m_pipeline));
  121. }
  122. const int rowPitch = kFilterSize * 2;
  123. const int imageSize = rowPitch * kPhaseCount;
  124. m_deviceResources.createTexture2D(kFilterSize / 4, kPhaseCount, VK_FORMAT_R16G16B16A16_SFLOAT, coef_scale_fp16, rowPitch, imageSize, &m_coefScale, &m_coefScaleDeviceMemory);
  125. m_deviceResources.createTexture2D(kFilterSize / 4, kPhaseCount, VK_FORMAT_R16G16B16A16_SFLOAT, coef_usm_fp16, rowPitch, imageSize, &m_coefUsm, &m_coefUsmDeviceMemory);
  126. m_deviceResources.createSRV(m_coefScale, VK_FORMAT_R16G16B16A16_SFLOAT, &m_coefScaleSrv);
  127. m_deviceResources.createSRV(m_coefUsm, VK_FORMAT_R16G16B16A16_SFLOAT, &m_coefUsmSrv);
  128. }
  129. void NVScaler::cleanUp()
  130. {
  131. vkDestroyImageView(m_deviceResources.logicalDevice(), m_coefUsmSrv, nullptr);
  132. vkDestroyImageView(m_deviceResources.logicalDevice(), m_coefScaleSrv, nullptr);
  133. vkFreeMemory(m_deviceResources.logicalDevice(), m_coefUsmDeviceMemory, nullptr);
  134. vkFreeMemory(m_deviceResources.logicalDevice(), m_coefScaleDeviceMemory, nullptr);
  135. vkDestroyImage(m_deviceResources.logicalDevice(), m_coefUsm, nullptr);
  136. vkDestroyImage(m_deviceResources.logicalDevice(), m_coefScale, nullptr);
  137. vkDestroyPipeline(m_deviceResources.logicalDevice(), m_pipeline, nullptr);
  138. vkDestroyPipelineLayout(m_deviceResources.logicalDevice(), m_pipelineLayout, nullptr);
  139. vkFreeMemory(m_deviceResources.logicalDevice(), m_constantBufferDeviceMemory, nullptr);
  140. vkDestroyBuffer(m_deviceResources.logicalDevice(), m_buffer, nullptr);
  141. vkFreeDescriptorSets(m_deviceResources.logicalDevice(), m_deviceResources.descriptorPool(), 1, &m_descriptorSet);
  142. vkDestroyDescriptorSetLayout(m_deviceResources.logicalDevice(), m_descriptorSetLayout, nullptr);
  143. vkDestroyShaderModule(m_deviceResources.logicalDevice(), m_shaderModule, nullptr);
  144. }
  145. void NVScaler::update(float sharpness, uint32_t inputWidth, uint32_t inputHeight, uint32_t outputWidth, uint32_t outputHeight)
  146. {
  147. NVScalerUpdateConfig(m_config, sharpness,
  148. 0, 0, inputWidth, inputHeight, inputWidth, inputHeight,
  149. 0, 0, outputWidth, outputHeight, outputWidth, outputHeight,
  150. NISHDRMode::None);
  151. m_outputWidth = outputWidth;
  152. m_outputHeight = outputHeight;
  153. }
  154. void NVScaler::dispatch(VkImageView inputSrv, VkImageView outputUav)
  155. {
  156. // Update constant buffer
  157. const auto offset = m_constantBufferStride * m_deviceResources.swapchainIndex();
  158. memcpy(m_constantMemory + offset, &m_config, sizeof(m_config));
  159. VkDescriptorBufferInfo descBuffInfo{};
  160. descBuffInfo.buffer = m_buffer;
  161. descBuffInfo.offset = offset;
  162. descBuffInfo.range = sizeof(NISConfig);
  163. VkWriteDescriptorSet inWriteDescSet{};
  164. VkWriteDescriptorSet outWriteDescSet{};
  165. VkWriteDescriptorSet coefScalarWriteDescSet{};
  166. VkWriteDescriptorSet coefUsmWriteDescSet{};
  167. VkDescriptorImageInfo info_inWriteDescSet{};
  168. info_inWriteDescSet.imageView = inputSrv;
  169. info_inWriteDescSet.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
  170. inWriteDescSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  171. inWriteDescSet.dstSet = m_descriptorSet;
  172. inWriteDescSet.dstBinding = IN_TEX_BINDING;
  173. inWriteDescSet.descriptorCount = 1;
  174. inWriteDescSet.descriptorType = IN_TEX_DESC_TYPE;
  175. inWriteDescSet.pImageInfo = &info_inWriteDescSet;
  176. VkDescriptorImageInfo infooutWriteDescSet{};
  177. infooutWriteDescSet.imageView = outputUav;
  178. infooutWriteDescSet.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
  179. outWriteDescSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  180. outWriteDescSet.dstSet = m_descriptorSet;
  181. outWriteDescSet.dstBinding = OUT_TEX_BINDING;
  182. outWriteDescSet.descriptorCount = 1;
  183. outWriteDescSet.descriptorType = OUT_TEX_DESC_TYPE;
  184. outWriteDescSet.pImageInfo = &infooutWriteDescSet;
  185. VkDescriptorImageInfo infocoefScalarWriteDescSet{};
  186. infocoefScalarWriteDescSet.imageView = m_coefScaleSrv;
  187. infocoefScalarWriteDescSet.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
  188. coefScalarWriteDescSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  189. coefScalarWriteDescSet.dstSet = m_descriptorSet;
  190. coefScalarWriteDescSet.dstBinding = COEF_SCALAR_BINDING;
  191. coefScalarWriteDescSet.descriptorCount = 1;
  192. coefScalarWriteDescSet.descriptorType = IN_TEX_DESC_TYPE;
  193. coefScalarWriteDescSet.pImageInfo = &infocoefScalarWriteDescSet;
  194. VkDescriptorImageInfo infocoefUsmWriteDescSet{};
  195. infocoefUsmWriteDescSet.imageView = m_coefUsmSrv;
  196. infocoefUsmWriteDescSet.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
  197. coefUsmWriteDescSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  198. coefUsmWriteDescSet.dstSet = m_descriptorSet;
  199. coefUsmWriteDescSet.dstBinding = COEF_USM_BINDING;
  200. coefUsmWriteDescSet.descriptorCount = 1;
  201. coefUsmWriteDescSet.descriptorType = IN_TEX_DESC_TYPE;
  202. coefUsmWriteDescSet.pImageInfo = &infocoefUsmWriteDescSet;
  203. const VkWriteDescriptorSet writeDescSets[] = {
  204. inWriteDescSet,
  205. outWriteDescSet,
  206. coefScalarWriteDescSet,
  207. coefUsmWriteDescSet
  208. };
  209. constexpr auto sizeWriteDescSets = static_cast<uint32_t>(std::size(writeDescSets));
  210. vkUpdateDescriptorSets(m_deviceResources.logicalDevice(), sizeWriteDescSets, writeDescSets, 0, nullptr);
  211. auto cmdBuffer = m_deviceResources.commandBuffer();
  212. vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline);
  213. vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayout, 0, 1, &m_descriptorSet, 1, (uint32_t*)&descBuffInfo.offset);
  214. uint32_t gridX = uint32_t(std::ceil(m_outputWidth / float(m_blockWidth)));
  215. uint32_t gridY = uint32_t(std::ceil(m_outputHeight / float(m_blockHeight)));
  216. vkCmdDispatch(cmdBuffer, gridX, gridY, 1);
  217. }