Browse Source

NVIDIA Image Scaling v1.0.2

- Added multiple performance optimizations
  Moved the edge-map interpolation and weight computation before the directional
  filter response generation
  Increased detection ratio by a factor of 2
- Fixed host and compute shader compilation warnings
- Adjusted sharpness minimum value and normalization
- Updated copyright notice
Ariel Bernal 2 years ago
parent
commit
4d3f6f1b42
52 changed files with 382 additions and 397 deletions
  1. 143 135
      NIS/NIS_Config.h
  2. 1 1
      NIS/NIS_Main.glsl
  3. 2 2
      NIS/NIS_Main.hlsl
  4. 171 176
      NIS/NIS_Scaler.h
  5. 15 11
      README.md
  6. BIN
      docs/NIS_SDK_Programming_Guide.pdf
  7. 1 1
      licence.txt
  8. 1 1
      samples/DX11/include/AppRenderer.h
  9. 1 1
      samples/DX11/include/BilinearUpscale.h
  10. 1 1
      samples/DX11/include/DXUtilities.h
  11. 1 1
      samples/DX11/include/DeviceResources.h
  12. 1 1
      samples/DX11/include/NVScaler.h
  13. 1 1
      samples/DX11/include/NVSharpen.h
  14. 1 1
      samples/DX11/include/UIRenderer.h
  15. 1 1
      samples/DX11/src/AppRenderer.cpp
  16. 1 1
      samples/DX11/src/BilinearUpscale.cpp
  17. 1 1
      samples/DX11/src/DeviceResources.cpp
  18. 1 1
      samples/DX11/src/NVScaler.cpp
  19. 1 1
      samples/DX11/src/NVSharpen.cpp
  20. 1 1
      samples/DX11/src/Sample.cpp
  21. 1 1
      samples/DX11/src/UIRenderer.cpp
  22. 1 1
      samples/DX12/include/AppRenderer.h
  23. 1 1
      samples/DX12/include/BilinearUpscale.h
  24. 1 1
      samples/DX12/include/DXUtilities.h
  25. 1 1
      samples/DX12/include/DeviceResources.h
  26. 1 1
      samples/DX12/include/NVScaler.h
  27. 1 1
      samples/DX12/include/NVSharpen.h
  28. 1 1
      samples/DX12/include/UIRenderer.h
  29. 1 1
      samples/DX12/src/AppRenderer.cpp
  30. 1 1
      samples/DX12/src/BilinearUpscale.cpp
  31. 3 4
      samples/DX12/src/DeviceResources.cpp
  32. 2 2
      samples/DX12/src/NVScaler.cpp
  33. 2 2
      samples/DX12/src/NVSharpen.cpp
  34. 1 1
      samples/DX12/src/Sample.cpp
  35. 1 1
      samples/DX12/src/UIRenderer.cpp
  36. 1 1
      samples/DX12/src/bilinearUpscale.hlsl
  37. 1 1
      samples/VK/include/AppRenderer.h
  38. 1 1
      samples/VK/include/DeviceResources.h
  39. 1 1
      samples/VK/include/NVScaler.h
  40. 1 1
      samples/VK/include/NVSharpen.h
  41. 1 1
      samples/VK/include/UIRenderer.h
  42. 1 1
      samples/VK/include/VKUtilities.h
  43. 1 1
      samples/VK/src/AppRenderer.cpp
  44. 1 1
      samples/VK/src/DeviceResources.cpp
  45. 1 1
      samples/VK/src/NVScaler.cpp
  46. 1 1
      samples/VK/src/NVSharpen.cpp
  47. 1 1
      samples/VK/src/Sample.cpp
  48. 1 1
      samples/VK/src/UIRenderer.cpp
  49. 1 1
      samples/common/Image.cpp
  50. 1 1
      samples/common/Image.h
  51. 1 1
      samples/common/Utilities.h
  52. 1 22
      third_party_licenses.txt

+ 143 - 135
NIS/NIS_Config.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in
@@ -86,7 +86,8 @@ enum class NISGPUArchitecture : uint32_t
 {
     NVIDIA_Generic = 0,
     AMD_Generic = 1,
-    Intel_Generic = 2
+    Intel_Generic = 2,
+    NVIDIA_Generic_fp16 = 3
 };
 
 struct NISOptimizer
@@ -104,6 +105,8 @@ struct NISOptimizer
         switch (gpuArch) {
         case NISGPUArchitecture::NVIDIA_Generic:
             return 32;
+        case NISGPUArchitecture::NVIDIA_Generic_fp16:
+            return 32;
         case NISGPUArchitecture::AMD_Generic:
             return 32;
         case NISGPUArchitecture::Intel_Generic:
@@ -117,6 +120,8 @@ struct NISOptimizer
         switch (gpuArch) {
         case NISGPUArchitecture::NVIDIA_Generic:
             return isUpscaling ? 24 : 32;
+        case NISGPUArchitecture::NVIDIA_Generic_fp16:
+            return isUpscaling ? 32 : 32;
         case NISGPUArchitecture::AMD_Generic:
             return isUpscaling ? 24 : 32;
         case NISGPUArchitecture::Intel_Generic:
@@ -130,6 +135,8 @@ struct NISOptimizer
         switch (gpuArch) {
         case NISGPUArchitecture::NVIDIA_Generic:
             return 128;
+        case NISGPUArchitecture::NVIDIA_Generic_fp16:
+            return 128;
         case NISGPUArchitecture::AMD_Generic:
             return 256;
         case NISGPUArchitecture::Intel_Generic:
@@ -160,7 +167,7 @@ inline bool NVScalerUpdateConfig(NISConfig& config, float sharpness,
     const float MinScale = (sharpen_slider >= 0.0f) ? 1.25f : 1.0f;
     const float LimitScale = (sharpen_slider >= 0.0f) ? 1.25f : 1.0f;
 
-    float kDetectRatio = 1127.f / 1024.f;
+    float kDetectRatio = 2 * 1127.f / 1024.f;
 
     // Params for SDR
     float kDetectThres = 64.0f / 1024.0f;
@@ -170,7 +177,7 @@ inline bool NVScalerUpdateConfig(NISConfig& config, float sharpness,
     float kSharpStartY = 0.45f;
     float kSharpEndY = 0.9f;
     float kSharpStrengthMin = std::max<float>(0.0f, 0.4f + sharpen_slider * MinScale * 1.2f);
-    float kSharpStrengthMax = 1.6f + sharpen_slider * 1.8f;
+    float kSharpStrengthMax = 1.6f + sharpen_slider * MaxScale * 1.8f;
     float kSharpLimitMin = std::max<float>(0.1f, 0.14f + sharpen_slider * LimitScale * 0.32f);
     float kSharpLimitMax = 0.5f + sharpen_slider * LimitScale * 0.6f;
 
@@ -222,20 +229,21 @@ inline bool NVScalerUpdateConfig(NISConfig& config, float sharpness,
     config.kDstNormY = 1.f / outputTextureHeight;
     config.kScaleX = config.kInputViewportWidth / float(config.kOutputViewportWidth);
     config.kScaleY = config.kInputViewportHeight / float(config.kOutputViewportHeight);
-    if (config.kScaleX < 0.5f || config.kScaleX > 1.f || config.kScaleY < 0.5f || config.kScaleY > 1.f)
-        return false;
     config.kDetectRatio = kDetectRatio;
     config.kDetectThres = kDetectThres;
     config.kMinContrastRatio = kMinContrastRatio;
     config.kRatioNorm = kRatioNorm;
     config.kContrastBoost = 1.0f;
-    config.kEps = 1.0f;
+    config.kEps = 1.0f / 255.0f;
     config.kSharpStartY = kSharpStartY;
     config.kSharpScaleY = kSharpScaleY;
     config.kSharpStrengthMin = kSharpStrengthMin;
     config.kSharpStrengthScale = kSharpStrengthScale;
     config.kSharpLimitMin = kSharpLimitMin;
     config.kSharpLimitScale = kSharpLimitScale;
+
+    if (config.kScaleX < 0.5f || config.kScaleX > 1.f || config.kScaleY < 0.5f || config.kScaleY > 1.f)
+        return false;
     return true;
 }
 
@@ -258,137 +266,137 @@ namespace {
     constexpr size_t kFilterSize = 8;
 
     constexpr float coef_scale[kPhaseCount][kFilterSize] = {
-        {0.0,     0.0,    1.0000, 0.0,     0.0,    0.0, 0.0, 0.0},
-        {0.0029, -0.0127, 1.0000, 0.0132, -0.0034, 0.0, 0.0, 0.0},
-        {0.0063, -0.0249, 0.9985, 0.0269, -0.0068, 0.0, 0.0, 0.0},
-        {0.0088, -0.0361, 0.9956, 0.0415, -0.0103, 0.0005, 0.0, 0.0},
-        {0.0117, -0.0474, 0.9932, 0.0562, -0.0142, 0.0005, 0.0, 0.0},
-        {0.0142, -0.0576, 0.9897, 0.0713, -0.0181, 0.0005, 0.0, 0.0},
-        {0.0166, -0.0674, 0.9844, 0.0874, -0.0220, 0.0010, 0.0, 0.0},
-        {0.0186, -0.0762, 0.9785, 0.1040, -0.0264, 0.0015, 0.0, 0.0},
-        {0.0205, -0.0850, 0.9727, 0.1206, -0.0308, 0.0020, 0.0, 0.0},
-        {0.0225, -0.0928, 0.9648, 0.1382, -0.0352, 0.0024, 0.0, 0.0},
-        {0.0239, -0.1006, 0.9575, 0.1558, -0.0396, 0.0029, 0.0, 0.0},
-        {0.0254, -0.1074, 0.9487, 0.1738, -0.0439, 0.0034, 0.0, 0.0},
-        {0.0264, -0.1138, 0.9390, 0.1929, -0.0488, 0.0044, 0.0, 0.0},
-        {0.0278, -0.1191, 0.9282, 0.2119, -0.0537, 0.0049, 0.0, 0.0},
-        {0.0288, -0.1245, 0.9170, 0.2310, -0.0581, 0.0059, 0.0, 0.0},
-        {0.0293, -0.1294, 0.9058, 0.2510, -0.0630, 0.0063, 0.0, 0.0},
-        {0.0303, -0.1333, 0.8926, 0.2710, -0.0679, 0.0073, 0.0, 0.0},
-        {0.0308, -0.1367, 0.8789, 0.2915, -0.0728, 0.0083, 0.0, 0.0},
-        {0.0308, -0.1401, 0.8657, 0.3120, -0.0776, 0.0093, 0.0, 0.0},
-        {0.0313, -0.1426, 0.8506, 0.3330, -0.0825, 0.0103, 0.0, 0.0},
-        {0.0313, -0.1445, 0.8354, 0.3540, -0.0874, 0.0112, 0.0, 0.0},
-        {0.0313, -0.1460, 0.8193, 0.3755, -0.0923, 0.0122, 0.0, 0.0},
-        {0.0313, -0.1470, 0.8022, 0.3965, -0.0967, 0.0137, 0.0, 0.0},
-        {0.0308, -0.1479, 0.7856, 0.4185, -0.1016, 0.0146, 0.0, 0.0},
-        {0.0303, -0.1479, 0.7681, 0.4399, -0.1060, 0.0156, 0.0, 0.0},
-        {0.0298, -0.1479, 0.7505, 0.4614, -0.1104, 0.0166, 0.0, 0.0},
-        {0.0293, -0.1470, 0.7314, 0.4829, -0.1147, 0.0181, 0.0, 0.0},
-        {0.0288, -0.1460, 0.7119, 0.5049, -0.1187, 0.0190, 0.0, 0.0},
-        {0.0278, -0.1445, 0.6929, 0.5264, -0.1226, 0.0200, 0.0, 0.0},
-        {0.0273, -0.1431, 0.6724, 0.5479, -0.1260, 0.0215, 0.0, 0.0},
-        {0.0264, -0.1411, 0.6528, 0.5693, -0.1299, 0.0225, 0.0, 0.0},
-        {0.0254, -0.1387, 0.6323, 0.5903, -0.1328, 0.0234, 0.0, 0.0},
-        {0.0244, -0.1357, 0.6113, 0.6113, -0.1357, 0.0244, 0.0, 0.0},
-        {0.0234, -0.1328, 0.5903, 0.6323, -0.1387, 0.0254, 0.0, 0.0},
-        {0.0225, -0.1299, 0.5693, 0.6528, -0.1411, 0.0264, 0.0, 0.0},
-        {0.0215, -0.1260, 0.5479, 0.6724, -0.1431, 0.0273, 0.0, 0.0},
-        {0.0200, -0.1226, 0.5264, 0.6929, -0.1445, 0.0278, 0.0, 0.0},
-        {0.0190, -0.1187, 0.5049, 0.7119, -0.1460, 0.0288, 0.0, 0.0},
-        {0.0181, -0.1147, 0.4829, 0.7314, -0.1470, 0.0293, 0.0, 0.0},
-        {0.0166, -0.1104, 0.4614, 0.7505, -0.1479, 0.0298, 0.0, 0.0},
-        {0.0156, -0.1060, 0.4399, 0.7681, -0.1479, 0.0303, 0.0, 0.0},
-        {0.0146, -0.1016, 0.4185, 0.7856, -0.1479, 0.0308, 0.0, 0.0},
-        {0.0137, -0.0967, 0.3965, 0.8022, -0.1470, 0.0313, 0.0, 0.0},
-        {0.0122, -0.0923, 0.3755, 0.8193, -0.1460, 0.0313, 0.0, 0.0},
-        {0.0112, -0.0874, 0.3540, 0.8354, -0.1445, 0.0313, 0.0, 0.0},
-        {0.0103, -0.0825, 0.3330, 0.8506, -0.1426, 0.0313, 0.0, 0.0},
-        {0.0093, -0.0776, 0.3120, 0.8657, -0.1401, 0.0308, 0.0, 0.0},
-        {0.0083, -0.0728, 0.2915, 0.8789, -0.1367, 0.0308, 0.0, 0.0},
-        {0.0073, -0.0679, 0.2710, 0.8926, -0.1333, 0.0303, 0.0, 0.0},
-        {0.0063, -0.0630, 0.2510, 0.9058, -0.1294, 0.0293, 0.0, 0.0},
-        {0.0059, -0.0581, 0.2310, 0.9170, -0.1245, 0.0288, 0.0, 0.0},
-        {0.0049, -0.0537, 0.2119, 0.9282, -0.1191, 0.0278, 0.0, 0.0},
-        {0.0044, -0.0488, 0.1929, 0.9390, -0.1138, 0.0264, 0.0, 0.0},
-        {0.0034, -0.0439, 0.1738, 0.9487, -0.1074, 0.0254, 0.0, 0.0},
-        {0.0029, -0.0396, 0.1558, 0.9575, -0.1006, 0.0239, 0.0, 0.0},
-        {0.0024, -0.0352, 0.1382, 0.9648, -0.0928, 0.0225, 0.0, 0.0},
-        {0.0020, -0.0308, 0.1206, 0.9727, -0.0850, 0.0205, 0.0, 0.0},
-        {0.0015, -0.0264, 0.1040, 0.9785, -0.0762, 0.0186, 0.0, 0.0},
-        {0.0010, -0.0220, 0.0874, 0.9844, -0.0674, 0.0166, 0.0, 0.0},
-        {0.0005, -0.0181, 0.0713, 0.9897, -0.0576, 0.0142, 0.0, 0.0},
-        {0.0005, -0.0142, 0.0562, 0.9932, -0.0474, 0.0117, 0.0, 0.0},
-        {0.0005, -0.0103, 0.0415, 0.9956, -0.0361, 0.0088, 0.0, 0.0},
-        {0.0, -0.0068, 0.0269, 0.9985, -0.0249, 0.0063, 0.0, 0.0},
-        {0.0, -0.0034, 0.0132, 1.0000, -0.0127, 0.0029, 0.0, 0.0}
+        {0.0f,     0.0f,    1.0000f, 0.0f,     0.0f,    0.0f, 0.0f, 0.0f},
+        {0.0029f, -0.0127f, 1.0000f, 0.0132f, -0.0034f, 0.0f, 0.0f, 0.0f},
+        {0.0063f, -0.0249f, 0.9985f, 0.0269f, -0.0068f, 0.0f, 0.0f, 0.0f},
+        {0.0088f, -0.0361f, 0.9956f, 0.0415f, -0.0103f, 0.0005f, 0.0f, 0.0f},
+        {0.0117f, -0.0474f, 0.9932f, 0.0562f, -0.0142f, 0.0005f, 0.0f, 0.0f},
+        {0.0142f, -0.0576f, 0.9897f, 0.0713f, -0.0181f, 0.0005f, 0.0f, 0.0f},
+        {0.0166f, -0.0674f, 0.9844f, 0.0874f, -0.0220f, 0.0010f, 0.0f, 0.0f},
+        {0.0186f, -0.0762f, 0.9785f, 0.1040f, -0.0264f, 0.0015f, 0.0f, 0.0f},
+        {0.0205f, -0.0850f, 0.9727f, 0.1206f, -0.0308f, 0.0020f, 0.0f, 0.0f},
+        {0.0225f, -0.0928f, 0.9648f, 0.1382f, -0.0352f, 0.0024f, 0.0f, 0.0f},
+        {0.0239f, -0.1006f, 0.9575f, 0.1558f, -0.0396f, 0.0029f, 0.0f, 0.0f},
+        {0.0254f, -0.1074f, 0.9487f, 0.1738f, -0.0439f, 0.0034f, 0.0f, 0.0f},
+        {0.0264f, -0.1138f, 0.9390f, 0.1929f, -0.0488f, 0.0044f, 0.0f, 0.0f},
+        {0.0278f, -0.1191f, 0.9282f, 0.2119f, -0.0537f, 0.0049f, 0.0f, 0.0f},
+        {0.0288f, -0.1245f, 0.9170f, 0.2310f, -0.0581f, 0.0059f, 0.0f, 0.0f},
+        {0.0293f, -0.1294f, 0.9058f, 0.2510f, -0.0630f, 0.0063f, 0.0f, 0.0f},
+        {0.0303f, -0.1333f, 0.8926f, 0.2710f, -0.0679f, 0.0073f, 0.0f, 0.0f},
+        {0.0308f, -0.1367f, 0.8789f, 0.2915f, -0.0728f, 0.0083f, 0.0f, 0.0f},
+        {0.0308f, -0.1401f, 0.8657f, 0.3120f, -0.0776f, 0.0093f, 0.0f, 0.0f},
+        {0.0313f, -0.1426f, 0.8506f, 0.3330f, -0.0825f, 0.0103f, 0.0f, 0.0f},
+        {0.0313f, -0.1445f, 0.8354f, 0.3540f, -0.0874f, 0.0112f, 0.0f, 0.0f},
+        {0.0313f, -0.1460f, 0.8193f, 0.3755f, -0.0923f, 0.0122f, 0.0f, 0.0f},
+        {0.0313f, -0.1470f, 0.8022f, 0.3965f, -0.0967f, 0.0137f, 0.0f, 0.0f},
+        {0.0308f, -0.1479f, 0.7856f, 0.4185f, -0.1016f, 0.0146f, 0.0f, 0.0f},
+        {0.0303f, -0.1479f, 0.7681f, 0.4399f, -0.1060f, 0.0156f, 0.0f, 0.0f},
+        {0.0298f, -0.1479f, 0.7505f, 0.4614f, -0.1104f, 0.0166f, 0.0f, 0.0f},
+        {0.0293f, -0.1470f, 0.7314f, 0.4829f, -0.1147f, 0.0181f, 0.0f, 0.0f},
+        {0.0288f, -0.1460f, 0.7119f, 0.5049f, -0.1187f, 0.0190f, 0.0f, 0.0f},
+        {0.0278f, -0.1445f, 0.6929f, 0.5264f, -0.1226f, 0.0200f, 0.0f, 0.0f},
+        {0.0273f, -0.1431f, 0.6724f, 0.5479f, -0.1260f, 0.0215f, 0.0f, 0.0f},
+        {0.0264f, -0.1411f, 0.6528f, 0.5693f, -0.1299f, 0.0225f, 0.0f, 0.0f},
+        {0.0254f, -0.1387f, 0.6323f, 0.5903f, -0.1328f, 0.0234f, 0.0f, 0.0f},
+        {0.0244f, -0.1357f, 0.6113f, 0.6113f, -0.1357f, 0.0244f, 0.0f, 0.0f},
+        {0.0234f, -0.1328f, 0.5903f, 0.6323f, -0.1387f, 0.0254f, 0.0f, 0.0f},
+        {0.0225f, -0.1299f, 0.5693f, 0.6528f, -0.1411f, 0.0264f, 0.0f, 0.0f},
+        {0.0215f, -0.1260f, 0.5479f, 0.6724f, -0.1431f, 0.0273f, 0.0f, 0.0f},
+        {0.0200f, -0.1226f, 0.5264f, 0.6929f, -0.1445f, 0.0278f, 0.0f, 0.0f},
+        {0.0190f, -0.1187f, 0.5049f, 0.7119f, -0.1460f, 0.0288f, 0.0f, 0.0f},
+        {0.0181f, -0.1147f, 0.4829f, 0.7314f, -0.1470f, 0.0293f, 0.0f, 0.0f},
+        {0.0166f, -0.1104f, 0.4614f, 0.7505f, -0.1479f, 0.0298f, 0.0f, 0.0f},
+        {0.0156f, -0.1060f, 0.4399f, 0.7681f, -0.1479f, 0.0303f, 0.0f, 0.0f},
+        {0.0146f, -0.1016f, 0.4185f, 0.7856f, -0.1479f, 0.0308f, 0.0f, 0.0f},
+        {0.0137f, -0.0967f, 0.3965f, 0.8022f, -0.1470f, 0.0313f, 0.0f, 0.0f},
+        {0.0122f, -0.0923f, 0.3755f, 0.8193f, -0.1460f, 0.0313f, 0.0f, 0.0f},
+        {0.0112f, -0.0874f, 0.3540f, 0.8354f, -0.1445f, 0.0313f, 0.0f, 0.0f},
+        {0.0103f, -0.0825f, 0.3330f, 0.8506f, -0.1426f, 0.0313f, 0.0f, 0.0f},
+        {0.0093f, -0.0776f, 0.3120f, 0.8657f, -0.1401f, 0.0308f, 0.0f, 0.0f},
+        {0.0083f, -0.0728f, 0.2915f, 0.8789f, -0.1367f, 0.0308f, 0.0f, 0.0f},
+        {0.0073f, -0.0679f, 0.2710f, 0.8926f, -0.1333f, 0.0303f, 0.0f, 0.0f},
+        {0.0063f, -0.0630f, 0.2510f, 0.9058f, -0.1294f, 0.0293f, 0.0f, 0.0f},
+        {0.0059f, -0.0581f, 0.2310f, 0.9170f, -0.1245f, 0.0288f, 0.0f, 0.0f},
+        {0.0049f, -0.0537f, 0.2119f, 0.9282f, -0.1191f, 0.0278f, 0.0f, 0.0f},
+        {0.0044f, -0.0488f, 0.1929f, 0.9390f, -0.1138f, 0.0264f, 0.0f, 0.0f},
+        {0.0034f, -0.0439f, 0.1738f, 0.9487f, -0.1074f, 0.0254f, 0.0f, 0.0f},
+        {0.0029f, -0.0396f, 0.1558f, 0.9575f, -0.1006f, 0.0239f, 0.0f, 0.0f},
+        {0.0024f, -0.0352f, 0.1382f, 0.9648f, -0.0928f, 0.0225f, 0.0f, 0.0f},
+        {0.0020f, -0.0308f, 0.1206f, 0.9727f, -0.0850f, 0.0205f, 0.0f, 0.0f},
+        {0.0015f, -0.0264f, 0.1040f, 0.9785f, -0.0762f, 0.0186f, 0.0f, 0.0f},
+        {0.0010f, -0.0220f, 0.0874f, 0.9844f, -0.0674f, 0.0166f, 0.0f, 0.0f},
+        {0.0005f, -0.0181f, 0.0713f, 0.9897f, -0.0576f, 0.0142f, 0.0f, 0.0f},
+        {0.0005f, -0.0142f, 0.0562f, 0.9932f, -0.0474f, 0.0117f, 0.0f, 0.0f},
+        {0.0005f, -0.0103f, 0.0415f, 0.9956f, -0.0361f, 0.0088f, 0.0f, 0.0f},
+        {0.0f, -0.0068f, 0.0269f, 0.9985f, -0.0249f, 0.0063f, 0.0f, 0.0f},
+        {0.0f, -0.0034f, 0.0132f, 1.0000f, -0.0127f, 0.0029f, 0.0f, 0.0f}
     };
 
     constexpr float coef_usm[kPhaseCount][kFilterSize] = {
-        {0,      -0.6001, 1.2002, -0.6001,  0,      0, 0, 0},
-        {0.0029, -0.6084, 1.1987, -0.5903, -0.0029, 0, 0, 0},
-        {0.0049, -0.6147, 1.1958, -0.5791, -0.0068, 0.0005, 0, 0},
-        {0.0073, -0.6196, 1.1890, -0.5659, -0.0103, 0, 0, 0},
-        {0.0093, -0.6235, 1.1802, -0.5513, -0.0151, 0, 0, 0},
-        {0.0112, -0.6265, 1.1699, -0.5352, -0.0195, 0.0005, 0, 0},
-        {0.0122, -0.6270, 1.1582, -0.5181, -0.0259, 0.0005, 0, 0},
-        {0.0142, -0.6284, 1.1455, -0.5005, -0.0317, 0.0005, 0, 0},
-        {0.0156, -0.6265, 1.1274, -0.4790, -0.0386, 0.0005, 0, 0},
-        {0.0166, -0.6235, 1.1089, -0.4570, -0.0454, 0.0010, 0, 0},
-        {0.0176, -0.6187, 1.0879, -0.4346, -0.0532, 0.0010, 0, 0},
-        {0.0181, -0.6138, 1.0659, -0.4102, -0.0615, 0.0015, 0, 0},
-        {0.0190, -0.6069, 1.0405, -0.3843, -0.0698, 0.0015, 0, 0},
-        {0.0195, -0.6006, 1.0161, -0.3574, -0.0796, 0.0020, 0, 0},
-        {0.0200, -0.5928, 0.9893, -0.3286, -0.0898, 0.0024, 0, 0},
-        {0.0200, -0.5820, 0.9580, -0.2988, -0.1001, 0.0029, 0, 0},
-        {0.0200, -0.5728, 0.9292, -0.2690, -0.1104, 0.0034, 0, 0},
-        {0.0200, -0.5620, 0.8975, -0.2368, -0.1226, 0.0039, 0, 0},
-        {0.0205, -0.5498, 0.8643, -0.2046, -0.1343, 0.0044, 0, 0},
-        {0.0200, -0.5371, 0.8301, -0.1709, -0.1465, 0.0049, 0, 0},
-        {0.0195, -0.5239, 0.7944, -0.1367, -0.1587, 0.0054, 0, 0},
-        {0.0195, -0.5107, 0.7598, -0.1021, -0.1724, 0.0059, 0, 0},
-        {0.0190, -0.4966, 0.7231, -0.0649, -0.1865, 0.0063, 0, 0},
-        {0.0186, -0.4819, 0.6846, -0.0288, -0.1997, 0.0068, 0, 0},
-        {0.0186, -0.4668, 0.6460, 0.0093, -0.2144, 0.0073, 0, 0},
-        {0.0176, -0.4507, 0.6055, 0.0479, -0.2290, 0.0083, 0, 0},
-        {0.0171, -0.4370, 0.5693, 0.0859, -0.2446, 0.0088, 0, 0},
-        {0.0161, -0.4199, 0.5283, 0.1255, -0.2598, 0.0098, 0, 0},
-        {0.0161, -0.4048, 0.4883, 0.1655, -0.2754, 0.0103, 0, 0},
-        {0.0151, -0.3887, 0.4497, 0.2041, -0.2910, 0.0107, 0, 0},
-        {0.0142, -0.3711, 0.4072, 0.2446, -0.3066, 0.0117, 0, 0},
-        {0.0137, -0.3555, 0.3672, 0.2852, -0.3228, 0.0122, 0, 0},
-        {0.0132, -0.3394, 0.3262, 0.3262, -0.3394, 0.0132, 0, 0},
-        {0.0122, -0.3228, 0.2852, 0.3672, -0.3555, 0.0137, 0, 0},
-        {0.0117, -0.3066, 0.2446, 0.4072, -0.3711, 0.0142, 0, 0},
-        {0.0107, -0.2910, 0.2041, 0.4497, -0.3887, 0.0151, 0, 0},
-        {0.0103, -0.2754, 0.1655, 0.4883, -0.4048, 0.0161, 0, 0},
-        {0.0098, -0.2598, 0.1255, 0.5283, -0.4199, 0.0161, 0, 0},
-        {0.0088, -0.2446, 0.0859, 0.5693, -0.4370, 0.0171, 0, 0},
-        {0.0083, -0.2290, 0.0479, 0.6055, -0.4507, 0.0176, 0, 0},
-        {0.0073, -0.2144, 0.0093, 0.6460, -0.4668, 0.0186, 0, 0},
-        {0.0068, -0.1997, -0.0288, 0.6846, -0.4819, 0.0186, 0, 0},
-        {0.0063, -0.1865, -0.0649, 0.7231, -0.4966, 0.0190, 0, 0},
-        {0.0059, -0.1724, -0.1021, 0.7598, -0.5107, 0.0195, 0, 0},
-        {0.0054, -0.1587, -0.1367, 0.7944, -0.5239, 0.0195, 0, 0},
-        {0.0049, -0.1465, -0.1709, 0.8301, -0.5371, 0.0200, 0, 0},
-        {0.0044, -0.1343, -0.2046, 0.8643, -0.5498, 0.0205, 0, 0},
-        {0.0039, -0.1226, -0.2368, 0.8975, -0.5620, 0.0200, 0, 0},
-        {0.0034, -0.1104, -0.2690, 0.9292, -0.5728, 0.0200, 0, 0},
-        {0.0029, -0.1001, -0.2988, 0.9580, -0.5820, 0.0200, 0, 0},
-        {0.0024, -0.0898, -0.3286, 0.9893, -0.5928, 0.0200, 0, 0},
-        {0.0020, -0.0796, -0.3574, 1.0161, -0.6006, 0.0195, 0, 0},
-        {0.0015, -0.0698, -0.3843, 1.0405, -0.6069, 0.0190, 0, 0},
-        {0.0015, -0.0615, -0.4102, 1.0659, -0.6138, 0.0181, 0, 0},
-        {0.0010, -0.0532, -0.4346, 1.0879, -0.6187, 0.0176, 0, 0},
-        {0.0010, -0.0454, -0.4570, 1.1089, -0.6235, 0.0166, 0, 0},
-        {0.0005, -0.0386, -0.4790, 1.1274, -0.6265, 0.0156, 0, 0},
-        {0.0005, -0.0317, -0.5005, 1.1455, -0.6284, 0.0142, 0, 0},
-        {0.0005, -0.0259, -0.5181, 1.1582, -0.6270, 0.0122, 0, 0},
-        {0.0005, -0.0195, -0.5352, 1.1699, -0.6265, 0.0112, 0, 0},
-        {0, -0.0151, -0.5513, 1.1802, -0.6235, 0.0093, 0, 0},
-        {0, -0.0103, -0.5659, 1.1890, -0.6196, 0.0073, 0, 0},
-        {0.0005, -0.0068, -0.5791, 1.1958, -0.6147, 0.0049, 0, 0},
-        {0, -0.0029, -0.5903, 1.1987, -0.6084, 0.0029, 0, 0}
+        {0.0f,      -0.6001f, 1.2002f, -0.6001f,  0.0f,  0.0f, 0.0f, 0.0f},
+        {0.0029f, -0.6084f, 1.1987f, -0.5903f, -0.0029f, 0.0f, 0.0f, 0.0f},
+        {0.0049f, -0.6147f, 1.1958f, -0.5791f, -0.0068f, 0.0005f, 0.0f, 0.0f},
+        {0.0073f, -0.6196f, 1.1890f, -0.5659f, -0.0103f, 0.0f, 0.0f, 0.0f},
+        {0.0093f, -0.6235f, 1.1802f, -0.5513f, -0.0151f, 0.0f, 0.0f, 0.0f},
+        {0.0112f, -0.6265f, 1.1699f, -0.5352f, -0.0195f, 0.0005f, 0.0f, 0.0f},
+        {0.0122f, -0.6270f, 1.1582f, -0.5181f, -0.0259f, 0.0005f, 0.0f, 0.0f},
+        {0.0142f, -0.6284f, 1.1455f, -0.5005f, -0.0317f, 0.0005f, 0.0f, 0.0f},
+        {0.0156f, -0.6265f, 1.1274f, -0.4790f, -0.0386f, 0.0005f, 0.0f, 0.0f},
+        {0.0166f, -0.6235f, 1.1089f, -0.4570f, -0.0454f, 0.0010f, 0.0f, 0.0f},
+        {0.0176f, -0.6187f, 1.0879f, -0.4346f, -0.0532f, 0.0010f, 0.0f, 0.0f},
+        {0.0181f, -0.6138f, 1.0659f, -0.4102f, -0.0615f, 0.0015f, 0.0f, 0.0f},
+        {0.0190f, -0.6069f, 1.0405f, -0.3843f, -0.0698f, 0.0015f, 0.0f, 0.0f},
+        {0.0195f, -0.6006f, 1.0161f, -0.3574f, -0.0796f, 0.0020f, 0.0f, 0.0f},
+        {0.0200f, -0.5928f, 0.9893f, -0.3286f, -0.0898f, 0.0024f, 0.0f, 0.0f},
+        {0.0200f, -0.5820f, 0.9580f, -0.2988f, -0.1001f, 0.0029f, 0.0f, 0.0f},
+        {0.0200f, -0.5728f, 0.9292f, -0.2690f, -0.1104f, 0.0034f, 0.0f, 0.0f},
+        {0.0200f, -0.5620f, 0.8975f, -0.2368f, -0.1226f, 0.0039f, 0.0f, 0.0f},
+        {0.0205f, -0.5498f, 0.8643f, -0.2046f, -0.1343f, 0.0044f, 0.0f, 0.0f},
+        {0.0200f, -0.5371f, 0.8301f, -0.1709f, -0.1465f, 0.0049f, 0.0f, 0.0f},
+        {0.0195f, -0.5239f, 0.7944f, -0.1367f, -0.1587f, 0.0054f, 0.0f, 0.0f},
+        {0.0195f, -0.5107f, 0.7598f, -0.1021f, -0.1724f, 0.0059f, 0.0f, 0.0f},
+        {0.0190f, -0.4966f, 0.7231f, -0.0649f, -0.1865f, 0.0063f, 0.0f, 0.0f},
+        {0.0186f, -0.4819f, 0.6846f, -0.0288f, -0.1997f, 0.0068f, 0.0f, 0.0f},
+        {0.0186f, -0.4668f, 0.6460f, 0.0093f, -0.2144f, 0.0073f, 0.0f, 0.0f},
+        {0.0176f, -0.4507f, 0.6055f, 0.0479f, -0.2290f, 0.0083f, 0.0f, 0.0f},
+        {0.0171f, -0.4370f, 0.5693f, 0.0859f, -0.2446f, 0.0088f, 0.0f, 0.0f},
+        {0.0161f, -0.4199f, 0.5283f, 0.1255f, -0.2598f, 0.0098f, 0.0f, 0.0f},
+        {0.0161f, -0.4048f, 0.4883f, 0.1655f, -0.2754f, 0.0103f, 0.0f, 0.0f},
+        {0.0151f, -0.3887f, 0.4497f, 0.2041f, -0.2910f, 0.0107f, 0.0f, 0.0f},
+        {0.0142f, -0.3711f, 0.4072f, 0.2446f, -0.3066f, 0.0117f, 0.0f, 0.0f},
+        {0.0137f, -0.3555f, 0.3672f, 0.2852f, -0.3228f, 0.0122f, 0.0f, 0.0f},
+        {0.0132f, -0.3394f, 0.3262f, 0.3262f, -0.3394f, 0.0132f, 0.0f, 0.0f},
+        {0.0122f, -0.3228f, 0.2852f, 0.3672f, -0.3555f, 0.0137f, 0.0f, 0.0f},
+        {0.0117f, -0.3066f, 0.2446f, 0.4072f, -0.3711f, 0.0142f, 0.0f, 0.0f},
+        {0.0107f, -0.2910f, 0.2041f, 0.4497f, -0.3887f, 0.0151f, 0.0f, 0.0f},
+        {0.0103f, -0.2754f, 0.1655f, 0.4883f, -0.4048f, 0.0161f, 0.0f, 0.0f},
+        {0.0098f, -0.2598f, 0.1255f, 0.5283f, -0.4199f, 0.0161f, 0.0f, 0.0f},
+        {0.0088f, -0.2446f, 0.0859f, 0.5693f, -0.4370f, 0.0171f, 0.0f, 0.0f},
+        {0.0083f, -0.2290f, 0.0479f, 0.6055f, -0.4507f, 0.0176f, 0.0f, 0.0f},
+        {0.0073f, -0.2144f, 0.0093f, 0.6460f, -0.4668f, 0.0186f, 0.0f, 0.0f},
+        {0.0068f, -0.1997f, -0.0288f, 0.6846f, -0.4819f, 0.0186f, 0.0f, 0.0f},
+        {0.0063f, -0.1865f, -0.0649f, 0.7231f, -0.4966f, 0.0190f, 0.0f, 0.0f},
+        {0.0059f, -0.1724f, -0.1021f, 0.7598f, -0.5107f, 0.0195f, 0.0f, 0.0f},
+        {0.0054f, -0.1587f, -0.1367f, 0.7944f, -0.5239f, 0.0195f, 0.0f, 0.0f},
+        {0.0049f, -0.1465f, -0.1709f, 0.8301f, -0.5371f, 0.0200f, 0.0f, 0.0f},
+        {0.0044f, -0.1343f, -0.2046f, 0.8643f, -0.5498f, 0.0205f, 0.0f, 0.0f},
+        {0.0039f, -0.1226f, -0.2368f, 0.8975f, -0.5620f, 0.0200f, 0.0f, 0.0f},
+        {0.0034f, -0.1104f, -0.2690f, 0.9292f, -0.5728f, 0.0200f, 0.0f, 0.0f},
+        {0.0029f, -0.1001f, -0.2988f, 0.9580f, -0.5820f, 0.0200f, 0.0f, 0.0f},
+        {0.0024f, -0.0898f, -0.3286f, 0.9893f, -0.5928f, 0.0200f, 0.0f, 0.0f},
+        {0.0020f, -0.0796f, -0.3574f, 1.0161f, -0.6006f, 0.0195f, 0.0f, 0.0f},
+        {0.0015f, -0.0698f, -0.3843f, 1.0405f, -0.6069f, 0.0190f, 0.0f, 0.0f},
+        {0.0015f, -0.0615f, -0.4102f, 1.0659f, -0.6138f, 0.0181f, 0.0f, 0.0f},
+        {0.0010f, -0.0532f, -0.4346f, 1.0879f, -0.6187f, 0.0176f, 0.0f, 0.0f},
+        {0.0010f, -0.0454f, -0.4570f, 1.1089f, -0.6235f, 0.0166f, 0.0f, 0.0f},
+        {0.0005f, -0.0386f, -0.4790f, 1.1274f, -0.6265f, 0.0156f, 0.0f, 0.0f},
+        {0.0005f, -0.0317f, -0.5005f, 1.1455f, -0.6284f, 0.0142f, 0.0f, 0.0f},
+        {0.0005f, -0.0259f, -0.5181f, 1.1582f, -0.6270f, 0.0122f, 0.0f, 0.0f},
+        {0.0005f, -0.0195f, -0.5352f, 1.1699f, -0.6265f, 0.0112f, 0.0f, 0.0f},
+        {0.0f, -0.0151f, -0.5513f, 1.1802f, -0.6235f, 0.0093f, 0.0f, 0.0f},
+        {0.0f, -0.0103f, -0.5659f, 1.1890f, -0.6196f, 0.0073f, 0.0f, 0.0f},
+        {0.0005f, -0.0068f, -0.5791f, 1.1958f, -0.6147f, 0.0049f, 0.0f, 0.0f},
+        {0.0f, -0.0029f, -0.5903f, 1.1987f, -0.6084f, 0.0029f, 0.0f, 0.0f}
     };
 
     constexpr uint16_t coef_scale_fp16[kPhaseCount][kFilterSize] = {

+ 1 - 1
NIS/NIS_Main.glsl

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 2 - 2
NIS/NIS_Main.hlsl

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in
@@ -79,7 +79,7 @@ NIS_BINDING(0) cbuffer cb : register(b0)
 
 NIS_BINDING(1) SamplerState samplerLinearClamp : register(s0);
 NIS_BINDING(2) Texture2D in_texture            : register(t0);
-NIS_BINDING(3) RWTexture2D<unorm float4> out_texture : register(u0);
+NIS_BINDING(3) RWTexture2D<float4> out_texture : register(u0);
 #if NIS_SCALER
 NIS_BINDING(4) Texture2D coef_scaler           : register(t1);
 NIS_BINDING(5) Texture2D coef_usm              : register(t2);

+ 171 - 176
NIS/NIS_Scaler.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in
@@ -20,7 +20,7 @@
 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 //---------------------------------------------------------------------------------
-// NVIDIA Image Scaling SDK  - v1.0
+// NVIDIA Image Scaling SDK  - v1.0.2
 //---------------------------------------------------------------------------------
 // The NVIDIA Image Scaling SDK provides a single spatial scaling and sharpening algorithm
 // for cross-platform support. The scaling algorithm uses a 6-tap scaling filter combined
@@ -154,6 +154,7 @@
 #define NVI2 int2
 #define NVU uint
 #define NVU2 uint2
+#define NVB bool
 #if NIS_USE_HALF_PRECISION
 #if NIS_HLSL_6_2
 #define NVH float16_t
@@ -194,6 +195,7 @@
 #define NVI2 ivec2
 #define NVU uint
 #define NVU2 uvec2
+#define NVB bool
 #if NIS_USE_HALF_PRECISION
 #define NVH float16_t
 #define NVH2 f16vec2
@@ -264,41 +266,26 @@ NVF4 GetEdgeMap(NVF p[5][5], NVI i, NVI j)
     NVF e_0_90 = 0;
     NVF e_45_135 = 0;
 
-    if ((g_0_90_max + g_45_135_max) != 0)
+    if (g_0_90_max + g_45_135_max == 0)
     {
-        e_0_90 = g_0_90_max / (g_0_90_max + g_45_135_max);
-        e_0_90 = min(e_0_90, 1.0f);
-        e_45_135 = 1.0f - e_0_90;
+        return NVF4(0, 0, 0, 0);
     }
 
-    NVF e = ((g_0_90_max > (g_0_90_min * kDetectRatio)) && (g_0_90_max > kDetectThres) && (g_0_90_max > g_45_135_min)) ? 1.f : 0.f;
-    NVF edge_0  = (g_0_90_max == g_0) ? e   : 0.f;
-    NVF edge_90 = (g_0_90_max == g_0) ? 0.f : e;
+    e_0_90 = min(g_0_90_max / (g_0_90_max + g_45_135_max), 1.0f);
+    e_45_135 = 1.0f - e_0_90;
 
-    e = ((g_45_135_max > (g_45_135_min * kDetectRatio)) && (g_45_135_max > kDetectThres) && (g_45_135_max > g_0_90_min)) ? 1.f : 0.f;
-    NVF edge_45  = (g_45_135_max == g_45) ? e   : 0.f;
-    NVF edge_135 = (g_45_135_max == g_45) ? 0.f : e;
+    NVB c_0_90 = (g_0_90_max > (g_0_90_min * kDetectRatio)) && (g_0_90_max > kDetectThres) && (g_0_90_max > g_45_135_min);
+    NVB c_45_135 = (g_45_135_max > (g_45_135_min * kDetectRatio)) && (g_45_135_max > kDetectThres) && (g_45_135_max > g_0_90_min);
+    NVB c_g_0_90 = g_0_90_max == g_0;
+    NVB c_g_45_135 = g_45_135_max == g_45;
 
-    NVF weight_0 = 0.f;
-    NVF weight_90 = 0.f;
-    NVF weight_45 = 0.f;
-    NVF weight_135 = 0.f;
-    if ((edge_0 + edge_90 + edge_45 + edge_135) >= 2.0f)
-    {
-        weight_0  = (edge_0 == 1.0f) ? e_0_90 : 0.f;
-        weight_90 = (edge_0 == 1.0f) ? 0.f    : e_0_90;
-
-        weight_45 =  (edge_45 == 1.0f) ? e_45_135 : 0.f;
-        weight_135 = (edge_45 == 1.0f) ? 0.f      : e_45_135;
-    }
-    else if ((edge_0 + edge_90 + edge_45 + edge_135) >= 1.0f)
-    {
-        weight_0 = edge_0;
-        weight_90 = edge_90;
-        weight_45 = edge_45;
-        weight_135 = edge_135;
-    }
+    NVF f_e_0_90 = (c_0_90 && c_45_135) ? e_0_90 : 1.0f;
+    NVF f_e_45_135 = (c_0_90 && c_45_135) ? e_45_135 : 1.0f;
 
+    NVF weight_0 = (c_0_90 && c_g_0_90) ? f_e_0_90 : 0.0f;
+    NVF weight_90 = (c_0_90 && !c_g_0_90) ? f_e_0_90 : 0.0f;
+    NVF weight_45 = (c_45_135 && c_g_45_135) ? f_e_45_135 : 0.0f;
+    NVF weight_135 = (c_45_135 && !c_g_45_135) ? f_e_45_135 : 0.0f;
 
     return NVF4(weight_0, weight_90, weight_45, weight_135);
 }
@@ -339,7 +326,7 @@ void LoadFilterBanksSh(NVI i0, NVI di) {
     // The work is spread over (kPhaseCount * 2) threads
     for (NVI i = i0; i < kPhaseCount * 2; i += di)
     {
-        NVI phase = i / 2;
+        NVI phase = i >> 1;
         NVI vIdx = i & 1;
 
         NVH4 v = NVH4(NVTEX_LOAD(coef_scaler, NVI2(vIdx, phase)));
@@ -366,7 +353,7 @@ void LoadFilterBanksSh(NVI i0, NVI di) {
 
 NVF CalcLTI(NVF p0, NVF p1, NVF p2, NVF p3, NVF p4, NVF p5, NVI phase_index)
 {
-    const bool selector = (phase_index <= kPhaseCount / 2);
+    const NVB selector = (phase_index <= kPhaseCount / 2);
     NVF sel = selector ? p0 : p3;
     const NVF a_min = min(min(p1, p2), sel);
     const NVF a_max = max(max(p1, p2), sel);
@@ -444,120 +431,122 @@ NVF FilterNormal(const NVF p[6][6], NVI phase_x_frac_int, NVI phase_y_frac_int)
     return h_acc;
 }
 
-NVF4 GetDirFilters(NVF p[6][6], NVF phase_x_frac, NVF phase_y_frac, NVI phase_x_frac_int, NVI phase_y_frac_int)
+NVF AddDirFilters(NVF p[6][6], NVF phase_x_frac, NVF phase_y_frac, NVI phase_x_frac_int, NVI phase_y_frac_int, NVF4 w)
 {
-    NVF4 f;
-    // 0 deg filter
-    NVF interp0Deg[6];
+    NVF f = 0;
+    if (w.x > 0.0f)
     {
-        NIS_UNROLL
-        for (NVI i = 0; i < 6; ++i)
+        // 0 deg filter
+        NVF interp0Deg[6];
         {
-            interp0Deg[i] = lerp(p[i][2], p[i][3], phase_x_frac);
+            NIS_UNROLL
+                for (NVI i = 0; i < 6; ++i)
+                {
+                    interp0Deg[i] = lerp(p[i][2], p[i][3], phase_x_frac);
+                }
         }
+        f += EvalPoly6(interp0Deg, phase_y_frac_int) * w.x;
     }
-
-    f.x = EvalPoly6(interp0Deg, phase_y_frac_int);
-
-    // 90 deg filter
-    NVF interp90Deg[6];
+    if (w.y > 0.0f)
     {
-        NIS_UNROLL
-        for (NVI i = 0; i < 6; ++i)
+        // 90 deg filter
+        NVF interp90Deg[6];
         {
-            interp90Deg[i] = lerp(p[2][i], p[3][i], phase_y_frac);
+            NIS_UNROLL
+                for (NVI i = 0; i < 6; ++i)
+                {
+                    interp90Deg[i] = lerp(p[2][i], p[3][i], phase_y_frac);
+                }
         }
-    }
-
-    f.y = EvalPoly6(interp90Deg, phase_x_frac_int);
 
-    //45 deg filter
-    NVF pphase_b45;
-    pphase_b45 = 0.5f + 0.5f * (phase_x_frac - phase_y_frac);
-
-    NVF temp_interp45Deg[7];
-    temp_interp45Deg[1] = lerp(p[2][1], p[1][2], pphase_b45);
-    temp_interp45Deg[3] = lerp(p[3][2], p[2][3], pphase_b45);
-    temp_interp45Deg[5] = lerp(p[4][3], p[3][4], pphase_b45);
-    {
-        pphase_b45 = pphase_b45 - 0.5f;
-        NVF a = (pphase_b45 >= 0.f) ? p[0][2] : p[2][0];
-        NVF b = (pphase_b45 >= 0.f) ? p[1][3] : p[3][1];
-        NVF c = (pphase_b45 >= 0.f) ? p[2][4] : p[4][2];
-        NVF d = (pphase_b45 >= 0.f) ? p[3][5] : p[5][3];
-        temp_interp45Deg[0] = lerp(p[1][1], a, abs(pphase_b45));
-        temp_interp45Deg[2] = lerp(p[2][2], b, abs(pphase_b45));
-        temp_interp45Deg[4] = lerp(p[3][3], c, abs(pphase_b45));
-        temp_interp45Deg[6] = lerp(p[4][4], d, abs(pphase_b45));
+        f += EvalPoly6(interp90Deg, phase_x_frac_int) * w.y;
     }
+    if (w.z > 0.0f)
+    {
+        //45 deg filter
+        NVF pphase_b45 = 0.5f + 0.5f * (phase_x_frac - phase_y_frac);
 
+        NVF temp_interp45Deg[7];
+        temp_interp45Deg[1] = lerp(p[2][1], p[1][2], pphase_b45);
+        temp_interp45Deg[3] = lerp(p[3][2], p[2][3], pphase_b45);
+        temp_interp45Deg[5] = lerp(p[4][3], p[3][4], pphase_b45);
+        {
+            pphase_b45 = pphase_b45 - 0.5f;
+            NVF a = (pphase_b45 >= 0.f) ? p[0][2] : p[2][0];
+            NVF b = (pphase_b45 >= 0.f) ? p[1][3] : p[3][1];
+            NVF c = (pphase_b45 >= 0.f) ? p[2][4] : p[4][2];
+            NVF d = (pphase_b45 >= 0.f) ? p[3][5] : p[5][3];
+            temp_interp45Deg[0] = lerp(p[1][1], a, abs(pphase_b45));
+            temp_interp45Deg[2] = lerp(p[2][2], b, abs(pphase_b45));
+            temp_interp45Deg[4] = lerp(p[3][3], c, abs(pphase_b45));
+            temp_interp45Deg[6] = lerp(p[4][4], d, abs(pphase_b45));
+        }
 
-    NVF interp45Deg[6];
-    NVF pphase_p45 = phase_x_frac + phase_y_frac;
-    if (pphase_p45 >= 1)
-    {
-        NIS_UNROLL
-        for (NVI i = 0; i < 6; i++)
+        NVF interp45Deg[6];
+        NVF pphase_p45 = phase_x_frac + phase_y_frac;
+        if (pphase_p45 >= 1)
         {
-            interp45Deg[i] = temp_interp45Deg[i + 1];
+            NIS_UNROLL
+                for (NVI i = 0; i < 6; i++)
+                {
+                    interp45Deg[i] = temp_interp45Deg[i + 1];
+                }
+            pphase_p45 = pphase_p45 - 1;
         }
-        pphase_p45 = pphase_p45 - 1;
-    }
-    else
-    {
-        NIS_UNROLL
-        for (NVI i = 0; i < 6; i++)
+        else
         {
-            interp45Deg[i] = temp_interp45Deg[i];
+            NIS_UNROLL
+                for (NVI i = 0; i < 6; i++)
+                {
+                    interp45Deg[i] = temp_interp45Deg[i];
+                }
         }
-    }
-
-    f.z = EvalPoly6(interp45Deg, NVI(pphase_p45 * 64));
-
-    //135 deg filter
-    NVF pphase_b135;
-    pphase_b135 = 0.5f * (phase_x_frac + phase_y_frac);
 
-    NVF temp_interp135Deg[7];
-
-    temp_interp135Deg[1] = lerp(p[3][1], p[4][2], pphase_b135);
-    temp_interp135Deg[3] = lerp(p[2][2], p[3][3], pphase_b135);
-    temp_interp135Deg[5] = lerp(p[1][3], p[2][4], pphase_b135);
-
-    {
-        pphase_b135 = pphase_b135 - 0.5f;
-        NVF a = (pphase_b135 >= 0.f) ? p[5][2] : p[3][0];
-        NVF b = (pphase_b135 >= 0.f) ? p[4][3] : p[2][1];
-        NVF c = (pphase_b135 >= 0.f) ? p[3][4] : p[1][2];
-        NVF d = (pphase_b135 >= 0.f) ? p[2][5] : p[0][3];
-        temp_interp135Deg[0] = lerp(p[4][1], a, abs(pphase_b135));
-        temp_interp135Deg[2] = lerp(p[3][2], b, abs(pphase_b135));
-        temp_interp135Deg[4] = lerp(p[2][3], c, abs(pphase_b135));
-        temp_interp135Deg[6] = lerp(p[1][4], d, abs(pphase_b135));
+        f += EvalPoly6(interp45Deg, NVI(pphase_p45 * 64)) * w.z;
     }
+    if (w.w > 0.0f)
+    {
+        //135 deg filter
+        NVF pphase_b135 = 0.5f * (phase_x_frac + phase_y_frac);
 
+        NVF temp_interp135Deg[7];
+        temp_interp135Deg[1] = lerp(p[3][1], p[4][2], pphase_b135);
+        temp_interp135Deg[3] = lerp(p[2][2], p[3][3], pphase_b135);
+        temp_interp135Deg[5] = lerp(p[1][3], p[2][4], pphase_b135);
+        {
+            pphase_b135 = pphase_b135 - 0.5f;
+            NVF a = (pphase_b135 >= 0.f) ? p[5][2] : p[3][0];
+            NVF b = (pphase_b135 >= 0.f) ? p[4][3] : p[2][1];
+            NVF c = (pphase_b135 >= 0.f) ? p[3][4] : p[1][2];
+            NVF d = (pphase_b135 >= 0.f) ? p[2][5] : p[0][3];
+            temp_interp135Deg[0] = lerp(p[4][1], a, abs(pphase_b135));
+            temp_interp135Deg[2] = lerp(p[3][2], b, abs(pphase_b135));
+            temp_interp135Deg[4] = lerp(p[2][3], c, abs(pphase_b135));
+            temp_interp135Deg[6] = lerp(p[1][4], d, abs(pphase_b135));
+        }
 
-    NVF interp135Deg[6];
-    NVF pphase_p135 = 1 + (phase_x_frac - phase_y_frac);
-    if (pphase_p135 >= 1)
-    {
-        NIS_UNROLL
-        for (NVI i = 0; i < 6; ++i)
+        NVF interp135Deg[6];
+        NVF pphase_p135 = 1 + (phase_x_frac - phase_y_frac);
+        if (pphase_p135 >= 1)
         {
-            interp135Deg[i] = temp_interp135Deg[i + 1];
+            NIS_UNROLL
+                for (NVI i = 0; i < 6; ++i)
+                {
+                    interp135Deg[i] = temp_interp135Deg[i + 1];
+                }
+            pphase_p135 = pphase_p135 - 1;
         }
-        pphase_p135 = pphase_p135 - 1;
-    }
-    else
-    {
-        NIS_UNROLL
-        for (NVI i = 0; i < 6; ++i)
+        else
         {
-            interp135Deg[i] = temp_interp135Deg[i];
+            NIS_UNROLL
+                for (NVI i = 0; i < 6; ++i)
+                {
+                    interp135Deg[i] = temp_interp135Deg[i];
+                }
         }
-    }
 
-    f.w = EvalPoly6(interp135Deg, NVI(pphase_p135 * 64));
+        f += EvalPoly6(interp135Deg, NVI(pphase_p135 * 64)) * w.w;
+    }
     return f;
 }
 
@@ -593,10 +582,10 @@ void NVScaler(NVU2 blockIdx, NVU threadIdx)
     // we use texture gather to get extra support necessary
     // to compute 2x2 edge map outputs too
     {
-        for (NVI i = NVI(threadIdx) * 2; i < numTilePixels / 2; i += NIS_THREAD_GROUP_SIZE * 2)
+        for (NVU i = threadIdx * 2; i < NVU(numTilePixels) >> 1; i += NIS_THREAD_GROUP_SIZE * 2)
         {
-            NVI py = (i / numTilePixelsX) * 2;
-            NVI px = i % numTilePixelsX;
+            NVU py = (i / numTilePixelsX) * 2;
+            NVU px = i % numTilePixelsX;
 
             // 0.5 to be in the center of texel
             // - (kSupportSize - 1) / 2 to shift by the kernel support size
@@ -632,7 +621,7 @@ void NVScaler(NVU2 blockIdx, NVU threadIdx)
                 }
             }
 #endif
-            const NVI idx = py * kTilePitch + px;
+            const NVU idx = py * kTilePitch + px;
             shPixelsY[idx] = NVH(p[0][0]);
             shPixelsY[idx + 1] = NVH(p[0][1]);
             shPixelsY[idx + kTilePitch] = NVH(p[1][0]);
@@ -642,14 +631,14 @@ void NVScaler(NVU2 blockIdx, NVU threadIdx)
     GroupMemoryBarrierWithGroupSync();
     {
         // fill in the edge map of 2x2 pixels
-        for (NVI i = NVI(threadIdx) * 2; i < numEdgeMapPixels / 2; i += NIS_THREAD_GROUP_SIZE * 2)
+        for (NVU i = threadIdx * 2; i < NVU(numEdgeMapPixels) >> 1; i += NIS_THREAD_GROUP_SIZE * 2)
         {
-            NVI py = (i / numEdgeMapPixelsX) * 2;
-            NVI px = i % numEdgeMapPixelsX;
+            NVU py = (i / numEdgeMapPixelsX) * 2;
+            NVU px = i % numEdgeMapPixelsX;
 
-            const NVI edgeMapIdx = py * kEdgeMapPitch + px;
+            const NVU edgeMapIdx = py * kEdgeMapPitch + px;
 
-            NVI tileCornerIdx = (py+1) * kTilePitch + px + 1;
+            NVU tileCornerIdx = (py+1) * kTilePitch + px + 1;
             NVF p[4][4];
             NIS_UNROLL
             for (NVI j = 0; j < 4; j++)
@@ -668,17 +657,26 @@ void NVScaler(NVU2 blockIdx, NVU threadIdx)
         }
     }
     LoadFilterBanksSh(NVI(threadIdx), NIS_THREAD_GROUP_SIZE);
-
     GroupMemoryBarrierWithGroupSync();
 
-    for (NVI k = NVI(threadIdx); k < NIS_BLOCK_WIDTH * NIS_BLOCK_HEIGHT; k += NIS_THREAD_GROUP_SIZE)
+    // output coord within a tile
+    const NVI2 pos = NVI2(NVU(threadIdx) % NVU(NIS_BLOCK_WIDTH), NVU(threadIdx) / NVU(NIS_BLOCK_WIDTH));
+    // x coord inside the output image
+    const NVI dstX = dstBlockX + pos.x;
+    // x coord inside the input image
+    const NVF srcX = (0.5f + dstX) * kScaleX - 0.5f;
+    // nearest integer part
+    const NVI px = NVI(floor(srcX) - srcBlockStartX);
+    // fractional part
+    const NVF fx = srcX - floor(srcX);
+    // discretized phase
+    const NVI fx_int = NVI(fx * kPhaseCount);
+
+    for (NVI k = 0; k < NIS_BLOCK_WIDTH * NIS_BLOCK_HEIGHT / NIS_THREAD_GROUP_SIZE; ++k)
     {
-        const NVI2 pos = NVI2(k % NIS_BLOCK_WIDTH, k / NIS_BLOCK_WIDTH);
-
-        const NVI dstX = dstBlockX + pos.x;
-        const NVI dstY = dstBlockY + pos.y;
-
-        const NVF srcX = (0.5f + dstX) * kScaleX - 0.5f;
+        // y coord inside the output image
+        const NVI dstY = dstBlockY + pos.y + k * (NIS_THREAD_GROUP_SIZE / NIS_BLOCK_WIDTH);
+        // y coord inside the input image
         const NVF srcY = (0.5f + dstY) * kScaleY - 0.5f;
 #if NIS_VIEWPORT_SUPPORT
         if (srcX > kInputViewportWidth || srcY > kInputViewportHeight ||
@@ -687,13 +685,30 @@ void NVScaler(NVU2 blockIdx, NVU threadIdx)
             return;
         }
 #endif
-
-        const NVI px = NVI(floor(srcX) - srcBlockStartX);
+        // nearest integer part
         const NVI py = NVI(floor(srcY) - srcBlockStartY);
+        // fractional part
+        const NVF fy = srcY - floor(srcY);
+        // discretized phase
+        const NVI fy_int = NVI(fy * kPhaseCount);
 
-        const NVI startTileIdx = py * kTilePitch + px;
+        // generate weights for directional filters
+        const NVI startEdgeMapIdx = py * kEdgeMapPitch + px;
+        NVF4 edge[2][2];
+        NIS_UNROLL
+        for (NVI i = 0; i < 2; i++)
+        {
+            NIS_UNROLL
+            for (NVI j = 0; j < 2; j++)
+            {
+                // need to shift edge map sampling since it's a 2x2 centered inside 6x6 grid
+                edge[i][j] = shEdgeMap[startEdgeMapIdx + (i * kEdgeMapPitch) + j];
+            }
+        }
+        const NVF4 w = GetInterpEdgeMap(edge, fx, fy) * NIS_SCALE_INT;
 
         // load 6x6 support to regs
+        const NVI startTileIdx = py * kTilePitch + px;
         NVF p[6][6];
         {
             NIS_UNROLL
@@ -707,43 +722,23 @@ void NVScaler(NVU2 blockIdx, NVU threadIdx)
             }
         }
 
-        // compute discretized filter phase
-        const NVF fx = srcX - floor(srcX);
-        const NVF fy = srcY - floor(srcY);
-        const NVI fx_int = NVI(fx * kPhaseCount);
-        const NVI fy_int = NVI(fy * kPhaseCount);
+        // weight for luma
+        const NVF baseWeight = NIS_SCALE_FLOAT - w.x - w.y - w.z - w.w;
+
+        // final luma is a weighted sum of directional & normal filter outputs
+        NVF opY = 0;
 
         // get traditional scaler filter output
-        const NVF pixel_n = FilterNormal(p, fx_int, fy_int);
+        opY += FilterNormal(p, fx_int, fy_int) * baseWeight;
 
         // get directional filter bank output
-        NVF4 opDirYU = GetDirFilters(p, fx, fy, fx_int, fy_int);
-
-        // final luma is a weighted product of directional & normal filters
+        opY += AddDirFilters(p, fx, fy, fx_int, fy_int, w);
 
-        // generate weights for directional filters
-        const NVI startEdgeMapIdx = py * kEdgeMapPitch + px;
-        NVF4 edge[2][2];
-        NIS_UNROLL
-        for (NVI i = 0; i < 2; i++)
-        {
-            NIS_UNROLL
-            for (NVI j = 0; j < 2; j++)
-            {
-                // need to shift edge map sampling since it's a 2x2 centered inside 6x6 grid
-                edge[i][j] = shEdgeMap[startEdgeMapIdx + (i * kEdgeMapPitch) + j];
-            }
-        }
-        const NVF4 w = GetInterpEdgeMap(edge, fx, fy) * NIS_SCALE_INT;
-
-        // final pixel is a weighted sum filter outputs
-        const NVF opY = (opDirYU.x * w.x + opDirYU.y * w.y + opDirYU.z * w.z + opDirYU.w * w.w +
-            pixel_n * (NIS_SCALE_FLOAT - w.x - w.y - w.z - w.w)) * (1.0f / NIS_SCALE_FLOAT);
         // do bilinear tap for chroma upscaling
 #if NIS_VIEWPORT_SUPPORT
-        NVF4 op = NVTEX_SAMPLE(in_texture, samplerLinearClamp, NVF2((srcX + kInputViewportOriginX) * kSrcNormX, (srcY + kInputViewportOriginY) * kSrcNormY));
+        NVF4 op = NVTEX_SAMPLE(in_texture, samplerLinearClamp, NVF2((srcX + kInputViewportOriginX + 0.5f) * kSrcNormX, (srcY + kInputViewportOriginY + 0.5f) * kSrcNormY));
 #else
-        NVF4 op = NVTEX_SAMPLE(in_texture, samplerLinearClamp, NVF2((dstX + 0.5f) * kDstNormX, (dstY + 0.5f) * kDstNormY));
+        NVF4 op = NVTEX_SAMPLE(in_texture, samplerLinearClamp, NVF2((srcX + 0.5f) * kSrcNormX, (srcY + 0.5f) * kSrcNormY));
 #endif
 #if NIS_HDR_MODE == NIS_HDR_MODE_LINEAR
         const NVF kEps = 1e-4f;
@@ -796,7 +791,7 @@ NVF CalcLTIFast(const NVF y[5])
     const NVF a_cont = a_max - a_min;
     const NVF b_cont = b_max - b_min;
 
-    const NVF cont_ratio = max(a_cont, b_cont) / (min(a_cont, b_cont) + kEps * (1.0f / NIS_SCALE_FLOAT));
+    const NVF cont_ratio = max(a_cont, b_cont) / (min(a_cont, b_cont) + kEps);
     return (1.0f - saturate((cont_ratio - kMinContrastRatio) * kRatioNorm)) * kContrastBoost;
 }
 
@@ -883,7 +878,7 @@ void NVSharpen(NVU2 blockIdx, NVU threadIdx)
 
     for (NVI i = NVI(threadIdx) * 2; i < kNumPixelsX * kNumPixelsY / 2; i += NIS_THREAD_GROUP_SIZE * 2)
     {
-        NVU2 pos = NVU2(i % kNumPixelsX, i / kNumPixelsX * 2);
+        NVU2 pos = NVU2(NVU(i) % NVU(kNumPixelsX), NVU(i) / NVU(kNumPixelsX) * 2);
         NIS_UNROLL
         for (NVI dy = 0; dy < 2; dy++)
         {
@@ -907,7 +902,7 @@ void NVSharpen(NVU2 blockIdx, NVU threadIdx)
 
     for (NVI k = NVI(threadIdx); k < NIS_BLOCK_WIDTH * NIS_BLOCK_HEIGHT; k += NIS_THREAD_GROUP_SIZE)
     {
-        const NVI2 pos = NVI2(k % NIS_BLOCK_WIDTH, k / NIS_BLOCK_WIDTH);
+        const NVI2 pos = NVI2(NVU(k) % NVU(NIS_BLOCK_WIDTH), NVU(k) / NVU(NIS_BLOCK_WIDTH));
 
         // load 5x5 support to regs
         NVF p[5][5];
@@ -942,9 +937,9 @@ void NVSharpen(NVU2 blockIdx, NVU threadIdx)
 #endif
 
 #if NIS_VIEWPORT_SUPPORT
-        NVF4 op = NVTEX_SAMPLE(in_texture, samplerLinearClamp, NVF2((dstX + kInputViewportOriginX) * kSrcNormX, (dstY + kInputViewportOriginY) * kSrcNormY));
+        NVF4 op = NVTEX_SAMPLE(in_texture, samplerLinearClamp, NVF2((dstX + kInputViewportOriginX + 0.5f) * kSrcNormX, (dstY + kInputViewportOriginY + 0.5f) * kSrcNormY));
 #else
-        NVF4 op = NVTEX_SAMPLE(in_texture, samplerLinearClamp, NVF2((dstX + 0.5f) * kDstNormX, (dstY + 0.5f) * kDstNormY));
+        NVF4 op = NVTEX_SAMPLE(in_texture, samplerLinearClamp, NVF2((dstX + 0.5f) * kSrcNormX, (dstY + 0.5f) * kSrcNormY));
 #endif
 #if NIS_HDR_MODE == NIS_HDR_MODE_LINEAR
         const NVF kEps = 1e-4f * kHDRCompressionFactor * kHDRCompressionFactor;

+ 15 - 11
README.md

@@ -1,8 +1,8 @@
-# NVIDIA Image Scaling SDK v1.0.1
+# NVIDIA Image Scaling SDK v1.0.2
 
 The MIT License(MIT)
 
-Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files(the "Software"), to deal in
@@ -84,7 +84,7 @@ The coefficients are included in NIS_Config.h file:
 
 fp32 format: coef_scaler, coef_USM
 
-fp16 format: coef_scaler_fp16, coef_USM_fp16 
+fp16 format: coef_scaler_fp16, coef_USM_fp16
 
 
 ### Resource States, Buffers, and Sampler:
@@ -96,7 +96,7 @@ the correct state.
 - The output texture must be in read/write state. Unordered Access View (UAV) in DirectX
 - The coefficients texture for NVScaler must be in read state. Shader Resource View (SRV) in DirectX
 - The configuration variables must be passed as constant buffer. Constant Buffer View (CBV) in DirectX
-- The sampler for texture pixel sampling. Linear clamp SamplerState in Direct
+- The sampler for texture pixel sampling must use linear filter interpolation and clamp to edge addressing mode
 
 
 
@@ -144,14 +144,15 @@ values (NVScalerUpdateConfig, and NVSharpenUpdateConfig), and to access the algo
 
 ## Optimal shader settings
 
-To get optimal performance of NvScaler and NvSharpen for current and future hardware, it is recommended that the following API is used to obtain the values for NIS_BLOCK_WIDTH, NIS_BLOCK_HEIGHT, and NIS_THREAD_GROUP_SIZE.
+To get optimal performance of NVScaler and NVSharpen for current and future hardware, it is recommended that the following API is used to obtain the values for NIS_BLOCK_WIDTH, NIS_BLOCK_HEIGHT, and NIS_THREAD_GROUP_SIZE. These values can be used to compile permutations of NVScaler and NVSharpen offline.
 
 ```
 enum class NISGPUArchitecture : uint32_t
 {
     NVIDIA_Generic = 0,
     AMD_Generic = 1,
-    Intel_Generic = 2
+    Intel_Generic = 2,
+    NVIDIA_Generic_fp16 = 3,
 };
 ```
 
@@ -191,16 +192,18 @@ The integration instructions in this section can be applied with minimal changes
 
 ### Compile the NIS_Main.hlsl shader
 
-NIS_SCALER should be set to 1, and isUscaling should be pass as true.
+NIS_SCALER should be set to 1, and the isUpscaling argument should be set to true.
 
 ```
-NISOptimizer opt(true, NISGPUArchitecture::NVIDIA_Generic);
+bool isUpscaling = true;
+// Note: NISOptimizer is optional and these values can be cached offline
+NISOptimizer opt(isUpscaling, NISGPUArchitecture::NVIDIA_Generic);
 uint32_t blockWidth = opt.GetOptimalBlockWidth();
 uint32_t blockHeight = opt.GetOptimalBlockHeight();
 uint32_t threadGroupSize = opt.GetOptimalThreadGroupSize();
 
 Defines defines;
-defines.add("NIS_SCALER", true);
+defines.add("NIS_SCALER", isUpscaling);
 defines.add("NIS_HDR_MODE", hdrMode);
 defines.add("NIS_BLOCK_WIDTH", blockWidth);
 defines.add("NIS_BLOCK_HEIGHT", blockHeight);
@@ -295,10 +298,11 @@ If your application requires upscaling and sharpening do not use NVSharpen use N
 
 ### Compile the NIS_Main.hlsl shader
 
-NIS_SCALER should be set to 0 and the optimizer isUscaling argument should be set to false.
+NIS_SCALER should be set to 0 and the optimizer isUpscaling argument should be set to false.
 
 ```
 bool isUpscaling = false;
+// Note: NISOptimizer is optional and these values can be cached offline
 NISOptimizer opt(isUpscaling, NISGPUArchitecture::NVIDIA_Generic);
 uint32_t blockWidth = opt.GetOptimalBlockWidth();
 uint32_t blockHeight = opt.GetOptimalBlockHeight();
@@ -384,7 +388,7 @@ context->Dispatch(UINT(std::ceil(outputWidth / float(blockWidth))),
 - CMake 3.16 : https://cmake.org/download/
 
 for building the Vulkan sample:
-- Vulkan SDK 1.2.189.2 : https://vulkan.lunarg.com/  
+- Vulkan SDK 1.2.189.2 : https://vulkan.lunarg.com/
 
 ### Build
 

BIN
docs/NIS_SDK_Programming_Guide.pdf


+ 1 - 1
licence.txt

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 // 
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/include/AppRenderer.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/include/BilinearUpscale.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/include/DXUtilities.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/include/DeviceResources.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/include/NVScaler.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/include/NVSharpen.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/include/UIRenderer.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/src/AppRenderer.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/src/BilinearUpscale.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/src/DeviceResources.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/src/NVScaler.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/src/NVSharpen.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/src/Sample.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX11/src/UIRenderer.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX12/include/AppRenderer.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX12/include/BilinearUpscale.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX12/include/DXUtilities.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX12/include/DeviceResources.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX12/include/NVScaler.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX12/include/NVSharpen.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX12/include/UIRenderer.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX12/src/AppRenderer.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX12/src/BilinearUpscale.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 3 - 4
samples/DX12/src/DeviceResources.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in
@@ -86,8 +86,7 @@ void DeviceResources::create(HWND hWnd, uint32_t adapterIdx)
 
     // Create device
     D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_12_0;
-    HRESULT hr = D3D12CreateDevice(nullptr, featureLevel, __uuidof(ID3D12Device), &m_device);
-    //m_device->SetStablePowerState(true);
+    HRESULT hr = D3D12CreateDevice(pAdapter.Get(), featureLevel, __uuidof(ID3D12Device), &m_device);
 
     // [DEBUG] Setup debug interface to break on any warnings/errors
 #ifdef DX12_ENABLE_DEBUG_LAYER
@@ -253,7 +252,7 @@ void DeviceResources::Present(uint32_t SyncInterval, uint32_t Flags)
     {
         m_swapChain->Present(SyncInterval, Flags);
         m_timer.ReadBack();
-    }    
+    }
     MoveToNextFrame();
 }
 

+ 2 - 2
samples/DX12/src/NVScaler.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in
@@ -45,7 +45,7 @@ NVScaler::NVScaler(DeviceResources& deviceResources, const std::vector<std::stri
     if (shaderPath.empty())
         throw std::runtime_error("Shader file not found" + shaderName);
 
-    NISOptimizer opt(true, NISGPUArchitecture::NVIDIA_Generic);
+    NISOptimizer opt(true, NISGPUArchitecture::NVIDIA_Generic_fp16);
     m_blockWidth = opt.GetOptimalBlockWidth();
     m_blockHeight = opt.GetOptimalBlockHeight();
     uint32_t threadGroupSize = opt.GetOptimalThreadGroupSize();

+ 2 - 2
samples/DX12/src/NVSharpen.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in
@@ -78,7 +78,7 @@ NVSharpen::NVSharpen(DeviceResources& deviceResources,  const std::vector<std::s
     library->CreateIncludeHandler(&includeHandler);
     std::vector<LPCWSTR> args{ L"-O3", L"-enable-16bit-types" };
     ComPtr<IDxcOperationResult> result;
-    HRESULT hr = compiler->Compile(sourceBlob.Get(), wShaderFilename.c_str(), L"main", L"cs_6_2", args.data(), uint32_t(args.size()), 
+    HRESULT hr = compiler->Compile(sourceBlob.Get(), wShaderFilename.c_str(), L"main", L"cs_6_2", args.data(), uint32_t(args.size()),
         defines.data(), uint32_t(defines.size()), includeHandler.Get(), &result);
     if (SUCCEEDED(hr))
         result->GetStatus(&hr);

+ 1 - 1
samples/DX12/src/Sample.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX12/src/UIRenderer.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/DX12/src/bilinearUpscale.hlsl

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/include/AppRenderer.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/include/DeviceResources.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/include/NVScaler.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/include/NVSharpen.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/include/UIRenderer.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/include/VKUtilities.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/src/AppRenderer.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/src/DeviceResources.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/src/NVScaler.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/src/NVSharpen.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/src/Sample.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/VK/src/UIRenderer.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/common/Image.cpp

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/common/Image.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 1
samples/common/Utilities.h

@@ -1,6 +1,6 @@
 // The MIT License(MIT)
 //
-// Copyright(c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright(c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files(the "Software"), to deal in

+ 1 - 22
third_party_licenses.txt

@@ -147,9 +147,7 @@ tinyexr tools uses stb, which is licensed under public domain: https://github.co
 ------------------------------------------------------------------------------
 stb image
 ------------------------------------------------------------------------------
-This software is available under 2 licenses -- choose whichever you prefer.
-------------------------------------------------------------------------------
-ALTERNATIVE A - MIT License
+MIT License
 Copyright (c) 2017 Sean Barrett
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
@@ -166,25 +164,6 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
-------------------------------------------------------------------------------
-ALTERNATIVE B - Public Domain (www.unlicense.org)
-This is free and unencumbered software released into the public domain.
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
-software, either in source code form or as a compiled binary, for any purpose,
-commercial or non-commercial, and by any means.
-In jurisdictions that recognize copyright laws, the author or authors of this
-software dedicate any and all copyright interest in the software to the public
-domain. We make this dedication for the benefit of the public at large and to
-the detriment of our heirs and successors. We intend this dedication to be an
-overt act of relinquishment in perpetuity of all present and future rights to
-this software under copyright law.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
 
 
 ------------------------------------------------------------------------------