Support R, RG, RGBA Unorm in T2B compute shader

Bug: 348654098
Change-Id: Ib94a90707b699e5930fa4ac9bf9330dc3e5465dd
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/195402
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
Reviewed-by: Shrek Shao <shrekshao@google.com>
diff --git a/src/dawn/native/BlitTextureToBuffer.cpp b/src/dawn/native/BlitTextureToBuffer.cpp
index 077007c..63c3a78 100644
--- a/src/dawn/native/BlitTextureToBuffer.cpp
+++ b/src/dawn/native/BlitTextureToBuffer.cpp
@@ -147,6 +147,25 @@
 @group(0) @binding(0) var src_tex : texture_cube<u32>;
 )";
 
+constexpr std::string_view kEncodeRGBA8UnormInU32 = R"(
+fn encodeVectorInU32General(v: vec4f) -> u32 {
+    return pack4x8unorm(v);
+}
+)";
+
+constexpr std::string_view kEncodeRGBA8SnormInU32 = R"(
+fn encodeVectorInU32General(v: vec4f) -> u32 {
+    return pack4x8snorm(v);
+}
+)";
+
+// Storing and swizzling bgra8unorm texel values and convert to u32.
+constexpr std::string_view kEncodeBGRA8UnormInU32 = R"(
+fn encodeVectorInU32General(v: vec4f) -> u32 {
+    return pack4x8unorm(v.bgra);
+}
+)";
+
 // Each thread is responsible for reading (packTexelCount) texel and packing them into a 4-byte u32.
 constexpr std::string_view kCommonHead = R"(
 struct Params {
@@ -326,11 +345,11 @@
 //       e.g. offset = 1; copyWidth = 256; mask = 0xffffff00;
 //       | 255 |  b  |  b  |  b  |
 
-constexpr std::string_view kPackR8SnormToU32 = R"(
+constexpr std::string_view kPackR8ToU32 = R"(
 // Result bits to store into dst_buf
 var result: u32 = 0u;
-// Storing snorm8 texel values
-// later called by pack4x8snorm to convert to u32.
+// Storing xnorm8 texel values
+// later called by pack4x8xnorm to convert to u32.
 var v: vec4<f32>;
 
 // dstBuf value is used for starting part.
@@ -382,7 +401,7 @@
 
 if (readDstBufAtStart || readDstBufAtEnd) {
     let original: u32 = dst_buf[dstOffset];
-    result = (original & mask) | (pack4x8snorm(v) & ~mask);
+    result = (original & mask) | (encodeVectorInU32General(v) & ~mask);
 } else {
     var coord1: vec3u;
     var coord2: vec3u;
@@ -428,9 +447,9 @@
         && (params.srcExtent.x < params.bytesPerRow);
     if (readDstBufAtMid && id.x == 0) {
         let original: u32 = dst_buf[dstOffset];
-        result = (original & mask) | (pack4x8snorm(v) & ~mask);
+        result = (original & mask) | (encodeVectorInU32General(v) & ~mask);
     } else {
-        result = pack4x8snorm(v);
+        result = encodeVectorInU32General(v);
     }
 }
 )";
@@ -464,11 +483,11 @@
 //       e.g. offset = 1; copyWidth = 128; mask = 0xffff0000;
 //       |   127   |    b    |
 
-constexpr std::string_view kPackRG8SnormToU32 = R"(
+constexpr std::string_view kPackRG8ToU32 = R"(
 // Result bits to store into dst_buf
 var result: u32 = 0u;
 // Storing snorm8 texel values
-// later called by pack4x8snorm to convert to u32.
+// later called by pack4x8xnorm to convert to u32.
 var v: vec4<f32>;
 
 // dstBuf value is used for starting part.
@@ -491,9 +510,9 @@
 
 if (readDstBufAtStart || readDstBufAtEnd) {
     let original: u32 = dst_buf[dstOffset];
-    result = (original & mask) | (pack4x8snorm(v) & ~mask);
+    result = (original & mask) | (encodeVectorInU32General(v) & ~mask);
 } else {
-    result = pack4x8snorm(v);
+    result = encodeVectorInU32General(v);
 }
 )";
 
@@ -526,22 +545,11 @@
     }
 )";
 
-// Storing snorm8 texel values
-// later called by pack4x8snorm to convert to u32.
-constexpr std::string_view kPackRGBA8SnormToU32 = R"(
+// Storing rgba texel values
+// later called by encodeVectorInU32General to convert to u32.
+constexpr std::string_view kPackRGBAToU32 = R"(
     let v = textureLoadGeneral(src_tex, coord0, params.mipLevel);
-    let result: u32 = pack4x8snorm(v);
-)";
-
-// Storing and swizzling bgra8unorm texel values
-// later called by pack4x8unorm to convert to u32.
-constexpr std::string_view kPackBGRA8UnormToU32 = R"(
-    var v: vec4<f32>;
-
-    let texel0 = textureLoadGeneral(src_tex, coord0, params.mipLevel);
-    v = texel0.bgra;
-
-    let result: u32 = pack4x8unorm(v);
+    let result: u32 = encodeVectorInU32General(v);
 )";
 
 // Storing rgb9e5ufloat texel values
@@ -555,9 +563,8 @@
 // 0x0a090807 and 0x0412100e both unpack to
 // [8.344650268554688e-7, 0.000015735626220703125, 0.000015497207641601562]
 // So the bytes copied via blit could be different.
-constexpr std::string_view kPackRGB9E5UfloatToU32 = R"(
-    let v = textureLoadGeneral(src_tex, coord0, params.mipLevel);
-
+constexpr std::string_view kEncodeRGB9E5UfloatInU32 = R"(
+fn encodeVectorInU32General(v: vec4f) -> u32 {
     const n = 9; // number of mantissa bits
     const e_max = 31; // max exponent
     const b = 15; // exponent bias
@@ -585,6 +592,9 @@
         ((blue_s & mask_9) << 18u) |
         ((green_s & mask_9) << 9u) |
         (red_s & mask_9);
+
+    return result;
+}
 )";
 
 // Directly loading depth32float values into dst_buf
@@ -656,47 +666,55 @@
 
     switch (format.format) {
         case wgpu::TextureFormat::R8Snorm:
+        case wgpu::TextureFormat::R8Unorm:
             AppendFloatTextureHead();
             shader += kDstBufferU32;
+            shader += format.IsSnorm() ? kEncodeRGBA8SnormInU32 : kEncodeRGBA8UnormInU32;
             shader += kCommonHead;
             shader += kNonMultipleOf4OffsetStart;
-            shader += kPackR8SnormToU32;
+            shader += kPackR8ToU32;
             shader += kCommonEnd;
             textureSampleType = wgpu::TextureSampleType::Float;
             break;
         case wgpu::TextureFormat::RG8Snorm:
+        case wgpu::TextureFormat::RG8Unorm:
             AppendFloatTextureHead();
             shader += kDstBufferU32;
+            shader += format.IsSnorm() ? kEncodeRGBA8SnormInU32 : kEncodeRGBA8UnormInU32;
             shader += kCommonHead;
             shader += kNonMultipleOf4OffsetStart;
-            shader += kPackRG8SnormToU32;
+            shader += kPackRG8ToU32;
             shader += kCommonEnd;
             textureSampleType = wgpu::TextureSampleType::Float;
             break;
         case wgpu::TextureFormat::RGBA8Snorm:
+        case wgpu::TextureFormat::RGBA8Unorm:
             AppendFloatTextureHead();
             shader += kDstBufferU32;
+            shader += format.IsSnorm() ? kEncodeRGBA8SnormInU32 : kEncodeRGBA8UnormInU32;
             shader += kCommonHead;
             shader += kCommonStart;
-            shader += kPackRGBA8SnormToU32;
+            shader += kPackRGBAToU32;
             shader += kCommonEnd;
             textureSampleType = wgpu::TextureSampleType::Float;
             break;
         case wgpu::TextureFormat::BGRA8Unorm:
             AppendFloatTextureHead();
             shader += kDstBufferU32;
+            shader += kEncodeBGRA8UnormInU32;
             shader += kCommonHead;
             shader += kCommonStart;
-            shader += kPackBGRA8UnormToU32;
+            shader += kPackRGBAToU32;
             shader += kCommonEnd;
             textureSampleType = wgpu::TextureSampleType::Float;
             break;
         case wgpu::TextureFormat::RGB9E5Ufloat:
             AppendFloatTextureHead();
             shader += kDstBufferU32;
+            shader += kEncodeRGB9E5UfloatInU32;
             shader += kCommonHead;
             shader += kCommonStart;
-            shader += kPackRGB9E5UfloatToU32;
+            shader += kPackRGBAToU32;
             shader += kCommonEnd;
             textureSampleType = wgpu::TextureSampleType::Float;
             break;
@@ -826,8 +844,11 @@
     // a subset of them that we support.
     switch (format) {
         case wgpu::TextureFormat::R8Snorm:
+        case wgpu::TextureFormat::R8Unorm:
         case wgpu::TextureFormat::RG8Snorm:
+        case wgpu::TextureFormat::RG8Unorm:
         case wgpu::TextureFormat::RGBA8Snorm:
+        case wgpu::TextureFormat::RGBA8Unorm:
         case wgpu::TextureFormat::BGRA8Unorm:
         case wgpu::TextureFormat::RGB9E5Ufloat:
         case wgpu::TextureFormat::Depth16Unorm:
@@ -890,8 +911,7 @@
 
     uint32_t numU32PerRowNeedsWriting = 0;
     bool readPreviousRow = false;
-    if (format.format == wgpu::TextureFormat::R8Snorm ||
-        format.format == wgpu::TextureFormat::RG8Snorm) {
+    if (bytesPerTexel < 4 && !format.HasDepthOrStencil()) {
         uint32_t extraBytes = dst.offset % 4;
 
         // Between rows and image (whether thread at end of each row needs read start of next
@@ -1014,7 +1034,11 @@
         bufferDesc.size = sizeof(uint32_t) * 20;
         bufferDesc.usage = wgpu::BufferUsage::Uniform;
         bufferDesc.mappedAtCreation = true;
-        DAWN_TRY_ASSIGN(uniformBuffer, device->CreateBuffer(&bufferDesc));
+
+        {
+            IgnoreLazyClearCountScope ignoreClearScope(device);
+            DAWN_TRY_ASSIGN(uniformBuffer, device->CreateBuffer(&bufferDesc));
+        }
 
         uint32_t* params =
             static_cast<uint32_t*>(uniformBuffer->GetMappedRange(0, bufferDesc.size));