Import Tint changes from Dawn

Changes:
  - 75836f51afd8e66ffe5ac2708c914c4621bdb604 [Compat] Fix textureDimensions GLSL transform with mip le... by Shrek Shao <shrekshao@google.com>
  - c5c44ee4377e04ecc8cff394319e329fde2297d5 [tint][utils] Be more strict with UTF-8 decoding by Ben Clayton <bclayton@google.com>
GitOrigin-RevId: 75836f51afd8e66ffe5ac2708c914c4621bdb604
Change-Id: I1a40fd592a6af86de38ce4a46134a30cb3cdd26e
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/184581
Commit-Queue: James Price <jrprice@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: James Price <jrprice@google.com>
diff --git a/src/tint/lang/wgsl/inspector/inspector.cc b/src/tint/lang/wgsl/inspector/inspector.cc
index f6f7980..f00ab13 100644
--- a/src/tint/lang/wgsl/inspector/inspector.cc
+++ b/src/tint/lang/wgsl/inspector/inspector.cc
@@ -1029,10 +1029,16 @@
 
     std::unordered_set<BindingPoint> seen = {};
 
-    auto sample_type_for_call_and_type = [](wgsl::BuiltinFn builtin, const core::type::Type* ty) {
+    auto sample_type_for_call_and_type = [](wgsl::BuiltinFn builtin, const core::type::Type* ty,
+                                            const Vector<const ast::Expression*, 8>& args) {
         if (builtin == wgsl::BuiltinFn::kTextureNumLevels) {
             return TextureQueryType::kTextureNumLevels;
         }
+        if (builtin == wgsl::BuiltinFn::kTextureDimensions && args.Length() > 1) {
+            // When textureDimensions is called with a level argument,
+            // it requires a call to textureNumLevels to clamp the mip level.
+            return TextureQueryType::kTextureNumLevels;
+        }
         if (builtin == wgsl::BuiltinFn::kTextureLoad) {
             if (!ty->UnwrapRef()
                      ->IsAnyOf<core::type::MultisampledTexture,
@@ -1095,6 +1101,7 @@
                 call->Target(),
                 [&](const sem::BuiltinFn* builtin) {
                     if (builtin->Fn() != wgsl::BuiltinFn::kTextureNumLevels &&
+                        builtin->Fn() != wgsl::BuiltinFn::kTextureDimensions &&
                         builtin->Fn() != wgsl::BuiltinFn::kTextureNumSamples &&
                         builtin->Fn() != wgsl::BuiltinFn::kTextureLoad) {
                         return;
@@ -1104,7 +1111,8 @@
                     auto* texture_sem = sem.GetVal(texture_expr)->RootIdentifier();
                     TINT_ASSERT(texture_sem);
 
-                    auto type = sample_type_for_call_and_type(builtin->Fn(), texture_sem->Type());
+                    auto type = sample_type_for_call_and_type(builtin->Fn(), texture_sem->Type(),
+                                                              call->Declaration()->args);
 
                     tint::Switch(
                         texture_sem,  //
diff --git a/src/tint/utils/text/unicode.cc b/src/tint/utils/text/unicode.cc
index 157f5da..1c10135 100644
--- a/src/tint/utils/text/unicode.cc
+++ b/src/tint/utils/text/unicode.cc
@@ -388,32 +388,34 @@
 
     CodePoint c;
 
-    uint8_t valid = 0x80;
+    uint8_t top_bits = 0b11000000;
     switch (n) {
         // Note: n=0 (invalid) is correctly handled without a case.
         case 1:
             c = CodePoint{ptr[0]};
             break;
         case 2:
-            valid &= ptr[1];
+            top_bits &= ptr[1] ^ 0b01000000;
             c = CodePoint{(static_cast<uint32_t>(ptr[0] & 0b00011111) << 6) |
                           (static_cast<uint32_t>(ptr[1] & 0b00111111))};
             break;
         case 3:
-            valid &= ptr[1] & ptr[2];
+            top_bits &= (ptr[1] ^ 0b01000000) & (ptr[2] ^ 0b01000000);
             c = CodePoint{(static_cast<uint32_t>(ptr[0] & 0b00001111) << 12) |
                           (static_cast<uint32_t>(ptr[1] & 0b00111111) << 6) |
                           (static_cast<uint32_t>(ptr[2] & 0b00111111))};
             break;
         case 4:
-            valid &= ptr[1] & ptr[2] & ptr[3];
+            top_bits &= (ptr[1] ^ 0b01000000) & (ptr[2] ^ 0b01000000) & (ptr[3] ^ 0b01000000);
             c = CodePoint{(static_cast<uint32_t>(ptr[0] & 0b00000111) << 18) |
                           (static_cast<uint32_t>(ptr[1] & 0b00111111) << 12) |
                           (static_cast<uint32_t>(ptr[2] & 0b00111111) << 6) |
                           (static_cast<uint32_t>(ptr[3] & 0b00111111))};
             break;
     }
-    if (!valid) {
+    if (top_bits != 0b11000000) {
+        // Reject the sequence: the two most significant bits of every code unit after the first
+        // code unit must be [1, 0].
         n = 0;
         c = 0;
     }
diff --git a/src/tint/utils/text/unicode_test.cc b/src/tint/utils/text/unicode_test.cc
index 1887fd7..204bc76 100644
--- a/src/tint/utils/text/unicode_test.cc
+++ b/src/tint/utils/text/unicode_test.cc
@@ -545,12 +545,19 @@
                              {0xe8, 0x8f},        // 3-bytes, missing third byte
                              {0xf4, 0x8f, 0x8f},  // 4-bytes, missing fourth byte
 
-                             {0xd0, 0x7f},              // 2-bytes, second byte MSB unset
-                             {0xe8, 0x7f, 0x8f},        // 3-bytes, second byte MSB unset
-                             {0xe8, 0x8f, 0x7f},        // 3-bytes, third byte MSB unset
-                             {0xf4, 0x7f, 0x8f, 0x8f},  // 4-bytes, second byte MSB unset
-                             {0xf4, 0x8f, 0x7f, 0x8f},  // 4-bytes, third byte MSB unset
-                             {0xf4, 0x8f, 0x8f, 0x7f},  // 4-bytes, fourth byte MSB unset
+                             {0xd0, 0x7f},              // 2-bytes, second byte's MSB unset
+                             {0xe8, 0x7f, 0x8f},        // 3-bytes, second byte's MSB unset
+                             {0xe8, 0x8f, 0x7f},        // 3-bytes, third byte's MSB unset
+                             {0xf4, 0x7f, 0x8f, 0x8f},  // 4-bytes, second byte's MSB unset
+                             {0xf4, 0x8f, 0x7f, 0x8f},  // 4-bytes, third byte's MSB unset
+                             {0xf4, 0x8f, 0x8f, 0x7f},  // 4-bytes, fourth byte's MSB unset
+
+                             {0xd0, 0xff},              // 2-bytes, second byte's second-MSB set
+                             {0xe8, 0xff, 0x8f},        // 3-bytes, second byte's second-MSB set
+                             {0xe8, 0x8f, 0xff},        // 3-bytes, third byte's second-MSB set
+                             {0xf4, 0xff, 0x8f, 0x8f},  // 4-bytes, second byte's second-MSB set
+                             {0xf4, 0x8f, 0xff, 0x8f},  // 4-bytes, third byte's second-MSB set
+                             {0xf4, 0x8f, 0x8f, 0xff},  // 4-bytes, fourth byte's second-MSB set
                          }));
 
 }  // namespace utf8_tests