diagnostic: don't squiggle for unicode lines

If the line contains non-ASCII characters, then we cannot assume that a single UTF-8 code unit represents a single glyph, so don't attempt to draw squiggles.

Bug: tint:1437
Change-Id: Ibd911200b03297f3e1b059ad7cc53fcba59e7714
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/80846
Reviewed-by: David Neto <dneto@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
diff --git a/src/diagnostic/formatter.cc b/src/diagnostic/formatter.cc
index 0e9f217..1998523 100644
--- a/src/diagnostic/formatter.cc
+++ b/src/diagnostic/formatter.cc
@@ -201,19 +201,31 @@
       auto& line = src.file->content.lines[line_num - 1];
       auto line_len = line.size();
 
+      bool is_ascii = true;
       for (auto c : line) {
         if (c == '\t') {
           state.repeat(' ', style_.tab_width);
         } else {
           state << c;
         }
+        if (c & 0x80) {
+          is_ascii = false;
+        }
       }
 
       state.newline();
+
+      // If the line contains non-ascii characters, then we cannot assume that
+      // a single utf8 code unit represents a single glyph, so don't attempt to
+      // draw squiggles.
+      if (!is_ascii) {
+        continue;
+      }
+
       state.set_style({Color::kCyan, false});
 
       // Count the number of glyphs in the line span.
-      // start and end use 1-based indexing .
+      // start and end use 1-based indexing.
       auto num_glyphs = [&](size_t start, size_t end) {
         size_t count = 0;
         start = (start > 0) ? (start - 1) : 0;
diff --git a/src/diagnostic/formatter_test.cc b/src/diagnostic/formatter_test.cc
index 966b888..a97f57e 100644
--- a/src/diagnostic/formatter_test.cc
+++ b/src/diagnostic/formatter_test.cc
@@ -37,43 +37,81 @@
   return d;
 }
 
-constexpr const char* content =  // Note: words are tab-delimited
+constexpr const char* ascii_content =  // Note: words are tab-delimited
     R"(the	cat	says	meow
 the	dog	says	woof
 the	snake	says	quack
 the	snail	says	???
 )";
 
+constexpr const char* utf8_content =  // Note: words are tab-delimited
+    "the	\xf0\x9f\x90\xb1	says	meow\n"   // NOLINT: tabs
+    "the	\xf0\x9f\x90\x95	says	woof\n"   // NOLINT: tabs
+    "the	\xf0\x9f\x90\x8d	says	quack\n"  // NOLINT: tabs
+    "the	\xf0\x9f\x90\x8c	says	???\n";   // NOLINT: tabs
+
 class DiagFormatterTest : public testing::Test {
  public:
-  Source::File file{"file.name", content};
-  Diagnostic diag_note =
+  Source::File ascii_file{"file.name", ascii_content};
+  Source::File utf8_file{"file.name", utf8_content};
+  Diagnostic ascii_diag_note =
       Diag(Severity::Note,
-           Source{Source::Range{Source::Location{1, 14}}, &file},
+           Source{Source::Range{Source::Location{1, 14}}, &ascii_file},
            "purr",
            System::Test);
-  Diagnostic diag_warn = Diag(Severity::Warning,
-                              Source{Source::Range{{2, 14}, {2, 18}}, &file},
-                              "grrr",
-                              System::Test);
-  Diagnostic diag_err = Diag(Severity::Error,
-                             Source{Source::Range{{3, 16}, {3, 21}}, &file},
-                             "hiss",
-                             System::Test,
-                             "abc123");
-  Diagnostic diag_ice = Diag(Severity::InternalCompilerError,
-                             Source{Source::Range{{4, 16}, {4, 19}}, &file},
-                             "unreachable",
-                             System::Test);
-  Diagnostic diag_fatal = Diag(Severity::Fatal,
-                               Source{Source::Range{{4, 16}, {4, 19}}, &file},
-                               "nothing",
-                               System::Test);
+  Diagnostic ascii_diag_warn =
+      Diag(Severity::Warning,
+           Source{Source::Range{{2, 14}, {2, 18}}, &ascii_file},
+           "grrr",
+           System::Test);
+  Diagnostic ascii_diag_err =
+      Diag(Severity::Error,
+           Source{Source::Range{{3, 16}, {3, 21}}, &ascii_file},
+           "hiss",
+           System::Test,
+           "abc123");
+  Diagnostic ascii_diag_ice =
+      Diag(Severity::InternalCompilerError,
+           Source{Source::Range{{4, 16}, {4, 19}}, &ascii_file},
+           "unreachable",
+           System::Test);
+  Diagnostic ascii_diag_fatal =
+      Diag(Severity::Fatal,
+           Source{Source::Range{{4, 16}, {4, 19}}, &ascii_file},
+           "nothing",
+           System::Test);
+
+  Diagnostic utf8_diag_note =
+      Diag(Severity::Note,
+           Source{Source::Range{Source::Location{1, 15}}, &utf8_file},
+           "purr",
+           System::Test);
+  Diagnostic utf8_diag_warn =
+      Diag(Severity::Warning,
+           Source{Source::Range{{2, 15}, {2, 19}}, &utf8_file},
+           "grrr",
+           System::Test);
+  Diagnostic utf8_diag_err =
+      Diag(Severity::Error,
+           Source{Source::Range{{3, 15}, {3, 20}}, &utf8_file},
+           "hiss",
+           System::Test,
+           "abc123");
+  Diagnostic utf8_diag_ice =
+      Diag(Severity::InternalCompilerError,
+           Source{Source::Range{{4, 15}, {4, 18}}, &utf8_file},
+           "unreachable",
+           System::Test);
+  Diagnostic utf8_diag_fatal =
+      Diag(Severity::Fatal,
+           Source{Source::Range{{4, 15}, {4, 18}}, &utf8_file},
+           "nothing",
+           System::Test);
 };
 
 TEST_F(DiagFormatterTest, Simple) {
   Formatter fmt{{false, false, false, false}};
-  auto got = fmt.format(List{diag_note, diag_warn, diag_err});
+  auto got = fmt.format(List{ascii_diag_note, ascii_diag_warn, ascii_diag_err});
   auto* expect = R"(1:14: purr
 2:14: grrr
 3:16 abc123: hiss)";
@@ -82,7 +120,7 @@
 
 TEST_F(DiagFormatterTest, SimpleNewlineAtEnd) {
   Formatter fmt{{false, false, false, true}};
-  auto got = fmt.format(List{diag_note, diag_warn, diag_err});
+  auto got = fmt.format(List{ascii_diag_note, ascii_diag_warn, ascii_diag_err});
   auto* expect = R"(1:14: purr
 2:14: grrr
 3:16 abc123: hiss
@@ -100,7 +138,7 @@
 
 TEST_F(DiagFormatterTest, WithFile) {
   Formatter fmt{{true, false, false, false}};
-  auto got = fmt.format(List{diag_note, diag_warn, diag_err});
+  auto got = fmt.format(List{ascii_diag_note, ascii_diag_warn, ascii_diag_err});
   auto* expect = R"(file.name:1:14: purr
 file.name:2:14: grrr
 file.name:3:16 abc123: hiss)";
@@ -109,7 +147,7 @@
 
 TEST_F(DiagFormatterTest, WithSeverity) {
   Formatter fmt{{false, true, false, false}};
-  auto got = fmt.format(List{diag_note, diag_warn, diag_err});
+  auto got = fmt.format(List{ascii_diag_note, ascii_diag_warn, ascii_diag_err});
   auto* expect = R"(1:14 note: purr
 2:14 warning: grrr
 3:16 error abc123: hiss)";
@@ -118,7 +156,7 @@
 
 TEST_F(DiagFormatterTest, WithLine) {
   Formatter fmt{{false, false, true, false}};
-  auto got = fmt.format(List{diag_note, diag_warn, diag_err});
+  auto got = fmt.format(List{ascii_diag_note, ascii_diag_warn, ascii_diag_err});
   auto* expect = R"(1:14: purr
 the  cat  says  meow
                 ^
@@ -134,9 +172,24 @@
   ASSERT_EQ(expect, got);
 }
 
+TEST_F(DiagFormatterTest, UnicodeWithLine) {
+  Formatter fmt{{false, false, true, false}};
+  auto got = fmt.format(List{utf8_diag_note, utf8_diag_warn, utf8_diag_err});
+  auto* expect =
+      "1:15: purr\n"
+      "the  \xf0\x9f\x90\xb1  says  meow\n"
+      "\n"
+      "2:15: grrr\n"
+      "the  \xf0\x9f\x90\x95  says  woof\n"
+      "\n"
+      "3:15 abc123: hiss\n"
+      "the  \xf0\x9f\x90\x8d  says  quack\n";
+  ASSERT_EQ(expect, got);
+}
+
 TEST_F(DiagFormatterTest, BasicWithFileSeverityLine) {
   Formatter fmt{{true, true, true, false}};
-  auto got = fmt.format(List{diag_note, diag_warn, diag_err});
+  auto got = fmt.format(List{ascii_diag_note, ascii_diag_warn, ascii_diag_err});
   auto* expect = R"(file.name:1:14 note: purr
 the  cat  says  meow
                 ^
@@ -153,9 +206,9 @@
 }
 
 TEST_F(DiagFormatterTest, BasicWithMultiLine) {
-  auto multiline =
-      Diag(Severity::Warning, Source{Source::Range{{2, 9}, {4, 15}}, &file},
-           "multiline", System::Test);
+  auto multiline = Diag(Severity::Warning,
+                        Source{Source::Range{{2, 9}, {4, 15}}, &ascii_file},
+                        "multiline", System::Test);
   Formatter fmt{{false, false, true, false}};
   auto got = fmt.format(List{multiline});
   auto* expect = R"(2:9: multiline
@@ -169,9 +222,23 @@
   ASSERT_EQ(expect, got);
 }
 
+TEST_F(DiagFormatterTest, UnicodeWithMultiLine) {
+  auto multiline = Diag(Severity::Warning,
+                        Source{Source::Range{{2, 9}, {4, 15}}, &utf8_file},
+                        "multiline", System::Test);
+  Formatter fmt{{false, false, true, false}};
+  auto got = fmt.format(List{multiline});
+  auto* expect =
+      "2:9: multiline\n"
+      "the  \xf0\x9f\x90\x95  says  woof\n"
+      "the  \xf0\x9f\x90\x8d  says  quack\n"
+      "the  \xf0\x9f\x90\x8c  says  ???\n";
+  ASSERT_EQ(expect, got);
+}
+
 TEST_F(DiagFormatterTest, BasicWithFileSeverityLineTab4) {
   Formatter fmt{{true, true, true, false, 4u}};
-  auto got = fmt.format(List{diag_note, diag_warn, diag_err});
+  auto got = fmt.format(List{ascii_diag_note, ascii_diag_warn, ascii_diag_err});
   auto* expect = R"(file.name:1:14 note: purr
 the    cat    says    meow
                       ^
@@ -188,9 +255,9 @@
 }
 
 TEST_F(DiagFormatterTest, BasicWithMultiLineTab4) {
-  auto multiline =
-      Diag(Severity::Warning, Source{Source::Range{{2, 9}, {4, 15}}, &file},
-           "multiline", System::Test);
+  auto multiline = Diag(Severity::Warning,
+                        Source{Source::Range{{2, 9}, {4, 15}}, &ascii_file},
+                        "multiline", System::Test);
   Formatter fmt{{false, false, true, false, 4u}};
   auto got = fmt.format(List{multiline});
   auto* expect = R"(2:9: multiline
@@ -206,7 +273,7 @@
 
 TEST_F(DiagFormatterTest, ICE) {
   Formatter fmt{{}};
-  auto got = fmt.format(List{diag_ice});
+  auto got = fmt.format(List{ascii_diag_ice});
   auto* expect = R"(file.name:4:16 internal compiler error: unreachable
 the  snail  says  ???
                   ^^^
@@ -217,7 +284,7 @@
 
 TEST_F(DiagFormatterTest, Fatal) {
   Formatter fmt{{}};
-  auto got = fmt.format(List{diag_fatal});
+  auto got = fmt.format(List{ascii_diag_fatal});
   auto* expect = R"(file.name:4:16 fatal: nothing
 the  snail  says  ???
                   ^^^
@@ -229,7 +296,8 @@
 TEST_F(DiagFormatterTest, RangeOOB) {
   Formatter fmt{{true, true, true, true}};
   diag::List list;
-  list.add_error(System::Test, "oob", Source{{{10, 20}, {30, 20}}, &file});
+  list.add_error(System::Test, "oob",
+                 Source{{{10, 20}, {30, 20}}, &ascii_file});
   auto got = fmt.format(list);
   auto* expect = R"(file.name:10:20 error: oob