Terminate line comments with \v, \f, and \r

The WGSL spec says that line comments are terminated by any blankspace
character other than a space or a horizontal tab.

Also rename is_whitespace to is_blankspace and tighten up the
definition to only include the characters listed in the WGSL spec.

Change-Id: I4fee0175980ab70e9baf107a6e79ab5c2e4f906d
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/83920
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: James Price <jrprice@google.com>
diff --git a/src/tint/reader/wgsl/lexer.cc b/src/tint/reader/wgsl/lexer.cc
index a68afa2..d5263aa 100644
--- a/src/tint/reader/wgsl/lexer.cc
+++ b/src/tint/reader/wgsl/lexer.cc
@@ -28,8 +28,10 @@
 namespace wgsl {
 namespace {
 
-bool is_whitespace(char c) {
-  return std::isspace(static_cast<unsigned char>(c));
+bool is_blankspace(char c) {
+  // See https://www.w3.org/TR/WGSL/#blankspace.
+  return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' ||
+         c == '\r';
 }
 
 uint32_t dec_value(char c) {
@@ -62,7 +64,7 @@
 Lexer::~Lexer() = default;
 
 Token Lexer::next() {
-  if (auto t = skip_whitespace_and_comments(); !t.IsUninitialized()) {
+  if (auto t = skip_blankspace_and_comments(); !t.IsUninitialized()) {
     return t;
   }
 
@@ -128,10 +130,10 @@
   return file_->content.data_view.substr(pos, substr.size()) == substr;
 }
 
-Token Lexer::skip_whitespace_and_comments() {
+Token Lexer::skip_blankspace_and_comments() {
   for (;;) {
     auto pos = pos_;
-    while (!is_eof() && is_whitespace(file_->content.data[pos_])) {
+    while (!is_eof() && is_blankspace(file_->content.data[pos_])) {
       if (matches(pos_, "\n")) {
         pos_++;
         location_.line++;
@@ -148,7 +150,7 @@
       return t;
     }
 
-    // If the cursor didn't advance we didn't remove any whitespace
+    // If the cursor didn't advance we didn't remove any blankspace
     // so we're done.
     if (pos == pos_)
       break;
@@ -162,9 +164,10 @@
 
 Token Lexer::skip_comment() {
   if (matches(pos_, "//")) {
-    // Line comment: ignore everything until the end of line
-    // or end of input.
-    while (!is_eof() && !matches(pos_, "\n")) {
+    // Line comment: ignore everything until the end of input or a blankspace
+    // character other than space or horizontal tab.
+    while (!is_eof() && !(is_blankspace(file_->content.data[pos_]) &&
+                          !matches(pos_, " ") && !matches(pos_, "\t"))) {
       if (is_null()) {
         return {Token::Type::kError, begin_source(), "null character found"};
       }
diff --git a/src/tint/reader/wgsl/lexer.h b/src/tint/reader/wgsl/lexer.h
index 710b10e..cc91e9b 100644
--- a/src/tint/reader/wgsl/lexer.h
+++ b/src/tint/reader/wgsl/lexer.h
@@ -36,10 +36,9 @@
   Token next();
 
  private:
-  /// Advances past whitespace and comments, if present
-  /// at the current position.
+  /// Advances past blankspace and comments, if present at the current position.
   /// @returns error token, EOF, or uninitialized
-  Token skip_whitespace_and_comments();
+  Token skip_blankspace_and_comments();
   /// Advances past a comment at the current position, if one exists.
   /// Returns an error if there was an unterminated block comment,
   /// or a null character was present.
diff --git a/src/tint/reader/wgsl/lexer_test.cc b/src/tint/reader/wgsl/lexer_test.cc
index b05c0a6..93cbf0b 100644
--- a/src/tint/reader/wgsl/lexer_test.cc
+++ b/src/tint/reader/wgsl/lexer_test.cc
@@ -32,7 +32,7 @@
   EXPECT_TRUE(t.IsEof());
 }
 
-TEST_F(LexerTest, Skips_Whitespace) {
+TEST_F(LexerTest, Skips_Blankspace) {
   Source::File file("", "\t\r\n\t    ident\t\n\t  \r ");
   Lexer l(&file);
 
@@ -75,6 +75,43 @@
   EXPECT_TRUE(t.IsEof());
 }
 
+using LineCommentTerminatorTest = testing::TestWithParam<char>;
+TEST_P(LineCommentTerminatorTest, Terminators) {
+  // Test that line comments are ended by blankspace characters other than space
+  // and horizontal tab.
+  char c = GetParam();
+  std::string src = "let// This is a comment";
+  src += c;
+  src += "ident";
+  Source::File file("", src);
+  Lexer l(&file);
+
+  auto t = l.next();
+  EXPECT_TRUE(t.Is(Token::Type::kLet));
+  EXPECT_EQ(t.source().range.begin.line, 1u);
+  EXPECT_EQ(t.source().range.begin.column, 1u);
+  EXPECT_EQ(t.source().range.end.line, 1u);
+  EXPECT_EQ(t.source().range.end.column, 4u);
+
+  if (c != ' ' && c != '\t') {
+    size_t line = c == '\n' ? 2u : 1u;
+    size_t col = c == '\n' ? 1u : 25u;
+    t = l.next();
+    EXPECT_TRUE(t.IsIdentifier());
+    EXPECT_EQ(t.source().range.begin.line, line);
+    EXPECT_EQ(t.source().range.begin.column, col);
+    EXPECT_EQ(t.source().range.end.line, line);
+    EXPECT_EQ(t.source().range.end.column, col + 5);
+    EXPECT_EQ(t.to_str(), "ident");
+  }
+
+  t = l.next();
+  EXPECT_TRUE(t.IsEof());
+}
+INSTANTIATE_TEST_SUITE_P(LexerTest,
+                         LineCommentTerminatorTest,
+                         testing::Values(' ', '\t', '\n', '\v', '\f', '\r'));
+
 TEST_F(LexerTest, Skips_Comments_Block) {
   Source::File file("", R"(/* comment
 text */ident)");
@@ -128,7 +165,7 @@
   EXPECT_EQ(t.source().range.end.column, 4u);
 }
 
-TEST_F(LexerTest, Null_InWhitespace_IsError) {
+TEST_F(LexerTest, Null_InBlankspace_IsError) {
   Source::File file("", std::string{' ', 0, ' '});
   Lexer l(&file);
 
diff --git a/src/tint/writer/text_generator.h b/src/tint/writer/text_generator.h
index f65c6f2..d3eb8ad 100644
--- a/src/tint/writer/text_generator.h
+++ b/src/tint/writer/text_generator.h
@@ -32,7 +32,7 @@
  public:
   /// Line holds a single line of text
   struct Line {
-    /// The indentation of the line in whitespaces
+    /// The indentation of the line in blankspace
     uint32_t indent = 0;
     /// The content of the line, without a trailing newline character
     std::string content;