null character on input is an error

Fixes: tint:1311
Change-Id: Id80adc2c14c6d2cd5ee884e081d1d84f021e6620
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/72200
Auto-Submit: David Neto <dneto@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: David Neto <dneto@google.com>
diff --git a/src/reader/wgsl/lexer.cc b/src/reader/wgsl/lexer.cc
index c22b5a2..00c0f8d 100644
--- a/src/reader/wgsl/lexer.cc
+++ b/src/reader/wgsl/lexer.cc
@@ -96,7 +96,8 @@
     return t;
   }
 
-  return {Token::Type::kError, begin_source(), "invalid character found"};
+  return {Token::Type::kError, begin_source(),
+          (is_null() ? "null character found" : "invalid character found")};
 }
 
 Source Lexer::begin_source() const {
@@ -116,6 +117,10 @@
   return pos_ >= len_;
 }
 
+bool Lexer::is_null() const {
+  return (pos_ < len_) && (content_->data[pos_] == 0);
+}
+
 bool Lexer::is_alpha(char ch) const {
   return std::isalpha(ch);
 }
@@ -175,6 +180,9 @@
     // Line comment: ignore everything until the end of line
     // or end of input.
     while (!is_eof() && !matches(pos_, "\n")) {
+      if (is_null()) {
+        return {Token::Type::kError, begin_source(), "null character found"};
+      }
       pos_++;
       location_.column++;
     }
@@ -208,6 +216,8 @@
         pos_++;
         location_.line++;
         location_.column = 1;
+      } else if (is_null()) {
+        return {Token::Type::kError, begin_source(), "null character found"};
       } else {
         // Anything else: skip and update source location.
         pos_++;
@@ -653,10 +663,11 @@
     end++;
   }
 
-  if (!matches(end, "0x")) {
+  if (matches(end, "0x")) {
+    end += 2;
+  } else {
     return {};
   }
-  end += 2;
 
   auto first = end;
   while (!is_eof() && is_hex(content_->data[end])) {
diff --git a/src/reader/wgsl/lexer.h b/src/reader/wgsl/lexer.h
index d84e654..c823f59 100644
--- a/src/reader/wgsl/lexer.h
+++ b/src/reader/wgsl/lexer.h
@@ -41,8 +41,9 @@
   /// at the current position.
   /// @returns error token, EOF, or uninitialized
   Token skip_whitespace_and_comments();
-  /// Advances past a comment at the current position,
-  /// if one exists.
+  /// Advances past a comment at the current position, if one exists.
+  /// Returns an error if there was an unterminated block comment,
+  /// or a null character was present.
   /// @returns uninitialized token on success, or error
   Token skip_comment();
 
@@ -72,6 +73,9 @@
 
   /// @returns true if the end of the input has been reached.
   bool is_eof() const;
+  /// @returns true if there is another character on the input and
+  /// it is null.
+  bool is_null() const;
   /// @param ch a character
   /// @returns true if 'ch' is an alphabetic character
   bool is_alpha(char ch) const;
diff --git a/src/reader/wgsl/lexer_test.cc b/src/reader/wgsl/lexer_test.cc
index b1f1742..24faeb3 100644
--- a/src/reader/wgsl/lexer_test.cc
+++ b/src/reader/wgsl/lexer_test.cc
@@ -128,6 +128,64 @@
   EXPECT_EQ(t.source().range.end.column, 4u);
 }
 
+TEST_F(LexerTest, Null_InWhitespace_IsError) {
+  Source::FileContent content(std::string{' ', 0, ' '});
+  Lexer l("test.wgsl", &content);
+
+  auto t = l.next();
+  EXPECT_TRUE(t.IsError());
+  EXPECT_EQ(t.source().range.begin.line, 1u);
+  EXPECT_EQ(t.source().range.begin.column, 2u);
+  EXPECT_EQ(t.source().range.end.line, 1u);
+  EXPECT_EQ(t.source().range.end.column, 2u);
+  EXPECT_EQ(t.to_str(), "null character found");
+}
+
+TEST_F(LexerTest, Null_InLineComment_IsError) {
+  Source::FileContent content(std::string{'/', '/', ' ', 0, ' '});
+  Lexer l("test.wgsl", &content);
+
+  auto t = l.next();
+  EXPECT_TRUE(t.IsError());
+  EXPECT_EQ(t.source().range.begin.line, 1u);
+  EXPECT_EQ(t.source().range.begin.column, 4u);
+  EXPECT_EQ(t.source().range.end.line, 1u);
+  EXPECT_EQ(t.source().range.end.column, 4u);
+  EXPECT_EQ(t.to_str(), "null character found");
+}
+
+TEST_F(LexerTest, Null_InBlockComment_IsError) {
+  Source::FileContent content(std::string{'/', '*', ' ', 0, '*', '/'});
+  Lexer l("test.wgsl", &content);
+
+  auto t = l.next();
+  EXPECT_TRUE(t.IsError());
+  EXPECT_EQ(t.source().range.begin.line, 1u);
+  EXPECT_EQ(t.source().range.begin.column, 4u);
+  EXPECT_EQ(t.source().range.end.line, 1u);
+  EXPECT_EQ(t.source().range.end.column, 4u);
+  EXPECT_EQ(t.to_str(), "null character found");
+}
+
+TEST_F(LexerTest, Null_InIdentifier_IsError) {
+  // Try inserting a null in an identifier. Other valid token
+  // kinds will behave similarly, so use the identifier case
+  // as a representative.
+  Source::FileContent content(std::string{'a', 0, 'c'});
+  Lexer l("test.wgsl", &content);
+
+  auto t = l.next();
+  EXPECT_TRUE(t.IsIdentifier());
+  EXPECT_EQ(t.to_str(), "a");
+  t = l.next();
+  EXPECT_TRUE(t.IsError());
+  EXPECT_EQ(t.source().range.begin.line, 1u);
+  EXPECT_EQ(t.source().range.begin.column, 2u);
+  EXPECT_EQ(t.source().range.end.line, 1u);
+  EXPECT_EQ(t.source().range.end.column, 2u);
+  EXPECT_EQ(t.to_str(), "null character found");
+}
+
 struct FloatData {
   const char* input;
   float result;