Check number of digits in integer during tokenization

While looking ahead to determine if a token is an integer, check the
number of digits to make sure that it can actually fit in the internal
representation.

This is an optimization on the existing code, to cause an early exit
and prevent pathological cases with huge integers from consuming too
much processing time, when they will never succeed.

From a functional perspective this has no effect on whether or not a
token will be accepted as an integer, so almost all of the tests do not
need an update. The one exception is a case where the lexer now
catches the invalid integer earlier in the tokenization, so the error
message is shorter.

This does not handle the equivalent problem for float literals, though
I believe that only exists for non-hex floats.

BUG=chromium:1240715

Change-Id: I27e43711d5f5eda1d54a4128ba514f810abd0313
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/62280
Auto-Submit: Ryan Harrison <rharrison@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
diff --git a/src/reader/wgsl/lexer.cc b/src/reader/wgsl/lexer.cc
index 5853c82..e4be0bc 100644
--- a/src/reader/wgsl/lexer.cc
+++ b/src/reader/wgsl/lexer.cc
@@ -543,6 +543,7 @@
 }
 
 Token Lexer::try_hex_integer() {
+  constexpr size_t kMaxDigits = 8;  // Valid for both 32-bit integer types
   auto start = pos_;
   auto end = pos_;
 
@@ -551,13 +552,23 @@
   if (matches(end, "-")) {
     end++;
   }
+
   if (!matches(end, "0x")) {
-    return Token();
+    return {};
   }
   end += 2;
 
+  auto first = end;
   while (!is_eof() && is_hex(content_->data[end])) {
-    end += 1;
+    end++;
+
+    auto digits = end - first;
+    if (digits > kMaxDigits) {
+      return {Token::Type::kError, source,
+              "integer literal (" +
+                  content_->data.substr(start, end - 1 - start) +
+                  "...) has too many digits"};
+    }
   }
 
   pos_ = end;
@@ -567,6 +578,7 @@
 }
 
 Token Lexer::try_integer() {
+  constexpr size_t kMaxDigits = 10;  // Valid for both 32-bit integer types
   auto start = pos_;
   auto end = start;
 
@@ -575,6 +587,7 @@
   if (matches(end, "-")) {
     end++;
   }
+
   if (end >= len_ || !is_digit(content_->data[end])) {
     return {};
   }
@@ -582,6 +595,14 @@
   auto first = end;
   while (end < len_ && is_digit(content_->data[end])) {
     end++;
+
+    auto digits = end - first;
+    if (digits > kMaxDigits) {
+      return {Token::Type::kError, source,
+              "integer literal (" +
+                  content_->data.substr(start, end - 1 - start) +
+                  "...) has too many digits"};
+    }
   }
 
   // If the first digit is a zero this must only be zero as leading zeros
diff --git a/src/reader/wgsl/lexer_test.cc b/src/reader/wgsl/lexer_test.cc
index 54e5e58..38482d1 100644
--- a/src/reader/wgsl/lexer_test.cc
+++ b/src/reader/wgsl/lexer_test.cc
@@ -251,6 +251,27 @@
   EXPECT_EQ(t.to_str(), "i32 (-0x8000000F) too small");
 }
 
+TEST_F(LexerTest, IntegerTest_HexSignedTooManyDigits) {
+  {
+    Source::FileContent content("-0x100000000000000000000000");
+    Lexer l("test.wgsl", &content);
+
+    auto t = l.next();
+    ASSERT_TRUE(t.Is(Token::Type::kError));
+    EXPECT_EQ(t.to_str(),
+              "integer literal (-0x10000000...) has too many digits");
+  }
+  {
+    Source::FileContent content("0x100000000000000");
+    Lexer l("test.wgsl", &content);
+
+    auto t = l.next();
+    ASSERT_TRUE(t.Is(Token::Type::kError));
+    EXPECT_EQ(t.to_str(),
+              "integer literal (0x10000000...) has too many digits");
+  }
+}
+
 struct HexUnsignedIntData {
   const char* input;
   uint32_t result;
@@ -287,13 +308,13 @@
                     HexUnsignedIntData{"0xFFFFFFFFu",
                                        std::numeric_limits<uint32_t>::max()}));
 
-TEST_F(LexerTest, IntegerTest_HexUnsignedTooLarge) {
-  Source::FileContent content("0xffffffffffu");
+TEST_F(LexerTest, IntegerTest_HexUnsignedTooManyDigits) {
+  Source::FileContent content("0x1000000000000000000000u");
   Lexer l("test.wgsl", &content);
 
   auto t = l.next();
   ASSERT_TRUE(t.Is(Token::Type::kError));
-  EXPECT_EQ(t.to_str(), "u32 (0xffffffffff) too large");
+  EXPECT_EQ(t.to_str(), "integer literal (0x10000000...) has too many digits");
 }
 
 struct UnsignedIntData {
@@ -325,6 +346,15 @@
                                          UnsignedIntData{"4294967295u",
                                                          4294967295u}));
 
+TEST_F(LexerTest, IntegerTest_UnsignedTooManyDigits) {
+  Source::FileContent content("10000000000000000000000u");
+  Lexer l("test.wgsl", &content);
+
+  auto t = l.next();
+  ASSERT_TRUE(t.Is(Token::Type::kError));
+  EXPECT_EQ(t.to_str(), "integer literal (1000000000...) has too many digits");
+}
+
 struct SignedIntData {
   const char* input;
   int32_t result;
@@ -357,6 +387,15 @@
                     SignedIntData{"2147483647", 2147483647},
                     SignedIntData{"-2147483648", -2147483648LL}));
 
+TEST_F(LexerTest, IntegerTest_SignedTooManyDigits) {
+  Source::FileContent content("-10000000000000000");
+  Lexer l("test.wgsl", &content);
+
+  auto t = l.next();
+  ASSERT_TRUE(t.Is(Token::Type::kError));
+  EXPECT_EQ(t.to_str(), "integer literal (-1000000000...) has too many digits");
+}
+
 using IntegerTest_Invalid = testing::TestWithParam<const char*>;
 TEST_P(IntegerTest_Invalid, Parses) {
   Source::FileContent content(GetParam());