Check number of digits in integer during tokenization While looking ahead to determine if a token is an integer, check the number of digits to make sure that it can actually fit in the internal representation. This is an optimization on the existing code, to cause an early exit and prevent pathological cases with huge integers from consuming too much processing time, when they will never succeed. From a functional perspective this has no effect on whether or not a token will be accepted as an integer, so almost all of the tests do not need an update. The one exception is a case where the lexer now catches the invalid integer earlier in the tokenization, so the error message is shorter. This does not handle the equivalent problem for float literals, though I believe that only exists for non-hex floats. BUG=chromium:1240715 Change-Id: I27e43711d5f5eda1d54a4128ba514f810abd0313 Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/62280 Auto-Submit: Ryan Harrison <rharrison@chromium.org> Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Ben Clayton <bclayton@google.com> Reviewed-by: Ben Clayton <bclayton@google.com>

commit: 200cdd2052960ed8dfd343e09ad8e2cfa9acadff [log] [tgz]
author: Ryan Harrison <rharrison@chromium.org> Fri Aug 27 08:29:37 2021 +0000
committer: Tint LUCI CQ <tint-scoped@luci-project-accounts.iam.gserviceaccount.com> Fri Aug 27 08:29:37 2021 +0000
tree: 56a5f1cc4a7755fa42b3293e87a9b6048720a051
parent: 9021eb55940bd6752546684d66dffe94aac22949 [diff]
diff --git a/src/reader/wgsl/lexer.cc b/src/reader/wgsl/lexer.cc
index 5853c82..e4be0bc 100644
--- a/src/reader/wgsl/lexer.cc
+++ b/src/reader/wgsl/lexer.cc

@@ -543,6 +543,7 @@
 }
 
 Token Lexer::try_hex_integer() {
+  constexpr size_t kMaxDigits = 8;  // Valid for both 32-bit integer types
   auto start = pos_;
   auto end = pos_;
 
@@ -551,13 +552,23 @@
   if (matches(end, "-")) {
     end++;
   }
+
   if (!matches(end, "0x")) {
-    return Token();
+    return {};
   }
   end += 2;
 
+  auto first = end;
   while (!is_eof() && is_hex(content_->data[end])) {
-    end += 1;
+    end++;
+
+    auto digits = end - first;
+    if (digits > kMaxDigits) {
+      return {Token::Type::kError, source,
+              "integer literal (" +
+                  content_->data.substr(start, end - 1 - start) +
+                  "...) has too many digits"};
+    }
   }
 
   pos_ = end;
@@ -567,6 +578,7 @@
 }
 
 Token Lexer::try_integer() {
+  constexpr size_t kMaxDigits = 10;  // Valid for both 32-bit integer types
   auto start = pos_;
   auto end = start;
 
@@ -575,6 +587,7 @@
   if (matches(end, "-")) {
     end++;
   }
+
   if (end >= len_ || !is_digit(content_->data[end])) {
     return {};
   }
@@ -582,6 +595,14 @@
   auto first = end;
   while (end < len_ && is_digit(content_->data[end])) {
     end++;
+
+    auto digits = end - first;
+    if (digits > kMaxDigits) {
+      return {Token::Type::kError, source,
+              "integer literal (" +
+                  content_->data.substr(start, end - 1 - start) +
+                  "...) has too many digits"};
+    }
   }
 
   // If the first digit is a zero this must only be zero as leading zeros

diff --git a/src/reader/wgsl/lexer_test.cc b/src/reader/wgsl/lexer_test.cc
index 54e5e58..38482d1 100644
--- a/src/reader/wgsl/lexer_test.cc
+++ b/src/reader/wgsl/lexer_test.cc

@@ -251,6 +251,27 @@
   EXPECT_EQ(t.to_str(), "i32 (-0x8000000F) too small");
 }
 
+TEST_F(LexerTest, IntegerTest_HexSignedTooManyDigits) {
+  {
+    Source::FileContent content("-0x100000000000000000000000");
+    Lexer l("test.wgsl", &content);
+
+    auto t = l.next();
+    ASSERT_TRUE(t.Is(Token::Type::kError));
+    EXPECT_EQ(t.to_str(),
+              "integer literal (-0x10000000...) has too many digits");
+  }
+  {
+    Source::FileContent content("0x100000000000000");
+    Lexer l("test.wgsl", &content);
+
+    auto t = l.next();
+    ASSERT_TRUE(t.Is(Token::Type::kError));
+    EXPECT_EQ(t.to_str(),
+              "integer literal (0x10000000...) has too many digits");
+  }
+}
+
 struct HexUnsignedIntData {
   const char* input;
   uint32_t result;
@@ -287,13 +308,13 @@
                     HexUnsignedIntData{"0xFFFFFFFFu",
                                        std::numeric_limits<uint32_t>::max()}));
 
-TEST_F(LexerTest, IntegerTest_HexUnsignedTooLarge) {
-  Source::FileContent content("0xffffffffffu");
+TEST_F(LexerTest, IntegerTest_HexUnsignedTooManyDigits) {
+  Source::FileContent content("0x1000000000000000000000u");
   Lexer l("test.wgsl", &content);
 
   auto t = l.next();
   ASSERT_TRUE(t.Is(Token::Type::kError));
-  EXPECT_EQ(t.to_str(), "u32 (0xffffffffff) too large");
+  EXPECT_EQ(t.to_str(), "integer literal (0x10000000...) has too many digits");
 }
 
 struct UnsignedIntData {
@@ -325,6 +346,15 @@
                                          UnsignedIntData{"4294967295u",
                                                          4294967295u}));
 
+TEST_F(LexerTest, IntegerTest_UnsignedTooManyDigits) {
+  Source::FileContent content("10000000000000000000000u");
+  Lexer l("test.wgsl", &content);
+
+  auto t = l.next();
+  ASSERT_TRUE(t.Is(Token::Type::kError));
+  EXPECT_EQ(t.to_str(), "integer literal (1000000000...) has too many digits");
+}
+
 struct SignedIntData {
   const char* input;
   int32_t result;
@@ -357,6 +387,15 @@
                     SignedIntData{"2147483647", 2147483647},
                     SignedIntData{"-2147483648", -2147483648LL}));
 
+TEST_F(LexerTest, IntegerTest_SignedTooManyDigits) {
+  Source::FileContent content("-10000000000000000");
+  Lexer l("test.wgsl", &content);
+
+  auto t = l.next();
+  ASSERT_TRUE(t.Is(Token::Type::kError));
+  EXPECT_EQ(t.to_str(), "integer literal (-1000000000...) has too many digits");
+}
+
 using IntegerTest_Invalid = testing::TestWithParam<const char*>;
 TEST_P(IntegerTest_Invalid, Parses) {
   Source::FileContent content(GetParam());
commit	200cdd2052960ed8dfd343e09ad8e2cfa9acadff	[log] [tgz]
author	Ryan Harrison <rharrison@chromium.org>	Fri Aug 27 08:29:37 2021 +0000
committer	Tint LUCI CQ <tint-scoped@luci-project-accounts.iam.gserviceaccount.com>	Fri Aug 27 08:29:37 2021 +0000
tree	56a5f1cc4a7755fa42b3293e87a9b6048720a051
parent	9021eb55940bd6752546684d66dffe94aac22949 [diff]