Expand ASCII short circuit.

This CL expands the ASCII short circuit by adding the digit range [0-9]
and '_' to IsXIDContinue.

IsXIDStart is updated to filter out anything less than the start of the
first range after the [a-zA-Z] blocks. Those code points can't be XIDStart,
and they cover all of the common ASCII punctuation characters.

Change-Id: Ib839d9840f5a1ecc3d2e80774b11af2444e9f439
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/97071
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: Dan Sinclair <dsinclair@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/tint/text/unicode.cc b/src/tint/text/unicode.cc
index 7339297..e23f3dd 100644
--- a/src/tint/text/unicode.cc
+++ b/src/tint/text/unicode.cc
@@ -306,15 +306,26 @@
 }  // namespace
 
 bool CodePoint::IsXIDStart() const {
-    // Short circuit ascii. It will end up being at the end of the binary search
-    // but is our, currently, common case.
+    // Short circuit ASCII. The binary search will find these last, but most
+    // of our current source is ASCII, so handle them quicker.
     if ((value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z')) {
         return true;
     }
+    // With [a-zA-Z] handled, nothing less than the next sequence start can be
+    // XIDStart, so filter them all out. This catches most of the common symbols
+    // that are used in ASCII.
+    if (value < 0x000aa) {
+        return false;
+    }
     return std::binary_search(kXIDStartRanges, kXIDStartRanges + kNumXIDStartRanges, *this);
 }
 
 bool CodePoint::IsXIDContinue() const {
+    // Short circuit ASCII. The binary search will find these last, but most
+    // of our current source is ASCII, so handle them quicker.
+    if ((value >= '0' && value <= '9') || value == '_') {
+        return true;
+    }
     return IsXIDStart() || std::binary_search(kXIDContinueRanges,
                                               kXIDContinueRanges + kNumXIDContinueRanges, *this);
 }