Expand ASCII short circuit.
This CL expands the ASCII short circuit to add the digit range (0-9) and
'_' into IsXIDContinue.
IsXIDStart is updated to filter out anything less than the start of the
first sequence after the (a-zA-Z) blocks, as those code points won't be
XIDStart but do cover all of the common ASCII punctuation characters.
Change-Id: Ib839d9840f5a1ecc3d2e80774b11af2444e9f439
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/97071
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: Dan Sinclair <dsinclair@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/tint/text/unicode.cc b/src/tint/text/unicode.cc
index 7339297..e23f3dd 100644
--- a/src/tint/text/unicode.cc
+++ b/src/tint/text/unicode.cc
@@ -306,15 +306,26 @@
} // namespace
bool CodePoint::IsXIDStart() const {
- // Short circuit ascii. It will end up being at the end of the binary search
- // but is our, currently, common case.
+ // Short circuit ASCII. The binary search will find these last, but most
+ // of our current source is ASCII, so handle them quicker.
if ((value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z')) {
return true;
}
+ // With [a-zA-Z] handled, nothing less than the next sequence start can be
+ // XIDStart, so filter them all out. This catches most of the common symbols
+ // that are used in ASCII.
+ if (value < 0x000aa) {
+ return false;
+ }
return std::binary_search(kXIDStartRanges, kXIDStartRanges + kNumXIDStartRanges, *this);
}
bool CodePoint::IsXIDContinue() const {
+ // Short circuit ASCII. The binary search will find these last, but most
+ // of our current source is ASCII, so handle them quicker.
+ if ((value >= '0' && value <= '9') || value == '_') {
+ return true;
+ }
return IsXIDStart() || std::binary_search(kXIDContinueRanges,
kXIDContinueRanges + kNumXIDContinueRanges, *this);
}