null character on input is an error
Fixes: tint:1311
Change-Id: Id80adc2c14c6d2cd5ee884e081d1d84f021e6620
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/72200
Auto-Submit: David Neto <dneto@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: David Neto <dneto@google.com>
diff --git a/src/reader/wgsl/lexer.cc b/src/reader/wgsl/lexer.cc
index c22b5a2..00c0f8d 100644
--- a/src/reader/wgsl/lexer.cc
+++ b/src/reader/wgsl/lexer.cc
@@ -96,7 +96,8 @@
return t;
}
- return {Token::Type::kError, begin_source(), "invalid character found"};
+ return {Token::Type::kError, begin_source(),
+ (is_null() ? "null character found" : "invalid character found")};
}
Source Lexer::begin_source() const {
@@ -116,6 +117,10 @@
return pos_ >= len_;
}
+bool Lexer::is_null() const {
+ return (pos_ < len_) && (content_->data[pos_] == 0);
+}
+
bool Lexer::is_alpha(char ch) const {
return std::isalpha(ch);
}
@@ -175,6 +180,9 @@
// Line comment: ignore everything until the end of line
// or end of input.
while (!is_eof() && !matches(pos_, "\n")) {
+ if (is_null()) {
+ return {Token::Type::kError, begin_source(), "null character found"};
+ }
pos_++;
location_.column++;
}
@@ -208,6 +216,8 @@
pos_++;
location_.line++;
location_.column = 1;
+ } else if (is_null()) {
+ return {Token::Type::kError, begin_source(), "null character found"};
} else {
// Anything else: skip and update source location.
pos_++;
@@ -653,10 +663,11 @@
end++;
}
- if (!matches(end, "0x")) {
+ if (matches(end, "0x")) {
+ end += 2;
+ } else {
return {};
}
- end += 2;
auto first = end;
while (!is_eof() && is_hex(content_->data[end])) {
diff --git a/src/reader/wgsl/lexer.h b/src/reader/wgsl/lexer.h
index d84e654..c823f59 100644
--- a/src/reader/wgsl/lexer.h
+++ b/src/reader/wgsl/lexer.h
@@ -41,8 +41,9 @@
/// at the current position.
/// @returns error token, EOF, or uninitialized
Token skip_whitespace_and_comments();
- /// Advances past a comment at the current position,
- /// if one exists.
+ /// Advances past a comment at the current position, if one exists.
+ /// Returns an error if there was an unterminated block comment,
+ /// or a null character was present.
/// @returns uninitialized token on success, or error
Token skip_comment();
@@ -72,6 +73,9 @@
/// @returns true if the end of the input has been reached.
bool is_eof() const;
+ /// @returns true if there is another character on the input and
+ /// it is a null character.
+ bool is_null() const;
/// @param ch a character
/// @returns true if 'ch' is an alphabetic character
bool is_alpha(char ch) const;
diff --git a/src/reader/wgsl/lexer_test.cc b/src/reader/wgsl/lexer_test.cc
index b1f1742..24faeb3 100644
--- a/src/reader/wgsl/lexer_test.cc
+++ b/src/reader/wgsl/lexer_test.cc
@@ -128,6 +128,64 @@
EXPECT_EQ(t.source().range.end.column, 4u);
}
+TEST_F(LexerTest, Null_InWhitespace_IsError) {
+ Source::FileContent content(std::string{' ', 0, ' '});
+ Lexer l("test.wgsl", &content);
+
+ auto t = l.next();
+ EXPECT_TRUE(t.IsError());
+ EXPECT_EQ(t.source().range.begin.line, 1u);
+ EXPECT_EQ(t.source().range.begin.column, 2u);
+ EXPECT_EQ(t.source().range.end.line, 1u);
+ EXPECT_EQ(t.source().range.end.column, 2u);
+ EXPECT_EQ(t.to_str(), "null character found");
+}
+
+TEST_F(LexerTest, Null_InLineComment_IsError) {
+ Source::FileContent content(std::string{'/', '/', ' ', 0, ' '});
+ Lexer l("test.wgsl", &content);
+
+ auto t = l.next();
+ EXPECT_TRUE(t.IsError());
+ EXPECT_EQ(t.source().range.begin.line, 1u);
+ EXPECT_EQ(t.source().range.begin.column, 4u);
+ EXPECT_EQ(t.source().range.end.line, 1u);
+ EXPECT_EQ(t.source().range.end.column, 4u);
+ EXPECT_EQ(t.to_str(), "null character found");
+}
+
+TEST_F(LexerTest, Null_InBlockComment_IsError) {
+ Source::FileContent content(std::string{'/', '*', ' ', 0, '*', '/'});
+ Lexer l("test.wgsl", &content);
+
+ auto t = l.next();
+ EXPECT_TRUE(t.IsError());
+ EXPECT_EQ(t.source().range.begin.line, 1u);
+ EXPECT_EQ(t.source().range.begin.column, 4u);
+ EXPECT_EQ(t.source().range.end.line, 1u);
+ EXPECT_EQ(t.source().range.end.column, 4u);
+ EXPECT_EQ(t.to_str(), "null character found");
+}
+
+TEST_F(LexerTest, Null_InIdentifier_IsError) {
+ // Try inserting a null in an identifier. Other valid token
+ // kinds will behave similarly, so use the identifier case
+ // as a representative.
+ Source::FileContent content(std::string{'a', 0, 'c'});
+ Lexer l("test.wgsl", &content);
+
+ auto t = l.next();
+ EXPECT_TRUE(t.IsIdentifier());
+ EXPECT_EQ(t.to_str(), "a");
+ t = l.next();
+ EXPECT_TRUE(t.IsError());
+ EXPECT_EQ(t.source().range.begin.line, 1u);
+ EXPECT_EQ(t.source().range.begin.column, 2u);
+ EXPECT_EQ(t.source().range.end.line, 1u);
+ EXPECT_EQ(t.source().range.end.column, 2u);
+ EXPECT_EQ(t.to_str(), "null character found");
+}
+
struct FloatData {
const char* input;
float result;