Block comments must be terminated before end of input
Lexer methods scanning for comments and whitespace can now
return an error.
Fixes: tint:1309
Change-Id: Ica8e393d3410b1bda2a293db0d9b0006770770ea
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/69361
Auto-Submit: David Neto <dneto@google.com>
Commit-Queue: James Price <jrprice@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: James Price <jrprice@google.com>
diff --git a/src/reader/wgsl/lexer.cc b/src/reader/wgsl/lexer.cc
index f55e5b6..c588630 100644
--- a/src/reader/wgsl/lexer.cc
+++ b/src/reader/wgsl/lexer.cc
@@ -61,14 +61,12 @@
Lexer::~Lexer() = default;
Token Lexer::next() {
- skip_whitespace();
- skip_comments();
-
- if (is_eof()) {
- return {Token::Type::kEOF, begin_source()};
+ auto t = skip_whitespace_and_comments();
+ if (!t.IsUninitialized()) {
+ return t;
}
- auto t = try_hex_float();
+ t = try_hex_float();
if (!t.IsUninitialized()) {
return t;
}
@@ -140,7 +138,7 @@
return content_->data.substr(pos, substr.size()) == substr;
}
-void Lexer::skip_whitespace() {
+Token Lexer::skip_whitespace_and_comments() {
for (;;) {
auto pos = pos_;
while (!is_eof() && is_whitespace(content_->data[pos_])) {
@@ -155,27 +153,41 @@
location_.column++;
}
- skip_comments();
+ auto t = skip_comment();
+ if (!t.IsUninitialized()) {
+ return t;
+ }
// If the cursor didn't advance we didn't remove any whitespace
// so we're done.
if (pos == pos_)
break;
}
+ if (is_eof()) {
+ return {Token::Type::kEOF, begin_source()};
+ }
+
+ return {};
}
-void Lexer::skip_comments() {
+Token Lexer::skip_comment() {
if (matches(pos_, "//")) {
- // Line comment: ignore everything until the end of line.
+ // Line comment: ignore everything until the end of line
+ // or end of input.
while (!is_eof() && !matches(pos_, "\n")) {
pos_++;
location_.column++;
}
- return;
+ return {};
}
if (matches(pos_, "/*")) {
// Block comment: ignore everything until the closing '*/' token.
+
+ // Record source location of the initial '/*'
+ auto source = begin_source();
+ source.range.end.column += 1;
+
pos_ += 2;
location_.column += 2;
@@ -202,7 +214,11 @@
location_.column++;
}
}
+ if (depth > 0) {
+ return {Token::Type::kError, source, "unterminated block comment"};
+ }
}
+ return {};
}
Token Lexer::try_float() {
diff --git a/src/reader/wgsl/lexer.h b/src/reader/wgsl/lexer.h
index c40ad7e..5575c3d 100644
--- a/src/reader/wgsl/lexer.h
+++ b/src/reader/wgsl/lexer.h
@@ -32,13 +32,19 @@
Lexer(const std::string& file_path, const Source::FileContent* content);
~Lexer();
- /// Returns the next token in the input stream
+ /// Returns the next token in the input stream.
/// @return Token
Token next();
private:
- void skip_whitespace();
- void skip_comments();
+ /// Advances past whitespace and comments, if present
+ /// at the current position.
+ /// @returns uninitialized token on success, EOF at end of input, or error
+ Token skip_whitespace_and_comments();
+ /// Advances past a comment at the current position,
+ /// if one exists.
+ /// @returns uninitialized token on success, or error
+ Token skip_comment();
Token build_token_from_int_if_possible(Source source,
size_t start,
@@ -55,6 +61,7 @@
Source begin_source() const;
void end_source(Source&) const;
+ /// @returns true if the end of the input has been reached.
bool is_eof() const;
/// @param ch a character
/// @returns true if 'ch' is an alphabetic character
diff --git a/src/reader/wgsl/lexer_test.cc b/src/reader/wgsl/lexer_test.cc
index d476b53..06a6d9c 100644
--- a/src/reader/wgsl/lexer_test.cc
+++ b/src/reader/wgsl/lexer_test.cc
@@ -110,6 +110,24 @@
EXPECT_TRUE(t.IsEof());
}
+TEST_F(LexerTest, Skips_Comments_Block_Unterminated) {
+ // The opening /* is split across two string literals; otherwise the clang
+ // readability check reports that it cannot find the end of a multi-line comment.
+ Source::FileContent content(R"(
+ /)"
+ R"(*
+abcd)");
+ Lexer l("test.wgsl", &content);
+
+ auto t = l.next();
+ ASSERT_TRUE(t.Is(Token::Type::kError));
+ EXPECT_EQ(t.to_str(), "unterminated block comment");
+ EXPECT_EQ(t.source().range.begin.line, 2u);
+ EXPECT_EQ(t.source().range.begin.column, 3u);
+ EXPECT_EQ(t.source().range.end.line, 2u);
+ EXPECT_EQ(t.source().range.end.column, 4u);
+}
+
struct FloatData {
const char* input;
float result;
diff --git a/src/reader/wgsl/parser_impl_test.cc b/src/reader/wgsl/parser_impl_test.cc
index c354fc8..155113d 100644
--- a/src/reader/wgsl/parser_impl_test.cc
+++ b/src/reader/wgsl/parser_impl_test.cc
@@ -85,7 +85,7 @@
EXPECT_EQ(p->error(), "5:1: exponent is too large for hex float");
}
-TEST_F(ParserImplTest, Comments) {
+TEST_F(ParserImplTest, Comments_TerminatedBlockComment) {
auto p = parser(R"(
/**
* Here is my shader.
@@ -99,12 +99,24 @@
parameters
*/) -> [[location(0)]] vec4<f32> {
return/*block_comments_delimit_tokens*/vec4<f32>(.4, .2, .3, 1);
-}/* unterminated block comments are OK at EOF...)");
+}/* block comments are OK at EOF...*/)");
ASSERT_TRUE(p->Parse()) << p->error();
ASSERT_EQ(1u, p->program().AST().Functions().size());
}
+TEST_F(ParserImplTest, Comments_UnterminatedBlockComment) {
+ auto p = parser(R"(
+[[stage(fragment)]]
+fn main() -> [[location(0)]] vec4<f32> {
+ return vec4<f32>(.4, .2, .3, 1);
+} /* unterminated block comments are invalid ...)");
+
+ ASSERT_FALSE(p->Parse());
+ ASSERT_TRUE(p->has_error());
+ EXPECT_EQ(p->error(), "5:3: unterminated block comment") << p->error();
+}
+
} // namespace
} // namespace wgsl
} // namespace reader