reader/wgsl: Support unicode identifiers
Bug: tint:1437
Change-Id: Ie00ccb3e93d207111e55117dfc989f79b76164bf
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/80844
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: David Neto <dneto@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
diff --git a/docs/origin-trial-changes.md b/docs/origin-trial-changes.md
index 9e19b62..d93bce4 100644
--- a/docs/origin-trial-changes.md
+++ b/docs/origin-trial-changes.md
@@ -12,6 +12,7 @@
* Module-scope declarations can now be declared in any order. [tint:1266](crbug.com/tint/1266)
* The `override` keyword and `@id()` attribute for pipeline-overridable constants are now supported, replacing the `@override` attribute. [tint:1403](crbug.com/tint/1403)
+* Tint now supports unicode identifiers. [tint:1437](crbug.com/tint/1437)
## Changes for M99
diff --git a/samples/main.cc b/samples/main.cc
index d1d0ec2..8a8484a 100644
--- a/samples/main.cc
+++ b/samples/main.cc
@@ -1119,7 +1119,8 @@
case Format::kMsl: {
#if TINT_BUILD_MSL_WRITER
transform_inputs.Add<tint::transform::Renamer::Config>(
- tint::transform::Renamer::Target::kMslKeywords);
+ tint::transform::Renamer::Target::kMslKeywords,
+ /* preserve_unicode */ false);
transform_manager.Add<tint::transform::Renamer>();
#endif // TINT_BUILD_MSL_WRITER
break;
@@ -1132,7 +1133,8 @@
case Format::kHlsl: {
#if TINT_BUILD_HLSL_WRITER
transform_inputs.Add<tint::transform::Renamer::Config>(
- tint::transform::Renamer::Target::kHlslKeywords);
+ tint::transform::Renamer::Target::kHlslKeywords,
+ /* preserve_unicode */ false);
transform_manager.Add<tint::transform::Renamer>();
#endif // TINT_BUILD_HLSL_WRITER
break;
diff --git a/src/reader/wgsl/lexer.cc b/src/reader/wgsl/lexer.cc
index 56e56ad..8b0f7ac 100644
--- a/src/reader/wgsl/lexer.cc
+++ b/src/reader/wgsl/lexer.cc
@@ -21,6 +21,7 @@
#include <utility>
#include "src/debug.h"
+#include "src/text/unicode.h"
namespace tint {
namespace reader {
@@ -113,18 +114,10 @@
return (pos_ < len_) && (file_->content.data[pos_] == 0);
}
-bool Lexer::is_alpha(char ch) const {
- return std::isalpha(ch);
-}
-
bool Lexer::is_digit(char ch) const {
return std::isdigit(ch);
}
-bool Lexer::is_alphanum_underscore(char ch) const {
- return is_alpha(ch) || is_digit(ch) || ch == '_';
-}
-
bool Lexer::is_hex(char ch) const {
return std::isxdigit(ch);
}
@@ -733,31 +726,52 @@
}
Token Lexer::try_ident() {
- // Must begin with an a-zA-Z_
- if (!(is_alpha(file_->content.data[pos_]) ||
- file_->content.data[pos_] == '_')) {
- return {};
- }
-
auto source = begin_source();
+ auto start = pos_;
- auto s = pos_;
- while (!is_eof() && is_alphanum_underscore(file_->content.data[pos_])) {
- pos_++;
- location_.column++;
+ // The code below assumes that a single std::string element is 1 byte.
+ static_assert(sizeof(file_->content.data[0]) == sizeof(uint8_t),
+ "tint::reader::wgsl requires the size of a std::string element "
+ "to be a single byte");
+
+ // Must begin with an XID_Start unicode character, or underscore
+ {
+ auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]);
+ auto [code_point, n] =
+ text::utf8::Decode(utf8, file_->content.data.size() - pos_);
+ if (code_point != text::CodePoint('_') && !code_point.IsXIDStart()) {
+ return {};
+ }
+ // Consume start codepoint
+ pos_ += n;
+ location_.column += n;
}
- if (file_->content.data[s] == '_') {
+ while (!is_eof()) {
+ // Must continue with an XID_Continue unicode character
+ auto* utf8 = reinterpret_cast<const uint8_t*>(&file_->content.data[pos_]);
+ auto [code_point, n] =
+ text::utf8::Decode(utf8, file_->content.data.size() - pos_);
+ if (!code_point.IsXIDContinue()) {
+ break;
+ }
+
+ // Consume continuing codepoint
+ pos_ += n;
+ location_.column += n;
+ }
+
+ if (file_->content.data[start] == '_') {
// Check for an underscore on its own (special token), or a
// double-underscore (not allowed).
- if ((pos_ == s + 1) || (file_->content.data[s + 1] == '_')) {
- location_.column -= (pos_ - s);
- pos_ = s;
+ if ((pos_ == start + 1) || (file_->content.data[start + 1] == '_')) {
+ location_.column -= (pos_ - start);
+ pos_ = start;
return {};
}
}
- auto str = file_->content.data_view.substr(s, pos_ - s);
+ auto str = file_->content.data_view.substr(start, pos_ - start);
end_source(source);
auto t = check_keyword(source, str);
diff --git a/src/reader/wgsl/lexer.h b/src/reader/wgsl/lexer.h
index 43d9ea5..5bdb20f 100644
--- a/src/reader/wgsl/lexer.h
+++ b/src/reader/wgsl/lexer.h
@@ -76,25 +76,18 @@
/// it is not null.
bool is_null() const;
/// @param ch a character
- /// @returns true if 'ch' is an alphabetic character
- bool is_alpha(char ch) const;
- /// @param ch a character
/// @returns true if 'ch' is a decimal digit
bool is_digit(char ch) const;
/// @param ch a character
/// @returns true if 'ch' is a hexadecimal digit
bool is_hex(char ch) const;
- /// @param ch a character
- /// @returns true if 'ch' is a digit, an alphabetic character,
- /// or an underscore.
- bool is_alphanum_underscore(char ch) const;
bool matches(size_t pos, std::string_view substr);
/// The source file content
Source::File const* const file_;
/// The length of the input
uint32_t len_ = 0;
- /// The current position within the input
+ /// The current position in utf-8 code units (bytes) within the input
uint32_t pos_ = 0;
/// The current location within the input
Source::Location location_;
diff --git a/src/reader/wgsl/lexer_test.cc b/src/reader/wgsl/lexer_test.cc
index ff6c9ca..2e79906 100644
--- a/src/reader/wgsl/lexer_test.cc
+++ b/src/reader/wgsl/lexer_test.cc
@@ -315,8 +315,8 @@
"2.5E+ 123",
"2.5E- 123"));
-using IdentifierTest = testing::TestWithParam<const char*>;
-TEST_P(IdentifierTest, Parse) {
+using AsciiIdentifierTest = testing::TestWithParam<const char*>;
+TEST_P(AsciiIdentifierTest, Parse) {
Source::File file("", GetParam());
Lexer l(&file);
@@ -329,7 +329,7 @@
EXPECT_EQ(t.to_str(), GetParam());
}
INSTANTIATE_TEST_SUITE_P(LexerTest,
- IdentifierTest,
+ AsciiIdentifierTest,
testing::Values("a",
"test",
"test01",
@@ -342,6 +342,57 @@
"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
"alldigits_0123456789"));
+struct UnicodeCase {
+ const char* utf8;
+ size_t code_units;
+};
+
+using UnicodeIdentifierTest = testing::TestWithParam<UnicodeCase>;
+TEST_P(UnicodeIdentifierTest, Parse) {
+ Source::File file("", GetParam().utf8);
+ Lexer l(&file);
+
+ auto t = l.next();
+ EXPECT_TRUE(t.IsIdentifier());
+ EXPECT_EQ(t.source().range.begin.line, 1u);
+ EXPECT_EQ(t.source().range.begin.column, 1u);
+ EXPECT_EQ(t.source().range.end.line, 1u);
+ EXPECT_EQ(t.source().range.end.column, 1u + GetParam().code_units);
+ EXPECT_EQ(t.to_str(), GetParam().utf8);
+}
+INSTANTIATE_TEST_SUITE_P(
+ LexerTest,
+ UnicodeIdentifierTest,
+ testing::ValuesIn({
+ UnicodeCase{// "𝐢𝐝𝐞𝐧𝐭𝐢𝐟𝐢𝐞𝐫"
+ "\xf0\x9d\x90\xa2\xf0\x9d\x90\x9d\xf0\x9d\x90\x9e\xf0\x9d"
+ "\x90\xa7\xf0\x9d\x90\xad\xf0\x9d\x90\xa2\xf0\x9d\x90\x9f"
+ "\xf0\x9d\x90\xa2\xf0\x9d\x90\x9e\xf0\x9d\x90\xab",
+ 40},
+ UnicodeCase{// "𝑖𝑑𝑒𝑛𝑡𝑖𝑓𝑖𝑒𝑟"
+ "\xf0\x9d\x91\x96\xf0\x9d\x91\x91\xf0\x9d\x91\x92\xf0\x9d"
+ "\x91\x9b\xf0\x9d\x91\xa1\xf0\x9d\x91\x96\xf0\x9d\x91\x93"
+ "\xf0\x9d\x91\x96\xf0\x9d\x91\x92\xf0\x9d\x91\x9f",
+ 40},
+ UnicodeCase{
+ // "ｉｄｅｎｔｉｆｉｅｒ"
+ "\xef\xbd\x89\xef\xbd\x84\xef\xbd\x85\xef\xbd\x8e\xef\xbd\x94\xef"
+ "\xbd\x89\xef\xbd\x86\xef\xbd\x89\xef\xbd\x85\xef\xbd\x92",
+ 30},
+ UnicodeCase{// "𝕚𝕕𝕖𝕟𝕥𝕚𝕗𝕚𝕖𝕣𝟙𝟚𝟛"
+ "\xf0\x9d\x95\x9a\xf0\x9d\x95\x95\xf0\x9d\x95\x96\xf0\x9d"
+ "\x95\x9f\xf0\x9d\x95\xa5\xf0\x9d\x95\x9a\xf0\x9d\x95\x97"
+ "\xf0\x9d\x95\x9a\xf0\x9d\x95\x96\xf0\x9d\x95\xa3\xf0\x9d"
+ "\x9f\x99\xf0\x9d\x9f\x9a\xf0\x9d\x9f\x9b",
+ 52},
+ UnicodeCase{
+ // "𝖎𝖉𝖊𝖓𝖙𝖎𝖋𝖎𝖊𝖗123"
+ "\xf0\x9d\x96\x8e\xf0\x9d\x96\x89\xf0\x9d\x96\x8a\xf0\x9d\x96\x93"
+ "\xf0\x9d\x96\x99\xf0\x9d\x96\x8e\xf0\x9d\x96\x8b\xf0\x9d\x96\x8e"
+ "\xf0\x9d\x96\x8a\xf0\x9d\x96\x97\x31\x32\x33",
+ 43},
+ }));
+
TEST_F(LexerTest, IdentifierTest_SingleUnderscoreDoesNotMatch) {
Source::File file("", "_");
Lexer l(&file);
diff --git a/src/reader/wgsl/parser_impl_function_decl_test.cc b/src/reader/wgsl/parser_impl_function_decl_test.cc
index f9bb2bd..56f024e 100644
--- a/src/reader/wgsl/parser_impl_function_decl_test.cc
+++ b/src/reader/wgsl/parser_impl_function_decl_test.cc
@@ -14,6 +14,7 @@
#include "src/ast/workgroup_attribute.h"
#include "src/reader/wgsl/parser_impl_test_helper.h"
+#include "src/utils/string.h"
namespace tint {
namespace reader {
@@ -48,6 +49,51 @@
EXPECT_TRUE(body->statements[0]->Is<ast::ReturnStatement>());
}
+TEST_F(ParserImplTest, FunctionDecl_Unicode) {
+ const std::string function_ident = // "𝗳𝘂𝗻𝗰𝘁𝗶𝗼𝗻"
+ "\xf0\x9d\x97\xb3\xf0\x9d\x98\x82\xf0\x9d\x97\xbb\xf0\x9d\x97\xb0\xf0\x9d"
+ "\x98\x81\xf0\x9d\x97\xb6\xf0\x9d\x97\xbc\xf0\x9d\x97\xbb";
+
+ const std::string param_a_ident = // "𝓹𝓪𝓻𝓪𝓶_𝓪"
+ "\xf0\x9d\x93\xb9\xf0\x9d\x93\xaa\xf0\x9d\x93\xbb\xf0\x9d\x93\xaa\xf0\x9d"
+ "\x93\xb6\x5f\xf0\x9d\x93\xaa";
+
+ const std::string param_b_ident = // "𝕡𝕒𝕣𝕒𝕞_𝕓"
+ "\xf0\x9d\x95\xa1\xf0\x9d\x95\x92\xf0\x9d\x95\xa3\xf0\x9d\x95\x92\xf0\x9d"
+ "\x95\x9e\x5f\xf0\x9d\x95\x93";
+
+ std::string src = "fn $function($param_a : i32, $param_b : f32) { return; }";
+ src = utils::ReplaceAll(src, "$function", function_ident);
+ src = utils::ReplaceAll(src, "$param_a", param_a_ident);
+ src = utils::ReplaceAll(src, "$param_b", param_b_ident);
+
+ auto p = parser(src);
+ auto attrs = p->attribute_list();
+ EXPECT_FALSE(p->has_error()) << p->error();
+ ASSERT_FALSE(attrs.errored);
+ EXPECT_FALSE(attrs.matched);
+ auto f = p->function_decl(attrs.value);
+ EXPECT_FALSE(p->has_error()) << p->error();
+ EXPECT_FALSE(f.errored);
+ EXPECT_TRUE(f.matched);
+ ASSERT_NE(f.value, nullptr);
+
+ EXPECT_EQ(f->symbol, p->builder().Symbols().Get(function_ident));
+ ASSERT_NE(f->return_type, nullptr);
+ EXPECT_TRUE(f->return_type->Is<ast::Void>());
+
+ ASSERT_EQ(f->params.size(), 2u);
+ EXPECT_EQ(f->params[0]->symbol, p->builder().Symbols().Get(param_a_ident));
+ EXPECT_EQ(f->params[1]->symbol, p->builder().Symbols().Get(param_b_ident));
+
+ ASSERT_NE(f->return_type, nullptr);
+ EXPECT_TRUE(f->return_type->Is<ast::Void>());
+
+ auto* body = f->body;
+ ASSERT_EQ(body->statements.size(), 1u);
+ EXPECT_TRUE(body->statements[0]->Is<ast::ReturnStatement>());
+}
+
TEST_F(ParserImplTest, FunctionDecl_AttributeList) {
auto p = parser("@workgroup_size(2, 3, 4) fn main() { return; }");
auto attrs = p->attribute_list();
diff --git a/src/reader/wgsl/parser_impl_struct_decl_test.cc b/src/reader/wgsl/parser_impl_struct_decl_test.cc
index 5df3bd0..40d6cc7 100644
--- a/src/reader/wgsl/parser_impl_struct_decl_test.cc
+++ b/src/reader/wgsl/parser_impl_struct_decl_test.cc
@@ -14,6 +14,7 @@
#include "src/ast/struct_block_attribute.h"
#include "src/reader/wgsl/parser_impl_test_helper.h"
+#include "src/utils/string.h"
namespace tint {
namespace reader {
@@ -42,6 +43,46 @@
EXPECT_EQ(s->members[1]->symbol, p->builder().Symbols().Register("b"));
}
+TEST_F(ParserImplTest, StructDecl_Unicode_Parses) {
+ const std::string struct_ident = // "𝓼𝓽𝓻𝓾𝓬𝓽𝓾𝓻𝓮"
+ "\xf0\x9d\x93\xbc\xf0\x9d\x93\xbd\xf0\x9d\x93\xbb\xf0\x9d\x93\xbe\xf0\x9d"
+ "\x93\xac\xf0\x9d\x93\xbd\xf0\x9d\x93\xbe\xf0\x9d\x93\xbb\xf0\x9d\x93"
+ "\xae";
+ const std::string member_a_ident = // "𝕞𝕖𝕞𝕓𝕖𝕣_𝕒"
+ "\xf0\x9d\x95\x9e\xf0\x9d\x95\x96\xf0\x9d\x95\x9e\xf0\x9d\x95\x93\xf0\x9d"
+ "\x95\x96\xf0\x9d\x95\xa3\x5f\xf0\x9d\x95\x92";
+ const std::string member_b_ident = // "𝔪𝔢𝔪𝔟𝔢𝔯_𝔟"
+ "\xf0\x9d\x94\xaa\xf0\x9d\x94\xa2\xf0\x9d\x94\xaa\xf0\x9d\x94\x9f\xf0\x9d"
+ "\x94\xa2\xf0\x9d\x94\xaf\x5f\xf0\x9d\x94\x9f";
+
+ std::string src = R"(
+struct $struct {
+ $member_a : i32;
+ $member_b : f32;
+})";
+ src = utils::ReplaceAll(src, "$struct", struct_ident);
+ src = utils::ReplaceAll(src, "$member_a", member_a_ident);
+ src = utils::ReplaceAll(src, "$member_b", member_b_ident);
+
+ auto p = parser(src);
+ auto attrs = p->attribute_list();
+ EXPECT_FALSE(attrs.errored);
+ EXPECT_FALSE(attrs.matched);
+ ASSERT_EQ(attrs.value.size(), 0u);
+
+ auto s = p->struct_decl(attrs.value);
+ EXPECT_FALSE(p->has_error());
+ EXPECT_FALSE(s.errored);
+ EXPECT_TRUE(s.matched);
+ ASSERT_NE(s.value, nullptr);
+ ASSERT_EQ(s->name, p->builder().Symbols().Register(struct_ident));
+ ASSERT_EQ(s->members.size(), 2u);
+ EXPECT_EQ(s->members[0]->symbol,
+ p->builder().Symbols().Register(member_a_ident));
+ EXPECT_EQ(s->members[1]->symbol,
+ p->builder().Symbols().Register(member_b_ident));
+}
+
TEST_F(ParserImplTest, StructDecl_ParsesWithAttribute) {
auto p = parser(R"(
[[block]] struct B {
diff --git a/src/reader/wgsl/parser_impl_type_alias_test.cc b/src/reader/wgsl/parser_impl_type_alias_test.cc
index 45e2e32..301db6f 100644
--- a/src/reader/wgsl/parser_impl_type_alias_test.cc
+++ b/src/reader/wgsl/parser_impl_type_alias_test.cc
@@ -34,7 +34,7 @@
EXPECT_EQ(t.value->source.range, (Source::Range{{1u, 1u}, {1u, 13u}}));
}
-TEST_F(ParserImplTest, TypeDecl_ParsesStruct_Ident) {
+TEST_F(ParserImplTest, TypeDecl_Parses_Ident) {
auto p = parser("type a = B");
auto t = p->type_alias();
@@ -49,6 +49,25 @@
EXPECT_EQ(alias->source.range, (Source::Range{{1u, 1u}, {1u, 11u}}));
}
+TEST_F(ParserImplTest, TypeDecl_Unicode_Parses_Ident) {
+ const std::string ident = // "𝓶𝔂_𝓽𝔂𝓹𝓮"
+ "\xf0\x9d\x93\xb6\xf0\x9d\x94\x82\x5f\xf0\x9d\x93\xbd\xf0\x9d\x94\x82\xf0"
+ "\x9d\x93\xb9\xf0\x9d\x93\xae";
+
+ auto p = parser("type " + ident + " = i32");
+
+ auto t = p->type_alias();
+ EXPECT_FALSE(p->has_error());
+ EXPECT_FALSE(t.errored);
+ EXPECT_TRUE(t.matched);
+ ASSERT_NE(t.value, nullptr);
+ ASSERT_TRUE(t.value->Is<ast::Alias>());
+ auto* alias = t.value->As<ast::Alias>();
+ EXPECT_EQ(p->builder().Symbols().NameFor(alias->name), ident);
+ EXPECT_TRUE(alias->type->Is<ast::I32>());
+ EXPECT_EQ(alias->source.range, (Source::Range{{1u, 1u}, {1u, 37u}}));
+}
+
TEST_F(ParserImplTest, TypeDecl_MissingIdent) {
auto p = parser("type = i32");
auto t = p->type_alias();
diff --git a/src/reader/wgsl/parser_impl_variable_decl_test.cc b/src/reader/wgsl/parser_impl_variable_decl_test.cc
index 97bffb1..88a5873 100644
--- a/src/reader/wgsl/parser_impl_variable_decl_test.cc
+++ b/src/reader/wgsl/parser_impl_variable_decl_test.cc
@@ -18,7 +18,6 @@
namespace reader {
namespace wgsl {
namespace {
-
TEST_F(ParserImplTest, VariableDecl_Parses) {
auto p = parser("var my_var : f32");
auto v = p->variable_decl();
@@ -33,6 +32,25 @@
EXPECT_EQ(v->type->source.range, (Source::Range{{1u, 14u}, {1u, 17u}}));
}
+TEST_F(ParserImplTest, VariableDecl_Unicode_Parses) {
+ const std::string ident = // "𝖎𝖉𝖊𝖓𝖙𝖎𝖋𝖎𝖊𝖗123"
+ "\xf0\x9d\x96\x8e\xf0\x9d\x96\x89\xf0\x9d\x96\x8a\xf0\x9d\x96\x93"
+ "\xf0\x9d\x96\x99\xf0\x9d\x96\x8e\xf0\x9d\x96\x8b\xf0\x9d\x96\x8e"
+ "\xf0\x9d\x96\x8a\xf0\x9d\x96\x97\x31\x32\x33";
+
+ auto p = parser("var " + ident + " : f32");
+ auto v = p->variable_decl();
+ EXPECT_FALSE(p->has_error());
+ EXPECT_TRUE(v.matched);
+ EXPECT_FALSE(v.errored);
+ EXPECT_EQ(v->name, ident);
+ EXPECT_NE(v->type, nullptr);
+ EXPECT_TRUE(v->type->Is<ast::F32>());
+
+ EXPECT_EQ(v->source.range, (Source::Range{{1u, 5u}, {1u, 48u}}));
+ EXPECT_EQ(v->type->source.range, (Source::Range{{1u, 51u}, {1u, 54u}}));
+}
+
TEST_F(ParserImplTest, VariableDecl_Inferred_Parses) {
auto p = parser("var my_var = 1.0");
auto v = p->variable_decl(/*allow_inferred = */ true);
diff --git a/src/source.h b/src/source.h
index 4802d06..b40c994 100644
--- a/src/source.h
+++ b/src/source.h
@@ -27,7 +27,7 @@
/// Source describes a range of characters within a source file.
class Source {
public:
- /// FileContent describes the content of a source file.
+ /// FileContent describes the content of a source file encoded using utf-8.
class FileContent {
public:
/// Constructs the FileContent with the given file content.
@@ -78,7 +78,8 @@
public:
/// the 1-based line number. 0 represents no line information.
size_t line = 0;
- /// the 1-based column number. 0 represents no column information.
+ /// the 1-based column number in utf8-code units (bytes).
+ /// 0 represents no column information.
size_t column = 0;
/// Returns true of `this` location is lexicographically less than `rhs`
diff --git a/src/text/unicode.cc b/src/text/unicode.cc
index 12767bf..826eb5b 100644
--- a/src/text/unicode.cc
+++ b/src/text/unicode.cc
@@ -427,7 +427,9 @@
return out << "'U+" << std::hex << code_point.value << "'";
}
-std::pair<CodePoint, size_t> utf8::Decode(const uint8_t* ptr, size_t len) {
+namespace utf8 {
+
+std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len) {
if (len < 1) {
return {};
}
@@ -490,4 +492,15 @@
return {c, n};
}
+bool IsASCII(std::string_view str) {
+ for (auto c : str) {
+ if (c & 0x80) {
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace utf8
+
} // namespace tint::text
diff --git a/src/text/unicode.h b/src/text/unicode.h
index c1ef915..3c74221 100644
--- a/src/text/unicode.h
+++ b/src/text/unicode.h
@@ -69,6 +69,10 @@
/// If the next code point cannot be decoded then returns [0,0].
std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len);
+/// @returns true if all the utf-8 code points in the string are ASCII
+/// (code-points 0x00..0x7f).
+bool IsASCII(std::string_view);
+
} // namespace utf8
} // namespace tint::text
diff --git a/src/transform/glsl.cc b/src/transform/glsl.cc
index 46cc848..7744370 100644
--- a/src/transform/glsl.cc
+++ b/src/transform/glsl.cc
@@ -56,7 +56,8 @@
data.Add<SingleEntryPoint::Config>(cfg->entry_point);
}
manager.Add<Renamer>();
- data.Add<Renamer::Config>(Renamer::Target::kGlslKeywords);
+ data.Add<Renamer::Config>(Renamer::Target::kGlslKeywords,
+ /* preserve_unicode */ false);
manager.Add<Unshadow>();
// Attempt to convert `loop`s into for-loops. This is to try and massage the
diff --git a/src/transform/renamer.cc b/src/transform/renamer.cc
index 0b49e6e..6863c3d 100644
--- a/src/transform/renamer.cc
+++ b/src/transform/renamer.cc
@@ -21,13 +21,13 @@
#include "src/program_builder.h"
#include "src/sem/call.h"
#include "src/sem/member_accessor_expression.h"
+#include "src/text/unicode.h"
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer);
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Data);
TINT_INSTANTIATE_TYPEINFO(tint::transform::Renamer::Config);
-namespace tint {
-namespace transform {
+namespace tint::transform {
namespace {
@@ -1245,7 +1245,7 @@
Renamer::Data::Data(const Data&) = default;
Renamer::Data::~Data() = default;
-Renamer::Config::Config(Target t) : target(t) {}
+Renamer::Config::Config(Target t, bool pu) : target(t), preserve_unicode(pu) {}
Renamer::Config::Config(const Config&) = default;
Renamer::Config::~Config() = default;
@@ -1293,48 +1293,52 @@
Data::Remappings remappings;
Target target = Target::kAll;
+ bool preserve_unicode = false;
if (auto* cfg = inputs.Get<Config>()) {
target = cfg->target;
+ preserve_unicode = cfg->preserve_unicode;
}
ctx.ReplaceAll([&](Symbol sym_in) {
auto name_in = ctx.src->Symbols().NameFor(sym_in);
- switch (target) {
- case Target::kAll:
- // Always rename.
- break;
- case Target::kGlslKeywords:
- if (!std::binary_search(
- kReservedKeywordsGLSL,
- kReservedKeywordsGLSL +
- sizeof(kReservedKeywordsGLSL) / sizeof(const char*),
- name_in) &&
- name_in.compare(0, 3, "gl_")) {
- // No match, just reuse the original name.
- return ctx.dst->Symbols().New(name_in);
- }
- break;
- case Target::kHlslKeywords:
- if (!std::binary_search(
- kReservedKeywordsHLSL,
- kReservedKeywordsHLSL +
- sizeof(kReservedKeywordsHLSL) / sizeof(const char*),
- name_in)) {
- // No match, just reuse the original name.
- return ctx.dst->Symbols().New(name_in);
- }
- break;
- case Target::kMslKeywords:
- if (!std::binary_search(
- kReservedKeywordsMSL,
- kReservedKeywordsMSL +
- sizeof(kReservedKeywordsMSL) / sizeof(const char*),
- name_in)) {
- // No match, just reuse the original name.
- return ctx.dst->Symbols().New(name_in);
- }
- break;
+ if (preserve_unicode || text::utf8::IsASCII(name_in)) {
+ switch (target) {
+ case Target::kAll:
+ // Always rename.
+ break;
+ case Target::kGlslKeywords:
+ if (!std::binary_search(
+ kReservedKeywordsGLSL,
+ kReservedKeywordsGLSL +
+ sizeof(kReservedKeywordsGLSL) / sizeof(const char*),
+ name_in) &&
+ name_in.compare(0, 3, "gl_")) {
+ // No match, just reuse the original name.
+ return ctx.dst->Symbols().New(name_in);
+ }
+ break;
+ case Target::kHlslKeywords:
+ if (!std::binary_search(
+ kReservedKeywordsHLSL,
+ kReservedKeywordsHLSL +
+ sizeof(kReservedKeywordsHLSL) / sizeof(const char*),
+ name_in)) {
+ // No match, just reuse the original name.
+ return ctx.dst->Symbols().New(name_in);
+ }
+ break;
+ case Target::kMslKeywords:
+ if (!std::binary_search(
+ kReservedKeywordsMSL,
+ kReservedKeywordsMSL +
+ sizeof(kReservedKeywordsMSL) / sizeof(const char*),
+ name_in)) {
+ // No match, just reuse the original name.
+ return ctx.dst->Symbols().New(name_in);
+ }
+ break;
+ }
}
auto sym_out = ctx.dst->Sym();
@@ -1359,5 +1363,4 @@
std::make_unique<Data>(std::move(remappings)));
}
-} // namespace transform
-} // namespace tint
+} // namespace tint::transform
diff --git a/src/transform/renamer.h b/src/transform/renamer.h
index 4bec367..9b448e8 100644
--- a/src/transform/renamer.h
+++ b/src/transform/renamer.h
@@ -20,8 +20,7 @@
#include "src/transform/transform.h"
-namespace tint {
-namespace transform {
+namespace tint::transform {
/// Renamer is a Transform that renames all the symbols in a program.
class Renamer : public Castable<Renamer, Transform> {
@@ -63,7 +62,9 @@
struct Config : public Castable<Config, transform::Data> {
/// Constructor
/// @param tgt the targets to rename
- explicit Config(Target tgt);
+ /// @param keep_unicode if false, symbols with non-ascii code-points are
+ /// renamed
+ explicit Config(Target tgt, bool keep_unicode = false);
/// Copy constructor
Config(const Config&);
@@ -73,6 +74,9 @@
/// The targets to rename
Target const target = Target::kAll;
+
+ /// If false, symbols with non-ascii code-points are renamed.
+ bool preserve_unicode = false;
};
/// Constructor using a the configuration provided in the input Data
@@ -88,7 +92,6 @@
Output Run(const Program* program, const DataMap& data = {}) const override;
};
-} // namespace transform
-} // namespace tint
+} // namespace tint::transform
#endif // SRC_TRANSFORM_RENAMER_H_
diff --git a/src/transform/renamer_test.cc b/src/transform/renamer_test.cc
index 0cdbdb4..56c887c 100644
--- a/src/transform/renamer_test.cc
+++ b/src/transform/renamer_test.cc
@@ -23,6 +23,11 @@
namespace transform {
namespace {
constexpr const char kUnicodeIdentifier[] = // "𝖎𝖉𝖊𝖓𝖙𝖎𝖋𝖎𝖊𝖗123"
+ "\xf0\x9d\x96\x8e\xf0\x9d\x96\x89\xf0\x9d\x96\x8a\xf0\x9d\x96\x93"
+ "\xf0\x9d\x96\x99\xf0\x9d\x96\x8e\xf0\x9d\x96\x8b\xf0\x9d\x96\x8e"
+ "\xf0\x9d\x96\x8a\xf0\x9d\x96\x97\x31\x32\x33";
+
using ::testing::ContainerEq;
using RenamerTest = TransformTest;
@@ -184,6 +189,25 @@
EXPECT_THAT(data->remappings, ContainerEq(expected_remappings));
}
+TEST_F(RenamerTest, PreserveUnicode) {
+ auto src = R"(
+@stage(fragment)
+fn frag_main() {
+ var )" + std::string(kUnicodeIdentifier) +
+ R"( : i32;
+}
+)";
+
+ auto expect = src;
+
+ DataMap inputs;
+ inputs.Add<Renamer::Config>(Renamer::Target::kMslKeywords,
+ /* preserve_unicode */ true);
+ auto got = Run<Renamer>(src, inputs);
+
+ EXPECT_EQ(expect, str(got));
+}
+
TEST_F(RenamerTest, AttemptSymbolCollision) {
auto* src = R"(
@stage(vertex)
@@ -244,7 +268,8 @@
)";
DataMap inputs;
- inputs.Add<Renamer::Config>(Renamer::Target::kGlslKeywords);
+ inputs.Add<Renamer::Config>(Renamer::Target::kGlslKeywords,
+ /* preserve_unicode */ false);
auto got = Run<Renamer>(src, inputs);
EXPECT_EQ(expect, str(got));
@@ -269,7 +294,8 @@
)";
DataMap inputs;
- inputs.Add<Renamer::Config>(Renamer::Target::kHlslKeywords);
+ inputs.Add<Renamer::Config>(Renamer::Target::kHlslKeywords,
+ /* preserve_unicode */ false);
auto got = Run<Renamer>(src, inputs);
EXPECT_EQ(expect, str(got));
@@ -294,7 +320,8 @@
)";
DataMap inputs;
- inputs.Add<Renamer::Config>(Renamer::Target::kMslKeywords);
+ inputs.Add<Renamer::Config>(Renamer::Target::kMslKeywords,
+ /* preserve_unicode */ false);
auto got = Run<Renamer>(src, inputs);
EXPECT_EQ(expect, str(got));
@@ -528,7 +555,8 @@
// "void", // WGSL keyword
"volatile",
// "while", // WGSL keyword
- "writeonly"));
+ "writeonly",
+ kUnicodeIdentifier));
INSTANTIATE_TEST_SUITE_P(RenamerTestHlsl,
RenamerTestHlsl,
@@ -1142,8 +1170,9 @@
"vertexshader",
"virtual",
// "void", // WGSL keyword
- "volatile"));
-// "while" // WGSL reserved keyword
+ "volatile",
+ // "while" // WGSL reserved keyword
+ kUnicodeIdentifier));
INSTANTIATE_TEST_SUITE_P(
RenamerTestMsl,
@@ -1425,7 +1454,9 @@
"M_2_PI_H",
"M_2_SQRTPI_H",
"M_SQRT2_H",
- "M_SQRT1_2_H"));
+ "M_SQRT1_2_H",
+ // "while" // WGSL reserved keyword
+ kUnicodeIdentifier));
} // namespace
} // namespace transform
diff --git a/src/writer/hlsl/test_helper.h b/src/writer/hlsl/test_helper.h
index e55b9c1..337a1ac 100644
--- a/src/writer/hlsl/test_helper.h
+++ b/src/writer/hlsl/test_helper.h
@@ -89,7 +89,8 @@
transform::Manager transform_manager;
transform::DataMap transform_data;
transform_data.Add<transform::Renamer::Config>(
- transform::Renamer::Target::kHlslKeywords);
+ transform::Renamer::Target::kHlslKeywords,
+ /* preserve_unicode */ true);
transform_manager.Add<tint::transform::Renamer>();
auto result =
transform_manager.Run(&sanitized_result.program, transform_data);
diff --git a/test/unicode/comments.wgsl b/test/unicode/comments.wgsl
new file mode 100644
index 0000000..0e42055
--- /dev/null
+++ b/test/unicode/comments.wgsl
@@ -0,0 +1,11 @@
+// Øโ๊ซ์นวด๐ๅฒพ๐ฅโดตใจๅแฎ
+
+/*
+/* ๐๐ */
+เคจเคฎเคธเฅเคคเฅ เคฆเฅเคจเคฟเคฏเคพ
+*/
+
+@stage(fragment)
+fn /* ใใใซใกใฏไธ็ */ main( /* ไฝ ๅฅฝไธ็ */ ) {
+ // ์๋
ํ์ธ์ ์ธ๊ณ
+}
diff --git a/test/unicode/comments.wgsl.expected.glsl b/test/unicode/comments.wgsl.expected.glsl
new file mode 100644
index 0000000..bb42c59
--- /dev/null
+++ b/test/unicode/comments.wgsl.expected.glsl
@@ -0,0 +1,10 @@
+#version 310 es
+precision mediump float;
+
+void tint_symbol() {
+}
+
+void main() {
+ tint_symbol();
+ return;
+}
diff --git a/test/unicode/comments.wgsl.expected.hlsl b/test/unicode/comments.wgsl.expected.hlsl
new file mode 100644
index 0000000..61d6d57
--- /dev/null
+++ b/test/unicode/comments.wgsl.expected.hlsl
@@ -0,0 +1,3 @@
+void main() {
+ return;
+}
diff --git a/test/unicode/comments.wgsl.expected.msl b/test/unicode/comments.wgsl.expected.msl
new file mode 100644
index 0000000..ac4b77b
--- /dev/null
+++ b/test/unicode/comments.wgsl.expected.msl
@@ -0,0 +1,7 @@
+#include <metal_stdlib>
+
+using namespace metal;
+fragment void tint_symbol() {
+ return;
+}
+
diff --git a/test/unicode/comments.wgsl.expected.spvasm b/test/unicode/comments.wgsl.expected.spvasm
new file mode 100644
index 0000000..123699a
--- /dev/null
+++ b/test/unicode/comments.wgsl.expected.spvasm
@@ -0,0 +1,16 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 5
+; Schema: 0
+ OpCapability Shader
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main"
+ OpExecutionMode %main OriginUpperLeft
+ OpName %main "main"
+ %void = OpTypeVoid
+ %1 = OpTypeFunction %void
+ %main = OpFunction %void None %1
+ %4 = OpLabel
+ OpReturn
+ OpFunctionEnd
diff --git a/test/unicode/comments.wgsl.expected.wgsl b/test/unicode/comments.wgsl.expected.wgsl
new file mode 100644
index 0000000..3662a35
--- /dev/null
+++ b/test/unicode/comments.wgsl.expected.wgsl
@@ -0,0 +1,3 @@
+@stage(fragment)
+fn main() {
+}
diff --git a/test/unicode/indentifiers.wgsl b/test/unicode/indentifiers.wgsl
new file mode 100644
index 0000000..84c904a
--- /dev/null
+++ b/test/unicode/indentifiers.wgsl
@@ -0,0 +1,12 @@
+type ๐๐๐
โฏ_๐ถ = i32;
+type ๐ญ๐ฒ๐ฉ๐_๐ = f32;
+
+fn ๐ฏ๐พ๐ท๐ฌ๐ฝ๐ฒ๐ธ๐ท(แตแตสณแตแต : ๐๐๐
โฏ_๐ถ) -> ๐ญ๐ฒ๐ฉ๐_๐ {
+ return ๐ญ๐ฒ๐ฉ๐_๐(แตแตสณแตแต);
+}
+
+@stage(fragment)
+fn ๐๐๐ฅ๐ฃ๐ช๐ก๐ ๐๐๐ฅ() {
+ var ๐ซ๐๐ง๐๐๐๐ก๐ : ๐๐๐
โฏ_๐ถ;
+ let ๐๐๐๐๐๐ = ๐ฏ๐พ๐ท๐ฌ๐ฝ๐ฒ๐ธ๐ท(๐ซ๐๐ง๐๐๐๐ก๐);
+}
diff --git a/test/unicode/indentifiers.wgsl.expected.glsl b/test/unicode/indentifiers.wgsl.expected.glsl
new file mode 100644
index 0000000..d5d5790
--- /dev/null
+++ b/test/unicode/indentifiers.wgsl.expected.glsl
@@ -0,0 +1,16 @@
+#version 310 es
+precision mediump float;
+
+float tint_symbol_2(int tint_symbol_3) {
+ return float(tint_symbol_3);
+}
+
+void tint_symbol_4() {
+ int tint_symbol_5 = 0;
+ float tint_symbol_6 = tint_symbol_2(tint_symbol_5);
+}
+
+void main() {
+ tint_symbol_4();
+ return;
+}
diff --git a/test/unicode/indentifiers.wgsl.expected.hlsl b/test/unicode/indentifiers.wgsl.expected.hlsl
new file mode 100644
index 0000000..d82de3a
--- /dev/null
+++ b/test/unicode/indentifiers.wgsl.expected.hlsl
@@ -0,0 +1,9 @@
+float tint_symbol_2(int tint_symbol_3) {
+ return float(tint_symbol_3);
+}
+
+void tint_symbol_4() {
+ int tint_symbol_5 = 0;
+ const float tint_symbol_6 = tint_symbol_2(tint_symbol_5);
+ return;
+}
diff --git a/test/unicode/indentifiers.wgsl.expected.msl b/test/unicode/indentifiers.wgsl.expected.msl
new file mode 100644
index 0000000..ea0afa5
--- /dev/null
+++ b/test/unicode/indentifiers.wgsl.expected.msl
@@ -0,0 +1,13 @@
+#include <metal_stdlib>
+
+using namespace metal;
+float tint_symbol_2(int tint_symbol_3) {
+ return float(tint_symbol_3);
+}
+
+fragment void tint_symbol_4() {
+ int tint_symbol_5 = 0;
+ float const tint_symbol_6 = tint_symbol_2(tint_symbol_5);
+ return;
+}
+
diff --git a/test/unicode/indentifiers.wgsl.expected.spvasm b/test/unicode/indentifiers.wgsl.expected.spvasm
new file mode 100644
index 0000000..93b3e82
--- /dev/null
+++ b/test/unicode/indentifiers.wgsl.expected.spvasm
@@ -0,0 +1,33 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 17
+; Schema: 0
+ OpCapability Shader
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %________________________________________ "๐๐๐ฅ๐ฃ๐ช๐ก๐ ๐๐๐ฅ"
+ OpExecutionMode %________________________________________ OriginUpperLeft
+ OpName %________________________________ "๐ฏ๐พ๐ท๐ฌ๐ฝ๐ฒ๐ธ๐ท"
+ OpName %______________ "แตแตสณแตแต"
+ OpName %________________________________________ "๐๐๐ฅ๐ฃ๐ช๐ก๐ ๐๐๐ฅ"
+ OpName %_________________________________0 "๐ซ๐๐ง๐๐๐๐ก๐"
+ %float = OpTypeFloat 32
+ %int = OpTypeInt 32 1
+ %1 = OpTypeFunction %float %int
+ %void = OpTypeVoid
+ %8 = OpTypeFunction %void
+%_ptr_Function_int = OpTypePointer Function %int
+ %14 = OpConstantNull %int
+%________________________________ = OpFunction %float None %1
+%______________ = OpFunctionParameter %int
+ %6 = OpLabel
+ %7 = OpConvertSToF %float %______________
+ OpReturnValue %7
+ OpFunctionEnd
+%________________________________________ = OpFunction %void None %8
+ %11 = OpLabel
+%_________________________________0 = OpVariable %_ptr_Function_int Function %14
+ %16 = OpLoad %int %_________________________________0
+ %15 = OpFunctionCall %float %________________________________ %16
+ OpReturn
+ OpFunctionEnd
diff --git a/test/unicode/indentifiers.wgsl.expected.wgsl b/test/unicode/indentifiers.wgsl.expected.wgsl
new file mode 100644
index 0000000..d78ad34
--- /dev/null
+++ b/test/unicode/indentifiers.wgsl.expected.wgsl
@@ -0,0 +1,13 @@
+type ๐๐๐
โฏ_๐ถ = i32;
+
+type ๐ญ๐ฒ๐ฉ๐_๐ = f32;
+
+fn ๐ฏ๐พ๐ท๐ฌ๐ฝ๐ฒ๐ธ๐ท(แตแตสณแตแต : ๐๐๐
โฏ_๐ถ) -> ๐ญ๐ฒ๐ฉ๐_๐ {
+ return ๐ญ๐ฒ๐ฉ๐_๐(แตแตสณแตแต);
+}
+
+@stage(fragment)
+fn ๐๐๐ฅ๐ฃ๐ช๐ก๐ ๐๐๐ฅ() {
+ var ๐ซ๐๐ง๐๐๐๐ก๐ : ๐๐๐
โฏ_๐ถ;
+ let ๐๐๐๐๐๐ = ๐ฏ๐พ๐ท๐ฌ๐ฝ๐ฒ๐ธ๐ท(๐ซ๐๐ง๐๐๐๐ก๐);
+}