blob: 903f9b17c30a99aba5386c8e870dacd9007cf55f [file] [log] [blame]
// Copyright 2020 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "src/reader/wgsl/lexer.h"
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <limits>
namespace tint {
namespace reader {
namespace wgsl {
namespace {
// Returns true if |c| is classified as whitespace by std::isspace.
bool is_whitespace(char c) {
  // The <cctype> classifiers are only defined for values representable as
  // unsigned char (or EOF). A plain char holding a byte >= 0x80 may be
  // negative, so convert before classifying.
  return std::isspace(static_cast<unsigned char>(c)) != 0;
}
} // namespace
// Constructs a lexer over |input|.
// Initializes input_ from |input| and len_ from the input's size narrowed to
// uint32_t.
Lexer::Lexer(const std::string& input)
: input_(input), len_(static_cast<uint32_t>(input.size())) {}
// Defaulted destructor.
Lexer::~Lexer() = default;
// Produces the next token from the input.
//
// Whitespace and '#' comments are skipped first. If the cursor is then at the
// end of the input a kEOF token is returned. Otherwise the matchers are tried
// in a fixed order (hex integer, float, integer, string, punctuation,
// identifier) and the first initialized token is returned. When no matcher
// accepts the input a kError token is returned.
Token Lexer::next() {
  skip_whitespace();
  skip_comments();

  if (is_eof()) {
    return {Token::Type::kEOF, make_source()};
  }

  // The order is significant: the numeric matchers must see a leading '-' or
  // '.' before the punctuation matcher would claim it.
  using Matcher = Token (Lexer::*)();
  const Matcher matchers[] = {
      &Lexer::try_hex_integer, &Lexer::try_float,       &Lexer::try_integer,
      &Lexer::try_string,      &Lexer::try_punctuation, &Lexer::try_ident,
  };

  for (const Matcher matcher : matchers) {
    auto candidate = (this->*matcher)();
    if (!candidate.IsUninitialized()) {
      return candidate;
    }
  }

  return {Token::Type::kError, make_source(), "invalid character found"};
}
// Returns a Source built from the lexer's current line_ and column_.
Source Lexer::make_source() const {
return Source{line_, column_};
}
// Returns true once the cursor (pos_) has reached or passed len_.
bool Lexer::is_eof() const {
return pos_ >= len_;
}
// Returns true if |ch| may start an identifier: a character accepted by
// std::isalpha, or an underscore.
bool Lexer::is_alpha(char ch) const {
  // std::isalpha is undefined for negative arguments other than EOF, and a
  // plain char may be negative for bytes >= 0x80; convert first.
  return std::isalpha(static_cast<unsigned char>(ch)) != 0 || ch == '_';
}
// Returns true if |ch| is a decimal digit according to std::isdigit.
bool Lexer::is_digit(char ch) const {
  // std::isdigit is undefined for negative arguments other than EOF, and a
  // plain char may be negative for bytes >= 0x80; convert first.
  return std::isdigit(static_cast<unsigned char>(ch)) != 0;
}
// Returns true if |ch| is accepted by is_alpha() (which includes '_') or by
// is_digit().
bool Lexer::is_alphanum(char ch) const {
return is_alpha(ch) || is_digit(ch);
}
// Returns true if |ch| is a hexadecimal digit according to std::isxdigit.
bool Lexer::is_hex(char ch) const {
  // std::isxdigit is undefined for negative arguments other than EOF, and a
  // plain char may be negative for bytes >= 0x80; convert first.
  return std::isxdigit(static_cast<unsigned char>(ch)) != 0;
}
// Returns true if the input contains |substr| starting at offset |pos|.
// Always returns false when |pos| is at or beyond the end of the input.
bool Lexer::matches(size_t pos, const std::string& substr) {
  if (pos >= input_.size()) {
    return false;
  }
  // compare() clamps the compared range to the end of the input exactly as
  // substr() does, but without allocating a temporary string on every probe.
  return input_.compare(pos, substr.size(), substr) == 0;
}
void Lexer::skip_whitespace() {
for (;;) {
auto pos = pos_;
while (!is_eof() && is_whitespace(input_[pos_])) {
if (matches(pos_, "\n")) {
pos_++;
line_++;
column_ = 1;
continue;
}
pos_++;
column_++;
}
skip_comments();
// If the cursor didn't advance we didn't remove any whitespace
// so we're done.
if (pos == pos_)
break;
}
}
void Lexer::skip_comments() {
if (!matches(pos_, "#")) {
return;
}
while (!is_eof() && !matches(pos_, "\n")) {
pos_++;
column_++;
}
}
// Attempts to lex a floating point literal at the cursor.
//
// Accepted shape: optional '-', zero or more digits, a mandatory '.', zero or
// more digits, then an optional exponent ('e', optional sign, one or more
// digits). A lone "." or "-." is rejected.
//
// Returns an uninitialized token (cursor untouched) when the text is not a
// float, a kError token when the value does not fit in an f32, and a float
// token otherwise. On the last two outcomes the cursor and column_ are
// advanced past the literal.
Token Lexer::try_float() {
  auto start = pos_;
  auto end = pos_;
  auto source = make_source();

  if (matches(end, "-")) {
    end++;
  }
  while (end < len_ && is_digit(input_[end])) {
    end++;
  }

  // A float must contain a decimal point.
  if (end >= len_ || !matches(end, ".")) {
    return {};
  }
  end++;

  while (end < len_ && is_digit(input_[end])) {
    end++;
  }

  // Parse the exponent if one exists.
  if (end < len_ && matches(end, "e")) {
    end++;
    if (end < len_ && (matches(end, "+") || matches(end, "-"))) {
      end++;
    }

    auto exp_start = end;
    while (end < len_ && is_digit(input_[end])) {
      end++;
    }

    // An 'e' must be followed by at least one exponent digit.
    if (exp_start == end) {
      return {};
    }
  }

  auto str = input_.substr(start, end - start);
  if (str == "." || str == "-.") {
    return {};
  }

  pos_ = end;
  column_ += (end - start);

  // Convert only the characters that were matched above. Parsing from the
  // unbounded input tail would let strtod consume text this lexer did not
  // accept (for example an upper-case 'E' exponent), producing a value that
  // disagrees with the characters actually consumed.
  auto res = strtod(str.c_str(), nullptr);

  // Positive value whose magnitude is below the smallest normalized f32.
  if (res > 0 && res < static_cast<double>(std::numeric_limits<float>::min())) {
    return {Token::Type::kError, source, "f32 (" + str + ") too small"};
  }
  // Negative value below the most negative finite f32.
  if (res < static_cast<double>(std::numeric_limits<float>::lowest())) {
    return {Token::Type::kError, source, "f32 (" + str + ") too small"};
  }
  if (res > static_cast<double>(std::numeric_limits<float>::max())) {
    return {Token::Type::kError, source, "f32 (" + str + ") too large"};
  }

  return {source, static_cast<float>(res)};
}
// Converts the integer text in input_[start, end) to a token.
//
// |base| is forwarded to strtoll (the callers in this file pass 10 or 16).
// If the character at the cursor is 'u' the literal is unsigned: the suffix
// is consumed and a u32 token is produced, or a kError token when the value
// does not fit in 32 unsigned bits. Otherwise an i32 token is produced, or a
// kError token when the value is outside the i32 range.
//
// Both callers in this file set pos_ to |end| before calling, so the cursor
// is expected to sit immediately after the digits.
Token Lexer::build_token_from_int_if_possible(const Source& source,
                                              size_t start,
                                              size_t end,
                                              int32_t base) {
  auto res = strtoll(input_.c_str() + start, nullptr, base);

  if (matches(pos_, "u")) {
    // The suffix is part of the literal; consume it so that it is not lexed
    // again as a separate identifier token on the next call.
    pos_++;
    column_++;

    if (static_cast<uint64_t>(res) >
        static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())) {
      return {Token::Type::kError, source,
              "u32 (" + input_.substr(start, end - start) + ") too large"};
    }
    return {source, static_cast<uint32_t>(res)};
  }

  if (res < static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
    return {Token::Type::kError, source,
            "i32 (" + input_.substr(start, end - start) + ") too small"};
  }
  if (res > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
    return {Token::Type::kError, source,
            "i32 (" + input_.substr(start, end - start) + ") too large"};
  }
  return {source, static_cast<int32_t>(res)};
}
// Attempts to lex a hexadecimal integer literal at the cursor: an optional
// '-', the prefix "0x", then any run of hex digits.
//
// Returns an uninitialized token (cursor untouched) when the prefix is
// absent. Otherwise advances the cursor and column_ past the literal and
// returns whatever build_token_from_int_if_possible() produces for base 16.
Token Lexer::try_hex_integer() {
  auto start = pos_;
  auto end = pos_;
  auto source = make_source();

  if (matches(end, "-")) {
    end++;
  }
  if (!matches(end, "0x")) {
    return Token();
  }
  end += 2;

  // Bound the scan by |end| itself. is_eof() tests pos_, which does not move
  // during this loop and therefore cannot stop the scan at the end of the
  // input.
  while (end < len_ && is_hex(input_[end])) {
    end += 1;
  }

  pos_ = end;
  column_ += (end - start);
  return build_token_from_int_if_possible(source, start, end, 16);
}
// Attempts to lex a decimal integer literal at the cursor: an optional '-'
// followed by one or more digits.
//
// Returns an uninitialized token (cursor untouched) when there are no digits
// or when a multi-digit literal starts with '0'. Otherwise advances the
// cursor and column_ past the digits and returns whatever
// build_token_from_int_if_possible() produces for base 10.
Token Lexer::try_integer() {
  const auto begin = pos_;
  auto cursor = begin;
  auto source = make_source();

  if (matches(cursor, "-")) {
    ++cursor;
  }

  const auto digits_begin = cursor;
  while (cursor < len_ && is_digit(input_[cursor])) {
    ++cursor;
  }

  const auto digit_count = cursor - digits_begin;
  if (digit_count == 0) {
    return {};
  }

  // Leading zeros are not allowed, so a literal starting with '0' must be
  // exactly "0".
  if (digit_count > 1 && input_[digits_begin] == '0') {
    return {};
  }

  pos_ = cursor;
  column_ += (cursor - begin);
  return build_token_from_int_if_possible(source, begin, cursor, 10);
}
// Attempts to lex an identifier-shaped word at the cursor.
//
// The first character must satisfy is_alpha() (a letter or '_'); the word
// then extends over every following is_alphanum() character. The word is
// classified by check_reserved() first, then check_keyword(); if neither
// recognizes it a kIdentifier token carrying the word is returned.
// Returns an uninitialized token when the first character does not qualify.
Token Lexer::try_ident() {
  if (!is_alpha(input_[pos_])) {
    return {};
  }

  auto source = make_source();
  const auto begin = pos_;
  for (; !is_eof() && is_alphanum(input_[pos_]); pos_++) {
    column_++;
  }
  auto word = input_.substr(begin, pos_ - begin);

  auto reserved = check_reserved(source, word);
  if (!reserved.IsUninitialized()) {
    return reserved;
  }

  auto keyword = check_keyword(source, word);
  if (!keyword.IsUninitialized()) {
    return keyword;
  }

  return {Token::Type::kIdentifier, source, word};
}
// Attempts to lex a double-quoted string literal at the cursor.
//
// Returns an uninitialized token when the cursor is not on a '"'. Otherwise
// returns a kStringLiteral token holding the text between the quotes. No
// escape sequences are interpreted. A string that reaches the end of the
// input without a closing quote is still returned as a literal containing
// everything after the opening quote.
Token Lexer::try_string() {
  if (!matches(pos_, R"(")")) {
    return {};
  }

  auto source = make_source();

  pos_++;  // Opening quote.
  auto start = pos_;
  while (pos_ < len_ && !matches(pos_, R"(")")) {
    pos_++;
  }
  auto end = pos_;

  // Only step over a closing quote that is actually present. For an
  // unterminated string the cursor must stay at the end of the input rather
  // than move one past it, and column_ must not count a missing character.
  if (pos_ < len_) {
    pos_++;
  }

  // Characters consumed: the opening quote plus everything from |start| up to
  // the final cursor position.
  column_ += (pos_ - start) + 1;

  return {Token::Type::kStringLiteral, source,
          input_.substr(start, end - start)};
}
// Attempts to lex a punctuation or operator token at the cursor.
//
// The candidates are probed in table order and the first match wins, so every
// two-character token is listed before the one-character token that shares
// its first character. On a match the cursor and column_ advance by the
// token's length. When nothing matches, the returned token has type
// kUninitialized and the cursor is untouched.
Token Lexer::try_punctuation() {
  struct Punctuation {
    const char* text;
    int length;
    Token::Type type;
  };
  static const Punctuation kPunctuation[] = {
      {"[[", 2, Token::Type::kAttrLeft},
      {"]]", 2, Token::Type::kAttrRight},
      {"(", 1, Token::Type::kParenLeft},
      {")", 1, Token::Type::kParenRight},
      {"[", 1, Token::Type::kBraceLeft},
      {"]", 1, Token::Type::kBraceRight},
      {"{", 1, Token::Type::kBracketLeft},
      {"}", 1, Token::Type::kBracketRight},
      {"&&", 2, Token::Type::kAndAnd},
      {"&", 1, Token::Type::kAnd},
      {"/", 1, Token::Type::kForwardSlash},
      {"!=", 2, Token::Type::kNotEqual},
      {"!", 1, Token::Type::kBang},
      {"::", 2, Token::Type::kNamespace},
      {":", 1, Token::Type::kColon},
      {",", 1, Token::Type::kComma},
      {"==", 2, Token::Type::kEqualEqual},
      {"=", 1, Token::Type::kEqual},
      {">=", 2, Token::Type::kGreaterThanEqual},
      {">", 1, Token::Type::kGreaterThan},
      {"<=", 2, Token::Type::kLessThanEqual},
      {"<", 1, Token::Type::kLessThan},
      {"%", 1, Token::Type::kMod},
      {"->", 2, Token::Type::kArrow},
      {"-", 1, Token::Type::kMinus},
      {".", 1, Token::Type::kPeriod},
      {"+", 1, Token::Type::kPlus},
      {"||", 2, Token::Type::kOrOr},
      {"|", 1, Token::Type::kOr},
      {";", 1, Token::Type::kSemicolon},
      {"*", 1, Token::Type::kStar},
      {"^", 1, Token::Type::kXor},
  };

  auto source = make_source();
  auto type = Token::Type::kUninitialized;

  for (const auto& candidate : kPunctuation) {
    if (matches(pos_, candidate.text)) {
      type = candidate.type;
      pos_ += candidate.length;
      column_ += candidate.length;
      break;
    }
  }

  return {type, source};
}
// Maps |str| to its keyword token.
//
// Returns a token of the matching keyword type, located at |source| and
// carrying the keyword text, when |str| is exactly one of the keywords in the
// table below. Returns an uninitialized token otherwise.
Token Lexer::check_keyword(const Source& source, const std::string& str) {
  struct Keyword {
    const char* name;
    Token::Type type;
  };
  static const Keyword kKeywords[] = {
      {"all", Token::Type::kAll},
      {"any", Token::Type::kAny},
      {"array", Token::Type::kArray},
      {"as", Token::Type::kAs},
      {"binding", Token::Type::kBinding},
      {"block", Token::Type::kBlock},
      {"bool", Token::Type::kBool},
      {"break", Token::Type::kBreak},
      {"builtin", Token::Type::kBuiltin},
      {"case", Token::Type::kCase},
      {"cast", Token::Type::kCast},
      {"compute", Token::Type::kCompute},
      {"const", Token::Type::kConst},
      {"continue", Token::Type::kContinue},
      {"continuing", Token::Type::kContinuing},
      {"coarse", Token::Type::kCoarse},
      {"default", Token::Type::kDefault},
      {"dot", Token::Type::kDot},
      {"dpdx", Token::Type::kDpdx},
      {"dpdy", Token::Type::kDpdy},
      {"else", Token::Type::kElse},
      {"elseif", Token::Type::kElseIf},
      {"entry_point", Token::Type::kEntryPoint},
      {"f32", Token::Type::kF32},
      {"fallthrough", Token::Type::kFallthrough},
      {"false", Token::Type::kFalse},
      {"fine", Token::Type::kFine},
      {"fn", Token::Type::kFn},
      {"frag_coord", Token::Type::kFragCoord},
      {"frag_depth", Token::Type::kFragDepth},
      {"fragment", Token::Type::kFragment},
      {"front_facing", Token::Type::kFrontFacing},
      {"function", Token::Type::kFunction},
      {"fwidth", Token::Type::kFwidth},
      {"global_invocation_id", Token::Type::kGlobalInvocationId},
      {"i32", Token::Type::kI32},
      {"if", Token::Type::kIf},
      {"image", Token::Type::kImage},
      {"import", Token::Type::kImport},
      {"in", Token::Type::kIn},
      {"instance_idx", Token::Type::kInstanceIdx},
      {"is_nan", Token::Type::kIsNan},
      {"is_inf", Token::Type::kIsInf},
      {"is_finite", Token::Type::kIsFinite},
      {"is_normal", Token::Type::kIsNormal},
      {"kill", Token::Type::kKill},
      {"local_invocation_id", Token::Type::kLocalInvocationId},
      {"local_invocation_idx", Token::Type::kLocalInvocationIdx},
      {"location", Token::Type::kLocation},
      {"loop", Token::Type::kLoop},
      {"mat2x2", Token::Type::kMat2x2},
      {"mat2x3", Token::Type::kMat2x3},
      {"mat2x4", Token::Type::kMat2x4},
      {"mat3x2", Token::Type::kMat3x2},
      {"mat3x3", Token::Type::kMat3x3},
      {"mat3x4", Token::Type::kMat3x4},
      {"mat4x2", Token::Type::kMat4x2},
      {"mat4x3", Token::Type::kMat4x3},
      {"mat4x4", Token::Type::kMat4x4},
      {"nop", Token::Type::kNop},
      {"num_workgroups", Token::Type::kNumWorkgroups},
      {"offset", Token::Type::kOffset},
      {"out", Token::Type::kOut},
      {"outer_product", Token::Type::kOuterProduct},
      {"position", Token::Type::kPosition},
      {"premerge", Token::Type::kPremerge},
      {"private", Token::Type::kPrivate},
      {"ptr", Token::Type::kPtr},
      {"push_constant", Token::Type::kPushConstant},
      {"regardless", Token::Type::kRegardless},
      {"return", Token::Type::kReturn},
      {"set", Token::Type::kSet},
      {"storage_buffer", Token::Type::kStorageBuffer},
      {"struct", Token::Type::kStruct},
      {"switch", Token::Type::kSwitch},
      {"true", Token::Type::kTrue},
      {"type", Token::Type::kType},
      {"u32", Token::Type::kU32},
      {"uniform", Token::Type::kUniform},
      {"uniform_constant", Token::Type::kUniformConstant},
      {"unless", Token::Type::kUnless},
      {"var", Token::Type::kVar},
      {"vec2", Token::Type::kVec2},
      {"vec3", Token::Type::kVec3},
      {"vec4", Token::Type::kVec4},
      {"vertex", Token::Type::kVertex},
      {"vertex_idx", Token::Type::kVertexIdx},
      {"void", Token::Type::kVoid},
      {"workgroup", Token::Type::kWorkgroup},
      {"workgroup_size", Token::Type::kWorkgroupSize},
  };

  for (const auto& keyword : kKeywords) {
    if (str == keyword.name) {
      // |str| equals the keyword text here, so it serves as the token value.
      return {keyword.type, source, str};
    }
  }
  return {};
}
// Checks whether |str| is one of the reserved words listed below.
//
// Returns a kReservedKeyword token located at |source| and carrying the word
// when it is; returns an uninitialized token otherwise.
Token Lexer::check_reserved(const Source& source, const std::string& str) {
  static const char* const kReservedWords[] = {
      "asm", "bf16", "do",  "enum",    "f16", "f64", "for", "i8",
      "i16", "i64",  "let", "typedef", "u8",  "u16", "u64",
  };

  for (const char* word : kReservedWords) {
    if (str == word) {
      // |str| equals the reserved word here, so it serves as the token value.
      return {Token::Type::kReservedKeyword, source, str};
    }
  }
  return {};
}
} // namespace wgsl
} // namespace reader
} // namespace tint