blob: 1f25aea4e09577c9ae9bcab33848dce009b24289 [file] [log] [blame]
// Copyright 2021 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package lexer provides a basic lexer for the Tint intrinsic definition
// language
package lexer
import (
"fmt"
"unicode"
"dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/tok"
)
// Lex produces a list of tokens for the given source code
func Lex(src []rune, filepath string) ([]tok.Token, error) {
l := lexer{
tok.Location{Line: 1, Column: 1, Rune: 0, Filepath: filepath},
src,
[]tok.Token{},
}
if err := l.lex(); err != nil {
return nil, err
}
return l.tokens, nil
}
type lexer struct {
loc tok.Location
runes []rune
tokens []tok.Token
}
// lex() lexes the source, populating l.tokens
func (l *lexer) lex() error {
for {
switch l.peek(0) {
case 0:
return nil
case ' ', '\t':
l.next()
case '\n':
l.next()
case '<':
l.tok(1, tok.Lt)
case '>':
l.tok(1, tok.Gt)
case '(':
l.tok(1, tok.Lparen)
case ')':
l.tok(1, tok.Rparen)
case '{':
l.tok(1, tok.Lbrace)
case '}':
l.tok(1, tok.Rbrace)
case ':':
l.tok(1, tok.Colon)
case ',':
l.tok(1, tok.Comma)
case '|':
l.tok(1, tok.Or)
case '"':
start := l.loc
l.next() // Skip opening quote
n := l.count(toFirst('\n', '"'))
if l.peek(n) != '"' {
return fmt.Errorf("%v unterminated string", start)
}
l.tok(n, tok.String)
l.next() // Skip closing quote
default:
switch {
case l.peek(1) == '/':
l.skip(l.count(toFirst('\n')))
l.next() // Consume newline
case l.match("[[", tok.Ldeco):
case l.match("]]", tok.Rdeco):
case l.match("->", tok.Arrow):
case l.match("fn", tok.Function):
case l.match("enum", tok.Enum):
case l.match("type", tok.Type):
case l.match("match", tok.Match):
case unicode.IsLetter(l.peek(0)) || l.peek(0) == '_':
l.tok(l.count(alphaNumericOrUnderscore), tok.Identifier)
case unicode.IsNumber(l.peek(0)):
l.tok(l.count(unicode.IsNumber), tok.Integer)
default:
return fmt.Errorf("%v: unexpected '%v'", l.loc, string(l.runes[0]))
}
}
}
}
// next() consumes and returns the next rune in the source, or 0 if reached EOF
func (l *lexer) next() rune {
if len(l.runes) > 0 {
r := l.runes[0]
l.runes = l.runes[1:]
l.loc.Rune++
if r == '\n' {
l.loc.Line++
l.loc.Column = 1
} else {
l.loc.Column++
}
return r
}
return 0
}
// skip() consumes the next `n` runes in the source
func (l *lexer) skip(n int) {
for i := 0; i < n; i++ {
l.next()
}
}
// peek() returns the rune `i` runes ahead of the current position
func (l *lexer) peek(i int) rune {
if i >= len(l.runes) {
return 0
}
return l.runes[i]
}
// predicate is a function that can be passed to count()
type predicate func(r rune) bool
// count() returns the number of sequential runes from the current position that
// match the predicate `p`
func (l *lexer) count(p predicate) int {
for i := 0; i < len(l.runes); i++ {
if !p(l.peek(i)) {
return i
}
}
return len(l.runes)
}
// tok() appends a new token of kind `k` using the next `n` runes.
// The next `n` runes are consumed by tok().
func (l *lexer) tok(n int, k tok.Kind) {
start := l.loc
runes := l.runes[:n]
l.skip(n)
end := l.loc
src := tok.Source{S: start, E: end}
l.tokens = append(l.tokens, tok.Token{Kind: k, Source: src, Runes: runes})
}
// match() checks whether the next runes are equal to `s`. If they are, then
// these runes are used to append a new token of kind `k`, and match() returns
// true. If the next runes are not equal to `s` then false is returned, and no
// runes are consumed.
func (l *lexer) match(s string, kind tok.Kind) bool {
runes := []rune(s)
if len(l.runes) < len(runes) {
return false
}
for i, r := range runes {
if l.runes[i] != r {
return false
}
}
l.tok(len(runes), kind)
return true
}
// toFirst() returns a predicate that returns true if the rune is not in `runes`
// toFirst() is intended to be used with count(), so `count(toFirst('x'))` will
// count up to, but not including the number of consecutive runes that are not
// 'x'.
func toFirst(runes ...rune) predicate {
return func(r rune) bool {
for _, t := range runes {
if t == r {
return false
}
}
return true
}
}
// alphaNumericOrUnderscore() returns true if the rune `r` is a number, letter
// or underscore.
func alphaNumericOrUnderscore(r rune) bool {
return r == '_' || unicode.IsLetter(r) || unicode.IsNumber(r)
}