| // Copyright 2021 The Tint Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
// Package lexer provides a basic lexer for the Tint intrinsic definition
// language.
| package lexer |
| |
| import ( |
| "fmt" |
| "unicode" |
| |
| "dawn.googlesource.com/tint/tools/src/cmd/intrinsic-gen/tok" |
| ) |
| |
| // Lex produces a list of tokens for the given source code |
| func Lex(src []rune, filepath string) ([]tok.Token, error) { |
| l := lexer{ |
| tok.Location{Line: 1, Column: 1, Rune: 0, Filepath: filepath}, |
| src, |
| []tok.Token{}, |
| } |
| if err := l.lex(); err != nil { |
| return nil, err |
| } |
| return l.tokens, nil |
| } |
| |
| type lexer struct { |
| loc tok.Location |
| runes []rune |
| tokens []tok.Token |
| } |
| |
| // lex() lexes the source, populating l.tokens |
| func (l *lexer) lex() error { |
| for { |
| switch l.peek(0) { |
| case 0: |
| return nil |
| case ' ', '\t': |
| l.next() |
| case '\n': |
| l.next() |
| case '<': |
| l.tok(1, tok.Lt) |
| case '>': |
| l.tok(1, tok.Gt) |
| case '(': |
| l.tok(1, tok.Lparen) |
| case ')': |
| l.tok(1, tok.Rparen) |
| case '{': |
| l.tok(1, tok.Lbrace) |
| case '}': |
| l.tok(1, tok.Rbrace) |
| case ':': |
| l.tok(1, tok.Colon) |
| case ',': |
| l.tok(1, tok.Comma) |
| case '|': |
| l.tok(1, tok.Or) |
| case '"': |
| start := l.loc |
| l.next() // Skip opening quote |
| n := l.count(toFirst('\n', '"')) |
| if l.peek(n) != '"' { |
| return fmt.Errorf("%v unterminated string", start) |
| } |
| l.tok(n, tok.String) |
| l.next() // Skip closing quote |
| default: |
| switch { |
| case l.peek(1) == '/': |
| l.skip(l.count(toFirst('\n'))) |
| l.next() // Consume newline |
| case l.match("[[", tok.Ldeco): |
| case l.match("]]", tok.Rdeco): |
| case l.match("->", tok.Arrow): |
| case l.match("fn", tok.Function): |
| case l.match("enum", tok.Enum): |
| case l.match("type", tok.Type): |
| case l.match("match", tok.Match): |
| case unicode.IsLetter(l.peek(0)) || l.peek(0) == '_': |
| l.tok(l.count(alphaNumericOrUnderscore), tok.Identifier) |
| case unicode.IsNumber(l.peek(0)): |
| l.tok(l.count(unicode.IsNumber), tok.Integer) |
| default: |
| return fmt.Errorf("%v: unexpected '%v'", l.loc, string(l.runes[0])) |
| } |
| } |
| } |
| } |
| |
| // next() consumes and returns the next rune in the source, or 0 if reached EOF |
| func (l *lexer) next() rune { |
| if len(l.runes) > 0 { |
| r := l.runes[0] |
| l.runes = l.runes[1:] |
| l.loc.Rune++ |
| if r == '\n' { |
| l.loc.Line++ |
| l.loc.Column = 1 |
| } else { |
| l.loc.Column++ |
| } |
| return r |
| } |
| return 0 |
| } |
| |
| // skip() consumes the next `n` runes in the source |
| func (l *lexer) skip(n int) { |
| for i := 0; i < n; i++ { |
| l.next() |
| } |
| } |
| |
| // peek() returns the rune `i` runes ahead of the current position |
| func (l *lexer) peek(i int) rune { |
| if i >= len(l.runes) { |
| return 0 |
| } |
| return l.runes[i] |
| } |
| |
// predicate is a rune test that can be passed to count(). It reports whether
// the given rune should be counted.
type predicate func(rune) bool
| |
| // count() returns the number of sequential runes from the current position that |
| // match the predicate `p` |
| func (l *lexer) count(p predicate) int { |
| for i := 0; i < len(l.runes); i++ { |
| if !p(l.peek(i)) { |
| return i |
| } |
| } |
| return len(l.runes) |
| } |
| |
| // tok() appends a new token of kind `k` using the next `n` runes. |
| // The next `n` runes are consumed by tok(). |
| func (l *lexer) tok(n int, k tok.Kind) { |
| start := l.loc |
| runes := l.runes[:n] |
| l.skip(n) |
| end := l.loc |
| |
| src := tok.Source{S: start, E: end} |
| l.tokens = append(l.tokens, tok.Token{Kind: k, Source: src, Runes: runes}) |
| } |
| |
| // match() checks whether the next runes are equal to `s`. If they are, then |
| // these runes are used to append a new token of kind `k`, and match() returns |
| // true. If the next runes are not equal to `s` then false is returned, and no |
| // runes are consumed. |
| func (l *lexer) match(s string, kind tok.Kind) bool { |
| runes := []rune(s) |
| if len(l.runes) < len(runes) { |
| return false |
| } |
| for i, r := range runes { |
| if l.runes[i] != r { |
| return false |
| } |
| } |
| l.tok(len(runes), kind) |
| return true |
| } |
| |
| // toFirst() returns a predicate that returns true if the rune is not in `runes` |
| // toFirst() is intended to be used with count(), so `count(toFirst('x'))` will |
| // count up to, but not including the number of consecutive runes that are not |
| // 'x'. |
| func toFirst(runes ...rune) predicate { |
| return func(r rune) bool { |
| for _, t := range runes { |
| if t == r { |
| return false |
| } |
| } |
| return true |
| } |
| } |
| |
// alphaNumericOrUnderscore() reports whether the rune `r` is an underscore, a
// letter (as defined by unicode.IsLetter) or a number (as defined by
// unicode.IsNumber).
func alphaNumericOrUnderscore(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r):
		return true
	default:
		return unicode.IsNumber(r)
	}
}