@@ -3,20 +3,18 @@ package lexer
33import (
44 "fmt"
55 "strings"
6- "unicode/utf8"
76
87 "github.com/expr-lang/expr/file"
98)
109
11- func Lex (source * file.Source ) ([]Token , error ) {
10+ func Lex (source file.Source ) ([]Token , error ) {
1211 l := & lexer {
13- input : source . Content () ,
12+ source : source ,
1413 tokens : make ([]Token , 0 ),
14+ start : 0 ,
15+ end : 0 ,
1516 }
16-
17- l .loc = file.Location {Line : 1 , Column : 0 }
18- l .prev = l .loc
19- l .startLoc = l .loc
17+ l .commit ()
2018
2119 for state := root ; state != nil ; {
2220 state = state (l )
@@ -30,34 +28,25 @@ func Lex(source *file.Source) ([]Token, error) {
3028}
3129
3230type lexer struct {
33- input string
31+ source file. Source
3432 tokens []Token
35- start , end int // current position in input
36- width int // last rune width
37- startLoc file.Location // start location
38- prev , loc file.Location // prev location of end location, end location
33+ start , end int
3934 err * file.Error
4035}
4136
4237const eof rune = - 1
4338
39+ func (l * lexer ) commit () {
40+ l .start = l .end
41+ }
42+
4443func (l * lexer ) next () rune {
45- if l .end >= len (l .input ) {
46- l .width = 0
44+ if l .end >= len (l .source ) {
45+ l .end ++
4746 return eof
4847 }
49- r , w := utf8 .DecodeRuneInString (l .input [l .end :])
50- l .width = w
51- l .end += w
52-
53- l .prev = l .loc
54- if r == '\n' {
55- l .loc .Line ++
56- l .loc .Column = 0
57- } else {
58- l .loc .Column ++
59- }
60-
48+ r := l .source [l .end ]
49+ l .end ++
6150 return r
6251}
6352
@@ -68,8 +57,7 @@ func (l *lexer) peek() rune {
6857}
6958
7059func (l * lexer ) backup () {
71- l .end -= l .width
72- l .loc = l .prev
60+ l .end --
7361}
7462
7563func (l * lexer ) emit (t Kind ) {
@@ -78,35 +66,39 @@ func (l *lexer) emit(t Kind) {
7866
7967func (l * lexer ) emitValue (t Kind , value string ) {
8068 l .tokens = append (l .tokens , Token {
81- Location : l . startLoc ,
69+ Location : file. Location { From : l . start , To : l . end } ,
8270 Kind : t ,
8371 Value : value ,
8472 })
85- l .start = l .end
86- l .startLoc = l .loc
73+ l .commit ()
8774}
8875
8976func (l * lexer ) emitEOF () {
77+ from := l .end - 2
78+ if from < 0 {
79+ from = 0
80+ }
81+ to := l .end - 1
82+ if to < 0 {
83+ to = 0
84+ }
9085 l .tokens = append (l .tokens , Token {
91- Location : l . prev , // Point to previous position for better error messages.
86+ Location : file. Location { From : from , To : to },
9287 Kind : EOF ,
9388 })
94- l .start = l .end
95- l .startLoc = l .loc
89+ l .commit ()
9690}
9791
9892func (l * lexer ) skip () {
99- l .start = l .end
100- l .startLoc = l .loc
93+ l .commit ()
10194}
10295
10396func (l * lexer ) word () string {
104- return l .input [l .start :l .end ]
105- }
106-
107- func (l * lexer ) ignore () {
108- l .start = l .end
109- l .startLoc = l .loc
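97+ // TODO: this bounds check should be redundant, but next() still increments l.end when it returns eof, so l.end can run past len(l.source); that is the likely reason CI fuzz tests fail without it.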
98+ if l .start > len (l .source ) || l .end > len (l .source ) {
99+ return "__invalid__"
100+ }
101+ return string (l .source [l .start :l .end ])
110102}
111103
112104func (l * lexer ) accept (valid string ) bool {
@@ -132,18 +124,18 @@ func (l *lexer) skipSpaces() {
132124}
133125
134126func (l * lexer ) acceptWord (word string ) bool {
135- pos , loc , prev := l .end , l . loc , l . prev
127+ pos := l .end
136128
137129 l .skipSpaces ()
138130
139131 for _ , ch := range word {
140132 if l .next () != ch {
141- l .end , l . loc , l . prev = pos , loc , prev
133+ l .end = pos
142134 return false
143135 }
144136 }
145137 if r := l .peek (); r != ' ' && r != eof {
146- l .end , l . loc , l . prev = pos , loc , prev
138+ l .end = pos
147139 return false
148140 }
149141
@@ -153,8 +145,11 @@ func (l *lexer) acceptWord(word string) bool {
153145func (l * lexer ) error (format string , args ... any ) stateFn {
154146 if l .err == nil { // show first error
155147 l .err = & file.Error {
156- Location : l .loc ,
157- Message : fmt .Sprintf (format , args ... ),
148+ Location : file.Location {
149+ From : l .end - 1 ,
150+ To : l .end ,
151+ },
152+ Message : fmt .Sprintf (format , args ... ),
158153 }
159154 }
160155 return nil
@@ -230,6 +225,6 @@ func (l *lexer) scanRawString(quote rune) (n int) {
230225 ch = l .next ()
231226 n ++
232227 }
233- l .emitValue (String , l . input [l .start + 1 :l .end - 1 ])
228+ l .emitValue (String , string ( l . source [l .start + 1 :l .end - 1 ]) )
234229 return
235230}
0 commit comments