-
Notifications
You must be signed in to change notification settings - Fork 0
/
scanner.go
326 lines (300 loc) · 6.25 KB
/
scanner.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
package main
// scanner is the package-level scanner state read and updated by every
// scanning helper in this file.
var scanner Scanner
// Scanner tracks the lexer's position within the source text.
type Scanner struct {
// Source is the text being scanned.
Source string
// Start is the byte offset at which the token currently being scanned begins.
Start int
// Current is the byte offset of the next byte to be consumed.
Current int
// Line is the current line number; initScanner starts it at 1.
Line int
}
// initScanner resets the package-level scanner so that it reads source from
// its first byte, with line counting starting at 1.
func initScanner(source string) {
	scanner = Scanner{
		Source:  source,
		Start:   0,
		Current: 0,
		Line:    1,
	}
}
// Token is a single lexeme produced by the scanner. It stores no text of its
// own: the lexeme is the Length bytes of *Source beginning at offset Start.
type Token struct {
// Type is the kind of token.
Type TokenType
// Line is the value of scanner.Line at the moment the token was created.
Line int
Start int // N.B. integer offset into source, not a C-pointer
// Length is the number of bytes in the lexeme.
Length int
// Source is the string that Start and Length index into. Error tokens point
// at their message rather than at the scanned text.
Source *string
}
// scanToken skips leading whitespace and comments, then scans and returns the
// next token. It returns a TOKEN_EOF token once isAtEnd reports the end of the
// source, and an error token for a character that starts no known token.
func scanToken() Token {
	skipWhitespace()
	scanner.Start = scanner.Current
	if isAtEnd() {
		return makeToken(TOKEN_EOF)
	}

	c := advanceScanner()

	// Tokens of variable length are handed off to their own scanners.
	switch {
	case isAlpha(c):
		return identifier()
	case isDigit(c):
		return number()
	case c == '"':
		// Named makeString because string is a predeclared identifier in Go.
		return makeString()
	}

	// orEqual yields the two-character token when the next byte is '=' (which
	// match consumes), and the single-character token otherwise.
	orEqual := func(withEqual, alone TokenType) Token {
		if match('=') {
			return makeToken(withEqual)
		}
		return makeToken(alone)
	}

	switch c {
	// Single-character tokens.
	case '(':
		return makeToken(TOKEN_LEFT_PAREN)
	case ')':
		return makeToken(TOKEN_RIGHT_PAREN)
	case '{':
		return makeToken(TOKEN_LEFT_BRACE)
	case '}':
		return makeToken(TOKEN_RIGHT_BRACE)
	case ';':
		return makeToken(TOKEN_SEMICOLON)
	case ',':
		return makeToken(TOKEN_COMMA)
	case '.':
		return makeToken(TOKEN_DOT)
	case '-':
		return makeToken(TOKEN_MINUS)
	case '+':
		return makeToken(TOKEN_PLUS)
	case '/':
		return makeToken(TOKEN_SLASH)
	case '*':
		return makeToken(TOKEN_STAR)

	// One- or two-character tokens.
	case '!':
		return orEqual(TOKEN_BANG_EQUAL, TOKEN_BANG)
	case '=':
		return orEqual(TOKEN_EQUAL_EQUAL, TOKEN_EQUAL)
	case '<':
		return orEqual(TOKEN_LESS_EQUAL, TOKEN_LESS)
	case '>':
		return orEqual(TOKEN_GREATER_EQUAL, TOKEN_GREATER)
	}

	return errorToken("unexpected character.")
}
// isAlpha reports whether c is an ASCII letter or an underscore, i.e. a byte
// that may start an identifier.
func isAlpha(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return c == '_'
	}
}
// identifier consumes the remaining letters, digits and underscores of the
// current lexeme and returns it as either a keyword token or an identifier
// token, as decided by identifierType.
func identifier() Token {
	for {
		next := peek()
		if !isAlpha(next) && !isDigit(next) {
			break
		}
		advanceScanner()
	}
	kind := identifierType()
	return makeToken(kind)
}
// identifierType classifies the current lexeme, Source[Start:Current], as one
// of the reserved words or, failing that, as a plain identifier. The match is
// exact and case-sensitive.
func identifierType() TokenType {
	switch scanner.Source[scanner.Start:scanner.Current] {
	case "and":
		return TOKEN_AND
	case "class":
		return TOKEN_CLASS
	case "else":
		return TOKEN_ELSE
	case "false":
		return TOKEN_FALSE
	case "for":
		return TOKEN_FOR
	case "fun":
		return TOKEN_FUN
	case "if":
		return TOKEN_IF
	case "nil":
		return TOKEN_NIL
	case "or":
		return TOKEN_OR
	case "print":
		return TOKEN_PRINT
	case "return":
		return TOKEN_RETURN
	case "super":
		return TOKEN_SUPER
	case "this":
		return TOKEN_THIS
	case "true":
		return TOKEN_TRUE
	case "var":
		return TOKEN_VAR
	case "while":
		return TOKEN_WHILE
	default:
		return TOKEN_IDENTIFIER
	}
}
// checkKeyword returns tokenType when the current lexeme is exactly
// start+length bytes long and the length bytes beginning at offset start
// within the lexeme equal rest; otherwise it returns TOKEN_IDENTIFIER.
func checkKeyword(start, length int, rest string, tokenType TokenType) TokenType {
	if scanner.Current-scanner.Start != start+length {
		return TOKEN_IDENTIFIER
	}
	// The length check above guarantees this slice lies inside the lexeme.
	from := scanner.Start + start
	if scanner.Source[from:from+length] != rest {
		return TOKEN_IDENTIFIER
	}
	return tokenType
}
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large
	// value and fails the comparison just like anything above '9'.
	return c-'0' < 10
}
// number consumes the rest of a numeric literal — a run of digits optionally
// followed by '.' and at least one more digit — and returns it as a
// TOKEN_NUMBER token.
func number() Token {
	// consumeDigits advances past a run of consecutive digits.
	consumeDigits := func() {
		for isDigit(peek()) {
			advanceScanner()
		}
	}

	consumeDigits()

	// A '.' only belongs to the number when a digit follows it.
	if hasFraction := peek() == '.' && isDigit(peekNext()); hasFraction {
		advanceScanner() // the '.'
		consumeDigits()
	}

	return makeToken(TOKEN_NUMBER)
}
// makeString scans the remainder of a string literal whose opening quote has
// already been consumed. It returns a TOKEN_STRING token spanning both quotes,
// or an error token when no closing quote is found. Newlines inside the
// literal are allowed and advance scanner.Line.
func makeString() Token {
	for peek() != '"' && !isAtEnd() {
		if peek() == '\n' {
			scanner.Line++
		}
		advanceScanner()
	}

	// Decide by looking at the current byte rather than at isAtEnd: isAtEnd is
	// already true on the final byte of Source, so a literal whose closing
	// quote is the very last byte would otherwise be reported as unterminated.
	if peek() != '"' {
		return errorToken("unterminated string.")
	}

	advanceScanner() // the closing quote
	return makeToken(TOKEN_STRING)
}
// skipWhitespace advances the scanner past spaces, tabs, carriage returns,
// newlines (bumping scanner.Line for each) and "//" line comments, stopping at
// the first byte that is none of these.
func skipWhitespace() {
	for {
		switch peek() {
		case '\n':
			scanner.Line++
			advanceScanner()
		case ' ', '\r', '\t':
			advanceScanner()
		case '/':
			// A lone '/' is the division operator, not a comment.
			if peekNext() != '/' {
				return
			}
			// Skip the comment body but leave the newline for the next
			// iteration, so that it is counted in scanner.Line.
			for peek() != '\n' && !isAtEnd() {
				advanceScanner()
			}
		default:
			return
		}
	}
}
// peek returns the byte at the current position without consuming it. Past
// the end of Source it returns 0, imitating a NUL-terminated string.
func peek() byte {
	if scanner.Current < len(scanner.Source) {
		return scanner.Source[scanner.Current]
	}
	return 0
}
func peekNext() byte {
if isAtEnd() {
return ' '
}
return scanner.Source[scanner.Current+1]
}
// advanceScanner consumes the byte at the current position and returns it.
// It performs no bounds check of its own, so indexing panics if the current
// position is already past the end of Source.
func advanceScanner() byte {
	consumed := scanner.Current
	scanner.Current = consumed + 1
	return scanner.Source[consumed]
}
// match consumes the current byte and returns true only when it equals
// expected; otherwise, or when isAtEnd reports the end of the source, it
// leaves the scanner untouched and returns false.
func match(expected byte) bool {
	if isAtEnd() || scanner.Source[scanner.Current] != expected {
		return false
	}
	scanner.Current++
	return true
}
// makeToken builds a token of the given type covering the current lexeme,
// Source[Start:Current], stamped with the scanner's current line.
func makeToken(tokenType TokenType) Token {
	begin, end := scanner.Start, scanner.Current

	var tok Token
	tok.Type = tokenType
	tok.Line = scanner.Line
	tok.Start = begin
	tok.Length = end - begin
	// Tokens reference the scanner's source text instead of copying it.
	tok.Source = &scanner.Source
	return tok
}
// errorToken builds a TOKEN_ERROR token carrying message. Unlike ordinary
// tokens, its Source points at the message itself, with Start 0 and Length
// spanning the whole message.
func errorToken(message string) Token {
	tok := Token{Type: TOKEN_ERROR, Line: scanner.Line}
	tok.Source = &message
	tok.Start = 0
	tok.Length = len(message)
	return tok
}
// isAtEnd reports whether at most one byte of Source remains unconsumed; it
// is therefore already true while Current still indexes the final byte.
//
// NOTE(review): callers that check isAtEnd first never scan that final byte
// as part of a token. Presumably the source handed to initScanner always ends
// in a newline or other terminator — confirm against the caller.
func isAtEnd() bool {
	remaining := len(scanner.Source) - scanner.Current
	return remaining <= 1
}
// TokenType identifies the kind of a Token.
type TokenType byte
// Token kinds. Values are assigned sequentially by iota, starting at 0 for
// TOKEN_LEFT_PAREN, so the order of this list determines each numeric value.
const (
// Single-character tokens.
TOKEN_LEFT_PAREN TokenType = iota
TOKEN_RIGHT_PAREN
TOKEN_LEFT_BRACE
TOKEN_RIGHT_BRACE
TOKEN_COMMA
TOKEN_DOT
TOKEN_MINUS
TOKEN_PLUS
TOKEN_SEMICOLON
TOKEN_SLASH
TOKEN_STAR
// One or two character tokens.
TOKEN_BANG
TOKEN_BANG_EQUAL
TOKEN_EQUAL
TOKEN_EQUAL_EQUAL
TOKEN_GREATER
TOKEN_GREATER_EQUAL
TOKEN_LESS
TOKEN_LESS_EQUAL
// Literals.
TOKEN_IDENTIFIER
TOKEN_STRING
TOKEN_NUMBER
// Keywords.
TOKEN_AND
TOKEN_CLASS
TOKEN_ELSE
TOKEN_FALSE
TOKEN_FOR
TOKEN_FUN
TOKEN_IF
TOKEN_NIL
TOKEN_OR
TOKEN_PRINT
TOKEN_RETURN
TOKEN_SUPER
TOKEN_THIS
TOKEN_TRUE
TOKEN_VAR
TOKEN_WHILE
// Special tokens: a scan error (see errorToken) and end of input.
TOKEN_ERROR
TOKEN_EOF
)