#!/usr/bin/env python3
"""Lexical Analyzer"""
# LexicalAnalyzer.py (forked from chyyuu/compiler_lab)
# Code for Section 4.3.1 (Lexical Analysis) of "Understanding Computation:
# From Simple Machines to Impossible Programs", ported to Python 3.
# Author: Chai Fei
import re


class LexicalAnalyzer(object):
    """Lexical analysis.

    Splits a source string into a list of token symbols.
    """
    GRAMMAR = [
        { 'token': 'i', 'pattern': r'if' },          # 'if' keyword
        { 'token': 'e', 'pattern': r'else' },        # 'else' keyword
        { 'token': 'w', 'pattern': r'while' },       # 'while' keyword
        { 'token': 'd', 'pattern': r'do-nothing' },  # 'do-nothing' keyword
        { 'token': '(', 'pattern': r'\(' },          # left parenthesis
        { 'token': ')', 'pattern': r'\)' },          # right parenthesis
        { 'token': '{', 'pattern': r'\{' },          # left brace
        { 'token': '}', 'pattern': r'\}' },          # right brace
        { 'token': ';', 'pattern': r';' },           # semicolon
        { 'token': '=', 'pattern': r'=' },           # assignment sign
        { 'token': '+', 'pattern': r'\+' },          # plus sign
        { 'token': '*', 'pattern': r'\*' },          # multiplication sign
        { 'token': '<', 'pattern': r'<' },           # less-than sign
        { 'token': 'n', 'pattern': r'[0-9]+' },      # number literal
        { 'token': 'b', 'pattern': r'true|false' },  # boolean literal
        { 'token': 'v', 'pattern': r'[a-z]+' }       # variable name
    ]
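
    # When several patterns match the start of the input, rule_matching keeps
    # the longest match; on a tie, max() keeps the rule listed first in GRAMMAR.
    # So 'true' lexes as a boolean 'b' (a tie between 'b' and 'v'), while
    # 'falsehood' lexes as a single variable 'v', since [a-z]+ matches more
    # characters than 'true|false' does.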

    def __init__(self, string):
        self.string = string

    @property
    def analyze(self):
        tokens = []
        while self.more_tokens:
            tokens.append(self.next_token)
        return tokens

    @property
    def more_tokens(self):
        return self.string != ''

    @property
    def next_token(self):
        # Match a rule at the start of the remaining input, consume the
        # matched text, and return that rule's token symbol.
        rule, match = self.rule_matching(self.string)
        self.string = self.string_after(match)
        return rule['token']

    def rule_matching(self, string):
        # Try every rule at the beginning of the string and keep the one with
        # the longest match; raises ValueError if no rule matches at all.
        grammar = self.__class__.GRAMMAR
        matches = [self.match_at_beginning(rule['pattern'], string) for rule in grammar]
        rules_with_matches = [[rule, match] for rule, match in zip(grammar, matches) if match is not None]
        return self.rule_with_longest_match(rules_with_matches)

    def match_at_beginning(self, pattern, string):
        result = re.match(pattern, string)
        if result is None:
            return None
        return result.group(0)

    def rule_with_longest_match(self, rules_with_matches):
        return max(rules_with_matches, key=lambda value: len(value[1]))

    def string_after(self, match):
        # match is always a prefix of self.string, so this drops the matched
        # text plus any whitespace that follows it.
        index = self.string.find(match) + len(match)
        return self.string[index:].strip()


## tests: print the token stream for a few example programs
print('y = x * 7')
print(LexicalAnalyzer('y = x * 7').analyze)
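# expected token stream: ['v', '=', 'v', '*', 'n']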
print('\n')
print('while (x < 5) { x = x * 3 }')
print(LexicalAnalyzer('while (x < 5) { x = x * 3 }').analyze)
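# expected token stream: ['w', '(', 'v', '<', 'n', ')', '{', 'v', '=', 'v', '*', 'n', '}']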
print('\n')
print('if (x < 10) { y = true; x = 0 } else { do-nothing }')
print(LexicalAnalyzer('if (x < 10) { y = true; x = 0 } else { do-nothing }').analyze)
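# expected token stream: ['i', '(', 'v', '<', 'n', ')', '{', 'v', '=', 'b', ';', 'v', '=', 'n', '}', 'e', '{', 'd', '}']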
print('\n')
print('x = false')
print(LexicalAnalyzer('x = false').analyze)
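# expected token stream: ['v', '=', 'b']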
print('\n')
print('x = falsehood')
print(LexicalAnalyzer('x = falsehood').analyze)
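# expected token stream: ['v', '=', 'v']  ('falsehood' lexes as one variable, not the boolean 'false')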