-
Notifications
You must be signed in to change notification settings - Fork 1
/
regex2nfa.py
308 lines (263 loc) · 11.8 KB
/
regex2nfa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
import collections
import json
import re
import random
import os
import ast
from natsort import natsorted
RANGES = "\[.-.\]|\[.-..-.\]"
LETTERS = "[A-Za-z]" # allows ranges of letters A-Z and a-z
DIGITS = "[0-9]" # allows rangesof numbers from 0-9
OPERATIONS = "\(|\)|\||\+|\*" # Matches for (,),|,+,*
def validate(regex):
if re.search(RANGES, regex):
raise Exception("Ranges in the expressions arent allowed !")
i = 0
while i < len(regex):
if not (re.search(LETTERS, regex[i]) or re.search(DIGITS, regex[i]) or re.search(OPERATIONS, regex[i])):
if regex[i] == "\\":
i += 1
else:
raise Exception(
"Special characters in regex must have \ before them !")
i += 1
try:
re.compile(regex)
except re.error:
raise Exception("Input Regex is incorrect !")
"""
Function: OrSolver()
Arguments: Index, Regex Expression, States, Next State
Description: This function is called when the 'regex2nfa' function reads '+' sign in the expression.
This function intializes the Oring_startState, Oring_prevstate, Oring_prevstart, Oring_nextstate by
incrementing one to 'next_state' that is passed by the 'regex2nfa' function.
A new state is created that indicates the start of the oring operation.Then a recursive call back to regex2nfa
with the regex expression starting from the element after the '+' operation to the end of the string,
the updated set of states, and the values indicating the next state, start state, prev state,
prev start indices.
regex-> The whole regex string, represents the element before '+', element after '+', and the '+'
Returns: the length of the regex to indicate the index which we will continue the rest of
The operations on the regex. The o_prevStart which is the oring previous start
which stands for the start state in the oring operation.
o_StartState
o_nextState
"""
def OrSolver(index, regex, states, o_nextState):
# Create states for the oring operation by incrementing one to the next_state
# Which was passed as an argument from the previous operation.
# Here we work with the Oring as a separate operation apart from
# The previous states
o_startState = o_nextState + 1
o_prevStart = o_nextState + 1
o_prevState = o_nextState + 1
o_nextState = o_nextState + 1
# create new state to indicate we are working with oring regex
states.update({"S"+str(o_nextState): {"terminalState": False}})
_, o_nextState, o_startState, o_prevState, o_prevStart, _ = regex2nfa(
regex, states, o_nextState, o_startState, o_prevState, o_prevStart)
return index+len(regex), o_prevStart, o_startState, o_nextState
"""
Function: Bracketsolver ()
Arguments: String inside the bracket, Next State index, Set of states
Description: This function takes the substring inside the bracket as an argument that is used to
recursively call regex2nfa
This function intializes the Bracket_startState, Bracket_prevstate, Bracket_prevstart, Bracket_nextstate by
incrementing one to 'next_state' that is passed by the 'regex2nfa' function
Returns: Bracket_prevStart => Start state before entering the bracket
Bracket_nextState => Last state of the bracket substring aka. the whole string inside the bracket
for example (a+b) 'a+b' is the substring
"""
def Bracketsolver(substring, next_state, states):
b_currentState = next_state+1
b_nextState = next_state+1
b_prevState = next_state+1
b_prevStart = next_state+1
states.update({"S"+str(b_currentState): {"terminalState": False}})
_, b_nextState, _, _, b_prevStart, _, = regex2nfa(
substring, states, b_nextState, b_currentState, b_prevState, b_prevStart)
return b_prevStart, b_nextState
"""
Function: regex2nfa()
Arguments:
Description:
Returns:
"""
def regex2nfa(regex, states, next_state, start_state, prev_state, prev_start):
i = 0
while i < len(regex):
if regex[i] == "\\":
#Taking the element after the backslash
i += 1
next_state, start_state, prev_state, prev_start, = CreateState(
regex, i, next_state, start_state, prev_state, prev_start, states)
i += 1
elif regex[i] == '(':
# Get the substring of the starting regex
subString = getSubString(regex, i+1)
prev, end = Bracketsolver(
subString, next_state, states)
states["S"+str(next_state)].update({"ε": "S"+str(prev)})
# update the states indices
next_state = end
start_state = next_state
prev_state = prev
# continue looping over the regex after that bracket
i = i + len(subString) + 2
elif regex[i] == '|' or regex[i] == '+':
# OrSolver here takes i+1 as an argument for the index parameter
# Where the '+' '|' represents the current index (i).
# 'i' represents the index that we will continue working from
# it takes the regex starting from the element after the '+' operation
# and operates on i
#
i, prev, start, end = OrSolver(
i+1, regex[i+1:], states, next_state)
# create new 2 states to connect the oring branches
states.update({"S"+str(end+1): {"terminalState": False,
" ε ": "S"+str(prev_start), " ε ": "S"+str(prev)}})
states.update({"S"+str(end+2): {"terminalState": False}})
states["S"+str(end)].update({"ε": "S"+str(end+2)})
states["S"+str(next_state)].update({"ε": "S"+str(end+2)})
# update the state indices
prev_state = end + 1
next_state = end + 2
start_state = next_state
prev_start = end + 1
elif regex[i] == '*':
next_state, start_state, prev_state, prev_start = CreateState(
regex, i, next_state, start_state, prev_state, prev_start, states)
i += 1
else:
next_state, start_state, prev_state, prev_start = CreateState(
regex, i, next_state, start_state, prev_state, prev_start, states)
i += 1
return states, next_state, start_state, prev_state, prev_start, i
def getSubString(regex, index):
startingBrackets = 1
closingBrackets = 0
subString = ""
regex = regex[index:]
for j in range(len(regex)):
if regex[j] == "(":
startingBrackets += 1
elif regex[j] == ")":
closingBrackets += 1
if(startingBrackets == closingBrackets):
break
subString += regex[j]
print(subString)
return subString
def CreateState(regex, index, next_state, start_state, prev_state, prev_start, states):
if regex[index] == "*":
# create two state and connect between them using tompthon rule as decribed in the slides
next_state += 1
states["S"+str(start_state)].update({" ε ": "S" +
str(prev_state), "ε ": "S"+str(next_state)})
states["S"+str(prev_state)].update({"ε ": "S"+str(next_state-1)})
states.update({"S"+str(next_state): {"terminalState": False}})
start_state = next_state
else:
next_state += 1
states["S"+str(start_state)
].update({"Transition "+regex[index]: "S"+str(next_state)})
states.update({"S"+str(next_state): {"terminalState": False}})
prev_state = start_state
start_state = next_state
return next_state, start_state, prev_state, prev_start
def prepareForDrawing(states, next_state, prev_start):
# make the last state as out state
states["S"+str(next_state)]["terminalState"] = True
# sort the state ascending
states = collections.OrderedDict(sorted(states.items()))
# loop over sorted states and save them as the given example to json file
# return the json file content to be displayed in graph format
states.update({"startingState": ("S" + str(prev_start))})
with open('out/nfa.json', 'w') as fp:
json.dump(states, fp, ensure_ascii=True)
fp.close()
print(states)
return states
# Function: transformAux
# Description
#
#
#
def transformAux(regex):
next_state = 0 # next state
start_state = 0 # current state
prev_state = 0 # prev state index, state before reptition that allows looping over the repeated expression
prev_start = 0 # New initial state
flag = False
states = {"S0": {"terminalState": False}}
_, next_state, _, _, prev_start, i = regex2nfa(
regex, states, next_state, start_state, prev_state, prev_start)
if i == len(regex):
nfa = prepareForDrawing(states, next_state, prev_start)
return nfa
def createFormalDescription():
with open('out/nfa.json', 'r') as fp:
states = json.load(fp)
fp.close()
print("*******************CREATE FORMAL DESCRIPTION************")
print(states)
# Initializating the Formal description object
formalDescription = {
"setOfStates": [""],
"setOfSymbols": [""],
"transitions": {},
"startState": "",
"setOfFinalStates": {""}
}
# Adding the start state to the formal description
finalStates = set()
# Taking a shallow copy of the original dictionary
modifiedStates = states.copy()
# Adding value of startState to the formalDescription
formalDescription['startState'] = modifiedStates['startingState']
# Removing startingState from the modifiedStates in order to loop
# On the States
del modifiedStates['startingState']
# Re-initializing the setOfStates list to be
# An empty set in order to add the states to it
formalDescription['setOfStates'] = set()
# Re-initializing the setOfSymbols to be an empty set
# In order to add all the symbols used to it
formalDescription['setOfSymbols'] = set()
# Looping over modifiedStates items which contains
# The states
for state, stateDict in modifiedStates.items():
# Adding each state to the setOfStates
formalDescription['setOfStates'].add(state)
# Looping over each state to find its
# terminalState and add it to the
# finalStates if it was True
for key, value in stateDict.items():
if key == 'terminalState':
if value == True:
finalStates.add(state)
# Looping over transitions to add it
# to the setOfSymbols
if key.startswith('Transition'):
# Splitting the transition by the splitter space
# which will be ['Transition','a'] for example
# So we will always pick the second element to add
# it to the setOfSymbols
transition = key.split(' ')
formalDescription["setOfSymbols"].add(transition[1])
# Sorting and adding the finalStates to setOfFinalStates in formalDescription
formalDescription["setOfStates"] = natsorted(
formalDescription["setOfStates"])
formalDescription["setOfFinalStates"] = finalStates
# Loop again in order to add the transitions
# to the formalDescription
setOfTransitions = {}
for state, stateDict in modifiedStates.items():
for key, value in stateDict.items():
if key.startswith('Transition') or key.startswith('ε'):
setOfTransitions.update({state: {key: value}})
# Sort the list of transitions ascendingly
# Then adding it to the formalDescription
setOfTransitions = collections.OrderedDict(
sorted(setOfTransitions.items()))
formalDescription["transitions"] = setOfTransitions
return formalDescription