import re open text txt jump_matrix 14 14 14 14 14 -1 Symbol 14 Number

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import re
# Input file for the lexer. The relative path is resolved against the current
# working directory; the handle is iterated line by line by the driver loop below.
f = open('text.txt')
# Transition table of the lexer automaton: jump_matrix[state][group] -> next state.
#
# Row index = current state, key = character group as returned by get_group().
# Special targets:
#    0  back to the start state (the pending token is finished)
#   -1  error (illegal character)
# States, as implied by the transitions:
#    1 'i'    2 'if'
#    4 'e'    5 'el'    6 'els'    7 'else'
#    9 't'   10 'th'   11 'the'   12 'then'
#   14 identifier      15 number
#    3, 8, 13 are never targeted by any transition in this table.
# The '!' column is kept for compatibility; get_group() in this file never returns it.
#
# Fixed relative to the previous table (both were one-character typos that let
# malformed words be accepted as keywords):
#   * state 10, 't'      : 4  -> 14  ("tht..." no longer jumps into the 'else' chain)
#   * state 11, 'Number' : 11 -> 14  ("the5n" is no longer recognised as 'then')
jump_matrix = [
    {'i': 1, 'f': 14, 't': 9, 'h': 14, 'e': 4, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 15, 'Gap': 0, 'Any': -1},  # 0  start
    {'i': 14, 'f': 2, 't': 14, 'h': 14, 'e': 14, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 14, 'Gap': 0, 'Any': -1},  # 1  'i'
    {'i': 1, 'f': 14, 't': 9, 'h': 14, 'e': 4, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 15, 'Gap': 0, 'Any': -1},  # 2  'if'
    {'i': 1, 'f': 14, 't': 9, 'h': 14, 'e': 4, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 15, 'Gap': 0, 'Any': -1},  # 3  (unreached)
    {'i': 14, 'f': 14, 't': 14, 'h': 14, 'e': 14, 'n': 14, 'l': 5, 's': 14, '!': -1, 'Symbol': 14, 'Number': 14, 'Gap': 0, 'Any': -1},  # 4  'e'
    {'i': 14, 'f': 14, 't': 14, 'h': 14, 'e': 14, 'n': 14, 'l': 14, 's': 6, '!': -1, 'Symbol': 14, 'Number': 14, 'Gap': 0, 'Any': -1},  # 5  'el'
    {'i': 14, 'f': 14, 't': 14, 'h': 14, 'e': 7, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 14, 'Gap': 0, 'Any': -1},  # 6  'els'
    {'i': 1, 'f': 14, 't': 9, 'h': 14, 'e': 4, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 15, 'Gap': 0, 'Any': -1},  # 7  'else'
    {'i': 1, 'f': 14, 't': 9, 'h': 14, 'e': 4, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 15, 'Gap': 0, 'Any': -1},  # 8  (unreached)
    {'i': 14, 'f': 14, 't': 14, 'h': 10, 'e': 14, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 14, 'Gap': 0, 'Any': -1},  # 9  't'
    {'i': 14, 'f': 14, 't': 14, 'h': 14, 'e': 11, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 14, 'Gap': 0, 'Any': -1},  # 10 'th'
    {'i': 14, 'f': 14, 't': 14, 'h': 14, 'e': 14, 'n': 12, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 14, 'Gap': 0, 'Any': -1},  # 11 'the'
    {'i': 1, 'f': 14, 't': 9, 'h': 14, 'e': 4, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 15, 'Gap': 0, 'Any': -1},  # 12 'then'
    {'i': 1, 'f': 14, 't': 9, 'h': 14, 'e': 4, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 15, 'Gap': 0, 'Any': -1},  # 13 (unreached)
    {'i': 14, 'f': 14, 't': 14, 'h': 14, 'e': 14, 'n': 14, 'l': 14, 's': 14, '!': -1, 'Symbol': 14, 'Number': 14, 'Gap': 0, 'Any': -1},  # 14 identifier
    {'i': 0, 'f': 0, 't': 0, 'h': 0, 'e': 0, 'n': 0, 'l': 0, 's': 0, '!': -1, 'Symbol': 0, 'Number': 15, 'Gap': 0, 'Any': -1},  # 15 number
]
def get_group(x):
    """Classify one character into the column key used by the transition table.

    Returns:
        * the character itself for the keyword letters i, f, t, h, e, n, l, s;
        * 'Symbol' for any other ASCII letter;
        * 'Number' for an ASCII digit;
        * 'Gap' for whitespace (space, tab, LF, CR, VT, FF);
        * 'Any' for everything else.
    """
    # Letters that occur in "if", "then", "else" each have their own column.
    if x in ('i', 'f', 't', 'h', 'e', 'n', 'l', 's'):
        return x
    if 'a' <= x <= 'z' or 'A' <= x <= 'Z':
        return 'Symbol'
    if '0' <= x <= '9':
        return 'Number'
    # Every whitespace character is a separator, not just the plain space.
    # Previously tab / CR / LF fell through to 'Any' and pushed the automaton
    # into its error state.
    if x in (' ', '\t', '\n', '\r', '\x0b', '\x0c'):
        return 'Gap'
    return 'Any'
def print_my(state, _line_number, start_symbol, string):
    """Print one recognised token as '<KIND> (<line>, <column>): <text>'.

    Args:
        state: automaton state the token ended in; selects the token kind.
        _line_number: line number to report.
        start_symbol: column to report for the start of the token.
        string: raw token text; leading/trailing whitespace and any CR/LF
            are removed before printing.

    Returns:
        None when the cleaned text is empty (nothing is printed), otherwise 0.
        States that map to no token kind (e.g. 0) print nothing and return 0.
    """
    # Raw string: the pattern contains regex escapes (\s) that are not valid
    # Python string escapes.
    string = re.sub(r'^\s+|\n|\r|\s+$', '', string)
    if string == '':
        return
    # Every proper prefix of a keyword is an ordinary identifier when the token
    # ends there. States 4 ('e'), 5 ('el'), 9 ('t') and 10 ('th') used to be
    # missing, so those tokens were silently dropped.
    if state in (1, 4, 5, 6, 9, 10, 11, 14):
        label = 'IDENTIFIER'
    elif state == 15:
        label = 'NUMBER'
    elif state in (2, 7, 12):
        label = 'IF THEN ELSE'
    elif state == -1:
        label = 'ERROR'
    else:
        label = None
    if label is not None:
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print(label + ' (' + str(_line_number) + ', ' + str(start_symbol) + '): ' + string)
    return 0
def nex_token(_line, _line_number):
    """Tokenise one line with the jump_matrix automaton and print every token.

    Each character is classified with get_group() and fed to jump_matrix.
    Finished tokens are reported through print_my() together with
    _line_number and the 0-based column at which the token starts.

    Args:
        _line: text of the line (a trailing newline is fine).
        _line_number: line number passed through to print_my().

    Fixes over the previous version:
        * After an illegal character the state stayed at -1, so the next lookup
          read jump_matrix[-1], which Python resolves to the last row. The
          character is now reported as its own ERROR token and the automaton
          restarts from state 0.
        * A character that ends a token without being a gap (jump_matrix sends
          state 15 to 0 on letters) was stored in the buffer but never run
          through the automaton. It is now re-fed from state 0 so that it
          correctly opens the next token.
        * The columns reported on both of those paths were one too large.
    """
    string = ''
    start_symbol = 0
    current_state = 0
    for i, char in enumerate(_line):
        group = get_group(char)
        next_state = jump_matrix[current_state][group]

        if next_state == 0 and group != 'Gap':
            # Token boundary with no separator: flush the finished token and
            # let this same character start the next one from state 0.
            print_my(current_state, _line_number, start_symbol, string)
            string = ''
            start_symbol = i
            current_state = 0
            next_state = jump_matrix[0][group]

        if next_state == -1:
            # Illegal character: flush what was collected, report the character
            # itself (print_my drops it if it is only whitespace), start over.
            print_my(current_state, _line_number, start_symbol, string)
            print_my(-1, _line_number, i, char)
            string = ''
            start_symbol = i + 1
            current_state = 0
        elif next_state == 0:
            # Separator: the pending token (if any) is complete.
            print_my(current_state, _line_number, start_symbol, string)
            string = ''
            start_symbol = i + 1
            current_state = 0
        else:
            string += char
            current_state = next_state

    # Flush a token that runs up to the end of the line (no trailing newline).
    print_my(current_state, _line_number, start_symbol, string)
# Driver: run every line of the input file through the lexer.
# Line numbers are 0-based (enumerate default).
try:
    for line_number, line in enumerate(f):
        nex_token(line, line_number)
finally:
    # The handle opened at module level is not needed once the scan is over;
    # close it even if tokenising raised.
    f.close()