usr bin env python coding utf-8 usage parser py usr share emacs 23 lis

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# usage:
# parser.py < /usr/share/emacs/23.1/lisp/loaddefs.el > /dev/dsp
import sys
import re
# NOTE(review): nothing in the visible code reads or updates this counter;
# the emitter keeps its own counter instead -- confirm whether it is still needed.
identifiers = 0
class Node:
    """A parsed element: a type tag paired with its payload.

    The parser in this file creates nodes tagged 'list', 'symbol',
    'number' and 'string'.
    """

    def __init__(self, type, value):
        # Store the tag and the payload exactly as given.
        self.type, self.value = type, value

    def __repr__(self):
        shown_value = repr(self.value)
        return '<Node {0} {1}>'.format(self.type, shown_value)
def parse_number(s):
    """Return *s* when every '|'-separated piece of it is a numeric literal.

    A piece qualifies when it is an optionally signed decimal with an
    optional fractional part and an optional exponent.  If any piece does
    not qualify, None is returned instead.
    """
    pieces = s.split('|')
    if all(re.match('^[+-]?([0-9]*\\.)?[0-9]+([Ee][+-]?[0-9]+)?$', piece)
           for piece in pieces):
        return s
    return None
def parse_symbol(s):
    """Convert a bare token into a Node.

    A token accepted by parse_number becomes a 'number' node holding the
    token text; the exact token 'nil' becomes an empty 'list' node; any
    other token becomes a 'symbol' node holding the lower-cased text.
    """
    if parse_number(s):
        return Node('number', s)
    if s != 'nil':
        return Node('symbol', s.lower())
    return Node('list', [])
def parse_string(s, pos):
    """Scan the double-quoted string literal that opens at ``s[pos]``.

    Args:
        s: the complete source text.
        pos: index of the opening double quote.

    Returns:
        A ``(node, pos)`` tuple: a 'string' Node holding the raw text
        between the quotes (backslashes and the characters they escape
        are kept verbatim), and the index of the closing quote.

    Raises:
        SyntaxError: if the input ends before a closing quote is found.
    """
    start = pos
    pos += 1  # step over the opening quote
    escaping = False
    chars = []
    end = len(s)
    while pos < end:
        ch = s[pos]
        if ch == '"' and not escaping:
            break
        # Every other character, including the backslash itself and
        # whatever it escapes, is copied through unchanged.
        chars.append(ch)
        escaping = ch == '\\' and not escaping
        pos += 1
    else:
        # The loop ran off the end of the input without seeing the quote.
        raise SyntaxError(
            'unterminated string literal starting at offset %d' % start)
    return Node('string', ''.join(chars)), pos  # TODO: string unescaping
def parse_list(s, pos=0):
    """Parse list elements from ``s`` starting at ``pos`` up to a ')'.

    The opening parenthesis is expected to have been consumed by the
    caller already.  Strings, ';' line comments, nested lists, bare
    tokens and leading quote (') / unquote (,) markers are recognised.

    Args:
        s: the complete source text.
        pos: index of the first character after the opening parenthesis.

    Returns:
        A ``(node, pos)`` tuple: a 'list' Node holding the parsed
        elements, and the index of the ')' that closed the list.

    Raises:
        SyntaxError: if the input ends before the closing ')' is found,
            including when it ends inside a ';' comment.
    """
    objects = []
    # One-element lists act as mutable cells so the nested helpers can
    # update them (the code must also run where ``nonlocal`` is missing).
    quoted = [0]  # net count of pending markers: >0 quote, <0 unquote
    symbol = ['']  # characters of the bare token collected so far
    end = len(s)

    def add(o):
        # Wrap the element once per pending quote/unquote marker, then
        # append it to the list being built.
        depth = quoted[0]
        if depth:
            quotator = 'unquote' if depth < 0 else 'quote'
            for _ in range(abs(depth)):
                o = Node('list', [Node('symbol', quotator), o])
            quoted[0] = 0
        objects.append(o)

    def add_symbol():
        # Flush the token collected so far, if there is one.
        if symbol[0]:
            add(parse_symbol(symbol[0]))
            symbol[0] = ''

    while pos < end:
        i = s[pos]
        if i == '"':
            add_symbol()
            node, pos = parse_string(s, pos)
            add(node)
        elif i == ';':
            add_symbol()
            # Skip to the end of the line.  Stopping at the end of input
            # turns an unterminated comment into the SyntaxError below
            # instead of an IndexError.
            while pos < end and s[pos] != '\n':
                pos += 1
        elif i in "',":
            if symbol[0]:
                # NOTE(review): a marker that directly follows token
                # characters only ends the token; it is not counted as a
                # quote/unquote marker.
                add_symbol()
            else:
                quoted[0] += 1 if i == "'" else -1
        elif i == '(':
            add_symbol()
            node, pos = parse_list(s, pos + 1)
            add(node)
        elif i == ')':
            add_symbol()
            break
        elif i in ' \r\n\t':
            add_symbol()
        else:
            symbol[0] += i
        # Step past the character (or past the closing delimiter that the
        # string / nested-list parser stopped on).
        pos += 1
    if pos >= end:
        raise SyntaxError("unexpected end of input: missing ')'")
    return Node('list', objects), pos
def inspect(n, indent=0):
    """Write an indented outline of the tree rooted at ``n`` to stderr.

    A 'list' node is printed as ``list:`` followed by its children one
    level deeper; any other node is printed as its type and its value.

    Args:
        n: a node exposing ``type`` and ``value`` attributes.
        indent: nesting depth; each line starts with ``indent`` spaces
            followed by one separating space.
    """
    pad = ' ' * indent
    if n.type == 'list':
        sys.stderr.write('%s list:\n' % pad)
        for child in n.value:
            inspect(child, indent + 1)
    else:
        sys.stderr.write('%s %s %s\n' % (pad, n.type, n.value))
# Next id to hand out; compile() assigns ids in emission order.
n_of_ids = 0


def compile(n):
    """Emit one stdout line for ``n`` (after its children) and return its id.

    Each line has the form ``<id> <payload>`` where the payload is:
      * ``l <child ids...>`` for a 'list' node,
      * ``s <character codes...>`` for a 'string' node,
      * ``S <name>`` for a 'symbol' node,
      * ``n <text>`` for a 'number' node,
      * empty for any other node type.

    Children of a list are emitted first, so they receive smaller ids than
    the list itself.  The module-level counter ``n_of_ids`` is advanced by
    one for every emitted line.

    Args:
        n: a node exposing ``type`` and ``value`` attributes.

    Returns:
        The integer id assigned to ``n``.
    """
    global n_of_ids
    kind = n.type
    if kind == 'list':
        child_ids = [compile(child) for child in n.value]
        s = 'l ' + ' '.join(map(str, child_ids))
    elif kind == 'string':
        s = 's ' + ' '.join(str(ord(ch)) for ch in n.value)
    elif kind == 'symbol':
        s = 'S ' + n.value
    elif kind == 'number':
        s = 'n ' + n.value
    else:
        s = ''
    # Read the counter only after the children have taken their ids.
    node_id = n_of_ids
    sys.stdout.write('%s %s\n' % (node_id, s))
    n_of_ids = node_id + 1
    return node_id
# The parser assumes the opening parenthesis is already behind it, so the
# whole input is wrapped as the tail of an implicit "(do ...)" form.
source = 'do %s)' % sys.stdin.read()
if isinstance(source, bytes):
    # Byte-string input (Python 2 stdin) is decoded as UTF-8; text input
    # is used as-is.
    source = source.decode('utf-8')
node, pos = parse_list(source)
# parse_list stops on the ')' that closes the list; unless that is the ')'
# appended above, the input contained a closing parenthesis too many.
if pos != len(source) - 1:
    raise SyntaxError("unmatched ')' before end of input")
inspect(node)
compile(node)
# Final line: the id following the last emitted one, 'e', and the root id.
sys.stdout.write('%s e %s\n' % (n_of_ids, n_of_ids - 1))