#!/usr/bin/env python
# -*- coding: utf-8 -*-
# usage:
# parser.py < /usr/share/emacs/23.1/lisp/loaddefs.el > /dev/dsp
#
# Reads Lisp source from stdin, parses it into a tree of Node objects,
# dumps the tree to stderr and prints a flat, numbered listing of every
# node to stdout.  The code runs unchanged on Python 2.6+ and Python 3.

import sys
import re

identifiers = 0

# Same pattern parse_number() has always used, compiled once.
_NUMBER_RE = re.compile(r'^[+-]?([0-9]*\.)?[0-9]+([Ee][+-]?[0-9]+)?$')

# Characters that end a symbol token.
_WHITESPACE = frozenset(' \r\n\t')
_DELIMITERS = frozenset('"\';,()') | _WHITESPACE


class Node:
    """A parsed object.

    `type` is one of 'list', 'symbol', 'number' or 'string'.  `value` is
    a list of child nodes for 'list' and the token text otherwise.
    """

    def __init__(self, type, value):
        self.type = type
        self.value = value

    def __repr__(self):
        # The original format template was empty, so every node printed
        # as ''; show the type and the value instead.
        return '<{0} {1}>'.format(self.type, repr(self.value))


def parse_number(s):
    """Return `s` if it looks like a number, otherwise None.

    The token is split on '|' and every piece must match the numeric
    pattern (optional sign, optional fraction, optional exponent).
    """
    if all(_NUMBER_RE.match(piece) for piece in s.split('|')):
        return s
    return None


def parse_symbol(s):
    """Turn a bare token into a Node.

    Numeric tokens become 'number' nodes, the exact token 'nil' becomes
    an empty list, and anything else becomes a lower-cased 'symbol'.
    """
    if parse_number(s) is not None:
        return Node('number', s)
    if s == 'nil':
        return Node('list', [])
    return Node('symbol', s.lower())


def parse_string(s, pos):
    """Parse the string literal whose opening quote is at `s[pos]`.

    Returns `(node, end)` where `end` is the index of the closing quote.
    Backslash escapes are kept verbatim in the node value.
    Raises SyntaxError if the literal is not terminated.
    """
    # TODO: string unescaping
    start = pos + 1
    pos = start
    end = len(s)
    while pos < end:
        ch = s[pos]
        if ch == '\\':
            # Skip the backslash and the character it escapes, so an
            # escaped quote does not terminate the literal.
            pos += 2
        elif ch == '"':
            return Node('string', s[start:pos]), pos
        else:
            pos += 1
    raise SyntaxError('unterminated string literal')


def _apply_quotes(node, depth):
    """Wrap `node` in abs(depth) (quote ...) or (unquote ...) lists.

    Positive depth means quote, negative means unquote, zero returns
    the node unchanged.
    """
    if depth:
        name = 'unquote' if depth < 0 else 'quote'
        for _ in range(abs(depth)):
            node = Node('list', [Node('symbol', name), node])
    return node


def parse_list(s, pos=0):
    """Parse list items starting at `s[pos]`, up to the matching ')'.

    The opening parenthesis is expected to have been consumed already.
    Returns `(node, end)` where `node` is a 'list' Node and `end` is the
    index of the closing ')'.

    Each "'" adds one level of quoting and each "," removes one; the net
    count is applied to the next object read.  ';' starts a comment that
    runs to the end of the line.  Any other run of non-delimiter
    characters is a symbol or number token (so '`' and '@' are simply
    part of a token).

    Raises SyntaxError if the input ends before the closing ')'.
    """
    objects = []
    quote_depth = 0  # > 0: pending quotes, < 0: pending unquotes
    end = len(s)
    while pos < end:
        ch = s[pos]
        node = None
        if ch == ')':
            return Node('list', objects), pos
        elif ch == '"':
            node, pos = parse_string(s, pos)
            pos += 1
        elif ch == '(':
            node, pos = parse_list(s, pos + 1)
            pos += 1
        elif ch == ';':
            # Skip to just past the end of the line.  A comment that
            # runs to end of input used to raise IndexError here; now
            # it falls through to the "missing paren" error below.
            newline = s.find('\n', pos)
            pos = end if newline < 0 else newline + 1
        elif ch == "'" or ch == ',':
            # A quote glued to the end of a symbol (a'b) used to be
            # dropped; it now applies to the next object like any other.
            quote_depth += 1 if ch == "'" else -1
            pos += 1
        elif ch in _WHITESPACE:
            pos += 1
        else:
            start = pos
            while pos < end and s[pos] not in _DELIMITERS:
                pos += 1
            node = parse_symbol(s[start:pos])
        if node is not None:
            objects.append(_apply_quotes(node, quote_depth))
            quote_depth = 0
    raise SyntaxError('unexpected end of input: missing ")"')


def inspect(n, indent=0):
    """Write an indented dump of the tree rooted at `n` to stderr."""
    pad = ' ' * indent
    if n.type == 'list':
        sys.stderr.write('%s list:\n' % pad)
        for child in n.value:
            inspect(child, indent + 1)
    else:
        sys.stderr.write('%s %s %s\n' % (pad, n.type, n.value))


n_of_ids = 0


def compile(n):
    """Print one numbered record per node of the tree; return n's id.

    Children are emitted before their parent.  Each line is
    "<id> <record>" where the record is:
      'l' + child ids             for a list,
      's' + character code points for a string,
      'S' + name                  for a symbol,
      'n' + text                  for a number.
    Ids come from the module-level counter `n_of_ids`.
    """
    global n_of_ids
    if n.type == 'list':
        child_ids = [compile(child) for child in n.value]
        s = 'l ' + ' '.join(map(str, child_ids))
    elif n.type == 'string':
        s = 's ' + ' '.join(str(ord(ch)) for ch in n.value)
    elif n.type == 'symbol':
        s = 'S ' + n.value
    elif n.type == 'number':
        s = 'n ' + n.value
    else:
        s = ''
    node_id = n_of_ids
    sys.stdout.write('%d %s\n' % (node_id, s))
    n_of_ids += 1
    return node_id


def main():
    """Parse stdin as the body of one big (do ...) form and compile it."""
    data = sys.stdin.read()
    if isinstance(data, bytes):
        # Python 2 hands back bytes; Python 3 has already decoded.
        data = data.decode('utf-8')
    # The parser assumes the opening parenthesis has already been
    # consumed.  The newline keeps a final comment line without a
    # trailing newline from swallowing the closing parenthesis.
    source = 'do ' + data + '\n)'
    node, pos = parse_list(source)
    if pos != len(source) - 1:
        # A stray ')' in the input closed the implicit list early.
        raise SyntaxError('unbalanced ")" in input')
    inspect(node)
    compile(node)
    sys.stdout.write('%d e %d\n' % (n_of_ids, n_of_ids - 1))


if __name__ == '__main__':
    main()