from funcparserlib.lexer import make_tokenizer, Token
from funcparserlib.parser import some, a, skip, oneplus, many

# Lexer: runs of blank lines, the four markup symbols, and any other text.
SPECS = [
    ('newline', (r'\n+',)),
    ('markup', (r'>>|<<|\(|\)',)),
    ('string', (r'[^\n()<>]+',)),
]

tokenize = lambda s: list(make_tokenizer(SPECS)(s))


def parse(text):
    # Small helpers: constant functions, token-to-string projection,
    # space-separated joining, and a parser that consumes a markup symbol.
    const = lambda s: lambda _: s
    tokval = lambda tok: tok.value
    join = ' '.join
    markup = lambda s: skip(a(Token('markup', s)))

    # Grammar: a document is a sequence of phrases, parenthesized
    # phrases, and chorus blocks delimited by '>>' and '<<'.
    literal = some(lambda tok: tok.type == 'string') >> tokval >> str.strip
    newline = some(lambda tok: tok.type == 'newline') >> const('\n')
    phrase = oneplus(literal | newline) >> join
    paren = (markup('(') + phrase + markup(')')
             >> (lambda s: '(' + s + ')'))
    chorus = (markup('>>') + oneplus(paren | phrase) + markup('<<')
              >> (lambda xs: '\n' + join(xs) + '\n'))
    top = many(chorus | paren | phrase) >> join
    return top.parse(tokenize(text))
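
# A minimal usage sketch under assumed input conventions (the sample text
# below is illustrative, not from the original source): '>>' and '<<'
# delimit a chorus block, and parentheses mark an inline group such as a
# repeat hint.
if __name__ == '__main__':
    song = ('First verse line\n'
            'Second verse line\n'
            '>>Chorus line one (x2)\n'
            'Chorus line two<<\n')
    print(parse(song))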