# XML smart parser generator

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
# http://www.activestate.org/ASPN/Cookbook/Python/Recipe/521902
'''
Title: xml reverse-engineering ElementTree code generator (*whew*)
Submitter: Andrew Moffat (other recipes)
Last Updated: 2007/06/13
Version no: 1.1
Category: XML
Description:
takes an ambiguous xml file and generates the ET code to generate that xml file.
this is useful if you have an example xml file, or an ambiguous xml file that you'd
like to use as a template to parameterize certain elements. upgrades coming.
Discussion:
the primary need for this recipe came from me not wanting to read the xml specification
for an IMS manifest, but having an example manifest to work with. after writing this,
i just fed the manifest in and parameterized the values i needed. simple :)
once the ET generator file is created, import it into your main program.
from there, run the "build()" function with named keywords to fill in the blanks on the template'''
import xml.etree.ElementTree as ET
import re
import sys
class ETGen(object):
    """Generate Python ElementTree code that rebuilds a given XML file.

    The XML file is parsed and walked depth-first.  For every element the
    statements needed to recreate it are collected, and the result is written
    out as a Python module exposing ``build(**kwargs)``, which returns
    ``ET.tostring()`` of the reconstructed root element.

    Element text and attribute values are emitted as
    ``kwargs.get('', <literal>)`` and tagged with ``# PARAMETERIZE`` so the
    generated file can be edited by hand to give each value a keyword name.
    Tail text (text following an element's end tag) is not emitted.
    """

    # Suffix placed between the tag name and the running counter in every
    # generated variable name.
    TAGSUFFIX = 'XMLTag'

    # Splits an ElementTree "{namespace}local" name.  Group 1 is the namespace
    # URI (None when the name has no namespace), group 2 is the local name.
    _TAG_RE = re.compile(r'(?:\{(.*?)\})?(.*)')

    def __init__(self, xmlin, out, param=None):
        """Parse ``xmlin`` and immediately write the generator module to ``out``.

        xmlin -- path of the example XML file to reverse-engineer.
        out   -- path of the Python file to create (opened with mode 'w').
        param -- accepted for backward compatibility; currently unused.
        """
        self.counter = 0      # running suffix that keeps variable names unique
        self.constants = {}   # namespace URI -> generated constant name (NS0, ...)
        self.lines = []       # statements forming the body of build()
        self.out = out
        # ET.parse() opens the file itself and honours the XML encoding
        # declaration, and it closes the file even when parsing fails.
        tree = ET.parse(xmlin).getroot()
        self.__walk(tree, None)
        self.__write()

    def __genName(self, name):
        """Return a unique Python variable name derived from tag ``name``."""
        self.counter += 1
        local = ETGen._TAG_RE.match(name).group(2)
        # XML names may contain '-', '.' or ':' which are not legal inside a
        # Python identifier; map every non-word character to '_'.
        identifier = re.sub(r'\W', '_', local)
        return identifier + ETGen.TAGSUFFIX + str(self.counter)

    def __write(self):
        """Write the collected constants and statements to ``self.out``."""
        with open(self.out, 'w') as h:
            h.write("import xml.etree.ElementTree as ET\n\n")
            # Namespace constants; %r keeps the literal valid even if the URI
            # contains quotes or backslashes.
            h.writelines(["%s = %r\n" % (v, k) for k, v in self.constants.items()])
            h.write("\n")
            h.write("def build(**kwargs):\n\t")
            h.write("\n\t".join(self.lines))
            h.write("\n\treturn ET.tostring(%s)\n\n" % self.root)
            # print(...) with a single argument is valid on Python 2 and 3.
            h.write("if __name__ == '__main__': print(build())\n")

    def __getNamespace(self, name):
        """Return source code for an expression evaluating to ``name``.

        A name without a namespace becomes a plain string literal.  A
        namespaced name becomes ``'{%s}local' % NSn`` where ``NSn`` is a
        module-level constant registered in ``self.constants`` for its URI.
        """
        ns, local = ETGen._TAG_RE.match(name).groups()
        if ns is None:
            return repr(name)
        if ns not in self.constants:
            self.constants[ns] = "NS" + str(len(self.constants))
        return "%r %% %s" % ('{%s}' + local, self.constants[ns])

    def __walk(self, node, parent):
        """Emit statements for ``node`` and, recursively, for its children.

        node   -- the ElementTree element to translate.
        parent -- variable name of the already-emitted parent element, or
                  None when ``node`` is the document root.
        """
        name = self.__genName(node.tag)
        tag = self.__getNamespace(node.tag)
        if parent is None:
            self.root = name
            self.lines.append("%s = ET.Element(%s)" % (name, tag))
        else:
            self.lines.append("%s = ET.SubElement(%s, %s)" % (name, parent, tag))

        # Text: node.text is None for empty elements, and whitespace-only
        # text is treated the same as no text.
        text = (node.text or '').strip()
        if text:
            self.lines.append(
                "%s.text = kwargs.get('', %r) # PARAMETERIZE" % (name, text))

        # Attributes (keys may be namespaced just like tags).
        for key, val in node.items():
            key = self.__getNamespace(key)
            self.lines.append(
                "%s.set(%s, kwargs.get('', %r)) # PARAMETERIZE" % (name, key, val))

        # Iterating an element yields its direct children.
        for child in node:
            self.__walk(child, name)
def main(argv=None):
    """Command-line entry point: ``<script> <input.xml> <output.py>``.

    argv -- argument list including the program name; defaults to
            ``sys.argv`` when omitted.

    Example arguments:
        '/home/user/manifest.xml' '/home/user/manifest_generator.py'

    Exits with a usage message when fewer than two paths are supplied.
    Any additional arguments are ignored.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        prog = argv[0] if argv else 'etgen'
        sys.exit("usage: %s <input.xml> <output.py>" % prog)
    # Constructing ETGen parses the input and writes the output file.
    ETGen(argv[1], argv[2])
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()