# http://www.activestate.org/ASPN/Cookbook/Python/Recipe/521902
'''
Title: xml reverse-engineering ElementTree code generator (*whew*)
Submitter: Andrew Moffat (other recipes)
Last Updated: 2007/06/13
Version no: 1.1
Category: XML

Description:

takes an ambiguous xml file and generates the ET code to generate that xml
file. this is useful if you have an example xml file, or an ambiguous xml
file that you'd like to use as a template to parameterize certain elements.
upgrades coming.

Discussion:

the primary need for this recipe came from me not wanting to read the xml
specification for an IMS manifest, but having an example manifest to work
with. after writing this, i just fed the manifest in and parameterized the
values i needed. simple :)

once the ET generator file is created, import it into your main program.
from there, run the "build()" function with named keywords to fill in the
blanks on the template
'''

import xml.etree.ElementTree as ET
import re
import sys

# Splits an ElementTree qualified name ("{uri}local" or plain "local") into
# its optional namespace URI (group 1) and its local part (group 2).
_QNAME_RE = re.compile(r'(?:\{(.*?)\})?(.*)')


class ETGen(object):
    """Generate a Python module that rebuilds a given XML file.

    Constructing an instance does all the work: the XML file is parsed, the
    element tree is walked depth-first, and a module containing a
    ``build(**kwargs)`` function is written to ``out``. Every element text
    and attribute value is emitted as ``kwargs.get('', <original value>)``
    followed by a ``# PARAMETERIZE`` marker, so the generated file can be
    edited by hand to name the keyword for each value.

    Only ``node.text`` and attributes are reproduced; tail text (text that
    follows a closing tag) is not read by this generator.
    """

    # Appended (together with a running counter) to each element's local
    # name to form the variable name used in the generated code.
    TAGSUFFIX = 'XMLTag'

    def __init__(self, xmlin, out, param=None):
        """Parse ``xmlin`` and write the generator module to ``out``.

        xmlin -- path of the XML file to reverse-engineer.
        out   -- path of the Python file to create (overwritten if present).
        param -- accepted for backward compatibility; not used.

        Raises whatever ``ET.parse`` raises for an unreadable or malformed
        input file, and whatever ``open`` raises for an unwritable output.
        """
        self.counter = 0      # number of variable names handed out so far
        self.constants = {}   # namespace URI -> generated constant name
        self.lines = []       # body lines of the generated build() function
        self.out = out

        # ET.parse replaces the XMLTreeBuilder class, which no longer exists
        # on current Python versions; it also honours the encoding declared
        # in the XML prolog because it reads the file in binary mode.
        root = ET.parse(xmlin).getroot()

        self.__walk(root, None)
        self.__write()

    def __genName(self, name):
        """Return a unique, valid Python variable name for tag ``name``."""
        self.counter += 1
        local = _QNAME_RE.search(name).group(2)
        # XML names may contain '-', '.' and similar characters that are not
        # legal in Python identifiers; the counter keeps names unique even
        # when two different tags sanitize to the same text.
        local = re.sub(r'\W', '_', local)
        return local + ETGen.TAGSUFFIX + str(self.counter)

    def __write(self):
        """Write the generated module to ``self.out``."""
        with open(self.out, 'w') as h:
            h.write("import xml.etree.ElementTree as ET\n\n")
            # prints namespace constants; %r keeps the literal valid even if
            # the URI contains quotes or backslashes
            h.writelines(["%s = %r\n" % (v, k)
                          for k, v in self.constants.items()])
            h.write("\n")
            h.write("def build(**kwargs):\n\t")
            h.write("\n\t".join(self.lines))
            h.write("\n\treturn ET.tostring(%s)\n\n" % self.root)
            # print(...) with a single argument is valid on Python 2 and 3
            h.write("if __name__ == '__main__': print(build())")

    def __getNamespace(self, name):
        """Return source code for an expression evaluating to ``name``.

        A plain name becomes a quoted literal. A ``{uri}local`` name becomes
        ``'{%s}local' % NSn`` where ``NSn`` is a module-level constant
        holding the URI; the constant is registered on first use.
        """
        ns, local = _QNAME_RE.search(name).groups()
        if ns is None:
            return "'%s'" % name

        if ns not in self.constants:
            self.constants[ns] = "NS" + str(len(self.constants))
        nsName = self.constants[ns]

        return "'{%%s}%s' %% %s" % (local, nsName)

    def __walk(self, node, parent):
        """Emit the code for ``node`` and, recursively, its children.

        node   -- the Element being processed.
        parent -- variable name of the parent element in the generated
                  code, or None when ``node`` is the document root.
        """
        name = self.__genName(node.tag)
        tag = self.__getNamespace(node.tag)

        if parent is None:
            self.root = name
            self.lines.append("%s = ET.Element(%s)" % (name, tag))
        else:
            self.lines.append(
                "%s = ET.SubElement(%s, %s)" % (name, parent, tag))

        # handles text: whitespace-only text is treated as absent
        text = node.text.strip() if node.text is not None else ''
        if text:
            # %r escapes quotes, backslashes and newlines so the generated
            # line is always a valid string literal
            self.lines.append(
                "%s.text = kwargs.get('', %r) # PARAMETERIZE" % (name, text))

        # handles attributes
        for key, val in node.items():
            key = self.__getNamespace(key)
            self.lines.append(
                "%s.set(%s, kwargs.get('', %r)) # PARAMETERIZE"
                % (name, key, val))

        # Iterating an Element yields its direct children; this replaces
        # getchildren(), which is gone from current Python versions.
        for child in node:
            self.__walk(child, name)


def main():
    """Command-line entry point: ``script INPUT.xml OUTPUT.py``.

    Example arguments: '/home/user/manifest.xml'
    '/home/user/manifest_generator.py'
    """
    if len(sys.argv) < 3:
        sys.exit("usage: %s INPUT.xml OUTPUT.py" % sys.argv[0])
    ETGen(sys.argv[1], sys.argv[2])


if __name__ == '__main__':
    main()