#!/usr/bin/python
# -*- coding: utf-8 -*-
# --------------------------------------------------
# File Name: dissectXMLadditions.py
# Location:
# Purpose: Split an edition's XML into a copy without "add" elements and
#          a copy reduced to the structure surrounding "add" elements.
# Creation Date: 06-07-2017
# Last Modified: Thu, Jul 6, 2017 5:29:28 PM
# Author(s): Mike Stout
# Copyright 2017 The Author(s) All Rights Reserved
# Credits:
# --------------------------------------------------
"""Dissect the additions ("add" elements) of an XML edition.

Usage: dissectXMLadditions.py ED

Reads ``ED_x.xml`` and writes two files next to it:

* ``ED_minus.xml`` -- the document with every "add" element removed.
* ``ED_plus.xml``  -- the document pruned by ``keepAdditions``.
"""

import codecs
import sys
import xml.dom.minidom as minidom

# Structural elements that are dropped entirely when they hold no additions.
_CONTAINER_TAGS = ("div1", "div2", "stage", "sp", "l", "p")


def _has_additions(node):
    """Return True if *node* has at least one "add" descendant."""
    return bool(node.getElementsByTagName("add"))


def dropAdditions(node):
    """Remove every "add" element found anywhere below *node*, in place.

    *node* is a minidom Document or Element; nothing is returned.
    """
    # Iterate over a snapshot: removing from the live childNodes list while
    # looping over it would skip the sibling that follows each removed node.
    for child in list(node.childNodes):
        if child.nodeType == child.ELEMENT_NODE and child.tagName == "add":
            node.removeChild(child)
            # The detached subtree is discarded; no need to descend into it.
            continue
        dropAdditions(child)


def keepAdditions(node):
    """Prune the tree below *node*, in place, down to its additions.

    * A container element (div1, div2, stage, sp, l, p) with no "add"
      descendant is removed from its parent.
    * In a container that does hold additions, the value of every
      non-element child (e.g. the text before/after an addition) is
      replaced by a newline.
    * In any node with no "add" descendant, element children other than
      "lb" (prose milestones) are removed.

    NOTE(review): the original source had lost its indentation; the child
    loop is reconstructed at function level because that is the only
    placement in which the Document node passed by the script is traversed.
    Confirm against the original file.
    """
    if node.nodeType == node.ELEMENT_NODE and node.tagName in _CONTAINER_TAGS:
        if not _has_additions(node):
            parent = node.parentNode
            # parentNode is None for an already-detached node; checking it
            # explicitly replaces the former bare "except: pass".
            if parent is not None:
                parent.removeChild(node)
            # Nothing below a removed node can reach the output.
            return
        # Blank any text before or after an addition. Elements are skipped:
        # their nodeValue is not serialized, so setting it has no effect.
        for sibling in node.childNodes:
            if sibling.nodeType != sibling.ELEMENT_NODE:
                sibling.nodeValue = "\n"

    children = list(node.childNodes)  # snapshot: children may be removed
    if not children:
        # Leaf nodes (text, comments, ...) have no getElementsByTagName.
        return

    # This loop never removes an "add" element, so the answer cannot change
    # while iterating; compute it once instead of once per child.
    prune = not _has_additions(node)
    for child in children:
        if (prune
                and child.nodeType == child.ELEMENT_NODE
                and child.tagName != "lb"):  # keep prose milestones
            node.removeChild(child)
            continue
        keepAdditions(child)


def main(argv=None):
    """Run both transformations for the edition named on the command line.

    *argv* defaults to ``sys.argv[1:]``; only its first item is used.
    Returns a process exit status: 0 on success, 2 on a usage error.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.stderr.write("usage: dissectXMLadditions.py ED\n")
        return 2
    ed = args[0]

    # The source is parsed afresh for each output because both transforms
    # modify the tree in place.
    for transform, suffix in ((dropAdditions, "_minus.xml"),
                              (keepAdditions, "_plus.xml")):
        xml = minidom.parse(ed + "_x.xml")
        transform(xml)
        with codecs.open(ed + suffix, "w", "utf-8") as out:
            xml.writexml(out, encoding="utf-8")
    return 0


if __name__ == "__main__":
    sys.exit(main())