#!/usr/bin/python
# -*- coding: utf-8 -*-
# --------------------------------------------------
# File Name: dissectXMLadditions.py
# Location:
# Purpose:
# Creation Date: 06-07-2017
# Last Modified: Thu, Jul 6, 2017 5:29:28 PM
# Author(s): Mike Stout
# Copyright 2017 The Author(s) All Rights Reserved
# Credits:
# --------------------------------------------------
import codecs
import xml.dom.minidom as minidom
import sys
# Edition prefix taken from the first command-line argument; the script reads
# "<ed>_x.xml" and writes "<ed>_minus.xml" and "<ed>_plus.xml".
if len(sys.argv) < 2:
    # Fail with a usage hint instead of a bare IndexError traceback.
    sys.exit("usage: %s EDITION" % sys.argv[0])
ed = sys.argv[1]
def dropAdditions(node):
    """Remove every <add> element from the subtree rooted at *node*, in place.

    *node* is a minidom node (a Document or any node exposing ``childNodes``
    and ``removeChild``).  Each <add> element is detached together with
    everything inside it; all other nodes are left where they are.

    Returns None.
    """
    # Iterate over a snapshot: ``childNodes`` is a live list, and removing a
    # child while iterating it directly skips the following sibling, which
    # left adjacent <add> elements behind.
    for child in list(node.childNodes):
        if child.nodeType == child.ELEMENT_NODE and child.tagName == "add":
            node.removeChild(child)
            # The whole addition is gone; nothing beneath it needs visiting.
            continue
        dropAdditions(child)
def keepAdditions(node):
    """Prune the subtree rooted at *node*, in place, down to its <add> content.

    Every visited node goes through two steps:

    1. If it is a structural element (div1, div2, stage, sp, l, p):
       - with no <add> descendant, it is detached from its parent;
       - with an <add> descendant, the value of each of its non-element
         children (the text before or after an addition) is replaced by a
         single newline.
    2. If the node has no <add> descendant, its element children other than
       <lb> are removed.  The children that remain are visited recursively.

    Subtrees that have been detached are not traversed further, since they
    are no longer part of the document.

    Returns None.
    """
    structural_tags = ("div1", "div2", "stage", "sp", "l", "p")

    if node.nodeType == node.ELEMENT_NODE and node.tagName in structural_tags:
        if node.getElementsByTagName("add"):
            # Remove any text before or after an addition ...
            for c in node.childNodes:
                if c.nodeType != c.ELEMENT_NODE:
                    c.nodeValue = "\n"
        elif node.parentNode is not None:
            # Remove nodes that do not contain additions ...
            node.parentNode.removeChild(node)
            return
        # A parentless node without additions cannot be detached; fall
        # through so that its children are still pruned below.

    children = list(node.childNodes)  # snapshot: children are removed below

    # Only nodes that actually have element children are asked for their
    # descendants; text-like nodes have no getElementsByTagName().
    has_element_child = any(c.nodeType == c.ELEMENT_NODE for c in children)

    # Evaluated once per node: no <add> element is ever detached from one of
    # its ancestors, so the answer cannot change while the children are
    # processed.
    # NOTE(review): the test is on *node*, not on each child, so children
    # are only dropped when the parent as a whole holds no additions.
    # Presumably deliberate (a per-child test would also drop the <add>
    # elements themselves) -- confirm against the intended output.
    prune = has_element_child and not node.getElementsByTagName("add")

    for child in children:
        if prune and child.nodeType == child.ELEMENT_NODE:
            # Do not remove prose milestones ...
            if child.tagName != "lb":
                node.removeChild(child)
                continue
        keepAdditions(child)
# Produce the two derived editions from "<ed>_x.xml".  The source file is
# parsed afresh for each pass because both transforms edit the DOM in place:
#   "<ed>_minus.xml" -- the text with every addition removed
#   "<ed>_plus.xml"  -- the additions only
for transform, suffix in (
    (dropAdditions, "_minus.xml"),
    (keepAdditions, "_plus.xml"),
):
    xml = minidom.parse(ed + "_x.xml")
    transform(xml)
    with codecs.open(ed + suffix, "w", "utf-8") as out:
        xml.writexml(out, encoding="utf-8")