May 10, 2015

XML: ignore this post :)

Reading XML.
# blogspot_xml_santizer.py
 
from xml.dom import minidom
from xml.sax.saxutils import unescape
 
# Blogger export that is read when this file is run as a script.
EXPORT_PATH = 'blog-09-02-2012.xml'

# Extra replacements handed to unescape() in addition to the &amp;, &lt; and
# &gt; it always handles.
# NOTE(review): the key of this mapping was lost when the listing was
# published -- the entity was decoded to a bare apostrophe, which left an
# unterminated string literal. '&apos;' is assumed here; confirm both the key
# and the "-" replacement against the original script.
entities = {r'&apos;': "-"}


def unescape_contents(xml_source, extra_entities=None):
    """Return the unescaped XML text of every <content> element.

    Args:
        xml_source: The XML document as ``str`` or ``bytes``. Passing bytes
            lets the parser honour the encoding named in the XML declaration.
        extra_entities: Optional mapping of additional replacements passed to
            ``unescape``. Defaults to the module-level ``entities`` mapping.

    Returns:
        A list with one string per <content> element, in document order. Each
        string is the element's ``toprettyxml()`` output after ``unescape``.

    Raises:
        xml.parsers.expat.ExpatError: If ``xml_source`` is not well-formed.
    """
    if extra_entities is None:
        extra_entities = entities
    doc_ob = minidom.parseString(xml_source)
    return [
        unescape(content.toprettyxml(), extra_entities)
        for content in doc_ob.getElementsByTagName('content')
    ]


def main(path=EXPORT_PATH):
    """Print the number of <content> elements in *path*, then each one unescaped."""
    # Read bytes rather than text so decoding is left to the XML parser
    # instead of the platform's default text encoding.
    with open(path, 'rb') as p:
        x = p.read()

    contents = unescape_contents(x)
    print(len(contents))
    for modified_xml in contents:
        print(modified_xml)


if __name__ == '__main__':
    main()