XML reading.
# blogspot_xml_santizer.py
from xml.dom import minidom
from xml.sax.saxutils import unescape
# Extra replacements handed to xml.sax.saxutils.unescape, applied in addition
# to its built-in &lt; / &gt; / &amp; handling.
# NOTE(review): this key was garbled in the original source (it read r''',
# an unterminated string literal). "&#39;" is the presumed original entity --
# confirm against the actual export data.
entities = {r'&#39;': "-"}


def sanitize_contents(xml_source, replacements=None):
    """Return the unescaped XML of every <content> element in *xml_source*.

    Each <content> element is serialized with ``toprettyxml()`` and the
    resulting text is passed through ``xml.sax.saxutils.unescape``.

    Args:
        xml_source: The XML document as ``str`` or ``bytes``.
        replacements: Mapping of extra substrings to replace while
            unescaping. Defaults to the module-level ``entities``.

    Returns:
        A list with one string per <content> element, in document order;
        empty when the document has no such element.

    Raises:
        xml.parsers.expat.ExpatError: If *xml_source* is not well-formed.
    """
    if replacements is None:
        replacements = entities
    document = minidom.parseString(xml_source)
    # NOTE(review): minidom re-escapes "&" as "&amp;" when serializing, and
    # unescape() applies the extra replacements *before* turning "&amp;" back
    # into "&". A doubly-escaped "&amp;#39;" therefore does not match a
    # "&#39;" key -- verify this is the intended behavior.
    return [
        unescape(node.toprettyxml(), replacements)
        for node in document.getElementsByTagName('content')
    ]


def main(path='blog-09-02-2012.xml'):
    """Print the <content> count of *path*, then each sanitized element."""
    # Read bytes so the parser honors the encoding named in the XML
    # declaration instead of the locale default; ``with`` closes the file.
    with open(path, 'rb') as xml_file:
        raw_xml = xml_file.read()
    sanitized = sanitize_contents(raw_xml)
    print(len(sanitized))
    for fragment in sanitized:
        print(fragment)


if __name__ == "__main__":
    main()