"""
Allows XML files to be operated on like Python objects.
Features:
- load XML source from file pathnames, readable file objects or raw strings
- add, get and set tag attributes like with python attributes
- iterate over nodes
- save the modified XMLFile or XMLObject to file
Example XML file::
Example usage::
>> from xmlobject import XMLFile
>> x = XMLFile(path="sample.xml")
>> print x
>> print x.root
>> print x.root._children
[, , ,
, ]
>> print x.root.person
[, ]
>> print x.root.person[0].name
John Smith
>> john = x.root.person[0]
>> john.height = 184
>> c = john._addNode("crime")
>> c.name = "Grand Theft Auto"
>> c.date = "4 May, 2005"
>> print x.toxml()
>>
"""
import sys, os
import xml.dom
import xml.dom.minidom
from xml.dom.minidom import parse, parseString, getDOMImplementation
impl = getDOMImplementation()
class MissingRootTag(Exception):
"""root tag name was not given"""
class InvalidXML(Exception):
"""failed to parse XML input"""
class CannotSave(Exception):
"""unable to save"""
class InvalidNode(Exception):
"""not a valid minidom node"""
class XMLFile:
"""
Allows an xml file to be viewed and operated on
as a python object.
(If you're viewing the epydoc-generated HTML documentation, click the 'show private'
link at the top right of this page to see all the methods)
Holds the root node in the .root attribute, also in an attribute
with the same name as this root node.
"""
def __init__(self, **kw):
"""
Create an XMLFile
Keywords:
- path - a pathname from which the file can be read
- file - an open file object from which the raw xml
can be read
- raw - the raw xml itself
- root - name of root tag, if not reading content
Usage scenarios:
1. Working with existing content - you must supply input in
one of the following ways:
- 'path' must be an existing file, or
- 'file' must be a readable file object, or
- 'raw' must contain raw xml as a string
2. Creating whole new content - you must give the name
of the root tag in the 'root' keyword
Notes:
- Keyword precedence governing existing content is:
1. path (if existing file)
2. file
3. raw
- If working with existing content:
- if the 'root' is given, then the content's toplevel tag
MUST match the value given for 'root'
- trying to _save will raise an exception unless 'path'
has been given
- if not working with existing content:
- 'root' must be given
- _save() will raise an exception unless 'path' has been given
"""
path = kw.get("path", None)
fobj = kw.get("file", None)
raw = kw.get("raw", None)
root = kw.get("root", None)
textfilter = kw.get("textfilter", None)
if path:
self.path = path
try:
fobj = file(path)
except IOError:
pass
else:
self.path = None
if fobj:
raw = fobj.read()
if raw:
self.dom = xml.dom.minidom.parseString(raw)
else:
# could not source content, so create a blank slate
if not root:
# in which case, must give a root node name
raise MissingRootTag(
"No existing content, so must specify root")
# ok, create a blank dom
self.dom = impl.createDocument(None, root, None)
# get the root node, save it as attributes 'root' and name of node
rootnode = self.dom.documentElement
# now validate root tag
if root:
if rootnode.nodeName != root:
raise IncorrectRootTag("Gave root='%s', input has root='%s'" % (
root, rootnode.nodeName))
if textfilter:
self.textfilter = textfilter
else:
self.textfilter = lambda x: x
# need this for recursion in XMLNode
self._childrenByName = {}
self._children = []
# add all the child nodes
for child in self.dom.childNodes:
childnode = XMLNode(self, child)
#print "compare %s to %s" % (rootnode, child)
if child == rootnode:
#print "found root"
self.root = childnode
setattr(self, rootnode.nodeName, self.root)
def save(self, where=None, obj=None):
"""
Saves the document.
If argument 'where' is given, saves to it, otherwise
tries to save to the original given 'path' (or barfs)
Value can be a string (taken to be a file path), or an open
file object.
"""
obj = obj or self.dom
if not where:
if self._root.path:
where = self._root.path
if isinstance(where, str):
where = file(where, "w")
if not where:
raise CannotSave("No save destination, and no original path")
where.write(obj.toxml())
where.flush()
def saveAs(self, path):
"""
save this time, and all subsequent times, to filename 'path'
"""
self.path = path
self.save()
def toxml(self):
return self.dom.toxml()
def __len__(self):
"""
returns number of child nodes
"""
return len(self._children)
def __getitem__(self, idx):
if isinstance(idx, int):
return self._children[idx]
else:
return self._childrenByName[idx]
class XMLNode:
"""
This is the workhorse for the xml object interface
(If you're viewing the epydoc-generated HTML documentation, click the 'show private'
link at the top right of this page to see all the methods)
"""
def __init__(self, parent, node):
"""
You shouldn't need to instantiate this directly
"""
self._parent = parent
if isinstance(parent, XMLFile):
self._root = parent
else:
self._root = parent._root
self._node = node
self._childrenByName = {}
self._children = []
# add ourself to parent's children registry
parent._children.append(self)
# the deal with named subtags is that we store the first instance
# as itself, and with second and subsequent instances, we make a list
parentDict = self._parent._childrenByName
nodeName = node.nodeName
if not parentDict.has_key(nodeName):
parentDict[nodeName] = parent.__dict__[nodeName] = self
else:
if isinstance(parentDict[nodeName], XMLNode):
# this is the second child node of a given tag name, so convert
# the instance to a list
parentDict[nodeName] = parent.__dict__[nodeName] = [parentDict[nodeName]]
parentDict[nodeName].append(self)
# figure out our type
self._value = None
if isinstance(node, xml.dom.minidom.Text):
self._type = "text"
self._value = self._root.textfilter(node.nodeValue)
elif isinstance(node, xml.dom.minidom.Element):
self._type = "node"
elif isinstance(node, xml.dom.minidom.Comment):
self._type = "comment"
self._value = node.nodeValue
else:
raise InvalidNode("node class %s" % node.__class__)
# and wrap all the child nodes
for child in node.childNodes:
XMLNode(self, child)
def _render(self):
"""
Produces well-formed XML of this node's contents,
indented as required
"""
return self._node.toxml()
def __repr__(self):
if self._type == "node":
return "" % self._node.nodeName
else:
return "" % self._type
def __getattr__(self, attr):
"""
Fetches an attribute or child node of this tag
If it's an attribute, then returns the attribute value as a string.
If a child node, then:
- if there is only one child node of that name, return it
- if there is more than one child node of that name, return a list
of child nodes of that tag name
Supports some magic attributes:
- _text - the value of the first child node of type text
"""
#print "%s: __getattr__: attr=%s" % (self, attr)
if attr == '_text':
# magic attribute to return text
tnode = self['#text']
if isinstance(tnode, list):
tnode = tnode[0]
return tnode._value
if self._type in ['text', 'comment']:
if attr == '_value':
return self._node.nodeValue
else:
raise AttributeError(attr)
if self._node.hasAttribute(attr):
return self._node.getAttribute(attr)
elif self._childrenByName.has_key(attr):
return self._childrenByName[attr]
#elif attr == 'value':
# magic attribute
else:
raise AttributeError(attr)
def __setattr__(self, attr, val):
"""
Change the value of an attribute of this tag
The magic attribute '_text' can be used to set the first child
text node's value
For example::
Consider:
foo
>> somenode
>> somenode.child
>> somenode.child._text
'foo'
>> somenode._toxml()
u'foo'
>> somenode.child._text = 'bar'
>> somenode.child._text
'bar'
>> somenode.child._toxml()
u'bar/child>'
"""
if attr.startswith("_"):
# magic attribute for setting _text
if attr == '_text':
tnode = self['#text']
if isinstance(tnode, list):
tnode = tnode[0]
tnode._node.nodeValue = val
tnode._value = val
return
self.__dict__[attr] = val
elif self._type in ['text', 'comment']:
self._node.nodeValue = val
else:
# discern between attribute and child node
if self._childrenByName.has_key(attr):
raise Exception("Attribute Exists")
self._node.setAttribute(attr, str(val))
def _keys(self):
"""
Return a list of attribute names
"""
return self._node.attributes.keys()
def _values(self):
"""
Returns a list of (attrname, attrval) tuples for this tag
"""
return [self._node.getAttribute(k) for k in self._node.attributes.keys()]
def _items(self):
"""
returns a list of attribute values for this tag
"""
return [(k, self._node.getAttribute(k)) for k in self._node.attributes.keys()]
def _has_key(self, k):
"""
returns True if this tag has an attribute of the given name
"""
return self._node.hasAttribute(k) or self._childrenByName.has_key(k)
def _get_name(self):
if self._type == "node":
return self._node.nodeName
else:
return self._type
def _get(self, k, default=None):
"""
returns the value of attribute k, or default if no such attribute
"""
if self._has_key(k):
return getattr(self, k)
else:
return default
def __len__(self):
"""
returns number of child nodes
"""
return len(self._children)
def __getitem__(self, idx):
"""
if given key is numeric, return the nth child, otherwise
try to return the child tag (or list of child tags) having
the key as the tag name
"""
#print "__getitem__: idx=%s" % str(idx)
if isinstance(idx, slice) or isinstance(idx, int):
return self._children[idx]
elif isinstance(idx, str):
return self._childrenByName[idx]
else:
raise IndexError(idx)
def _addNode(self, child):
"""
Tries to append a child node to the tree, and returns it
Value of 'child' must be one of:
- a string (in which case it is taken to be the name
of the new node's tag)
- a dom object, in which case it will be wrapped and added
- an XMLNode object, in which case it will be added without
wrapping
"""
if isinstance(child, XMLNode):
# add it to our children registry
self._children.append(child)
parentDict = self._childrenByName
nodeName = child._node.nodeName
if not parentDict.has_key(nodeName):
parentDict[nodeName] = parent.__dict__[nodeName] = child
else:
if isinstance(parentDict[nodeName], XMLNode):
# this is the second child node of a given tag name, so convert
# the instance to a list
parentDict[nodeName] = self.__dict__[nodeName] = [parentDict[nodeName]]
parentDict[nodeName].append(child)
# and stick it in the dom
self._node.appendChild(child._node)
return child
elif isinstance(child, str):
childNode = self._root.dom.createElement(child)
self._node.appendChild(childNode)
elif isinstance(child, xml.dom.minidom.Element):
childNode = child
child = childNode.nodeName
self._node.appendChild(childNode)
return XMLNode(self, childNode)
def _addText(self, value):
"""
Tries to append a child text node, with the given text, to the tree,
and returns the created node object
"""
childNode = self._root.dom.createTextNode(value)
self._node.appendChild(childNode)
return XMLNode(self, childNode)
def _addComment(self, comment):
"""
Tries to append a child comment node (with the given text value)
to the tree, and returns the create node object
"""
childNode = self._root.dom.createCommentNode(comment)
self._node.appendChild(childNode)
return XMLNode(self, childNode)
def _save(self, where=None):
"""
Generates well-formed XML from just this node, and saves it
to a file.
Argument 'where' is either an open file object, or a pathname
If 'where' is not given, then saves the entire document tree.
"""
if not where:
self._root.save()
else:
self._root.save(where, self._node)
def _toxml(self):
"""
renders just this node out to raw xml code
"""
return self._node.toxml()
def _treeWalker(self, node, nodes):
for child in node._children:
if child._type == 'node':
nodes.append(child)
self._treeWalker(child, nodes)
def _toflat(self):
ret = [self]
self._treeWalker(self, ret)
return ret
_name = property(_get_name)