1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
"""
The simplexml module provides the xmpppy library with all the tools needed to
handle XML nodes and XML streams. I'm personally using it in many other
separate projects. It is designed to be as standalone as possible.
"""
 22   
 23  import xml.parsers.expat 
 24  import logging 
 25  log = logging.getLogger('nbxmpp.simplexml') 
 26   
 28      """ 
 29      Return provided string with symbols & < > " replaced by their respective XML 
 30      entities 
 31      """ 
 32       
 33      return txt.replace("&", "&").replace("<", "<").replace(">", ">").replace('"', """).replace(u'\x0C', "").replace(u'\x1B', "") 
  34   
 35  ENCODING='utf-8' 
 36   
 38      """ 
 39      Converts object "what" to unicode string using it's own __str__ method if 
 40      accessible or unicode method otherwise 
 41      """ 
 42      if isinstance(what, unicode): 
 43          return what 
 44      try: 
 45          r = what.__str__() 
 46      except AttributeError: 
 47          r = str(what) 
 48      if not isinstance(r, unicode): 
 49          return unicode(r, ENCODING) 
 50      return r 
  51   
 53      """ 
 54      Node class describes syntax of separate XML Node. It have a constructor that 
 55      permits node creation from set of "namespace name", attributes and payload 
 56      of text strings and other nodes. It does not natively support building node 
 57      from text string and uses NodeBuilder class for that purpose. After 
 58      creation node can be mangled in many ways so it can be completely changed. 
 59      Also node can be serialised into string in one of two modes: default (where 
 60      the textual representation of node describes it exactly) and "fancy" - with 
 61      whitespace added to make indentation and thus make result more readable by 
 62      human. 
 63   
 64      Node class have attribute FORCE_NODE_RECREATION that is defaults to False 
 65      thus enabling fast node replication from the some other node. The drawback 
 66      of the fast way is that new node shares some info with the "original" node 
 67      that is changing the one node may influence the other. Though it is rarely 
 68      needed (in xmpppy it is never needed at all since I'm usually never using 
 69      original node after replication (and using replication only to move upwards 
 70      on the classes tree). 
 71      """ 
 72   
 73      FORCE_NODE_RECREATION = 0 
 74   
 75 -    def __init__(self, tag=None, attrs={}, payload=[], parent=None, nsp=None, 
 76                      node_built=False, node=None): 
  77          """ 
 78          Takes "tag" argument as the name of node (prepended by namespace, if 
 79          needed and separated from it by a space), attrs dictionary as the set of 
 80          arguments, payload list as the set of textual strings and child nodes 
 81          that this node carries within itself and "parent" argument that is 
 82          another node that this one will be the child of. Also the __init__ can be 
 83          provided with "node" argument that is either a text string containing 
 84          exactly one node or another Node instance to begin with. If both "node" 
 85          and other arguments is provided then the node initially created as 
 86          replica of "node" provided and then modified to be compliant with other 
 87          arguments. 
 88          """ 
 89          if node: 
 90              if self.FORCE_NODE_RECREATION and isinstance(node, Node): 
 91                  node = str(node) 
 92              if not isinstance(node, Node): 
 93                  node = NodeBuilder(node, self) 
 94                  node_built = True 
 95              else: 
 96                  self.name, self.namespace, self.attrs, self.data, self.kids, self.parent, self.nsd = node.name, node.namespace, {}, [], [], node.parent, {} 
 97                  for key in node.attrs.keys(): 
 98                      self.attrs[key] = node.attrs[key] 
 99                  for data in node.data: 
100                      self.data.append(data) 
101                  for kid in node.kids: 
102                      self.kids.append(kid) 
103                  for k, v in node.nsd.items(): 
104                      self.nsd[k] = v 
105          else: 
106              self.name, self.namespace, self.attrs, self.data, self.kids, self.parent, self.nsd = 'tag', '', {}, [], [], None, {} 
107          if parent: 
108              self.parent = parent 
109          self.nsp_cache = {} 
110          if nsp: 
111              for k, v in nsp.items(): self.nsp_cache[k] = v 
112          for attr, val in attrs.items(): 
113              if attr == 'xmlns': 
114                  self.nsd[u''] = val 
115              elif attr.startswith('xmlns:'): 
116                  self.nsd[attr[6:]] = val 
117              self.attrs[attr]=attrs[attr] 
118          if tag: 
119              if node_built: 
120                  pfx, self.name = (['']+tag.split(':'))[-2:] 
121                  self.namespace = self.lookup_nsp(pfx) 
122              else: 
123                  if ' ' in tag: 
124                      self.namespace, self.name = tag.split() 
125                  else: 
126                      self.name = tag 
127          if isinstance(payload, basestring): payload=[payload] 
128          for i in payload: 
129              if isinstance(i, Node): 
130                  self.addChild(node=i) 
131              else: 
132                  self.data.append(ustr(i)) 
 133   
135          ns = self.nsd.get(pfx, None) 
136          if ns is None: 
137              ns = self.nsp_cache.get(pfx, None) 
138          if ns is None: 
139              if self.parent: 
140                  ns = self.parent.lookup_nsp(pfx) 
141                  self.nsp_cache[pfx] = ns 
142              else: 
143                  return 'http://www.gajim.org/xmlns/undeclared' 
144          return ns 
 145   
147          """ 
148          Method used to dump node into textual representation. If "fancy" argument 
149          is set to True produces indented output for readability 
150          """ 
151          s = (fancy-1) * 2 * ' ' + "<" + self.name 
152          if self.namespace: 
153              if not self.parent or self.parent.namespace!=self.namespace: 
154                  if 'xmlns' not in self.attrs: 
155                      s = s + ' xmlns="%s"'%self.namespace 
156          for key in self.attrs.keys(): 
157              val = ustr(self.attrs[key]) 
158              s = s + ' %s="%s"' % ( key, XMLescape(val) ) 
159          s = s + ">" 
160          cnt = 0 
161          if self.kids: 
162              if fancy: s = s + "\n" 
163              for a in self.kids: 
164                  if not fancy and (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt]) 
165                  elif (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt].strip()) 
166                  if isinstance(a, str) or isinstance(a, unicode): 
167                      s = s + a.__str__() 
168                  else: 
169                      s = s + a.__str__(fancy and fancy+1) 
170                  cnt=cnt+1 
171          if not fancy and (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt]) 
172          elif (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt].strip()) 
173          if not self.kids and s.endswith('>'): 
174              s=s[:-1]+' />' 
175              if fancy: s = s + "\n" 
176          else: 
177              if fancy and not self.data: s = s + (fancy-1) * 2 * ' ' 
178              s = s + "</" + self.name + ">" 
179              if fancy: s = s + "\n" 
180          return s 
 181   
182 -    def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None): 
 183          """ 
184          If "node" argument is provided, adds it as child node. Else creates new 
185          node from the other arguments' values and adds it as well 
186          """ 
187          if 'xmlns' in attrs: 
188              raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}") 
189          if node: 
190              newnode=node 
191              node.parent = self 
192          else: newnode=Node(tag=name, parent=self, attrs=attrs, payload=payload) 
193          if namespace: 
194              newnode.setNamespace(namespace) 
195          self.kids.append(newnode) 
196          return newnode 
 197   
199          """ 
200          Add some CDATA to node 
201          """ 
202          self.data.append(ustr(data)) 
 203   
205          """ 
206          Remove all CDATA from the node 
207          """ 
208          self.data = [] 
 209   
211          """ 
212          Delete an attribute "key" 
213          """ 
214          del self.attrs[key] 
 215   
217          """ 
218          Delete the "node" from the node's childs list, if "node" is an instance. 
219          Else delete the first node that have specified name and (optionally) 
220          attributes 
221          """ 
222          if not isinstance(node, Node): 
223              node = self.getTag(node, attrs) 
224          self.kids.remove(node) 
225          return node 
 226   
228          """ 
229          Return all node's attributes as dictionary 
230          """ 
231          return self.attrs 
 232   
234          """ 
235          Return value of specified attribute 
236          """ 
237          return self.attrs.get(key) 
 238   
240          """ 
241          Return all node's child nodes as list 
242          """ 
243          return self.kids 
 244   
246          """ 
247          Return all node CDATA as string (concatenated) 
248          """ 
249          return ''.join(self.data) 
 250   
252          """ 
253          Return the name of node 
254          """ 
255          return self.name 
 256   
258          """ 
259          Return the namespace of node 
260          """ 
261          return self.namespace 
 262   
264          """ 
265          Returns the parent of node (if present) 
266          """ 
267          return self.parent 
 268   
270          """ 
271          Return the payload of node i.e. list of child nodes and CDATA entries. 
272          F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned 
273          list: ['text1', <nodea instance>, <nodeb instance>, ' text2'] 
274          """ 
275          ret = [] 
276          for i in range(len(self.kids)+len(self.data)+1): 
277              try: 
278                  if self.data[i]: 
279                      ret.append(self.data[i]) 
280              except IndexError: 
281                  pass 
282              try: 
283                  ret.append(self.kids[i]) 
284              except IndexError: 
285                  pass 
286          return ret 
 287   
288 -    def getTag(self, name, attrs={}, namespace=None): 
 289          """ 
290          Filter all child nodes using specified arguments as filter. Return the 
291          first found or None if not found 
292          """ 
293          return self.getTags(name, attrs, namespace, one=1) 
 294   
296          """ 
297          Return attribute value of the child with specified name (or None if no 
298          such attribute) 
299          """ 
300          try: 
301              return self.getTag(tag).attrs[attr] 
302          except: 
303              return None 
 304   
306          """ 
307          Return cocatenated CDATA of the child with specified name 
308          """ 
309          try: 
310              return self.getTag(tag).getData() 
311          except Exception: 
312              return None 
 313   
333   
348   
350          """ 
351          Set attribute "key" with the value "val" 
352          """ 
353          self.attrs[key] = val 
 354   
356          """ 
357          Set node's CDATA to provided string. Resets all previous CDATA! 
358          """ 
359          self.data = [ustr(data)] 
 360   
362          """ 
363          Change the node name 
364          """ 
365          self.name = val 
 366   
368          """ 
369          Changes the node namespace 
370          """ 
371          self.namespace = namespace 
 372   
374          """ 
375          Set node's parent to "node". WARNING: do not checks if the parent already 
376          present and not removes the node from the list of childs of previous 
377          parent 
378          """ 
379          self.parent = node 
 380   
382          """ 
383          Set node payload according to the list specified. WARNING: completely 
384          replaces all node's previous content. If you wish just to add child or 
385          CDATA - use addData or addChild methods 
386          """ 
387          if isinstance(payload, basestring): 
388              payload = [payload] 
389          if add: 
390              self.kids += payload 
391          else: 
392              self.kids = payload 
 393   
394 -    def setTag(self, name, attrs={}, namespace=None): 
 395          """ 
396          Same as getTag but if the node with specified namespace/attributes not 
397          found, creates such node and returns it 
398          """ 
399          node = self.getTags(name, attrs, namespace=namespace, one=1) 
400          if node: 
401              return node 
402          else: 
403              return self.addChild(name, attrs, namespace=namespace) 
 404   
406          """ 
407          Create new node (if not already present) with name "tag" and set it's 
408          attribute "attr" to value "val" 
409          """ 
410          try: 
411              self.getTag(tag).attrs[attr] = val 
412          except Exception: 
413              self.addChild(tag, attrs={attr: val}) 
 414   
416          """ 
417          Creates new node (if not already present) with name "tag" and 
418          (optionally) attributes "attrs" and sets it's CDATA to string "val" 
419          """ 
420          try: 
421              self.getTag(tag, attrs).setData(ustr(val)) 
422          except Exception: 
423              self.addChild(tag, attrs, payload = [ustr(val)]) 
 424   
426          """ 
427          Check if node have attribute "key" 
428          """ 
429          return key in self.attrs 
 430   
432          """ 
433          Return node's attribute "item" value 
434          """ 
435          return self.getAttr(item) 
 436   
438          """ 
439          Set node's attribute "item" value 
440          """ 
441          return self.setAttr(item, val) 
 442   
444          """ 
445          Delete node's attribute "item" 
446          """ 
447          return self.delAttr(item) 
 448   
450          """ 
451          Check if node has attribute "item" 
452          """ 
453          return self.has_attr(item) 
 454   
456          """ 
457          Reduce memory usage caused by T/NT classes - use memory only when needed 
458          """ 
459          if attr == 'T': 
460              self.T = T(self) 
461              return self.T 
462          if attr == 'NT': 
463              self.NT = NT(self) 
464              return self.NT 
465          raise AttributeError 
  466   
468      """ 
469      Auxiliary class used to quick access to node's child nodes 
470      """ 
471   
473          self.__dict__['node'] = node 
 474   
476          return self.node.setTag(attr) 
 477   
483   
 486   
488      """ 
489      Auxiliary class used to quick create node's child nodes 
490      """ 
491   
494   
496          if isinstance(val, Node): 
497              self.node.addChild(attr, node=val) 
498          else: 
499              return self.node.addChild(attr, payload=[val]) 
  500   
502      """ 
503      Builds a Node class minidom from data parsed to it. This class used for two 
504      purposes: 
505   
506        1. Creation an XML Node from a textual representation. F.e. reading a 
507           config file. See an XML2Node method. 
508        2. Handling an incoming XML stream. This is done by mangling the 
509           __dispatch_depth parameter and redefining the dispatch method. 
510   
511      You do not need to use this class directly if you do not designing your own 
512      XML handler 
513      """ 
514   
515 -    def __init__(self, data=None, initial_node=None): 
 516          """ 
517          Take two optional parameters: "data" and "initial_node" 
518   
519          By default class initialised with empty Node class instance. Though, if 
520          "initial_node" is provided it used as "starting point". You can think 
521          about it as of "node upgrade". "data" (if provided) feeded to parser 
522          immidiatedly after instance init. 
523          """ 
524          log.debug("Preparing to handle incoming XML stream.") 
525          self._parser = xml.parsers.expat.ParserCreate() 
526          self._parser.StartElementHandler = self.starttag 
527          self._parser.EndElementHandler = self.endtag 
528          self._parser.StartNamespaceDeclHandler = self.handle_namespace_start 
529          self._parser.CharacterDataHandler = self.handle_cdata 
530          self._parser.buffer_text = True 
531          self.Parse = self._parser.Parse 
532   
533          self.__depth = 0 
534          self.__last_depth = 0 
535          self.__max_depth = 0 
536          self._dispatch_depth = 1 
537          self._document_attrs = None 
538          self._document_nsp = None 
539          self._mini_dom=initial_node 
540          self.last_is_data = 1 
541          self._ptr=None 
542          self.data_buffer = None 
543          self.streamError = '' 
544          if data: 
545              self._parser.Parse(data, 1) 
 546   
548          if self.data_buffer: 
549              self._ptr.data.append(''.join(self.data_buffer)) 
550              del self.data_buffer[:] 
551              self.data_buffer = None 
 552   
554          """ 
555          Method used to allow class instance to be garbage-collected 
556          """ 
557          self.check_data_buffer() 
558          self._parser.StartElementHandler = None 
559          self._parser.EndElementHandler = None 
560          self._parser.CharacterDataHandler = None 
561          self._parser.StartNamespaceDeclHandler = None 
 562   
564          """ 
565          XML Parser callback. Used internally 
566          """ 
567          self.check_data_buffer() 
568          self._inc_depth() 
569          log.info("STARTTAG.. DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, `attrs`)) 
570          if self.__depth == self._dispatch_depth: 
571              if not self._mini_dom : 
572                  self._mini_dom = Node(tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True) 
573              else: 
574                  Node.__init__(self._mini_dom, tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True) 
575              self._ptr = self._mini_dom 
576          elif self.__depth > self._dispatch_depth: 
577              self._ptr.kids.append(Node(tag=tag, parent=self._ptr, attrs=attrs, node_built=True)) 
578              self._ptr = self._ptr.kids[-1] 
579          if self.__depth == 1: 
580              self._document_attrs = {} 
581              self._document_nsp = {} 
582              nsp, name = (['']+tag.split(':'))[-2:] 
583              for attr, val in attrs.items(): 
584                  if attr == 'xmlns': 
585                      self._document_nsp[u''] = val 
586                  elif attr.startswith('xmlns:'): 
587                      self._document_nsp[attr[6:]] = val 
588                  else: 
589                      self._document_attrs[attr] = val 
590              ns = self._document_nsp.get(nsp, 'http://www.gajim.org/xmlns/undeclared-root') 
591              try: 
592                  self.stream_header_received(ns, name, attrs) 
593              except ValueError, e: 
594                  self._document_attrs = None 
595                  raise ValueError(str(e)) 
596          if not self.last_is_data and self._ptr.parent: 
597              self._ptr.parent.data.append('') 
598          self.last_is_data = 0 
 599   
601          """ 
602          XML Parser callback. Used internally 
603          """ 
604          log.info("DEPTH -> %i , tag -> %s" % (self.__depth, tag)) 
605          self.check_data_buffer() 
606          if self.__depth == self._dispatch_depth: 
607              if self._mini_dom.getName() == 'error': 
608                  children = self._mini_dom.getChildren() 
609                  if children: 
610                      self.streamError = children[0].getName() 
611                  else: 
612                      self.streamError = self._mini_dom.getData() 
613              self.dispatch(self._mini_dom) 
614          elif self.__depth > self._dispatch_depth: 
615              self._ptr = self._ptr.parent 
616          else: 
617              log.info("Got higher than dispatch level. Stream terminated?") 
618          self._dec_depth() 
619          self.last_is_data = 0 
620          if self.__depth == 0: self.stream_footer_received() 
 621   
623          if self.last_is_data: 
624              if self.data_buffer: 
625                  self.data_buffer.append(data) 
626          elif self._ptr: 
627              self.data_buffer = [data] 
628              self.last_is_data = 1 
 629   
631          """ 
632          XML Parser callback. Used internally 
633          """ 
634          self.check_data_buffer() 
 635   
637          """ 
638          Return just built Node 
639          """ 
640          self.check_data_buffer() 
641          return self._mini_dom 
 642   
644          """ 
645          Get called when the NodeBuilder reaches some level of depth on it's way 
646          up with the built node as argument. Can be redefined to convert incoming 
647          XML stanzas to program events 
648          """ 
649          pass 
 650   
652          """ 
653          Method called when stream just opened 
654          """ 
655          self.check_data_buffer() 
 656   
662   
664          """ 
665          Return True if at least one end tag was seen (at level) 
666          """ 
667          return self.__depth <= level and self.__max_depth > level 
 668   
670          self.__last_depth = self.__depth 
671          self.__depth += 1 
672          self.__max_depth = max(self.__depth, self.__max_depth) 
 673   
675          self.__last_depth = self.__depth 
676          self.__depth -= 1 
  677   
679      """ 
680      Convert supplied textual string into XML node. Handy f.e. for reading 
681      configuration file. Raises xml.parser.expat.parsererror if provided string 
682      is not well-formed XML 
683      """ 
684      return NodeBuilder(xml).getDom() 
 685   
687      """ 
688      Convert supplied textual string into XML node. Survives if xml data is 
689      cutted half way round. I.e. "<html>some text <br>some more text". Will raise 
690      xml.parser.expat.parsererror on misplaced tags though. F.e. "<b>some text 
691      <br>some more text</b>" will not work 
692      """ 
693      return NodeBuilder(xml).getDom() 
 694