""" atlas.py Copyright (C) 1998 Aloril This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. """ import string,re from xmllib import XMLParser from event import * import panlingua xyz_pat=re.compile(r"\(\s*(\S+)\s*,\s*(\S+)\s*,\s*(\S+)\s*\)") name_id_pat=re.compile(r"(.*)(\d+)") class node: def __init__(self,tag,attributes): self.tag=tag self.attributes=attributes self.data="" self.child=[] def __str__(self): s="<"+self.tag for a in self.attributes.keys(): s=s+" "+a+'="'+self.attributes[a]+'"' s=s+">"+self.data for c in self.child: s=s+str(c) return s+"\n" class parser(XMLParser): def parse_msg(self, msg): self.reset() self.depth=0 self.s=[] self.tree=None self.msg=msg self.feed(msg) return self.parse_tree(self.tree) def unknown_starttag(self, tag, attributes): n=node(tag,attributes) self.s.append(n) if not self.depth: self.tree=n else: self.s[-2].child.append(n) self.depth=self.depth+1 def unknown_endtag(self, tag): self.depth=self.depth-1 del self.s[-1] def handle_data(self, data): data=string.strip(data) if data: self.s[-1].data=data #parse tree def parse_child_tag(self, pt): #generic tag child tag parser (what and loc use this) if not pt.child: return None c=pt.child[0] m=getattr(self,"parse_"+c.tag) return m(c) def parse_attribute_tag(self, c, d): if c.child and c.child[0].tag=="id": d[c.tag]=self.parse_id(c.child[0]) elif c.child and c.child[0].tag=="xyz": d[c.tag]=self.parse_xyz(c.child[0]) else: mstr="parse_"+c.tag if hasattr(self,mstr): m=getattr(self,mstr) d[c.tag]=m(c) else: if c.data: try: d[c.tag]=float(c.data) except ValueError: d[c.tag]=c.data def parse_id(self,pt): return "_"+pt.data+"_"+pt.attributes.get("href","#")[1:] def parse_child_id(self, pt): if not pt.child: return None c=pt.child[0] if c.tag!="id": return None return self.parse_id(c) def parse_desc(self,pt): return pt.data def parse_what_desc(self,pt): return pt.data def parse_xyz(self, pt): r=xyz_pat.match(pt.data) if not r: return None return tuple(map(float,r.groups())) def parse_say(self, pt): string=pt.data d={} for c in pt.child: self.parse_attribute_tag(c,d) return apply(esay,(string,),d) def parse_interlinguish(self,pt): if not pt.child: return [] idd={} res=[] for c in pt.child: r=name_id_pat.match(c.attributes['name']) word=r.group(1) id=int(r.group(2)) synlink_type=lexlink_type=synlink_dir="" synlink=lexlink=None for c2 in c.child: if c2.tag=="lexlink": lexlink_type=c2.data lexlink=c2.attributes['href'] elif c2.tag=="synlink": synlink_type=c2.data if c2.attributes: synlink_dir=c2.attributes.keys()[0] synlink=idd[c2.attributes[synlink_dir][1:]] a=panlingua.atom(word,id,(lexlink,lexlink_type), (synlink,synlink_type),synlink_dir) res.append(a) idd[word+`id`]=a return res def parse_thing(self, pt): if not pt.child: return pt.data id=self.parse_child_id(pt) if id: start=1 else: start=0 id=pt.data d={} for c in pt.child[start:]: self.parse_attribute_tag(c,d) if not len(d): return id return apply(ething,(id,),d) def parse_source(self, pt): return self.parse_child_id(pt) def parse_target(self, pt): return self.parse_child_id(pt) def parse_time(self, pt): return eval(pt.data) def parse_what(self, pt): return self.parse_child_tag(pt) def parse_loc(self, pt): return self.parse_child_tag(pt) def parse_amount(self, pt): try: return float(pt.data) except ValueError: return pt.data def parse_event(self, et): if et.tag!="event" or not et.attributes.has_key("command"): return None e=event(et.attributes["command"]) for c in et.child: m=getattr(self,"parse_"+c.tag) res=m(c) setattr(e,c.tag,res) if type(e.what)==StringType and len(e.what)==0: print `e`,self.msg break_this return e def parse_tree(self, tree): e_list=[] time=tree.attributes.get("time") if time: time=eval(time) if tree.tag!="msg": return (time,e_list) for e in tree.child: e_res=self.parse_event(e) if e_res: e_list.append(e_res) return (time,e_list) xml_event_parser=parser() def parse_events(xml_str, p=xml_event_parser): #print "???",xml_str,":::" res=p.parse_msg(xml_str) #print res,"!!!" return res if __name__=="__main__": p=parser() def parse_file(name,p=p): fp=open(name) data=fp.read() fp.close() print `p.parse_msg(data)` d="../atlas/" parse_file(d+"msg.xml") parse_file(d+"msg2.xml") parse_file(d+"msg3.xml")