Loading conllusvg/UDPipe2UD.py +42 −13 Original line number Diff line number Diff line #! /usr/bin/python -i # "UDPipe2UD.py" by Koichi Yasuoka, July 3, 2019. class UDPipeEntry: class UDPipeEntry(object): def __init__(self,result): self.result=result self._result=result if "\n" in result: t=[] t=[UDPipeEntry("0\t_\t_\t_\t_\t_\t0\t_\t_\t_")] for r in result.split("\n"): w=UDPipeEntry(r) if w.id>0: t.append(w) for i,w in enumerate(t): w.head=w if w.head==0 else t[i+w.head-w.id] self.tokens=t w.head=w if w._head==0 else t[i+w._head-w.id] w._parent=self self._tokens=t else: w=result.split("\t") try: w[0],w[6]=int(w[0]),int(w[6]) except: w=[0]*10 self.id,self.form,self.lemma,self.upos,self.xpos,self.feats,self.head,self.deprel,self.deps,self.misc=w if len(w)==10 else [0]*10 self.id,self.form,self.lemma,self.upos,self.xpos,self.feats,self._head,self.deprel,self.deps,self.misc=w if len(w)==10 else [0]*10 self._useResult=True def __setattr__(self,name,value): v=value if hasattr(self,name): if name=="head": t=self._parent._tokens i=t.index(self) v=self if v==0 else t[i+v-self.id] if getattr(self,name)!=v: if name=="id": t=self._parent._tokens i=t.index(self) j=i+v-self.id super(UDPipeEntry,t[j]).__setattr__("id",t[i].id) super(UDPipeEntry,t[j]).__setattr__("_useResult",False) t[i],t[j]=t[j],t[i] super(UDPipeEntry,self).__setattr__("_useResult",False) super(UDPipeEntry,self).__setattr__(name,v) def __repr__(self): if type(self.result) is str: return self.result return self.result.encode("utf-8") if self._useResult: if hasattr(self,"_tokens"): for t in self._tokens: if not t._useResult: self._useResult=False break if self._useResult: r=self._result elif hasattr(self,"_tokens"): r="".join(str(t)+"\n" for t in self._tokens[1:]) else: r="\t".join([str(self.id),self.form,self.lemma,self.upos,self.xpos,self.feats,str(0 if self.head is self else self.head.id),self.deprel,self.deps,self.misc]) return r if type(r) is str else r.encode("utf-8") def __getitem__(self,item): return self.tokens[item] return self._tokens[item] def __len__(self): return len(self.tokens) return len(self._tokens) def browse(self): self.editor(url="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/kyodokenkyu/ud-kanbun/conllusvg/viewer.svg") def editor(self,url="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/kyodokenkyu/ud-kanbun/conllusvg/editor.html"): Loading @@ -40,8 +69,8 @@ class UDPipeEntry: u=urllib.quote(str(self)) webbrowser.open(url+"#"+u) class UDPipe2UD: def __init__(self,lang="ja",option="tokenizer&tagger&parser"): class UDPipe2UD(object): def __init__(self,lang="ja",option="tokenizer=presegmented&tagger&parser"): self.parseURL="http://lindat.mff.cuni.cz/services/udpipe/api/process?model="+lang+"&"+option def __call__(self,sentence): import json Loading Loading
conllusvg/UDPipe2UD.py +42 −13 Original line number Diff line number Diff line #! /usr/bin/python -i # "UDPipe2UD.py" by Koichi Yasuoka, July 3, 2019. class UDPipeEntry: class UDPipeEntry(object): def __init__(self,result): self.result=result self._result=result if "\n" in result: t=[] t=[UDPipeEntry("0\t_\t_\t_\t_\t_\t0\t_\t_\t_")] for r in result.split("\n"): w=UDPipeEntry(r) if w.id>0: t.append(w) for i,w in enumerate(t): w.head=w if w.head==0 else t[i+w.head-w.id] self.tokens=t w.head=w if w._head==0 else t[i+w._head-w.id] w._parent=self self._tokens=t else: w=result.split("\t") try: w[0],w[6]=int(w[0]),int(w[6]) except: w=[0]*10 self.id,self.form,self.lemma,self.upos,self.xpos,self.feats,self.head,self.deprel,self.deps,self.misc=w if len(w)==10 else [0]*10 self.id,self.form,self.lemma,self.upos,self.xpos,self.feats,self._head,self.deprel,self.deps,self.misc=w if len(w)==10 else [0]*10 self._useResult=True def __setattr__(self,name,value): v=value if hasattr(self,name): if name=="head": t=self._parent._tokens i=t.index(self) v=self if v==0 else t[i+v-self.id] if getattr(self,name)!=v: if name=="id": t=self._parent._tokens i=t.index(self) j=i+v-self.id super(UDPipeEntry,t[j]).__setattr__("id",t[i].id) super(UDPipeEntry,t[j]).__setattr__("_useResult",False) t[i],t[j]=t[j],t[i] super(UDPipeEntry,self).__setattr__("_useResult",False) super(UDPipeEntry,self).__setattr__(name,v) def __repr__(self): if type(self.result) is str: return self.result return self.result.encode("utf-8") if self._useResult: if hasattr(self,"_tokens"): for t in self._tokens: if not t._useResult: self._useResult=False break if self._useResult: r=self._result elif hasattr(self,"_tokens"): r="".join(str(t)+"\n" for t in self._tokens[1:]) else: r="\t".join([str(self.id),self.form,self.lemma,self.upos,self.xpos,self.feats,str(0 if self.head is self else self.head.id),self.deprel,self.deps,self.misc]) return r if type(r) is str else r.encode("utf-8") def __getitem__(self,item): return self.tokens[item] return self._tokens[item] def __len__(self): return len(self.tokens) return len(self._tokens) def browse(self): self.editor(url="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/kyodokenkyu/ud-kanbun/conllusvg/viewer.svg") def editor(self,url="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/kyodokenkyu/ud-kanbun/conllusvg/editor.html"): Loading @@ -40,8 +69,8 @@ class UDPipeEntry: u=urllib.quote(str(self)) webbrowser.open(url+"#"+u) class UDPipe2UD: def __init__(self,lang="ja",option="tokenizer&tagger&parser"): class UDPipe2UD(object): def __init__(self,lang="ja",option="tokenizer=presegmented&tagger&parser"): self.parseURL="http://lindat.mff.cuni.cz/services/udpipe/api/process?model="+lang+"&"+option def __call__(self,sentence): import json Loading