Newer
Older
def __init__(self, result):
    """Build a parsed document or a single token from CoNLL-U style text.

    If *result* contains a newline it is treated as multi-line parser
    output: each line is parsed into its own entry, entries whose ID is
    not a positive integer are discarded, and the remaining entries are
    stored in ``self._tokens`` after a placeholder at index 0.

    Otherwise *result* is treated as one tab-separated line and its ten
    fields are stored as ``id``, ``form``, ``lemma``, ``upos``, ``xpos``,
    ``feats``, ``_head``, ``deprel``, ``deps`` and ``misc``.  A line that
    does not have exactly ten fields, or whose first and seventh fields
    are not integers, yields an entry whose ten fields are all ``0``.
    """
    if "\n" in result:
        # The list must exist before tokens are appended to it.  Index 0
        # is a placeholder with id 0: real tokens are the ones with
        # id > 0, and the code that serialises ``_tokens`` starts from
        # index 1.
        t = [UDPipeEntry("0\t_\t_\t_\t_\t_\t0\t_\t_\t_")]
        for r in result.split("\n"):
            w = UDPipeEntry(r)
            if w.id > 0:
                t.append(w)
        for i, w in enumerate(t):
            # ``_head`` is an ID within the same sentence, so the head's
            # list position is the token's own position shifted by the
            # ID difference.  A head of 0 points the token at itself.
            w.head = w if w._head == 0 else t[i + w._head - w.id]
            w._parent = self
        self._tokens = t
    else:
        w = result.split("\t")
        try:
            w[0], w[6] = int(w[0]), int(w[6])
        except (ValueError, IndexError):
            # Non-numeric ID/HEAD, or fewer than seven columns: fall
            # back to an all-zero entry (its id of 0 gets it filtered
            # out by the multi-line branch above).
            w = [0] * 10
        if len(w) != 10:
            w = [0] * 10
        (self.id, self.form, self.lemma, self.upos, self.xpos,
         self.feats, self._head, self.deprel, self.deps, self.misc) = w
def __setattr__(self, name, value):
    """Store an attribute, keeping ``head`` and ``id`` consistent.

    The first assignment of any attribute is stored unchanged.  On later
    assignments:

    * ``head`` takes an ID; it is converted into the entry object at the
      matching position of the parent's token list (``0`` means the
      entry itself).
    * ``id`` set to a different value swaps this entry with the entry at
      the target position in the parent's token list, and gives that
      entry this entry's old ID.
    """
    own = super(UDPipeEntry, self)
    if not hasattr(self, name):
        # Attribute does not exist yet: plain store, no translation.
        own.__setattr__(name, value)
        return

    resolved = value
    if name == "head":
        siblings = self._parent._tokens
        here = siblings.index(self)
        if value == 0:
            resolved = self
        else:
            # IDs are relative to this entry's own ID and position.
            resolved = siblings[here + value - self.id]

    if getattr(self, name) != resolved and name == "id":
        siblings = self._parent._tokens
        here = siblings.index(self)
        there = here + resolved - self.id
        # Hand the old ID to the entry being displaced, bypassing this
        # hook so the swap is not triggered a second time.
        super(UDPipeEntry, siblings[there]).__setattr__("id", siblings[here].id)
        siblings[here], siblings[there] = siblings[there], siblings[here]

    own.__setattr__(name, resolved)
r="".join(str(t)+"\n" for t in self._tokens[1:]).replace("\n1\t","\n\n1\t")
else:
r="\t".join([str(self.id),self.form,self.lemma,self.upos,self.xpos,self.feats,str(0 if self.head is self else self.head.id),self.deprel,self.deps,self.misc])
return r if type(r) is str else r.encode("utf-8")
self.editor(url="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/kyodokenkyu/ud-kanbun/conllusvg/viewer.svg")
# Prepare the text form of this object for a browser-based page.
# `url` is the page address; it defaults to the conllusvg editor page.
# NOTE(review): in the lines visible here neither `webbrowser` nor `u` is
# used after being prepared -- the statement that combines `url` with `u`
# and opens the browser is presumably missing from this view; confirm
# against the full file before changing this method.
def editor(self,url="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/kyodokenkyu/ud-kanbun/conllusvg/editor.html"):
import webbrowser
try:
# Python 3: quote() lives in urllib.parse.
import urllib.parse
# Percent-encode str(self) so it can be embedded in a URL.
u=urllib.parse.quote(str(self))
except:
# Fallback for interpreters where the import above fails (Python 2 keeps
# quote() directly in urllib). The bare except also catches any error
# raised by str(self) or quote() in the first attempt.
import urllib
u=urllib.quote(str(self))
class UDPipe2UD(object):
    """Callable client for the UDPipe web service hosted at lindat.mff.cuni.cz.

    Calling an instance with a sentence sends it to the service and wraps
    the ``"result"`` field of the JSON reply in a ``UDPipeEntry``.
    """

    def __init__(self, lang="ja", option="tokenizer=presegmented&tagger&parser"):
        """Remember the request URL for the given model and options.

        lang   -- value of the ``model`` query parameter.
        option -- remaining query string, appended verbatim after ``&``.
        """
        self.parseURL = (
            "http://lindat.mff.cuni.cz/services/udpipe/api/process?model="
            + lang + "&" + option
        )

    def __call__(self, sentence):
        """Send *sentence* to the service and return the parsed result.

        Network and decoding errors are propagated to the caller
        unchanged.
        """
        import json
        from contextlib import closing

        # Only a failed import selects the Python 2 modules.  Errors from
        # the request itself must not be caught here: they would otherwise
        # be replaced by an ImportError for ``urllib2`` on Python 3.
        try:
            from urllib.request import urlopen
            from urllib.parse import quote
        except ImportError:
            from urllib2 import urlopen
            from urllib import quote

        # A non-``str`` value (Python 2 ``unicode``) is sent as UTF-8 bytes.
        s = sentence if isinstance(sentence, str) else sentence.encode("utf-8")
        # closing() releases the connection on both Python 2 and 3.
        with closing(urlopen(self.parseURL + "&data=" + quote(s))) as r:
            # Decode explicitly so json.loads always receives text.
            q = r.read().decode("utf-8")
        return UDPipeEntry(json.loads(q)["result"])