Skip to content
UDPipe2UD.py 2.82 KiB
Newer Older
#! /usr/bin/python -i
Koichi Yasuoka's avatar
Koichi Yasuoka committed
# "UDPipe2UD.py" by Koichi Yasuoka, July 3, 2019.
Koichi Yasuoka's avatar
Koichi Yasuoka committed
class UDPipeEntry(object):
  """A parse result in 10-column tab-separated form, or one token line of it.

  Built from a string containing a newline, the entry is a document: its
  lines are parsed into token entries, reachable through indexing and
  ``len()``.  Index 0 is a placeholder root token; lines that do not parse
  as a token (comments, blank lines, wrong column count) are skipped.

  Built from a string without a newline, the entry is a single token with
  the attributes ``id``, ``form``, ``lemma``, ``upos``, ``xpos``, ``feats``,
  ``deprel``, ``deps`` and ``misc``.  Tokens that belong to a document also
  have ``head``, which is the head token object itself (a root token is its
  own head).
  """

  def __init__(self,result):
    """Parse ``result``: a whole document if it contains a newline, else one token line."""
    if "\n" in result:
      # Placeholder root at index 0 so that the real tokens start at index 1.
      tokens=[UDPipeEntry("0\t_\t_\t_\t_\t_\t0\t_\t_\t_")]
      for line in result.split("\n"):
        w=UDPipeEntry(line)
        # Unparsable lines come back with id 0 and are dropped.
        if w.id>0:
          tokens.append(w)
      for i,w in enumerate(tokens):
        # Head numbers are resolved relative to the token's own position,
        # so ids may restart at 1 for each sentence of the document.
        w.head=w if w._head==0 else tokens[i+w._head-w.id]
        w._parent=self
      self._tokens=tokens
      # Keep the original text; it is returned verbatim until a token changes.
      self._result=result
    else:
      fields=result.split("\t")
      try:
        fields[0],fields[6]=int(fields[0]),int(fields[6])
      except (ValueError,IndexError):
        # Non-numeric id/head or fewer than 7 columns: not a token line.
        fields=[0]*10
      if len(fields)!=10:
        fields=[0]*10
      (self.id,self.form,self.lemma,self.upos,self.xpos,self.feats,
       self._head,self.deprel,self.deps,self.misc)=fields
      self._result=""

  def __setattr__(self,name,value):
    """Assign an attribute, keeping the owning document consistent.

    Creating a new attribute is a plain assignment.  Re-assigning an
    existing attribute of a token that belongs to a document:

    * ``head`` takes a token number (0 makes the token its own head) and
      stores the corresponding token object;
    * any real change clears the document's cached text so that it is
      rebuilt from the tokens;
    * ``id`` additionally swaps this token with the one currently at the
      requested position, which receives this token's old id.

    Entries without a parent document are simply updated.
    """
    v=value
    if hasattr(self,name):
      parent=getattr(self,"_parent",None)
      if name=="head" and parent is not None:
        t=parent._tokens
        i=t.index(self)
        v=self if v==0 else t[i+v-self.id]
      if getattr(self,name)!=v and parent is not None:
        # Bypass this method for the bookkeeping writes below.
        super(UDPipeEntry,parent).__setattr__("_result","")
        if name=="id":
          t=parent._tokens
          i=t.index(self)
          j=i+v-self.id
          super(UDPipeEntry,t[j]).__setattr__("id",self.id)
          t[i],t[j]=t[j],t[i]
    super(UDPipeEntry,self).__setattr__(name,v)

  def __repr__(self):
    """Return the entry as tab-separated text.

    A document returns its cached original text while unmodified;
    otherwise one line per token (placeholder root excluded), with a blank
    line inserted before every later line whose id is 1.  A token returns
    its own ten columns.
    """
    if self._result!="":
      r=self._result
    elif hasattr(self,"_tokens"):
      r="".join(str(t)+"\n" for t in self._tokens[1:]).replace("\n1\t","\n\n1\t")
    else:
      # A token that never joined a document has no resolved ``head``;
      # fall back to the head number parsed from its line.
      head=getattr(self,"head",None)
      if head is None:
        h=self._head
      else:
        h=0 if head is self else head.id
      r="\t".join([str(self.id),self.form,self.lemma,self.upos,self.xpos,
                   self.feats,str(h),self.deprel,self.deps,self.misc])
    # Python 2: unicode text is returned as UTF-8 encoded bytes.
    return r if isinstance(r,str) else r.encode("utf-8")

  def __getitem__(self,item):
    """Return the token(s) at ``item``; index 0 is the placeholder root."""
    return self._tokens[item]

  def __len__(self):
    """Return the number of tokens, placeholder root included."""
    return len(self._tokens)

  def browse(self):
    """Open this entry in the web browser using the SVG viewer page."""
    self.editor(url="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/kyodokenkyu/ud-kanbun/conllusvg/viewer.svg")

  def editor(self,url="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/kyodokenkyu/ud-kanbun/conllusvg/editor.html"):
    """Open ``url`` in the web browser with this entry's text, URL-quoted, as the fragment."""
    import webbrowser
    try:
      from urllib.parse import quote
    except ImportError:
      # Python 2
      from urllib import quote
    webbrowser.open(url+"#"+quote(str(self)))
Koichi Yasuoka's avatar
Koichi Yasuoka committed
class UDPipe2UD(object):
  """Callable client for the UDPipe web service at lindat.mff.cuni.cz.

  Calling an instance with a sentence sends it to the service and wraps
  the ``result`` field of the JSON reply in a ``UDPipeEntry``.
  """

  def __init__(self,lang="ja",option="tokenizer=presegmented&tagger&parser"):
    """Build the request URL from the model name ``lang`` and the query string ``option``.

    Both values are inserted into the URL as given, without quoting.
    """
    self.parseURL="http://lindat.mff.cuni.cz/services/udpipe/api/process?model="+lang+"&"+option

  def __call__(self,sentence):
    """Send ``sentence`` to the service and return the result as a ``UDPipeEntry``.

    Errors raised while contacting the service or decoding its reply
    propagate to the caller unchanged.
    """
    import json
    # Python 2: unicode input is sent as UTF-8 bytes.
    s=sentence if isinstance(sentence,str) else sentence.encode("utf-8")
    # Only the imports are guarded, so a failed request is not mistaken
    # for "running on Python 2".
    try:
      from urllib.request import urlopen
      from urllib.parse import quote
    except ImportError:
      # Python 2
      from urllib2 import urlopen
      from urllib import quote
    r=urlopen(self.parseURL+"&data="+quote(s))
    try:
      q=r.read().decode("utf-8")
    finally:
      r.close()
    return UDPipeEntry(json.loads(q)["result"])