Commit b1e08555 authored by Koichi Yasuoka
Browse files

simplification module changed

parent 54130721
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -10,3 +10,4 @@ seed_KTp2/test.*.result
seed_KTp2/corpus.non-KT.mc
seed_KTp2/corpus.mixed.mc
seed_KTp2/corpus.all.mc
__pycache__
+2 −2
Original line number Diff line number Diff line
@@ -113,7 +113,7 @@ BEGIN{
length($1)==3{
  if(w[$1]!=1)
    printf("%s\n",$0);
}' | tr ' ' , | simplify.py | mc2ud.nawk | sort -u > dict.KTp2.csv
}' | tr ' ' , | simplify.sh | mc2ud.nawk | sort -u > dict.KTp2.csv
sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/name.Noun.personal.csv ../seed_names2/name.Noun.surname.csv ../seed_names2/KTp2.Noun.place.?.csv | sort -u | awk -F, '
BEGIN{
  c=sprintf("cat dict.gloss.csv dict.yasuoka.csv | tr %c\\011%c ,",39,39);
@@ -145,7 +145,7 @@ BEGIN{
  }
  if(s!="*"&&$11!="*"&&index($1,m)<1&&index($11,m)<1&&w[$1,$5,$6,$7,$8]!=1)
    printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n",$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,s);
}' | simplify.py | mc2ud.nawk | sort -u > dict.name.csv
}' | simplify.sh | mc2ud.nawk | sort -u > dict.name.csv
sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/Symbol.csv | sort -u | mc2ud.nawk > dict.symbol.csv
if [ ! -s dict.yasuoka.csv ]
then /bin/rm -f dict.yasuoka.csv
+1 −21
Original line number Diff line number Diff line
#! /usr/bin/python3
#! /usr/bin/python3 -i
# coding=utf-8

simplify={
  "𡑍":"𫭼",
  "":"",
@@ -3194,22 +3193,3 @@ simplify={
  "":"",
  "":"",
}

# Filter loop: read comma-separated records from standard input and write
# each one to standard output.  When field 11 (index 10) does not already
# contain a "/", the text taken from that field (or from field 1 when
# field 11 is "*" or empty) is mapped character by character through the
# `simplify` table; if the mapped text differs, field 11 is rewritten as
# "<original>/<mapped>".  Every record is assumed to have at least 11
# fields; a shorter one raises IndexError.
while True:
  try:
    s=input()
  except EOFError:
    # Only end of input ends the loop; any other error (for example a
    # decoding failure) is reported instead of silently truncating output.
    break
  t=s.split(",")
  if t[10].find("/")<0:
    # Fall back to the first field when field 11 carries no value.
    i=t[0] if t[10]=="*" or t[10]=="" else t[10]
    # Characters missing from the table are kept unchanged.
    j="".join(simplify.get(k,k) for k in i)
    if i!=j:
      t[10]=i+"/"+j
  print(",".join(t))
+22 −0
Original line number Diff line number Diff line
#! /bin/sh
# Filter for comma-separated records.
# Usage: simplify.sh [FILE...]   (reads standard input when no FILE is given)
#
# Each record is written back to standard output.  When field 11 does not
# already contain a "/", the text taken from that field (or from field 1
# when field 11 is "*" or empty) is mapped character by character through
# the "simplify" table; if the mapped text differs, field 11 becomes
# "<original>/<mapped>".
#
# Requires a Python module named "simplify" that exports a dict named
# "simplify" and is importable by python3.
#
# "$@" is used instead of an unquoted $* so that file names containing
# whitespace or glob characters reach cat intact.
cat "$@" | python3 -c '
from simplify import simplify
# NOTE: this program lives inside a single-quoted shell string, so it must
# not contain any single-quote character.
while True:
  try:
    s=input()
  except EOFError:
    # Only end of input ends the loop; any other error is reported instead
    # of silently truncating the output.
    break
  t=s.split(",")
  # Every record is assumed to have at least 11 fields.
  if t[10].find("/")<0:
    # Fall back to the first field when field 11 carries no value.
    i=t[0] if t[10]=="*" or t[10]=="" else t[10]
    # Characters missing from the table are kept unchanged.
    j="".join(simplify.get(k,k) for k in i)
    if i!=j:
      t[10]=i+"/"+j
  print(",".join(t))
'
exit 0