Loading .gitignore +2 −1 Original line number Diff line number Diff line Loading @@ -10,3 +10,4 @@ seed_KTp2/test.*.result seed_KTp2/corpus.non-KT.mc seed_KTp2/corpus.mixed.mc seed_KTp2/corpus.all.mc __pycache__ seed_pulleyblank/makedict.sh +2 −2 Original line number Diff line number Diff line Loading @@ -113,7 +113,7 @@ BEGIN{ length($1)==3{ if(w[$1]!=1) printf("%s\n",$0); }' | tr ' ' , | simplify.py | mc2ud.nawk | sort -u > dict.KTp2.csv }' | tr ' ' , | simplify.sh | mc2ud.nawk | sort -u > dict.KTp2.csv sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/name.Noun.personal.csv ../seed_names2/name.Noun.surname.csv ../seed_names2/KTp2.Noun.place.?.csv | sort -u | awk -F, ' BEGIN{ c=sprintf("cat dict.gloss.csv dict.yasuoka.csv | tr %c\\011%c ,",39,39); Loading Loading @@ -145,7 +145,7 @@ BEGIN{ } if(s!="*"&&$11!="*"&&index($1,m)<1&&index($11,m)<1&&w[$1,$5,$6,$7,$8]!=1) printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n",$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,s); }' | simplify.py | mc2ud.nawk | sort -u > dict.name.csv }' | simplify.sh | mc2ud.nawk | sort -u > dict.name.csv sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/Symbol.csv | sort -u | mc2ud.nawk > dict.symbol.csv if [ ! -s dict.yasuoka.csv ] then /bin/rm -f dict.yasuoka.csv Loading seed_pulleyblank/simplify.py +1 −21 Original line number Diff line number Diff line #! /usr/bin/python3 #! /usr/bin/python3 -i # coding=utf-8 simplify={ "𡑍":"𫭼", "㑳":"㑇", Loading Loading @@ -3194,22 +3193,3 @@ simplify={ "難":"难", "頻":"频", } while True: try: s=input() except: quit() t=s.split(",") if t[10].find("/")<0: i=t[0] if t[10]=="*" or t[10]=="" else t[10] j="" for k in i: if k in simplify: j+=simplify[k] else: j+=k if i!=j: t[10]=i+"/"+j print(",".join(t)) seed_pulleyblank/simplify.sh 0 → 100755 +22 −0 Original line number Diff line number Diff line #! 
/bin/sh cat $* | python3 -c ' from simplify import simplify while True: try: s=input() except: quit() t=s.split(",") if t[10].find("/")<0: i=t[0] if t[10]=="*" or t[10]=="" else t[10] j="" for k in i: if k in simplify: j+=simplify[k] else: j+=k if i!=j: t[10]=i+"/"+j print(",".join(t)) ' exit 0 Loading
.gitignore +2 −1 Original line number Diff line number Diff line Loading @@ -10,3 +10,4 @@ seed_KTp2/test.*.result seed_KTp2/corpus.non-KT.mc seed_KTp2/corpus.mixed.mc seed_KTp2/corpus.all.mc __pycache__
seed_pulleyblank/makedict.sh +2 −2 Original line number Diff line number Diff line Loading @@ -113,7 +113,7 @@ BEGIN{ length($1)==3{ if(w[$1]!=1) printf("%s\n",$0); }' | tr ' ' , | simplify.py | mc2ud.nawk | sort -u > dict.KTp2.csv }' | tr ' ' , | simplify.sh | mc2ud.nawk | sort -u > dict.KTp2.csv sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/name.Noun.personal.csv ../seed_names2/name.Noun.surname.csv ../seed_names2/KTp2.Noun.place.?.csv | sort -u | awk -F, ' BEGIN{ c=sprintf("cat dict.gloss.csv dict.yasuoka.csv | tr %c\\011%c ,",39,39); Loading Loading @@ -145,7 +145,7 @@ BEGIN{ } if(s!="*"&&$11!="*"&&index($1,m)<1&&index($11,m)<1&&w[$1,$5,$6,$7,$8]!=1) printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n",$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,s); }' | simplify.py | mc2ud.nawk | sort -u > dict.name.csv }' | simplify.sh | mc2ud.nawk | sort -u > dict.name.csv sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/Symbol.csv | sort -u | mc2ud.nawk > dict.symbol.csv if [ ! -s dict.yasuoka.csv ] then /bin/rm -f dict.yasuoka.csv Loading
seed_pulleyblank/simplify.py +1 −21 Original line number Diff line number Diff line #! /usr/bin/python3 #! /usr/bin/python3 -i # coding=utf-8 simplify={ "𡑍":"𫭼", "㑳":"㑇", Loading Loading @@ -3194,22 +3193,3 @@ simplify={ "難":"难", "頻":"频", } while True: try: s=input() except: quit() t=s.split(",") if t[10].find("/")<0: i=t[0] if t[10]=="*" or t[10]=="" else t[10] j="" for k in i: if k in simplify: j+=simplify[k] else: j+=k if i!=j: t[10]=i+"/"+j print(",".join(t))
#! /bin/sh
# seed_pulleyblank/simplify.sh (new file, mode 100755)
#
# Filter for comma-separated dictionary lines.
# Usage: simplify.sh [FILE...]   (reads standard input when no FILE is given)
#
# For every input line whose 11th field contains no "/":
#   - take the 11th field, or the 1st field when the 11th is "*" or empty;
#   - replace each character that is a key of the "simplify" table
#     (imported from simplify.py) by its mapped value;
#   - when the result differs, set the 11th field to "<original>/<mapped>".
# Every line is then written back out, comma-joined.

# Let python3 find simplify.py next to this script even when the caller
# works in another directory.  Entries from PYTHONPATH come after the
# current directory that "python3 -c" normally searches first, so running
# from inside seed_pulleyblank behaves as before.
PYTHONPATH="$(dirname "$0")${PYTHONPATH:+:$PYTHONPATH}"
export PYTHONPATH

# "$@" instead of $*: file names containing blanks are passed to cat intact.
# NOTE: the python program below sits inside single quotes, so it must not
# contain any single-quote character.
cat "$@" | python3 -c '
import sys
from simplify import simplify

# Iterating over stdin ends cleanly at end of input; no exception is
# needed (or swallowed) to detect it.
for line in sys.stdin:
  s=line.rstrip("\n")
  t=s.split(",")
  # Lines with fewer than 11 fields have no t[10]; copy them through
  # unchanged instead of aborting and dropping the rest of the input.
  if len(t)>10 and t[10].find("/")<0:
    i=t[0] if t[10]=="*" or t[10]=="" else t[10]
    # Characters without an entry in the table are kept as they are.
    j="".join(simplify.get(k,k) for k in i)
    if i!=j:
      t[10]=i+"/"+j
  print(",".join(t))
'
# Always report success, as this script did before.
exit 0