#! /bin/sh
# Rebuild the dictionary CSVs (dict.*.csv) and corpus files (corpus.*.mc) in
# the current directory.
#
# Inputs:
#   gloss.orig.txt              whitespace-separated gloss table (sorted in place)
#   ../../ud-kanbun/...         annotated text files (Pulleyblank, kR1h0001)
#   ../seed_names2/             seed corpora, seed CSVs, *.def and dicrc
# Outputs (current directory):
#   dict.gloss.csv dict.yasuoka.csv dict.KTp2.csv dict.name.csv dict.symbol.csv
#   corpus.pulleyblank.mc corpus.mencius.mc corpus.KT0.mc corpus.kanjikai.mc
#   corpus.misc.mc, plus copies of *.def and dicrc

# Force the C locale for sort/awk/tr.  LC_ALL is set as well because it takes
# precedence over LANG; otherwise a caller's LC_ALL would silently override it.
LANG=C
LC_ALL=C
export LANG LC_ALL

# The .mc files written below separate surface and features with a tab.
TAB=$(printf '\t')

# Refresh the Pulleyblank checkout.  "&&" makes sure git pull never runs in
# the wrong repository when the directory is missing.
(
  cd ../../ud-kanbun/Pulleyblank && git pull
)

# --- dict.gloss.csv / dict.yasuoka.csv -------------------------------------
# Start from empty files so both exist even when no row is routed to them.
: > dict.gloss.csv
: > dict.yasuoka.csv

# Sort the gloss table in place, keyed from the second field onward
# ("-k 2" is the POSIX spelling of the obsolescent "+1").
sort -k 2 -o gloss.orig.txt gloss.orig.txt

# Only 4-field rows are used.  Rows whose 4th field contains "*" or "?" go to
# dict.yasuoka.csv with "*" in the last column; all others go to
# dict.gloss.csv with the 4th field as the last column.
nawk '
NF==4{
  if($4~/[*?]/)
    printf("%s,0,0,0,%s,*,*,%s,*,*,*\n",$1,$3,$2)>"dict.yasuoka.csv";
  else
    printf("%s,0,0,0,%s,*,*,%s,*,*,%s\n",$1,$3,$2,$4)>"dict.gloss.csv";
}' gloss.orig.txt

# --- corpora derived from ud-kanbun ----------------------------------------
# For every non-comment line emit "$2<TAB>$5,*,*,$3,*,*,gloss", where gloss is
# the Gloss=... value found in $10 (or "*").  "EOS" is emitted whenever the
# number in $1 does not increase, and once more at the end.  Empty lines never
# match /^[^#]/, so the $0=="" branch is not reached; it is kept unchanged.
for F in corpus.pulleyblank.mc corpus.mencius.mc
do
  (
    case $F in
    corpus.pulleyblank.mc)
      cd ../../ud-kanbun/Pulleyblank && cat Pulleyblank*.txt
      ;;
    corpus.mencius.mc)
      cd ../../ud-kanbun/kanripo/kR1h0001 && cat */*.txt
      ;;
    esac
  ) | nawk '
/^[^#]/{
  if($0==""){
    if(n>0)
      printf("EOS\n");
    n=0;
  }
  else{
    if(n>=$1)
      printf("EOS\n");
    n=$1;
    if(match($10,/Gloss=[^|]+/)>0)
      g=substr($10,RSTART+6,RLENGTH-6);
    else
      g="*";
    printf("%s\t%s,*,*,%s,*,*,%s\n",$2,$5,$3,g);
  }
}
END{
  if(n>0)
    printf("EOS\n");
}' > "$F"
done

# --- corpora converted from ../seed_names2 ---------------------------------
# Keep EOS lines; for other lines keep feature columns 1-4 and 7 of the
# comma-separated second field and pad the rest with "*".
for F in corpus.KT0.mc corpus.kanjikai.mc corpus.misc.mc
do
  nawk '
{
  if($1=="EOS")
    printf("EOS\n");
  else{
    split($2,a,",");
    printf("%s\t%s,%s,%s,%s,*,*,%s,*,*,*\n",$1,a[1],a[2],a[3],a[4],a[7]);
  }
}' "../seed_names2/$F" > "$F"
done

# --- dict.KTp2.csv ---------------------------------------------------------
# Turn every corpus token into a CSV row (surface,0,0,0,features) and merge
# with the seed CSVs (last three columns replaced by "*").  Keep only rows
# whose first field has length 3 (bytes in the C locale -- presumably one
# UTF-8 CJK character; confirm) and whose first field is not already present
# in dict.gloss.csv or dict.yasuoka.csv.  Commas are swapped for spaces
# around the awk step so that awk can split on whitespace.
# NOTE(review): the separator replaced by the first sed is written as a tab
# because that is what the printf calls above emit; confirm against upstream.
(
  sed -e /EOS/d -e "s/${TAB}/,0,0,0,/" \
    corpus.pulleyblank.mc corpus.mencius.mc \
    corpus.KT0.mc corpus.kanjikai.mc corpus.misc.mc
  sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' \
    ../seed_names2/KTp2.*.csv ../seed_names2/name.Noun.girei.csv
) | sort -u | tr , ' ' | nawk '
BEGIN{
  c=sprintf("cat dict.gloss.csv dict.yasuoka.csv | tr , %c %c",39,39);
  while((c|getline)>0)
    w[$1]=1;
  close(c);
}
length($1)==3{
  if(w[$1]!=1)
    printf("%s\n",$0);
}' | tr ' ' , > dict.KTp2.csv

# --- dict.name.csv ---------------------------------------------------------
# Name seed rows with the last three columns replaced by "*"; for rows whose
# columns 5-8 match one of the name categories below, column 14 is replaced
# by the corresponding bracketed tag.
sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' \
  ../seed_names2/name.Noun.personal.csv \
  ../seed_names2/name.Noun.surname.csv \
  ../seed_names2/KTp2.Noun.place.?.csv |
sort -u |
awk -F, '
{
  s=$14;
  if($5=="n"&&$6=="名詞"){
    if($7=="人"&&$8=="姓氏")
      s="[surname]";
    else if($7=="人"&&$8=="名")
      s="[given-name]";
    else if($7=="主体"&&$8=="書物")
      s="[book-name]";
    else if($7=="主体"&&$8=="国名")
      s="[country-name]";
    else if($7=="固定物"&&$8=="地名")
      s="[place-name]";
  }
  printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n",$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,s);
}' > dict.name.csv

# --- dict.symbol.csv and support files -------------------------------------
sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/Symbol.csv |
sort -u > dict.symbol.csv

cp -p ../seed_names2/*.def .
cp -p ../seed_names2/dicrc .

# Remove dict.yasuoka.csv when no row was routed to it (file is empty).
if [ ! -s dict.yasuoka.csv ]
then
  /bin/rm -f dict.yasuoka.csv
fi
exit 0