Newer
Older
# Refresh the Pulleyblank checkout. The subshell keeps the directory change
# local to this step. git pull is chained with && so that it only runs when
# the cd succeeded; otherwise a missing checkout would make git pull run in
# the directory the script was started from.
( cd ../../ud-kanbun/Pulleyblank && git pull )
# Start from empty dictionary files so that both exist even when the awk pass
# below writes nothing to one of them.
for csv in dict.gloss.csv dict.yasuoka.csv
do
  cp /dev/null "$csv"
done
# Split the 4-column rows of gloss.orig.txt (rows with any other column count
# are ignored): rows whose 4th column contains "*" or "?" go to
# dict.yasuoka.csv with "*" as the last field; all remaining rows go to
# dict.gloss.csv and carry the 4th column as the last field.
nawk '
NF != 4 { next }
$4 ~ /[*?]/ {
  printf("%s,0,0,0,%s,*,*,%s,*,*,*\n", $1, $3, $2) > "dict.yasuoka.csv"
  next
}
{
  printf("%s,0,0,0,%s,*,*,%s,*,*,%s\n", $1, $3, $2, $4) > "dict.gloss.csv"
}' gloss.orig.txt
# Build one corpus per source name. Each iteration runs in a subshell and a
# case on $F selects the source directory: the pulleyblank arm changes into
# ../../ud-kanbun/Pulleyblank and concatenates Pulleyblank*.txt, the mencius
# arm changes into ../../ud-kanbun/kanripo/kR1h0001.
# NOTE(review): this section looks incomplete as it stands. The mencius arm is
# followed directly by what reads as an awk program: no command for that arm,
# no esac, no awk invocation and no opening quote are visible. The END block
# at the bottom is likewise never closed (no closing brace or quote, no output
# redirection, no closing parenthesis, no done) before the next loop begins.
# Restore the missing lines from a known-good copy before running this.
for F in corpus.pulleyblank.mc corpus.mencius.mc
do ( case $F in
corpus.pulleyblank.mc) cd ../../ud-kanbun/Pulleyblank
cat Pulleyblank*.txt ;;
corpus.mencius.mc) cd ../../ud-kanbun/kanripo/kR1h0001
# Handle only records whose first character is not "#".
/^[^#]/{
# NOTE(review): an empty record cannot match /^[^#]/, so this empty-record
# branch looks unreachable - confirm against the intended input format.
if($0==""){
if(n>0)
printf("EOS\n");
n=0;
}
else{
# n holds field 1 of the previous record; when field 1 does not increase,
# an EOS line is written before the current record (presumably the ID
# restarting at a new sentence - confirm).
if(n>=$1)
printf("EOS\n");
n=$1;
# g becomes the text after "Gloss=" up to the next "|" in field 10, or "*"
# when field 10 has no such entry.
if(match($10,/Gloss=[^|]+/)>0)
g=substr($10,RSTART+6,RLENGTH-6);
else
g="*";
# Output: field 2, a tab, then field 5, field 3 and the gloss inside a
# comma-separated list padded with "*".
printf("%s\t%s,*,*,%s,*,*,%s\n",$2,$5,$3,g);
}
}
# Write a final EOS when records were seen after the last EOS.
END{
if(n>0)
printf("EOS\n");
# Convert the seed corpora from ../seed_names2 into same-named files here.
# EOS lines pass through unchanged; for every other line the second column is
# split on commas and only its items 1-4 and 7 are kept, with "*" filling the
# remaining positions of the output record.
for F in corpus.KT0.mc corpus.kanjikai.mc corpus.misc.mc
do
  nawk '
$1 == "EOS" { printf("EOS\n"); next }
{
  split($2, feat, ",")
  printf("%s\t%s,%s,%s,%s,*,*,%s,*,*,*\n", $1, feat[1], feat[2], feat[3], feat[4], feat[7])
}' "../seed_names2/$F" > "$F"
done
# Build dict.KTp2.csv from every corpus entry and seed dictionary row whose
# surface form is not already listed in dict.gloss.csv or dict.yasuoka.csv.
#  1. Corpus lines (surface, TAB, features) become dictionary rows by
#     replacing the tab with ",0,0,0,"; lines containing EOS are dropped.
#     The corpus files are written with a tab separator, so the pattern must
#     be a tab; printf generates it so the script does not depend on a literal
#     tab character surviving editors or copy/paste.
#  2. Seed CSV rows get their last three fields reset to "*".
#  3. Commas are turned into blanks so awk can split on whitespace, and are
#     turned back into commas after filtering.
( sed -e /EOS/d -e "$(printf 's/\t/,0,0,0,/')" corpus.pulleyblank.mc corpus.mencius.mc corpus.KT0.mc corpus.kanjikai.mc corpus.misc.mc
sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/KTp2.*.csv ../seed_names2/name.Noun.girei.csv
) | sort -u | tr , ' ' | nawk '
BEGIN{
# Remember every surface already present in the gloss/yasuoka dictionaries.
# %c with 39 produces a single-quote character, so the command that runs is
# cat of both files piped into tr, translating commas to a quoted blank.
c=sprintf("cat dict.gloss.csv dict.yasuoka.csv | tr , %c %c",39,39);
while((c|getline)>0)
w[$1]=1;
close(c);
}
# Keep rows whose surface has length 3 and is not in a dictionary yet.
# NOTE(review): length 3 presumably stands for one 3-byte UTF-8 character
# under a byte-counting awk - confirm for the awk and locale in use.
length($1)==3{
if(w[$1]!=1)
printf("%s\n",$0);
}' | tr ' ' , > dict.KTp2.csv
# Name dictionary: reset the last three CSV fields of the personal-name,
# surname and place seed rows to "*", de-duplicate, then rewrite field 14.
# For rows whose fields 5 and 6 are "n" and "名詞", field 14 becomes a
# bracketed label chosen from fields 7 and 8; every other row keeps its
# field 14. Exactly 14 fields are written per row.
sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' \
  ../seed_names2/name.Noun.personal.csv \
  ../seed_names2/name.Noun.surname.csv \
  ../seed_names2/KTp2.Noun.place.?.csv |
  sort -u |
  awk -F, '
{
  tag = $14
  if ($5 == "n" && $6 == "名詞") {
    if ($7 == "人") {
      if ($8 == "姓氏")
        tag = "[surname]"
      else if ($8 == "名")
        tag = "[given-name]"
    } else if ($7 == "主体") {
      if ($8 == "書物")
        tag = "[book-name]"
      else if ($8 == "国名")
        tag = "[country-name]"
    } else if ($7 == "固定物" && $8 == "地名") {
      tag = "[place-name]"
    }
  }
  printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n", $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, tag)
}' > dict.name.csv
# Symbol dictionary: reset the last three CSV fields to "*" and de-duplicate.
sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/Symbol.csv |
  sort -u > dict.symbol.csv
# Copy the *.def files and dicrc from the seed directory; -p keeps their
# modes and timestamps.
cp -p ../seed_names2/*.def .
cp -p ../seed_names2/dicrc .
# Remove dict.yasuoka.csv when it is missing or empty.
[ -s dict.yasuoka.csv ] || /bin/rm -f dict.yasuoka.csv