#! /bin/sh
# Generates the corpus.*.mc, dict.*.csv and conllu.symbol.txt files in the
# current directory from ../../ud-kanbun/Pulleyblank and ../seed_names2.
#
# Force the C locale for every tool run below.  LANG is only the fallback
# locale variable: an LC_ALL (or LC_CTYPE / LC_COLLATE) inherited from the
# caller's environment takes precedence over it.  That would change the
# ordering produced by `sort -u` and, in locale-aware awk implementations,
# make length() count characters instead of bytes (this script filters on
# `length($1)==3`).  LC_ALL overrides every other locale variable, so set it
# as well.
LANG=C
LC_ALL=C
export LANG LC_ALL
# Build corpus.pulleyblank.mc from the Pulleyblank*.txt files.
# Every non-comment, non-blank input line becomes
#   <field 2> TAB <field 5>,*,*,<field 3>,*,*,*
# and an "EOS" line is written after each sentence.
# NOTE(review): fields 2/5/3 look like FORM/XPOS/LEMMA of CoNLL-U input;
# confirm against the ud-kanbun files.
nawk '
# Skip comment lines only.  The previous pattern /^[^#]/ could not match an
# empty line, so the blank-line branch below was unreachable and sentence
# ends were detected solely by the token-number fallback.
!/^#/{
  if($0==""){
    # Blank line: close the sentence in progress, if there is one.
    if(n>0)
      printf("EOS\n");
    n=0;
  }
  else{
    # Fallback boundary: the token number in field 1 did not increase, so a
    # new sentence started without a separating blank line.
    if(n>=$1)
      printf("EOS\n");
    n=$1;
    printf("%s\t%s,*,*,%s,*,*,*\n",$2,$5,$3);
  }
}
END{
  # Close the last sentence when the input does not end with a blank line.
  if(n>0)
    printf("EOS\n");
}' ../../ud-kanbun/Pulleyblank/Pulleyblank*.txt > corpus.pulleyblank.mc

# Rewrite the three seed corpora from ../seed_names2 into the current
# directory under the same file names.  "EOS" lines are passed through; for
# every other line the comma-separated second field is reduced to its
# sub-fields 1-4 and 7, with "*" written in the remaining positions.
for F in corpus.KT0.mc corpus.kanjikai.mc corpus.misc.mc; do
  nawk '
  $1=="EOS"{
    printf("EOS\n");
    next;
  }
  {
    split($2,feat,",");
    printf("%s\t%s,%s,%s,%s,*,*,%s,*,*,*\n",$1,feat[1],feat[2],feat[3],feat[4],feat[7]);
  }' "../seed_names2/$F" > "$F"
done

# Build dict.pulleyblank.csv from corpus.pulleyblank.mc: drop the sentence
# markers, replace the first tab of each token line with ",0,0,0,"
# (presumably placeholder id/cost columns -- TODO confirm), and keep one copy
# of each distinct line.
# The marker is matched as a whole line (^EOS$): an unanchored /EOS/ would
# also delete any token line that merely contains the text "EOS".
# The tab is produced with printf so the sed script does not depend on an
# invisible literal tab character surviving in this file.
sed -e '/^EOS$/d' -e "s/$(printf '\t')/,0,0,0,/" corpus.pulleyblank.mc | sort -u > dict.pulleyblank.csv
# Build dict.KTp2.csv from ../seed_names2/KTp2.*.csv:
#   1. replace the last three comma-separated fields of each line by "*,*,*";
#   2. sort and remove duplicate lines;
#   3. keep only entries whose first field has length 3 and does not occur
#      as the first field of any line of corpus.pulleyblank.mc.
# Commas are turned into spaces around the awk step so that awk's default
# field splitting isolates the first field, and turned back afterwards.
sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/KTp2.*.csv |
  sort -u |
  tr , ' ' |
  nawk '
  BEGIN{
    # 39 is the character code of the single quote, which cannot be written
    # literally inside this single-quoted awk program.
    reader=sprintf("tr , %c %c < corpus.pulleyblank.mc",39,39);
    while((reader|getline)>0)
      seen[$1]=1;
    close(reader);
  }
  length($1)==3 && seen[$1]!=1{
    printf("%s\n",$0);
  }' |
  tr ' ' , > dict.KTp2.csv
# Build dict.name.csv and dict.ymzknk.csv: for each group, take the matching
# ../seed_names2/<group>.*.csv files, replace the last three comma-separated
# fields of every line by "*,*,*", and keep one copy of each distinct line.
for D in name ymzknk; do
  sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/"$D".*.csv | sort -u > "dict.$D.csv"
done
# Build dict.symbol.csv from ../seed_names2/Symbol.csv (last three fields
# replaced by "*,*,*", duplicates removed) and, from the same stream, write
# conllu.symbol.txt: one single-token PUNCT entry per dictionary line, each
# followed by a blank line.  Field 1 and field 11 fill the second and third
# columns; fields 5-8 are joined with commas into the fifth column.
sed 's/,[^,]*,[^,]*,[^,]*$/,*,*,*/' ../seed_names2/Symbol.csv |
  sort -u |
  tee dict.symbol.csv |
  awk '
  BEGIN{
    FS=",";
  }
  {
    printf("1\t%s\t%s\tPUNCT\t%s,%s,%s,%s\t_\t0\troot\t_\tSpaceAfter=No\n\n",$1,$11,$5,$6,$7,$8);
  }' > conllu.symbol.txt
# Copy the *.def files and dicrc from ../seed_names2 into the current
# directory, preserving their attributes (-p), then finish with exit status 0
# regardless of how the commands above fared.
cp -p ../seed_names2/*.def ../seed_names2/dicrc .
exit 0
