Commit dfd3b65e authored by MORIOKA Tomohiko
Browse files

- Generate corpus.non-KT.mc, corpus.mixed.mc and corpus.all.mc.

(PATH): Use /usr/local/Cellar/mecab/0.996/libexec/mecab instead of
/usr/local/Cellar/mecab/0.994/libexec/mecab.
(CORPUS_LIST): Add "KT0", "KT2", "non-KT", "mixed" and "all".
parent 85b8fbf9
Loading
Loading
Loading
Loading
+12 −4
Original line number Diff line number Diff line
#!/bin/sh
#
# Build the aggregate corpora (non-KT, mixed, all) from the individual
# corpus.*.mc files in the current directory, then feed every corpus named
# in CORPUS_LIST to mecab-test-gen, writing its output to test.<name>.

# The search path is replaced, not extended.  The mecab libexec directories
# are listed presumably so that mecab-test-gen can be found -- confirm on the
# target machine.  The Cellar entry is pinned to mecab 0.996 and has to be
# updated whenever the installed mecab version changes.
PATH="/usr/local/bin:/usr/local/libexec/mecab:/bin:/usr/lib/mecab:/usr/local/Cellar/mecab/0.996/libexec/mecab"
export PATH

export LANG=ja_JP.UTF-8

# Aggregate corpora.  All three share misc + kanjikai + ryomou:
#   non-KT = the shared set only
#   mixed  = KT2 + the shared set
#   all    = KT0 + the shared set
cat corpus.misc.mc corpus.kanjikai.mc corpus.ryomou.mc \
    > corpus.non-KT.mc

cat corpus.KT2.mc corpus.misc.mc corpus.kanjikai.mc corpus.ryomou.mc \
    > corpus.mixed.mc

cat corpus.KT0.mc corpus.misc.mc corpus.kanjikai.mc corpus.ryomou.mc \
    > corpus.all.mc

# Alternative corpus selection, currently disabled:
#CORPUS_LIST="misc kanjikai ryomou jts-wa jts-JP"
# Individual corpora first, then the aggregates generated above.
CORPUS_LIST="KT0 KT2 misc kanjikai ryomou non-KT mixed all"

# $CORPUS_LIST is intentionally unquoted: it is a space-separated list that
# must be word-split into one name per iteration.
for corpus in $CORPUS_LIST
do
    mecab-test-gen < "corpus.$corpus.mc" > "test.$corpus"
done

#cat corpus \
#cat corpus.*.txt \
#cat corpus.*.mc \
#| grep -v EOS | sed 's/	/,0,0,0,/' | sort | uniq > misc.corpus.csv