Skip to content
Snippets Groups Projects
Commit dfd3b65e authored by MORIOKA Tomohiko
Browse files

- Generate corpus.non-KT.mc, corpus.mixed.mc and corpus.all.mc.

(PATH): Use /usr/local/Cellar/mecab/0.996/libexec/mecab instead of
/usr/local/Cellar/mecab/0.994/libexec/mecab.
(CORPUS_LIST): Add "KT0", "KT2", "non-KT", "mixed" and "all".
parent 85b8fbf9
No related branches found
No related tags found
No related merge requests found
#!/bin/sh
# Concatenate the individual corpus files into the combined corpora
# (non-KT, mixed, all) and then run mecab-test-gen once per corpus named in
# CORPUS_LIST, writing its output to test.<name>.
#
# Reads (from the current directory):
#   corpus.KT0.mc corpus.KT2.mc corpus.misc.mc corpus.kanjikai.mc
#   corpus.ryomou.mc
# Writes (to the current directory):
#   corpus.non-KT.mc corpus.mixed.mc corpus.all.mc
#   test.<name> for every <name> in CORPUS_LIST

# Stop at the first failing command or unset variable so that a missing
# input file cannot silently yield a truncated combined corpus or test file.
set -eu

# Fixed search path; the trailing entries are the directories in which
# mecab-test-gen is looked up (the Cellar entry points at mecab 0.996).
PATH="/usr/local/bin:/usr/local/libexec/mecab:/bin:/usr/lib/mecab:/usr/local/Cellar/mecab/0.996/libexec/mecab"
export PATH
export LANG=ja_JP.UTF-8

# Corpus files shared by every combined corpus, in concatenation order.
BASE_CORPORA="corpus.misc.mc corpus.kanjikai.mc corpus.ryomou.mc"

# Word splitting of $BASE_CORPORA is intentional: it holds a list of
# whitespace-free file names.
# shellcheck disable=SC2086
cat $BASE_CORPORA > corpus.non-KT.mc
# shellcheck disable=SC2086
cat corpus.KT2.mc $BASE_CORPORA > corpus.mixed.mc
# shellcheck disable=SC2086
cat corpus.KT0.mc $BASE_CORPORA > corpus.all.mc

# Alternative corpus list kept for reference (currently disabled):
#CORPUS_LIST="misc kanjikai ryomou jts-wa jts-JP"
CORPUS_LIST="KT0 KT2 misc kanjikai ryomou non-KT mixed all"

# Word splitting of $CORPUS_LIST is intentional (space-separated names).
for name in $CORPUS_LIST; do
  mecab-test-gen < "corpus.$name.mc" > "test.$name"
done

# Disabled pipeline that wrote misc.corpus.csv; kept for reference:
#cat corpus \
#cat corpus.*.txt \
#cat corpus.*.mc \
#| grep -v EOS | sed 's/ /,0,0,0,/' | sort | uniq > misc.corpus.csv
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment