Commit 9011efee authored by MORIOKA Tomohiko
Browse files

Use corpus.*.mc instead of corpus.*.txt.

parent ecdde58b
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -2,11 +2,10 @@

export LANG=ja_JP.UTF-8

-cat corpus.*.txt > corpus
+cat corpus.*.mc > corpus
#cat corpus.kanjikai.txt > corpus
#cat corpus.kanjikai.txt corpus.ryomou.txt > corpus
#cat corpus.misc.txt > corpus

-#cat corpus \
-cat corpus.*.txt \
+cat corpus \
| grep -v EOS | sed 's/	/,0,0,0,/' | sort | uniq > misc.corpus.csv