Commit 91988646 authored by 市村 導人
Browse files

makecorpus.sh

parent 6f8c5bfa
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@
  do B=`basename $F .utf-8`
     G=$D/corpus/$B.mc.utf-8
     if [ ! -s $G ]
     then sed -e 's/[/\[/g' -e 's/]/\]/g' -e 's/\[[^]]*\]//g' -e 's/(/(/g' -e 's/)/)/g' -e 's/([^)]*)//g' `ls -1t incoming/*/[th]*/$B.utf-8 | head -1` | tr '()\133\135' '\012\012\012\012' > $G
     then sed -e 's/[/\[/g' -e 's/]/\]/g' -e 's/\[[^]]*\]//g' -e 's/(/(/g' -e 's/)/)/g' -e 's/([^)]*)//g' `ls -1t incoming/*/[th]*/$B.utf-8 | head -1` | tr '() \133\135' '\012\012\012\012\012' > $G
     elif [ $D/hakubun/$B.utf-8 -nt $G ]
     then ( sed -e 's/[/\[/g' -e 's/]/\]/g' -e 's/\[[^]]*\]//g' -e 's/(/(/g' -e 's/)/)/g' -e 's/([^)]*)//g' $D/hakubun/$B.utf-8 | tr '()\133\135' '\012\012\012\012' ; echo '' ) | tr -s '\012' '\012' > $G.a~
          sed -n -e '/	/{' -e :loop -e N -e 's/	.*\n//' -e 's/EOS//' -e '/	/bloop' -e p -e '}' $G | tr -s '\012' '\012' > $G.b~