All downloads are free. The search and download features use the official Maven repository.

org.languagetool.resource.sk.create_dictionary.sh Maven / Gradle / Ivy

There is a newer version: 6.5
Show newest version
#!/bin/sh
#
# Builds the binary Slovak POS and synthesizer dictionaries for LanguageTool.
#
# To build the dictionary:
# 1) get the file with lemma form tag information (TAB separated) from http://korpus.sk
# 2) place it into /tmp/ma.txt.xz
# 3) run this script
#
# Requires: xzcat, sort with --parallel support, readlink with -m support,
# mktemp, java, and bin/filter_lft.py relative to this script's directory.

set -e

# file with the lemma-form-tag information
MA_FILE=/tmp/ma.txt.xz

# /bin/sh has no portable 'pipefail', so a failing xzcat at the head of the
# pipeline below would go unnoticed; check the input explicitly instead.
if [ ! -r "$MA_FILE" ]; then
  printf '%s: input file %s not found or not readable\n' "$0" "$MA_FILE" >&2
  exit 1
fi

# bin/filter_lft.py is referenced relative to the script's own directory
cd -- "$(dirname -- "$0")"

# working directory where this script is run from
WORKDIR=$(pwd)

# directory where the working tree is kept
LTDIR=$WORKDIR/../../../../../../../../../languagetool-standalone/target/LanguageTool-2.9-SNAPSHOT/LanguageTool-2.9-SNAPSHOT
LTDIR=$(readlink -m -- "$LTDIR")

# directory where languagetool sk resources are kept
SKDIR=$WORKDIR/../../../../../../../../../languagetool-language-modules/sk/src/main/resources/
SKDIR=$(readlink -m -- "$SKDIR")

# directory inside SKDIR that holds the Slovak .info/.dict/tags files
RESDIR=$SKDIR/org/languagetool/resource/sk

# mktemp replaces the deprecated, Debian-specific 'tempfile' utility
tmp=$(mktemp) || exit

# Remove the intermediate file on any exit path. A function is used (rather
# than a string with the path embedded) so the path is expanded safely at
# cleanup time regardless of the characters it contains.
cleanup() {
  rm -f -- "$tmp"
}
trap cleanup EXIT

# decompress, filter and de-duplicate the lemma-form-tag lines
xzcat -- "$MA_FILE" | bin/filter_lft.py | sort -u --parallel=4 > "$tmp"

# show the first lines of the filtered input as a quick sanity check
head -- "$tmp"

# languagetool.jar is referenced relative to LTDIR below
cd -- "$LTDIR"

# The Java builders write their output files into /tmp; remove leftovers of
# earlier runs so the globs used by the 'mv' commands below match only the
# files produced by this run.
rm -v -f /tmp/SynthDictionaryBuilder*.txt_tags.txt
rm -v -f /tmp/DictionaryBuilder*.dict

# Building the binary POS dictionary
java -cp languagetool.jar org.languagetool.dev.POSDictionaryBuilder "$tmp" "$RESDIR/slovak.info"

mv -v /tmp/DictionaryBuilder*.dict "$RESDIR/slovak.dict"

# Building the binary synthesizer dictionary
java -cp languagetool.jar org.languagetool.dev.SynthDictionaryBuilder "$tmp" "$RESDIR/slovak_synth.info"

mv -v /tmp/DictionaryBuilder*.dict "$RESDIR/slovak_synth.dict"
mv -v /tmp/SynthDictionaryBuilder*.txt_tags.txt "$RESDIR/slovak_tags.txt"

# NOTE(review): this export only affects processes started from this script;
# it does not change the environment of the invoking shell.
export MAVEN_OPTS='-Xmx512m -XX:MaxPermSize=256m'

# now you can build it with:
#cd $WORKDIR/../
#mvn clean package

# explicit verbose cleanup on success, then disarm the trap
rm -v -f -- "$tmp"
trap - EXIT
exit




© 2015 - 2025 Weber Informatics LLC | Privacy Policy