#!/bin/sh
# Copyright eBrevia.com 2019
# This script downloads the LibreOffice German dictionaries
# and the OpenOffice Dutch dictionary
# and places them in the current directory.
# It leaves a fair amount of trash in /tmp.

# Passed to wget and unzip; change this to the null string for debugging.
QUIET='-q'

# Abort on the first failing command.
set -e

# Force the C locale for every tool below.  LANG alone is only a fallback:
# an LC_ALL (or LC_COLLATE) inherited from the caller would override it and
# make the sort order of the generated files depend on the machine.
export LANG=C
export LC_ALL=C

# Remember where the finished dictionaries go, then do the work in /tmp.
HERE=$(pwd)
cd /tmp

# fetch_oxt LABEL FILE URL
# Download URL into FILE (in the current directory) unless FILE already
# exists and is non-empty.  LABEL only appears in the progress message.
# Returns 0 when FILE is present afterwards, otherwise wget's/mv's status.
#
# The download goes to FILE.part and is renamed only after wget succeeds, so
# an interrupted transfer never leaves a truncated FILE that a later run
# would accept.  Always passing -O also covers the case of a stale empty
# FILE, where wget would otherwise save to FILE.1 and leave FILE empty.
fetch_oxt() {
  if [ -s "$2" ]; then
    return 0
  fi
  echo "downloading LibreOffice extension for $1"
  # QUIET is deliberately unquoted so that an empty value adds no argument.
  # NOTE(review): --no-check-certificate turns off TLS verification of the
  # download hosts; kept as in the original, confirm it is still required.
  # shellcheck disable=SC2086
  if wget $QUIET --no-check-certificate -O "$2.part" "$3"; then
    mv -- "$2.part" "$2"
  else
    fetch_status=$?
    rm -f -- "$2.part"
    return "$fetch_status"
  fi
}

# Common URL prefix of the four German LibreOffice extensions.
lo_base='https://extensions.libreoffice.org/assets/downloads/z'

fetch_oxt de-1901 dict-de-de-1901-oldspell-2017-06-22.oxt \
  "$lo_base/dict-de-de-1901-oldspell-2017-06-22.oxt" || exit 1

fetch_oxt de-DE dict-de-de-frami-2017-01-12.oxt \
  "$lo_base/dict-de-de-frami-2017-01-12.oxt" || exit 1

fetch_oxt de-CH dict-de-ch-frami-2017-01-12.oxt \
  "$lo_base/dict-de-ch-frami-2017-01-12.oxt" || exit 1

fetch_oxt de-AT dict-de-at-frami-2017-01-12.oxt \
  "$lo_base/dict-de-at-frami-2017-01-12.oxt" || exit 1

# The Dutch dictionary comes from SourceForge; the URL carries a long query
# string, so the local file name is given explicitly.
fetch_oxt nl nl-dict-v2.00g.oxt \
  'https://downloads.sourceforge.net/project/aoo-extensions/1456/6/nl-dict-v2.00g.oxt?r=https%3A%2F%2Fsourceforge.net%2Fprojects%2Faoo-extensions%2Ffiles%2F1456%2F6%2Fnl-dict-v2.00g.oxt%2Fdownload%3Fuse_mirror%3Dastuteinternet%26r%3Dhttps%253A%252F%252Fwww.google.com%252F%26use_mirror%3Dastuteinternet&ts=1570815217' || exit 1
  
echo do internal processing

# Pull the .dic file out of every German archive.  Each entry below is
# ARCHIVE:STEM, and the wanted member inside the archive is STEM/STEM.dic.
# unzip -j discards the directory part so the file lands in the current
# directory; -o overwrites an existing copy without asking.
for entry in \
  dict-de-de-1901-oldspell-2017-06-22.oxt:de_DE_OLDSPELL \
  dict-de-de-frami-2017-01-12.oxt:de_DE_frami \
  dict-de-ch-frami-2017-01-12.oxt:de_CH_frami \
  dict-de-at-frami-2017-01-12.oxt:de_AT_frami
do
  oxt=${entry%%:*}
  stem=${entry#*:}
  unzip $QUIET -j -o "$oxt" "$stem/$stem.dic"
done

# The Dutch archive also supplies a README, extracted alongside the
# dictionary.
unzip $QUIET -j -o nl-dict-v2.00g.oxt nl_NL.dic README_nl_NL.txt

# Produce a .dic8 file for every German dictionary.
# The old-spelling dictionary is copied unchanged (presumably it is already
# UTF-8 -- TODO confirm); the three frami dictionaries are re-encoded from
# ISO-8859-1 to UTF-8.
cp de_DE_OLDSPELL.dic de_DE_OLDSPELL.dic8
for region in DE CH AT; do
  iconv -f ISO-8859-1 -t UTF-8 \
    <"de_${region}_frami.dic" >"de_${region}_frami.dic8"
done

# header_lines FILE
# Print the comment lines (starting with '#') and the empty lines of FILE.
# grep exits with status 1 when nothing matches; that is not an error here
# (a dictionary may simply have no header), so only other failures, such as
# an unreadable file, are reported as non-zero.  "grep -E" replaces the
# obsolescent "egrep".
header_lines() {
  grep -E '^#|^$' "$1" || [ "$?" -eq 1 ]
}

# de_headers: the header lines of all four German dictionaries, in order.
{
  header_lines de_DE_OLDSPELL.dic8
  header_lines de_DE_frami.dic8
  header_lines de_CH_frami.dic8
  header_lines de_AT_frami.dic8
} >de_headers

# nl_headers: the Dutch README with '#' put in front of every line.
sed 's/^/#/' README_nl_NL.txt >nl_headers

echo sort and merge

# words_only FILE
# Print the entries of FILE in sorted order: comment lines (starting with
# '#') and empty lines are dropped, and everything from the first '/' to the
# end of each remaining line is removed.  "grep -E" replaces the
# obsolescent "egrep".
words_only() {
  grep -E -v '^#|^$' "$1" | sed 's;/.*$;;' | sort
}

# HERE is quoted so that a start directory containing spaces still works.
words_only de_DE_OLDSPELL.dic8 >"$HERE/de-1901.dic"
words_only de_DE_frami.dic8 >"$HERE/de-DE.dic"
words_only de_CH_frami.dic8 >"$HERE/de-CH.dic"
words_only de_AT_frami.dic8 >"$HERE/de-AT.dic"

# Dutch: keep the part of each line before the first '/', sort, and put the
# commented README (nl_headers) in front of the result.
cut -d/ -f1 nl_NL.dic |
  sort | cat nl_headers - >"$HERE/nl-NL.dic"

# make_de_mixed DIR
# Merge DIR/de-1901.dic, de-DE.dic, de-CH.dic and de-AT.dic into one sorted,
# duplicate-free list (de-mixed.nohead in the current directory) and write
# de_headers followed by that list to DIR/de-mixed.dic.
#
# The four inputs are named explicitly: a "de-*.dic" glob would also match a
# de-mixed.dic left behind by an earlier run and sort its header lines and
# old contents into the new word list.  DIR is quoted so that paths
# containing spaces work.
make_de_mixed() {
  sort -u -o de-mixed.nohead \
    "$1/de-1901.dic" "$1/de-DE.dic" "$1/de-CH.dic" "$1/de-AT.dic" || return
  cat de_headers de-mixed.nohead >"$1/de-mixed.dic"
}

echo make de-mixed dictionary
make_de_mixed "$HERE"
