#!/bin/sh

# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)

# Version shared by the software package and the language packages.
PV='3.02'

# Sometimes the software package has a minor version, but the language
# packages do not.  Example:
#   software package: tesseract-ocr-3.02.02.tar.gz
#   language package: tesseract-ocr-3.02.por.tar.gz
MINOR_PV=02

# First positional argument: directory that receives the generated recipes.
recipes_dir=$1
 | 15 |  | 
 | 16 | usage() { | 
 | 17 |     echo "Usage: `basename $0` <recipes dir> [ <download dir> ]" | 
 | 18 | } | 
 | 19 |  | 
 | 20 | if [ -z "$recipes_dir" ]; then | 
 | 21 |     usage | 
 | 22 |     exit 1 | 
 | 23 | fi | 
 | 24 | mkdir -p "$recipes_dir" | 
 | 25 |  | 
 | 26 | file_list_uri='https://code.google.com/p/tesseract-ocr/downloads/list' | 
 | 27 | file_list=`mktemp` | 
 | 28 |  | 
 | 29 | remove_dl_dir= | 
 | 30 | if [ -z "$2" ]; then | 
 | 31 |     remove_dl_dir=1 | 
 | 32 |     dl_dir=`mktemp -d` | 
 | 33 | else | 
 | 34 |     dl_dir="$2" | 
 | 35 | fi | 
 | 36 |  | 
 | 37 | mkdir -p $dl_dir | 
 | 38 |  | 
 | 39 | tesseract_langs() { | 
 | 40 |     wget -q -O "$file_list" "$file_list_uri" | 
 | 41 |  | 
 | 42 |     grep -E 'a href="detail\?name=tesseract-ocr-'${PV}'\.[^\.]+.tar.gz&can=2&q=">' "$file_list" | \ | 
 | 43 |         sed -r -e 's/.*tesseract-ocr-'${PV}'\.*([^\.]+)\.tar\.gz.*/\1/' | \ | 
 | 44 |         grep -Ev '('${MINOR_PV}'|'${MINOR_PV}'-doc-html)' | \ | 
 | 45 |         sort -u | 
 | 46 | } | 
 | 47 |  | 
 | 48 | download_lang_files() { | 
 | 49 |     local langs="$1" | 
 | 50 |     local uri | 
 | 51 |     for lang in $langs; do | 
 | 52 |         if [ ! -e "$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz" ]; then | 
 | 53 |             uri="https://tesseract-ocr.googlecode.com/files/tesseract-ocr-${PV}.${lang}.tar.gz" | 
 | 54 |             echo "Downloading $uri" | 
 | 55 |             wget -q -P "$dl_dir" "$uri" | 
 | 56 |         fi | 
 | 57 |     done | 
 | 58 | } | 
 | 59 |  | 
 | 60 | create_recipe() { | 
 | 61 |     local lang=$1 | 
 | 62 |     local tarball | 
 | 63 |  | 
 | 64 |     tarball="$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz" | 
 | 65 |  | 
 | 66 |     md5sum=`md5sum $tarball | awk '{print $1}'` | 
 | 67 |     sha256sum=`sha256sum $tarball | awk '{print $1}'` | 
 | 68 |  | 
 | 69 |     cat > $recipes_dir/tesseract-lang-`echo ${lang} | sed s/_/-/g`_${PV}.bb <<EOF | 
 | 70 | # Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved | 
 | 71 | # Released under the MIT license (see meta-openembedded layer's COPYING.MIT) | 
 | 72 |  | 
 | 73 | TESSERACT_LANG = "$lang" | 
 | 74 |  | 
 | 75 | require tesseract-lang.inc | 
 | 76 |  | 
 | 77 | SRC_URI[md5sum] = "${md5sum}" | 
 | 78 | SRC_URI[sha256sum] = "${sha256sum}" | 
 | 79 | EOF | 
 | 80 | } | 
 | 81 |  | 
 | 82 |  | 
 | 83 | LANGS=`tesseract_langs` | 
 | 84 |  | 
 | 85 | download_lang_files "$LANGS" | 
 | 86 |  | 
 | 87 | for lang in $LANGS; do | 
 | 88 |     create_recipe $lang | 
 | 89 | done | 
 | 90 |  | 
 | 91 | [ -n "$remove_dl_dir" ] && rm -rf $dl_dir | 
 | 92 | rm -f $file_list |