2017-01-20 08:02:37 +01:00
#!/usr/bin/env bash
2017-01-12 20:29:08 +01:00
###########################################################################
2017-01-20 08:02:37 +01:00
# check_spelling.sh
2017-01-12 20:29:08 +01:00
# ---------------------
# Date : December 2016
# Copyright : (C) 2016 by Denis Rouzaud
# Email : denis.rouzaud@gmail.com
###########################################################################
# #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation; either version 2 of the License, or #
# (at your option) any later version. #
# #
###########################################################################
2017-01-16 10:22:32 +01:00
# -r: deactivate interactive mode to fix errors
2017-01-12 20:29:08 +01:00
# optional argument: list of files to be checked
2017-01-20 08:02:37 +01:00
2017-01-12 20:29:08 +01:00
DIR = " $( cd " $( dirname " ${ BASH_SOURCE [0] } " ) " && pwd ) "
AGIGNORE = ${ DIR } /.agignore
2017-01-20 08:02:37 +01:00
# GNU prefix command for mac os support (gsed, gsplit)
GP =
if [ [ " $OSTYPE " = ~ darwin* ] ] ; then
GP = g
fi
2017-01-12 20:29:08 +01:00
# ARGUMENTS
2017-01-25 10:26:35 +01:00
INTERACTIVE = $( tty -s && echo YES || echo NO)
2017-01-20 08:02:37 +01:00
DEBUG = NO
OUTPUTLOG = ""
while getopts ":rdl:" opt; do
2017-01-12 20:29:08 +01:00
case $opt in
r)
INTERACTIVE = NO
; ;
2017-01-20 08:02:37 +01:00
d)
DEBUG = YES
; ;
l)
OUTPUTLOG = $OPTARG
; ;
2017-01-12 20:29:08 +01:00
\? )
echo " Invalid option: - $OPTARG " >& 2
exit 1
; ;
esac
done
shift $( expr $OPTIND - 1)
if [ ! $# -eq 0 ] ; then
2017-01-20 08:02:37 +01:00
EXCLUDE = $( cat $AGIGNORE | ${ GP } sed -e 's/\s*#.*$//' -e '/^\s*$/d' | tr '\n' '|' | ${ GP } sed -e 's/|$//' )
2017-01-12 20:29:08 +01:00
INPUTFILES = $( echo $@ | tr -s '[[:blank:]]' '\n' | egrep -iv " $EXCLUDE " | tr '\n' ' ' )
2017-01-20 08:02:37 +01:00
if [ [ -z $INPUTFILES ] ] ; then
2017-01-20 14:52:12 +01:00
exit 0
fi
2017-01-12 20:29:08 +01:00
echo " Running spell check on files: $INPUTFILES "
else
INPUTFILES = "."
fi
2017-01-16 10:22:32 +01:00
# regex to find escape string
SPELLOKRX = '(#\s*spellok|<!--\s*#\s*spellok\s*-->)$'
# split into several files to avoid too long regexes
2017-01-20 08:02:37 +01:00
SPLIT = 6
2017-01-16 10:22:32 +01:00
2017-01-20 08:02:37 +01:00
${ GP } split --number= l/$SPLIT --numeric-suffixes --suffix-length= 2 --additional-suffix= ~ ${ DIR } /spelling.dat spelling
2017-01-12 20:29:08 +01:00
2017-01-16 10:22:32 +01:00
# global replace variables (dictionary)
2017-01-16 10:56:42 +01:00
declare -A GLOBREP_ALLFILES = ( )
declare -A GLOBREP_CURRENTFILE = ( )
declare -A GLOBREP_IGNORE = ( )
2017-01-12 20:29:08 +01:00
2017-01-25 11:25:27 +01:00
ERRORFOUND = NO
2017-01-20 08:02:37 +01:00
for I in $( seq -f '%02g' 0 $(( $SPLIT - 1 )) ) ; do
2017-01-25 10:26:35 +01:00
[ [ " $INTERACTIVE " = ~ YES ] ] && printf "Progress: %d/%d\n" $I $SPLIT
2017-01-20 08:02:37 +01:00
SPELLFILE = spelling$I ~
# if correction contains an uppercase letter and is the same as the error character wise, this means that the error is searched as a full word and case sensitive (not incorporated in a bigger one)
CASEMATCH_FIXCASE = $( ${ GP } sed -rn '/^(\w+):\1(:\*)?$/Ip' $SPELLFILE | ${ GP } sed -r 's/^(\w+):\1(:\*)?$/(\\b|_)\1(\\b|_)/I' )
REMAINS = $( ${ GP } sed -r '/^(\w+):\1(:\*)?$/Id' $SPELLFILE )
# for error or correction containing any non letter character (space, apostrophe) search is full word and case insensitive
IGNORECASE_FIXSPECIALCHAR = $( echo " $REMAINS " | perl -ne "print if /^(\w*[ '])*\w*:\w+(?(1)|[' ])/" | ${ GP } sed -r 's/^(.*?):.*?(:\*)?$/(\\b|_)\1(\\b|_)/' )
REMAINS = $( echo " $REMAINS " | perl -ne "print if not /^(\w*[ '])*\w*:\w+(?(1)|[' ])/" )
2017-01-12 20:29:08 +01:00
2017-01-16 15:12:15 +01:00
# This will try to look for misspelling within larger words.
2017-01-12 20:29:08 +01:00
# Condition is hard to explain in words.
2017-01-20 08:02:37 +01:00
# You can test it here: https://regex101.com/r/7kznVA/12
# adding :* in spelling.dat willextra words that should not be checked in longer words ca
# remove those in spelling.dat ending with :*
# following can be checked in longer words case insensitively
IGNORECASE_INWORD = $( echo " $REMAINS " | perl -ne 'print if /^(\w)(\w)(\w)\w*(\w)(\w)(\w):(?:(?!\2\3\w|\w\1\2).)\w*?(?:(?!\5\6\w|\w\4\5)\w\w\w)$/' | cut -d: -f1 )
REMAINS = $( echo " $REMAINS " | perl -ne 'print if not /^(\w)(\w)(\w)\w*(\w)(\w)(\w):(?:(?!\2\3\w|\w\1\2).)\w*?(?:(?!\5\6\w|\w\4\5)\w\w\w)$/' | cut -d: -f1 )
# Trying with the rest as whole words case insensitively
IGNORECASE_WHOLEWORD = $( echo " $REMAINS " | ${ GP } sed -r 's/^/(\\b|_)/; s/$/(\\b|_)/' )
# or in camel case, case sensitively for word of at least 4 chars
MATCHCASE_INWORD = $( echo " $REMAINS " | ${ GP } sed -r '/^.{,3}$/d' | ${ GP } sed -r 's/^(\w)(.*)/(\\b|_)(\l\1\L\2_|\u\1\U\2_|\u\1\L\2\U[_A-Z0-9])|\L[_a-z0-9]\u\1\L\2(\\b|\U[_A-Z0-9])|\L[_a-z0-9]\u\1\U\2\L(\\b|[_a-z0-9])/' )
if [ [ " $DEBUG " = ~ YES ] ] ; then
echo "*** FIX CASE (case sensitive) ***"
echo " $CASEMATCH_FIXCASE "
echo "*** SPECIAL CHAR (case insensitive) ***"
echo " $IGNORECASE_FIXSPECIALCHAR "
echo "*** IN WORD (case insensitive) ***"
echo " $IGNORECASE_INWORD "
echo "*** WHOLE WORDS (case insensitive) ***"
echo " $IGNORECASE_WHOLEWORD "
echo "*** IN WORD CAMELCASE (case sensitive) **"
echo " $MATCHCASE_INWORD "
echo "*****"
fi
CASEMATCH_FIXCASE = $( echo " $CASEMATCH_FIXCASE " | ${ GP } sed -r '/^\s*$/d' | tr '\n' '\|' | ${ GP } sed -r 's/\|$//' )
IGNORECASE_FIXSPECIALCHAR = $( echo " $IGNORECASE_FIXSPECIALCHAR " | ${ GP } sed -r '/^\s*$/d' | tr '\n' '\|' | ${ GP } sed -r 's/\|$//' )
IGNORECASE_INWORD = $( echo " $IGNORECASE_INWORD " | ${ GP } sed -r '/^\s*$/d' | tr '\n' '\|' | ${ GP } sed -r 's/\|$//' )
IGNORECASE_WHOLEWORD = $( echo " $IGNORECASE_WHOLEWORD " | ${ GP } sed -r '/^\s*$/d' | tr '\n' '\|' | ${ GP } sed -r 's/\|$//' )
MATCHCASE_INWORD = $( echo " $MATCHCASE_INWORD " | ${ GP } sed -r '/^\s*$/d' | tr '\n' '\|' | ${ GP } sed -r 's/\|$//' )
IGNORECASE = $( echo " ( ${ IGNORECASE_FIXSPECIALCHAR } | ${ IGNORECASE_INWORD } | ${ IGNORECASE_WHOLEWORD } ) " | ${ GP } sed -r 's/\(\|/(/' | ${ GP } sed -r 's/\|\|/|/g' | ${ GP } sed -r 's/\|\)/)/' ) '(?!.*' " ${ SPELLOKRX } " ')'
CASEMATCH = $( echo " ( ${ CASEMATCH_FIXCASE } | ${ MATCHCASE_INWORD } ) " | ${ GP } sed -r 's/\(\|/(/' | ${ GP } sed -r 's/\|\|/|/g' | ${ GP } sed -r 's/\|\)/)/' ) '(?!.*' " ${ SPELLOKRX } " ')'
2017-01-12 20:29:08 +01:00
FILE = $INPUTFILES # init with input files (if ag is run with single file, file path is now in output)
2017-01-25 11:25:27 +01:00
2017-01-12 20:29:08 +01:00
while read -u 3 -r LINE; do
2017-01-16 10:22:32 +01:00
echo " $LINE "
ERRORFOUND = YES
2017-01-20 08:02:37 +01:00
NOCOLOR = $( echo " $LINE " | ${ GP } sed -r 's/\x1B\[([0-9]{1,2}(;[0-9]{1,2})?)?[m|K]//g' )
if [ [ " $NOCOLOR " = ~ ^[ [ :alnum:] ] [ [ :alnum:] \/ \. _-] +$ ] ] ; then
FILE = $NOCOLOR
GLOBREP_CURRENTFILE = ( )
fi
if [ [ " $NOCOLOR " = ~ ^[ 0-9] +: ] ] ; then
if [ [ -z $FILE ] ] ; then
echo "*** error: no file"
exit 1
2017-01-12 20:29:08 +01:00
fi
2017-01-20 08:02:37 +01:00
NUMBER = $( echo " $NOCOLOR " | cut -d: -f1)
ERRORLINE = $( echo " $NOCOLOR " | cut -d: -f2)
ERROR = $( echo " $LINE " | ${ GP } sed -r 's/^.*?\x1B\[30;43m(.*?)\x1B\[0m.*$/\1/' )
ERRORNOCOLOR = $( echo " $ERROR " | ${ GP } sed -r 's/\x1B\[([0-9]{1,2}(;[0-9]{1,2})?)?[m|K]//g' )
if [ [ " $ERRORNOCOLOR " = ~ ^[ [ :digit:] ] +: ] ] ; then
echo " *** error: could not find error in $LINE " >& 2
else
# if the error is not in IGNORECASE_INWORD, then it matched previous and next character (needs to remove them)
# also make error small case and escape special chars: ( ) |
ERRORSMALLCASE = $( echo ${ ERROR ,, } | ${ GP } sed -r 's/\(/\\(/g' | ${ GP } sed -r 's/\)/\\)/g' | ${ GP } sed -r 's/\|/\\|/g' )
if [ [ ! " ${ ERRORSMALLCASE } " = ~ $IGNORECASE_INWORD ] ] ; then
if [ [ ! -z $( ag --nonumbers --case-sensitive " ^ ${ ERRORSMALLCASE : 1 :- 1 } ${ ERRORSMALLCASE : -1 } ?: " scripts/spell_check/spelling.dat) ] ] ; then
ERRORSMALLCASE = ${ ERRORSMALLCASE #? }
ERROR = ${ ERROR #? }
fi
if [ [ ! -z $( ag --nonumbers --case-sensitive " ^ ${ ERRORSMALLCASE : :- 1 } : " scripts/spell_check/spelling.dat) ] ] ; then
ERRORSMALLCASE = ${ ERRORSMALLCASE : :- 1 }
ERROR = ${ ERROR : :- 1 }
fi
2017-01-12 20:29:08 +01:00
fi
2017-01-20 08:02:37 +01:00
# get correction from spelling.dat
CORRECTION = $( ag --nonumbers --case-sensitive " ^ ${ ERRORSMALLCASE } : " ${ DIR } /spelling.dat | cut -d: -f2)
if [ [ -z $CORRECTION ] ] ; then
echo " could not find correction for $ERROR " >& 2
else
# Match case
MATCHCASE = " $ERROR : $CORRECTION "
CORRECTIONCASE = $( echo " $MATCHCASE " | ${ GP } sed -r 's/([A-Z]+):(.*)/\1:\U\2/; s/([A-Z][a-z]+):([a-z])/\1:\U\2\L/' | cut -d: -f2)
if [ [ ! -z $OUTPUTLOG ] ] ; then
echo " $FILE $NUMBER $ERROR $CORRECTIONCASE " >> $OUTPUTLOG
2017-01-16 15:12:15 +01:00
fi
2017-01-20 08:02:37 +01:00
if [ [ " $INTERACTIVE " = ~ YES ] ] ; then
# Skip global replace
if [ [ ! -z ${ GLOBREP_ALLFILES [ " $ERROR " ] } ] ] ; then
echo -e " replace \x1B[33m $ERROR \x1B[0m by \x1B[33m $CORRECTIONCASE \x1B[0m in \x1B[33m $FILE \x1B[0m "
${ GP } sed -i -r " / ${ SPELLOKRX } /! s/ $ERROR / $CORRECTIONCASE /g " $FILE
continue
elif [ [ ( ! -z ${ GLOBREP_CURRENTFILE [ " $ERROR " ] } ) || ( ! -z ${ GLOBREP_IGNORE [ " $ERROR " ] } ) ] ] ; then
echo "skipping occurrence"
continue
else
# escape string
SPELLOKSTR = '//#spellok'
if [ [ " $FILE " = ~ \. ( txt| html| htm| dox) $ ] ] ; then
SPELLOKSTR = '<!--#spellok-->'
fi
if [ [ " $FILE " = ~ \. ( h| cpp| sip) $ ] ] ; then
if [ [ " $ERRORLINE " = ~ ^\s *( \/ *\| \/ \/ ) ] ] ; then
SPELLOKSTR = '#spellok'
fi
fi
if [ [ " $FILE " = ~ \. ( py) $ ] ] ; then
SPELLOKSTR = '#spellok'
fi
SPELLOKSTR_ESC = $( echo " $SPELLOKSTR " | ${ GP } sed -r 's/\//\\\//g' )
# Display menu
echo "***"
echo -e " Error found: \x1B[31m $ERROR \x1B[0m "
echo -e " r) \x1B[4mr\x1B[0meplace by \x1B[33m $CORRECTIONCASE \x1B[0m at line $NUMBER "
echo -e " f) replace all occurrences by \x1B[33m $CORRECTIONCASE \x1B[0m in current \x1B[4mf\x1B[0mile "
echo -e " a) replace all occurrences by \x1B[33m $CORRECTIONCASE \x1B[0m in \x1B[4ma\x1B[0mll files "
echo -e " p) a\x1B[4mp\x1B[0mpend \x1B[33m $SPELLOKSTR \x1B[0m at the end of the line $NUMBER to avoid spell check on this line "
echo -e " t) \x1B[4mt\x1B[0mype your own correction"
echo -e " c) skip and \x1B[4mc\x1B[0montinue"
echo -e " o) skip all \x1B[4mo\x1B[0mccurences and continue"
echo -e " e) \x1B[4me\x1B[0mxit"
while read -n 1 n; do
echo ""
case $n in
r)
echo -e " replacing \x1B[33m $ERROR \x1B[0m by \x1B[33m $CORRECTIONCASE \x1B[0m in \x1B[33m $FILE \x1B[0m at line \x1B[33m $NUMBER \x1B[0m "
${ GP } sed -i " ${ NUMBER } s/ $ERROR / $CORRECTIONCASE /g " $FILE
break
; ;
f)
GLOBREP_CURRENTFILE += ( [ " $ERROR " ] = 1)
echo -e " replacing \x1B[33m $ERROR \x1B[0m by \x1B[33m $CORRECTIONCASE \x1B[0m in \x1B[33m $FILE \x1B[0m "
${ GP } sed -i -r " / ${ SPELLOKRX } /! s/ $ERROR / $CORRECTIONCASE /g " $FILE
break
; ;
a)
GLOBREP_CURRENTFILE += ( [ " $ERROR " ] = 1)
GLOBREP_ALLFILES += ( [ " $ERROR " ] = 1)
echo -e " replace \x1B[33m $ERROR \x1B[0m by \x1B[33m $CORRECTIONCASE \x1B[0m in \x1B[33m $FILE \x1B[0m "
${ GP } sed -i -r " / ${ SPELLOKRX } /! s/ $ERROR / $CORRECTIONCASE /g " $FILE
break
; ;
p)
echo -e " appending \x1B[33m $SPELLOKSTR \x1B[0m to \x1B[33m $FILE \x1B[0m at line \x1B[33m $NUMBER \x1B[0m "
${ GP } sed -i " ${ NUMBER } s/\$/ $SPELLOKSTR_ESC / " $FILE
break
; ;
t)
echo "Enter the correction: "
read CORRECTION
MATCHCASE = " $ERROR : $CORRECTION "
CORRECTIONCASE = $( echo " $MATCHCASE " | ${ GP } sed -r 's/([A-Z]+):(.*)/\1:\U\2/; s/([A-Z][a-z]+):([a-z])/\1:\U\2\L/' | cut -d: -f2)
echo -e " replacing \x1B[33m $ERROR \x1B[0m by \x1B[33m $CORRECTIONCASE \x1B[0m in \x1B[33m $FILE \x1B[0m at line \x1B[33m $NUMBER \x1B[0m "
${ GP } sed -i " ${ NUMBER } s/ $ERROR / $CORRECTIONCASE /g " $FILE
break
; ;
c)
break
; ;
o)
GLOBREP_IGNORE += ( [ " $ERROR " ] = 1)
break
; ;
e)
exit 1
; ;
*) invalid option; ;
esac
done
2017-01-16 15:12:15 +01:00
fi
fi
2017-01-12 20:29:08 +01:00
fi
fi
if [ [ " $NOCOLOR " = ~ ^\s *$ ] ] ; then
FILE = ""
fi
fi
2017-01-25 10:26:35 +01:00
done 3< <(
unbuffer ag --all-text --nopager --color-match "30;43" --numbers --nomultiline --ignore-case -p $AGIGNORE " ${ IGNORECASE } " $INPUTFILES
unbuffer ag --all-text --nopager --color-match "30;43" --numbers --nomultiline --case-sensitive -p $AGIGNORE " ${ CASEMATCH } " $INPUTFILES
)
2017-01-12 20:29:08 +01:00
2017-01-20 08:02:37 +01:00
rm -f $SPELLFILE
2017-01-12 20:29:08 +01:00
done
2017-01-16 15:12:15 +01:00
if [ [ " $ERRORFOUND " = ~ YES ] ] ; then
2017-01-20 08:02:37 +01:00
echo -e "\x1B[1msome errors have been found.\x1B[0m" >& 2
2017-01-16 15:12:15 +01:00
exit 1
else
exit 0
fi