Last active
October 3, 2019 14:08
-
-
Save frame/23c807ac6f1cb67fa4f8f69121360e71 to your computer and use it in GitHub Desktop.
Canon MF421DW - watch and process incoming scans (detect/remove blank pages, deskew, OCR)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# pdf-ocr-process.sh - turn one scanned document into a searchable PDF.
#
# Usage: pdf-ocr-process.sh <source_dir> <file_name>
#   $1  directory that contains the scan (a trailing "/" is optional)
#   $2  file name of the scan inside that directory
#
# Steps:
#   1. Rasterise the scan to one PNG per page (300 dpi) in a private temp dir.
#   2. Move nearly-white pages to <target>/EmptyPages/, deskew all others.
#   3. Bundle the deskewed pages into a multi-page TIFF and let tesseract
#      write <target>/<name>.pdf (German language model).
#   4. Delete the source scan only if OCR succeeded and produced a
#      non-empty PDF.
#
# Exit status: 0 on success or when every page was blank, 2 on usage error,
# 1 on any processing failure (the source scan is kept in that case).

if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
  printf 'Usage: %s <source_dir> <file_name>\n' "${0##*/}" >&2
  exit 2
fi

source_path=$1
file=$2

# Paths are built by plain concatenation below, so make sure the directory
# ends in exactly the separator the rest of the script expects.
[[ "${source_path}" == */ ]] || source_path="${source_path}/"

# Strip only the last extension so "scan.2019.pdf" stays "scan.2019".
basename=${file%.*}

# Pages whose mean pixel value exceeds this are treated as blank.
# NOTE(review): the value presumes ImageMagick reports %[mean] on a
# 0..65535 scale (16-bit quantum build) - confirm for the installed build.
readonly BLANK_MEAN_THRESHOLD=65400

if [[ "${source_path}" == */User2/ ]]; then
  target_path=/mnt/s/Files/Docs/Scans/User2/
else
  target_path=/mnt/s/Files/Docs/Scans/User1/
fi

# Abort if no temp dir could be created: continuing with an empty value
# would turn every later "${work_path}..." path into one under "/".
work_path=$(mktemp -d) || work_path=
if [[ -z "${work_path}" || ! -d "${work_path}" ]]; then
  printf 'Error: could not create a temporary directory\n' >&2
  exit 1
fi
work_path="${work_path%/}/"

# Remove the temp dir on every exit path, including the "all pages blank"
# and error cases.
trap 'rm -rf -- "${work_path:?}"' EXIT

# Unmatched globs must expand to nothing rather than to the literal pattern.
shopt -s nullglob

if ! mkdir -p -- "${target_path}"; then
  printf 'Error: cannot create target directory %s\n' "${target_path}" >&2
  exit 1
fi

# One PNG per page; absolute paths are used throughout, so no cd is needed
# and a relative <source_dir> keeps working.
convert -quiet -units PixelsPerInch -density 300 \
  "${source_path}${file}" "${work_path}${basename}-%02d.png"

pages=( "${work_path}"*.png )
if (( ${#pages[@]} == 0 )); then
  printf 'Error: no pages could be extracted from %s\n' \
    "${source_path}${file}" >&2
  exit 1
fi

for page in "${pages[@]}"; do
  [[ -f "${page}" ]] || continue

  mean=$(convert -quiet "${page}" -format '%[mean]' info:)
  mean_int=${mean%.*}

  # A page is only discarded when the mean is a valid number above the
  # threshold; if it cannot be measured the page is kept.
  if [[ "${mean_int}" =~ ^[0-9]+$ ]] \
    && (( 10#${mean_int} > BLANK_MEAN_THRESHOLD )); then
    if ! { mkdir -p -- "${target_path}EmptyPages/" \
      && mv -- "${page}" "${target_path}EmptyPages/"; }; then
      printf 'Warning: could not move blank page %s\n' "${page}" >&2
    fi
  else
    convert -quiet "${page}" -deskew 40 +repage "${page}-deskewed.png"
  fi
done

deskewed=( "${work_path}"*-deskewed.png )
if (( ${#deskewed[@]} == 0 )); then
  # Every page was blank (or none could be deskewed): nothing to OCR.
  # The source scan is intentionally left in place.
  exit 0
fi

if ! convert -quiet "${deskewed[@]}" -compress zip \
  "${work_path}${basename}.tif"; then
  printf 'Error: could not assemble %s.tif\n' "${basename}" >&2
  exit 1
fi

# tesseract appends ".pdf" to the output base name itself.
if ! tesseract "${work_path}${basename}.tif" "${target_path}${basename}" \
  -l deu --psm 1 --oem 1 pdf; then
  printf 'Error: tesseract failed for %s\n' "${file}" >&2
  exit 1
fi

# Only drop the original once this run has produced a non-empty PDF.
if [[ -s "${target_path}${basename}.pdf" ]]; then
  rm -f -- "${source_path}${file}"
else
  printf 'Error: %s%s.pdf is missing or empty\n' \
    "${target_path}" "${basename}" >&2
  exit 1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# pdf-ocr-watch.sh - watch the incoming scan folders and hand every new
# file to pdf-ocr-process.sh once it can be read completely.
#
# inotifywait reports each event as "<watched dir>/ <EVENTS> <file name>".
# A new file is polled with ImageMagick's identify every 0.5 s until it
# parses without any diagnostic output or the poll limit is reached
# (about 100 seconds). The terminal bell rings once after a processed
# file and three times on a timeout.

readonly PROCESS_SCRIPT=/mnt/c/Programs/Scripts/pdf-ocr-process.sh
readonly MAX_POLLS=200

inotifywait -m \
  /mnt/s/Files/Docs/Scans/Incoming/User1 \
  /mnt/s/Files/Docs/Scans/Incoming/User2 \
  -e create -e moved_to |
  while read -r path action file; do
    # Files with this prefix are skipped by design (name kept verbatim).
    [[ "${file}" == cifs_accesibility_check* ]] && continue
    # A new sub-directory can never become a readable scan.
    [[ "${action}" == *ISDIR* ]] && continue

    printf '%s detected\n' "${file}"

    # Poll until identify is silent, i.e. the upload has finished.
    readable=0
    check="initial state"
    for (( polls = 0; polls < MAX_POLLS; polls++ )); do
      printf '.'
      sleep 0.5
      check=$(identify -quiet -format "" "${path}${file}" 2>&1)
      if [[ -z "${check}" ]]; then
        readable=1
        break
      fi
    done
    printf '\n'

    if (( ! readable )); then
      printf 'Error: %s\n' "${check}" >&2
      printf '\a\n\a\n\a\n'
      continue
    fi

    filesize=$(stat -c%s "${path}${file}")
    printf 'processing... %s\n' "${filesize}"

    # Quote both arguments so names with spaces arrive intact, and detach
    # stdin so the child cannot swallow events queued in the pipe.
    bash "${PROCESS_SCRIPT}" "${path}" "${file}" </dev/null
    printf '\a\n'
  done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Install the tools the scan pipeline needs: tesseract with the German
# language data, ImageMagick and inotify-tools.
#
# Stop at the first failure: if the PPA cannot be added or the package
# lists cannot be refreshed, installing anyway would silently pull in
# whatever tesseract version the distribution ships.
set -euo pipefail

# requires latest tesseract-ocr (4+)
sudo add-apt-repository ppa:alex-p/tesseract-ocr
sudo apt-get update
sudo apt-get install \
  tesseract-ocr tesseract-ocr-deu \
  imagemagick \
  inotify-tools
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Launch the scan watcher in the background, detached from the terminal's
# hangup signal. Standard output and standard error both go to the log
# file, which is truncated on each start.
nohup bash -c '/mnt/c/Programs/Scripts/pdf-ocr-watch.sh' \
  > /var/log/pdf-ocr.log 2>&1 &
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment