Last active
June 10, 2018 15:50
-
-
Save pkern/4a3e2a3939143745c8d393a61864dbe5 to your computer and use it in GitHub Desktop.
Convert a stack of simplex scanned pages to duplex, ignoring blank pages
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[Unit]
Description=Printer Duplex post-processing
[Path]
PathChanged=/srv/scans/duplex
[Install]
WantedBy=multi-user.target
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[Unit]
Description=Printer Duplex post-processing
[Service]
Type=oneshot
User=printer
ExecStart=/usr/bin/go run /home/printer/simplex_to_duplex.go /srv/scans/duplex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Command simplex_to_duplex reads an input PDF file with the following properties:
//   - Scanned from the front, all pages
//   - Scanned from the back, all pages in reverse order (by flipping the stack)
//
// It interleaves the two halves into duplex order and filters out blank pages
// in between. Blank pages are judged by very low ink content, so pages that
// show nothing but punched holes will be recognized as blanks.
package main | |
import ( | |
"flag" | |
"fmt" | |
"io/ioutil" | |
"log" | |
"os/exec" | |
"path" | |
"strconv" | |
"strings" | |
"gopkg.in/pipe.v2" | |
) | |
// pageCount uses pdfinfo to count the PDF's pages. | |
func pageCount(fn string) (int, error) { | |
p := pipe.Line( | |
pipe.Exec("pdfinfo", fn), | |
pipe.Exec("grep", "^Pages:"), | |
pipe.Exec("tr", "-dc", "0-9"), | |
) | |
out, err := pipe.CombinedOutput(p) | |
if err != nil { | |
return 0, err | |
} | |
return strconv.Atoi(string(out)) | |
} | |
// blankPage calls out to Ghostscript to determine the ink content | |
// of the page. | |
func blankPage(fn string, page int) (bool, error) { | |
p := pipe.Line( | |
pipe.Exec("gs", "-o", "-", | |
fmt.Sprintf("-dFirstPage=%d", page), | |
fmt.Sprintf("-dLastPage=%d", page), | |
"-sDEVICE=inkcov", | |
fn), | |
pipe.Exec("grep", "CMYK"), | |
pipe.Exec("awk", `BEGIN { sum=0; } {sum += $1 + $2 + $3 + $4;} END { printf "%.5f\n", sum }`), | |
) | |
out, err := pipe.CombinedOutput(p) | |
if err != nil { | |
return false, err | |
} | |
percent, err := strconv.ParseFloat(strings.TrimSpace(string(out)), 32) | |
return percent <= 0.001, err | |
} | |
// pageOrder yields the 1-based page numbers of a count-page scan in duplex
// reading order: it alternates between the next unread page from the start of
// the file (the front pile) and the next unread page from the end of the file
// (the back pile, which was scanned in reverse). The returned channel is
// closed once count numbers have been sent; for count <= 0 it is closed
// without sending anything.
func pageOrder(count int) <-chan int {
	out := make(chan int)
	go func() {
		defer close(out)
		// front walks forward from the first page, back walks backward from
		// the last one; the two cursors meet in the middle.
		front, back := 1, count
		for sent := 0; sent < count; sent++ {
			if sent%2 == 0 {
				out <- front
				front++
				continue
			}
			out <- back
			back--
		}
	}()
	return out
}
func processPDF(fn string) error { | |
cnt, err := pageCount(fn) | |
if err != nil { | |
return err | |
} | |
log.Printf(" Pages: %v", cnt) | |
if cnt%2 != 0 { | |
return fmt.Errorf("page count %d not even", cnt) | |
} | |
blankPages := make(map[int]bool) | |
for i := 1; i <= cnt; i++ { | |
blank, err := blankPage(fn, i) | |
if err != nil { | |
return err | |
} | |
if blank { | |
blankPages[i] = true | |
} | |
} | |
log.Printf(" Blank pages: %v", blankPages) | |
args := []string{fn, "cat"} | |
for i := range pageOrder(cnt) { | |
if blankPages[i] { | |
log.Printf(" Ignoring blank page %d", i) | |
continue | |
} | |
log.Printf(" Selecting page %d", i) | |
args = append(args, strconv.Itoa(i)) | |
} | |
args = append(args, "output", fn+".out.pdf") | |
output, err := exec.Command("pdftk", args...).CombinedOutput() | |
if err != nil { | |
return fmt.Errorf("pdftk run failed: %v (%s)", err, output) | |
} | |
return nil | |
} | |
func main() { | |
flag.Parse() | |
for _, dir := range flag.Args() { | |
log.Printf("Processing directory %q ...", dir) | |
fis, err := ioutil.ReadDir(dir) | |
if err != nil { | |
log.Fatalf("Failed to read directory %q: %v", dir, err) | |
} | |
files := make(map[string]bool) | |
for _, fi := range fis { | |
files[fi.Name()] = true | |
} | |
for filename, _ := range files { | |
if strings.Contains(filename, ".out.pdf") { | |
continue | |
} | |
if files[filename+".out.pdf"] { | |
continue | |
} | |
fn := path.Join(dir, filename) | |
if err := processPDF(fn); err != nil { | |
log.Fatalf("Failed to process %q: %v", fn, err) | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment