hannesfrank · August 6, 2020 09:47
diff --git a/beamer-pdf-remove-pause-slides.sh b/beamer-pdf-remove-pause-slides.sh
 #!/usr/bin/env sh

 # This script removes the partial (\pause) slides from a beamer presentation
 # PDF, keeping only the last, complete page of each slide.
 # Requirements: pdftk-java (v3.0.9)
 USAGE="$0 <input pdf> <output pdf>"

 # NOTE: This relies on the page-label metadata of the PDF. If that metadata
 # is missing or incorrect, some pages will be missing from the output!

 # How it works
 # ------------
 # The beamer presentation consists of blocks of pages, and only the last,
 # complete page of each block is wanted.
 # `pdftk dump_data` prints one PageLabelNewIndex line for the first page of
 # each such block. The printed value is the 0-based page index with an extra
 # "1" appended (observed with pdftk-java v3.0.9; other versions may print
 # the value differently -- verify before relying on this script).
 #
 # For example, for a PDF with 7 pages:
 #   PageLabel starting indices:    0 1 2 - 4 - -
 #   Desired complete pages:        1 2 x 4 x x 7
 #
 # The 0-based start index of a block is also the 1-based number of the last
 # page of the previous block. So the first index (0) is dropped, the rest
 # are used as page numbers, and `end` is appended to keep the last page.

 # Refuse to run without both file names instead of calling pdftk with
 # empty operands.
 if [ "$#" -lt 2 ]; then
   printf 'Usage: %s\n' "$USAGE" >&2
   exit 2
 fi

 IN_PDF=$1
 OUT_PDF=$2

 # Page numbers of the last page of every block except the final one,
 # joined into a single space-separated line.
 PAGES=$(
   pdftk "$IN_PDF" dump_data |
     grep PageLabelNewIndex |  # filter page labels
     cut -d' ' -f2 |           # get label value
     sed 's/.$//' |            # remove trailing 1
     tail -n +2 |              # skip first line
     paste -s -d' ' -          # paste to one line
 )

 # $PAGES is intentionally unquoted: every page number has to reach pdftk as
 # a separate argument. `end` adds the last page of the document.
 # shellcheck disable=SC2086
 pdftk "$IN_PDF" cat $PAGES end output "$OUT_PDF"