Last active
August 29, 2015 14:19
-
-
Save jbfink/fa52f0dbf84fca57fff9 to your computer and use it in GitHub Desktop.
punchcard.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# punchcard.py | |
# | |
# Copyright (C) 2011: Michael Hamilton | |
# The code is GPL 3.0(GNU General Public License) ( http://www.gnu.org/copyleft/gpl.html ) | |
# | |
import Image | |
import sys | |
from optparse import OptionParser | |
# ---------------------------------------------------------------------------
# Card geometry.
#
# A card carries CARD_COLUMNS columns of CARD_ROWS punch positions each.
# Physical measurements are in inches and were taken from
# http://www.quadibloc.com/comp/cardint.htm
# ---------------------------------------------------------------------------
CARD_COLUMNS = 80
CARD_ROWS = 12

CARD_WIDTH = 7.0 + 3.0 / 8.0        # overall card width
CARD_HEIGHT = 3.25                  # overall card height
CARD_COL_WIDTH = 0.087              # horizontal pitch of one column
CARD_HOLE_WIDTH = 0.055             # IBM; Control Data used 0.056
CARD_ROW_HEIGHT = 0.25              # vertical pitch of one row
CARD_HOLE_HEIGHT = 0.125            # height of one punched hole
CARD_TOPBOT_MARGIN = 3.0 / 16.0     # blank margin at top and at bottom
CARD_SIDE_MARGIN = 0.2235           # blank margin on each side

# The same measurements expressed as dimensionless ratios, so they can be
# multiplied by the pixel size of a card found in an image.
# Horizontal quantities are proportions of the card WIDTH ...
CARD_SIDE_MARGIN_RATIO = CARD_SIDE_MARGIN / CARD_WIDTH
CARD_COL_WIDTH_RATIO = CARD_COL_WIDTH / CARD_WIDTH
CARD_HOLE_WIDTH_RATIO = CARD_HOLE_WIDTH / CARD_WIDTH
# ... and vertical quantities are proportions of the card HEIGHT.
CARD_TOP_MARGIN_RATIO = CARD_TOPBOT_MARGIN / CARD_HEIGHT
CARD_ROW_HEIGHT_RATIO = CARD_ROW_HEIGHT / CARD_HEIGHT
CARD_HOLE_HEIGHT_RATIO = CARD_HOLE_HEIGHT / CARD_HEIGHT

# Upper bound for the automatically chosen cut-off brightness, where
# brightness is the pixel's (R + G + B) / 3.
BRIGHTNESS_THRESHOLD = 200
# Code chart for the IBM Model 029 keypunch, drawn as a card.
#
# The first line lists the characters; each following line is one punch row
# (12, 11, 0, 1 .. 9 from top to bottom) with an 'O' wherever that row is
# punched for the character directly above it.
#
# The chart is indexed by character position, so alignment is significant:
# every line must be exactly 70 characters wide, with the first character
# column at index 5 and a blank column at index 68 (which yields the
# "no punches" -> ' ' entry).
IBM_MODEL_029_KEYPUNCH = """
    /&-0123456789ABCDEFGHIJKLMNOPQR/STUVWXYZ:#@'="`.<(+|!$*);^~,%_>? |
12 / O           OOOOOOOOO                        OOOOOO             |
11|   O                   OOOOOOOOO                     OOOOOO       |
 0|    O                           OOOOOOOOO                  OOOOOO |
 1|     O        O        O        O                                 |
 2|      O        O        O        O       O     O     O     O      |
 3|       O        O        O        O       O     O     O     O     |
 4|        O        O        O        O       O     O     O     O    |
 5|         O        O        O        O       O     O     O     O   |
 6|          O        O        O        O       O     O     O     O  |
 7|           O        O        O        O       O     O     O     O |
 8|            O        O        O        O OOOOOOOOOOOOOOOOOOOOOOOO |
 9|             O        O        O        O                         |
  |__________________________________________________________________|"""

# Turn the ASCII art sideways and build a lookup from a column's punch
# pattern (a 12-tuple of 'O' / ' ', rows 12, 11, 0, 1 .. 9) to its character,
# for example:
#   (O, , ,O, , , , , , , , ):A
#   (O, , , ,O, , , , , , , ):B
#   (O, , , , ,O, , , , , , ):C
rows = IBM_MODEL_029_KEYPUNCH[1:].split('\n')   # [1:] drops the leading newline
# One entry per chart column: [character, row 12, row 11, row 0, ..., row 9].
# Index 5 is the first character column; the final index is the card's
# right-hand border and is skipped.
rotated = [[r[i] for r in rows[0:13]] for i in range(5, len(rows[0]) - 1)]
translate = {}
for v in rotated:
    translate[tuple(v[1:])] = v[0]
def drange(start, stop, step=1.0):
    """Generate a range of floats.

    Yields start, start + step, start + 2 * step, ... for as long as the
    value lies before stop (stop itself is excluded). A negative step
    counts downwards. Values are produced by repeated addition, so the
    usual floating-point rounding accumulates.

    Raises ValueError (on first iteration) if step is zero, which would
    otherwise never terminate.
    """
    if step == 0:
        raise ValueError("drange() step must not be zero")
    r = start
    if step > 0:
        while r < stop:
            yield r
            r += step
    else:
        while r > stop:
            yield r
            r += step
class PunchCard(object):
    """A punch card image plus the data scanned from it.

    Constructing an instance immediately crops the search area, chooses a
    brightness threshold, locates the card and scans every row. Afterwards:

    text      -- the decoded characters, one per column ('@' marks a punch
                 pattern that is not in the ``translate`` table)
    decoded   -- per column, a tuple of 'O' / ' ' for the rows top to bottom
    surface   -- per column, a list with the measured width in pixels of
                 the hole found in each row (0 where no hole was found)
    threshold -- the brightness cut-off that was used

    A pixel counts as "card" when its brightness is below the threshold and
    as "hole or background" otherwise.
    """

    def __init__(self, image, bright=-1, debug=False, xstart=0, xstop=0, ystart=0, ystop=0, xadjust=0):
        """Scan *image*.

        image   -- image object providing ``size``, ``load()`` and (when
                   debugging) ``copy()`` / ``show()``; pixels are indexed
                   as ``pix[x, y]`` and must have at least three channels
        bright  -- brightness threshold; values <= 0 select it automatically
        debug   -- if true, display an annotated copy of the image and wait
                   for Enter before decoding
        xstart, xstop, ystart, ystop
                -- search window in pixels; a stop value of 0 means "up to
                   the image edge"
        xadjust -- pixel offset applied to the x position used when probing
                   for the top and bottom card edges
        """
        self.text = ''
        self.decoded = []
        self.surface = []
        self.debug = debug
        self.threshold = 0
        self.ymin = ystart
        self.ymax = ystop
        self.xmin = xstart
        self.xmax = xstop
        self.xadjust = xadjust
        self.image = image
        self.pix = image.load()
        self._crop()
        self._scan(bright)

    def _brightness(self, pixel):
        """Return the brightness of *pixel*: the integer mean of R, G and B."""
        return (pixel[0] + pixel[1] + pixel[2]) // 3

    def _flip(self, pixel):
        """Return the value used to highlight *pixel* on the debug image."""
        return max(pixel)

    def _mark(self, x, y, value=255):
        """Set one pixel of the debug image.

        Coordinates are truncated to integers, and positions outside the
        image are ignored, so annotations can never abort a scan.
        """
        x, y = int(x), int(y)
        if 0 <= x < self.xsize and 0 <= y < self.ysize:
            self.debug_pix[x, y] = value

    def _crop(self):
        """Establish the search window and its mid lines.

        The search for card edges starts from the "crop" edges: either the
        values supplied to the constructor or, where a stop value is 0, the
        image size.
        """
        # Use the instance's own image, not whatever global happens to be
        # called "image".
        self.xsize, self.ysize = self.image.size
        if self.xmax == 0:
            self.xmax = self.xsize
        if self.ymax == 0:
            self.ymax = self.ysize
        self.midx = self.xmin + (self.xmax - self.xmin) // 2 + self.xadjust
        self.midy = self.ymin + (self.ymax - self.ymin) // 2

    def _find_threshold_brightness(self):
        """Heuristic for finding a reasonable cut-off brightness.

        Samples the left-most and right-most pixel of the search window on
        the horizontal mid line and returns 10 less than the darkest of
        those two and BRIGHTNESS_THRESHOLD.
        """
        left = self._brightness(self.pix[self.xmin, self.midy])
        right = self._brightness(self.pix[self.xmax - 1, self.midy])
        return min(left, right, BRIGHTNESS_THRESHOLD) - 10

    def _find_data_horiz_dimensions(self, probe_y):
        """Find the left and right card edges at row *probe_y*.

        Walks inwards from both sides of the search window until a pixel
        darker than the threshold is met, then derives the horizontal
        layout from the card width found.

        Returns (data_left_x, data_right_x, col_width, hole_width): the
        integer x range of the data area and the float column pitch and
        hole width, all in pixels.
        """
        left_border, right_border = self.xmin, self.xmax - 1
        for x in range(self.xmin, self.midx):
            if self._brightness(self.pix[x, probe_y]) < self.threshold:
                left_border = x
                break
        for x in range(self.xmax - 1, self.midx, -1):
            if self._brightness(self.pix[x, probe_y]) < self.threshold:
                right_border = x
                break

        width = right_border - left_border
        card_side_margin_width = int(width * CARD_SIDE_MARGIN_RATIO)
        data_left_x = left_border + card_side_margin_width
        # The right edge is derived from the left edge plus 80 columns rather
        # than from the right border minus the margin.
        data_right_x = data_left_x + int((CARD_COLUMNS * width) * CARD_COL_WIDTH / CARD_WIDTH)
        col_width = width * CARD_COL_WIDTH_RATIO
        hole_width = width * CARD_HOLE_WIDTH_RATIO

        if self.debug:
            # Mark the left and right edges on the copy: a short vertical
            # tick on each border plus a short horizontal one pointing inwards.
            tick = self.ysize // 100
            for y in range(probe_y - tick, probe_y + tick):
                self._mark(max(left_border, 0), y)
                self._mark(min(right_border, self.xmax - 1), y)
            for x in range(1, (self.xmax - self.xmin) // 200):
                self._mark(left_border + x, probe_y)
                self._mark(right_border - x, probe_y)
        return data_left_x, data_right_x, col_width, hole_width

    def _find_data_vert_dimensions(self):
        """Find the top and bottom card edges on the vertical mid line.

        Walks inwards from the top and bottom of the search window at
        x = midx until a pixel darker than the threshold is met, then
        derives the vertical layout from the card height found.

        Returns (data_top_y, data_bottom_y, row_height, hole_height) in
        whole pixels, where data_top_y is the vertical centre of the first
        row of holes and data_bottom_y that of the last.
        """
        top_border, bottom_border = self.ymin, self.ymax
        for y in range(self.ymin, self.midy):
            if self._brightness(self.pix[self.midx, y]) < self.threshold:
                top_border = y
                break
        for y in range(self.ymax - 1, self.midy, -1):
            if self._brightness(self.pix[self.midx, y]) < self.threshold:
                bottom_border = y
                break

        card_height = bottom_border - top_border
        card_top_margin = int(card_height * CARD_TOP_MARGIN_RATIO)
        data_begins = top_border + card_top_margin
        hole_height = int(card_height * CARD_HOLE_HEIGHT_RATIO)
        data_top_y = data_begins + hole_height // 2
        col_height = int(card_height * CARD_ROW_HEIGHT_RATIO)

        if self.debug:
            # Mark up the copy with the edges that were found ...
            for x in range(self.xmin, self.xmax - 1):
                self._mark(x, top_border)
                self._mark(x, bottom_border)
            # ... and with the search parameters: the window's top and
            # bottom limits around midx, and the probe column outside them.
            span = self.xsize // 20
            for x in range(self.midx - span, self.midx + span):
                self._mark(x, self.ymin)
                self._mark(x, self.ymax - 1)
            for y in range(0, self.ymin):
                self._mark(self.midx, y)
            for y in range(self.ymax - 1, self.ysize - 1):
                self._mark(self.midx, y)
        return data_top_y, data_top_y + col_height * (CARD_ROWS - 1), col_height, hole_height

    def _plot_expected_holes(self, y, x_data_left, x_data_right, col_width, hole_width, hole_height):
        """Outline on the debug image where holes may lie on the row at *y*."""
        expected_top_edge = y - hole_height // 2
        expected_bottom_edge = y + hole_height // 2
        blue = 255 * 256 * 256
        for expected_left_edge in drange(x_data_left, x_data_right - 1, col_width):
            # Dotted left and right sides ...
            for y_plot in drange(expected_top_edge, expected_bottom_edge, 2):
                self._mark(expected_left_edge, y_plot, blue)
                self._mark(expected_left_edge + hole_width, y_plot, blue)
            # ... and solid top and bottom sides.
            for x_plot in drange(expected_left_edge, expected_left_edge + hole_width):
                self._mark(x_plot, expected_top_edge, blue)
                self._mark(x_plot, expected_bottom_edge, blue)

    def _scan(self, bright=-1):
        """Locate the card, find the holes and decode them.

        bright -- brightness threshold; values <= 0 select it automatically.

        Fills in threshold, text, decoded and surface, and returns self.
        """
        if self.debug:
            # If debugging make a copy we can draw on.
            self.debug_image = self.image.copy()
            self.debug_pix = self.debug_image.load()

        self.threshold = bright if bright > 0 else self._find_threshold_brightness()
        y_data_pos, _, col_height, hole_height = self._find_data_vert_dimensions()

        # (column, row) -> width in pixels of the hole found there.
        data = {}

        # Chads are narrow, so find them heuristically by measuring runs of
        # bright pixels along the row. Should be forgiving if the image is
        # slightly wonky.
        y = y_data_pos
        for row_num in range(CARD_ROWS):
            # Line 0 has a corner missing, so the card edges for the first
            # row are probed one row further in; the last row is treated
            # the same way.
            if row_num == 0:
                probe_y = y + col_height
            elif row_num == CARD_ROWS - 1:
                probe_y = y - col_height
            else:
                probe_y = y
            x_data_left, x_data_right, col_width, hole_width = self._find_data_horiz_dimensions(probe_y)

            left_edge = -1   # x where the current bright run began; -1 = none
            for x in range(x_data_left, x_data_right):
                # Chads are tall, so probing around the middle of their
                # height is reliable.
                if self._brightness(self.pix[x, y]) >= self.threshold:
                    if left_edge == -1:
                        left_edge = x
                    if self.debug:
                        self._mark(x, y, self._flip(self.pix[x, y]))
                elif left_edge > -1:
                    # A bright run just ended; it is a hole if it is at
                    # least three quarters of the nominal hole width.
                    hole_length = x - left_edge
                    if hole_length >= hole_width * 0.75:
                        col_num = int((left_edge + hole_length / 2.0 - x_data_left) / col_width + 0.25)
                        data[(col_num, row_num)] = hole_length
                    left_edge = -1

            if self.debug:
                self._plot_expected_holes(y, x_data_left, x_data_right, col_width, hole_width, hole_height)
            y += col_height

        if self.debug:
            self.debug_image.show()
            # Pause: prevents run-away debug shows from exhausting memory.
            try:
                wait_for_enter = raw_input      # Python 2
            except NameError:
                wait_for_enter = input          # Python 3
            wait_for_enter("Press Enter to continue...")

        # Start from a clean slate so that a repeated scan does not append
        # to earlier results.
        self.decoded = []
        self.surface = []
        chars = []
        for col in range(CARD_COLUMNS):
            col_pattern = tuple('O' if (col, row) in data else ' ' for row in range(CARD_ROWS))
            col_surface = [data.get((col, row), 0) for row in range(CARD_ROWS)]
            chars.append(translate.get(col_pattern, '@'))
            self.decoded.append(col_pattern)
            self.surface.append(col_surface)
        self.text = ''.join(chars)
        return self

    def dump(self, id, raw_data=False):
        """Write an ASCII-art picture of the card to standard output.

        id       -- label printed in the heading (the callers in this file
                    pass the image file name)
        raw_data -- if true, show the measured hole width "(n)" at each
                    punched position instead of 'O'
        """
        ruler = ' ' + '123456789-' * (CARD_COLUMNS // 10)
        heading = [' Card Dump of Image file:', id, 'Format',
                   'Raw' if raw_data else 'Dump', 'threshold=', self.threshold]
        lines = [
            ' '.join(str(part) for part in heading),
            ruler,
            ' ' + '_' * CARD_COLUMNS + ' ',
            '/' + self.text + '_' * (CARD_COLUMNS - len(self.text)) + '|',
        ]
        for rnum in range(len(self.decoded[0])):
            if raw_data:
                cells = [("(%d)" % val[rnum]) if val[rnum] != 0 else '.' for val in self.surface]
            else:
                cells = [col[rnum] if col[rnum] == 'O' else '.' for col in self.decoded]
            lines.append('|' + ''.join(cells) + '|')
        lines.append('`' + '-' * CARD_COLUMNS + "'")
        lines.append(ruler)
        lines.append('')
        sys.stdout.write('\n'.join(lines) + '\n')
# Command line entry point: decode every image named on the command line and
# print its text, optionally followed by ASCII-art dumps of the card.
if __name__ == '__main__':
    usage = ("usage: %prog [options] image [image...]\n"
             "decode punch card image into ASCII.")
    parser = OptionParser(usage)
    parser.add_option('-b', '--bright-threshold', type='int', dest='bright', default=-1, help='Brightness (R+G+B)/3, e.g. 127.')
    # NOTE: --side-margin-ratio is accepted but its value is not used by
    # anything below.
    parser.add_option('-s', '--side-margin-ratio', type='float', dest='side_margin_ratio', default=CARD_SIDE_MARGIN_RATIO, help='Manually set side margin ratio (sideMargin/cardWidth).')
    parser.add_option('-d', '--dump', action='store_true', dest='dump', help='Output an ASCII-art version of the card.')
    parser.add_option('-i', '--display-image', action='store_true', dest='display', help='Display an anotated version of the image.')
    parser.add_option('-r', '--dump-raw', action='store_true', dest='dumpraw', help='Output ASCII-art with raw row/column accumulator values.')
    # -x / -X bound the search horizontally, -y / -Y vertically; a stop
    # value of 0 is passed through as "not set".
    parser.add_option('-x', '--x-start', type='int', dest='xstart', default=0, help='Start looking for a card edge at x position (pixels)')
    parser.add_option('-X', '--x-stop', type='int', dest='xstop', default=0, help='Stop looking for a card edge at x position')
    parser.add_option('-y', '--y-start', type='int', dest='ystart', default=0, help='Start looking for a card edge at y position')
    parser.add_option('-Y', '--y-stop', type='int', dest='ystop', default=0, help='Stop looking for a card edge at y position')
    parser.add_option('-a', '--adjust-x', type='int', dest='xadjust', default=0, help='Adjust middle edge detect location (pixels)')
    (options, args) = parser.parse_args()

    for arg in args:
        image = Image.open(arg)
        card = PunchCard(image, bright=options.bright, debug=options.display,
                         xstart=options.xstart, xstop=options.xstop,
                         ystart=options.ystart, ystop=options.ystop,
                         xadjust=options.xadjust)
        print(card.text)
        if options.dump:
            card.dump(arg)
        if options.dumpraw:
            card.dump(arg, raw_data=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment