Created
December 12, 2013 08:17
Revisions
-
georgepsarakis created this gist
Dec 12, 2013 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,208 @@ from collections import MutableSequence import marshal from operator import add, div, mul, neg, mod, sub from operator import ge, gt, le, lt, eq from itertools import imap ''' Extended list class - fast lookups by maintaining a dictionary with the values - numeric operations between 2 instances are possible (like Octave matrices) ''' class ListExtended(MutableSequence): ''' *args : initialize list with these elements **kwargs : - hashing : create hash of the list objects for the lookup table (boolean) - lookup : maintain a dictionary for fast element lookups ''' def __init__(self, *args, **kwargs): self.list = args[:] self.optimize = 'lookup' in kwargs and kwargs['lookup'] self.hashing = 'hashing' in kwargs and kwargs['hashing'] self.__rehash() def __len__(self): return len(self.list) def __repr__(self): return marshal.dumps(self.list) def __str__(self): return str(self.list) def __unicode__(self): return unicode(self.list) def __gt__(self, other): return self.__binary_operation(other, gt) def __lt__(self, other): return self.__binary_operation(other, lt) def __ge__(self, other): return self.__binary_operation(other, ge) def __le__(self, other): return self.__binary_operation(other, le) def __eq__(self, other): return self.__binary_operation(other, eq) def __getitem__(self, index): return self.list[index] def __remove_lookup(self, index): if self.optimize: key = self.list[index] if self.hashing: key = hash(marshal.dumps(key)) del self.lookup[key] def __set_lookup(self, index, value): if self.optimize: if self.hashing: value = hash(marshal.dumps(value)) self.lookup[value] = index def __delitem__(self, index): self.__remove_lookup(index) del self.list[index] def __setitem__(self, index, value): self.__remove_lookup(index) self.list[index] = value self.__set_lookup(index, value) def find(self, item): exists = False if self.optimize: if self.hashing: item = hash(marshal.dumps(item)) exists = item in self.lookup if exists: index = self.lookup[item] else: index = None else: try: index = self.list.index(item) exists = True except ValueError: index = None return (exists, index) def __contains__(self, item): exists = False if self.optimize: if self.hashing: item = hash(marshal.dumps(item)) exists = item in self.lookup else: try: index = self.list.index(item) except ValueError: exists = False return exists def __iter__(self): for item in self.list: yield item def __reversed__(self): L = len(self.list) for index in xrange(L - 1, -1, -1): yield self.list[index] ''' useful to treat lists as numeric arrays ''' def __binary_operation(self, other, operation): if len(other) != len(self.list): return None L = len(self.list) def op(index): return operation(self.list[index], other[index]) return map(op, xrange(L)) def __unary_operation(self, operation): return map(operation, self.list) def __add__(self, other): return self.__binary_operation(other, add) def __mul__(self, other): return self.__binary_operation(other, mul) def __div__(self, other): return self.__binary_operation(other, div) def __mod__(self, other): return self.__binary_operation(other, mod) def __sub__(self, other): return self.__binary_operation(other, sub) def __neg__(self): return self.__unary_operation(neg) def __rehash(self): self.lookup = {} if self.optimize: if self.hashing: L = imap(hash, imap(marshal.dumps, self.list)) index = 0 for item in L: self.lookup[item] = index index += 1 else: L = len(self.list) self.lookup = dict([ (self.list[item], item) for item in L ]) def insert(self, index, value): self.list.insert(index, value) ''' needs rehashing - slow operation ''' self.__rehash() if __name__ == "__main__": from time import time a = ListExtended(*range(2, 20, 1), lookup=True, hashing=True) b = ListExtended(*range(4, 40, 2)) print 'a', a print 'b', b print 'a + b = ', a + b print 'a * b = ', a * b print '-a = ', -a print 'b - a = ', b-a print 'b/a = ', b/a t_optimize = 0. for n in xrange(1000): t_optimize -= time() a.find(12) t_optimize += time() t_simple = 0. for n in xrange(1000): t_simple -= time() b.find(17) t_simple += time() ''' nothing surprising, just verifying it works! ''' ''' for small lists it does not make a difference ''' print "Optimized find:", t_optimize/1000. print "Simple find:", t_simple/1000. print "Simple/Optimized", t_simple/t_optimize a = ListExtended(*range(2, 20000, 1), lookup=True, hashing=True) b = ListExtended(*range(4, 40000, 2)) t_optimize = 0. for n in xrange(1000): t_optimize -= time() a.find(12) t_optimize += time() t_simple = 0. for n in xrange(1000): t_simple -= time() b.find(17) t_simple += time() ''' for larger lists it _does_ make a difference ''' print "Optimized find:", t_optimize/1000. print "Simple find:", t_simple/1000. print "Simple/Optimized", t_simple/t_optimize