Skip to content

Instantly share code, notes, and snippets.

@georgepsarakis
Created December 12, 2013 08:17

Revisions

  1. georgepsarakis created this gist Dec 12, 2013.
    208 changes: 208 additions & 0 deletions extended-list.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,208 @@
    from collections import MutableSequence
    import marshal
    from operator import add, div, mul, neg, mod, sub
    from operator import ge, gt, le, lt, eq
    from itertools import imap

    '''
    Extended list class
    - fast lookups by maintaining a dictionary with the values
    - numeric operations between 2 instances are possible (like Octave matrices)
    '''
    class ListExtended(MutableSequence):
    '''
    *args : initialize list with these elements
    **kwargs :
    - hashing : create hash of the list objects for the lookup table (boolean)
    - lookup : maintain a dictionary for fast element lookups
    '''
    def __init__(self, *args, **kwargs):
    self.list = args[:]
    self.optimize = 'lookup' in kwargs and kwargs['lookup']
    self.hashing = 'hashing' in kwargs and kwargs['hashing']
    self.__rehash()

    def __len__(self):
    return len(self.list)

    def __repr__(self):
    return marshal.dumps(self.list)

    def __str__(self):
    return str(self.list)

    def __unicode__(self):
    return unicode(self.list)

    def __gt__(self, other):
    return self.__binary_operation(other, gt)

    def __lt__(self, other):
    return self.__binary_operation(other, lt)

    def __ge__(self, other):
    return self.__binary_operation(other, ge)

    def __le__(self, other):
    return self.__binary_operation(other, le)

    def __eq__(self, other):
    return self.__binary_operation(other, eq)

    def __getitem__(self, index):
    return self.list[index]

    def __remove_lookup(self, index):
    if self.optimize:
    key = self.list[index]
    if self.hashing:
    key = hash(marshal.dumps(key))
    del self.lookup[key]

    def __set_lookup(self, index, value):
    if self.optimize:
    if self.hashing:
    value = hash(marshal.dumps(value))
    self.lookup[value] = index

    def __delitem__(self, index):
    self.__remove_lookup(index)
    del self.list[index]

    def __setitem__(self, index, value):
    self.__remove_lookup(index)
    self.list[index] = value
    self.__set_lookup(index, value)

    def find(self, item):
    exists = False
    if self.optimize:
    if self.hashing:
    item = hash(marshal.dumps(item))
    exists = item in self.lookup
    if exists:
    index = self.lookup[item]
    else:
    index = None
    else:
    try:
    index = self.list.index(item)
    exists = True
    except ValueError:
    index = None
    return (exists, index)

    def __contains__(self, item):
    exists = False
    if self.optimize:
    if self.hashing:
    item = hash(marshal.dumps(item))
    exists = item in self.lookup
    else:
    try:
    index = self.list.index(item)
    except ValueError:
    exists = False
    return exists

    def __iter__(self):
    for item in self.list:
    yield item

    def __reversed__(self):
    L = len(self.list)
    for index in xrange(L - 1, -1, -1):
    yield self.list[index]

    ''' useful to treat lists as numeric arrays '''
    def __binary_operation(self, other, operation):
    if len(other) != len(self.list):
    return None
    L = len(self.list)
    def op(index):
    return operation(self.list[index], other[index])
    return map(op, xrange(L))

    def __unary_operation(self, operation):
    return map(operation, self.list)

    def __add__(self, other):
    return self.__binary_operation(other, add)

    def __mul__(self, other):
    return self.__binary_operation(other, mul)

    def __div__(self, other):
    return self.__binary_operation(other, div)

    def __mod__(self, other):
    return self.__binary_operation(other, mod)

    def __sub__(self, other):
    return self.__binary_operation(other, sub)

    def __neg__(self):
    return self.__unary_operation(neg)

    def __rehash(self):
    self.lookup = {}
    if self.optimize:
    if self.hashing:
    L = imap(hash, imap(marshal.dumps, self.list))
    index = 0
    for item in L:
    self.lookup[item] = index
    index += 1
    else:
    L = len(self.list)
    self.lookup = dict([ (self.list[item], item) for item in L ])

    def insert(self, index, value):
    self.list.insert(index, value)
    ''' needs rehashing - slow operation '''
    self.__rehash()

    if __name__ == "__main__":
    from time import time
    a = ListExtended(*range(2, 20, 1), lookup=True, hashing=True)
    b = ListExtended(*range(4, 40, 2))
    print 'a', a
    print 'b', b
    print 'a + b = ', a + b
    print 'a * b = ', a * b
    print '-a = ', -a
    print 'b - a = ', b-a
    print 'b/a = ', b/a
    t_optimize = 0.
    for n in xrange(1000):
    t_optimize -= time()
    a.find(12)
    t_optimize += time()

    t_simple = 0.
    for n in xrange(1000):
    t_simple -= time()
    b.find(17)
    t_simple += time()
    ''' nothing surprising, just verifying it works! '''
    ''' for small lists it does not make a difference '''
    print "Optimized find:", t_optimize/1000.
    print "Simple find:", t_simple/1000.
    print "Simple/Optimized", t_simple/t_optimize
    a = ListExtended(*range(2, 20000, 1), lookup=True, hashing=True)
    b = ListExtended(*range(4, 40000, 2))
    t_optimize = 0.
    for n in xrange(1000):
    t_optimize -= time()
    a.find(12)
    t_optimize += time()

    t_simple = 0.
    for n in xrange(1000):
    t_simple -= time()
    b.find(17)
    t_simple += time()
    ''' for larger lists it _does_ make a difference '''
    print "Optimized find:", t_optimize/1000.
    print "Simple find:", t_simple/1000.
    print "Simple/Optimized", t_simple/t_optimize