Skip to content

Instantly share code, notes, and snippets.

@rlabbe
Last active February 9, 2017 14:31
Show Gist options
  • Save rlabbe/431c96f89251d704888bb6437da9392b to your computer and use it in GitHub Desktop.
Save rlabbe/431c96f89251d704888bb6437da9392b to your computer and use it in GitHub Desktop.
''' Create an object that uses numpy to read a CSV file with a header row.
One attribute is created per column, so you can write f.col_name instead of
f.data['col_name'].

f = NamedCSV(filename)
f.data                    # all data in the file, as a numpy structured array
f.mass                    # the 'mass' column
f.data['mass']            # same as previous line
f.extract(f.mass > 180)   # every column attribute now holds only rows where mass > 180
'''
class NamedCSV(object):
    """Read a comma-separated file with a header row and expose each column
    as an attribute of the instance.

    Example:
        f = NamedCSV('foo.csv')
        f.data            # every row, as a numpy structured array
        f.mass            # the 'mass' column
        f.data['mass']    # same as previous line

    A column whose name collides with an attribute of this class (for
    instance 'data', 'names' or 'extract') is not turned into an attribute,
    because doing so would break the object. Such a column is still
    available as f.data['col_name'].
    """

    def __init__(self, name, **kwargs):
        """Load the file and create one attribute per column.

        name is passed to np.genfromtxt as its first argument (a filename,
        an open file, a list of lines, ...). Any extra keyword arguments are
        forwarded to np.genfromtxt unchanged.
        """
        # Function-scope import so the class works even when the enclosing
        # module has no `import numpy as np` of its own.
        import numpy as np

        # unpack has to be False: NumPy 1.20 and later turn a structured
        # result into a plain list of column arrays when unpack=True, and a
        # list has no .dtype.names for the rest of this class to rely on.
        data = np.genfromtxt(name, delimiter=',', names=True, unpack=False,
                             **kwargs)
        # A file with a single data row comes back as a 0-d array; promote
        # it so len(self.data) and boolean-mask indexing behave uniformly.
        self.data = np.atleast_1d(data)
        self.unextract()

    @property
    def names(self):
        """Names of all columns, in file order."""
        return self.data.dtype.names

    def _attribute_fields(self):
        """Return the column names that can safely be bound as attributes."""
        cls = type(self)
        return [field for field in self.names
                if field != 'data' and not hasattr(cls, field)]

    def extract(self, match):
        """Restrict every column attribute to the rows selected by match.

        match is a boolean array (or any valid numpy index) that is applied
        to each column.

        Example:
            b = NamedCSV('foo.csv')
            match = b.col1 > 0
            b.extract(match)

        Now all the fields (not just col1) only have values where col1 > 0.
        self.data is unaffected, so in general
        len(self.data) != len(self.col_name) after the call.

        Call unextract() to undo this operation.
        """
        for field in self._attribute_fields():
            setattr(self, field, self.data[field][match])

    def unextract(self):
        """Reset every column attribute to the full column from self.data."""
        for field in self._attribute_fields():
            setattr(self, field, self.data[field])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment