Skip to content

Instantly share code, notes, and snippets.

@joshainglis
Last active August 29, 2015 13:56
Show Gist options
  • Select an option

  • Save joshainglis/9302620 to your computer and use it in GitHub Desktop.

Select an option

Save joshainglis/9302620 to your computer and use it in GitHub Desktop.
# Credentials passed to gspread.login() when fetching spreadsheets.
# NOTE(review): the analysis code does `from settings import USERNAME, PASSWORD`,
# so these two lines presumably live in a separate `settings` module — confirm.
# Replace the placeholders locally and keep real values out of version control.
USERNAME = "[email protected]"
PASSWORD = "yourgooglepassword"
from __future__ import division
from pylab import *
import gspread
import pandas
from pint import UnitRegistry, UndefinedUnitError
from settings import USERNAME, PASSWORD
# Single pint unit registry shared by every quantity parsed in this module.
ureg = UnitRegistry()
# Short alias for the registry's Quantity constructor.
Q_ = ureg.Quantity
def get_data(ss_name):
    """Load the first worksheet of a Google spreadsheet into a DataFrame.

    Logs in with the module-level ``USERNAME``/``PASSWORD``, opens the
    spreadsheet called ``ss_name`` and reads every cell of worksheet 0.
    The first row is used as the column header, the remaining rows as data.
    Cell values are kept as returned by the sheet, except for a
    ``'Timestamp'`` column, which is parsed day-first when that is possible.

    Args:
        ss_name: Title of the spreadsheet to open.

    Returns:
        A ``pandas.DataFrame`` whose columns follow the header order. An
        empty ``DataFrame`` is returned when the worksheet has no rows.
    """
    gc = gspread.login(USERNAME, PASSWORD)
    wks = gc.open(ss_name).get_worksheet(0)
    rows = wks.get_all_values()
    if not rows:
        # Nothing in the sheet, not even a header row.
        return pandas.DataFrame()

    head, data = rows[0], rows[1:]
    # zip(*data) transposes the row-major cells into one tuple per column.
    df = pandas.DataFrame(dict(zip(head, zip(*data))), columns=head)

    try:
        df['Timestamp'] = pandas.to_datetime(df['Timestamp'], dayfirst=True)
    except (KeyError, ValueError, TypeError, OverflowError):
        # Best effort: the sheet may have no 'Timestamp' column, or hold
        # values that cannot be parsed as dates. Leave the column untouched.
        pass
    return df
def subset_cols(df, q):
    """Return the columns of ``df`` whose name starts with the prefix ``q``.

    Column order is preserved; an empty frame (same index) is returned when
    no column name matches.
    """
    matching = []
    for name in df.columns:
        if name.startswith(q):
            matching.append(name)
    return df[matching]
def subset_units(df, column, unit):
    """Return the rows of ``df`` whose value in ``column`` equals ``unit``."""
    has_unit = df[column] == unit
    return df[has_unit]
def get_mag(x):
    """Parse ``x`` as a pint quantity and return its magnitude in base units."""
    quantity = Q_(x)
    in_base_units = quantity.to_base_units()
    return in_base_units.magnitude
def get_units(x):
    """Parse ``x`` as a pint quantity and return its base units as a string."""
    in_base_units = Q_(x).to_base_units()
    base_units = in_base_units.units
    return '{}'.format(base_units)
def normalise_units(df):
    """Convert every column after the first to numbers, in place.

    A column that can be cast to ``float`` is replaced by the cast values.
    Otherwise its cells are parsed as quantities and two new columns are
    added: ``'<col> - Converted'`` (magnitude in base units) and
    ``'<col> - Units'`` (the base units as text). A column that cannot be
    parsed either way is left as it is.

    Returns:
        The same ``df`` object that was passed in.
    """
    # The column index is captured once, so columns added below are not
    # themselves revisited.
    for name in df.columns[1:]:
        try:
            df[name] = df[name].astype(float)
        except ValueError:
            pass  # not plain numbers; try parsing as quantities below
        else:
            continue

        converted_name = '{} - Converted'.format(name)
        units_name = '{} - Units'.format(name)
        try:
            df[converted_name] = df[name].map(get_mag)
            df[units_name] = df[name].map(get_units)
        except (AttributeError, UndefinedUnitError):
            # Cells that are not parseable quantities: skip this column.
            pass
    return df
def w1_q3_get_data():
    """Fetch the week 1 "Estimates" responses spreadsheet as a DataFrame."""
    spreadsheet_title = "SCIE1000 Week 1: Estimates (Responses)"
    return get_data(spreadsheet_title)
def w1_q3():
    """Fetch the week 1 responses and return them with units normalised."""
    responses = w1_q3_get_data()
    return normalise_units(responses)
def _bwplot(ax, data):
    """Draw a horizontal, unlabelled box plot of ``data`` on ``ax``.

    The axis frame, ticks and labels are switched off afterwards.
    """
    box_options = dict(vert=False, labels=[""], widths=0.9)
    ax.boxplot(data, **box_options)
    ax.axis('off')
def _kde(ax, data, **kwargs):
    """Draw a red kernel-density curve of ``data`` on ``ax``.

    The y-limits are fitted to the curve with 10% headroom above its peak,
    and the axis frame, ticks and labels are switched off.

    Args:
        ax: Axes to draw on.
        data: Object exposing a pandas-style ``.plot(kind='kde', ...)``
            (presumably a ``pandas.Series`` — confirm against callers).
        **kwargs: Extra keyword arguments forwarded to ``data.plot``.
    """
    plotted = data.plot(kind='kde', ax=ax, grid=False, c='red', **kwargs)
    # Fetch the y-values of the density curve (first line drawn) once.
    density = plotted.lines[0].get_data()[1]
    ax.set_ylim([density.min(), density.max() * 1.1])
    ax.axis('off')
def _hist(ax, data, xlab):
    """Draw a 10-bin histogram of ``data`` on ``ax`` with tight limits.

    The y-axis runs from 0 to the tallest bin count and the x-axis spans
    exactly the range of the bin edges.

    Args:
        ax: Axes to draw on.
        data: Values to bin.
        xlab: Label for the x-axis of ``ax``.
    """
    counts, edges, _patches = ax.hist(data, bins=10)
    ax.set_ylim([0, counts.max()])
    ax.set_xlim([edges.min(), edges.max()])
    # Label the axes we were given. The pylab-level xlabel() acts on
    # whichever axes is "current", which is not necessarily ``ax``.
    ax.set_xlabel(xlab)
def hist_bwplot(data, ptitle, log_data=False, xlab=''):
    """Plot a box plot above a histogram with a density-curve overlay.

    Everything is drawn on pylab figure number 0 (7x5): a thin box plot in
    the top row of an 8x3 grid, a histogram in the remaining seven rows
    sharing the box plot's x-axis, and a KDE curve on a twin y-axis over the
    histogram.

    Args:
        data: Values to plot.
        ptitle: Title shown above the box plot.
        log_data: When true, plot ``log10(data)`` and wrap the x label in
            ``log10(...)``.
        xlab: Label for the x-axis.
    """
    if log_data:
        data = log10(data)
        xlab = 'log10(%s)' % xlab

    figure(0, (7, 5))
    grid_shape = (8, 3)
    box_ax = subplot2grid(grid_shape, (0, 0), colspan=3, rowspan=1)
    hist_ax = subplot2grid(grid_shape, (1, 0), colspan=3, rowspan=7,
                           sharex=box_ax)
    kde_ax = hist_ax.twinx()

    _bwplot(box_ax, data)
    _hist(hist_ax, data, xlab)
    _kde(kde_ax, data)

    box_ax.set_title(ptitle)
    hist_ax.set_xlabel(xlab)
    # Switch to scientific notation only outside 1e-3 .. 1e5.
    hist_ax.xaxis.get_major_formatter().set_powerlimits((-3, 5))
    # No gap, so the box plot sits directly on top of the histogram.
    subplots_adjust(hspace=0, wspace=0)
def w1_q3ai_plot(df, log_data=False):
    """Plot the responses in column ``'3(a)(i)'`` of ``df``.

    Args:
        df: Frame holding a ``'3(a)(i)'`` column.
        log_data: Passed through to ``hist_bwplot``.
    """
    estimates = df['3(a)(i)']
    title = 'Babies born in the world during the two hour SCIE1000 tutorial class'
    axis_label = 'Estimate of babies born / 2h'
    hist_bwplot(estimates, title, log_data=log_data, xlab=axis_label)
def w1_q3aii_plot(df, log_data=False):
    """Plot the ``'3(a)(ii)'`` responses whose converted unit is ``'gram'``.

    Args:
        df: Frame holding ``'3(a)(ii) - Units'`` and
            ``'3(a)(ii) - Converted'`` columns.
        log_data: Passed through to ``hist_bwplot``.
    """
    in_grams = subset_units(df, '3(a)(ii) - Units', 'gram')
    estimates = in_grams['3(a)(ii) - Converted']
    title = 'Total amount of pure alcohol consumed in Australia in a year'
    axis_label = 'Estimate of Alcohol consumed per year in Aus (grams)'
    hist_bwplot(estimates, title, log_data=log_data, xlab=axis_label)
def w2_7d_get_data():
    """Fetch the week 2 "BSA Estimates" responses as a DataFrame.

    The first worksheet row is the header. Every column from the fourth
    onwards is cast to ``float``; the first three are left as read.
    """
    client = gspread.login(USERNAME, PASSWORD)
    spreadsheet = client.open("SCIE1000 Week2: BSA Estimates (Responses)")
    rows = spreadsheet.get_worksheet(0).get_all_values()
    head, data = rows[0], rows[1:]

    # Transpose the row-major cells into one tuple per column.
    by_column = dict(zip(head, zip(*data)))
    df = pandas.DataFrame(by_column, columns=head)

    numeric_cols = head[3:]
    df[numeric_cols] = df[numeric_cols].astype(float)
    return df
# NOTE(review): unfinished stub. The body is a lone raw string, so calling this
# function does nothing and returns None; `df` is never used.
def w2_7d_plot(df):
    # As the first statement in the body, this string is the function's
    # docstring. It is never compiled or matched as a regex.
    # NOTE(review): it looks like a pattern for "<number> <volume unit> / <area unit>"
    # answers — confirm intent before wiring it up. Two things to check then:
    #   * `(?:\.\d+)` makes the fractional part mandatory (no trailing `?`);
    #   * the unescaped `^` in `(?:^|\*\*)` is a start-of-string anchor, not a
    #     literal caret — presumably `\^` was meant.
    r"^\d+(?:\.\d+) ?(?:L|mL|(?:(?:km|m|cm|mm)(?:^|\*\*)3)) ?/ ?(?:km|m|cm|mm)(?:^|\*\*)2$"
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment