Skip to content

Instantly share code, notes, and snippets.

@sametz
Created June 13, 2017 21:02
Show Gist options
  • Save sametz/65f7754c0f8f82148409508f4615c207 to your computer and use it in GitHub Desktop.
Save sametz/65f7754c0f8f82148409508f4615c207 to your computer and use it in GitHub Desktop.
quick and dirty hack for cleaning up grade rosters exported by Canvas.
"""
This notebook assumes that the .csv file has columns, with headings renamed as
below, for:
* Section (contents unedited from the Canvas default)
* Final Exam (final exam score /100)
* Lab Reports (total lab report score /100)
* Quizzes (total quiz score /100)
* Total (Total score /100)
It also assumes that, if no specific csv filename is given for conversion,
the file named in DEFAULT_FILE_NAME is in the same folder as the script.
"""
import numpy as np
import pandas as pd
# Roster file that gets converted when the script is run without an explicit
# csv filename.
DEFAULT_FILE_NAME = '326Spring16minimal.csv'

# Maps each TA's name to the list of section numbers assigned to that TA.
# Edit the entries below whenever the TA/section assignments change.
TA_DICT = {
    'Natasha': [20, 23, 29],
    'Sarah': [21, 28, 39],
    'Jianyu': [22, 34, 40],
    'Wei': [24, 26, 31],
    'Brian': [25, 32, 37],
    'Chuanqi': [27, 30, 33],
    'Sam': [35, 36],
    'Sam Friday': [38],
}
def check_ta_dict(dict_=TA_DICT):
    """Check that no section number is assigned to more than one TA.

    Prints a progress line for every TA, a warning line for every pair of TAs
    whose section lists share at least one section number, and a confirmation
    line when no overlap was found at all.

    Args:
        dict_: mapping of TA name -> list of section numbers. Defaults to the
            module-level TA_DICT.

    Returns:
        True if no two TAs share a section number, False otherwise.
    """
    no_conflicts = True
    ta_names = list(dict_)
    for position, ta in enumerate(ta_names):
        print('Current TA:', ta)
        # Compare only against TAs that come later in the ordering: every
        # unordered pair is then examined (and reported) exactly once, and no
        # TA is ever compared against itself.
        for other_ta in ta_names[position + 1:]:
            if set(dict_[ta]).intersection(dict_[other_ta]):
                print(' TWO TAs SECTIONS OVERLAP FOUND:',
                      ta, dict_[ta], other_ta, dict_[other_ta])
                no_conflicts = False
    if no_conflicts:
        print('No TA conflicts found!')
    return no_conflicts
def match_ta(section, dict_=TA_DICT):
    """Find the TA whose section list contains the given section number.

    Prints which section is being evaluated and the outcome of the lookup.

    Args:
        section: section number to look up.
        dict_: mapping of TA name -> list of section numbers. Defaults to the
            module-level TA_DICT.

    Returns:
        The name of the first TA (in dictionary order) whose list contains
        ``section``, or the empty string if no TA has that section.
    """
    print('Evaluating section ', section)
    for name, assigned_sections in dict_.items():
        if section not in assigned_sections:
            continue
        print(name, ' matched to Section ', section)
        return name
    # Fell through the whole dictionary without finding the section
    print("No matching TA found")
    return ''
# Following function currently unused
def match_row_to_ta(row):
    """Return the TA matched to the 'Section' entry of ``row``.

    ``row`` must support lookup by the key 'Section' (presumably a DataFrame
    row -- confirm against any caller). The lookup itself is delegated to
    match_ta with its default TA dictionary.
    """
    section = row['Section']
    return match_ta(section)
def process_csv(filename=DEFAULT_FILE_NAME):
    """Clean up a Canvas grade roster csv and write a 'processed_' copy.

    The roster is read into a DataFrame, rows lacking any of the key scores
    are dropped, the 'Section' column is reduced to an integer section
    number, the TA dictionary is checked for overlapping sections, and a 'TA'
    column is added by looking up each section number with match_ta.

    Args:
        filename: path of the csv file to convert. Defaults to
            DEFAULT_FILE_NAME.

    Returns:
        None. The result is written to a file whose name is the input file's
        name with a 'processed_' prefix, in the same directory as the input.
    """
    import os  # local import: only needed to build the output path

    original_csv = filename
    df = pd.read_csv(original_csv)  # df is the pandas DataFrame translation

    # If a score is missing in a key field, it's a junk row that can be deleted
    df = df.dropna(subset=['Final Exam', 'Lab Reports', 'Quizzes', 'Total'])

    # Truncate section numbers to two digits
    # Assumes "ddX-CHEMddd-0xxL" format to --> xx
    # Summer session 2 begins with 9, not 0, so needs a refactor to accommodate
    # The pattern is a raw string, and regex=True is passed explicitly because
    # pandas 2.0 changed the default of Series.str.replace to literal matching.
    sections = df['Section'].str.replace(r'^\d{2}\w-CHEM\d{3}-0', '',
                                         regex=True)
    sections = sections.str.replace('L', '', regex=False)

    # Convert Section numbers to integers, because integers used in TA_DICT
    df['Section'] = sections.astype('int')

    # Checks that no two TAs have the same section number
    # (indicates error entering sections above)
    check_ta_dict()

    # Match TAs to sections and add them to the table.
    # Series.map calls match_ta once per row and also copes with a roster
    # that has no rows left after the dropna above.
    df['TA'] = df['Section'].map(match_ta)

    # TODO: locate "Section" column and move TAs next to it

    # Export df as csv with 'processed_' prefix. The prefix is applied to the
    # file name only, so an input path that includes a directory still yields
    # a valid output path next to the input file.
    directory, base_name = os.path.split(original_csv)
    df.to_csv(os.path.join(directory, 'processed_' + base_name))
if __name__ == '__main__':
    # Script entry point: convert the csv file named by the first
    # command-line argument, or DEFAULT_FILE_NAME when no argument is given.
    import sys

    try:
        file_name = sys.argv[1]
    except IndexError:
        # No filename on the command line: fall back to the default file
        file_name = DEFAULT_FILE_NAME
        print('converting default file ', file_name)
    else:
        print('converting ', file_name)
    process_csv(file_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment