Created
June 13, 2017 21:02
-
-
Save sametz/65f7754c0f8f82148409508f4615c207 to your computer and use it in GitHub Desktop.
quick and dirty hack for cleaning up grade rosters exported by Canvas.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This script assumes that the .csv file has columns, with headings renamed as
below, for:
* Section (contents unedited from the Canvas default)
* Final Exam (final exam score /100)
* Lab Reports (total lab report score /100)
* Quizzes (total quiz score /100)
* Total (total score /100)
If no specific csv filename is given for conversion, it assumes that the
file named by DEFAULT_FILE_NAME is in the same folder as the script.
""" | |
import os

import numpy as np
import pandas as pd
# Roster file converted when the script is started without a csv filename.
DEFAULT_FILE_NAME = '326Spring16minimal.csv'

# TA name -> list of the section numbers assigned to that TA.
# Edit this mapping when TA assignments change; each section number should
# appear under one TA only.
TA_DICT = {
    'Natasha': [20, 23, 29],
    'Sarah': [21, 28, 39],
    'Jianyu': [22, 34, 40],
    'Wei': [24, 26, 31],
    'Brian': [25, 32, 37],
    'Chuanqi': [27, 30, 33],
    'Sam': [35, 36],
    'Sam Friday': [38],
}
def check_ta_dict(dict_=TA_DICT):
    """Check that no section number is assigned to more than one TA.

    Every TA is compared against every other TA, so an overlapping pair is
    printed twice (once from each side). Progress and results are printed
    to stdout.

    Args:
        dict_: mapping of TA name to a list of section numbers.

    Returns:
        True if no two TAs share a section number, False otherwise.
    """
    no_conflicts = True
    for ta, sections in dict_.items():
        print('Current TA:', ta)
        own_sections = set(sections)
        for other_ta, other_sections in dict_.items():
            # Compare TA names by value; identity (`is`) is not a reliable
            # test for strings.
            if ta != other_ta and own_sections.intersection(other_sections):
                print(' TWO TAs SECTIONS OVERLAP FOUND:',
                      ta, sections, other_ta, other_sections)
                no_conflicts = False
    if no_conflicts:
        print('No TA conflicts found!')
    return no_conflicts
def match_ta(section, dict_=TA_DICT):
    """Return the name of the TA whose section list contains `section`.

    Args:
        section: section number to look up.
        dict_: mapping of TA name to a list of section numbers.

    Returns:
        The first TA name (in mapping order) whose list contains the
        section, or '' when no TA has it. The lookup is traced on stdout.
    """
    print('Evaluating section ', section)
    for name, assigned in dict_.items():
        if section not in assigned:
            continue
        print(name, ' matched to Section ', section)
        return name
    print("No matching TA found")
    return ''
# Row-wise variant of match_ta; not called anywhere in this file.
def match_row_to_ta(row):
    """Return the TA matched to the value stored under row['Section']."""
    section = row['Section']
    return match_ta(section)
def process_csv(filename=DEFAULT_FILE_NAME):
    """Clean up a grade-roster csv and write a 'processed_' copy of it.

    Rows missing any of the 'Final Exam', 'Lab Reports', 'Quizzes' or
    'Total' values are dropped, the 'Section' column is reduced to an
    integer section number, and a 'TA' column is added by looking up each
    section with match_ta().

    Args:
        filename: path of the csv file to convert.

    The output is written to the same directory as the input, with
    'processed_' prepended to the input's file name.
    """
    df = pd.read_csv(filename)  # df is the pandas DataFrame translation

    # If a score is missing in a key field, it's a junk row that can be deleted
    df = df.dropna(subset=['Final Exam', 'Lab Reports', 'Quizzes', 'Total'])

    # Truncate section labels to the section number:
    # "ddX-CHEMddd-0xxL" --> xx
    # Summer session 2 begins with 9, not 0, so needs a refactor to accommodate
    # regex=True is passed explicitly: pandas 2.0 changed the default to
    # literal matching, which would leave the labels untouched.
    sections = df['Section'].str.replace(r'^\d{2}\w-CHEM\d{3}-0', '',
                                         regex=True)
    sections = sections.str.replace('L', '', regex=False)

    # Convert Section numbers to integers, because integers used in TA_DICT
    df['Section'] = sections.astype('int')

    # Checks that no two TAs have the same section number
    # (indicates error entering sections in TA_DICT)
    check_ta_dict()

    # Match TAs to sections and add them to the table.
    # Series.map calls match_ta once per row and also works on an empty table.
    df['TA'] = df['Section'].map(match_ta)

    # TODO: locate "Section" column and move TAs next to it

    # Export df as csv with 'processed_' prefix. The prefix goes on the file
    # name only, so an input path that includes a directory stays valid.
    directory, base_name = os.path.split(filename)
    df.to_csv(os.path.join(directory, 'processed_' + base_name))
if __name__ == '__main__':
    import sys

    # Use the first command-line argument as the csv filename. Only a missing
    # argument (IndexError) falls back to the default; any other error is
    # left to surface instead of being silently swallowed.
    try:
        file_name = sys.argv[1]
    except IndexError:
        file_name = DEFAULT_FILE_NAME
        print('converting default file ', file_name)
    else:
        print('converting ', file_name)
    process_csv(file_name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment