sametz · June 13, 2017 21:02
diff --git a/cleancanvas.py b/cleancanvas.py
 """
 This notebook assumes that the .csv file has columns, with headings renamed as 
 below, for:
 * Section (contents unedited from the Canvas default)
 * Final Exam (final exam score /100)
 * Lab Reports (total lab report score /100)
 * Quizzes (total quiz score /100)
 * Total (Total score /100)

 If no specific csv filename is given for conversion, it assumes that the
 file named in DEFAULT_FILE_NAME is in the same folder as the script.
 """

 import numpy as np
 import pandas as pd

 # Name of the csv file converted when the script is run without an explicit
 # csv filename
 DEFAULT_FILE_NAME = '326Spring16minimal.csv'

 # TA name paired with the list of section numbers assigned to that TA.
 # Edit the pairs below to change TA names or their sections.
 TA_DICT = dict((
    ('Natasha', [20, 23, 29]),
    ('Sarah', [21, 28, 39]),
    ('Jianyu', [22, 34, 40]),
    ('Wei', [24, 26, 31]),
    ('Brian', [25, 32, 37]),
    ('Chuanqi', [27, 30, 33]),
    ('Sam', [35, 36]),
    ('Sam Friday', [38]),
 ))


 def check_ta_dict(dict_=TA_DICT):
    """Report whether any section number is assigned to more than one TA.

    Every pair of TAs is compared exactly once, so an overlap is printed a
    single time rather than once for each direction of the pair.

    Args:
        dict_: mapping of TA name to a collection of section numbers.
            Defaults to the module-level TA_DICT.

    Returns:
        True if no two TAs share a section number, False otherwise.
    """
    no_conflicts = True
    ta_names = list(dict_)
    for position, ta in enumerate(ta_names):
        print('Current TA:', ta)
        # Build the set once per TA instead of once per comparison
        ta_sections = set(dict_[ta])
        # Only compare against later TAs: earlier pairs were already checked,
        # and this also avoids comparing a TA with itself
        for other_ta in ta_names[position + 1:]:
            if ta_sections.intersection(dict_[other_ta]):
                print('  TWO TAs SECTIONS OVERLAP FOUND:',
                      ta, dict_[ta], other_ta, dict_[other_ta])
                no_conflicts = False
    if no_conflicts:
        print('No TA conflicts found!')
    return no_conflicts


 def match_ta(section, dict_=TA_DICT):
    """Look up which TA is assigned to a section number.

    Prints a progress message for the lookup and for its outcome.

    Args:
        section: section number to look for.
        dict_: mapping of TA name to a collection of section numbers.
            Defaults to the module-level TA_DICT.

    Returns:
        The name of the first TA whose sections contain `section`, or an
        empty string when no TA matches.
    """
    print('Evaluating section ', section)
    for name, assigned_sections in dict_.items():
        if section not in assigned_sections:
            continue
        print(name, ' matched to Section ', section)
        return name
    print("No matching TA found")
    return ''


 # Following function currently unused
 def match_row_to_ta(row):
    """Return the TA matched to the value stored under row['Section']."""
    section = row['Section']
    return match_ta(section)


 def process_csv(filename=DEFAULT_FILE_NAME):
    """Clean a gradebook csv and write a copy with a 'TA' column added.

    Rows missing any of the 'Final Exam', 'Lab Reports', 'Quizzes' or 'Total'
    values are dropped, the 'Section' column is reduced to an integer section
    number, and a 'TA' column is filled in with match_ta(). The result is
    written beside the input file under a 'processed_' filename prefix.

    Args:
        filename: path of the csv file to convert. Defaults to
            DEFAULT_FILE_NAME.
    """
    # Local import: only needed here to split the output path
    import os.path

    original_csv = filename
    df = pd.read_csv(original_csv)  # df is the pandas DataFrame translation

    # If a score is missing in a key field, it's a junk row that can be deleted
    df = df.dropna(subset=['Final Exam', 'Lab Reports', 'Quizzes', 'Total'])

    # Truncate section numbers to two digits
    # Assumes "ddX-CHEMddd-0xxL" format to --> xx
    # Summer session 2 begins with 9, not 0, so needs a refactor to accommodate
    # The pattern is a raw string so the backslashes are not treated as string
    # escapes, and regex=True is explicit because pandas >= 2.0 otherwise
    # treats the pattern as literal text
    df['Section'] = (
        df['Section']
        .str.replace(r'^\d{2}\w-CHEM\d{3}-0', '', regex=True)
        .str.replace('L', '', regex=False)
    )

    # Convert Section numbers to integers, because integers used in TA_DICT
    df['Section'] = df['Section'].astype('int')

    # Checks that no two TAs have the same section number
    # (indicates error entering sections above)
    check_ta_dict()

    # Match TAs to sections and add them to the table
    # Series.map calls match_ta exactly once per row and handles an empty
    # table; np.vectorize without otypes evaluates the first row twice and
    # raises on size-0 input
    df['TA'] = df['Section'].map(match_ta)

    # TODO: locate "Section" column and move TAs next to it

    # Export df as csv with 'processed_' prefix
    # The prefix is applied to the file name only, so a filename that includes
    # a directory is written alongside the input file
    folder, base_name = os.path.split(original_csv)
    df.to_csv(os.path.join(folder, 'processed_' + base_name))


 if __name__ == '__main__':
    # Convert the csv named by the first command-line argument, falling back
    # to DEFAULT_FILE_NAME when the script is run without one. An explicit
    # argument-count check is used so unrelated errors are never swallowed.
    import sys
    if len(sys.argv) > 1:
        file_name = sys.argv[1]
        print('converting ', file_name)
    else:
        file_name = DEFAULT_FILE_NAME
        print('converting default file ', file_name)

    process_csv(file_name)
	"""
	This notebook assumes that the .csv file has columns, with headings renamed as
	below, for:
	* Section (contents unedited from the Canvas default)
	* Final Exam (final exam score /100)
	* Lab Reports (total lab report score /100)
	* Quizzes (total quiz score /100)
	* Total (Total score /100)

	If no specific csv filename is given for conversion, it assumes that the
	file named in DEFAULT_FILE_NAME is in the same folder as the script.
	"""

	import numpy as np
	import pandas as pd

	# Name of the csv file converted when the script is run without an explicit
	# csv filename
	DEFAULT_FILE_NAME = '326Spring16minimal.csv'

	# TA name paired with the list of section numbers assigned to that TA.
	# Edit the pairs below to change TA names or their sections.
	TA_DICT = dict((
		('Natasha', [20, 23, 29]),
		('Sarah', [21, 28, 39]),
		('Jianyu', [22, 34, 40]),
		('Wei', [24, 26, 31]),
		('Brian', [25, 32, 37]),
		('Chuanqi', [27, 30, 33]),
		('Sam', [35, 36]),
		('Sam Friday', [38]),
	))


	def check_ta_dict(dict_=TA_DICT):
		"""Report whether any section number is assigned to more than one TA.

		Every pair of TAs is compared exactly once, so an overlap is printed a
		single time rather than once for each direction of the pair.

		Args:
			dict_: mapping of TA name to a collection of section numbers.
				Defaults to the module-level TA_DICT.

		Returns:
			True if no two TAs share a section number, False otherwise.
		"""
		no_conflicts = True
		ta_names = list(dict_)
		for position, ta in enumerate(ta_names):
			print('Current TA:', ta)
			# Build the set once per TA instead of once per comparison
			ta_sections = set(dict_[ta])
			# Only compare against later TAs: earlier pairs were already
			# checked, and this also avoids comparing a TA with itself
			for other_ta in ta_names[position + 1:]:
				if ta_sections.intersection(dict_[other_ta]):
					print(' TWO TAs SECTIONS OVERLAP FOUND:',
						ta, dict_[ta], other_ta, dict_[other_ta])
					no_conflicts = False
		if no_conflicts:
			print('No TA conflicts found!')
		return no_conflicts


	def match_ta(section, dict_=TA_DICT):
		"""Look up which TA is assigned to a section number.

		Prints a progress message for the lookup and for its outcome.

		Args:
			section: section number to look for.
			dict_: mapping of TA name to a collection of section numbers.
				Defaults to the module-level TA_DICT.

		Returns:
			The name of the first TA whose sections contain `section`, or an
			empty string when no TA matches.
		"""
		print('Evaluating section ', section)
		for name, assigned_sections in dict_.items():
			if section not in assigned_sections:
				continue
			print(name, ' matched to Section ', section)
			return name
		print("No matching TA found")
		return ''


	# Following function currently unused
	def match_row_to_ta(row):
		"""Return the TA matched to the value stored under row['Section']."""
		section = row['Section']
		return match_ta(section)


	def process_csv(filename=DEFAULT_FILE_NAME):
		"""Clean a gradebook csv and write a copy with a 'TA' column added.

		Rows missing any of the 'Final Exam', 'Lab Reports', 'Quizzes' or
		'Total' values are dropped, the 'Section' column is reduced to an
		integer section number, and a 'TA' column is filled in with
		match_ta(). The result is written beside the input file under a
		'processed_' filename prefix.

		Args:
			filename: path of the csv file to convert. Defaults to
				DEFAULT_FILE_NAME.
		"""
		# Local import: only needed here to split the output path
		import os.path

		original_csv = filename
		df = pd.read_csv(original_csv)  # df is the pandas DataFrame translation

		# If a score is missing in a key field, it's a junk row that can be deleted
		df = df.dropna(subset=['Final Exam', 'Lab Reports', 'Quizzes', 'Total'])

		# Truncate section numbers to two digits
		# Assumes "ddX-CHEMddd-0xxL" format to --> xx
		# Summer session 2 begins with 9, not 0, so needs a refactor to accommodate
		# The pattern is a raw string so the backslashes are not treated as
		# string escapes, and regex=True is explicit because pandas >= 2.0
		# otherwise treats the pattern as literal text
		df['Section'] = (
			df['Section']
			.str.replace(r'^\d{2}\w-CHEM\d{3}-0', '', regex=True)
			.str.replace('L', '', regex=False)
		)

		# Convert Section numbers to integers, because integers used in TA_DICT
		df['Section'] = df['Section'].astype('int')

		# Checks that no two TAs have the same section number
		# (indicates error entering sections above)
		check_ta_dict()

		# Match TAs to sections and add them to the table
		# Series.map calls match_ta exactly once per row and handles an empty
		# table; np.vectorize without otypes evaluates the first row twice and
		# raises on size-0 input
		df['TA'] = df['Section'].map(match_ta)

		# TODO: locate "Section" column and move TAs next to it

		# Export df as csv with 'processed_' prefix
		# The prefix is applied to the file name only, so a filename that
		# includes a directory is written alongside the input file
		folder, base_name = os.path.split(original_csv)
		df.to_csv(os.path.join(folder, 'processed_' + base_name))


	if __name__ == '__main__':
		# Convert the csv named by the first command-line argument, falling
		# back to DEFAULT_FILE_NAME when the script is run without one. An
		# explicit argument-count check is used so unrelated errors are never
		# swallowed.
		import sys
		if len(sys.argv) > 1:
			file_name = sys.argv[1]
			print('converting ', file_name)
		else:
			file_name = DEFAULT_FILE_NAME
			print('converting default file ', file_name)

		process_csv(file_name)