Created
October 2, 2013 10:50
-
-
Save benosteen/6791945 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import json
import sys

# Source spreadsheet, and the derived name of the numbered copy written
# next to it (the last four characters, ".csv", are replaced).
INPUTFILE = "History_Journal_Articles_KW.csv"
OUTPUTFILE = INPUTFILE[:-4] + "_numbered.csv"

# The csv module wants to do its own newline handling. On Python 3 that
# means opening the files with newline=""; on Python 2 it means binary mode.
# Without this, output gains blank lines on Windows and quoted fields that
# contain line breaks can be corrupted.
if sys.version_info[0] >= 3:
    in_file = open(INPUTFILE, "r", newline="")  # "r" == Open file for reading
    out_file = open(OUTPUTFILE, "w", newline="")  # "w" for writing
else:
    in_file = open(INPUTFILE, "rb")  # "r" == Open file for reading
    out_file = open(OUTPUTFILE, "wb")  # "w" for writing

# csv
rows_in = csv.reader(in_file)
rows_out = csv.writer(out_file)
# Lookup tables: each distinct value seen so far -> the number assigned to it.
topwords = {}
journals = {}


# little function to get or generate a number for a word
def get_or_set_number(word, words, counter):
    """Return the number assigned to ``word``, allocating one if it is new.

    Args:
        word: The value to look up.
        words: Dict mapping already-seen values to their numbers. It is
            updated in place when ``word`` has not been seen before.
        counter: The highest number handed out so far.

    Returns:
        A ``(number, counter)`` tuple: the number belonging to ``word`` and
        the (possibly incremented) counter to pass to the next call.
    """
    if word not in words:
        # First sighting: take the next free number and remember it.
        counter += 1
        words[word] = counter
        return counter, counter
    return words[word], counter
# Copy the input to the output, replacing column 1 with the number of the
# word found in column 0 and column 7 with the number of the journal found
# in column 6. One counter is shared by both lookups, so a number is never
# handed out to both a word and a journal.
numberofrows = 0
counter = 0
try:
    # The built-in next() works on Python 2.6+ and Python 3 alike, and the
    # default keeps an empty input file from raising StopIteration.
    headers = next(rows_in, None)
    if headers is not None:
        rows_out.writerow(headers)
    for row in rows_in:
        # Which column is the word in? (Computer counting very often starts at zero, not one!)
        topword_number, counter = get_or_set_number(row[0], topwords, counter)
        row[1] = topword_number
        journal_number, counter = get_or_set_number(row[6], journals, counter)
        row[7] = journal_number
        rows_out.writerow(row)
        numberofrows += 1
finally:
    # Close both files even if a malformed row aborts the loop, so the rows
    # written so far are flushed to disk.
    out_file.close()
    in_file.close()

print("Number of ids: {0}".format(counter))
print("Number of rows: {0}".format(numberofrows))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment