Last active
August 29, 2015 13:57
-
-
Save msuprun/9885235 to your computer and use it in GitHub Desktop.
Hamlet word count SC-NYU-Bootcamp 2014 - Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python
# Hamlet example
# Data here: http://www.gutenberg.org/cache/epub/2265/pg2265.txt
def read_Hamlet():
    """Open ``hamlet.txt`` (relative to the working directory) for reading.

    Returns:
        The open file object. This function does not close it; closing is
        left to the caller.
    """
    return open("hamlet.txt")
# Count how often each whitespace-separated token occurs in the file and
# keep track of the most frequent token (maxKey) and its count (maxCount).
my_file = read_Hamlet()
wordCounts = dict()
maxCount = 0
maxKey = None
try:
    for line in my_file:
        words = line.split()
        print(words)
        for word in words:
            # dict.get does a single hash lookup; the previous
            # `word in wordCounts.keys()` scanned a list on Python 2.
            wordCounts[word] = wordCounts.get(word, 0) + 1
            # Strict '>' means the first word to reach a count keeps the lead.
            if wordCounts[word] > maxCount:
                maxCount = wordCounts[word]
                maxKey = word
finally:
    # Close the handle even if reading or counting raises.
    my_file.close()
# Function-ified version
# Functions we defined
def read_Hamlet(i):
    """Read ``hamlet.txt`` and return its contents as a list of lines.

    Args:
        i: Any printable value. It is echoed to stdout and otherwise unused.

    Returns:
        list: The lines of the file as returned by ``readlines()``.

    Side effects:
        Prints ``i`` and the type of the returned list to stdout.
    """
    print(i)
    # The with-block closes the file even if readlines() raises.
    with open("hamlet.txt") as my_file:
        theLines = my_file.readlines()
    print(type(theLines))
    return theLines
def initializeDictionary():
    """Create a fresh results dictionary, ready to be updated.

    Returns:
        dict: A new dict holding three bookkeeping entries:
        'MAXCOUNT' set to 0, 'MAXKEY' set to None and 'NUMWORDS' set to 0.
    """
    return {
        'MAXCOUNT': 0,
        'MAXKEY': None,
        'NUMWORDS': 0,
    }
def updateDictionary(wordCounts, word):
    """Record one occurrence of ``word`` in ``wordCounts`` (mutated in place).

    Args:
        wordCounts: dict mapping words to their counts. It must already
            contain the 'MAXCOUNT' and 'MAXKEY' bookkeeping entries.
        word: The word to add or whose count should be incremented.

    Returns:
        None. The result is visible through ``wordCounts``.
    """
    # dict.get performs one hash lookup; the earlier
    # `word in wordCounts.keys()` scanned a list on Python 2.
    newCount = wordCounts.get(word, 0) + 1
    wordCounts[word] = newCount
    # Strict '>' keeps the first word that reached the maximum as 'MAXKEY'.
    if newCount > wordCounts['MAXCOUNT']:
        wordCounts['MAXCOUNT'] = newCount
        wordCounts['MAXKEY'] = word
    # NOTE(review): the 'NUMWORDS' entry is not touched here - confirm whether
    # it is meant to be maintained by this function.
def getWords(line):
    """Lower-case ``line`` and split it on whitespace.

    Args:
        line: The text to break up.

    Returns:
        list: The lower-cased, whitespace-separated pieces of ``line``.
    """
    return line.lower().split()
# Main script: read the play, then count every lower-cased word in it.
listOfLines = read_Hamlet('This is Elenas example.')
dictOfWordCounts = initializeDictionary()
for line in listOfLines:
    words = getWords(line)
    for word in words:
        updateDictionary(dictOfWordCounts, word)
# Sanity check: show the first few keys. The earlier bare expression
# `dictOfWordCounts.keys()[0:10]` printed nothing when run as a script and
# raises TypeError on Python 3, where dict views cannot be sliced.
print(list(dictOfWordCounts)[0:10])
def download_webpage(url):
    """Download the resource at ``url`` and return its raw content.

    Args:
        url: The address to fetch; any scheme supported by ``urlopen``.

    Returns:
        The response body as returned by ``read()``.

    Raises:
        Whatever ``urlopen`` raises when the resource cannot be opened.
    """
    # Imported locally because the module is named differently on
    # Python 2 (urllib2) and Python 3 (urllib.request).
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even if read() fails.
        page.close()
# Fetch the page at hamlet_url and echo its content to stdout.
hamlet_url = 'http://sydney.edu.au/engineering/it/~matty/Shakespeare/texts/tragedies/hamlet'
hamlet_page = download_webpage(hamlet_url)
print(hamlet_page)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment