msuprun · August 29, 2015 13:57
diff --git a/gistfile1.py b/gistfile1.py
 Python 

 Hamlet example
 data here: http://www.gutenberg.org/cache/epub/2265/pg2265.txt

 def read_Hamlet():
     """Open ``hamlet.txt`` in the working directory and return the file object.

     The file is opened with the default mode of ``open``.  Nothing is read
     here: the caller iterates over the returned handle and is responsible
     for closing it.
     """
     return open("hamlet.txt")

 # Count how often each whitespace-separated token occurs in the file returned
 # by read_Hamlet(), and remember the most frequent one.  Results are left in
 # the module-level names wordCounts (token -> count), maxCount and maxKey.
 wordCounts = dict()
 maxCount = 0
 maxKey = None
 # "with" closes the file even if the loop raises part-way through.
 with read_Hamlet() as my_file:
     for line in my_file:
         words = line.split()
         # Single-argument print(...) gives the same output on Python 2 and 3.
         print(words)
         for word in words:
             # Look the word up on the dict itself; wordCounts.keys() builds a
             # full list on Python 2, which made every lookup a linear scan.
             wordCounts[word] = wordCounts.get(word, 0) + 1
             # Strict ">" means that on a tie the earlier word stays as maxKey.
             if wordCounts[word] > maxCount:
                 maxCount = wordCounts[word]
                 maxKey = word

 Function-ified
 #functions we defined
 def read_Hamlet(i):
     """Read ``hamlet.txt`` and return its contents as a list of lines.

     Args:
         i: Any printable value.  It is only echoed to stdout and has no
             effect on what is read.

     Returns:
         list: The result of ``readlines()`` on the file, i.e. one string
         per line with the line endings still attached.

     Side effects:
         Prints ``i`` and then the type of the returned list.
     """
     # Single-argument print(...) produces the same output on Python 2 and 3,
     # whereas the bare "print x" statement is a SyntaxError on Python 3.
     print(i)
     # "with" guarantees the handle is closed even if readlines() raises.
     with open("hamlet.txt") as my_file:
         theLines = my_file.readlines()
     print(type(theLines))
     return theLines

 def initializeDictionary():
     """Return a fresh results dictionary holding only the bookkeeping keys.

     The returned dict starts with:
         'MAXCOUNT' -> 0      (highest count seen so far)
         'MAXKEY'   -> None   (word that reached that count)
         'NUMWORDS' -> 0
     Word counts are added to the same dictionary later by the caller.
     """
     return {
         'MAXCOUNT': 0,
         'MAXKEY': None,
         'NUMWORDS': 0,
     }
    
 def updateDictionary(wordCounts, word):
     """Record one more occurrence of ``word`` in ``wordCounts``, in place.

     Args:
         wordCounts: Dictionary mapping words to their counts.  It must
             already contain the bookkeeping keys 'MAXCOUNT' and 'MAXKEY';
             a missing 'MAXCOUNT' raises ``KeyError``.
         word: The word to count.

     Returns:
         None.  The dictionary is modified directly: the count for ``word``
         goes up by one (starting from 1 for a new word), and 'MAXCOUNT' /
         'MAXKEY' are refreshed when this word's count strictly exceeds the
         previous maximum, so on a tie the earlier word is kept.

     Note:
         'NUMWORDS' is not touched by this function.
     """
     # Look the word up on the dict itself.  ``wordCounts.keys()`` builds a
     # complete list on Python 2, which turned every update into a linear
     # scan over the whole vocabulary.
     newCount = wordCounts.get(word, 0) + 1
     wordCounts[word] = newCount
     if newCount > wordCounts['MAXCOUNT']:
         wordCounts['MAXCOUNT'] = newCount
         wordCounts['MAXKEY'] = word

 def getWords(line):
     """Lower-case ``line`` and return its whitespace-separated pieces as a list.

     Splitting uses ``str.split()`` with no argument, so runs of whitespace
     count as one separator and an empty or blank line yields ``[]``.
     Punctuation is left attached to the words.
     """
     return line.lower().split()

 #main script
 # Read every line of the play, split each line into lower-cased words and
 # tally them all into a single dictionary.
 listOfLines = read_Hamlet('This is Elenas example.')
 dictOfWordCounts = initializeDictionary()
 for line in listOfLines:
     words = getWords(line)
     for word in words:
         updateDictionary(dictOfWordCounts, word)

 # Sanity check: the first few keys.  This is a bare expression, so it is only
 # displayed in an interactive session/notebook; a plain script shows nothing.
 # list(...) is required because Python 3 key views cannot be sliced.
 list(dictOfWordCounts.keys())[0:10]


 def download_webpage(url):
     '''Download a webpage and return the body of the response.

     Args:
         url: Address to fetch; passed unchanged to ``urlopen``.

     Returns:
         Whatever ``read()`` on the response yields: a ``str`` on Python 2
         and ``bytes`` on Python 3.  No decoding is performed here.

     Raises:
         Any error raised by ``urlopen`` or ``read`` propagates to the caller.
     '''
     # No urllib2 import is visible in this file, and the module only exists
     # on Python 2, so resolve urlopen locally with a Python 3 fallback.
     try:
         from urllib2 import urlopen  # Python 2
     except ImportError:
         from urllib.request import urlopen  # Python 3
     page = urlopen(url)
     # Close the response even if read() fails, so the connection is released.
     try:
         return page.read()
     finally:
         page.close()
 
 # Download the page at hamlet_url and print whatever download_webpage returns.
 # This runs at import/execution time and needs network access.
 hamlet_url = 'http://sydney.edu.au/engineering/it/~matty/Shakespeare/texts/tragedies/hamlet'
 print(download_webpage(hamlet_url))
	Python

	Hamlet example
	data here: http://www.gutenberg.org/cache/epub/2265/pg2265.txt

	def read_Hamlet():
	    """Open ``hamlet.txt`` in the working directory and return the file object.

	    Nothing is read here: the caller iterates over the returned handle
	    and is responsible for closing it.
	    """
	    # The body must be indented under the def; the flattened paste was not
	    # valid Python.
	    my_file = open("hamlet.txt")
	    return my_file

	# Count how often each whitespace-separated token occurs in the file returned
	# by read_Hamlet(), and remember the most frequent one.  Results are left in
	# the module-level names wordCounts (token -> count), maxCount and maxKey.
	# The loop nesting below is restored; the flattened paste had lost it.
	wordCounts = dict()
	maxCount = 0
	maxKey = None
	# "with" closes the file even if the loop raises part-way through.
	with read_Hamlet() as my_file:
	    for line in my_file:
	        words = line.split()
	        # Single-argument print(...) gives the same output on Python 2 and 3.
	        print(words)
	        for word in words:
	            # Look the word up on the dict itself; wordCounts.keys() builds
	            # a full list on Python 2, making every lookup a linear scan.
	            wordCounts[word] = wordCounts.get(word, 0) + 1
	            # Strict ">" means that on a tie the earlier word stays as maxKey.
	            if wordCounts[word] > maxCount:
	                maxCount = wordCounts[word]
	                maxKey = word

	Function-ified
	#functions we defined
	def read_Hamlet(i):
	    """Read ``hamlet.txt`` and return its contents as a list of lines.

	    Args:
	        i: Any printable value.  It is only echoed to stdout and has no
	            effect on what is read.

	    Returns:
	        list: The result of ``readlines()`` on the file, i.e. one string
	        per line with the line endings still attached.

	    Side effects:
	        Prints ``i`` and then the type of the returned list.
	    """
	    # Single-argument print(...) produces the same output on Python 2 and 3,
	    # whereas the bare "print x" statement is a SyntaxError on Python 3.
	    print(i)
	    # "with" guarantees the handle is closed even if readlines() raises.
	    with open("hamlet.txt") as my_file:
	        theLines = my_file.readlines()
	    print(type(theLines))
	    return theLines

	def initializeDictionary():
	    """Return a fresh results dictionary holding only the bookkeeping keys.

	    The returned dict starts with 'MAXCOUNT' -> 0, 'MAXKEY' -> None and
	    'NUMWORDS' -> 0.  Word counts are added to the same dictionary later
	    by the caller.
	    """
	    # Body re-indented under the def; the flattened paste was not valid Python.
	    wordCounts = dict()
	    wordCounts['MAXCOUNT'] = 0
	    wordCounts['MAXKEY'] = None
	    wordCounts['NUMWORDS'] = 0
	    return wordCounts

	def updateDictionary(wordCounts, word):
	    """Record one more occurrence of ``word`` in ``wordCounts``, in place.

	    Args:
	        wordCounts: Dictionary mapping words to their counts.  It must
	            already contain the bookkeeping keys 'MAXCOUNT' and 'MAXKEY';
	            a missing 'MAXCOUNT' raises ``KeyError``.
	        word: The word to count.

	    Returns:
	        None.  The dictionary is modified directly: the count for ``word``
	        goes up by one (starting from 1 for a new word), and 'MAXCOUNT' /
	        'MAXKEY' are refreshed when this word's count strictly exceeds the
	        previous maximum, so on a tie the earlier word is kept.

	    Note:
	        'NUMWORDS' is not touched by this function.
	    """
	    # Look the word up on the dict itself.  ``wordCounts.keys()`` builds a
	    # complete list on Python 2, which turned every update into a linear
	    # scan over the whole vocabulary.
	    newCount = wordCounts.get(word, 0) + 1
	    wordCounts[word] = newCount
	    if newCount > wordCounts['MAXCOUNT']:
	        wordCounts['MAXCOUNT'] = newCount
	        wordCounts['MAXKEY'] = word

	def getWords(line):
	    """Lower-case ``line`` and return its whitespace-separated pieces as a list.

	    Splitting uses ``str.split()`` with no argument, so runs of whitespace
	    count as one separator and an empty or blank line yields ``[]``.
	    Punctuation is left attached to the words.
	    """
	    # Body re-indented under the def; the flattened paste was not valid Python.
	    words = line.lower().split()
	    return words

	#main script
	# Read every line of the play, split each line into lower-cased words and
	# tally them all into a single dictionary.  The loop nesting is restored
	# here; the flattened paste had lost it.
	listOfLines = read_Hamlet('This is Elenas example.')
	dictOfWordCounts = initializeDictionary()
	for line in listOfLines:
	    words = getWords(line)
	    for word in words:
	        updateDictionary(dictOfWordCounts, word)

	# Sanity check: the first few keys.  This is a bare expression, so it is only
	# displayed in an interactive session/notebook; a plain script shows nothing.
	# list(...) is required because Python 3 key views cannot be sliced.
	list(dictOfWordCounts.keys())[0:10]


	def download_webpage(url):
	    '''Download a webpage and return the body of the response.

	    Args:
	        url: Address to fetch; passed unchanged to ``urlopen``.

	    Returns:
	        Whatever ``read()`` on the response yields: a ``str`` on Python 2
	        and ``bytes`` on Python 3.  No decoding is performed here.

	    Raises:
	        Any error raised by ``urlopen`` or ``read`` propagates to the caller.
	    '''
	    # No urllib2 import is visible in this file, and the module only exists
	    # on Python 2, so resolve urlopen locally with a Python 3 fallback.
	    try:
	        from urllib2 import urlopen  # Python 2
	    except ImportError:
	        from urllib.request import urlopen  # Python 3
	    page = urlopen(url)
	    # Close the response even if read() fails, so the connection is released.
	    try:
	        return page.read()
	    finally:
	        page.close()

	# Download the page at hamlet_url and print whatever download_webpage returns.
	# This runs at import/execution time and needs network access.
	hamlet_url = 'http://sydney.edu.au/engineering/it/~matty/Shakespeare/texts/tragedies/hamlet'
	print(download_webpage(hamlet_url))