Created
June 22, 2015 23:35
-
-
Save khult/467c63a1c462b4af84ed to your computer and use it in GitHub Desktop.
Python Homework Assignments
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
There should be several functions in this module. Several are | |
already provided, in case they are useful: | |
datextract | |
eightdigs | |
fieldict | |
citypop | |
eventfreq | |
manuftop10 | |
are functions from a previous homework which might be handy. | |
Your task is to write four functions, plotEventFreq, plotManufTop10, | |
pagesub, and plotPopEvent. | |
Essential tricks: | |
1. YOU MUST HAVE MATPLOTLIB INSTALLED. There's no chance of | |
completing the homework without using matplotlib, a package | |
that can be added to Python2.7 environments. The computer | |
science lab does have matplotlib installed. You can find, | |
download, and install matplotlib if you want, on your own | |
computer (but it's not required, since you have an account | |
on the CS lab machines). | |
2. You will need several files in addition to this file (homework4.py) | |
- DOT1000.txt, one thousand lines from the DOT complaint file | |
- Top5000Population.txt (read by citypop() to build dictionary) | |
- page1.html (used by one of the unit tests) | |
- page2.html (used by one of the unit tests) | |
- ezplot.py (makes using matplotlib much easier) | |
3. The "*" trick for dealing with parameters. | |
A function definition like | |
def foo(*X): | |
print len(X), X | |
foo(True,"test",37) | |
will print 3 (True, 'test', 37) | |
foo(0,1,2,3,4,5,6) | |
will print 7 (0, 1, 2, 3, 4, 5, 6) | |
This "*" trick is how Python lets you define a function with an | |
unplanned number of arguments (the function treats all the | |
arguments as a tuple). | |
One more example: | |
def foo(*X): | |
print X[0] | |
foo(88,100,3) | |
will print 88 | |
4. The "*" trick for converting a list or tuple to arguments. | |
You can convert a list or tuple of items into | |
arguments for a function call: | |
def moo(a,b,c): | |
print a, b, c | |
Z = [True, "test", 37] | |
moo(*Z) | |
This will print True 'test' 37 | |
moo(*(6,1,False)) | |
will print 6 1 False | |
USE TRICKS 3 & 4 TO DO THE pagesub PROBLEM | |
5. The ezplot module makes it simple to plot graphs, putting them | |
into an image (png) file. Interactively, you can try it | |
with an example like this | |
import ezplot # needs matplotlib | |
X = [1.5, 2.7, 3.0, 6.2] | |
Y = [9, 3, 2, 6] # X and Y have same length | |
ezplot.barplot(X,Y) | |
This will write a file plot.png, which you can view. | |
There are two useful (for this homework) kinds of plotting in ezplot: | |
barplot(x,y,filename="plot.png") | |
corplot(x,y,filename="plot.png") | |
Use barplot for the plotEventFreq problem: just let the X-values | |
and Y-values be taken from the pairs that eventfreq returns. The | |
code for plotEventFreq is quite easy to write, if done properly.
Also, use barplot for the plotManufTop10 problem: just use X and | |
Y values that manuftop10 returns. If you look at the code of barplot | |
inside the ezplot.py file, you will see it is just checking whether | |
or not the X values are date objects or character strings or | |
numbers, then doing the appropriate kind of graphing. | |
In any case, you will need to pass along the name of the image file
to be written (the plotfile parameter). | |
For the plotPopEvent problem, you will need to work with the | |
dictionary that fieldict returns, and also the dictionary that | |
citypop returns (so start with making variables equal to these | |
dictionaries, simply by calling fieldict and citypop). Then | |
you will need to count the appropriate items so that you get | |
the X and Y needed to call ezplot.corplot | |
See the docstrings below for an explanation of what is expected. Test | |
cases follow: | |
>>> plotEventFreq(1995,1,plotfile="events.png") | |
>>> 12*1024 < os.stat("events.png")[stat.ST_SIZE] < 16*1024 | |
True | |
>>> plotEventFreq(1994,12,plotfile="hold.png") | |
>>> os.stat("hold.png")[stat.ST_SIZE] > 12*1024 | |
True | |
>>> plotManufTop10(plotfile="manuf.png") | |
>>> 25*1024 < os.stat("manuf.png")[stat.ST_SIZE] < 28*1024 | |
True | |
>>> i = pagesub("page1.html","Page One","function") | |
>>> i[0:12] | |
'<HTML><BODY>' | |
>>> len(i) | |
137 | |
>>> i = pagesub("page2.html","Second Page","attempt at programming") | |
>>> i.index("attempt") | |
42 | |
>>> | |
>>> plotPopEvent(plotfile="popevent.png") | |
>>> 26*1024< os.stat("popevent.png")[stat.ST_SIZE] < 32*1024 | |
True | |
''' | |
import os, stat, sys, ezplot, datetime | |
def datextract(S):
    '''
    Split an 8-character YYYYMMDD string into integers and
    return them as the tuple (year, month, day).
    int() raises ValueError if a slice is not an integer literal.
    '''
    year, month, day = S[:4], S[4:6], S[6:]
    return (int(year), int(month), int(day))
def eightdigs(S):
    '''
    Return True when S is a string of exactly eight ASCII digits,
    and False for anything else (including non-string values).
    '''
    # isinstance also accepts str subclasses, and the generator lets
    # all() stop at the first non-digit instead of building a list.
    return (isinstance(S, str)
            and len(S) == 8
            and all(c in "0123456789" for c in S))
def fieldict(filename):
    '''
    Read a tab-separated DOT complaint file and return a dictionary
    keyed by record ID (field 0 converted to an integer).  Each value
    is the tuple
        (manufacturer, date, crash, city, state)
    built from fields 2, 7, 6, 12 and 13.  The date is a datetime.date,
    crash is True only when field 6 equals "Y", and the remaining items
    are whitespace-stripped strings.
    A date field that is not eight digits, or that does not name a real
    calendar day (for example "00000000"), becomes datetime.date(1,1,1).
    Lines with fewer than 14 fields are skipped.
    '''
    placeholder = datetime.date(1, 1, 1)
    D = {}
    with open(filename) as FileObject:
        for line in FileObject:
            # Only drop the line ending: stripping all whitespace would
            # also drop empty trailing fields and shift the field count.
            T = line.rstrip('\r\n').split('\t')
            if len(T) < 14:
                continue  # blank or truncated record
            manuf, city, state = T[2].strip(), T[12].strip(), T[13].strip()
            date = T[7].strip()
            crash = (T[6] == "Y")
            when = placeholder
            if eightdigs(date):
                y, m, d = datextract(date)
                try:
                    when = datetime.date(y, m, d)
                except ValueError:
                    # eight digits, but not a valid calendar date
                    when = placeholder
            D[int(T[0])] = (manuf, when, crash, city, state)
    return D
def citypop():
    '''
    Read Top5000Population.txt (CSV rows of city, state, population)
    and return a dictionary mapping (city, state) to the population
    as an integer.
    The city has trailing whitespace removed, is uppercased and is cut
    to at most 12 characters.  Commas are removed from the population
    before it is converted.  Empty rows are ignored.
    '''
    import csv
    R = {}
    # The with-block closes the file even when a row fails to parse.
    with open("Top5000Population.txt") as F:
        for row in csv.reader(F):
            if not row:
                continue  # csv.reader yields [] for a blank line
            city, state, population = row
            key = (city.rstrip().upper()[:12], state)
            R[key] = int(population.replace(",", ""))
    return R
def eventfreq(year,month):
    '''
    Count the DOT1000.txt records per date within one month.
    Returns a list of (date, count) pairs, sorted by increasing date,
    covering every record whose date has the given year and month.
    The list is empty when no record matches.
    '''
    Fd = fieldict("DOT1000.txt")
    Q = {}  # accumulate dates and complaint counts
    for record in Fd.values():
        thedate = record[1]
        if thedate.year == year and thedate.month == month:
            Q[thedate] = Q.get(thedate, 0) + 1
    # sorted() accepts both a Python 2 list and a Python 3 dict view;
    # calling .sort() on the result of items() only works on Python 2.
    return sorted(Q.items())
def manuftop10():
    '''
    Return up to ten (manufacturer, count) pairs for DOT1000.txt,
    ordered from the largest complaint count to the smallest.
    '''
    from operator import itemgetter
    tally = {}  # manufacturer name -> number of records
    for record in fieldict("DOT1000.txt").values():
        name = record[0]
        tally[name] = tally.get(name, 0) + 1
    # sort big to small by count, then keep the first ten
    ranked = sorted(tally.items(), key=itemgetter(1), reverse=True)
    return ranked[:10]
#------------ Functions for this homework ------------------- | |
# replace pass in each function with your own function | |
# definitions to get the properly working program | |
def plotEventFreq(year,month,plotfile="events.png"):
    '''
    Draw a bar chart of the number of events per date for the
    given year and month, using the pairs returned by eventfreq.
    The chart is written by ezplot.barplot to the file named by
    plotfile; the function itself returns None.
    '''
    import ezplot
    pairs = eventfreq(year, month)
    dates = [pair[0] for pair in pairs]
    counts = [pair[1] for pair in pairs]
    # NOTE: the original author observed that the image comes out
    # about 2KB larger than the size the doctest expects.
    ezplot.barplot(dates, counts, plotfile)
    return None
def plotManufTop10(plotfile="manuf.png"):
    '''
    Draw a bar chart of the pairs returned by manuftop10
    (manufacturer names against complaint counts).
    The chart is written by ezplot.barplot to the file named by
    plotfile; the function itself returns None.
    '''
    import ezplot
    ranked = manuftop10()
    names = [pair[0] for pair in ranked]
    counts = [pair[1] for pair in ranked]
    # NOTE: the original author observed that the image comes out
    # about 4KB larger than the size the doctest expects.
    ezplot.barplot(names, counts, plotfile)
    return None
def pagesub(*subs):
    '''
    The pagesub() function reads a text file into a string,
    then uses Python's format() to substitute the remaining
    arguments into it.  The first argument is the file name.
    Here are examples:
        pagesub("index.html","December",18,2012)
    This will return the content of the file index.html, but
    with "December" substituted for {0}, and 18 substituted
    for {1}, and 2012 substituted for {2}.
        pagesub("main.txt",False)
    This just reads main.txt and substitutes False for {0}
    in the text, returning that.
    Calling pagesub() with no arguments raises IndexError.
    '''
    filename = subs[0]
    # The with-block closes the file; the original left it open.
    with open(filename, 'r') as page:
        template = page.read()
    return template.format(*subs[1:])
def plotPopEvent(plotfile="popevent.png"):
    '''
    The plotPopEvent function uses ezplot.corplot() to
    graph the number of complaints versus the city size.
    The x-values given to ezplot.corplot() are the numbers
    of DOT1000.txt complaints for a (city,state) locale; the
    corresponding y-values are the population numbers that
    citypop() finds for the same (city,state).
    Every locale returned by citypop() is plotted, so locales
    without any complaint contribute an x-value of 0.
    The graph is written to plotfile; the function returns None.
    '''
    import ezplot
    from collections import Counter
    populations = citypop()
    # One pass over the complaint records gives a count per locale.
    complaints = Counter((record[3], record[4])
                         for record in fieldict('DOT1000.txt').values())
    x = []
    y = []
    # Build x and y from the same dictionary in the same loop so each
    # count is paired with the population of the same locale.
    for locale, population in populations.items():
        x.append(complaints[locale])  # a Counter gives 0 for unseen keys
        y.append(population)
    ezplot.corplot(x, y, plotfile)
    return None
if __name__ == "__main__":
    # Executed as a script: run the doctests in the module docstring.
    import doctest
    doctest.testmod()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
There should be several functions in this module. Three are
already provided, in case they are useful: | |
datextract | |
eightdigs | |
fieldict | |
are functions from a previous homework which might be handy. | |
Essential tricks: | |
CSV FILES | |
One of the data files is a Comma Separated File | |
(see http://en.wikipedia.org/wiki/Comma-separated_values if needed) | |
Python has a module, the csv module, for reading and writing csv files. | |
Some information is found in these two links: | |
http://docs.python.org/2/library/csv.html | |
http://www.doughellmann.com/PyMOTW/csv/ | |
In case you don't read these, the brief example is this: | |
import csv | |
F = open("somedata.csv") # open a CSV file | |
csvF = csv.reader(F) # makes a "csv reader" object | |
for row in csvF: | |
print row # row is a tuple of the CSV fields (per line) | |
The beauty of this csv module is that it can handle ugly CSV records like:
Washer Assembly, 2504, "on order", "2,405,318" | |
Notice that this has four fields, separated by commas. But we cannot use | |
an expression like line.split(',') to get the four fields! The reason is | |
that Python will try to also split the last field, which contains commas. | |
The csv reader is smarter. It will respect the quoted fields. | |
Each row that a csv reader produces is a list of strings.
So how can you convert a string like '2,405,318' to a number? | |
There are two simple ideas: | |
1. x = field[2].split(',') | |
x = ''.join(x) # comma is gone! | |
x = int(x) | |
2. x = field[2].replace(',','') # replace comma by empty | |
x = int(x) | |
SORTING BY FIELD | |
Suppose you have a list of tuples, like M = [("X",50,3),("Y",3,6),("J",35,0)] | |
What is needed, however is to make a sorted version of M, sorted by the second | |
item of the tuples. That is, we want N = [("Y",3,6),("J",35,0),("X",50,3)]. | |
The problem is that if we just write N = sorted(M), we will get the tuples | |
sorted by the first item, so N would be [("J",35,0),("X",50,3),("Y",3,6)] | |
Is there some way to tell Python's sort which of the items to use for sorting? | |
YES! There's even a page on the subject: | |
http://wiki.python.org/moin/HowTo/Sorting/ | |
But a brief example is helpful here. The idea is to use keyword arguments | |
and another Python module, the operator module. | |
Here's the example: | |
from operator import itemgetter # used to customize sorting | |
N = sorted(M,key=itemgetter(1)) # says to use item 1 (0 is first item) | |
This will give us the needed result in variable N. What if, instead, we | |
wanted the result to be in decreasing order, rather than increasing order? | |
Another keyword argument does that: | |
N = sorted(M,key=itemgetter(1),reverse=True) | |
DICTIONARY ACCUMULATION | |
What if we need to build a dictionary where the key comes from some part | |
of a record in a file, and the value is the number of records that have | |
the same thing for that part. Maybe, if we are counting states (with | |
two-letter abbreviations), the dictionary might be something like this: | |
{'CA':620978, 'NY':583719, 'IA':2149} | |
This dictionary could be the result of reading through a data file that | |
had 620,978 records for California and 583,719 records for New York (plus | |
some for Iowa). As an example of creating this dictionary, consider a | |
data file with the state abbreviation as the first field of each record. | |
D = { } # empty dictionary for accumulation | |
for line in sys.stdin: # data file is standard input | |
st = line.split()[0] # get state abbreviation | |
if st not in D.keys(): | |
D[st] = 1 # first time for this state, count is 1 | |
else: | |
D[st] += 1 | |
There is another way to do the same thing, using a more advanced idea: | |
the get() method of the dictionary type, which has a default value argument. | |
D = { } # empty dictionary for accumulation | |
for line in sys.stdin: # data file is standard input | |
st = line.split()[0] # get state abbreviation | |
D[st] = D.get(st,0) + 1 | |
What you see above is D.get(st,0), which attempts to get the value D[st], | |
but will return 0 if st is not in the dictionary. The trick here is that | |
0+1 is 1, which is the right value to store into D[st] for the first time | |
a state abbreviation is found while reading the dictionary. It is a tricky | |
idea, which some Python programmers like. | |
DATETIME.DATE BREAKDOWN | |
Suppose G is a datetime.date object, for instance | |
import datetime | |
G = datetime.date(2012,12,1) # This is 1st December, 2012 | |
In a program, can you get the year, month and day as integers | |
out of the datetime.date object G? Yes, it's easy: | |
1 + G.year # G.year is an integer, equal to the year | |
# expression above is "next year" | |
Similarly, G.month is the month as an integer, and G.day is the day. | |
The task is to write three functions, citypop, eventfreq, and manuftop10. | |
See the docstrings below for an explanation of what is expected. Test | |
cases follow: | |
>>> citypopdict = citypop() | |
>>> len(citypopdict) | |
4991 | |
>>> citypopdict[ ('DES MOINES','IA') ] | |
197052 | |
>>> citypopdict[ ('CORALVILLE','IA') ] | |
18478 | |
>>> citypopdict[ ('STOCKTON','CA') ] | |
287037 | |
>>> evlist = eventfreq(1995,1) | |
>>> len(evlist) | |
17 | |
>>> evlist[0] | |
(datetime.date(1995, 1, 1), 5) | |
>>> evlist[14] | |
(datetime.date(1995, 1, 15), 1) | |
>>> len(eventfreq(1994,12)) | |
22 | |
>>> len(eventfreq(2012,2)) | |
0 | |
>>> manlist = manuftop10() | |
>>> len(manlist) | |
10 | |
>>> manlist[3] | |
('HONDA (AMERICAN HONDA MOTOR CO.)', 67) | |
>>> manlist[8] | |
('MITSUBISHI MOTORS NORTH AMERICA, INC.', 16) | |
''' | |
from operator import itemgetter | |
from string import whitespace | |
def datextract(S):
    '''
    Return (year, month, day) as integers, taken from the
    YYYYMMDD string S.  int() raises ValueError when one of
    the three parts is not an integer literal.
    '''
    pieces = (S[:4], S[4:6], S[6:])
    return tuple(int(piece) for piece in pieces)
def eightdigs(S):
    '''
    Return True when S is a string of exactly eight ASCII digits,
    and False for anything else (including non-string values).
    '''
    # isinstance also accepts str subclasses, and the generator lets
    # all() stop at the first non-digit instead of building a list.
    return (isinstance(S, str)
            and len(S) == 8
            and all(c in "0123456789" for c in S))
def citylist(filename):
    '''
    Return a list with the city name (field 12 of each
    tab-separated line) for every line of the file, in file
    order, with surrounding whitespace removed.
    '''
    with open(filename) as FileObject:
        return [line.strip().split('\t')[12].strip() for line in FileObject]
def statecount(filename):
    '''
    Return a dictionary mapping each state (field 13 of every
    tab-separated line) to the number of lines carrying it.
    '''
    with open(filename) as FileObject:
        counts = {}
        for line in FileObject:
            abbrev = line.strip().split('\t')[13]
            if abbrev in counts:
                counts[abbrev] += 1
            else:
                counts[abbrev] = 1
    return counts
def fieldict(filename):
    '''
    Returns a dictionary with record ID (integer) as
    key, and a tuple as value.  The tuple has this form:
        (manufacturer, date, crash, city, state)
    where date is a datetime.date object, crash is a boolean,
    and other tuple items are strings.
    The values come from tab-separated fields 2, 7, 6, 12 and 13.
    A date field that is not eight digits, or that does not name a
    real calendar day (for example "00000000"), becomes
    datetime.date(1,1,1).  Lines with fewer than 14 fields are skipped.
    '''
    import datetime
    placeholder = datetime.date(1, 1, 1)
    D = {}
    with open(filename) as FileObject:
        for line in FileObject:
            # Only drop the line ending: stripping all whitespace would
            # also drop empty trailing fields and shift the field count.
            T = line.rstrip('\r\n').split('\t')
            if len(T) < 14:
                continue  # blank or truncated record
            manuf, city, state = T[2].strip(), T[12].strip(), T[13].strip()
            date = T[7].strip()
            crash = (T[6] == "Y")
            when = placeholder
            if eightdigs(date):
                y, m, d = datextract(date)
                try:
                    when = datetime.date(y, m, d)
                except ValueError:
                    # eight digits, but not a valid calendar date
                    when = placeholder
            D[int(T[0])] = (manuf, when, crash, city, state)
    return D
def citypop(filename="Top5000Population.txt"):
    '''
    Read a CSV population file (Top5000Population.txt unless another
    filename is given) and return a dictionary of (city,state) as key,
    and population (an integer) as value.
    For compatibility with DOT data, the city is converted to
    uppercase and truncated to at most 12 characters, after its
    trailing whitespace is removed.  Commas inside the population
    field are dropped before conversion.  Empty rows are ignored.
    '''
    import csv
    D = {}
    # csv.reader must be given the open file object, not its name.
    with open(filename) as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                continue  # csv.reader yields [] for a blank line
            city = row[0].rstrip(whitespace).upper()[:12]
            state = row[1].strip(whitespace)
            D[(city, state)] = int(row[2].replace(',', ''))
    return D
def eventfreq(year,month):
    '''
    Read DOT1000.txt and return a list of (d,ct)
    pairs, where d is a date object of the form
        datetime.date(A,B,C)
    having A equal to the year argument and
    B equal to the month argument to eventfreq(year,month).
    The ct part of each pair is the number of records
    that had a date equal to datetime.date(A,B,C).
    The returned list is sorted in increasing order by date
    and is empty when no record falls in that month.
    The records come from fieldict("DOT1000.txt").
    '''
    counts = {}  # date -> number of records with that date
    for record in fieldict("DOT1000.txt").values():
        thedate = record[1]
        if thedate.year == year and thedate.month == month:
            counts[thedate] = counts.get(thedate, 0) + 1
    return sorted(counts.items())
def manuftop10(filename="DOT1000.txt"):
    '''
    This function returns a list of at most ten pairs.  Each
    pair has the form (man,ct) where man is a string
    equal to a manufacturer name and ct is the number
    of lines in the file (DOT1000.txt unless another filename
    is given) with that manufacturer.
    The pairs returned are the "top 10" (in decreasing order
    by count) of all the manufacturers in the file.
    The manufacturer is tab-separated field 2 with surrounding
    whitespace removed; lines with fewer than 3 fields are skipped.
    '''
    D = {}
    with open(filename) as FileObject:
        for line in FileObject:
            fields = line.split("\t")
            if len(fields) < 3:
                continue  # blank or truncated record
            manu = fields[2].strip()
            D[manu] = D.get(manu, 0) + 1
    SortedList = sorted(D.items(), key=itemgetter(1), reverse=True)
    return SortedList[:10]
if __name__ == "__main__":
    # Executed as a script: run the doctests in the module docstring.
    import doctest
    doctest.testmod()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
There should be several functions in this module. Two are | |
already provided, in case they are useful: datextract and eightdigs | |
are functions from a previous homework which might be handy. | |
Three new functions to write are: | |
1. citylist(filename) reads a file in the DOT format | |
and returns a list of city names, one for each line in | |
the file. The DOT format may have trailing spaces on | |
the city name; make sure citylist creates a list of | |
city names with trailing spaces removed (easy using | |
Python's strip() method). Two test cases are shown | |
below, for lines at index 3 and 347. When we grade, we | |
will use other tests at different index values. | |
2. statecount(filename) reads a file in DOT format | |
and returns a dictionary with two-letter state abbreviations | |
as keys and the number of lines in the DOT file for that | |
state. | |
3. fieldict(filename) reads a file in DOT format and | |
returns a dictionary with the DOT CMPLID, converted to an | |
integer, as the key, and a tuple as the corresponding value | |
for that key. The format of the tuple is: | |
(manufacturer, date, crash, city, state) | |
where these tuple items have the following types: | |
manufacturer -- this comes from the MFR_NAME field in the DOT format | |
date -- this comes from the FAILDATE field in the DOT format, | |
but converted to a Python datetime.date object | |
crash -- this comes from the CRASH field in the DOT format, | |
but converted to a Python bool type (True for a crash) | |
city -- comes from the CITY field in the DOT format | |
state -- comes from the STATE field in the DOT format | |
Advice: | |
- The only DOT file used for testing below is the file "DOT500.txt", which | |
must be in the same place as this Python module, so it can be found. | |
- Study the file CMPL.txt to learn about the DOT format | |
- Study the file Example.py to learn how to read a file one record at a time | |
- Be careful not to fully trust the DOT format -- there can be fields | |
in some lines of the file which have bad data. Your functions should be | |
able to overcome the bad data without getting a Python error that stops | |
it from running.
Test cases for your functions follow: | |
>>> citylist("DOT500.txt")[3] | |
'TUCSON' | |
>>> citylist("DOT500.txt")[347] | |
'NORTH VILLE' | |
>>> statecount("DOT500.txt")['CA'] | |
76 | |
>>> statecount("DOT500.txt")['NV'] | |
4 | |
>>> fieldict("DOT500.txt")[416] | |
('DAIMLERCHRYSLER CORPORATION', datetime.date(1995, 1, 9), False, 'ARCADIA', 'FL') | |
>>> fieldict("DOT500.txt")[82] | |
('FORD MOTOR COMPANY', datetime.date(1995, 1, 1), False, 'MARBLE HEAD', 'MA') | |
''' | |
# for your convenience, here are some functions and an import statement | |
# that may be helpful in doing the homework | |
from string import whitespace | |
import datetime | |
def datextract(S):
    '''
    Convert an eight-digit YYYYMMDD string into a datetime.date.
    When S fails the eightdigs() check, or its digits do not name
    a real calendar day (for example "00000000" or "19951301"),
    the placeholder string "00000000" is returned instead.
    '''
    if not eightdigs(S):
        return "00000000"
    try:
        return datetime.date(int(S[:4]), int(S[4:6]), int(S[6:]))
    except ValueError:
        # datetime.date rejects year 0 and out-of-range months or days
        return "00000000"
def eightdigs(S):
    '''
    Return True when S is a string of exactly eight ASCII digits,
    and False for anything else (including non-string values).
    '''
    # isinstance also accepts str subclasses, and the generator lets
    # all() stop at the first non-digit instead of building a list.
    return (isinstance(S, str)
            and len(S) == 8
            and all(c in "0123456789" for c in S))
#----- define your functions here ------------------------ | |
def citylist(filename):
    '''
    Return a list of city names, one for each line of a DOT
    format file, in file order.  The city is tab-separated
    field 12 with trailing whitespace removed.
    A line with fewer than 13 fields contributes an empty
    string, so list indexes still match line numbers.
    filename must be a non-empty string.
    '''
    assert isinstance(filename, str)
    assert len(filename) > 0
    city = []
    with open(filename) as FileObject:
        for line in FileObject:
            lineList = line.split('\t')
            if len(lineList) > 12:
                city.append(lineList[12].rstrip(whitespace))
            else:
                city.append('')  # bad record: keep the position
    return city
def statecount(filename):
    '''
    Return a dictionary with state abbreviations as keys and
    the number of lines in the DOT format file for that state
    as values.  The state is tab-separated field 13 with
    trailing whitespace removed.
    Lines with fewer than 14 fields are not counted.
    '''
    counts = {}
    with open(filename) as FileObject:
        for line in FileObject:
            lineList = line.split('\t')
            if len(lineList) < 14:
                continue  # blank or truncated record
            name = lineList[13].rstrip(whitespace)
            # Count in one pass instead of calling list.count per state.
            counts[name] = counts.get(name, 0) + 1
    return counts
def fieldict(filenam):
    '''
    Read a DOT format file and return a dictionary whose key is
    the CMPLID (tab-separated field 0 converted to an integer)
    and whose value is the tuple
        (manufacturer, date, crash, city, state)
    manufacturer, city and state are fields 2, 12 and 13 with
    trailing whitespace removed; date is whatever datextract()
    returns for field 7; crash is True only when field 6 is 'Y'.
    Lines with fewer than 14 fields are skipped.
    '''
    dictionary = {}
    with open(filenam) as FileObject:
        for line in FileObject:
            lineList = line.split('\t')
            if len(lineList) < 14:
                continue  # blank or truncated record
            # Key by the record's own ID, not by its position in the
            # file: the two only agree when IDs happen to run 1, 2, 3...
            key = int(lineList[0])
            dictionary[key] = (
                lineList[2].rstrip(whitespace),
                datextract(lineList[7]),
                lineList[6] == 'Y',
                lineList[12].rstrip(whitespace),
                lineList[13].rstrip(whitespace),
            )
    return dictionary
#run unit testing as found in the docstring | |
if __name__ == "__main__":
    # Executed as a script: run the doctests in the module docstring.
    import doctest
    doctest.testmod()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import BaseHTTPServer | |
from hw5util import * | |
# Base URL of a Google static map image; one GoogleMapPin fragment
# is appended for every marker to be drawn.
GoogleMapLink = "http://maps.googleapis.com/maps/api/staticmap?"
# No leading "&" (it produced an empty first parameter) and no stray
# comma after the map type.
GoogleMapLink += "size=600x300&maptype=roadmap&sensor=false"
# Query-string fragment for one blue marker; {0} is the "lat,lng" text.
GoogleMapPin = "&markers=color:blue|{0}"
class MyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    '''
    Request handler for the homework web server.
    A GET whose path ends in ".png" is handed to serveimage();
    any other path is looked up in a table of page-building
    methods.  Known pages are sent with status 200, unknown
    paths get the "not found" page with status 404.
    '''
    # Link back to the table of contents, appended to every sub-page.
    home_link = '<p>Click <a href="/">here</a> for the Home Page</p>'

    def norm_headers(self, status=200):
        ''' Send the status line and the text/html content-type header. '''
        self.send_response(status)
        self.send_header('Content-type','text/html')
        self.end_headers()

    def nopage(self):
        ''' Write the body shown for an unknown page name. '''
        self.wfile.write("<html><body><h1>We Are Sorry The Page Was Not Found</h1></body></html>")

    def default_page(self):
        ''' Write the table of contents linking to the other pages. '''
        page = '<html><body>Kristofer Hult Webserver'
        page +='<p>Table of Contents</p>'
        page+= '<p>Click <A href=manuf>Here</A> for the Top 10 Manufacturers</p>'
        page+='<p>Click <A HREF=map>here</A> for a map.</p>'
        page+='<p>Click <A HREF=pop>here</A> for Events vs Population.</p>'
        page+='<p>Click <A HREF=stat>here</A> for a Date Statistics.</p>'
        page += '</body></html>'
        self.wfile.write(page)

    def map_page(self):
        ''' Write a page embedding a Google static map with two pins. '''
        page = '<html><body>Sample Map<p><img src="'
        page += GoogleMapLink
        page += GoogleMapPin.format("40.702147,-74.015794")
        page += GoogleMapPin.format("40.718217,-73.998284")
        # Close the src attribute before adding any visible text;
        # the home link used to end up inside the image URL.
        page += '"/></p>'
        page += self.home_link
        page += '</body></html>'
        self.wfile.write(page)

    def manuf_page(self):
        ''' Rebuild manuf.png and write the page that shows it. '''
        plotManufTop10() # build manuf.png
        page = '<html><body>Top Ten Manufacturers'
        page += '<p><img src="manuf.png"/></p>'
        page += self.home_link
        page += '</body></html>'
        self.wfile.write(page)

    def pop_page(self):
        ''' Rebuild popevent.png and write the Events vs Population page. '''
        # This page and stat_page previously built each other's plots.
        plotPopEvent() # build popevent.png
        page = '<html><body>Events vs Population'
        page += '<p><img src="popevent.png"/></p>'
        page += self.home_link
        page += '</body></html>'
        self.wfile.write(page)

    def stat_page(self):
        ''' Rebuild events.png (January 1995) and write the Date Statistics page. '''
        plotEventFreq(1995,1,plotfile="events.png") # build events.png
        page = '<html><body>Date Statistics'
        page += '<p><img src="events.png"/></p>'
        page += self.home_link
        page += '</body></html>'
        self.wfile.write(page)

    def _pages(self):
        ''' Return the mapping from page name to the method that writes it. '''
        return {'':self.default_page, 'map':self.map_page, 'pop':self.pop_page,
                'manuf':self.manuf_page, 'stat':self.stat_page}

    def table(self,page):
        ''' Write the body for the named page, or the not-found body. '''
        lookup = self._pages()
        if page in lookup:
            lookup[page]()
        else:
            self.nopage()

    # Here is where each browser request comes
    def do_GET(self):
        ''' Dispatch one GET request to an image or an HTML page. '''
        if self.path.endswith(".png"):
            # serveimage is not defined in this file; presumably it comes
            # from hw5util and sends its own headers -- TODO confirm.
            serveimage(self,self.path[1:])
        else:
            page = self.path[1:]
            # Decide the status before any header is sent, so that an
            # unknown page is reported as 404 instead of 200.
            self.norm_headers(200 if page in self._pages() else 404)
            self.table(page)
# main part of the program starts here | |
# Create the server before the try block so that the name is always
# bound when the clean-up code runs.
server = BaseHTTPServer.HTTPServer(('',8000), MyHandler)
try:
    # A parenthesised single string prints identically on Python 2.
    print('started httpserver...')  # waiting on port 8000
    server.serve_forever()
except KeyboardInterrupt:
    print('^C received, shutting down server')
finally:
    # server_close() releases the listening socket however the
    # serving loop ended.
    server.server_close()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Functions to write: | |
(1) nopuncend(S) -- returns S with any trailing punctuation removed, where | |
a punctuation character is any of these: !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ | |
NOTE: instead of typing these in yourself, this file defines punctuation | |
to be a string containing all these characters. | |
(2) notrail(S) -- returns S with any trailing whitespace characters removed, | |
where a whitespace character is any of these: \t\n\r (and blank of course) | |
NOTE: instead of typing these in yourself, this file defines whitespace
to be a string containing all these characters | |
SECOND NOTE: look up "python rstrip" on a search engine, it can simplify | |
your answers to (1) and (2) | |
(3) eightdigs(S) -- returns True if S is a string consisting of exactly 8 digits | |
(4) datextract(S) -- returns a tuple of (year,month,day) from an 8-digit string | |
(5) strcount(S) -- returns a dictionary with words as keys and the number of times | |
that word appears as the corresponding value | |
(6) sumcounts(D) -- where D is a dictionary with numbers as values, | |
returns the sum of all the values | |
NOTE: some useful things for this might be these methods: | |
D.keys() -- a list of all the keys in dictionary D | |
D.values() -- a list of all the values in dictionary D | |
len(D) -- how many items D has | |
D.items() -- a list of all items, as (key,value) pairs | |
(7) freqitems(S,p) --- returns a sorted list of the items in sequence S that | |
occur with at least p percent frequency, with no duplicates | |
NOTE: how to get a sorted list? If L is a list, then sorted(L) is | |
that same list in sorted order | |
SECOND NOTE: how to remove duplicates from a list? | |
{ k:0 for k in L }.keys() -- gives L with duplicates removed | |
What follows are test cases on (1)--(7)
>>> nopuncend("ordinary") | |
'ordinary' | |
>>> nopuncend("what?") | |
'what' | |
>>> nopuncend("so...") | |
'so' | |
>>> nopuncend("stop!") | |
'stop' | |
>>> notrail("simple") | |
'simple' | |
>>> notrail("let there be no more ") | |
'let there be no more' | |
>>> eightdigs("test") | |
False | |
>>> eightdigs("123456") | |
False | |
>>> eightdigs(8*'0') | |
True | |
>>> eightdigs("11112222") and eightdigs("80041209") | |
True | |
>>> datextract("20080702") | |
(2008, 7, 2) | |
>>> [datextract("19941128")] | |
[(1994, 11, 28)] | |
>>> strcount("a a a a b b") | |
{'a': 4, 'b': 2} | |
>>> strcount("one") | |
{'one': 1} | |
>>> sorted(strcount("this one and that one for one time").items()) | |
[('and', 1), ('for', 1), ('one', 3), ('that', 1), ('this', 1), ('time', 1)] | |
>>> sumcounts({"a":2.5, "b":7.5, "c":100}) | |
110.0 | |
>>> sumcounts({ }) | |
0 | |
>>> sumcounts(strcount("a a a b")) | |
4 | |
>>> freqitems([2,2,2,3],50) | |
[2] | |
>>> freqitems(5*["alpha"]+["beta"]+3*["gamma"]+7*["delta"], 25) | |
['alpha', 'delta'] | |
>>> freqitems(5*["alpha"]+["beta"]+3*["gamma"]+7*["delta"], 33) | |
['delta'] | |
''' | |
from string import punctuation | |
from string import whitespace | |
def nopuncend(S):
    '''
    Return S with every trailing punctuation character removed.
    '''
    return S.rstrip(punctuation)
def notrail(S):
    '''
    Return S with its trailing whitespace removed.
    '''
    return S.rstrip()
def eightdigs(S):
    '''
    Return True if S is a string consisting of exactly 8 digits,
    and False for anything else (including non-string values).
    '''
    if not isinstance(S, str) or len(S) != 8:
        return False
    # The length alone is not enough: "abcdefgh" also has 8 characters.
    return all(c in "0123456789" for c in S)
def datextract(S):
    '''
    Return the tuple (year, month, day) of integers taken from
    an 8-character string S.  For any other length a message is
    printed and None is returned.
    '''
    if len(S) != 8:
        print('Must only enter 8 integers')
        return None
    return (int(S[:4]), int(S[4:6]), int(S[6:8]))
def strcount(S):
    '''
    Return a dictionary with the whitespace-separated words of S
    as keys and the number of times each word appears as value.
    '''
    dic = {}
    # Count whole words.  Counting with S.count(word) also matched the
    # word inside longer words, e.g. "a" inside "an".
    for word in S.split():
        dic[word] = dic.get(word, 0) + 1
    return dic
def sumcounts(D):
    '''
    Return the sum of all the values in dictionary D
    (0 for an empty dictionary).
    '''
    return sum(D.values())
def freqitems(S,p):
    '''
    Return a sorted list, without duplicates, of the items in
    sequence S that occur with at least p percent frequency.
    An empty sequence gives an empty list.
    '''
    from collections import Counter
    total = len(S)
    counts = Counter(S)  # one pass instead of S.count() per item
    # Compare count*100 with p*total rather than dividing first:
    # (29/100.0)*100 is 28.999999999999996, which would wrongly
    # exclude an item sitting exactly on a 29 percent threshold.
    # sorted() makes the order independent of dictionary ordering.
    return sorted(item for item, n in counts.items() if n * 100 >= p * total)
if __name__ == "__main__":
    # Executed as a script: run the doctests in the module docstring.
    import doctest
    doctest.testmod()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment