Webtrends API extraction — pulls one day of report data at a time from the Webtrends REST API and writes each day's table to its own CSV file.
import csv
import datetime
import gzip
import pdb
import StringIO
import time
from datetime import date

from lxml import etree
from mechanize import Browser

extractingtoday = False
#extractdate = "2014m06d19"
oneday = datetime.timedelta(days=1)
d = date(2014, 10, 8)

br = Browser()
br.add_password("https://ws.webtrends.com", "[username]", "[password]")
br.addheaders = [
    ('Accept-Encoding', 'gzip'),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36'),
    ('Cache-Control', 'max-age=0'),
]

while not extractingtoday:
    print d
    dt = d.timetuple()
    extractdate = str(dt[0]) + "m" + str(dt[1]) + "d" + str(dt[2])
    QueryString = ("[querystring]" + extractdate + "&end_period=" + extractdate +
                   "&period_type=agg&measures=0*1&format=html&suppress_error_codes=true")
    #pdb.set_trace()
    ## start debug

    # Retry until the request succeeds, e.g. after a transient network error.
    query = None
    while query is None:
        try:
            query = br.open(QueryString)
        except Exception:
            pass

    # The response body is gzip-compressed (requested via the Accept-Encoding header).
    compressedstream = StringIO.StringIO(query.read())
    gzipper = gzip.GzipFile(fileobj=compressedstream)
    unzippeddata = gzipper.read()

    # Parse the HTML report; recover=True lets lxml tolerate malformed markup.
    parser = etree.XMLParser(recover=True)
    data = unzippeddata.decode('utf-8')
    tree = etree.fromstring(data.encode('utf-8'), parser=parser)

    # Collect the table cells, skipping the first two (header) rows.
    output_data = []
    rowcount = 0
    for elem_row in tree[1].findall('tr'):
        if rowcount > 1:
            row = []
            for elem_cell in elem_row.findall('td'):
                if elem_cell.text is None:
                    row.append(elem_cell.text)
                else:
                    row.append(elem_cell.text.encode('utf-8'))
            output_data.append(row)
        rowcount = rowcount + 1

    # Write one CSV file per extracted day, named after the extract date.
    with open(extractdate + '.csv', 'wb') as csvfile:
        csvwriter = csv.writer(csvfile, dialect='excel')
        for t in output_data:
            csvwriter.writerow(t)
    ## end debug

    d = d + oneday
    if d == date.today():
        extractingtoday = True
    #time.sleep(40)

br.close()
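For anyone running this under Python 3, where mechanize, StringIO, and the print statement above no longer apply, here is a minimal sketch of the same daily loop, assuming the requests library is installed (requests decompresses gzip responses itself, so the StringIO/GzipFile step disappears). The [querystring], [username], and [password] placeholders are the same as in the script above, and the .//tr selector is an assumption standing in for the tree[1] indexing used there:

import csv
from datetime import date, timedelta

import requests          # assumed available; handles gzip decompression itself
from lxml import html

d = date(2014, 10, 8)
while d < date.today():
    extractdate = "%dm%dd%d" % (d.year, d.month, d.day)
    url = ("[querystring]" + extractdate + "&end_period=" + extractdate +
           "&period_type=agg&measures=0*1&format=html&suppress_error_codes=true")

    # Same placeholder credentials as above, sent as HTTP basic auth.
    resp = requests.get(url, auth=("[username]", "[password]"))
    resp.raise_for_status()

    # Parse the HTML report and keep everything after the two header rows.
    tree = html.fromstring(resp.content)
    rows = [[cell.text_content() for cell in tr.findall("td")]
            for tr in tree.findall(".//tr")[2:]]

    with open(extractdate + ".csv", "w", newline="") as f:
        csv.writer(f, dialect="excel").writerows(rows)

    d += timedelta(days=1)

As in the original, this produces one file per day (e.g. 2014m10d8.csv); the retry loop around the request was left out for brevity.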