Last active
January 26, 2016 23:46
-
-
Save controversial/9de0cddd41613ce03dc5 to your computer and use it in GitHub Desktop.
Plot the 25 most common domains found in Safari's history cache using matplotlib
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the 25 most common domains found in Safari's history cache.
#
# Safari keeps one file per visited page in HISTORY_DIR; each file is named
# after the percent-encoded URL followed by ".webhistory".  The script counts
# the domains of those URLs and saves a bar chart of the most frequent ones.
import os
from collections import Counter

try:  # Python 3
    from urllib.parse import unquote, urlparse
except ImportError:  # Python 2, which this script was originally written for
    from urllib2 import unquote
    from urlparse import urlparse

HISTORY_DIR = "~/Library/Caches/Metadata/Safari/History"
HISTORY_SUFFIX = ".webhistory"
TOP_N = 25
OUTPUT_FILE = "topsites.png"
MIN_Y_TOP = 4000.0   # Axis height the chart was originally drawn with
Y_HEADROOM = 1.15    # Room left above the tallest bar for its label
LABEL_GAP = 50.0 / MIN_Y_TOP  # Bar-to-label gap as a fraction of the axis


def visited_urls(filenames):
    """Return the decoded URLs encoded in Safari history file names.

    Names that do not start with "http" (which also covers "https") are
    ignored.  The ".webhistory" extension is removed only when it is
    actually present, so unexpected names are not truncated.
    """
    urls = []
    for name in filenames:
        if not name.startswith("http"):
            continue
        if name.endswith(HISTORY_SUFFIX):
            name = name[:-len(HISTORY_SUFFIX)]
        urls.append(unquote(name))  # Undo the percent-encoding
    return urls


def top_domains(filenames, limit=TOP_N):
    """Return up to `limit` (domain, count) pairs, most visited first.

    Returns an empty list when no file name looks like a URL.
    """
    domains = (urlparse(url).netloc for url in visited_urls(filenames))
    return Counter(domains).most_common(limit)


def y_axis_top(counts):
    """Return the upper y-axis limit for bars of the given heights.

    The limit never drops below the original 4000, but grows when a bar
    (plus its label) would otherwise be cut off.
    """
    peak = max(counts) if counts else 0
    return max(MIN_Y_TOP, peak * Y_HEADROOM)


def plot_top_domains(tops, output_file=OUTPUT_FILE):
    """Save a bar chart of (domain, count) pairs to `output_file`.

    Raises ValueError when `tops` is empty, since there is nothing to draw.
    """
    if not tops:
        raise ValueError("no domains to plot")

    # Imported here so the counting helpers work without matplotlib.
    from matplotlib import pyplot as plt

    items, counts = zip(*tops)  # Domain names, times each appears
    indices = list(range(len(tops)))
    y_top = y_axis_top(counts)
    label_gap = y_top * LABEL_GAP

    # align="edge" pins the left-aligned bars that the tick and label offsets
    # below assume; newer matplotlib centres bars by default.
    bars = plt.bar(indices, counts, 1, align="edge",
                   edgecolor="#FF3300", facecolor="#FF7700")
    plt.xticks([i + 0.5 for i in indices], items, rotation="vertical")
    plt.title("Top {} Domains".format(TOP_N))

    # Label the tops of the bars
    for rect, count in zip(bars, counts):
        plt.text(rect.get_x() + 0.6, count + label_gap, str(count),
                 ha="center", va="bottom", rotation="vertical")

    plt.ylim((0.0, y_top))
    plt.savefig(output_file, bbox_inches="tight")


def main():
    """Read Safari's history cache and write the chart to OUTPUT_FILE."""
    files = os.listdir(os.path.expanduser(HISTORY_DIR))
    webpages = [f for f in files if f.startswith("http")]
    print("Found {} pieces of data".format(len(webpages)))

    tops = top_domains(webpages)
    if not tops:
        # An empty history would otherwise crash while unpacking the counts.
        print("No history entries found; nothing to plot")
        return
    plot_top_domains(tops)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment