Created
February 9, 2012 01:57
Revisions
-
anoved revised this gist
Feb 9, 2012 . 1 changed file with 5 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -43,7 +43,11 @@ class PropertyListParser(BeautifulSoup.BeautifulStoneSoup): # Yank out extraneous newline strings (simplifies stepping from tag to tag). # soup = PropertyListParser(xml_data) rlid = soup.find(text='com.apple.ReadingList').parent rl_array = rlid.parent.find('array') if None == rl_array: exit() reading_list = rl_array.extract() [newline.extract() for newline in reading_list.findAll(text='\n')] -
anoved created this gist
Feb 9, 2012 .There are no files selected for viewing
#!/usr/bin/env python
#
# What does this script do?
# It prints information about the Unread items in your Safari Reading List.
# The oldest item is printed first (maybe). Each item is printed on its own
# line. The line format is "Title", "Preview text", "URL", "bookmark date".
# A field is printed as an empty string when the item does not carry it.
#
# What is the Safari Reading List?
# A category of bookmarks introduced in Safari 5.1, intended to represent
# articles you intend to read at a later time. It syncs with iOS Safari.
#
#
# This script uses Beautiful Soup 3.x for xml parsing.
# http://www.crummy.com/software/BeautifulSoup/
#

import BeautifulSoup
import os

#
# Reading List items are stored as Safari bookmarks.
# Safari bookmarks are stored as a binary property list file.
# plutil can convert binary property lists to xml format.
# The -o - option prints the output to stdout.
# We plug our pipe into that.
#
xml_pipe = os.popen('/usr/bin/plutil -convert xml1 -o - ~/Library/Safari/Bookmarks.plist', 'r')
try:
    xml_data = xml_pipe.read()
finally:
    # close() reports the command's exit status; None means it succeeded.
    plutil_status = xml_pipe.close()
if plutil_status is not None:
    raise SystemExit('plutil could not convert the Safari bookmarks file.')


#
# BeautifulStoneSoup is a generic xml parser.
# We need to tell it a few things about property lists, or it'll get confused.
#
class PropertyListParser(BeautifulSoup.BeautifulStoneSoup):
    """BeautifulStoneSoup configured with property-list tag rules."""

    # 'array' and 'dict' tags may contain further 'array' and 'dict' tags.
    NESTABLE_TAGS = BeautifulSoup.buildTagMap([], ['array', 'dict'])
    # 'true' and 'false' never have a closing tag or any content.
    SELF_CLOSING_TAGS = BeautifulSoup.buildTagMap(None, ['true', 'false'])


def value_after_key(item, key_name):
    """Return the text of the value tag that follows the key named key_name.

    item is the tag for one reading list entry. The key label is located by
    its text anywhere inside item, and the value is the tag immediately after
    the label's own tag. An empty string is returned when the label, the
    following tag, or its text is missing, so one incomplete entry cannot
    abort the whole listing.
    """
    label = item.find(text=key_name)
    if label is None:
        return ''
    value_tag = label.parent.nextSibling
    if value_tag is None or value_tag.string is None:
        return ''
    return value_tag.string


#
# Convert the Safari bookmarks data to tag soup.
# Find the array containing reading list items; that's all we need.
# The search is limited to the dict that holds the 'com.apple.ReadingList'
# string, so an array belonging to some other bookmark folder is never
# picked up by mistake. If there is no such array, there is nothing to print.
#
soup = PropertyListParser(xml_data)
rl_label = soup.find(text='com.apple.ReadingList')
rl_array = None
if rl_label is not None:
    # rl_label.parent is the tag wrapping the text; its parent is the
    # enclosing container whose contents we search for the array.
    rl_array = rl_label.parent.parent.find('array')
if rl_array is None:
    raise SystemExit(0)

#
# Yank out extraneous newline strings (simplifies stepping from tag to tag).
#
reading_list = rl_array.extract()
for newline in reading_list.findAll(text='\n'):
    newline.extract()

#
# Loop through the list of reading list items, starting with the oldest item.
# Skip items that have been viewed - we only want "Unread" items.
#
reading_list_items = list(reversed(reading_list.contents))
for reading_list_item in reading_list_items:
    if reading_list_item.find(text='DateLastViewed') is not None:
        continue

    #
    # Find item info the easy way, by finding it.
    # Value tags follow the key label tags.
    #
    item_title = value_after_key(reading_list_item, 'title')
    item_preview = value_after_key(reading_list_item, 'PreviewText')
    item_url = value_after_key(reading_list_item, 'URLString')
    item_fetchdate = value_after_key(reading_list_item, 'DateLastFetched')
    print('"%s", "%s", "%s", "%s"' % (item_title, item_preview, item_url, item_fetchdate))