anoved · February 9, 2012 01:57 · Feb 9, 2012 · Feb 9, 2012
diff --git a/readinglistdump.py b/readinglistdump.py
@@ -43,7 +43,16 @@ class PropertyListParser(BeautifulSoup.BeautifulStoneSoup):
 # Yank out extraneous newline strings (simplifies stepping from tag to tag).
 #
 soup = PropertyListParser(xml_data)
-rl_array = soup.find(text='com.apple.ReadingList').parent.findPrevious('array')
+# Locate the tag holding the 'com.apple.ReadingList' marker text, then the
+# first array inside that tag's parent. Either lookup can come back empty
+# (no marker text in the bookmarks, or no array beside it); in that case
+# there is nothing to print, so stop quietly with the default exit status.
+rlid_text = soup.find(text='com.apple.ReadingList')
+rlid = rlid_text.parent if rlid_text is not None else None
+rl_array = rlid.parent.find('array') if rlid is not None else None
+if rl_array is None:
+	raise SystemExit
+
 reading_list = rl_array.extract()
 [newline.extract() for newline in reading_list.findAll(text='\n')]
 

diff --git a/readinglistdump.py b/readinglistdump.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+#
+# What does this script do?
+#	It prints information about the Unread items in your Safari Reading List.
+#	Items are printed in reverse stored order (presumably oldest first), one per
+#	line. The line format is "Title", "Preview text", "URL", "date last fetched".
+#
+# What is the Safari Reading List?
+#	A category of bookmarks introduced in Safari 5.1, intended to represent
+#	articles you intend to read at a later time. It syncs with iOS Safari.
+#
+
+#
+# This script uses Beautiful Soup 3.x for xml parsing.
+# http://www.crummy.com/software/BeautifulSoup/
+#
+import BeautifulSoup
+import os
+
+#
+# Reading List items are stored as Safari bookmarks.
+# Safari bookmarks are stored as a binary property list file.
+# plutil can convert binary property lists to xml format.
+# The -o - option prints the output to stdout.
+# We plug our pipe into that.
+#
+xml_pipe = os.popen('/usr/bin/plutil -convert xml1 -o - ~/Library/Safari/Bookmarks.plist', 'r')
+xml_data = xml_pipe.read()
+xml_pipe.close()
+
+#
+# BeautifulStoneSoup is a generic XML parser.
+# Tell it that plist array/dict tags can nest and true/false tags self-close.
+#
+class PropertyListParser(BeautifulSoup.BeautifulStoneSoup):
+	NESTABLE_TAGS = BeautifulSoup.buildTagMap([], ['array', 'dict'])
+	SELF_CLOSING_TAGS = BeautifulSoup.buildTagMap(None, ['true', 'false'])
+
+#
+# Convert the Safari bookmarks data to tag soup.
+# Find the array containing reading list items; that's all we need.
+# Yank out extraneous newline strings (simplifies stepping from tag to tag).
+#
+soup = PropertyListParser(xml_data)
+rl_array = soup.find(text='com.apple.ReadingList').parent.findPrevious('array')
+reading_list = rl_array.extract()
+[newline.extract() for newline in reading_list.findAll(text='\n')]
+
+#
+# Loop through the list of reading list items, starting with the oldest item.
+# Skip items that have been viewed - we only want "Unread" items.
+#
+reading_list_items = reading_list.contents
+reading_list_items.reverse()
+for reading_list_item in reading_list_items:
+	if None != reading_list_item.find(text='DateLastViewed'):
+		continue
+
+	#
+	# Look up each field by finding its key label text.
+	# The value tag is the next sibling of the key label's tag.
+	#
+	item_title = reading_list_item.find(text='title').parent.nextSibling.string
+	item_preview = reading_list_item.find(text='PreviewText').parent.nextSibling.string
+	item_url = reading_list_item.find(text='URLString').parent.nextSibling.string
+	item_fetchdate = reading_list_item.find(text='DateLastFetched').parent.nextSibling.string
+	print('"%s", "%s", "%s", "%s"' % (item_title, item_preview, item_url, item_fetchdate))