edsu · December 1, 2016 17:36 · Feb 7, 2013 · Feb 7, 2013 · Feb 7, 2013 · Feb 7, 2013
diff --git a/worldcat_live.py b/worldcat_live.py
@@ -26,6 +26,7 @@
 import json
 import time
 import urllib
+import datetime
 import xml.etree.ElementTree as xml
 
 base_url = "http://experimental.worldcat.org/xwwg/rest/feed?format=json" 
@@ -53,6 +54,7 @@ def items(poll_time=10):
                 if not item.has_key("title"): item["title"] = None
                 item["org"] = get_org(item.get("instsym", None))
                 item["url"] = "http://worldcat.org/oclc/" + item["oclcno"]
+                item["created"] = datetime.datetime.fromtimestamp(float(item["created"]))
                 yield item
 
         maxseq = response["maxseq"]
@@ -78,4 +80,4 @@ def get_org(org_code):
 
 if __name__ == "__main__":
     for item in items():
-        print "%s <%s> %s <%s>" % (item["org"]["name"], item["org"]["url"], item["title"], item["url"])
+        print "[%s] %s <%s> %s <%s>" % (item["created"], item["org"]["name"], item["org"]["url"], item["title"], item["url"])
diff --git a/worldcat_live.py b/worldcat_live.py
@@ -21,10 +21,6 @@
 
 [1] http://experimental.worldcat.org/xwwg/
 [2] https://twitter.com/edsu/status/299469915906244608
-
-xml character entities 
-missing title
-missing instsym
 """
 
 import json

diff --git a/worldcat_live.py b/worldcat_live.py
@@ -15,9 +15,12 @@
 
 If you run the module directly you'll see new titles as they are cataloged 
 along with the name of the institution that cataloged them displayed on the 
-console.
+console. You may occasionally see the organization displayed as 
+"None <None>"; this happens because WorldCat Live items sometimes lack 
+an instsym [2].
 
 [1] http://experimental.worldcat.org/xwwg/
+[2] https://twitter.com/edsu/status/299469915906244608
 
 xml character entities 
 missing title

diff --git a/worldcat_live.py b/worldcat_live.py
@@ -10,8 +10,8 @@
       print item["title"]
 
 worldcat_live.items is a Python generator that will return recently cataloged
-items, forever. Optionally pass in a poll_time to control how often to check the 
-Worldcat Live API for more results (default is every 10 seconds).
+items, forever. Optionally pass in a poll_time to control how often to check 
+the Worldcat Live API for more results (default is every 10 seconds).
 
 If you run the module directly you'll see new titles as they are cataloged 
 along with the name of the institution that cataloged them displayed on the 

diff --git a/worldcat_live.py b/worldcat_live.py
@@ -9,12 +9,13 @@
   for item in worldcat_live.items():
       print item["title"]
 
-worldcat_live.items is a Python generator that will return items cataloged
-forever. Optionally pass in a poll_time to control how often to check the 
+worldcat_live.items is a Python generator that will return recently cataloged
+items, forever. Optionally pass in a poll_time to control how often to check the 
 Worldcat Live API for more results (default is every 10 seconds).
 
 If you run the module directly you'll see new titles as they are cataloged 
-along with the name of the institution that cataloged them. 
+along with the name of the institution that cataloged them displayed on the 
+console.
 
 [1] http://experimental.worldcat.org/xwwg/
 

diff --git a/worldcat_live.py b/worldcat_live.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+"""
+This is mainly a demonstration of OCLC's experimental Worldcat Live API [1] 
+from Python. You should be able to use this module like so:
+
+  import worldcat_live
+
+  for item in worldcat_live.items():
+      print item["title"]
+
+worldcat_live.items is a Python generator that will return items cataloged
+forever. Optionally pass in a poll_time to control how often to check the 
+Worldcat Live API for more results (default is every 10 seconds).
+
+If you run the module directly you'll see new titles as they are cataloged 
+along with the name of the institution that cataloged them. 
+
+[1] http://experimental.worldcat.org/xwwg/
+
+xml character entities 
+missing title
+missing instsym
+"""
+
+import json
+import time
+import urllib
+import xml.etree.ElementTree as xml
+
+base_url = "http://experimental.worldcat.org/xwwg/rest/feed?format=json" 
+
+
+def items(poll_time=10):
+    """A generator for new items added to Worldcat, it returns each item
+    as a Python dictionary mirroring that item's entry in the Worldcat 
+    Live API's JSON response.
+
+    The poll_time is the number of seconds to wait before polling for more 
+    results from the Worldcat Live API.
+
+    It does annotate the response with information about the organization 
+    that cataloged the item in the item's "org" key.
+    """
+    maxseq = None
+    while True:
+        url = base_url + "&start=seq-%s" % maxseq if maxseq else base_url
+        response = json.loads(urllib.urlopen(url).read())
+
+        for item in response["newrec"]:
+            if not maxseq or item["id"] >= maxseq:
+                # XXX remove this if we can rely on "title" being there
+                if not item.has_key("title"): item["title"] = None
+                item["org"] = get_org(item.get("instsym", None))
+                item["url"] = "http://worldcat.org/oclc/" + item["oclcno"]
+                yield item
+
+        maxseq = response["maxseq"]
+        time.sleep(poll_time)
+
+orgs = {} 
+def get_org(org_code):
+    """Looks up an OCLC institution symbol and returns a dictionary of
+    information about that organization using the Worldcat Registry API.
+    """
+    if not org_code: return {"name": None, "url": None}
+    if orgs.has_key(org_code): return orgs[org_code]
+
+    url = "http://www.worldcat.org/webservices/registry/lookup/Institutions/oclcSymbol/%s?serviceLabel=content" % org_code
+    doc = xml.fromstring(urllib.urlopen(url).read())
+
+    org = {} 
+    org["name"] = doc.findtext(".//{info:rfa/rfaRegistry/xmlSchemas/institutions/nameLocation}institutionName")
+    org["url"] = doc.findtext(".//{info:rfa/rfaRegistry/xmlSchemas/institutions/nameLocation}infoSiteUrl")
+    orgs[org_code] = org
+    return org
+
+
+if __name__ == "__main__":
+    for item in items():
+        print "%s <%s> %s <%s>" % (item["org"]["name"], item["org"]["url"], item["title"], item["url"])