Created
April 5, 2012 18:10
-
-
Save fanzeyi/2312928 to your computer and use it in GitHub Desktop.
备份饭否消息。 非API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
# AUTHOR: Zeray Rice <[email protected]>
# FILE: main.py
# CREATED: 01:15:37 06/04/2012
# MODIFIED: 01:57:15 06/04/2012
import requests | |
import datetime | |
from jinja2 import Environment, FileSystemLoader | |
from BeautifulSoup import BeautifulSoup | |
# Cookies sent with every page request -- fill in the values here.
cookies = {'ai': '', 'u': '', 'SID': '', 'uuid': ''}

# Templates are looked up in the current working directory.
jinja = Environment(loader=FileSystemLoader('.'))
tpl = jinja.get_template("message.xml")
class Status(object):
    """A single message extracted from a parsed timeline item.

    Attributes:
        text: text of the first ``span`` with class ``content``.
        time: ``datetime`` parsed from the ``stime`` attribute of the first
            ``a`` with class ``time``.
    """

    def __init__(self, status):
        """Read the message text and timestamp out of ``status``.

        ``status`` must offer ``findAll``; an IndexError is raised when
        either expected element is missing, and a ValueError when the
        ``stime`` value does not match the expected layout.
        """
        content_spans = status.findAll("span", {'class': "content"})
        self.text = content_spans[0].text

        time_links = status.findAll("a", {'class': "time"})
        raw_time = time_links[0].attrMap['stime']
        # The format hard-codes a literal "+0000" offset, so the result is
        # a naive datetime.
        self.time = datetime.datetime.strptime(
            raw_time, "%a %b %d %H:%M:%S +0000 %Y")
def parseHTML(html, result):
    """Parse one timeline page and add its messages to ``result``.

    Every ``li`` found inside the first ``div`` with id ``stream`` is
    wrapped in a ``Status`` and appended to ``result`` in the order
    ``findAll`` returns them.  ``result`` is modified in place and nothing
    is returned.  An IndexError is raised when the page has no such ``div``.
    """
    soup = BeautifulSoup(html)
    stream_div = soup.findAll("div", id="stream")[0]
    result.extend(Status(item) for item in stream_div.findAll("li"))
def renderMSG(result, filenameCount):
    """Render a batch of statuses to ``treeholes/<filenameCount>.xml``.

    Args:
        result: statuses handed to the template as ``status``.
        filenameCount: integer used as the base name of the output file.

    The ``treeholes`` directory is created when it does not exist yet, so
    the first batch no longer fails on a fresh checkout.  The rendered
    template is encoded as UTF-8 and written in binary mode so the bytes
    reach the file unchanged.
    """
    import os  # local import keeps this function self-contained

    # Single-argument call form prints the same text on Python 2 and 3.
    print("Saving to %d.xml" % filenameCount)

    if not os.path.isdir("treeholes"):
        os.makedirs("treeholes")

    with open("treeholes/%d.xml" % filenameCount, "wb") as fp:
        fp.write(tpl.render(status=result).encode("utf-8"))
def getStatus(first_page=1, last_page=3511, batch_size=100, timeout=30):
    """Download the timeline pages and save their messages in XML batches.

    Args:
        first_page: number of the first page to fetch (inclusive).
        last_page: number of the last page to fetch (inclusive).
        batch_size: a batch is written out as soon as at least this many
            messages have been collected.
        timeout: seconds to wait on each HTTP request before giving up, so
            one stalled connection cannot hang the whole run.

    The defaults reproduce the original hard-coded behaviour: pages 1
    through 3511 in batches of 100.  Batches are numbered from 1 and written
    with ``renderMSG``; whatever is left after the last page is written as a
    final batch, and nothing is written when there is no remainder.
    """
    filenameCount = 1
    result = []
    for p in range(first_page, last_page + 1):
        r = requests.get("http://fanfou.com/treeholes/p.%d" % p,
                         cookies=cookies, timeout=timeout)
        parseHTML(r.text, result)
        print("Parsing Page %d.." % p)
        if len(result) >= batch_size:
            renderMSG(result, filenameCount)
            filenameCount += 1
            # Start a fresh list instead of clearing the one just rendered.
            result = []
    # Skip the trailing write when the last batch was already flushed;
    # otherwise an empty XML file would be produced.
    if result:
        renderMSG(result, filenameCount)
# Runs at module level: the backup starts as soon as this file is executed.
getStatus()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<timelime id="treeholes">{#
  Emits one <status> element per entry of `status`, with the entry's
  `text` and `time` wrapped in CDATA.  Any "]]>" inside the text is split
  across two CDATA sections so it cannot terminate the section early.
  This comment renders to nothing, so the output layout is unchanged.
#}{% for st in status %}
<status>
<text><![CDATA[{{ st.text|replace("]]>", "]]]]><![CDATA[>") }}]]></text>
<time><![CDATA[{{ st.time }}]]></time>
</status>
{% endfor %}</timelime>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment