Skip to content

Instantly share code, notes, and snippets.

@zyan0
Created December 30, 2012 16:24
Show Gist options
  • Save zyan0/4413606 to your computer and use it in GitHub Desktop.
Save zyan0/4413606 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Copyright (C) 2010 Upsuper <[email protected]>
# License: AGPLv3
from threading import Thread, Lock
from httplib import HTTPConnection
import urllib
import cgi, json
import sys, re
# Configuration constants
THREADS_NUM = 5  # maximum number of worker threads
RENREN_USER = ''  # Renren account name
RENREN_PWD = ''  # Renren account password
USER_AGENT = 'urenren 0.1'  # User-Agent header sent to Renren


def json_dump(v):
    """Write *v* to standard output as JSON."""
    json.dump(v, sys.stdout)


# Pre-compiled pattern for one friend cell of the friend-list page.
# Captures: (1) numeric id from the profile link, (2) the image URL,
# (3) the text of the second link.
parse_re = re.compile(
    r'<td><p><a\s(?:[^>]+\s)?href="[^\?]+\?id=(\d+)[^"]*">'
    r'<img src="([^"]+)"(?:[^>]*)/></a></p>'
    r'<a href="[^"]+">([^<]+)</a></td>'
)
class RequestThread(Thread):
    """Worker thread that downloads pages of one user's friend list.

    Every worker pulls its next page number from the module-level counter
    ``curpage`` (guarded by ``curpage_lock``), stores what it parses in the
    shared ``friends`` dict, and raises the module-level ``stop_sign`` flag
    once it receives a page that contains no friend entries.
    """

    def __init__(self, id, sid, friends):
        """Prepare a worker.

        id      -- numeric ID of the user whose friend list is fetched
        sid     -- session ID string placed in the request query string
        friends -- dict filled in place; maps the friend ID (int) to a
                   ``(link text, image URL)`` tuple (presumably the
                   friend's name and avatar)
        """
        Thread.__init__(self)
        # '%%d' survives this formatting as '%d'; it is filled with the
        # page number for each request in run().
        self.__page = '/getfriends.do?curpage=%%d&id=%d&sid=%s' % (id, sid)
        self.__friends = friends
        self.__conn = HTTPConnection('3g.renren.com')

    def run(self):
        """Fetch and parse pages until some worker sees an empty page."""
        global curpage, stop_sign
        conn = self.__conn
        try:
            while not stop_sign:
                # Claim the next unprocessed page number.
                with curpage_lock:
                    page = curpage
                    curpage += 1
                # Download the page.
                conn.request('GET', self.__page % (page, ), None, {
                    'User-Agent': USER_AGENT
                })
                data = conn.getresponse().read()
                # Extract the friend entries.
                found_any = False
                for f in parse_re.finditer(data):
                    found_any = True
                    self.__friends[int(f.group(1))] = (f.group(3), f.group(2))
                # Only ever raise the flag.  Writing False back here would
                # let a slower worker that parsed an earlier, non-empty page
                # undo the stop request of a worker that already reached the
                # end, causing needless requests past the last page.
                if not found_any:
                    stop_sign = True
        finally:
            # Release the socket even when a request fails.
            conn.close()
def readFriends(id):
    """Download the friend list of user *id* with a pool of worker threads.

    Resets the module-level paging state, runs ``THREADS_NUM``
    ``RequestThread`` workers to completion and returns the dict they
    filled.  The session ID is read from the module-level ``sid``.
    """
    global curpage, curpage_lock, stop_sign
    # Fresh shared state for this run
    curpage, stop_sign = 0, False
    curpage_lock = Lock()
    friends = {}
    # Build the workers; they all write into the same dict
    workers = [RequestThread(id, sid, friends) for _ in range(THREADS_NUM)]
    # Launch every worker ...
    for worker in workers:
        worker.start()
    # ... then wait until all of them are done
    for worker in workers:
        worker.join()
    return friends
def main():
    """CGI entry point: print the friends two users have in common as JSON.

    Reads the ``id1`` and ``id2`` request fields, logs in with the
    configured account to obtain a session ID (stored in the module-level
    ``sid``), downloads both friend lists and writes a JSON object with the
    keys ``error``, ``count`` and ``data``.  If an ID is missing or zero, or
    no session ID can be found in the login response, ``{"error": true}`` is
    written instead.  A non-numeric ID raises ``ValueError``.
    """
    global sid
    # CGI response header followed by the blank separator line
    sys.stdout.write('Content-Type: text/plain\n\n')
    # Read the two user IDs; getfirst() also copes with repeated fields
    form = cgi.FieldStorage()
    id1 = int(form.getfirst('id1', 0))
    id2 = int(form.getfirst('id2', 0))
    if not (id1 and id2):
        json_dump({'error': True})
        return
    # Log in to Renren
    # NOTE(review): the credentials are posted over plain HTTP; confirm
    # whether the host offers HTTPS.
    conn = HTTPConnection('3g.renren.com')
    try:
        conn.request('POST', '/login.do',
                     urllib.urlencode({'email': RENREN_USER,
                                       'password': RENREN_PWD}),
                     {
                         'Content-Type': 'application/x-www-form-urlencoded',
                         'User-Agent': USER_AGENT,
                     })
        data = conn.getresponse().read()
    finally:
        # Close the connection even if the request fails
        conn.close()
    match = re.search(r'sid=([0-9a-f]+)', data, re.I)
    if match is None:
        # No session ID in the response: report the failure explicitly
        # instead of crashing on match.group()
        json_dump({'error': True})
        return
    sid = match.group(1)
    # Fetch both friend lists
    friends1 = readFriends(id1)
    friends2 = readFriends(id2)
    # Iterate over the smaller list, look up in the larger one
    if len(friends1) > len(friends2):
        friends1, friends2 = friends2, friends1
    # Compute the intersection
    intersection = [(i, friends2[i][0], friends2[i][1])
                    for i in friends1 if i in friends2]
    json_dump({'error': False, 'count': len(intersection), 'data': intersection})
if __name__ == '__main__':
    import traceback

    try:
        main()
    except Exception:
        # Top-level boundary: still answer with the JSON error object, but
        # record the cause on stderr instead of discarding it.  Catching
        # Exception rather than everything lets SystemExit and
        # KeyboardInterrupt propagate.
        traceback.print_exc()
        json_dump({'error': True})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment