Created
December 30, 2012 16:24
-
-
Save zyan0/4413606 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Copyright (C) 2010 Upsuper <[email protected]>
# License: AGPLv3
import cgi
import json
import re
import sys
import urllib
from httplib import HTTPConnection
from threading import Lock, Thread
# ---- Settings ----
THREADS_NUM = 5             # maximum number of worker threads
RENREN_USER = ''            # Renren account user name
RENREN_PWD = ''             # Renren account password
USER_AGENT = 'urenren 0.1'  # User-Agent header sent to Renren


def json_dump(v):
    """Serialize *v* as JSON and write it to standard output."""
    return json.dump(v, sys.stdout)


# Pre-compiled pattern for one friend entry in a friend-list page.
# Capture groups: 1 = digits of the ``id`` query parameter in the first link,
# 2 = the <img> ``src`` value, 3 = the text of the trailing link.
parse_re = re.compile(
    r'<td><p><a\s(?:[^>]+\s)?href="[^\?]+\?id=(\d+)[^"]*">'
    r'<img src="([^"]+)"(?:[^>]*)/></a></p>'
    r'<a href="[^"]+">([^<]+)</a></td>'
)
class RequestThread(Thread):
    """Worker thread that downloads pages of one user's friend list.

    Every worker shares the module-level ``curpage`` counter (guarded by
    ``curpage_lock``) and the module-level ``stop_sign`` flag. A worker
    repeatedly claims the next page number, requests that page from
    3g.renren.com, and stores every entry matched by ``parse_re`` in the
    shared ``friends`` dict. The first page that yields no entries raises
    ``stop_sign`` so that all workers wind down.
    """

    def __init__(self, id, sid, friends):
        """Prepare a worker.

        id      -- numeric id of the user whose friends are listed
        sid     -- session id string appended to every request URL
        friends -- dict shared between workers; filled in by run() as
                   ``{friend_id: (group 3, group 2)}`` of each regex match
                   (presumably display name and avatar URL -- confirm
                   against the page markup)
        """
        Thread.__init__(self)
        # '%%d' survives this first substitution as '%d', leaving a template
        # into which run() later inserts the page number.
        self.__page = '/getfriends.do?curpage=%%d&id=%d&sid=%s' % (id, sid)
        self.__friends = friends
        self.__conn = HTTPConnection('3g.renren.com')

    def run(self):
        """Fetch and parse pages until some worker hits an empty page."""
        global curpage, stop_sign
        conn = self.__conn
        try:
            while not stop_sign:
                # Claim the next page number; the lock is released even if
                # something inside the block raises.
                with curpage_lock:
                    page = curpage
                    curpage += 1

                # Download the claimed page.
                conn.request('GET', self.__page % (page, ), None, {
                    'User-Agent': USER_AGENT
                })
                data = conn.getresponse().read()

                # Record every friend entry found on the page.
                found_any = False
                for entry in parse_re.finditer(data):
                    found_any = True
                    friend_id = int(entry.group(1))
                    self.__friends[friend_id] = (entry.group(3),
                                                 entry.group(2))

                # Only ever raise the flag. Writing False back here would
                # let a worker that parsed an earlier, non-empty page undo
                # another worker's "end reached" signal and keep all
                # workers requesting pages past the end of the list.
                if not found_any:
                    stop_sign = True
        finally:
            # Release the socket whether the loop ended normally or not.
            conn.close()
def readFriends(id):
    """Collect the friend list of user *id* using THREADS_NUM workers.

    Resets the module-level paging state (``curpage``, ``curpage_lock``,
    ``stop_sign``) that the RequestThread workers share, runs the workers to
    completion, and returns the dict they filled in. The session id is read
    from the module-level ``sid``.
    """
    global curpage, curpage_lock, stop_sign

    # Fresh shared state for this run.
    curpage, curpage_lock, stop_sign = 0, Lock(), False
    friends = {}

    # Build the worker pool; every worker writes into the same dict.
    workers = [RequestThread(id, sid, friends) for _ in xrange(THREADS_NUM)]

    # Launch all workers first, then wait for each of them to finish.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    return friends
def main():
    """Handle one CGI request: report the friends two Renren users share.

    Reads integer ``id1`` and ``id2`` from the CGI form, logs in to
    3g.renren.com with RENREN_USER / RENREN_PWD to obtain a session id,
    fetches both friend lists with readFriends(), and writes one JSON
    object to standard output:

    * ``{"error": true}`` when either id is missing or zero, or when no
      session id is found in the login response;
    * otherwise ``{"error": false, "count": N, "data": [...]}`` with one
      ``[id, field0, field1]`` entry per common friend, where the two
      fields are the values readFriends() stored for that id.

    Side effect: stores the session id in the module-level ``sid``, which
    readFriends() reads.

    Raises ValueError if ``id1`` or ``id2`` is not an integer; network
    errors from the login request propagate to the caller.
    """
    global sid

    # Start the CGI response. The empty line after the header is required
    # to separate headers from the body; without it the JSON that follows
    # would be parsed as part of the header section.
    sys.stdout.write('Content-Type: text/plain\n\n')

    # Read the two user ids; a missing field counts as 0 (= invalid).
    form = cgi.FieldStorage()
    id1 = int(form.getfirst('id1', 0))
    id2 = int(form.getfirst('id2', 0))
    if not (id1 and id2):
        json_dump({'error': True})
        return

    # Log in to Renren.
    # NOTE(review): the credentials are posted over plain HTTP; confirm
    # whether the endpoint is reachable over HTTPS before changing this.
    conn = HTTPConnection('3g.renren.com')
    try:
        conn.request('POST', '/login.do',
                     urllib.urlencode({'email': RENREN_USER,
                                       'password': RENREN_PWD}),
                     {
                         'Content-Type': 'application/x-www-form-urlencoded',
                         'User-Agent': USER_AGENT,
                     })
        data = conn.getresponse().read()
    finally:
        # Close the connection even when the request fails.
        conn.close()

    # The session id is scraped from the login response body.
    match = re.search(r'sid=([0-9a-f]+)', data, re.I)
    if match is None:
        # No session id in the response: report the failure explicitly
        # instead of tripping over ``None.group``.
        json_dump({'error': True})
        return
    sid = match.group(1)

    # Fetch both friend lists.
    friends1 = readFriends(id1)
    friends2 = readFriends(id2)

    # Iterate over the smaller dict and probe the larger one.
    if len(friends1) > len(friends2):
        friends1, friends2 = friends2, friends1

    # Intersect the two lists by friend id.
    intersection = [(i, friends2[i][0], friends2[i][1])
                    for i in friends1 if i in friends2]
    json_dump({'error': False, 'count': len(intersection),
               'data': intersection})
if __name__ == '__main__':
    # Script entry point: any failure inside main() is reported to the
    # client as a JSON error object instead of a traceback.
    try:
        main()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that SystemExit
        # and KeyboardInterrupt are not swallowed.
        json_dump({'error': True})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment