Skip to content

Instantly share code, notes, and snippets.

@pokidovea
Created April 30, 2013 09:04
Show Gist options
  • Save pokidovea/5487532 to your computer and use it in GitHub Desktop.
Save pokidovea/5487532 to your computer and use it in GitHub Desktop.
С рандомной страницы баша выдирает и выводит посты с рейтингом больше 10 000
# -*- coding: utf-8 -*-
'''
Fetch a random page from bash.im and print every quote rated above 10,000.

Created on 28.09.2012
Created by pokidovea([email protected])
'''
import lxml.html
import urllib2
from StringIO import StringIO
import gzip
# Only quotes whose rating is strictly greater than this are printed.
MIN_RATING = 10000

# Build the request for a random page of quotes. The headers ask for a
# gzip-compressed, uncached response and send a desktop Firefox User-Agent.
request = urllib2.Request('http://bash.im/random')
request.add_header('Accept-encoding', 'gzip')
request.add_header('Cache-Control', 'no-cache')
request.add_header('Pragma', 'no-cache')
request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:18.0) Gecko/20100101 Firefox/18.0')

response = urllib2.urlopen(request)
try:
    data = response.read()
    # gzip is only requested, not guaranteed: decompress when the server says
    # it compressed the body, otherwise keep the raw bytes. Previously `data`
    # was left undefined for an uncompressed response.
    if response.info().get('Content-Encoding') == 'gzip':
        data = gzip.GzipFile(fileobj=StringIO(data)).read()
finally:
    # Release the connection even if reading or decompressing fails.
    response.close()

doc = lxml.html.document_fromstring(data)
for quote in doc.cssselect('.quote'):
    rating_nodes = quote.cssselect('.rating')
    if not rating_nodes:
        continue

    # `.text` is None for an element without text; treat it as non-numeric
    # instead of failing on None.isdigit().
    rating_text = rating_nodes[0].text or ''
    if not rating_text.isdigit() or int(rating_text) <= MIN_RATING:
        continue

    text_nodes = quote.cssselect('div.text')
    if not text_nodes:
        # A quote without a text container has nothing to print.
        continue

    for fragment in text_nodes[0].itertext():
        # NOTE(review): presumably the page is served as cp1251 but gets parsed
        # as CP1252, so this round-trip restores the Cyrillic text -- confirm
        # against the live response before changing it.
        # Single-argument print(...) prints the same under the Python 2
        # print statement.
        print(fragment.encode('CP1252').decode('cp1251'))
    # Separator after each printed quote.
    print(u'\n++++++++++++++\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment