Dockerized install of Scrapy 1.1 RC4 on CentOS 7

Builds a CentOS 7 Docker image with the build dependencies Scrapy needs, then installs Scrapy 1.1.0rc4 in a virtualenv, generates the tutorial project, and runs the generated spider.
Commands:
$ source /usr/bin/virtualenvwrapper.sh
$ mkvirtualenv scrapy11rc3.py2
$ pip install scrapy==1.1.0rc4
$ scrapy startproject tutorial
$ cd tutorial
$ scrapy genspider example example.com
$ cat tutorial/spiders/example.py
$ scrapy crawl example
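The commands above drive everything through the `scrapy` CLI. For completeness, the same crawl can also be run from a plain Python script; here is a minimal sketch using Scrapy's CrawlerProcess API (the file name run_example.py is hypothetical, and the import path assumes the project layout generated above):

# run_example.py -- hypothetical helper, not part of the gist: drives the
# same crawl from plain Python instead of the `scrapy crawl` CLI.
# Run it from the project root created by `scrapy startproject tutorial`.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from tutorial.spiders.example import ExampleSpider

process = CrawlerProcess(get_project_settings())
process.crawl(ExampleSpider)
process.start()  # blocks until the crawl finishes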
Full console session:
[scrapyuser@38e3052773ef ~]$ source /usr/bin/virtualenvwrapper.sh
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/premkproject
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/postmkproject
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/initialize
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/premkvirtualenv
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/postmkvirtualenv
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/prermvirtualenv
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/postrmvirtualenv
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/predeactivate
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/postdeactivate
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/preactivate
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/postactivate
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/get_env_details
[scrapyuser@38e3052773ef ~]$ mkvirtualenv scrapy11rc3.py2
New python executable in /home/scrapyuser/.virtualenvs/scrapy11rc3.py2/bin/python
Installing setuptools, pip, wheel...done.
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/scrapy11rc3.py2/bin/predeactivate
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/scrapy11rc3.py2/bin/postdeactivate
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/scrapy11rc3.py2/bin/preactivate
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/scrapy11rc3.py2/bin/postactivate
virtualenvwrapper.user_scripts creating /home/scrapyuser/.virtualenvs/scrapy11rc3.py2/bin/get_env_details
(scrapy11rc3.py2) [scrapyuser@38e3052773ef ~]$ pip install scrapy==1.1.0rc4
Collecting scrapy==1.1.0rc4
  Downloading Scrapy-1.1.0rc4-py2.py3-none-any.whl (294kB)
    100% |################################| 296kB 787kB/s
Collecting service-identity (from scrapy==1.1.0rc4)
  Downloading service_identity-16.0.0-py2.py3-none-any.whl
Collecting cssselect>=0.9 (from scrapy==1.1.0rc4)
  Downloading cssselect-0.9.1.tar.gz
Collecting queuelib (from scrapy==1.1.0rc4)
  Downloading queuelib-1.4.2-py2.py3-none-any.whl
Collecting w3lib>=1.13.0 (from scrapy==1.1.0rc4)
  Downloading w3lib-1.14.2-py2.py3-none-any.whl
Collecting lxml (from scrapy==1.1.0rc4)
  Downloading lxml-3.6.0.tar.gz (3.7MB)
    100% |################################| 3.7MB 202kB/s
Collecting pyOpenSSL (from scrapy==1.1.0rc4)
  Downloading pyOpenSSL-16.0.0-py2.py3-none-any.whl (45kB)
    100% |################################| 51kB 1.9MB/s
Collecting six>=1.5.2 (from scrapy==1.1.0rc4)
  Downloading six-1.10.0-py2.py3-none-any.whl
Collecting parsel>=0.9.3 (from scrapy==1.1.0rc4)
  Downloading parsel-1.0.2-py2.py3-none-any.whl
Collecting PyDispatcher>=2.0.5 (from scrapy==1.1.0rc4)
  Downloading PyDispatcher-2.0.5.tar.gz
Collecting Twisted>=10.0.0 (from scrapy==1.1.0rc4)
  Downloading Twisted-16.1.1.tar.bz2 (2.9MB)
    100% |################################| 2.9MB 296kB/s
Collecting pyasn1-modules (from service-identity->scrapy==1.1.0rc4)
  Downloading pyasn1_modules-0.0.8-py2.py3-none-any.whl
Collecting attrs (from service-identity->scrapy==1.1.0rc4)
  Downloading attrs-15.2.0-py2.py3-none-any.whl
Collecting pyasn1 (from service-identity->scrapy==1.1.0rc4)
  Downloading pyasn1-0.1.9-py2.py3-none-any.whl
Collecting cryptography>=1.3 (from pyOpenSSL->scrapy==1.1.0rc4)
  Downloading cryptography-1.3.1.tar.gz (383kB)
    100% |################################| 389kB 857kB/s
Collecting zope.interface>=3.6.0 (from Twisted>=10.0.0->scrapy==1.1.0rc4)
  Downloading zope.interface-4.1.3.tar.gz (141kB)
    100% |################################| 143kB 773kB/s
Collecting idna>=2.0 (from cryptography>=1.3->pyOpenSSL->scrapy==1.1.0rc4)
  Downloading idna-2.1-py2.py3-none-any.whl (54kB)
    100% |################################| 61kB 1.7MB/s
Requirement already satisfied (use --upgrade to upgrade): setuptools>=11.3 in ./.virtualenvs/scrapy11rc3.py2/lib/python2.7/site-packages (from cryptography>=1.3->pyOpenSSL->scrapy==1.1.0rc4)
Collecting enum34 (from cryptography>=1.3->pyOpenSSL->scrapy==1.1.0rc4)
  Downloading enum34-1.1.4-py2.py3-none-any.whl (61kB)
    100% |################################| 61kB 1.4MB/s
Collecting ipaddress (from cryptography>=1.3->pyOpenSSL->scrapy==1.1.0rc4)
  Downloading ipaddress-1.0.16-py27-none-any.whl
Collecting cffi>=1.4.1 (from cryptography>=1.3->pyOpenSSL->scrapy==1.1.0rc4)
  Downloading cffi-1.6.0.tar.gz (397kB)
    100% |################################| 399kB 886kB/s
Collecting pycparser (from cffi>=1.4.1->cryptography>=1.3->pyOpenSSL->scrapy==1.1.0rc4)
  Downloading pycparser-2.14.tar.gz (223kB)
    100% |################################| 225kB 1.2MB/s
Building wheels for collected packages: cssselect, lxml, PyDispatcher, Twisted, cryptography, zope.interface, cffi, pycparser
  Running setup.py bdist_wheel for cssselect ... done
  Stored in directory: /home/scrapyuser/.cache/pip/wheels/1b/41/70/480fa9516ccc4853a474faf7a9fb3638338fc99a9255456dd0
  Running setup.py bdist_wheel for lxml ... done
  Stored in directory: /home/scrapyuser/.cache/pip/wheels/6c/eb/a1/e4ff54c99630e3cc6ec659287c4fd88345cd78199923544412
  Running setup.py bdist_wheel for PyDispatcher ... done
  Stored in directory: /home/scrapyuser/.cache/pip/wheels/86/02/a1/5857c77600a28813aaf0f66d4e4568f50c9f133277a4122411
  Running setup.py bdist_wheel for Twisted ... done
  Stored in directory: /home/scrapyuser/.cache/pip/wheels/bc/52/68/587c03fee7d98f2e61b8f410e0a31270d0b35e799d839fc650
  Running setup.py bdist_wheel for cryptography ... done
  Stored in directory: /home/scrapyuser/.cache/pip/wheels/f7/ff/8e/0452559598fa46ed95a14149f020e413fb8c9843a8ce057c4e
  Running setup.py bdist_wheel for zope.interface ... done
  Stored in directory: /home/scrapyuser/.cache/pip/wheels/52/04/ad/12c971c57ca6ee5e6d77019c7a1b93105b1460d8c2db6e4ef1
  Running setup.py bdist_wheel for cffi ... done
  Stored in directory: /home/scrapyuser/.cache/pip/wheels/8f/00/29/553c1b1db38bbeec3fec428ae4e400cd8349ecd99fe86edea1
  Running setup.py bdist_wheel for pycparser ... done
  Stored in directory: /home/scrapyuser/.cache/pip/wheels/9b/f4/2e/d03e949a551719a1ffcb659f2c63d8444f4df12e994ce52112
Successfully built cssselect lxml PyDispatcher Twisted cryptography zope.interface cffi pycparser
Installing collected packages: idna, pyasn1, six, enum34, ipaddress, pycparser, cffi, cryptography, pyOpenSSL, pyasn1-modules, attrs, service-identity, cssselect, queuelib, w3lib, lxml, parsel, PyDispatcher, zope.interface, Twisted, scrapy
Successfully installed PyDispatcher-2.0.5 Twisted-16.1.1 attrs-15.2.0 cffi-1.6.0 cryptography-1.3.1 cssselect-0.9.1 enum34-1.1.4 idna-2.1 ipaddress-1.0.16 lxml-3.6.0 parsel-1.0.2 pyOpenSSL-16.0.0 pyasn1-0.1.9 pyasn1-modules-0.0.8 pycparser-2.14 queuelib-1.4.2 scrapy-1.1.0rc4 service-identity-16.0.0 six-1.10.0 w3lib-1.14.2 zope.interface-4.1.3
(scrapy11rc3.py2) [scrapyuser@38e3052773ef ~]$ scrapy startproject tutorial
New Scrapy project 'tutorial', using template directory '/home/scrapyuser/.virtualenvs/scrapy11rc3.py2/lib/python2.7/site-packages/scrapy/templates/project', created in:
    /home/scrapyuser/tutorial

You can start your first spider with:
    cd tutorial
    scrapy genspider example example.com
(scrapy11rc3.py2) [scrapyuser@38e3052773ef ~]$ cd tutorial
(scrapy11rc3.py2) [scrapyuser@38e3052773ef tutorial]$ scrapy genspider example example.com
Created spider 'example' using template 'basic' in module:
  tutorial.spiders.example
(scrapy11rc3.py2) [scrapyuser@38e3052773ef tutorial]$ cat tutorial/spiders/example.py
# -*- coding: utf-8 -*-
import scrapy


class ExampleSpider(scrapy.Spider):
    name = "example"
    allowed_domains = ["example.com"]
    start_urls = (
        'http://www.example.com/',
    )

    def parse(self, response):
        pass
(scrapy11rc3.py2) [scrapyuser@38e3052773ef tutorial]$ scrapy crawl example
2016-05-04 10:51:53 [scrapy] INFO: Scrapy 1.1.0rc4 started (bot: tutorial)
2016-05-04 10:51:53 [scrapy] INFO: Overridden settings: {'NEWSPIDER_MODULE': 'tutorial.spiders', 'SPIDER_MODULES': ['tutorial.spiders'], 'ROBOTSTXT_OBEY': True, 'BOT_NAME': 'tutorial'}
2016-05-04 10:51:53 [scrapy] INFO: Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
2016-05-04 10:51:53 [scrapy] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
 'scrapy.downloadermiddlewares.chunked.ChunkedTransferMiddleware',
 'scrapy.downloadermiddlewares.stats.DownloaderStats']
2016-05-04 10:51:53 [scrapy] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
 'scrapy.spidermiddlewares.referer.RefererMiddleware',
 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
 'scrapy.spidermiddlewares.depth.DepthMiddleware']
2016-05-04 10:51:53 [scrapy] INFO: Enabled item pipelines:
[]
2016-05-04 10:51:53 [scrapy] INFO: Spider opened
2016-05-04 10:51:53 [scrapy] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
2016-05-04 10:51:53 [scrapy] DEBUG: Telnet console listening on 127.0.0.1:6023
2016-05-04 10:51:53 [scrapy] DEBUG: Crawled (404) <GET http://www.example.com/robots.txt> (referer: None)
2016-05-04 10:51:53 [scrapy] DEBUG: Crawled (200) <GET http://www.example.com/> (referer: None)
2016-05-04 10:51:53 [scrapy] INFO: Closing spider (finished)
2016-05-04 10:51:53 [scrapy] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 442,
 'downloader/request_count': 2,
 'downloader/request_method_count/GET': 2,
 'downloader/response_bytes': 1906,
 'downloader/response_count': 2,
 'downloader/response_status_count/200': 1,
 'downloader/response_status_count/404': 1,
 'finish_reason': 'finished',
 'finish_time': datetime.datetime(2016, 5, 4, 10, 51, 53, 723152),
 'log_count/DEBUG': 3,
 'log_count/INFO': 7,
 'response_received_count': 2,
 'scheduler/dequeued': 1,
 'scheduler/dequeued/memory': 1,
 'scheduler/enqueued': 1,
 'scheduler/enqueued/memory': 1,
 'start_time': datetime.datetime(2016, 5, 4, 10, 51, 53, 208724)}
2016-05-04 10:51:53 [scrapy] INFO: Spider closed (finished)
(scrapy11rc3.py2) [scrapyuser@38e3052773ef tutorial]$
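The crawl ends with "scraped 0 items" because the generated parse() method is just a stub. As a minimal sketch of a filled-in spider (the selectors are illustrative and not part of the gist; they assume nothing about example.com beyond its single heading and link):

# -*- coding: utf-8 -*-
import scrapy


class ExampleSpider(scrapy.Spider):
    name = "example"
    allowed_domains = ["example.com"]
    start_urls = (
        'http://www.example.com/',
    )

    def parse(self, response):
        # Illustrative selectors: grab the page heading and all link targets,
        # yielding one dict per page (dicts are valid items in Scrapy 1.1).
        yield {
            'title': response.css('h1::text').extract_first(),
            'links': response.xpath('//a/@href').extract(),
        }

With a parse body like this, `scrapy crawl example -o items.json` would write the yielded dicts to a JSON feed file.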
Dockerfile:
FROM centos:centos7

RUN yum update -y

# Install Python and dev headers
RUN yum install -y \
    python-devel

# Install build dependencies for cryptography
# https://cryptography.io/en/latest/installation/#building-cryptography-on-linux
RUN yum install -y \
    gcc \
    libffi-devel \
    openssl-devel

# Install build dependencies for lxml
# http://lxml.de/installation.html#requirements
RUN yum install -y \
    libxml2-devel \
    libxslt-devel

# Install pip (from EPEL) and upgrade it
RUN yum install -y epel-release
RUN yum install -y python-pip
RUN pip install --upgrade pip

# virtualenvwrapper's shell script relies on `which`, which the base image lacks
RUN yum install -y which
RUN pip install virtualenvwrapper

# Run as an unprivileged user
RUN useradd --create-home --shell /bin/bash scrapyuser
USER scrapyuser
WORKDIR /home/scrapyuser
Build the image and start a container:
sudo docker build -t redapple/scrapy-centos7 .
sudo docker run -t -i redapple/scrapy-centos7