Created
September 3, 2016 10:03
-
-
Save taizilongxu/1c9aeac336118b8fcb44e81205625393 to your computer and use it in GitHub Desktop.
scrapy常用
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 命令 | |
scrapy startproject tutorial | |
scrapy crawl dmoz # spider name: dmoz | |
scrapy shell 'http://www.example.com' # enter shell | |
# start | |
start_urls | |
# first start | |
def start_requests(self): | |
pass | |
# second start | |
def parse(self, response): | |
pass | |
# yield a POST request (form data) | |
yield scrapy.http.FormRequest(url, callback=self.parse_music, headers=headers, formdata=data) | |
# yield a GET request | |
yield scrapy.Request(url, callback=self.parse_playlist) | |
# selector | |
response.xpath('//*[@id="m-pl-container"]/li/div/a/@href').extract() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment