Created
May 17, 2018 12:10
-
-
Save JBPressac/a798236f150d7cb57a763f20982c3d54 to your computer and use it in GitHub Desktop.
Spider d'extraction de la page d'accueil du forum Python pour le tutoriel Scrapy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf8 | |
import scrapy | |
class DeveloppezSpyder(scrapy.Spider):
    """Spider that extracts the thread list from the developpez.net Python forum.

    It requests the single forum index page listed in ``start_urls`` and
    yields one dict per discussion thread found on that page.
    """

    name = 'forum-python-developpez'
    start_urls = [
        'https://www.developpez.net/forums/f1155/autres-langages/python-zope/general-python/',
    ]

    def parse(self, response):
        """Yield one record per discussion thread found in ``response``.

        Each record is a dict with the keys ``'titre'`` (thread title),
        ``'auteur'`` (author user name) and ``'date'``.

        NOTE(review): ``'titre'`` and ``'auteur'`` come from
        ``extract_first()`` (a single value), whereas ``'date'`` is the raw
        result of ``.re()`` -- i.e. all regex matches, not just the first.
        """
        for thread in response.css('#threads .inner'):
            title = thread.css('.threadtitle a::text').extract_first()
            author = thread.css('a.username::text').extract_first()
            # The pattern accepts either a dd/mm/yyyy date or one of the
            # relative day words "Hier" / "Aujourd'hui".
            posted = thread.css('.author span::text').re(
                r'\d{2}/\d{2}/\d{4}|Hier|Aujourd\'hui'
            )
            yield {
                'titre': title,
                'auteur': author,
                'date': posted,
            }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment