Run a Scrapy spider from a script

import scrapy

#--run a crawler in a script stuff
from pydispatch             import dispatcher
from scrapy                 import signals
from scrapy.crawler         import CrawlerProcess
from pydispatch             import dispatcher
from scrapy.utils.project   import get_project_settings
#--run a crawler in a script stuff

#--the spiders
from path.to.proyect.spiders.mySpider import mySpider
#--the spiders

def run_a_spider_on_script(spider, signal=signals.item_passed, slot=None):
    '''
    @brief  Run the given spider from a script. If a slot is given, connect
            it to the signal before the crawl starts.

    @param  spider
            The spider class to crawl with. It is handed to the crawler
            process as-is; this function does not instantiate it.

    @param  signal
            Scrapy signal the slot is connected to (default: item_passed).
            NOTE(review): item_passed looks like a legacy alias of
            item_scraped in Scrapy -- confirm against the installed version.

    @param  slot
            Callable to invoke when the signal is triggered. When None
            (the default), nothing is connected.
    '''
    # The process that executes the spider, configured from the project settings
    process = CrawlerProcess(get_project_settings())

    # Connect the slot only when the caller supplied one, so that it is
    # executed whenever the signal triggers
    if slot is not None:
        dispatcher.connect(slot, signal)

    # Schedule the spider in the process and start it.
    # NOTE(review): per the Scrapy docs, start() runs the crawl and blocks
    # until it finishes -- confirm for the installed version.
    process.crawl(spider)
    process.start()

dianjuar/run scrapy spider on python script.md

Run a Scrapy spider on script

Ravisiswaliya commented Dec 17, 2017

Uh oh!

selfcontrol7 commented Sep 9, 2020

Uh oh!

selfcontrol7 commented Sep 9, 2020 •

edited

Loading

Uh oh!

dianjuar/run scrapy spider on python script.md

Run a Scrapy spider on script

Ravisiswaliya commented Dec 17, 2017

Uh oh!

selfcontrol7 commented Sep 9, 2020

Uh oh!

selfcontrol7 commented Sep 9, 2020 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

selfcontrol7 commented Sep 9, 2020 •

edited

Loading