import scrapy
#-- imports needed to run a crawler from a script
from pydispatch import dispatcher
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
#-- imports needed to run a crawler from a script
#--the spiders
from path.to.proyect.spiders.mySpider import mySpider
#--the spiders
def run_a_spider_on_script(spider, signal=signals.item_passed, slot=None):
    '''
    @brief Run the given spider. If a signal and a slot are given, connect them.
    @param spider
        The spider class to run
    @param signal
        Scrapy signal to listen for (default: item_passed)
    @param slot
        Function to call when the signal is triggered
    '''
    # The process that will execute the spider, configured with the project settings
    process = CrawlerProcess( get_project_settings() )
    # If a slot was given...
    if slot is not None:
        # ...connect the signal with the slot:
        # when the signal is triggered, the slot is executed
        dispatcher.connect( slot, signal )
    # Schedule the spider class in the process and start crawling
    # (this call blocks until the crawl finishes)
    process.crawl( spider )
    process.start()
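
# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original gist). It assumes this file
# lives inside the Scrapy project so that get_project_settings() finds the
# settings, and it uses a hypothetical slot function "print_item". The signal
# signals.item_scraped is passed explicitly; the function's default,
# item_passed, is an older alias that recent Scrapy releases may not define.
def print_item(item, spider, **kwargs):
    # Called for every item the spider scrapes
    print('%s scraped: %r' % (spider.name, item))

if __name__ == '__main__':
    # Run the imported spider class and print each scraped item
    run_a_spider_on_script(mySpider, signal=signals.item_scraped, slot=print_item)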
Function to run a Scrapy crawler from a Python script.
I could make it download the content.
Also, I noticed that we do not need to instantiate the spider in the function; passing the class as a parameter does the work.
Thank you again.
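
For reference, a minimal sketch of that observation (reusing the placeholder import path and spider class from the gist): CrawlerProcess.crawl() accepts the spider class itself and instantiates it internally, so no manual instantiation is needed.

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from path.to.proyect.spiders.mySpider import mySpider

process = CrawlerProcess( get_project_settings() )
process.crawl( mySpider )   # pass the class; Scrapy creates the instance
process.start()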