import scrapy
#--run a crawler in a script stuff
from pydispatch import dispatcher
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
#--run a crawler in a script stuff
#--the spiders
from path.to.project.spiders.mySpider import mySpider
#--the spiders
def run_a_spider_on_script(spider, signal=signals.item_passed, slot=None):
    '''
    @brief Run the given spider. If a signal and a slot are given, connect them.
    @param spider
        The spider class to run
    @param signal
        Scrapy signal to listen for (default: item_passed)
    @param slot
        Function to call when the signal is triggered
    '''
    # The spider (note: this instance is never used; process.crawl() below
    # takes the spider class directly)
    spiderObj = spider()
    # The process that will execute the spider, using the project settings
    process = CrawlerProcess(get_project_settings())
    # If a slot was given...
    if slot is not None:
        # Connect the signal with the slot:
        # when the signal triggers, the slot is executed
        dispatcher.connect(slot, signal)
    # Schedule the spider in the process and start crawling (blocks until finished)
    process.crawl(spider)
    process.start()
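
# Usage sketch (not part of the original gist; the names below are illustrative).
# It assumes the mySpider class imported above and a hypothetical slot, print_item,
# that handles every scraped item. The slot takes **kwargs because the signal also
# delivers keyword arguments such as response and spider.
# Note: on recent Scrapy versions the signal may be named signals.item_scraped
# rather than signals.item_passed.
def print_item(item, **kwargs):
    # Hypothetical handler: print each item as it is scraped
    print(item)

if __name__ == '__main__':
    # Pass the spider class itself (no need to instantiate it) and connect the slot
    run_a_spider_on_script(mySpider, slot=print_item)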
Function to run a Scrapy crawler from a Python script.
Thank you for this code.
But, please, how do I make it crawl the site and download the content?
I was able to make it download the content.
I also noticed that we do not need to instantiate the spider in the function; passing the class as a parameter does the work.
Thank you again.
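For anyone wondering how to actually get the scraped data onto disk, one possibility (a minimal sketch, not part of the gist; save_item and items.jl are made-up names) is to pass a slot that appends each scraped item to a JSON Lines file:

import json

def save_item(item, **kwargs):
    # Hypothetical slot: append each scraped item to a JSON Lines file
    with open('items.jl', 'a', encoding='utf-8') as f:
        f.write(json.dumps(dict(item)) + '\n')

run_a_spider_on_script(mySpider, slot=save_item)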
Hello,
Your script runs properly but it is not downloading any content.