Last active
January 16, 2019 01:48
-
-
Save andykais/cdc12846203c524dc33ee4b0b749d994 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// current design
import ScrapePages from '../src'
import { normalizeConfig } from '../lib/normalize-config'

// Scraper config; the `scrape` section is left empty in this sketch.
const config = { scrape: {} }

// Options passed to `run()` below.
const options = {
  input: {
    username: 'bob'
  },
  optionsEach: {
    gallery: {
      downloadPriority: 1,
      logLevel: 'info'
    }
  },
  logger: {
    level: 'debug',
    useFile: 'downloads/record.log'
  }
}

// NOTE(review): the normalized config is computed but not used below; the
// scraper is still constructed from the raw `config` — confirm that is intended.
const fullConfig = normalizeConfig(config)

// The package's default export is imported as `ScrapePages`; the sketch
// previously instantiated an undeclared `Scraper`.
const siteScraper = new ScrapePages(config)
const emitter = siteScraper.run(options)

emitter.on('close', queryFor => queryFor().then(console.log))
// The sketch previously called a bare, undeclared `emit`; go through the
// emitter returned by `run()` instead.
// NOTE(review): assumes the 'stop' event accepts a completion callback — confirm.
emitter.emit('stop', () => console.log('stopped.'))
// --=== API proposals ===-- //
/**
 * First Design:
 * least stateful
 *
 * Everything is a plain function: `scraper(config, options)` returns the
 * `on` / `emit` / `query` handles, and `querier(config, options)` returns a
 * separate query function.
 */
import { scraper, querier } from 'scraper'
import * as util from 'scraper/util'

// utils
const scrapers = util.flatten(config)
const fullConfig = util.normalize(config)
util.verify({ config, options })

// main
const { on, emit, query } = scraper(config, options)
on('done', () => {
  const results = query({ scrapers: ['image'] })
})
emit('stop')

// instantiate another sqlite db instance
// Given its own name: `query` is already bound by the destructuring above, and
// redeclaring a `const` in the same scope is a SyntaxError.
const standaloneQuery = querier(config, options)
/**
 * Second Design:
 * class instance only contains config state, re-run with different options/input
 *
 * `createScraper(config)` builds the instance once; each `run(options)` call
 * returns its own `on` / `emit` / `query` handles.
 */
import * as scraper from 'scraper' // util is available here too
import * as util from 'scraper/util'

// utils
const scrapers = util.flatten(config)
const fullConfig = util.normalize(config)
util.verifyConfig(config)
util.verifyOptions(options)

// main
const siteScraper = scraper.createScraper(config)
const { on, emit, query } = siteScraper.run(options) // push all async tasks into observable (creating folders)
on('done', () => {
  const results = query({ scrapers: ['image'] })
})
emit('stop')

// instantiate another sqlite db instance
// Given its own name: `query` is already bound by the destructuring above, and
// redeclaring a `const` in the same scope is a SyntaxError.
const standaloneQuery = scraper.createQuerier(config, options)
/**
 * Third Design:
 * most stateful, but allows instantiating two classes pointing at db
 * one instance for scraping and one for querying (possibly two different worker threads)
 */
import { Scraper } from 'scraper'

// utils
// NOTE(review): `verify` is called below with a bare config and again with
// `{ config, options }` — confirm which signature(s) the API should accept.
Scraper.verify(config)
const fullConfig = Scraper.normalize(config)
const scrapers = Scraper.flatten(config)
Scraper.verify({ config, options })

// main
const siteScraper = new Scraper(config, options)
await siteScraper.init() // create folders, instantiate sqlite db, sync or async?
siteScraper.start()

// Named distinctly so the callback-local `results` below does not shadow it.
const earlyResults = siteScraper.query({ scrapers: ['image'] }) // query can be called w/out calling start(), so long as init() has been called

siteScraper.on('done', () => {
  const results = siteScraper.query({ scrapers: ['image'] })
})
siteScraper.emit('stop')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment