[scraper] schedules scrapers using cron instead of delay

This commit is contained in:
TheBeastLT
2020-04-23 20:08:23 +02:00
parent aa0bf8da44
commit a6b3b164aa
2 changed files with 17 additions and 14 deletions

View File

@@ -1,18 +1,21 @@
const schedule = require('node-schedule');
const scrapers = require('./scrapers');
const { delay, sequence } = require('../lib/promises');
/**
 * Registers the scraping jobs with the scheduler.
 *
 * Providers from `scrapers` are grouped by their `cron` property, and one job
 * is registered through `schedule.scheduleJob` per distinct cron expression.
 * When a job fires it passes its whole group to `_scrapeProviders`.
 *
 * @returns {void}
 */
function scheduleScraping() {
  // Group by cron expression so providers sharing a schedule share one job.
  const providersByCron = scrapers.reduce((groups, provider) => {
    groups[provider.cron] = (groups[provider.cron] || []).concat(provider);
    return groups;
  }, {});

  for (const [cron, providers] of Object.entries(providersByCron)) {
    schedule.scheduleJob(cron, () => _scrapeProviders(providers));
  }
}
/**
 * Runs a one-off scrape of every provider in `scrapers`.
 *
 * Delegates to `_scrapeProviders`, the same helper the scheduled jobs use,
 * so both paths share one implementation.
 *
 * @returns {*} Whatever `_scrapeProviders(scrapers)` returns.
 */
function scrapeAll() {
  return _scrapeProviders(scrapers);
}
/**
 * Scrapes a group of providers.
 *
 * Each provider is wrapped in a zero-argument function that calls
 * `_singleScrape(provider)`, and the list of those functions is handed to
 * `sequence` from `../lib/promises`.
 * NOTE(review): `sequence` presumably invokes them one after another — confirm
 * against its implementation.
 *
 * @param {Array} providers - Providers to scrape, in the order given.
 * @returns {Promise<*>} Resolves with whatever `sequence` yields.
 */
async function _scrapeProviders(providers) {
  // Thunks, not promises: nothing starts until `sequence` calls each one.
  const tasks = providers.map((provider) => () => _singleScrape(provider));
  return sequence(tasks);
}
async function _singleScrape(provider) {