[scraper] schedules scrapers using cron instead of delay

Author: TheBeastLT
Date: 2020-04-23 20:08:23 +02:00
parent aa0bf8da44
commit a6b3b164aa
2 changed files with 17 additions and 14 deletions
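
The change replaces delay-based self-rescheduling (each provider recursively re-queued itself after a fixed scrapeInterval) with cron-based scheduling: providers are grouped by their cron expression and a single node-schedule job is registered per group. Note the semantics shift slightly: the old delay ran a fixed interval after each scrape finished, while cron fires at fixed wall-clock times. A minimal sketch of the grouping pattern, with hypothetical inline providers standing in for the real list in ./scrapers:

const schedule = require('node-schedule');

// Hypothetical providers; the real list lives in ./scrapers.
const providers = [
  { name: 'yts', cron: '0 0 */4 * * *' },
  { name: 'eztv', cron: '0 0 */4 * * *' },
  { name: 'rarbg', cron: '0 0 */2 * * *' },
];

// Group providers by their cron expression...
const byCron = providers.reduce((groups, provider) => {
  (groups[provider.cron] = groups[provider.cron] || []).push(provider);
  return groups;
}, {});

// ...and register one scheduled job per distinct expression.
Object.entries(byCron).forEach(([cron, group]) =>
    schedule.scheduleJob(cron, () => group.forEach(p => console.log('scraping', p.name))));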

@@ -1,18 +1,21 @@
+const schedule = require('node-schedule');
 const scrapers = require('./scrapers');
-const { delay, sequence } = require('../lib/promises')
+const { sequence } = require('../lib/promises')
 
 function scheduleScraping() {
-  return scrapers.forEach(provider => _continuousScrape(provider))
+  const allCrons = scrapers.reduce((crons, provider) => {
+    crons[provider.cron] = (crons[provider.cron] || []).concat(provider)
+    return crons;
+  }, {});
+  Object.entries(allCrons).forEach(([cron, providers]) => schedule.scheduleJob(cron, () => _scrapeProviders(providers)))
 }
 
 function scrapeAll() {
-  return sequence(scrapers.map(provider => () => _singleScrape(provider)))
+  return _scrapeProviders(scrapers)
 }
 
-async function _continuousScrape(provider) {
-  return _singleScrape(provider)
-      .then(() => delay(provider.scrapeInterval))
-      .then(() => _continuousScrape(provider))
+async function _scrapeProviders(providers) {
+  return sequence(providers.map(provider => () => _singleScrape(provider)));
 }
 
 async function _singleScrape(provider) {
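
The new _scrapeProviders still funnels everything through sequence, so providers sharing a cron slot are scraped one at a time rather than in parallel. The helper itself lives in ../lib/promises and is not shown in this diff; a plausible sketch, under the assumption that it runs promise-returning thunks serially:

// Assumed shape of the sequence helper from ../lib/promises (not part of
// this diff): await each task in order and collect the results.
async function sequence(tasks) {
  const results = [];
  for (const task of tasks) {
    results.push(await task());
  }
  return results;
}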

@@ -7,13 +7,13 @@ const kickassScraper = require('../scrapers/kickass/kickass_scraper');
 const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
 
 module.exports = [
-  { scraper: ytsScraper, name: ytsScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: eztvScraper, name: eztvScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: rarbgScraper, name: rarbgScraper.NAME, scrapeInterval: 2 * 60 * 60 * 1000 },
-  { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: kickassScraper, name: kickassScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: leetxScraper, name: leetxScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
+  { scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' },
+  { scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' },
+  { scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, cron: '0 0 */4 ? * *' },
+  { scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */2 ? * *' },
+  { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */4 ? * *' },
+  { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' },
+  { scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' },
   // { scraper: require('../scrapers/1337x/1337x_dump_scraper') }
   // { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
   // { scraper: require('../scrapers/thepiratebay/thepiratebay_dump_scraper') }
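
The replaced scrapeInterval values (4 * 60 * 60 * 1000 and 2 * 60 * 60 * 1000 milliseconds) map onto six-field, seconds-first cron expressions: '0 0 */4 ? * *' fires at second 0, minute 0 of every fourth hour, and the rarbg entry keeps its tighter two-hour cadence with '0 0 */2 ? * *'. The ? in the day-of-month field is Quartz-style notation; with the plain six-field syntax that node-schedule documents, the same schedule would be written with a *, as in this sketch:

const schedule = require('node-schedule');

// Seconds-first, six-field cron: fires at 00:00:00, 04:00:00, 08:00:00, ...
schedule.scheduleJob('0 0 */4 * * *', () => console.log('every 4 hours'));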