From a6b3b164aaa63fa0d306cf260fb76b9062b2b7ee Mon Sep 17 00:00:00 2001
From: TheBeastLT
Date: Thu, 23 Apr 2020 20:08:23 +0200
Subject: [PATCH] [scraper] schedules scrapers using cron instead of delay

---
 scraper/scheduler/scraper.js  | 17 ++++++++++-------
 scraper/scheduler/scrapers.js | 14 +++++++-------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/scraper/scheduler/scraper.js b/scraper/scheduler/scraper.js
index c6e836c..3f06643 100644
--- a/scraper/scheduler/scraper.js
+++ b/scraper/scheduler/scraper.js
@@ -1,18 +1,21 @@
+const schedule = require('node-schedule');
 const scrapers = require('./scrapers');
-const { delay, sequence } = require('../lib/promises')
+const { sequence } = require('../lib/promises')
 
 function scheduleScraping() {
-  return scrapers.forEach(provider => _continuousScrape(provider))
+  const allCrons = scrapers.reduce((crons, provider) => {
+    crons[provider.cron] = (crons[provider.cron] || []).concat(provider)
+    return crons;
+  }, {});
+  Object.entries(allCrons).forEach(([cron, providers]) => schedule.scheduleJob(cron, () => _scrapeProviders(providers)))
 }
 
 function scrapeAll() {
-  return sequence(scrapers.map(provider => () => _singleScrape(provider)))
+  return _scrapeProviders(scrapers)
 }
 
-async function _continuousScrape(provider) {
-  return _singleScrape(provider)
-      .then(() => delay(provider.scrapeInterval))
-      .then(() => _continuousScrape(provider))
+async function _scrapeProviders(providers) {
+  return sequence(providers.map(provider => () => _singleScrape(provider)));
 }
 
 async function _singleScrape(provider) {
diff --git a/scraper/scheduler/scrapers.js b/scraper/scheduler/scrapers.js
index 175ab91..b897c29 100644
--- a/scraper/scheduler/scrapers.js
+++ b/scraper/scheduler/scrapers.js
@@ -7,13 +7,13 @@ const kickassScraper = require('../scrapers/kickass/kickass_scraper');
 const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
 
 module.exports = [
-  { scraper: ytsScraper, name: ytsScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: eztvScraper, name: eztvScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: rarbgScraper, name: rarbgScraper.NAME, scrapeInterval: 2 * 60 * 60 * 1000 },
-  { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: kickassScraper, name: kickassScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: leetxScraper, name: leetxScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
+  { scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 * * *' },
+  { scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 * * *' },
+  { scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, cron: '0 0 */4 * * *' },
+  { scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */2 * * *' },
+  { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */4 * * *' },
+  { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 * * *' },
+  { scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 * * *' },
   // { scraper: require('../scrapers/1337x/1337x_dump_scraper') }
   // { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
   // { scraper: require('../scrapers/thepiratebay/thepiratebay_dump_scraper') }
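
---

For reference, a minimal, runnable sketch of the scheduling pattern this patch introduces. It assumes node-schedule's scheduleJob(expression, callback) API; the sequence() stand-in mirrors what the helper from ../lib/promises is used for here, and the provider objects are hypothetical placeholders for the real entries in scheduler/scrapers.js:

const schedule = require('node-schedule');

// Stand-in for the sequence() helper in ../lib/promises (assumed behaviour):
// runs an array of promise-returning functions strictly one after another.
function sequence(factories) {
  return factories.reduce((chain, factory) => chain.then(factory), Promise.resolve());
}

// Hypothetical providers standing in for the entries in scheduler/scrapers.js.
const providers = [
  { name: 'providerA', cron: '0 0 */4 * * *', scrape: async () => console.log('scraping providerA') },
  { name: 'providerB', cron: '0 0 */4 * * *', scrape: async () => console.log('scraping providerB') },
  { name: 'providerC', cron: '0 0 */2 * * *', scrape: async () => console.log('scraping providerC') },
];

// Group providers by cron expression so each distinct expression gets one job.
const byCron = providers.reduce((crons, provider) => {
  crons[provider.cron] = (crons[provider.cron] || []).concat(provider);
  return crons;
}, {});

// node-schedule accepts six-field cron expressions with a leading seconds
// field: '0 0 */4 * * *' fires at second 0, minute 0, every fourth hour.
Object.entries(byCron).forEach(([cron, group]) =>
    schedule.scheduleJob(cron, () => sequence(group.map(p => () => p.scrape()))));

Grouping by expression registers one job per distinct cron, so providers that share a schedule are scraped one after another inside that job, preserving the one-at-a-time behaviour of the old sequence(scrapers.map(...)) loop instead of having every provider start at the same instant.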