[scraper] schedules scrapers using cron instead of delay
@@ -1,18 +1,21 @@
+const schedule = require('node-schedule');
 const scrapers = require('./scrapers');
-const { delay, sequence } = require('../lib/promises')
+const { sequence } = require('../lib/promises')
 
 function scheduleScraping() {
-  return scrapers.forEach(provider => _continuousScrape(provider))
+  const allCrons = scrapers.reduce((crons, provider) => {
+    crons[provider.cron] = (crons[provider.cron] || []).concat(provider)
+    return crons;
+  }, {});
+  Object.entries(allCrons).forEach(([cron, providers]) => schedule.scheduleJob(cron, () => _scrapeProviders(providers)))
 }
 
 function scrapeAll() {
-  return sequence(scrapers.map(provider => () => _singleScrape(provider)))
+  return _scrapeProviders(scrapers)
 }
 
-async function _continuousScrape(provider) {
-  return _singleScrape(provider)
-    .then(() => delay(provider.scrapeInterval))
-    .then(() => _continuousScrape(provider))
+async function _scrapeProviders(providers) {
+  return sequence(providers.map(provider => () => _singleScrape(provider)));
 }
 
 async function _singleScrape(provider) {
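For context: the new scheduleScraping() groups providers by their cron expression and registers one node-schedule job per distinct expression, so providers that share a schedule scrape sequentially when their job fires, instead of each looping on its own delay(). Below is a minimal standalone sketch of that pattern, with hypothetical provider names and a log line standing in for the real scrape. node-schedule accepts six-field cron expressions with a leading seconds field; '*' is used here in the day-of-month position in place of the Quartz-style '?' seen in the diff.

const schedule = require('node-schedule');

// Hypothetical providers; the real list comes from ./scrapers.
const providers = [
  { name: 'yts', cron: '0 0 */4 * * *' },   // every 4 hours
  { name: 'rarbg', cron: '0 0 */2 * * *' }, // every 2 hours
  { name: 'eztv', cron: '0 0 */4 * * *' },
];

// Group providers under their cron expression, mirroring scheduleScraping().
const byCron = providers.reduce((crons, provider) => {
  crons[provider.cron] = (crons[provider.cron] || []).concat(provider);
  return crons;
}, {});

// One scheduled job per distinct expression; yts and eztv share a job here.
Object.entries(byCron).forEach(([cron, group]) =>
  schedule.scheduleJob(cron, () =>
    group.forEach(provider => console.log(`scraping ${provider.name}`))));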
@@ -7,13 +7,13 @@ const kickassScraper = require('../scrapers/kickass/kickass_scraper');
 const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
 
 module.exports = [
-  { scraper: ytsScraper, name: ytsScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: eztvScraper, name: eztvScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: rarbgScraper, name: rarbgScraper.NAME, scrapeInterval: 2 * 60 * 60 * 1000 },
-  { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: kickassScraper, name: kickassScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
-  { scraper: leetxScraper, name: leetxScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
+  { scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' },
+  { scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' },
+  { scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, cron: '0 0 */4 ? * *' },
+  { scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */2 ? * *' },
+  { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */4 ? * *' },
+  { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' },
+  { scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' },
   // { scraper: require('../scrapers/1337x/1337x_dump_scraper') }
   // { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
   // { scraper: require('../scrapers/thepiratebay/thepiratebay_dump_scraper') }
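Note that each scrapeInterval maps one-to-one onto its new cron expression: 4 * 60 * 60 * 1000 ms (4 hours) becomes '0 0 */4 ? * *' and 2 * 60 * 60 * 1000 ms (2 hours) becomes '0 0 */2 ? * *', so the cadence is unchanged but is now anchored to the top of the hour. The refactor still relies on sequence from ../lib/promises so that providers sharing a cron slot scrape one after another rather than all at once. That helper isn't shown in this diff; a hypothetical reconstruction, assuming it takes an array of promise-returning functions, would be:

// Hypothetical reconstruction of sequence(); the real helper lives in
// ../lib/promises and may differ. It invokes the promise factories one
// at a time, awaiting each before starting the next, and resolves with
// their results in order.
async function sequence(promiseFactories) {
  const results = [];
  for (const factory of promiseFactories) {
    results.push(await factory());
  }
  return results;
}

module.exports = { sequence };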