mirror of https://github.com/knightcrawler-stremio/knightcrawler.git
[scraper] rework scraper scheduling and add seeders updating
scraper/scheduler/scheduler.js  (new file, 13 lines)
@@ -0,0 +1,13 @@
const { scheduleScraping, scrapeAll } = require('./scraper')
const { scheduleUpdateSeeders } = require('./seeders')

function startScraper() {
  if (process.env.ENABLE_SCHEDULING) {
    scheduleScraping();
    scheduleUpdateSeeders();
  } else {
    scrapeAll()
  }
}

module.exports = { startScraper }
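The scheduler module exposes a single entry point. As a rough illustration only (the service's bootstrap file is not part of this commit), a caller would wire it up along these lines, with ENABLE_SCHEDULING deciding between continuous per-provider scheduling and a one-off full scrape:

// Hypothetical bootstrap, not included in this commit.
const { startScraper } = require('./scheduler/scheduler');

// With ENABLE_SCHEDULING set, each provider is scraped on its own interval
// and the seeders update loop is started; without it, all providers are
// scraped once in sequence.
startScraper();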
scraper/scheduler/scraper.js  (new file, 25 lines)
@@ -0,0 +1,25 @@
const scrapers = require('./scrapers');
const { delay, sequence } = require('../lib/promises')

function scheduleScraping() {
  return scrapers.forEach(provider => _continuousScrape(provider))
}

function scrapeAll() {
  return sequence(scrapers.map(provider => () => _singleScrape(provider)))
}

async function _continuousScrape(provider) {
  return _singleScrape(provider)
      .then(() => delay(provider.scrapeInterval))
      .then(() => _continuousScrape(provider))
}

async function _singleScrape(provider) {
  return provider.scraper.scrape().catch(error => {
    console.warn(`Failed ${provider.name} scraping due to: `, error);
    return Promise.resolve()
  })
}

module.exports = { scheduleScraping, scrapeAll }
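Both scheduling paths rely on delay and sequence from ../lib/promises, which are not included in this diff. A minimal sketch of the behaviour the scheduler assumes from them (the real module may differ):

// Assumed helpers, sketched for context only.
function delay(ms) {
  // resolves after the given number of milliseconds
  return new Promise(resolve => setTimeout(resolve, ms));
}

function sequence(promiseFns) {
  // runs an array of promise-returning functions one after another
  // and collects their results
  return promiseFns.reduce(
      (chain, fn) => chain.then(results => fn().then(result => [...results, result])),
      Promise.resolve([]));
}

module.exports = { delay, sequence };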
scraper/scheduler/scrapers.js  (new file, 22 lines)
@@ -0,0 +1,22 @@
const thepiratebayScraper = require('../scrapers/thepiratebay/thepiratebay_scraper');
const horribleSubsScraper = require('../scrapers/horriblesubs/horriblesubs_scraper');
const ytsScraper = require('../scrapers/yts/yts_scraper');
const eztvScraper = require('../scrapers/eztv/eztv_scraper');
const leetxScraper = require('../scrapers/1337x/1337x_scraper');
const kickassScraper = require('../scrapers/kickass/kickass_scraper');
const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');

module.exports = [
  { scraper: ytsScraper, name: ytsScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  { scraper: eztvScraper, name: eztvScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  { scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  { scraper: rarbgScraper, name: rarbgScraper.NAME, scrapeInterval: 2 * 60 * 60 * 1000 },
  { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  { scraper: kickassScraper, name: kickassScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  { scraper: leetxScraper, name: leetxScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  // { scraper: require('../scrapers/1337x/1337x_dump_scraper') }
  // { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
  // { scraper: require('../scrapers/thepiratebay/thepiratebay_dump_scraper') }
  // { scraper: require('../scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper') }
  // { scraper: require('../scrapers/thepiratebay/thepiratebay_update_size_scraper') }
];
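Each entry pairs a scraper module with its display name and the interval, in milliseconds, between scrape runs: two hours for RARBG, four hours for the rest. The scheduler and the seeders updater only depend on the module shape sketched below; the concrete scrapers under ../scrapers/ are not part of this commit, so this is an inferred interface, not their actual code:

// Hypothetical provider module shape, inferred from how scraper.js and
// seeders.js use it.
const NAME = 'ExampleProvider';

async function scrape() {
  // fetch the latest entries from the provider and store them
}

async function updateSeeders(torrent, getImdbIds) {
  // refresh seeders/leechers for a stored torrent; may resolve to a single
  // updated record or to an array of them
  return torrent;
}

module.exports = { NAME, scrape, updateSeeders };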
scraper/scheduler/seeders.js  (new file, 41 lines)
@@ -0,0 +1,41 @@
const Bottleneck = require('bottleneck');
const scrapers = require('./scrapers');
const repository = require('../lib/repository')
const { delay } = require('../lib/promises')
const { updateCurrentSeeders } = require('../lib/torrent')
const { updateTorrentSeeders } = require('../lib/torrentEntries')

const DELAY = 15 * 1000; // 15 seconds
const limiter = new Bottleneck({ maxConcurrent: 20, minTime: 250 }); // at most 20 in-flight updates, a new one every 250 ms
const forceSeedersLimiter = new Bottleneck({ maxConcurrent: 5 }); // stricter cap for direct seeders lookups

function scheduleUpdateSeeders() {
  console.log('Starting seeders update...')
  return repository.getUpdateSeedersTorrents()
      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => _updateSeeders(torrent)))))
      .then(() => console.log('Finished seeders update'))
      .then(() => delay(DELAY))
      .then(() => scheduleUpdateSeeders());
}

async function _updateSeeders(torrent) {
  const provider = scrapers.find(provider => provider.name === torrent.provider);
  const updatedTorrents = await provider.scraper.updateSeeders(torrent, getImdbIdsMethod(torrent))
      .then(updated => Array.isArray(updated) ? updated : [updated])
      .catch(() => []);

  if (!updatedTorrents.find(updated => updated.infoHash === torrent.infoHash)) {
    await forceSeedersLimiter.schedule(() => updateCurrentSeeders(torrent))
        .then(updated => updatedTorrents.push(updated));
  }

  return Promise.all(updatedTorrents.map(updated => updateTorrentSeeders(updated)))
}

async function getImdbIdsMethod(torrent) {
  return () => repository.getFiles(torrent)
      .then(files => files.map(file => file.imdbId).filter(id => id))
      .then(ids => Array.from(new Set(ids)));
}

module.exports = { scheduleUpdateSeeders }
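For context, the two limiters translate to roughly this throughput: the main limiter keeps up to 20 updates in flight and starts a new one at most every 250 ms (about four per second), while the force limiter caps direct seeders lookups at five concurrent requests. A standalone sketch of how the bottleneck package is used here, for illustration only:

// Illustration only; mirrors how seeders.js rate-limits its work.
const Bottleneck = require('bottleneck');

const limiter = new Bottleneck({ maxConcurrent: 20, minTime: 250 });

async function demo() {
  const jobs = Array.from({ length: 5 }, (_, i) =>
      limiter.schedule(() => Promise.resolve(`torrent-${i} updated`)));
  console.log(await Promise.all(jobs));
}

demo();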