[scraper] rework scraper scheduling and add seeder updates

TheBeastLT
2020-04-23 16:33:08 +02:00
parent d01e2c9b35
commit 9ea3932af1
19 changed files with 128 additions and 108 deletions


@@ -0,0 +1,13 @@
const { scheduleScraping, scrapeAll } = require('./scraper');
const { scheduleUpdateSeeders } = require('./seeders');

// With ENABLE_SCHEDULING set, every scraper runs continuously on its
// configured interval and seeder counts are refreshed in the background;
// otherwise all providers are scraped once and the process is done.
function startScraper() {
  if (process.env.ENABLE_SCHEDULING) {
    scheduleScraping();
    scheduleUpdateSeeders();
  } else {
    scrapeAll();
  }
}

module.exports = { startScraper };
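
For context, a minimal sketch of how this entrypoint could be wired up from the service's main module (the file name and require path below are assumptions, not part of this commit):

const { startScraper } = require('./scraper');

// ENABLE_SCHEDULING=1 node index.js -> continuous scraping plus seeder updates
// node index.js                     -> single scrape pass over all providers
startScraper();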


@@ -0,0 +1,25 @@
const scrapers = require('./scrapers');
const { delay, sequence } = require('../lib/promises');

// Start an endless scrape loop for every provider in parallel.
function scheduleScraping() {
  scrapers.forEach(provider => _continuousScrape(provider));
}

// Scrape every provider exactly once, one after another.
function scrapeAll() {
  return sequence(scrapers.map(provider => () => _singleScrape(provider)));
}

async function _continuousScrape(provider) {
  return _singleScrape(provider)
      .then(() => delay(provider.scrapeInterval))
      .then(() => _continuousScrape(provider));
}

async function _singleScrape(provider) {
  return provider.scraper.scrape().catch(error => {
    console.warn(`Failed ${provider.name} scraping due to:`, error);
    return Promise.resolve();
  });
}

module.exports = { scheduleScraping, scrapeAll };
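
The delay and sequence helpers come from ../lib/promises, which this diff does not touch; a plausible sketch of their shape, for reference only:

// delay(ms): resolve after the given number of milliseconds
function delay(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

// sequence(factories): run promise-returning factories strictly one after
// another, resolving with the collected results
function sequence(promiseFactories) {
  return promiseFactories.reduce(
      (chain, factory) => chain.then(results => factory().then(result => results.concat([result]))),
      Promise.resolve([]));
}

module.exports = { delay, sequence };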


@@ -0,0 +1,22 @@
const thepiratebayScraper = require('../scrapers/thepiratebay/thepiratebay_scraper');
const horribleSubsScraper = require('../scrapers/horriblesubs/horriblesubs_scraper');
const ytsScraper = require('../scrapers/yts/yts_scraper');
const eztvScraper = require('../scrapers/eztv/eztv_scraper');
const leetxScraper = require('../scrapers/1337x/1337x_scraper');
const kickassScraper = require('../scrapers/kickass/kickass_scraper');
const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
// scrapeInterval is in milliseconds; rarbg is polled twice as often as the others.
module.exports = [
  { scraper: ytsScraper, name: ytsScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  { scraper: eztvScraper, name: eztvScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  { scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  { scraper: rarbgScraper, name: rarbgScraper.NAME, scrapeInterval: 2 * 60 * 60 * 1000 },
  { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  { scraper: kickassScraper, name: kickassScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  { scraper: leetxScraper, name: leetxScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },
  // One-off dump scrapers, enabled manually when needed:
  // { scraper: require('../scrapers/1337x/1337x_dump_scraper') },
  // { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') },
  // { scraper: require('../scrapers/thepiratebay/thepiratebay_dump_scraper') },
  // { scraper: require('../scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper') },
  // { scraper: require('../scrapers/thepiratebay/thepiratebay_update_size_scraper') },
];
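
A provider module only has to expose NAME, scrape(), and (for the seeders updater below) updateSeeders() to fit this shape, so registering another provider is a single entry, e.g. (hypothetical):

// { scraper: exampleScraper, name: exampleScraper.NAME, scrapeInterval: 4 * 60 * 60 * 1000 },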


@@ -0,0 +1,41 @@
const Bottleneck = require('bottleneck');
const scrapers = require('./scrapers');
const repository = require('../lib/repository');
const { delay } = require('../lib/promises');
const { updateCurrentSeeders } = require('../lib/torrent');
const { updateTorrentSeeders } = require('../lib/torrentEntries');

const DELAY = 15 * 1000; // pause between update rounds (15 seconds)
const limiter = new Bottleneck({ maxConcurrent: 20, minTime: 250 });
const forceSeedersLimiter = new Bottleneck({ maxConcurrent: 5 });

// Endless loop: fetch the torrents due for a seeders refresh, update them
// through the rate limiter, wait DELAY, then start the next round.
function scheduleUpdateSeeders() {
  console.log('Starting seeders update...');
  return repository.getUpdateSeedersTorrents()
      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => _updateSeeders(torrent)))))
      .then(() => console.log('Finished seeders update'))
      .then(() => delay(DELAY))
      .then(() => scheduleUpdateSeeders());
}

async function _updateSeeders(torrent) {
  const provider = scrapers.find(provider => provider.name === torrent.provider);
  const updatedTorrents = await provider.scraper.updateSeeders(torrent, getImdbIdsMethod(torrent))
      .then(updated => Array.isArray(updated) ? updated : [updated])
      .catch(() => []);

  // If the provider did not refresh this exact torrent, fall back to
  // querying the trackers for its current seeders directly.
  if (!updatedTorrents.find(updated => updated.infoHash === torrent.infoHash)) {
    await forceSeedersLimiter.schedule(() => updateCurrentSeeders(torrent))
        .then(updated => updatedTorrents.push(updated));
  }

  return Promise.all(updatedTorrents.map(updated => updateTorrentSeeders(updated)));
}

// Builds a lazy getter for the distinct imdbIds of the torrent's files.
// Deliberately not an async function: updateSeeders expects to receive the
// getter itself, not a promise wrapping it.
function getImdbIdsMethod(torrent) {
  return () => repository.getFiles(torrent)
      .then(files => files.map(file => file.imdbId).filter(id => id))
      .then(ids => Array.from(new Set(ids)));
}

module.exports = { scheduleUpdateSeeders };
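
Bottleneck semantics relied on above: minTime: 250 spaces job starts at least 250 ms apart (at most four per second) and maxConcurrent: 20 caps jobs in flight, while forceSeedersLimiter allows only five concurrent direct tracker checks. A minimal standalone illustration of the scheduling pattern:

const Bottleneck = require('bottleneck');

const limiter = new Bottleneck({ maxConcurrent: 20, minTime: 250 });

// schedule() queues the task and resolves with its result once it has run
limiter.schedule(() => Promise.resolve('done')).then(console.log);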