[scraper] adds retries for rarbg

This commit is contained in:
TheBeastLT
2020-04-15 16:47:53 +02:00
parent 0133adbbda
commit adc78fdea7
2 changed files with 11 additions and 3 deletions

View File

@@ -17,7 +17,7 @@ const entryLimiter = new Bottleneck({ maxConcurrent: 40 });
async function scrape() { async function scrape() {
console.log(`[${moment()}] starting ${NAME} dump scrape...`); console.log(`[${moment()}] starting ${NAME} dump scrape...`);
//const movieImdbIds = require('./rargb_movie_imdb_ids_2020-03-09.json'); //const movieImdbIds = require('./rargb_movie_imdb_ids_2020-03-09.json');
const seriesImdbIds = require('./rargb_series_imdb_ids_2020-03-09.json').slice(800); const seriesImdbIds = require('./rargb_series_imdb_ids_2020-03-09.json');
//const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds); //const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
return Promise.all( return Promise.all(
@@ -26,7 +26,7 @@ async function scrape() {
.then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`)); .then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
} }
async function getTorrentsForImdbId(imdbId) { async function getTorrentsForImdbId(imdbId, retries = 5) {
return rarbg.search(imdbId, { limit: 100, sort: 'seeders', format: 'json_extended', ranked: 0 }, 'imdb') return rarbg.search(imdbId, { limit: 100, sort: 'seeders', format: 'json_extended', ranked: 0 }, 'imdb')
.then(torrents => torrents.map(torrent => ({ .then(torrents => torrents.map(torrent => ({
name: torrent.title, name: torrent.title,
@@ -44,6 +44,10 @@ async function getTorrentsForImdbId(imdbId) {
return torrents; return torrents;
}) })
.catch(error => { .catch(error => {
if (retries > 0) {
console.log(`Retrying ${NAME} request for ${imdbId}...`);
return getTorrentsForImdbId(imdbId, retries - 1);
}
console.warn(`Failed ${NAME} request for ${imdbId}: `, error); console.warn(`Failed ${NAME} request for ${imdbId}: `, error);
return []; return [];
}); });

View File

@@ -60,12 +60,16 @@ async function scrapeLatestTorrents() {
.then(entries => entries.reduce((a, b) => a.concat(b), [])); .then(entries => entries.reduce((a, b) => a.concat(b), []));
} }
async function scrapeLatestTorrentsForCategory(category) { async function scrapeLatestTorrentsForCategory(category, retries = 5) {
console.log(`Scrapping ${NAME} ${category} category`); console.log(`Scrapping ${NAME} ${category} category`);
return rarbg.list({ category: category, limit: 100, sort: 'last', format: 'json_extended', ranked: 0 }) return rarbg.list({ category: category, limit: 100, sort: 'last', format: 'json_extended', ranked: 0 })
.then(results => results.map(result => toTorrent(result))) .then(results => results.map(result => toTorrent(result)))
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t))))) .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))
.catch(error => { .catch(error => {
if (retries > 0) {
console.log(`Retrying ${NAME} request for ${category}...`);
return scrapeLatestTorrentsForCategory(category, retries - 1);
}
console.warn(`Failed ${NAME} scrapping for ${category} due: `, error); console.warn(`Failed ${NAME} scrapping for ${category} due: `, error);
return Promise.resolve([]); return Promise.resolve([]);
}); });