[scraper] updates scrapers and unique index

commit 326a07b82e
parent 1efaa0451c
Author: TheBeastLT
Date: 2020-03-14 22:25:51 +01:00
13 changed files with 110 additions and 57 deletions

@@ -16,12 +16,13 @@ const entryLimiter = new Bottleneck({ maxConcurrent: 40 });
 async function scrape() {
   console.log(`[${moment()}] starting ${NAME} dump scrape...`);
-  const movieImdbIds = require('./rargb_movie_imdb_ids_2020-03-09.json');
-  const seriesImdbIds = require('./rargb_series_imdb_ids_2020-03-09.json');
-  const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
+  //const movieImdbIds = require('./rargb_movie_imdb_ids_2020-03-09.json');
+  const seriesImdbIds = require('./rargb_series_imdb_ids_2020-03-09.json').slice(800);
+  //const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
-  return Promise.all(allImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId)
-      .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t))))))))
+  return Promise.all(
+      seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
+          .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
       .then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
 }
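
This hunk skips the movie dump, resumes the series dump at offset 800, and moves the .then chain outside limiter.schedule, so the outer limiter gates only the catalog fetch while entryLimiter gates record processing. A minimal runnable sketch of that two-limiter pattern, with stand-ins for getTorrentsForImdbId and processTorrentRecord (their real implementations live elsewhere in this repo):

    // Two Bottleneck limiters: one caps concurrent catalog fetches,
    // the other caps concurrent record processing.
    const Bottleneck = require('bottleneck');

    const limiter = new Bottleneck({ maxConcurrent: 5 });
    const entryLimiter = new Bottleneck({ maxConcurrent: 40 });

    // Stand-ins for the repo's real functions.
    const getTorrentsForImdbId = imdbId =>
        Promise.resolve([{ imdbId, title: 'stub torrent' }]);
    const processTorrentRecord = torrent => Promise.resolve(torrent);

    function scrapeIds(imdbIds) {
      return Promise.all(imdbIds.map(imdbId =>
          limiter.schedule(() => getTorrentsForImdbId(imdbId))
              .then(torrents => Promise.all(
                  torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))));
    }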

@@ -20,7 +20,7 @@ async function scrape() {
   return scrapeLatestTorrents()
       .then(() => {
         lastScrape.lastScraped = scrapeStart;
-        return repository.updateProvider(lastScrape);
+        return lastScrape.save();
       })
       .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
 }
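
Persisting the scrape timestamp now goes through the model instance itself instead of a repository helper. A hedged sketch, assuming lastScrape is a Sequelize-style instance whose save() returns a promise (the model definition is not part of this hunk); recordScrape is a hypothetical wrapper for illustration:

    // Hypothetical wrapper; assumes lastScrape is an ORM model instance.
    function recordScrape(lastScrape, scrapeStart) {
      lastScrape.lastScraped = scrapeStart; // update the tracked timestamp
      return lastScrape.save();             // persist this instance directly
    }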
@@ -64,8 +64,8 @@ async function scrapeLatestTorrentsForCategory(category) {
       })))
       .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))
       .catch(error => {
-        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
-        return Promise.resolve();
+        console.warn(`Failed ${NAME} scrapping for ${category} due: `, error);
+        return Promise.resolve([]);
       });
 }
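
The catch handler now resolves with an empty array rather than undefined, so a caller that gathers per-category results with Promise.all can flatten them without guarding. A runnable sketch of that effect under assumed names (fetchPage, scrapeCategory, and scrapeAll are stand-ins, not this repo's functions):

    // Stand-in for the real per-category page fetch; one category fails.
    const fetchPage = category => category === 'TV'
        ? Promise.reject(new Error('timeout'))
        : Promise.resolve([{ title: `${category} torrent` }]);

    function scrapeCategory(category) {
      return fetchPage(category)
          .catch(error => {
            console.warn(`Failed scrape for ${category} due: `, error);
            return []; // resolve with an array so Promise.all stays uniform
          });
    }

    function scrapeAll(categories) {
      return Promise.all(categories.map(c => scrapeCategory(c)))
          .then(results => results.reduce((all, part) => all.concat(part), []));
    }

    // scrapeAll(['Movies', 'TV']) resolves to [{ title: 'Movies torrent' }]
    // even though the 'TV' category rejected.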