[scraper] rework scraper scheduling and add seeders updating

Author: TheBeastLT
Date: 2020-04-23 16:33:08 +02:00
Parent: d01e2c9b35
Commit: 9ea3932af1
19 changed files with 128 additions and 108 deletions

View File

@@ -33,7 +33,7 @@ function search(imdbId, config = {}, retries = 2) {
return Promises.first(defaultProxies
.map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}&imdb_id=${id}`, config)))
.then(results => parseResults(results))
- .then(torrents => torrents.length === limit && page < maxPage
+ .then(torrents => torrents.length === limit && page < maxPage && !torrents.find(t => t.imdbId === imdbId)
? search(imdbId, { ...config, page: page + 1 })
.catch(() => [])
.then(nextTorrents => torrents.concat(nextTorrents))
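
For reference, a minimal standalone sketch of the paging behaviour after this change: the recursion keeps fetching only while full pages come back, the page cap has not been reached, and none of the torrents on the current page matches the requested imdbId. The fetchPage stub and the limit/maxPage defaults below are illustrative assumptions, not the project's actual helpers.

// Illustrative sketch only; fetchPage, limit and maxPage are assumed stand-ins.
const limit = 40;                                        // assumed page size
const maxPage = 5;                                       // assumed page cap
const fetchPage = (imdbId, page) => Promise.resolve([]); // hypothetical single-page request

function pagedSearch(imdbId, page = 1) {
  return fetchPage(imdbId, page)
      .then(torrents => torrents.length === limit && page < maxPage && !torrents.find(t => t.imdbId === imdbId)
          ? pagedSearch(imdbId, page + 1)
              .catch(() => [])
              .then(nextTorrents => torrents.concat(nextTorrents))
          : torrents);
}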

View File

@@ -4,14 +4,12 @@ const eztv = require('./eztv_api');
const { Type } = require('../../lib/types');
const Promises = require('../../lib/promises');
const repository = require('../../lib/repository');
- const { updateCurrentSeeders } = require('../../lib/torrent');
const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
const NAME = 'EZTV';
const UNTIL_PAGE = 10;
- const limiter = new Bottleneck({ maxConcurrent: 20 });
+ const limiter = new Bottleneck({ maxConcurrent: 1 });
async function scrape() {
const scrapeStart = moment();
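
The Bottleneck limiter drops from 20 concurrent jobs to 1, so everything pushed through limiter.schedule now runs strictly one request at a time. A small sketch of that pattern; the doRequest stub is an illustrative stand-in for an eztv call:

const Bottleneck = require('bottleneck');

// With maxConcurrent: 1, each scheduled job waits for the previous one to finish.
const sequentialLimiter = new Bottleneck({ maxConcurrent: 1 });
const doRequest = id => Promise.resolve(id);   // hypothetical stand-in for an eztv request

// All three calls are queued immediately, but only one is in flight at any time.
Promise.all([1, 2, 3].map(id => sequentialLimiter.schedule(() => doRequest(id))))
    .then(results => console.log(results));
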
@@ -26,11 +24,9 @@ async function scrape() {
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
- async function updateSeeders(torrent) {
- return limiter.schedule(() => eztv.torrent(torrent.torrentId)
- .then(record => (torrent.seeders = record.seeders, torrent))
- .catch(() => updateCurrentSeeders(torrent))
- .then(updated => updateTorrentSeeders(updated)));
+ async function updateSeeders(torrent, getImdbIdsMethod) {
+ return getImdbIdsMethod().then(imdbIds => Promises.sequence(imdbIds
+ .map(imdbId => limiter.schedule(() => eztv.search(imdbId)))));
}
async function scrapeLatestTorrents() {
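
The per-torrent seeder lookup with its updateCurrentSeeders fallback is gone; the new signature instead takes a method that resolves the imdb ids to refresh and queues one rate-limited eztv.search per id. A hedged sketch of how a caller might drive it; repository.getUpdateSeedersTorrents and the distinct-id handling below are assumptions, not part of this diff:

// Sketch under assumptions: collect the distinct imdb ids that need a refresh,
// then hand the resolver to updateSeeders so it can queue one eztv.search per id.
function refreshSeeders() {
  const getImdbIdsMethod = () => repository.getUpdateSeedersTorrents()   // hypothetical repository helper
      .then(torrents => [...new Set(torrents.map(torrent => torrent.imdbId))]);
  return updateSeeders(undefined, getImdbIdsMethod);   // first argument is unused in the new body shown above
}
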
@@ -45,7 +41,7 @@ async function scrapeLatestTorrentsForCategory(page = 1) {
// return Promises.delay(30000).then(() => scrapeLatestTorrentsForCategory(page))
return Promise.resolve([]);
})
- .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
+ .then(torrents => Promise.all(torrents.map(torrent => processTorrentRecord(torrent))))
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE
? scrapeLatestTorrentsForCategory(page + 1)
: Promise.resolve());
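
With the limiter.schedule wrapper removed here, processTorrentRecord runs outside the shared limiter, which is left to throttle only the eztv calls queued in updateSeeders. processTorrentRecord itself is not part of this diff; a hedged sketch of the shape such a handler commonly takes, using only the helpers imported above (the record field names are assumptions):

// Illustrative sketch, not the project's implementation: reuse a stored
// entry when one already exists, otherwise create a fresh torrent entry.
async function processTorrentRecordSketch(record) {
  if (await getStoredTorrentEntry(record)) {
    return updateTorrentSeeders(record);
  }
  return createTorrentEntry({
    infoHash: record.infoHash,        // assumed field names on the eztv record
    provider: NAME,
    title: record.title,
    type: Type.SERIES,
    seeders: record.seeders,
    uploadDate: record.uploadDate
  });
}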