From eb2211ddcbc89b5a20992d6c2ba3ac010afc8030 Mon Sep 17 00:00:00 2001 From: TheBeastLT Date: Thu, 23 Apr 2020 22:56:12 +0200 Subject: [PATCH] [scraper] add jitter for eztv retries --- scraper/scheduler/seeders.js | 3 ++- scraper/scrapers/eztv/eztv_api.js | 18 ++++++++++++------ scraper/scrapers/eztv/eztv_scraper.js | 4 ++-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/scraper/scheduler/seeders.js b/scraper/scheduler/seeders.js index 06e6dcc..d6690d0 100644 --- a/scraper/scheduler/seeders.js +++ b/scraper/scheduler/seeders.js @@ -7,6 +7,7 @@ const { updateTorrentSeeders } = require('../lib/torrentEntries') const DELAY = 15 * 1000; // 15 seconds const limiter = new Bottleneck({ maxConcurrent: 20, minTime: 250 }); +const updateLimiter = new Bottleneck({ maxConcurrent: 20 }); const forceSeedersLimiter = new Bottleneck({ maxConcurrent: 5 }); function scheduleUpdateSeeders() { @@ -37,7 +38,7 @@ async function _updateSeeders(torrent) { .then(updated => updatedTorrents.push(updated)); } - return Promise.all(updatedTorrents.map(updated => updateTorrentSeeders(updated))) + return Promise.all(updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated)))); } function getImdbIdsMethod(torrent) { diff --git a/scraper/scrapers/eztv/eztv_api.js b/scraper/scrapers/eztv/eztv_api.js index 93d5f7b..f3ec52c 100644 --- a/scraper/scrapers/eztv/eztv_api.js +++ b/scraper/scrapers/eztv/eztv_api.js @@ -8,10 +8,12 @@ const defaultProxies = [ 'https://eztv.io' ]; const defaultTimeout = 30000; +const minDelay = 3000; +const jitterDelay = minDelay; const limit = 100; const maxPage = 5; -function torrent(torrentId, config = {}, retries = 2) { +function torrent(torrentId, config = {}, retries = 1) { if (!torrentId) { return Promise.reject(new Error(`Failed ${torrentId} search`)); } @@ -20,10 +22,10 @@ function torrent(torrentId, config = {}, retries = 2) { .map(proxyUrl => singleRequest(`${proxyUrl}/ep/${torrentId}`, config))) .then(body => parseTorrentPage(body)) .then(torrent => ({ torrentId, ...torrent })) - .catch(error => retries ? torrent(torrentId, config, retries - 1) : Promise.reject(error)); + .catch(error => retries ? jitter().then(() => torrent(torrentId, config, retries - 1)) : Promise.reject(error)); } -function search(imdbId, config = {}, retries = 2) { +function search(imdbId, config = {}, retries = 1) { if (!imdbId) { return Promise.reject(new Error(`Failed ${imdbId} search`)); } @@ -37,16 +39,16 @@ function search(imdbId, config = {}, retries = 2) { ? search(imdbId, { ...config, page: page + 1 }).catch(() => []) .then(nextTorrents => torrents.concat(nextTorrents)) : torrents) - .catch(error => retries ? search(imdbId, config, retries - 1) : Promise.reject(error)); + .catch(error => retries ? jitter().then(() => search(imdbId, config, retries - 1)) : Promise.reject(error)); } -function browse(config = {}, retries = 2) { +function browse(config = {}, retries = 1) { const page = config.page || 1; return Promises.first(defaultProxies .map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}`, config))) .then(results => parseResults(results)) - .catch(error => retries ? browse(config, retries - 1) : Promise.reject(error)); + .catch(error => retries ? jitter().then(() => browse(config, retries - 1)) : Promise.reject(error)); } function singleRequest(requestUrl, config = {}) { @@ -120,4 +122,8 @@ function parseSize(sizeText) { return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale); } +function jitter() { + return Promises.delay(minDelay + Math.round(Math.random() * jitterDelay)) +} + module.exports = { torrent, search, browse }; diff --git a/scraper/scrapers/eztv/eztv_scraper.js b/scraper/scrapers/eztv/eztv_scraper.js index 88663a0..ebe7b0b 100644 --- a/scraper/scrapers/eztv/eztv_scraper.js +++ b/scraper/scrapers/eztv/eztv_scraper.js @@ -2,7 +2,6 @@ const moment = require('moment'); const Bottleneck = require('bottleneck'); const eztv = require('./eztv_api'); const { Type } = require('../../lib/types'); -const Promises = require('../../lib/promises'); const repository = require('../../lib/repository'); const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries'); @@ -27,7 +26,8 @@ async function scrape() { async function updateSeeders(torrent, getImdbIdsMethod) { return getImdbIdsMethod() .then(imdbIds => Promise.all(imdbIds.map(imdbId => limiter.schedule(() => eztv.search(imdbId))))) - .then(results => results.reduce((a, b) => a.concat(b), [])); + .then(results => results.reduce((a, b) => a.concat(b), [])) + .catch(() => limiter.schedule(() => eztv.torrent(torrent.torrentId))); } async function scrapeLatestTorrents() {