diff --git a/scraper/scrapers/comando/comando_api.js b/scraper/scrapers/comando/comando_api.js index d151ef2..640e9b9 100644 --- a/scraper/scrapers/comando/comando_api.js +++ b/scraper/scrapers/comando/comando_api.js @@ -141,8 +141,10 @@ function parseOriginalName(originalNameElem) { if (!originalNameElem[0]) { return ''; } - const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue; - return originalName.replace(/: ?/, ''); + const originalName = originalNameElem.next().text() + || originalNameElem[0].nextSibling.nodeValue + || originalNameElem.text(); + return originalName.replace(/[^:]*: ?/, '').trim(); } function parseCategory(categorys) { diff --git a/scraper/scrapers/lapumia/lapumia_scraper.js b/scraper/scrapers/lapumia/lapumia_scraper.js index f066643..0d264c3 100644 --- a/scraper/scrapers/lapumia/lapumia_scraper.js +++ b/scraper/scrapers/lapumia/lapumia_scraper.js @@ -102,9 +102,6 @@ function typeMapping() { } function untilPage(category) { - if (lapumia.Categories.TV === category) { - return 5; - } if (lapumia.Categories.ANIME === category) { return 2; } diff --git a/scraper/scrapers/ondebaixa/ondebaixa_scraper.js b/scraper/scrapers/ondebaixa/ondebaixa_scraper.js index c5c4003..e098a72 100644 --- a/scraper/scrapers/ondebaixa/ondebaixa_scraper.js +++ b/scraper/scrapers/ondebaixa/ondebaixa_scraper.js @@ -107,12 +107,6 @@ function typeMapping() { } function untilPage(category) { - if (ondebaixa.Categories.DESENHOS === category) { - return 5; - } - if (ondebaixa.Categories.TV === category) { - return 5; - } return UNTIL_PAGE; } diff --git a/scraper/scrapers/rarbg/rarbg_api.js b/scraper/scrapers/rarbg/rarbg_api.js index a61bb6e..694fe35 100644 --- a/scraper/scrapers/rarbg/rarbg_api.js +++ b/scraper/scrapers/rarbg/rarbg_api.js @@ -6,6 +6,7 @@ const { getRandomUserAgent } = require("../../lib/requestHelper"); const baseUrl = 'https://torrentapi.org/pubapi_v2.php'; const appId = 'torrentio-addon'; const defaultTimeout = 30000; +const retryDelay = 3000; let token; @@ -109,7 +110,7 @@ async function singleRequest(params = {}, config = {}, retries = 15) { } if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) { // too many requests - return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1)); + return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1)); } if (response.status !== 200 || (response.data && response.data.error)) { // something went wrong @@ -120,7 +121,7 @@ async function singleRequest(params = {}, config = {}, retries = 15) { }) .catch(error => { if (error.response && [429].includes(error.response.status) && retries > 0) { - return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1)); + return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1)); } return Promise.reject(error.message || error); }); diff --git a/scraper/scrapers/rarbg/rarbg_dump_scraper.js b/scraper/scrapers/rarbg/rarbg_dump_scraper.js index 59127b8..5cf07b5 100644 --- a/scraper/scrapers/rarbg/rarbg_dump_scraper.js +++ b/scraper/scrapers/rarbg/rarbg_dump_scraper.js @@ -12,14 +12,10 @@ const allowedCategories = [ rarbg.Options.category.MOVIES_XVID, rarbg.Options.category.MOVIES_XVID_720P, rarbg.Options.category.MOVIES_X265_1080P, - rarbg.Options.category.MOVIES_X265_4K, - rarbg.Options.category.MOVIES_X265_4K_HDR, rarbg.Options.category.MOVIES_X264, rarbg.Options.category.MOVIES_X264_720P, rarbg.Options.category.MOVIES_X264_1080P, - rarbg.Options.category.MOVIES_X264_3D, - rarbg.Options.category.MOVIES_X264_4K, - rarbg.Options.category.MOVIES_BD_REMUX, + rarbg.Options.category.MOVIES_HIGH_RES, rarbg.Options.category.TV_EPISODES, rarbg.Options.category.TV_UHD_EPISODES, rarbg.Options.category.TV_HD_EPISODES @@ -32,8 +28,8 @@ async function scrape() { //const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds); return Promise.all( - seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId)) - .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t))))))) + seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId)) + .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t))))))) .then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`)); } diff --git a/scraper/scrapers/rutor/rutor_api.js b/scraper/scrapers/rutor/rutor_api.js index aa3795c..2bb9175 100644 --- a/scraper/scrapers/rutor/rutor_api.js +++ b/scraper/scrapers/rutor/rutor_api.js @@ -8,6 +8,7 @@ const baseUrl = 'http://www.rutor.info'; const defaultTimeout = 10000; const Categories = { + ALL: '0', FOREIGN_FILMS: '1', RUSSIAN_FILMS: '5', SCIENCE_FILMS: '12', diff --git a/scraper/scrapers/thepiratebay/thepiratebay_scraper.js b/scraper/scrapers/thepiratebay/thepiratebay_scraper.js index 88349bf..33cc493 100644 --- a/scraper/scrapers/thepiratebay/thepiratebay_scraper.js +++ b/scraper/scrapers/thepiratebay/thepiratebay_scraper.js @@ -7,7 +7,6 @@ const Promises = require('../../lib/promises'); const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries'); const NAME = 'ThePirateBay'; -const UNTIL_PAGE = 5; const limiter = new Bottleneck({ maxConcurrent: 10 }); @@ -54,7 +53,7 @@ async function scrapeLatestTorrentsForCategory(category, page = 1) { return Promise.resolve([]); }) .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))))) - .then(resolved => resolved.length > 0 && page < UNTIL_PAGE + .then(resolved => resolved.length > 0 && page < getUntilPage(category) ? scrapeLatestTorrentsForCategory(category, page + 1) : Promise.resolve()); } @@ -84,4 +83,16 @@ async function processTorrentRecord(record) { return createTorrentEntry(torrent); } +function getUntilPage(category) { + switch (category) { + case thepiratebay.Categories.VIDEO.MOVIES_3D: + return 1; + case thepiratebay.Categories.VIDEO.TV_SHOWS: + case thepiratebay.Categories.VIDEO.TV_SHOWS_HD: + return 10; + default: + return 5; + } +} + module.exports = { scrape, updateSeeders, NAME }; \ No newline at end of file diff --git a/scraper/scrapers/torrent9/torrent9v2_api.js b/scraper/scrapers/torrent9/torrent9v2_api.js index d94ac03..c51d281 100644 --- a/scraper/scrapers/torrent9/torrent9v2_api.js +++ b/scraper/scrapers/torrent9/torrent9v2_api.js @@ -72,14 +72,12 @@ function parseTableBody(body) { $('tr').each((i, element) => { const row = $(element); const titleElement = row.find('td a'); - try { + if (titleElement.length) { torrents.push({ title: titleElement.attr('title').trim(), torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1], seeders: parseInt(row.find('span.seed_ok').first().text()), }); - } catch (e) { - console.error('Failed parsing TorrentGalaxy row: ', e); } });