From 909ade0d8e3fdd10b1253086aa6d4dadd14d080b Mon Sep 17 00:00:00 2001 From: Gabisonfire Date: Wed, 17 Jan 2024 16:43:58 -0500 Subject: [PATCH] Updated scrapers to latest available commit --- scraper/lib/metadata.js | 32 +++++++------- scraper/lib/parseHelper.js | 2 +- scraper/lib/repository.js | 8 +++- scraper/lib/requestHelper.js | 7 ++- scraper/lib/torrent.js | 12 ++--- scraper/lib/torrentEntries.js | 2 +- scraper/lib/torrentFiles.js | 25 ++++++----- scraper/package-lock.json | 38 ++++++++++++++++ scraper/package.json | 1 + scraper/scheduler/scrapers.js | 8 ++-- scraper/scheduler/seeders.js | 16 +++++-- scraper/scrapers/1337x/1337x_api.js | 30 ++++--------- scraper/scrapers/1337x/1337x_scraper.js | 2 +- .../animestorrent/animestorrent_api.js | 13 ++---- scraper/scrapers/comando/comando_api.js | 18 ++++---- .../scrapers/comoeubaixo/comoeubaixo_api.js | 20 ++++----- scraper/scrapers/darkmahou/darkmahou_api.js | 13 ++---- scraper/scrapers/erairaws/erairaws_api.js | 3 +- scraper/scrapers/erairaws/erairaws_scraper.js | 4 +- scraper/scrapers/eztv/eztv_api.js | 44 ++++++------------- .../scrapers/horriblesubs/horriblesubs_api.js | 6 +-- scraper/scrapers/kickass/kickass_api.js | 30 ++++--------- scraper/scrapers/lapumia/lapumia_api.js | 32 ++++++++------ scraper/scrapers/lapumia/lapumia_scraper.js | 6 +-- scraper/scrapers/nyaasi/nyaa_si_api.js | 18 +------- scraper/scrapers/nyaasi/nyaa_si_scraper.js | 4 ++ scraper/scrapers/ondebaixa/ondebaixa_api.js | 19 ++++---- .../scrapers/ondebaixa/ondebaixa_scraper.js | 6 --- scraper/scrapers/rarbg/rarbg_api.js | 42 ++++++++++++------ scraper/scrapers/rarbg/rarbg_dump_scraper.js | 10 ++--- scraper/scrapers/rarbg/rarbg_scraper.js | 8 +--- scraper/scrapers/rutor/rutor_api.js | 11 ++--- scraper/scrapers/scraperHelper.js | 17 ++++++- .../scrapers/thepiratebay/thepiratebay_api.js | 8 ++-- .../thepiratebay/thepiratebay_dump_scraper.js | 8 ++-- .../thepiratebay/thepiratebay_scraper.js | 15 ++++++- .../torrentgalaxy/torrentgalaxy_api.js | 32 ++++---------- scraper/scrapers/yts/yts_api.js | 24 ++++++---- scraper/scrapers/yts/yts_scraper.js | 16 +++---- 39 files changed, 311 insertions(+), 299 deletions(-) diff --git a/scraper/lib/metadata.js b/scraper/lib/metadata.js index 3d9ae32..25f4f29 100644 --- a/scraper/lib/metadata.js +++ b/scraper/lib/metadata.js @@ -1,16 +1,17 @@ -const needle = require('needle'); +const axios = require('axios'); const nameToImdb = require('name-to-imdb'); -const googleIt = require('google-it'); const googleSr = require('google-sr'); -const bing = require('nodejs-bing'); const he = require('he'); const { cacheWrapImdbId, cacheWrapKitsuId, cacheWrapMetadata } = require('./cache'); const { Type } = require('./types'); -const { getRandomUserAgent } = require('./requestHelper'); const CINEMETA_URL = 'https://v3-cinemeta.strem.io'; const KITSU_URL = 'https://anime-kitsu.strem.fun'; const TIMEOUT = 20000; +const selectors = { + ...googleSr.defaultSelectors, + LinkSelector: 'a:has(h3)' +} function getMetadata(id, type = Type.SERIES) { if (!id) { @@ -32,9 +33,9 @@ function getMetadata(id, type = Type.SERIES) { } function _requestMetadata(url) { - return needle('get', url, { open_timeout: TIMEOUT }) + return axios.get(url, { timeout: TIMEOUT }) .then((response) => { - const body = response.body; + const body = response.data; if (body && body.meta && (body.meta.imdb_id || body.meta.kitsu_id)) { return { kitsuId: body.meta.kitsu_id, @@ -85,7 +86,7 @@ function escapeTitle(title) { .replace(/[\u0300-\u036F]/g, '') .replace(/&/g, 'and') .replace(/[;, 
~./]+/g, ' ') // replace dots, commas or underscores with spaces - .replace(/[^\w \-()+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars + .replace(/[^\w \-()×+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars .replace(/^\d{1,2}[.#\s]+(?=(?:\d+[.\s]*)?[\u0400-\u04ff])/i, '') // remove russian movie numbering .replace(/\s{2,}/, ' ') // replace multiple spaces .trim(); @@ -111,13 +112,10 @@ async function getImdbId(info, type) { reject(err || new Error('failed imdbId search')); } }); - // }).catch(() => googleIt({ query, userAgent: getRandomUserAgent(), disableConsole: true }) - // .then(results => results.length ? results : Promise.reject('No results')) - }).catch(() => googleSr(query) - .then(response => response.searchResults.length ? response.searchResults : Promise.reject('No results')) - // .catch(() => bing.web(query)) + }).catch(() => googleSr.search(query, { selectors }) + .then(response => response.length ? response : Promise.reject('No results')) .then(results => results - .map(result => result.link) + .map(result => result.Link) .find(result => result.includes('imdb.com/title/'))) .then(result => result && result.match(/imdb\.com\/title\/(tt\d+)/)) .then(match => match && match[1]))) @@ -132,9 +130,9 @@ async function getKitsuId(info) { const query = encodeURIComponent(key); return cacheWrapKitsuId(key, - () => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 }) + () => axios.get(`${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { timeout: 60000 }) .then((response) => { - const body = response.body; + const body = response.data; if (body && body.metas && body.metas.length) { return body.metas[0].id.replace('kitsu:', ''); } else { @@ -147,8 +145,8 @@ async function isEpisodeImdbId(imdbId) { if (!imdbId) { return false; } - return needle('get', `https://www.imdb.com/title/${imdbId}/`, { open_timeout: 10000, follow: 2 }) - .then(response => !!(response.body && response.body.includes('video.episode'))) + return axios.get(`https://www.imdb.com/title/${imdbId}/`, { timeout: 10000 }) + .then(response => !!(response.data && response.data.includes('video.episode'))) .catch((err) => false); } diff --git a/scraper/lib/parseHelper.js b/scraper/lib/parseHelper.js index 3e1331f..6304946 100644 --- a/scraper/lib/parseHelper.js +++ b/scraper/lib/parseHelper.js @@ -43,7 +43,7 @@ function parseSeriesVideo(video, parsedTorrentName) { // force episode to any found number if it was not parsed if (!videoInfo.episodes && !videoInfo.date) { const epMatcher = videoInfo.title.match( - /(? { - if (!response.body || response.statusCode !== 200) { + if (!response.data || response.status !== 200) { throw new Error('torrent not found') } - return response.body + return response.data }) .then((body) => parseTorrent(body)) .then((info) => info.files.map((file, fileId) => ({ @@ -197,8 +197,8 @@ async function getTorrentTrackers(torrent) { } async function getDefaultTrackers(torrent, retry = 3) { - return cacheTrackers(() => needle('get', TRACKERS_URL, { open_timeout: SEEDS_CHECK_TIMEOUT }) - .then(response => response.body && response.body.trim()) + return cacheTrackers(() => axios.get(TRACKERS_URL, { timeout: SEEDS_CHECK_TIMEOUT }) + .then(response => response.data && response.data.trim()) .then(body => body && body.split('\n\n') || [])) .catch(() => retry > 0 ? 
delay(5000).then(() => getDefaultTrackers(torrent, retry - 1)) : []) .then(trackers => trackers.concat(ADDITIONAL_TRACKERS)) diff --git a/scraper/lib/torrentEntries.js b/scraper/lib/torrentEntries.js index 8e3f035..70a1d9f 100644 --- a/scraper/lib/torrentEntries.js +++ b/scraper/lib/torrentEntries.js @@ -107,7 +107,7 @@ async function checkAndUpdateTorrent(torrent) { if (!storedTorrent.languages && torrent.languages && storedTorrent.provider !== 'RARBG') { storedTorrent.languages = torrent.languages; storedTorrent.save(); - console.log(`Updated [${torrent.infoHash}] ${torrent.title} language to ${torrent.languages}`); + console.log(`Updated [${storedTorrent.infoHash}] ${storedTorrent.title} language to ${torrent.languages}`); } return createTorrentContents({ ...storedTorrent.get(), torrentLink: torrent.torrentLink }) .then(() => updateTorrentSeeders(torrent)); diff --git a/scraper/lib/torrentFiles.js b/scraper/lib/torrentFiles.js index 9ee68e0..ab5eda7 100644 --- a/scraper/lib/torrentFiles.js +++ b/scraper/lib/torrentFiles.js @@ -106,7 +106,7 @@ async function getSeriesTorrentContent(torrent) { async function mapSeriesEpisode(file, torrent, files) { if (!file.episodes && !file.kitsuEpisodes) { - if (files.some(otherFile => otherFile.episodes || otherFile.kitsuEpisodes) || parse(torrent.title).seasons) { + if (files.length === 1 || files.some(f => f.episodes || f.kitsuEpisodes) || parse(torrent.title).seasons) { return Promise.resolve({ infoHash: torrent.infoHash, fileIndex: file.fileIndex, @@ -187,7 +187,7 @@ async function decomposeEpisodes(torrent, files, metadata = { episodeCount: [] } decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata); } else if (isDateEpisodeFiles(files, metadata)) { decomposeDateEpisodeFiles(torrent, files, metadata); - } else if (isAbsoluteEpisodeFiles(files, metadata)) { + } else if (isAbsoluteEpisodeFiles(torrent, files, metadata)) { decomposeAbsoluteEpisodeFiles(torrent, files, metadata); } // decomposeEpisodeTitleFiles(torrent, files, metadata); @@ -237,26 +237,28 @@ function isDateEpisodeFiles(files, metadata) { return files.every(file => (!file.season || !metadata.episodeCount[file.season - 1]) && file.date); } -function isAbsoluteEpisodeFiles(files, metadata) { +function isAbsoluteEpisodeFiles(torrent, files, metadata) { const threshold = Math.ceil(files.length / 5); + const isAnime = torrent.type === Type.ANIME && torrent.kitsuId; const nonMovieEpisodes = files .filter(file => !file.isMovie && file.episodes); const absoluteEpisodes = files .filter(file => file.season && file.episodes) .filter(file => file.episodes.every(ep => metadata.episodeCount[file.season - 1] < ep)) - return nonMovieEpisodes.every(file => !file.season || file.season > metadata.episodeCount.length) - || absoluteEpisodes.length >= threshold - // && !isNewEpisodesNotInMetadata(files, metadata); + return nonMovieEpisodes.every(file => !file.season) + || (isAnime && nonMovieEpisodes.every(file => file.season > metadata.episodeCount.length)) + || absoluteEpisodes.length >= threshold; } -function isNewEpisodesNotInMetadata(files, metadata) { +function isNewEpisodeNotInMetadata(torrent, file, metadata) { // new episode might not yet been indexed by cinemeta. 
// detect this if episode number is larger than the last episode or season is larger than the last one - return files.length === 1 + // only for non anime metas + const isAnime = torrent.type === Type.ANIME && torrent.kitsuId; + return !isAnime && !file.isMovie && file.episodes && file.season !== 1 && /continuing|current/i.test(metadata.status) - && files.filter(file => !file.isMovie && file.episodes) - .every(file => file.season >= metadata.episodeCount.length - && file.episodes.every(ep => ep > metadata.episodeCount[file.season - 1])) + && file.season >= metadata.episodeCount.length + && file.episodes.every(ep => ep > (metadata.episodeCount[file.season - 1] || 0)); } function decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata) { @@ -282,6 +284,7 @@ function decomposeAbsoluteEpisodeFiles(torrent, files, metadata) { } files .filter(file => file.episodes && !file.isMovie && file.season !== 0) + .filter(file => !isNewEpisodeNotInMetadata(torrent, file, metadata)) .filter(file => !file.season || (metadata.episodeCount[file.season - 1] || 0) < file.episodes[0]) .forEach(file => { const seasonIdx = ([...metadata.episodeCount.keys()] diff --git a/scraper/package-lock.json b/scraper/package-lock.json index 5bb0dc0..1b2c897 100644 --- a/scraper/package-lock.json +++ b/scraper/package-lock.json @@ -38,6 +38,7 @@ "real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#d1f7eaa8593b947edbfbc8a92a176448b48ef445", "request-ip": "^3.3.0", "router": "^1.3.8", + "rss-parser": "^3.13.0", "sequelize": "^6.31.1", "stremio-addon-sdk": "^1.6.10", "sugar-date": "^2.0.6", @@ -3236,6 +3237,23 @@ "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-3.0.0.tgz", "integrity": "sha512-zPMVc3ZYlGLNk4mpK1NzP2wg0ml9t7fUgDsayR5Y5rSzxQilzR9FGu/EH2jQOcKSAeAfWeylyW8juy3OkWRvNA==" }, + "node_modules/rss-parser": { + "version": "3.13.0", + "resolved": "https://registry.npmjs.org/rss-parser/-/rss-parser-3.13.0.tgz", + "integrity": "sha512-7jWUBV5yGN3rqMMj7CZufl/291QAhvrrGpDNE4k/02ZchL0npisiYYqULF71jCEKoIiHvK/Q2e6IkDwPziT7+w==", + "dependencies": { + "entities": "^2.0.3", + "xml2js": "^0.5.0" + } + }, + "node_modules/rss-parser/node_modules/entities": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz", + "integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==", + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/run-async": { "version": "2.4.0", "resolved": "https://registry.npmjs.org/run-async/-/run-async-2.4.0.tgz", @@ -4252,6 +4270,26 @@ "ultron": "1.0.x" } }, + "node_modules/xml2js": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.5.0.tgz", + "integrity": "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==", + "dependencies": { + "sax": ">=0.6.0", + "xmlbuilder": "~11.0.0" + }, + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/xmlbuilder": { + "version": "11.0.1", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz", + "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==", + "engines": { + "node": ">=4.0" + } + }, "node_modules/xtend": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", diff --git a/scraper/package.json b/scraper/package.json index 02d7360..06bfc53 100644 --- a/scraper/package.json +++ b/scraper/package.json @@ -38,6 +38,7 @@ 
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#d1f7eaa8593b947edbfbc8a92a176448b48ef445", "request-ip": "^3.3.0", "router": "^1.3.8", + "rss-parser": "^3.13.0", "sequelize": "^6.31.1", "stremio-addon-sdk": "^1.6.10", "sugar-date": "^2.0.6", diff --git a/scraper/scheduler/scrapers.js b/scraper/scheduler/scrapers.js index 3bb6c48..aba31f6 100644 --- a/scraper/scheduler/scrapers.js +++ b/scraper/scheduler/scrapers.js @@ -1,9 +1,9 @@ const thepiratebayScraper = require('../scrapers/thepiratebay/thepiratebay_scraper'); const thepiratebayFakeRemoval = require('../scrapers/thepiratebay/thepiratebay_fakes_removal'); const ytsScraper = require('../scrapers/yts/yts_scraper'); +const ytsFullScraper = require('../scrapers/yts/yts_full_scraper'); const eztvScraper = require('../scrapers/eztv/eztv_scraper'); const leetxScraper = require('../scrapers/1337x/1337x_scraper'); -const kickassScraper = require('../scrapers/kickass/kickass_scraper'); const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper'); const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper'); const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper'); @@ -16,9 +16,11 @@ const Lapumia = require('../scrapers/lapumia/lapumia_scraper') const OndeBaixa = require('../scrapers/ondebaixa/ondebaixa_scraper'); const AnimesTorrent = require('../scrapers/animestorrent/animestorrent_scraper') const DarkMahou = require('../scrapers/darkmahou/darkmahou_scraper') +const torrent9Scraper = require('../scrapers/torrent9/torrent9_scraper'); module.exports = [ { scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' }, + { scraper: ytsFullScraper, name: ytsFullScraper.NAME, cron: '0 0 0 * * 0' }, { scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' }, { scraper: nyaaSiScraper, name: nyaaSiScraper.NAME, cron: '0 0 */4 ? * *' }, { scraper: nyaaPantsuScraper, name: nyaaPantsuScraper.NAME, cron: '0 0 */4 ? * *' }, @@ -28,14 +30,14 @@ module.exports = [ { scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 0 */12 ? * *' }, { scraper: torrentGalaxyScraper, name: torrentGalaxyScraper.NAME, cron: '0 0 */4 ? * *' }, { scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' }, - // { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' }, + { scraper: torrent9Scraper, name: torrent9Scraper.NAME, cron: '0 0 */4 ? * *' }, { scraper: Comando, name: Comando.NAME, cron: '0 0 */4 ? * *' }, { scraper: ComoEuBaixo, name: ComoEuBaixo.NAME, cron: '0 0 */4 ? * *' }, { scraper: Lapumia, name: Lapumia.NAME, cron: '0 0 */4 ? * *' }, { scraper: OndeBaixa, name: OndeBaixa.NAME, cron: '0 0 */4 ? * *' }, // { scraper: AnimesTorrent, name: AnimesTorrent.NAME, cron: '0 0 */4 ? * *' }, // { scraper: DarkMahou, name: DarkMahou.NAME, cron: '0 0 */4 ? * *' }, - { scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */24 ? * *' }, + { scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */4 ? 
* *' }, // { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') } // { scraper: require('../scrapers/1337x/1337x_search_scraper') } // { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') } diff --git a/scraper/scheduler/seeders.js b/scraper/scheduler/seeders.js index 9fae79a..b7b3e4f 100644 --- a/scraper/scheduler/seeders.js +++ b/scraper/scheduler/seeders.js @@ -5,6 +5,9 @@ const { updateCurrentSeeders } = require('../lib/torrent') const { updateTorrentSeeders } = require('../lib/torrentEntries') const DELAY_MS = 0; // 0 seconds +const DELAY_NEW_MS = 30_000; // 30 seconds +const DELAY_NO_NEW_MS = 300_000; // 300 seconds +const DELAY_FAILED_TORRENTS_MS = 5_000; // 5 seconds const updateLimiter = new Bottleneck({ maxConcurrent: 5 }); const statistics = {}; const statisticsNew = {}; @@ -31,18 +34,25 @@ function scheduleUpdateSeedersForNewTorrents() { .then(torrents => updateStatistics(torrents, statisticsNew)) .then(() => console.log('Finished seeders update for new torrents:', statisticsNew)) .catch(error => console.warn('Failed seeders update for new torrents:', error)) - .then(() => delay(30_000)) + .then(() => delay(DELAY_NEW_MS)) .then(() => scheduleUpdateSeedersForNewTorrents()); } async function getTorrents() { return repository.getUpdateSeedersTorrents() - .catch(() => delay(5000).then(() => getTorrents())) + .catch(() => delay(DELAY_FAILED_TORRENTS_MS).then(() => getTorrents())); } async function getNewTorrents() { return repository.getUpdateSeedersNewTorrents() - .catch(() => delay(5000).then(() => getNewTorrents())) + .catch(() => delay(DELAY_FAILED_TORRENTS_MS).then(() => getNewTorrents())) + .then(torrents => { + if (!torrents.length) { + console.log('No new torrents to update seeders') + return delay(DELAY_NO_NEW_MS).then(() => getNewTorrents()) + } + return torrents; + }); } function updateStatistics(updatedTorrents, statisticsObject) { diff --git a/scraper/scrapers/1337x/1337x_api.js b/scraper/scrapers/1337x/1337x_api.js index 4877501..fa9dad6 100644 --- a/scraper/scrapers/1337x/1337x_api.js +++ b/scraper/scrapers/1337x/1337x_api.js @@ -1,10 +1,11 @@ +const axios = require('axios'); const cheerio = require('cheerio'); -const needle = require('needle'); const Sugar = require('sugar-date'); const decode = require('magnet-uri'); const Promises = require('../../lib/promises'); const { escapeHTML } = require('../../lib/metadata'); const { getRandomUserAgent } = require('../../lib/requestHelper'); +const { parseSize } = require("../scraperHelper"); const defaultProxies = [ 'https://1337x.to' @@ -32,7 +33,7 @@ function torrent(torrentId, config = {}, retries = 2) { const slug = torrentId.startsWith('/torrent/') ? torrentId.replace('/torrent/', '') : torrentId; return Promises.first(proxyList - .map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${slug}`, config))) + .map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${slug}`, config))) .then((body) => parseTorrentPage(body)) .then((torrent) => ({ torrentId: slug, ...torrent })) .catch((err) => torrent(slug, config, retries - 1)); @@ -51,7 +52,7 @@ function search(keyword, config = {}, retries = 2) { : `${proxyUrl}/search/${keyword}/${page}/`; return Promises.first(proxyList - .map(proxyUrl => singleRequest(requestUrl(proxyUrl), config))) + .map(proxyUrl => singleRequest(requestUrl(proxyUrl), config))) .then(body => parseTableBody(body)) .then(torrents => torrents.length === 40 && page < extendToPage ? 
search(keyword, { ...config, page: page + 1 }).catch(() => []) @@ -73,18 +74,18 @@ function browse(config = {}, retries = 2) { : `${proxyUrl}/cat/${category}/${page}/`; return Promises.first(proxyList - .map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config))) + .map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config))) .then((body) => parseTableBody(body)) .catch((err) => browse(config, retries - 1)); } function singleRequest(requestUrl, config = {}) { const timeout = config.timeout || defaultTimeout; - const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 }; + const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout }; - return needle('get', requestUrl, options) + return axios.get(requestUrl, options) .then((response) => { - const body = response.body; + const body = response.data; if (!body) { throw new Error(`No body: ${requestUrl}`); } else if (body.includes('502: Bad gateway') || @@ -164,19 +165,4 @@ function parseDate(dateString) { return Sugar.Date.create(dateString); } -function parseSize(sizeText) { - if (!sizeText) { - return undefined; - } - let scale = 1; - if (sizeText.includes('GB')) { - scale = 1024 * 1024 * 1024 - } else if (sizeText.includes('MB')) { - scale = 1024 * 1024; - } else if (sizeText.includes('KB')) { - scale = 1024; - } - return Math.floor(parseFloat(sizeText.replace(/,/g, '')) * scale); -} - module.exports = { torrent, search, browse, Categories }; diff --git a/scraper/scrapers/1337x/1337x_scraper.js b/scraper/scrapers/1337x/1337x_scraper.js index 3471694..188c8ec 100644 --- a/scraper/scrapers/1337x/1337x_scraper.js +++ b/scraper/scrapers/1337x/1337x_scraper.js @@ -92,7 +92,7 @@ async function processTorrentRecord(record) { function typeMapping() { const mapping = {}; mapping[leetx.Categories.MOVIE] = Type.MOVIE; - mapping[leetx.Categories.DOCUMENTARIES] = Type.SERIES; + mapping[leetx.Categories.DOCUMENTARIES] = Type.MOVIE; mapping[leetx.Categories.TV] = Type.SERIES; mapping[leetx.Categories.ANIME] = Type.ANIME; return mapping; diff --git a/scraper/scrapers/animestorrent/animestorrent_api.js b/scraper/scrapers/animestorrent/animestorrent_api.js index 836f8f6..b1004f6 100644 --- a/scraper/scrapers/animestorrent/animestorrent_api.js +++ b/scraper/scrapers/animestorrent/animestorrent_api.js @@ -1,7 +1,6 @@ -const needle = require("needle"); +const axios = require('axios'); const cheerio = require("cheerio"); const decode = require("magnet-uri"); -const Promises = require("../../lib/promises"); const { getRandomUserAgent } = require("../../lib/requestHelper"); const defaultTimeout = 10000; @@ -59,14 +58,10 @@ function browse(config = {}, retries = 2) { function singleRequest(requestUrl, config = {}) { const timeout = config.timeout || defaultTimeout; - const options = { - userAgent: getRandomUserAgent(), - open_timeout: timeout, - follow: 2, - }; + const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout }; - return needle("get", requestUrl, options).then((response) => { - const body = response.body; + return axios.get(requestUrl, options).then((response) => { + const body = response.data; if (!body) { throw new Error(`No body: ${requestUrl}`); } else if ( diff --git a/scraper/scrapers/comando/comando_api.js b/scraper/scrapers/comando/comando_api.js index ca05705..640e9b9 100644 --- a/scraper/scrapers/comando/comando_api.js +++ b/scraper/scrapers/comando/comando_api.js @@ -1,8 +1,7 @@ -const needle = require("needle") +const axios = require('axios'); const moment = 
require("moment") const cheerio = require("cheerio"); const decode = require('magnet-uri'); -const Promises = require('../../lib/promises'); const { escapeHTML } = require('../../lib/metadata'); const { getRandomUserAgent } = require('../../lib/requestHelper'); const { isPtDubbed, sanitizePtName, sanitizePtLanguages, sanitizePtOriginalName } = require('../scraperHelper') @@ -63,11 +62,11 @@ function browse(config = {}, retries = 2) { function singleRequest(requestUrl, config = {}) { const timeout = config.timeout || defaultTimeout; - const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 }; + const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout }; - return needle('get', requestUrl, options) + return axios.get(requestUrl, options) .then((response) => { - const body = response.body; + const body = response.data; if (!body) { throw new Error(`No body: ${requestUrl}`); } else if (body.includes('502: Bad gateway') || @@ -75,7 +74,8 @@ function singleRequest(requestUrl, config = {}) { throw new Error(`Invalid body contents: ${requestUrl}`); } return body; - }); + }) + .catch(error => Promise.reject(error.message || error)); } function parseTableBody(body) { @@ -141,8 +141,10 @@ function parseOriginalName(originalNameElem) { if (!originalNameElem[0]) { return ''; } - const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue; - return originalName.replace(/: ?/, ''); + const originalName = originalNameElem.next().text() + || originalNameElem[0].nextSibling.nodeValue + || originalNameElem.text(); + return originalName.replace(/[^:]*: ?/, '').trim(); } function parseCategory(categorys) { diff --git a/scraper/scrapers/comoeubaixo/comoeubaixo_api.js b/scraper/scrapers/comoeubaixo/comoeubaixo_api.js index 6e042ae..80ada38 100644 --- a/scraper/scrapers/comoeubaixo/comoeubaixo_api.js +++ b/scraper/scrapers/comoeubaixo/comoeubaixo_api.js @@ -1,11 +1,11 @@ -const needle = require("needle") +const axios = require('axios'); const cheerio = require("cheerio"); const decode = require('magnet-uri'); const { escapeHTML } = require('../../lib/metadata'); const { getRandomUserAgent } = require('../../lib/requestHelper'); const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper') -const defaultTimeout = 10000; +const defaultTimeout = 30000; const maxSearchPage = 50 const baseUrl = 'https://comoeubaixo.com'; @@ -62,11 +62,11 @@ function browse(config = {}, retries = 2) { function singleRequest(requestUrl, config = {}) { const timeout = config.timeout || defaultTimeout; - const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 }; + const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout }; - return needle('get', requestUrl, options) + return axios.get(requestUrl, options) .then((response) => { - const body = response.body; + const body = response.data; if (!body || (Buffer.isBuffer(body) && !body.size)) { throw new Error(`No body: ${requestUrl}`); } else if (body.includes('502: Bad gateway') || @@ -74,7 +74,8 @@ function singleRequest(requestUrl, config = {}) { throw new Error(`Invalid body contents: ${requestUrl}`); } return body; - }); + }) + .catch(error => Promise.reject(error.message || error)); } function parseTableBody(body) { @@ -113,13 +114,12 @@ function parseTorrentPage(body) { const category = details.find('strong:contains(\'Gêneros: \')').next().attr('href').split('/')[0] const torrents = magnets.map(magnetLink => { 
const decodedMagnet = decode(magnetLink); - const name = escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '); - const sanitizedTitle = sanitizePtName(name); + const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' ')); const originalTitle = details.find('strong:contains(\'Baixar\')')[0].nextSibling.nodeValue.split('-')[0]; const year = details.find('strong:contains(\'Data de Lançamento: \')').next().text().trim(); - const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${sanitizedTitle.trim()}`; + const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`; return { - title: sanitizedTitle.length > 5 ? sanitizedTitle : fallBackTitle, + title: name.length > 5 ? name : fallBackTitle, infoHash: decodedMagnet.infoHash, magnetLink: magnetLink, category: category, diff --git a/scraper/scrapers/darkmahou/darkmahou_api.js b/scraper/scrapers/darkmahou/darkmahou_api.js index e1d5a22..b9324ca 100644 --- a/scraper/scrapers/darkmahou/darkmahou_api.js +++ b/scraper/scrapers/darkmahou/darkmahou_api.js @@ -1,7 +1,6 @@ -const needle = require("needle"); +const axios = require('axios'); const cheerio = require("cheerio"); const decode = require("magnet-uri"); -const Promises = require("../../lib/promises"); const { getRandomUserAgent } = require("../../lib/requestHelper"); const defaultTimeout = 10000; @@ -60,14 +59,10 @@ function browse(config = {}, retries = 2) { function singleRequest(requestUrl, config = {}) { const timeout = config.timeout || defaultTimeout; - const options = { - userAgent: getRandomUserAgent(), - open_timeout: timeout, - follow: 2, - }; + const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout }; - return needle("get", requestUrl, options).then((response) => { - const body = response.body; + return axios.get(requestUrl, options).then((response) => { + const body = response.data; if (!body) { throw new Error(`No body: ${requestUrl}`); } else if ( diff --git a/scraper/scrapers/erairaws/erairaws_api.js b/scraper/scrapers/erairaws/erairaws_api.js index 9535171..17599b0 100644 --- a/scraper/scrapers/erairaws/erairaws_api.js +++ b/scraper/scrapers/erairaws/erairaws_api.js @@ -1,4 +1,3 @@ -const needle = require("needle"); const axios = require('axios'); const cheerio = require("cheerio"); const decode = require("magnet-uri"); @@ -31,7 +30,7 @@ function browse(config = {}, retries = 2) { function singleRequest(requestUrl, config = {}) { const timeout = config.timeout || defaultTimeout; - const options = { userAgent: getRandomUserAgent(), timeout: timeout, follow: 2, }; + const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout, }; return axios.get(requestUrl, options).then((response) => { const body = response.data; diff --git a/scraper/scrapers/erairaws/erairaws_scraper.js b/scraper/scrapers/erairaws/erairaws_scraper.js index 3db4474..20c454a 100644 --- a/scraper/scrapers/erairaws/erairaws_scraper.js +++ b/scraper/scrapers/erairaws/erairaws_scraper.js @@ -1,6 +1,6 @@ const moment = require('moment'); const Bottleneck = require('bottleneck'); -const erairaws = require('./erairaws_api'); +const erairaws = require('./erairaws_rss_api'); const { checkAndUpdateTorrent } = require('../../lib/torrentEntries'); const NAME = 'EraiRaws'; @@ -41,7 +41,7 @@ function untilPage(category) { if (category === erairaws.Categories.ANIMES) { return 45; } - return 3; + return 1; } module.exports = { scrape, NAME }; diff --git a/scraper/scrapers/eztv/eztv_api.js b/scraper/scrapers/eztv/eztv_api.js 
index af33e0e..0e8ec2c 100644 --- a/scraper/scrapers/eztv/eztv_api.js +++ b/scraper/scrapers/eztv/eztv_api.js @@ -1,8 +1,10 @@ +const axios = require('axios'); const cheerio = require('cheerio'); -const needle = require('needle'); const moment = require('moment'); +const { decode } = require("magnet-uri"); const Promises = require('../../lib/promises'); const { getRandomUserAgent } = require('./../../lib/requestHelper'); +const { parseSize } = require("../scraperHelper"); const defaultProxies = [ 'https://eztv.re' @@ -19,7 +21,7 @@ function torrent(torrentId, config = {}, retries = 1) { } return Promises.first(defaultProxies - .map(proxyUrl => singleRequest(`${proxyUrl}/ep/${torrentId}`, config))) + .map(proxyUrl => singleRequest(`${proxyUrl}/ep/${torrentId}`, config))) .then(body => parseTorrentPage(body)) .then(torrent => ({ torrentId, ...torrent })) .catch(error => retries ? jitter().then(() => torrent(torrentId, config, retries - 1)) : Promise.reject(error)); @@ -33,7 +35,7 @@ function search(imdbId, config = {}, retries = 1) { const page = config.page || 1; return Promises.first(defaultProxies - .map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}&imdb_id=${id}`, config))) + .map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}&imdb_id=${id}`, config))) .then(results => parseResults(results)) .then(torrents => torrents.length === limit && page < maxPage ? search(imdbId, { ...config, page: page + 1 }).catch(() => []) @@ -46,27 +48,21 @@ function browse(config = {}, retries = 1) { const page = config.page || 1; return Promises.first(defaultProxies - .map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}`, config))) + .map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}`, config))) .then(results => parseResults(results)) .catch(error => retries ? 
jitter().then(() => browse(config, retries - 1)) : Promise.reject(error)); } function singleRequest(requestUrl, config = {}) { const timeout = config.timeout || defaultTimeout; - const options = { - userAgent: getRandomUserAgent(), - open_timeout: timeout, - response_timeout: timeout, - read_timeout: timeout, - follow: 2 - }; + const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout }; - return needle('get', requestUrl, options) + return axios.get(requestUrl, options) .then(response => { - if (!response.body) { + if (!response.data) { return Promise.reject(`No body: ${requestUrl}`); } - return Promise.resolve(response.body); + return Promise.resolve(response.data); }); } @@ -99,10 +95,11 @@ function parseTorrentPage(body) { reject(new Error('Failed loading body')); } const content = $('table[class="forum_header_border_normal"]'); + const magnetLink = content.find('a[title="Magnet Link"]').attr('href'); const torrent = { name: content.find('h1 > span').text().replace(/EZTV$/, ''), - infoHash: content.find('b:contains(\'Torrent Hash:\')')[0].nextSibling.data.trim().toLowerCase(), - magnetLink: content.find('a[title="Magnet Link"]').attr('href'), + infoHash: decode(magnetLink).infoHash, + magnetLink: magnetLink, torrentLink: content.find('a[title="Download Torrent"]').attr('href'), seeders: parseInt(content.find('span[class="stat_red"]').first().text(), 10) || 0, size: parseSize(content.find('b:contains(\'Filesize:\')')[0].nextSibling.data), @@ -113,21 +110,6 @@ function parseTorrentPage(body) { }); } -function parseSize(sizeText) { - if (!sizeText) { - return undefined; - } - let scale = 1; - if (sizeText.includes('GB')) { - scale = 1024 * 1024 * 1024 - } else if (sizeText.includes('MB')) { - scale = 1024 * 1024; - } else if (sizeText.includes('KB') || sizeText.includes('kB')) { - scale = 1024; - } - return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale); -} - function jitter() { return Promises.delay(minDelay + Math.round(Math.random() * jitterDelay)) } diff --git a/scraper/scrapers/horriblesubs/horriblesubs_api.js b/scraper/scrapers/horriblesubs/horriblesubs_api.js index aff3cbb..5ba306a 100644 --- a/scraper/scrapers/horriblesubs/horriblesubs_api.js +++ b/scraper/scrapers/horriblesubs/horriblesubs_api.js @@ -1,5 +1,5 @@ +const axios = require('axios'); const cheerio = require('cheerio'); -const needle = require('needle'); const moment = require('moment'); const Promises = require('../../lib/promises'); @@ -45,8 +45,8 @@ function _getContent(endpoint, config = {},) { ? 
endpoint.replace(/https?:\/\/[^/]+/, baseUrl) : `${baseUrl}${endpoint}`; - return needle('get', url, { open_timeout: timeout }) - .then((response) => response.body) + return axios.get(url, { timeout: timeout }) + .then((response) => response.data) .then((body) => cheerio.load(body)); } diff --git a/scraper/scrapers/kickass/kickass_api.js b/scraper/scrapers/kickass/kickass_api.js index d15d3e9..43fb061 100644 --- a/scraper/scrapers/kickass/kickass_api.js +++ b/scraper/scrapers/kickass/kickass_api.js @@ -1,8 +1,9 @@ +const axios = require('axios'); const cheerio = require('cheerio'); -const needle = require('needle'); const moment = require('moment'); const decode = require('magnet-uri'); const Promises = require('../../lib/promises'); +const { parseSize } = require("../scraperHelper"); const defaultProxies = [ 'https://katcr.co' @@ -28,7 +29,7 @@ function torrent(torrentId, config = {}, retries = 2) { const proxyList = config.proxyList || defaultProxies; return Promises.first(proxyList - .map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`, config))) + .map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`, config))) .then((body) => parseTorrentPage(body)) .then((torrent) => ({ torrentId, ...torrent })) .catch((err) => torrent(torrentId, config, retries - 1)); @@ -43,7 +44,7 @@ function search(keyword, config = {}, retries = 2) { const category = config.category; return Promises.first(proxyList - .map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/99/${category}`, config))) + .map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/99/${category}`, config))) .then((body) => parseTableBody(body)) .catch((err) => search(keyword, config, retries - 1)); } @@ -57,18 +58,18 @@ function browse(config = {}, retries = 2) { const category = config.category; return Promises.first(proxyList - .map((proxyUrl) => singleRequest(`${proxyUrl}/category/${category}/page/${page}`, config))) + .map((proxyUrl) => singleRequest(`${proxyUrl}/category/${category}/page/${page}`, config))) .then((body) => parseTableBody(body)) .catch((err) => browse(config, retries - 1)); } function singleRequest(requestUrl, config = {}) { const timeout = config.timeout || defaultTimeout; - const options = { open_timeout: timeout, follow: 2 }; + const options = { timeout: timeout }; - return needle('get', requestUrl, options) + return axios.get(requestUrl, options) .then((response) => { - const body = response.body; + const body = response.data; if (!body) { throw new Error(`No body: ${requestUrl}`); } else if (body.includes('Access Denied')) { @@ -157,19 +158,4 @@ function parseTorrentPage(body) { }); } -function parseSize(sizeText) { - if (!sizeText) { - return undefined; - } - let scale = 1; - if (sizeText.includes('GB')) { - scale = 1024 * 1024 * 1024 - } else if (sizeText.includes('MB')) { - scale = 1024 * 1024; - } else if (sizeText.includes('KB') || sizeText.includes('kB')) { - scale = 1024; - } - return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale); -} - module.exports = { torrent, search, browse, Categories }; diff --git a/scraper/scrapers/lapumia/lapumia_api.js b/scraper/scrapers/lapumia/lapumia_api.js index 66a16de..5f9a9da 100644 --- a/scraper/scrapers/lapumia/lapumia_api.js +++ b/scraper/scrapers/lapumia/lapumia_api.js @@ -1,4 +1,4 @@ -const needle = require("needle") +const axios = require('axios'); const moment = require("moment") const cheerio = require("cheerio"); const decode = require('magnet-uri'); @@ -21,12 +21,11 @@ function 
torrent(torrentId, config = {}, retries = 2) {
   if (!torrentId || retries === 0) {
     return Promise.reject(new Error(`Failed ${torrentId} query`));
   }
-  const slug = torrentId.split('?p=')[1];
-  return singleRequest(`${baseUrl}/?p=${slug}`, config)
+  return singleRequest(`${baseUrl}/${torrentId}`, config)
       .then((body) => parseTorrentPage(body))
-      .then((torrent) => torrent.map(el => ({ torrentId: slug, ...el })))
+      .then((torrent) => torrent.map(el => ({ torrentId, ...el })))
       .catch((err) => {
-        console.warn(`Failed Lapumia ${slug} request: `, err);
+        console.warn(`Failed Lapumia ${torrentId} request: `, err);
         return torrent(torrentId, config, retries - 1)
       });
 }
@@ -62,11 +61,11 @@ function browse(config = {}, retries = 2) {
 
 function singleRequest(requestUrl, config = {}) {
   const timeout = config.timeout || defaultTimeout;
-  const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
+  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
 
-  return needle('get', requestUrl, options)
+  return axios.get(requestUrl, options)
       .then((response) => {
-        const body = response.body;
+        const body = response.data;
         if (!body) {
           throw new Error(`No body: ${requestUrl}`);
         } else if (body.includes('502: Bad gateway') ||
@@ -74,7 +73,8 @@ function singleRequest(requestUrl, config = {}) {
           throw new Error(`Invalid body contents: ${requestUrl}`);
         }
         return body;
-      });
+      })
+      .catch(error => Promise.reject(error.message || error));
 }
 
 function parseTableBody(body) {
@@ -89,10 +89,14 @@ function parseTableBody(body) {
 
     $('div.post').each((i, element) => {
       const row = $(element);
-      torrents.push({
-        name: row.find("div > a").text(),
-        torrentId: row.find("div > a").attr("href")
-      });
+      try {
+        torrents.push({
+          name: row.find("div > a").text(),
+          torrentId: row.find("div > a").attr("href").split('/')[3]
+        });
+      } catch (e) {
+        console.log("Failed parsing Lapumia table entry")
+      }
     });
     resolve(torrents);
   });
@@ -112,7 +116,7 @@ function parseTorrentPage(body) {
         .map((i, section) => $(section).attr("href")).get();
     const category = parseCategory($('div.category').html());
     const details = $('div.content')
-    const torrents = magnets.map(magnetLink => ({
+    const torrents = magnets.filter(magnetLink => decode(magnetLink).name).map(magnetLink => ({
       title: sanitizePtName(escapeHTML(decode(magnetLink).name.replace(/\+/g, ' '))),
       originalName: sanitizePtOriginalName(details.find('b:contains(\'Titulo Original:\')')[0].nextSibling.nodeValue),
       year: details.find('b:contains(\'Ano de Lançamento:\')')[0].nextSibling.nodeValue.trim(),
diff --git a/scraper/scrapers/lapumia/lapumia_scraper.js b/scraper/scrapers/lapumia/lapumia_scraper.js
index f066643..e5ec085 100644
--- a/scraper/scrapers/lapumia/lapumia_scraper.js
+++ b/scraper/scrapers/lapumia/lapumia_scraper.js
@@ -33,7 +33,8 @@ async function updateSeeders(torrent) {
 
 async function scrapeLatestTorrents() {
   const allowedCategories = [
-    lapumia.Categories.MOVIE
+    lapumia.Categories.MOVIE,
+    lapumia.Categories.TV
   ];
 
   return Promises.sequence(allowedCategories
@@ -102,9 +103,6 @@ function typeMapping() {
 }
 
 function untilPage(category) {
-  if (lapumia.Categories.TV === category) {
-    return 5;
-  }
   if (lapumia.Categories.ANIME === category) {
     return 2;
   }
diff --git a/scraper/scrapers/nyaasi/nyaa_si_api.js b/scraper/scrapers/nyaasi/nyaa_si_api.js
index 36e11f1..39d2cb6 100644
--- a/scraper/scrapers/nyaasi/nyaa_si_api.js
+++ b/scraper/scrapers/nyaasi/nyaa_si_api.js
@@ -1,4 +1,5 @@
 const { si } = require('nyaapi')
+const { parseSize } = 
require("../scraperHelper"); const Categories = { ANIME: { @@ -34,7 +35,7 @@ function torrent(torrentId) { } function search(query) { - return si.search(query) + return si.search(query, null, { category: Categories.ANIME.ENGLISH}) .then(results => results.map(torrent => parseTorrent(torrent))); } @@ -62,19 +63,4 @@ function parseTorrent(torrent) { } } -function parseSize(sizeText) { - if (!sizeText) { - return undefined; - } - let scale = 1; - if (sizeText.includes('GiB')) { - scale = 1024 * 1024 * 1024 - } else if (sizeText.includes('MiB')) { - scale = 1024 * 1024; - } else if (sizeText.includes('KiB') || sizeText.includes('kB')) { - scale = 1024; - } - return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale); -} - module.exports = { torrent, search, browse, Categories }; diff --git a/scraper/scrapers/nyaasi/nyaa_si_scraper.js b/scraper/scrapers/nyaasi/nyaa_si_scraper.js index ce47b9d..d1ba1f7 100644 --- a/scraper/scrapers/nyaasi/nyaa_si_scraper.js +++ b/scraper/scrapers/nyaasi/nyaa_si_scraper.js @@ -20,6 +20,10 @@ async function scrape() { // return Promise.all(ids.map(id => limiter.schedule(() => nyaasi.torrent(id) // .then(torrent => processTorrentRecord(torrent))))) // .then(() => console.log(`[${moment()}] finished ${NAME} scrape`)); + // const queries = ['Sagrada Reset', 'Sakurada Reset']; + // return Promise.all(queries.map(query => limiter.schedule(() => nyaasi.search(query) + // .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))))) + // .then(() => console.log(`[${moment()}] finished ${NAME} scrape`)); return scrapeLatestTorrents() .then(() => { lastScrape.lastScraped = scrapeStart; diff --git a/scraper/scrapers/ondebaixa/ondebaixa_api.js b/scraper/scrapers/ondebaixa/ondebaixa_api.js index bd9af49..5b56924 100644 --- a/scraper/scrapers/ondebaixa/ondebaixa_api.js +++ b/scraper/scrapers/ondebaixa/ondebaixa_api.js @@ -1,11 +1,11 @@ -const needle = require("needle") +const axios = require('axios'); const cheerio = require("cheerio"); const decode = require('magnet-uri'); const { escapeHTML } = require('../../lib/metadata'); const { getRandomUserAgent } = require('../../lib/requestHelper'); const { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages } = require('../scraperHelper') -const defaultTimeout = 10000; +const defaultTimeout = 30000; const maxSearchPage = 50 const baseUrl = 'https://ondebaixa.com'; @@ -62,11 +62,11 @@ function browse(config = {}, retries = 2) { function singleRequest(requestUrl, config = {}) { const timeout = config.timeout || defaultTimeout; - const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 }; + const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout }; - return needle('get', requestUrl, options) + return axios.get(requestUrl, options) .then((response) => { - const body = response.body; + const body = response.data; if (!body) { throw new Error(`No body: ${requestUrl}`); } else if (body.includes('502: Bad gateway') || @@ -74,7 +74,8 @@ function singleRequest(requestUrl, config = {}) { throw new Error(`Invalid body contents: ${requestUrl}`); } return body; - }); + }) + .catch(error => Promise.reject(error.message || error)); } function parseTableBody(body) { @@ -112,12 +113,12 @@ function parseTorrentPage(body) { const category = details.find('span:contains(\'Gêneros: \')').next().html() const torrents = magnets.map(magnetLink => { const decodedMagnet = decode(magnetLink); - const name = 
escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '); + const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' ')); const originalTitle = details.find('span:contains(\'Título Original: \')').next().text().trim(); const year = details.find('span:contains(\'Ano de Lançamento: \')').next().text().trim(); - const fallbackTitle = `${originalTitle} ${year}`; + const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`; return { - title: name ? sanitizePtName(name) : fallbackTitle, + title: name.length > 5 ? name : fallBackTitle, originalName: sanitizePtOriginalName(originalTitle), year: year, infoHash: decodedMagnet.infoHash, diff --git a/scraper/scrapers/ondebaixa/ondebaixa_scraper.js b/scraper/scrapers/ondebaixa/ondebaixa_scraper.js index c5c4003..e098a72 100644 --- a/scraper/scrapers/ondebaixa/ondebaixa_scraper.js +++ b/scraper/scrapers/ondebaixa/ondebaixa_scraper.js @@ -107,12 +107,6 @@ function typeMapping() { } function untilPage(category) { - if (ondebaixa.Categories.DESENHOS === category) { - return 5; - } - if (ondebaixa.Categories.TV === category) { - return 5; - } return UNTIL_PAGE; } diff --git a/scraper/scrapers/rarbg/rarbg_api.js b/scraper/scrapers/rarbg/rarbg_api.js index c827417..694fe35 100644 --- a/scraper/scrapers/rarbg/rarbg_api.js +++ b/scraper/scrapers/rarbg/rarbg_api.js @@ -1,10 +1,12 @@ -const needle = require('needle'); +const axios = require('axios'); const decode = require('magnet-uri'); const Promises = require('../../lib/promises'); +const { getRandomUserAgent } = require("../../lib/requestHelper"); const baseUrl = 'https://torrentapi.org/pubapi_v2.php'; const appId = 'torrentio-addon'; const defaultTimeout = 30000; +const retryDelay = 3000; let token; @@ -22,6 +24,7 @@ const Options = { MOVIES_X265_4K_HDR: [52], MOVIES_FULL_BD: [42], MOVIES_BD_REMUX: [46], + MOVIES_HIGH_RES: [47, 50, 51, 52, 46], TV_EPISODES: [18], TV_UHD_EPISODES: [49], TV_HD_EPISODES: [41], @@ -84,33 +87,43 @@ function browse(params = {}) { return singleRequest(parameters).then(results => parseResults(results)); } -async function singleRequest(params = {}, config = {}, retries = 10) { +async function singleRequest(params = {}, config = {}, retries = 15) { const timeout = config.timeout || defaultTimeout; - const options = { open_timeout: timeout, follow: 2 }; + const headers = { + 'user-agent': getRandomUserAgent(), + 'accept-encoding': 'gzip, deflate', + 'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4' + }; params.token = await getToken(); params.app_id = appId; Object.keys(params) .filter(key => params[key] === undefined || params[key] === null) .forEach(key => delete params[key]); - - return needle('get', baseUrl, params, options) + const options = { headers, timeout, params }; + return axios.get(baseUrl, options) .then(response => { - if (response.body && response.body.error_code === 4) { + if (response.data && response.data.error_code === 4) { // token expired token = undefined; return singleRequest(params, config); } - if ((!response.body || !response.body.length || [5, 20].includes(response.body.error_code)) && retries > 0) { + if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) { // too many requests - return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1)); + return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1)); } - if (response.statusCode !== 200 || (response.body && 
response.body.error)) {
-        // something went wrong
-        return Promise.reject(response.body || `Failed RARGB request with status=${response.statusCode}`);
+      if (response.status !== 200 || (response.data && response.data.error)) {
+        // something went wrong
+        return Promise.reject(response.data || `Failed RARBG request with status=${response.status}`);
       }
-      return response.body;
+      return response.data;
+    })
+    .catch(error => {
+      if (error.response && [429].includes(error.response.status) && retries > 0) {
+        return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
+      }
+      return Promise.reject(error.message || error);
     });
 }
 
@@ -137,9 +150,10 @@ function parseResult(result) {
 
 async function getToken() {
   if (!token) {
-    const options = { open_timeout: defaultTimeout };
-    token = await needle('get', baseUrl, { get_token: 'get_token', app_id: appId }, options)
-        .then(response => response.body.token);
+    const params = { get_token: 'get_token', app_id: appId };
+    const options = { timeout: defaultTimeout, params };
+    token = await axios.get(baseUrl, options)
+        .then(response => response.data.token);
   }
   return token;
 }
diff --git a/scraper/scrapers/rarbg/rarbg_dump_scraper.js b/scraper/scrapers/rarbg/rarbg_dump_scraper.js
index 59127b8..5cf07b5 100644
--- a/scraper/scrapers/rarbg/rarbg_dump_scraper.js
+++ b/scraper/scrapers/rarbg/rarbg_dump_scraper.js
@@ -12,14 +12,10 @@ const allowedCategories = [
   rarbg.Options.category.MOVIES_XVID,
   rarbg.Options.category.MOVIES_XVID_720P,
   rarbg.Options.category.MOVIES_X265_1080P,
-  rarbg.Options.category.MOVIES_X265_4K,
-  rarbg.Options.category.MOVIES_X265_4K_HDR,
   rarbg.Options.category.MOVIES_X264,
   rarbg.Options.category.MOVIES_X264_720P,
   rarbg.Options.category.MOVIES_X264_1080P,
-  rarbg.Options.category.MOVIES_X264_3D,
-  rarbg.Options.category.MOVIES_X264_4K,
-  rarbg.Options.category.MOVIES_BD_REMUX,
+  rarbg.Options.category.MOVIES_HIGH_RES,
   rarbg.Options.category.TV_EPISODES,
   rarbg.Options.category.TV_UHD_EPISODES,
   rarbg.Options.category.TV_HD_EPISODES
@@ -32,8 +28,8 @@ async function scrape() {
   //const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
 
   return Promise.all(
-    seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
-        .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
+      seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
+          .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
       .then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
 }
diff --git a/scraper/scrapers/rarbg/rarbg_scraper.js b/scraper/scrapers/rarbg/rarbg_scraper.js
index c6d80ee..3a8c9d9 100644
--- a/scraper/scrapers/rarbg/rarbg_scraper.js
+++ b/scraper/scrapers/rarbg/rarbg_scraper.js
@@ -36,21 +36,17 @@ async function scrapeLatestTorrents() {
     rarbg.Options.category.MOVIES_XVID,
     rarbg.Options.category.MOVIES_XVID_720P,
     rarbg.Options.category.MOVIES_X265_1080P,
-    rarbg.Options.category.MOVIES_X265_4K,
-    rarbg.Options.category.MOVIES_X265_4K_HDR,
     rarbg.Options.category.MOVIES_X264,
     rarbg.Options.category.MOVIES_X264_720P,
     rarbg.Options.category.MOVIES_X264_1080P,
-    rarbg.Options.category.MOVIES_X264_3D,
-    rarbg.Options.category.MOVIES_X264_4K,
-    rarbg.Options.category.MOVIES_BD_REMUX,
+    rarbg.Options.category.MOVIES_HIGH_RES,
     rarbg.Options.category.TV_EPISODES,
     rarbg.Options.category.TV_UHD_EPISODES,
     rarbg.Options.category.TV_HD_EPISODES
   ];
 
   return Promises.sequence(allowedCategories
-      .map(category => () => 
limiter.schedule(() => scrapeLatestTorrentsForCategory(category)))) + .map(category => () => limiter.schedule(() => scrapeLatestTorrentsForCategory(category)))) .then(entries => entries.reduce((a, b) => a.concat(b), [])); } diff --git a/scraper/scrapers/rutor/rutor_api.js b/scraper/scrapers/rutor/rutor_api.js index 6928a92..2bb9175 100644 --- a/scraper/scrapers/rutor/rutor_api.js +++ b/scraper/scrapers/rutor/rutor_api.js @@ -1,5 +1,5 @@ +const axios = require('axios'); const cheerio = require('cheerio'); -const needle = require('needle'); const moment = require('moment'); const decode = require('magnet-uri'); const { defaultOptionsWithProxy } = require('../../lib/requestHelper'); @@ -8,6 +8,7 @@ const baseUrl = 'http://www.rutor.info'; const defaultTimeout = 10000; const Categories = { + ALL: '0', FOREIGN_FILMS: '1', RUSSIAN_FILMS: '5', SCIENCE_FILMS: '12', @@ -57,13 +58,13 @@ function files(torrentId) { } function singleRequest(requestUrl) { - const options = { ...defaultOptionsWithProxy(), open_timeout: defaultTimeout, follow: 2 }; + const options = { ...defaultOptionsWithProxy(), timeout: defaultTimeout }; - return needle('get', requestUrl, options) + return axios.get(requestUrl, options) .then((response) => { - const body = response.body; + const body = response.data; if (!body) { - throw new Error(`No body: ${requestUrl} with status ${response.statusCode}`); + throw new Error(`No body: ${requestUrl} with status ${response.status}`); } else if (body.includes('Access Denied')) { console.log(`Access Denied: ${requestUrl}`); throw new Error(`Access Denied: ${requestUrl}`); diff --git a/scraper/scrapers/scraperHelper.js b/scraper/scrapers/scraperHelper.js index 3598a7c..44aa0d0 100644 --- a/scraper/scrapers/scraperHelper.js +++ b/scraper/scrapers/scraperHelper.js @@ -23,4 +23,19 @@ function sanitizePtLanguages(languages) { .trim(); } -module.exports = { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages } \ No newline at end of file +function parseSize(sizeText) { + if (!sizeText) { + return undefined; + } + let scale = 1; + if (/Gi?B|Go/.test(sizeText)) { + scale = 1024 * 1024 * 1024 + } else if (/Mi?B|Mo/.test(sizeText)) { + scale = 1024 * 1024; + } else if (/[Kk]i?B|Ko/.test(sizeText)) { + scale = 1024; + } + return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale); +} + +module.exports = { parseSize, isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages } \ No newline at end of file diff --git a/scraper/scrapers/thepiratebay/thepiratebay_api.js b/scraper/scrapers/thepiratebay/thepiratebay_api.js index f5114a5..4ff6a42 100644 --- a/scraper/scrapers/thepiratebay/thepiratebay_api.js +++ b/scraper/scrapers/thepiratebay/thepiratebay_api.js @@ -1,4 +1,4 @@ -const needle = require('needle'); +const axios = require('axios'); const { escapeHTML } = require('../../lib/metadata'); const baseUrl = 'https://apibay.org'; @@ -102,10 +102,10 @@ function browse(config = {}, retries = 2) { async function _request(endpoint) { const url = `${baseUrl}/${endpoint}`; - return needle('get', url, { open_timeout: timeout }) + return axios.get(url, { timeout: timeout }) .then(response => { - if (typeof response.body === 'object') { - return response.body; + if (typeof response.data === 'object') { + return response.data; } return Promise.reject(`Unexpected response body`); }); diff --git a/scraper/scrapers/thepiratebay/thepiratebay_dump_scraper.js b/scraper/scrapers/thepiratebay/thepiratebay_dump_scraper.js index e5b8605..8754298 100644 --- 
a/scraper/scrapers/thepiratebay/thepiratebay_dump_scraper.js
+++ b/scraper/scrapers/thepiratebay/thepiratebay_dump_scraper.js
@@ -1,5 +1,5 @@
+const axios = require('axios');
 const moment = require('moment');
-const needle = require('needle');
 const Bottleneck = require('bottleneck');
 const { ungzip } = require('node-gzip');
 const LineByLineReader = require('line-by-line');
@@ -60,7 +60,7 @@ async function scrape() {
       }
 
       limiter.schedule(() => processTorrentRecord(torrent)
-          .catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
+          .catch((error) => console.log(`failed ${torrent.title} due to: ${error}`)))
           .then(() => limiter.empty())
           .then((empty) => empty && lr.resume())
           .then(() => entriesProcessed++);
@@ -160,8 +160,8 @@ function downloadDump(dump) {
   }
 
   console.log('downloading dump file...');
-  return needle('get', dump.url, { open_timeout: 2000, output: '/tmp/tpb_dump.gz' })
-      .then((response) => response.body)
+  return axios.get(dump.url, { timeout: 2000, responseType: 'arraybuffer' })
+      .then((response) => response.data)
       .then((body) => {
         console.log('unzipping dump file...');
         return ungzip(body);
diff --git a/scraper/scrapers/thepiratebay/thepiratebay_scraper.js b/scraper/scrapers/thepiratebay/thepiratebay_scraper.js
index 88349bf..33cc493 100644
--- a/scraper/scrapers/thepiratebay/thepiratebay_scraper.js
+++ b/scraper/scrapers/thepiratebay/thepiratebay_scraper.js
@@ -7,7 +7,6 @@ const Promises = require('../../lib/promises');
 const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
 
 const NAME = 'ThePirateBay';
-const UNTIL_PAGE = 5;
 
 const limiter = new Bottleneck({ maxConcurrent: 10 });
 
@@ -54,7 +53,7 @@ async function scrapeLatestTorrentsForCategory(category, page = 1) {
         return Promise.resolve([]);
       })
       .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
-      .then(resolved => resolved.length > 0 && page < UNTIL_PAGE
+      .then(resolved => resolved.length > 0 && page < getUntilPage(category)
          ? 
       : Promise.resolve());
 }
 
@@ -84,4 +83,16 @@ async function processTorrentRecord(record) {
   return createTorrentEntry(torrent);
 }
 
+function getUntilPage(category) {
+  switch (category) {
+    case thepiratebay.Categories.VIDEO.MOVIES_3D:
+      return 1;
+    case thepiratebay.Categories.VIDEO.TV_SHOWS:
+    case thepiratebay.Categories.VIDEO.TV_SHOWS_HD:
+      return 10;
+    default:
+      return 5;
+  }
+}
+
 module.exports = { scrape, updateSeeders, NAME };
\ No newline at end of file
diff --git a/scraper/scrapers/torrentgalaxy/torrentgalaxy_api.js b/scraper/scrapers/torrentgalaxy/torrentgalaxy_api.js
index cd57181..7cbf610 100644
--- a/scraper/scrapers/torrentgalaxy/torrentgalaxy_api.js
+++ b/scraper/scrapers/torrentgalaxy/torrentgalaxy_api.js
@@ -1,9 +1,10 @@
+const axios = require('axios');
 const cheerio = require('cheerio');
-const needle = require('needle');
 const moment = require('moment');
 const decode = require('magnet-uri');
 const Promises = require('../../lib/promises');
 const { getRandomUserAgent } = require('../../lib/requestHelper');
+const { parseSize } = require('../scraperHelper');
 
 const defaultProxies = [
   // 'https://torrentgalaxy.to',
@@ -34,7 +35,7 @@ function torrent(torrentId, config = {}, retries = 2) {
   const proxyList = config.proxyList || defaultProxies;
 
   return Promises.first(proxyList
-    .map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`)))
+      .map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`)))
     .then((body) => parseTorrentPage(body))
     .then((torrent) => ({ torrentId, ...torrent }))
     .catch((err) => torrent(torrentId, config, retries - 1));
@@ -49,7 +50,7 @@ function search(keyword, config = {}, retries = 2) {
   const category = config.category;
 
   return Promises.first(proxyList
-    .map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}&search=${keyword}`)))
+      .map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}&search=${keyword}`)))
     .then((body) => parseTableBody(body))
     .catch(() => search(keyword, config, retries - 1));
 }
@@ -63,19 +64,19 @@ function browse(config = {}, retries = 2, error = null) {
   const category = config.category;
 
   return Promises.first(proxyList
-    .map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`)))
+      .map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`)))
     .then((body) => parseTableBody(body))
     .catch((err) => browse(config, retries - 1, err));
 }
 
 function singleRequest(requestUrl) {
-  const options = { userAgent: getRandomUserAgent(), open_timeout: defaultTimeout, follow: 2 };
+  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: defaultTimeout };
 
-  return needle('get', requestUrl, options)
+  return axios.get(requestUrl, options)
     .then((response) => {
-      const body = response.body;
+      const body = response.data;
       if (!body) {
-        throw new Error(`No body: ${requestUrl} with status ${response.statusCode}`);
+        throw new Error(`No body: ${requestUrl} with status ${response.status}`);
       } else if (body.includes('Access Denied')) {
         console.log(`Access Denied: ${requestUrl}`);
         throw new Error(`Access Denied: ${requestUrl}`);
@@ -157,21 +158,6 @@ function parseTorrentPage(body) {
   });
 }
 
-function parseSize(sizeText) {
-  if (!sizeText) {
-    return undefined;
-  }
-  let scale = 1;
-  if (sizeText.includes('GB')) {
-    scale = 1024 * 1024 * 1024
-  } else if (sizeText.includes('MB')) {
-    scale = 1024 * 1024;
-  } else if (sizeText.includes('KB') || sizeText.includes('kB')) {
-    scale = 1024;
-  }
-  return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale);
-}
-
 function parseDate(dateString) {
   if (dateString.includes('ago')) {
     const amount = parseInt(dateString, 10);
diff --git a/scraper/scrapers/yts/yts_api.js b/scraper/scrapers/yts/yts_api.js
index cc32233..3e77095 100644
--- a/scraper/scrapers/yts/yts_api.js
+++ b/scraper/scrapers/yts/yts_api.js
@@ -1,4 +1,4 @@
-const needle = require('needle');
+const axios = require('axios');
 const Promises = require('../../lib/promises');
 const { getRandomUserAgent } = require('./../../lib/requestHelper');
 
@@ -14,7 +14,7 @@ function torrent(torrentId, config = {}, retries = 2) {
   }
 
   return Promises.first(defaultProxies
-    .map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/movie_details.json?movie_id=${torrentId}`, config)))
+      .map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/movie_details.json?movie_id=${torrentId}`, config)))
     .then(body => parseResults(body))
     .catch(error => torrent(torrentId, config, retries - 1));
 }
@@ -25,7 +25,7 @@ function search(query, config = {}, retries = 2) {
   }
 
   return Promises.first(defaultProxies
-    .map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&query_term=${query}`, config)))
+      .map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&query_term=${query}`, config)))
     .then(results => parseResults(results))
     .catch(error => search(query, config, retries - 1));
 }
@@ -37,21 +37,27 @@ function browse(config = {}, retries = 2) {
   const page = config.page || 1;
 
   return Promises.first(defaultProxies
-    .map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&page=${page}`, config)))
+      .map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&page=${page}`, config)))
     .then(results => parseResults(results))
     .catch(error => browse(config, retries - 1));
 }
 
+function maxPage() {
+  return Promises.first(defaultProxies
+      .map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}`)))
+      .then(results => Math.ceil((results?.data?.movie_count || 0) / limit));
+}
+
 function singleRequest(requestUrl, config = {}) {
   const timeout = config.timeout || defaultTimeout;
-  const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
+  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
 
-  return needle('get', requestUrl, options)
+  return axios.get(requestUrl, options)
     .then(response => {
-      if (!response.body) {
+      if (!response.data) {
         return Promise.reject(`No body: ${requestUrl}`);
       }
-      return Promise.resolve(response.body);
+      return Promise.resolve(response.data);
     });
 }
@@ -89,4 +95,4 @@ function formatType(type) {
   return type.toUpperCase();
 }
 
-module.exports = { torrent, search, browse };
\ No newline at end of file
+module.exports = { torrent, search, browse, maxPage };
\ No newline at end of file
diff --git a/scraper/scrapers/yts/yts_scraper.js b/scraper/scrapers/yts/yts_scraper.js
index d9df186..7e26620 100644
--- a/scraper/scrapers/yts/yts_scraper.js
+++ b/scraper/scrapers/yts/yts_scraper.js
@@ -6,16 +6,16 @@ const repository = require('../../lib/repository');
 const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
 
 const NAME = 'YTS';
-const UNTIL_PAGE = 2;
+const UNTIL_PAGE = 10;
 
 const limiter = new Bottleneck({ maxConcurrent: 10 });
 
-async function scrape() {
+async function scrape(maxPage) {
   const scrapeStart = moment();
   const lastScrape = await repository.getProvider({ name: NAME });
   console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
 
-  return scrapeLatestTorrents()
+  return scrapeLatestTorrentsForCategory(maxPage)
     .then(() => {
       lastScrape.lastScraped = scrapeStart;
       return lastScrape.save();
@@ -27,11 +27,7 @@ async function updateSeeders(torrent) {
   return limiter.schedule(() => yts.torrent(torrent.torrentId));
 }
 
-async function scrapeLatestTorrents() {
-  return scrapeLatestTorrentsForCategory();
-}
-
-async function scrapeLatestTorrentsForCategory(page = 1) {
+async function scrapeLatestTorrentsForCategory(maxPage = UNTIL_PAGE, page = 1) {
   console.log(`Scrapping ${NAME} page ${page}`);
   return yts.browse(({ page }))
     .catch(error => {
@@ -39,8 +35,8 @@ async function scrapeLatestTorrentsForCategory(page = 1) {
       return Promise.resolve([]);
     })
     .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
-    .then(resolved => resolved.length > 0 && page < UNTIL_PAGE
-      ? scrapeLatestTorrentsForCategory(page + 1)
+    .then(resolved => resolved.length > 0 && page < maxPage
+      ? scrapeLatestTorrentsForCategory(maxPage, page + 1)
       : Promise.resolve());
 }
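
The consolidated parseSize helper (now exported from scraper/scrapers/scraperHelper.js) accepts IEC (KiB/MiB/GiB), SI (kB/MB/GB) and French (Ko/Mo/Go) suffixes, all scaled by powers of 1024. A minimal sketch of its expected behaviour; the values are worked examples, not fixtures from the repository:

// Sketch: the shared parseSize helper returns sizes in bytes.
const { parseSize } = require('./scraper/scrapers/scraperHelper');

console.log(parseSize('700 MiB'));  // 734003200  = 700 * 1024^2
console.log(parseSize('1.2 GB'));   // 1288490188 = floor(1.2 * 1024^3)
console.log(parseSize('3.4 Go'));   // 3650722201, French suffix, same scale as GB
console.log(parseSize('1,024 kB')); // 1048576, thousands separator is stripped
console.log(parseSize(undefined));  // undefined, missing sizes stay undefined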
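
One behavioural difference to keep in mind when reading the needle-to-axios hunks: needle's open_timeout only bounded connection setup, whereas axios's timeout aborts the entire request. Small API calls carry the old values over directly, but large transfers such as the TPB dump may need a bigger budget. A hedged sketch of the download pattern (the 60s figure is an assumption, not a value this patch sets):

// Sketch: fetching a gzipped dump as a Buffer with axios.
// responseType 'arraybuffer' yields a Buffer in Node, which is what
// node-gzip's ungzip() expects; the timeout below is illustrative only.
const axios = require('axios');
const { ungzip } = require('node-gzip');

function downloadAndUnzip(url, timeout = 60000) {
  return axios.get(url, { timeout, responseType: 'arraybuffer' })
      .then(response => ungzip(response.data));
}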
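
The new yts.maxPage() sizes the crawl from the movie_count reported by the YTS list_movies endpoint instead of a fixed page count, and scrape(maxPage) threads that number through scrapeLatestTorrentsForCategory. A sketch of plausible wiring; the actual call site is presumably scraper/scheduler/scrapers.js, updated earlier in this patch:

// Sketch: driving the YTS scraper from the API's reported movie count.
// Paths are relative to the repository root; on failure the scraper falls
// back to its UNTIL_PAGE default (10).
const yts = require('./scraper/scrapers/yts/yts_api');
const ytsScraper = require('./scraper/scrapers/yts/yts_scraper');

yts.maxPage()                                  // pages = ceil(movie_count / limit)
    .then(maxPage => ytsScraper.scrape(maxPage))
    .catch(() => ytsScraper.scrape());         // undefined maxPage -> UNTIL_PAGE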