Updated scrapers to latest available commit

This commit is contained in:
Gabisonfire
2024-01-17 16:43:58 -05:00
parent cab7f38c66
commit 909ade0d8e
39 changed files with 311 additions and 299 deletions

View File

@@ -1,16 +1,17 @@
const needle = require('needle');
const axios = require('axios');
const nameToImdb = require('name-to-imdb');
const googleIt = require('google-it');
const googleSr = require('google-sr');
const bing = require('nodejs-bing');
const he = require('he');
const { cacheWrapImdbId, cacheWrapKitsuId, cacheWrapMetadata } = require('./cache');
const { Type } = require('./types');
const { getRandomUserAgent } = require('./requestHelper');
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
const KITSU_URL = 'https://anime-kitsu.strem.fun';
const TIMEOUT = 20000;
const selectors = {
...googleSr.defaultSelectors,
LinkSelector: 'a:has(h3)'
}
function getMetadata(id, type = Type.SERIES) {
if (!id) {
@@ -32,9 +33,9 @@ function getMetadata(id, type = Type.SERIES) {
}
function _requestMetadata(url) {
return needle('get', url, { open_timeout: TIMEOUT })
return axios.get(url, { timeout: TIMEOUT })
.then((response) => {
const body = response.body;
const body = response.data;
if (body && body.meta && (body.meta.imdb_id || body.meta.kitsu_id)) {
return {
kitsuId: body.meta.kitsu_id,
@@ -85,7 +86,7 @@ function escapeTitle(title) {
.replace(/[\u0300-\u036F]/g, '')
.replace(/&/g, 'and')
.replace(/[;, ~./]+/g, ' ') // replace dots, commas or underscores with spaces
.replace(/[^\w \-()+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars
.replace(/[^\w \-()×+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars
.replace(/^\d{1,2}[.#\s]+(?=(?:\d+[.\s]*)?[\u0400-\u04ff])/i, '') // remove russian movie numbering
.replace(/\s{2,}/, ' ') // replace multiple spaces
.trim();
@@ -111,13 +112,10 @@ async function getImdbId(info, type) {
reject(err || new Error('failed imdbId search'));
}
});
// }).catch(() => googleIt({ query, userAgent: getRandomUserAgent(), disableConsole: true })
// .then(results => results.length ? results : Promise.reject('No results'))
}).catch(() => googleSr(query)
.then(response => response.searchResults.length ? response.searchResults : Promise.reject('No results'))
// .catch(() => bing.web(query))
}).catch(() => googleSr.search(query, { selectors })
.then(response => response.length ? response : Promise.reject('No results'))
.then(results => results
.map(result => result.link)
.map(result => result.Link)
.find(result => result.includes('imdb.com/title/')))
.then(result => result && result.match(/imdb\.com\/title\/(tt\d+)/))
.then(match => match && match[1])))
@@ -132,9 +130,9 @@ async function getKitsuId(info) {
const query = encodeURIComponent(key);
return cacheWrapKitsuId(key,
() => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 })
() => axios.get(`${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { timeout: 60000 })
.then((response) => {
const body = response.body;
const body = response.data;
if (body && body.metas && body.metas.length) {
return body.metas[0].id.replace('kitsu:', '');
} else {
@@ -147,8 +145,8 @@ async function isEpisodeImdbId(imdbId) {
if (!imdbId) {
return false;
}
return needle('get', `https://www.imdb.com/title/${imdbId}/`, { open_timeout: 10000, follow: 2 })
.then(response => !!(response.body && response.body.includes('video.episode')))
return axios.get(`https://www.imdb.com/title/${imdbId}/`, { timeout: 10000 })
.then(response => !!(response.data && response.data.includes('video.episode')))
.catch((err) => false);
}

View File

@@ -43,7 +43,7 @@ function parseSeriesVideo(video, parsedTorrentName) {
// force episode to any found number if it was not parsed
if (!videoInfo.episodes && !videoInfo.date) {
const epMatcher = videoInfo.title.match(
/(?<!season\W*|disk\W*|movie\W*|film\W*)(?:^|\W)(\d{1,4})(?:a|b|c|v\d)?(?:\W|$)(?!disk|movie|film)/i);
/(?<!season\W*|disk\W*|movie\W*|film\W*)(?:^|\W|_)(\d{1,4})(?:a|b|c|v\d)?(?:_|\W|$)(?!disk|movie|film)/i);
videoInfo.episodes = epMatcher && [parseInt(epMatcher[1], 10)];
videoInfo.episode = videoInfo.episodes && videoInfo.episodes[0];
}

View File

@@ -1,7 +1,6 @@
const moment = require('moment');
const Promises = require('./promises')
const { Sequelize, DataTypes, fn, col, literal } = require('sequelize');
const Op = Sequelize.Op;
const { Sequelize, Op, DataTypes, fn, col, literal } = require('sequelize');
const DATABASE_URI = process.env.DATABASE_URI;
@@ -177,6 +176,10 @@ function getTorrentsBasedOnQuery(where) {
return Torrent.findAll({ where: where });
}
// Fetch all File rows matching the given Sequelize `where` clause.
// The clause is passed through unmodified, so callers fully control the
// query shape; returns a Promise resolving to File model instances.
function getFilesBasedOnQuery(where) {
return File.findAll({ where: where });
}
function getTorrentsWithoutSize() {
return Torrent.findAll({
where: literal(
@@ -326,6 +329,7 @@ module.exports = {
getTorrent,
getTorrentsBasedOnTitle,
getTorrentsBasedOnQuery,
getFilesBasedOnQuery,
deleteTorrent,
getUpdateSeedersTorrents,
getUpdateSeedersNewTorrents,

View File

@@ -8,14 +8,17 @@ function getRandomUserAgent() {
function defaultOptionsWithProxy() {
if (process.env.PROXY_HOST && process.env.PROXY_TYPE) {
return {
proxy: process.env.PROXY_HOST,
proxy: {
host: process.env.PROXY_HOST.match(/\/\/(.*):/)[1],
port: process.env.PROXY_HOST.match(/:(\d+)/)[1]
},
headers: {
'user-agent': getRandomUserAgent(),
'proxy-type': process.env.PROXY_TYPE
}
}
}
return { userAgent: getRandomUserAgent() };
return { headers: { 'user-agent': getRandomUserAgent() } };
}
module.exports = { getRandomUserAgent, defaultOptionsWithProxy };

View File

@@ -1,5 +1,5 @@
const torrentStream = require('torrent-stream');
const needle = require('needle');
const axios = require('axios');
const parseTorrent = require('parse-torrent');
const BTClient = require('bittorrent-tracker')
const async = require('async');
@@ -113,12 +113,12 @@ async function filesFromTorrentFile(torrent) {
return Promise.reject(new Error("no torrentLink"));
}
return needle('get', torrent.torrentLink, { open_timeout: 10000 })
return axios.get(torrent.torrentLink, { timeout: 10000, responseType: 'arraybuffer' })
.then((response) => {
if (!response.body || response.statusCode !== 200) {
if (!response.data || response.status !== 200) {
throw new Error('torrent not found')
}
return response.body
return response.data
})
.then((body) => parseTorrent(body))
.then((info) => info.files.map((file, fileId) => ({
@@ -197,8 +197,8 @@ async function getTorrentTrackers(torrent) {
}
async function getDefaultTrackers(torrent, retry = 3) {
return cacheTrackers(() => needle('get', TRACKERS_URL, { open_timeout: SEEDS_CHECK_TIMEOUT })
.then(response => response.body && response.body.trim())
return cacheTrackers(() => axios.get(TRACKERS_URL, { timeout: SEEDS_CHECK_TIMEOUT })
.then(response => response.data && response.data.trim())
.then(body => body && body.split('\n\n') || []))
.catch(() => retry > 0 ? delay(5000).then(() => getDefaultTrackers(torrent, retry - 1)) : [])
.then(trackers => trackers.concat(ADDITIONAL_TRACKERS))

View File

@@ -107,7 +107,7 @@ async function checkAndUpdateTorrent(torrent) {
if (!storedTorrent.languages && torrent.languages && storedTorrent.provider !== 'RARBG') {
storedTorrent.languages = torrent.languages;
storedTorrent.save();
console.log(`Updated [${torrent.infoHash}] ${torrent.title} language to ${torrent.languages}`);
console.log(`Updated [${storedTorrent.infoHash}] ${storedTorrent.title} language to ${torrent.languages}`);
}
return createTorrentContents({ ...storedTorrent.get(), torrentLink: torrent.torrentLink })
.then(() => updateTorrentSeeders(torrent));

View File

@@ -106,7 +106,7 @@ async function getSeriesTorrentContent(torrent) {
async function mapSeriesEpisode(file, torrent, files) {
if (!file.episodes && !file.kitsuEpisodes) {
if (files.some(otherFile => otherFile.episodes || otherFile.kitsuEpisodes) || parse(torrent.title).seasons) {
if (files.length === 1 || files.some(f => f.episodes || f.kitsuEpisodes) || parse(torrent.title).seasons) {
return Promise.resolve({
infoHash: torrent.infoHash,
fileIndex: file.fileIndex,
@@ -187,7 +187,7 @@ async function decomposeEpisodes(torrent, files, metadata = { episodeCount: [] }
decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata);
} else if (isDateEpisodeFiles(files, metadata)) {
decomposeDateEpisodeFiles(torrent, files, metadata);
} else if (isAbsoluteEpisodeFiles(files, metadata)) {
} else if (isAbsoluteEpisodeFiles(torrent, files, metadata)) {
decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
}
// decomposeEpisodeTitleFiles(torrent, files, metadata);
@@ -237,26 +237,28 @@ function isDateEpisodeFiles(files, metadata) {
return files.every(file => (!file.season || !metadata.episodeCount[file.season - 1]) && file.date);
}
function isAbsoluteEpisodeFiles(files, metadata) {
function isAbsoluteEpisodeFiles(torrent, files, metadata) {
const threshold = Math.ceil(files.length / 5);
const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
const nonMovieEpisodes = files
.filter(file => !file.isMovie && file.episodes);
const absoluteEpisodes = files
.filter(file => file.season && file.episodes)
.filter(file => file.episodes.every(ep => metadata.episodeCount[file.season - 1] < ep))
return nonMovieEpisodes.every(file => !file.season || file.season > metadata.episodeCount.length)
|| absoluteEpisodes.length >= threshold
// && !isNewEpisodesNotInMetadata(files, metadata);
return nonMovieEpisodes.every(file => !file.season)
|| (isAnime && nonMovieEpisodes.every(file => file.season > metadata.episodeCount.length))
|| absoluteEpisodes.length >= threshold;
}
function isNewEpisodesNotInMetadata(files, metadata) {
function isNewEpisodeNotInMetadata(torrent, file, metadata) {
// new episode might not yet been indexed by cinemeta.
// detect this if episode number is larger than the last episode or season is larger than the last one
return files.length === 1
// only for non anime metas
const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
return !isAnime && !file.isMovie && file.episodes && file.season !== 1
&& /continuing|current/i.test(metadata.status)
&& files.filter(file => !file.isMovie && file.episodes)
.every(file => file.season >= metadata.episodeCount.length
&& file.episodes.every(ep => ep > metadata.episodeCount[file.season - 1]))
&& file.season >= metadata.episodeCount.length
&& file.episodes.every(ep => ep > (metadata.episodeCount[file.season - 1] || 0));
}
function decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata) {
@@ -282,6 +284,7 @@ function decomposeAbsoluteEpisodeFiles(torrent, files, metadata) {
}
files
.filter(file => file.episodes && !file.isMovie && file.season !== 0)
.filter(file => !isNewEpisodeNotInMetadata(torrent, file, metadata))
.filter(file => !file.season || (metadata.episodeCount[file.season - 1] || 0) < file.episodes[0])
.forEach(file => {
const seasonIdx = ([...metadata.episodeCount.keys()]

View File

@@ -38,6 +38,7 @@
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#d1f7eaa8593b947edbfbc8a92a176448b48ef445",
"request-ip": "^3.3.0",
"router": "^1.3.8",
"rss-parser": "^3.13.0",
"sequelize": "^6.31.1",
"stremio-addon-sdk": "^1.6.10",
"sugar-date": "^2.0.6",
@@ -3236,6 +3237,23 @@
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-3.0.0.tgz",
"integrity": "sha512-zPMVc3ZYlGLNk4mpK1NzP2wg0ml9t7fUgDsayR5Y5rSzxQilzR9FGu/EH2jQOcKSAeAfWeylyW8juy3OkWRvNA=="
},
"node_modules/rss-parser": {
"version": "3.13.0",
"resolved": "https://registry.npmjs.org/rss-parser/-/rss-parser-3.13.0.tgz",
"integrity": "sha512-7jWUBV5yGN3rqMMj7CZufl/291QAhvrrGpDNE4k/02ZchL0npisiYYqULF71jCEKoIiHvK/Q2e6IkDwPziT7+w==",
"dependencies": {
"entities": "^2.0.3",
"xml2js": "^0.5.0"
}
},
"node_modules/rss-parser/node_modules/entities": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz",
"integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==",
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/run-async": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/run-async/-/run-async-2.4.0.tgz",
@@ -4252,6 +4270,26 @@
"ultron": "1.0.x"
}
},
"node_modules/xml2js": {
"version": "0.5.0",
"resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.5.0.tgz",
"integrity": "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==",
"dependencies": {
"sax": ">=0.6.0",
"xmlbuilder": "~11.0.0"
},
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/xmlbuilder": {
"version": "11.0.1",
"resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz",
"integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==",
"engines": {
"node": ">=4.0"
}
},
"node_modules/xtend": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz",

View File

@@ -38,6 +38,7 @@
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#d1f7eaa8593b947edbfbc8a92a176448b48ef445",
"request-ip": "^3.3.0",
"router": "^1.3.8",
"rss-parser": "^3.13.0",
"sequelize": "^6.31.1",
"stremio-addon-sdk": "^1.6.10",
"sugar-date": "^2.0.6",

View File

@@ -1,9 +1,9 @@
const thepiratebayScraper = require('../scrapers/thepiratebay/thepiratebay_scraper');
const thepiratebayFakeRemoval = require('../scrapers/thepiratebay/thepiratebay_fakes_removal');
const ytsScraper = require('../scrapers/yts/yts_scraper');
const ytsFullScraper = require('../scrapers/yts/yts_full_scraper');
const eztvScraper = require('../scrapers/eztv/eztv_scraper');
const leetxScraper = require('../scrapers/1337x/1337x_scraper');
const kickassScraper = require('../scrapers/kickass/kickass_scraper');
const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper');
const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper');
@@ -16,9 +16,11 @@ const Lapumia = require('../scrapers/lapumia/lapumia_scraper')
const OndeBaixa = require('../scrapers/ondebaixa/ondebaixa_scraper');
const AnimesTorrent = require('../scrapers/animestorrent/animestorrent_scraper')
const DarkMahou = require('../scrapers/darkmahou/darkmahou_scraper')
const torrent9Scraper = require('../scrapers/torrent9/torrent9_scraper');
module.exports = [
{ scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: ytsFullScraper, name: ytsFullScraper.NAME, cron: '0 0 0 * * 0' },
{ scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: nyaaSiScraper, name: nyaaSiScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: nyaaPantsuScraper, name: nyaaPantsuScraper.NAME, cron: '0 0 */4 ? * *' },
@@ -28,14 +30,14 @@ module.exports = [
{ scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 0 */12 ? * *' },
{ scraper: torrentGalaxyScraper, name: torrentGalaxyScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' },
// { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: torrent9Scraper, name: torrent9Scraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: Comando, name: Comando.NAME, cron: '0 0 */4 ? * *' },
{ scraper: ComoEuBaixo, name: ComoEuBaixo.NAME, cron: '0 0 */4 ? * *' },
{ scraper: Lapumia, name: Lapumia.NAME, cron: '0 0 */4 ? * *' },
{ scraper: OndeBaixa, name: OndeBaixa.NAME, cron: '0 0 */4 ? * *' },
// { scraper: AnimesTorrent, name: AnimesTorrent.NAME, cron: '0 0 */4 ? * *' },
// { scraper: DarkMahou, name: DarkMahou.NAME, cron: '0 0 */4 ? * *' },
{ scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */24 ? * *' },
{ scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */4 ? * *' },
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
// { scraper: require('../scrapers/1337x/1337x_search_scraper') }
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }

View File

@@ -5,6 +5,9 @@ const { updateCurrentSeeders } = require('../lib/torrent')
const { updateTorrentSeeders } = require('../lib/torrentEntries')
const DELAY_MS = 0; // 0 seconds
const DELAY_NEW_MS = 30_000; // 30 seconds
const DELAY_NO_NEW_MS = 300_000; // 300 seconds
const DELAY_FAILED_TORRENTS_MS = 5_000; // 5 seconds
const updateLimiter = new Bottleneck({ maxConcurrent: 5 });
const statistics = {};
const statisticsNew = {};
@@ -31,18 +34,25 @@ function scheduleUpdateSeedersForNewTorrents() {
.then(torrents => updateStatistics(torrents, statisticsNew))
.then(() => console.log('Finished seeders update for new torrents:', statisticsNew))
.catch(error => console.warn('Failed seeders update for new torrents:', error))
.then(() => delay(30_000))
.then(() => delay(DELAY_NEW_MS))
.then(() => scheduleUpdateSeedersForNewTorrents());
}
async function getTorrents() {
return repository.getUpdateSeedersTorrents()
.catch(() => delay(5000).then(() => getTorrents()))
.catch(() => delay(DELAY_FAILED_TORRENTS_MS).then(() => getTorrents()));
}
async function getNewTorrents() {
return repository.getUpdateSeedersNewTorrents()
.catch(() => delay(5000).then(() => getNewTorrents()))
.catch(() => delay(DELAY_FAILED_TORRENTS_MS).then(() => getNewTorrents()))
.then(torrents => {
if (!torrents.length) {
console.log('No new torrents to update seeders')
return delay(DELAY_NO_NEW_MS).then(() => getNewTorrents())
}
return torrents;
});
}
function updateStatistics(updatedTorrents, statisticsObject) {

View File

@@ -1,10 +1,11 @@
const axios = require('axios');
const cheerio = require('cheerio');
const needle = require('needle');
const Sugar = require('sugar-date');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
const defaultProxies = [
'https://1337x.to'
@@ -32,7 +33,7 @@ function torrent(torrentId, config = {}, retries = 2) {
const slug = torrentId.startsWith('/torrent/') ? torrentId.replace('/torrent/', '') : torrentId;
return Promises.first(proxyList
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${slug}`, config)))
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${slug}`, config)))
.then((body) => parseTorrentPage(body))
.then((torrent) => ({ torrentId: slug, ...torrent }))
.catch((err) => torrent(slug, config, retries - 1));
@@ -51,7 +52,7 @@ function search(keyword, config = {}, retries = 2) {
: `${proxyUrl}/search/${keyword}/${page}/`;
return Promises.first(proxyList
.map(proxyUrl => singleRequest(requestUrl(proxyUrl), config)))
.map(proxyUrl => singleRequest(requestUrl(proxyUrl), config)))
.then(body => parseTableBody(body))
.then(torrents => torrents.length === 40 && page < extendToPage
? search(keyword, { ...config, page: page + 1 }).catch(() => [])
@@ -73,18 +74,18 @@ function browse(config = {}, retries = 2) {
: `${proxyUrl}/cat/${category}/${page}/`;
return Promises.first(proxyList
.map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config)))
.map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config)))
.then((body) => parseTableBody(body))
.catch((err) => browse(config, retries - 1));
}
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
return needle('get', requestUrl, options)
return axios.get(requestUrl, options)
.then((response) => {
const body = response.body;
const body = response.data;
if (!body) {
throw new Error(`No body: ${requestUrl}`);
} else if (body.includes('502: Bad gateway') ||
@@ -164,19 +165,4 @@ function parseDate(dateString) {
return Sugar.Date.create(dateString);
}
/**
 * Convert a human-readable size string (e.g. "1.5 GB", "700 MB") to bytes.
 * @param {string} sizeText - size text, possibly with comma thousand separators
 * @returns {number|undefined} whole number of bytes, or undefined for empty input
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  // Unit substring -> byte multiplier; checked in order, first match wins.
  const unitScales = [
    ['GB', 1024 ** 3],
    ['MB', 1024 ** 2],
    ['KB', 1024],
  ];
  const matched = unitScales.find(([unit]) => sizeText.includes(unit));
  const scale = matched ? matched[1] : 1;
  // Drop comma separators so parseFloat reads the full number.
  return Math.floor(parseFloat(sizeText.replace(/,/g, '')) * scale);
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -92,7 +92,7 @@ async function processTorrentRecord(record) {
function typeMapping() {
const mapping = {};
mapping[leetx.Categories.MOVIE] = Type.MOVIE;
mapping[leetx.Categories.DOCUMENTARIES] = Type.SERIES;
mapping[leetx.Categories.DOCUMENTARIES] = Type.MOVIE;
mapping[leetx.Categories.TV] = Type.SERIES;
mapping[leetx.Categories.ANIME] = Type.ANIME;
return mapping;

View File

@@ -1,7 +1,6 @@
const needle = require("needle");
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require("magnet-uri");
const Promises = require("../../lib/promises");
const { getRandomUserAgent } = require("../../lib/requestHelper");
const defaultTimeout = 10000;
@@ -59,14 +58,10 @@ function browse(config = {}, retries = 2) {
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = {
userAgent: getRandomUserAgent(),
open_timeout: timeout,
follow: 2,
};
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
return needle("get", requestUrl, options).then((response) => {
const body = response.body;
return axios.get(requestUrl, options).then((response) => {
const body = response.data;
if (!body) {
throw new Error(`No body: ${requestUrl}`);
} else if (

View File

@@ -1,8 +1,7 @@
const needle = require("needle")
const axios = require('axios');
const moment = require("moment")
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtLanguages, sanitizePtOriginalName } = require('../scraperHelper')
@@ -63,11 +62,11 @@ function browse(config = {}, retries = 2) {
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
return needle('get', requestUrl, options)
return axios.get(requestUrl, options)
.then((response) => {
const body = response.body;
const body = response.data;
if (!body) {
throw new Error(`No body: ${requestUrl}`);
} else if (body.includes('502: Bad gateway') ||
@@ -75,7 +74,8 @@ function singleRequest(requestUrl, config = {}) {
throw new Error(`Invalid body contents: ${requestUrl}`);
}
return body;
});
})
.catch(error => Promise.reject(error.message || error));
}
function parseTableBody(body) {
@@ -141,8 +141,10 @@ function parseOriginalName(originalNameElem) {
if (!originalNameElem[0]) {
return '';
}
const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue;
return originalName.replace(/: ?/, '');
const originalName = originalNameElem.next().text()
|| originalNameElem[0].nextSibling.nodeValue
|| originalNameElem.text();
return originalName.replace(/[^:]*: ?/, '').trim();
}
function parseCategory(categorys) {

View File

@@ -1,11 +1,11 @@
const needle = require("needle")
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper')
const defaultTimeout = 10000;
const defaultTimeout = 30000;
const maxSearchPage = 50
const baseUrl = 'https://comoeubaixo.com';
@@ -62,11 +62,11 @@ function browse(config = {}, retries = 2) {
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
return needle('get', requestUrl, options)
return axios.get(requestUrl, options)
.then((response) => {
const body = response.body;
const body = response.data;
if (!body || (Buffer.isBuffer(body) && !body.size)) {
throw new Error(`No body: ${requestUrl}`);
} else if (body.includes('502: Bad gateway') ||
@@ -74,7 +74,8 @@ function singleRequest(requestUrl, config = {}) {
throw new Error(`Invalid body contents: ${requestUrl}`);
}
return body;
});
})
.catch(error => Promise.reject(error.message || error));
}
function parseTableBody(body) {
@@ -113,13 +114,12 @@ function parseTorrentPage(body) {
const category = details.find('strong:contains(\'Gêneros: \')').next().attr('href').split('/')[0]
const torrents = magnets.map(magnetLink => {
const decodedMagnet = decode(magnetLink);
const name = escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' ');
const sanitizedTitle = sanitizePtName(name);
const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '));
const originalTitle = details.find('strong:contains(\'Baixar\')')[0].nextSibling.nodeValue.split('-')[0];
const year = details.find('strong:contains(\'Data de Lançamento: \')').next().text().trim();
const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${sanitizedTitle.trim()}`;
const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`;
return {
title: sanitizedTitle.length > 5 ? sanitizedTitle : fallBackTitle,
title: name.length > 5 ? name : fallBackTitle,
infoHash: decodedMagnet.infoHash,
magnetLink: magnetLink,
category: category,

View File

@@ -1,7 +1,6 @@
const needle = require("needle");
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require("magnet-uri");
const Promises = require("../../lib/promises");
const { getRandomUserAgent } = require("../../lib/requestHelper");
const defaultTimeout = 10000;
@@ -60,14 +59,10 @@ function browse(config = {}, retries = 2) {
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = {
userAgent: getRandomUserAgent(),
open_timeout: timeout,
follow: 2,
};
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
return needle("get", requestUrl, options).then((response) => {
const body = response.body;
return axios.get(requestUrl, options).then((response) => {
const body = response.data;
if (!body) {
throw new Error(`No body: ${requestUrl}`);
} else if (

View File

@@ -1,4 +1,3 @@
const needle = require("needle");
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require("magnet-uri");
@@ -31,7 +30,7 @@ function browse(config = {}, retries = 2) {
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = { userAgent: getRandomUserAgent(), timeout: timeout, follow: 2, };
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout, };
return axios.get(requestUrl, options).then((response) => {
const body = response.data;

View File

@@ -1,6 +1,6 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const erairaws = require('./erairaws_api');
const erairaws = require('./erairaws_rss_api');
const { checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'EraiRaws';
@@ -41,7 +41,7 @@ function untilPage(category) {
if (category === erairaws.Categories.ANIMES) {
return 45;
}
return 3;
return 1;
}
module.exports = { scrape, NAME };

View File

@@ -1,8 +1,10 @@
const axios = require('axios');
const cheerio = require('cheerio');
const needle = require('needle');
const moment = require('moment');
const { decode } = require("magnet-uri");
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require('./../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
const defaultProxies = [
'https://eztv.re'
@@ -19,7 +21,7 @@ function torrent(torrentId, config = {}, retries = 1) {
}
return Promises.first(defaultProxies
.map(proxyUrl => singleRequest(`${proxyUrl}/ep/${torrentId}`, config)))
.map(proxyUrl => singleRequest(`${proxyUrl}/ep/${torrentId}`, config)))
.then(body => parseTorrentPage(body))
.then(torrent => ({ torrentId, ...torrent }))
.catch(error => retries ? jitter().then(() => torrent(torrentId, config, retries - 1)) : Promise.reject(error));
@@ -33,7 +35,7 @@ function search(imdbId, config = {}, retries = 1) {
const page = config.page || 1;
return Promises.first(defaultProxies
.map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}&imdb_id=${id}`, config)))
.map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}&imdb_id=${id}`, config)))
.then(results => parseResults(results))
.then(torrents => torrents.length === limit && page < maxPage
? search(imdbId, { ...config, page: page + 1 }).catch(() => [])
@@ -46,27 +48,21 @@ function browse(config = {}, retries = 1) {
const page = config.page || 1;
return Promises.first(defaultProxies
.map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}`, config)))
.map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}`, config)))
.then(results => parseResults(results))
.catch(error => retries ? jitter().then(() => browse(config, retries - 1)) : Promise.reject(error));
}
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = {
userAgent: getRandomUserAgent(),
open_timeout: timeout,
response_timeout: timeout,
read_timeout: timeout,
follow: 2
};
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
return needle('get', requestUrl, options)
return axios.get(requestUrl, options)
.then(response => {
if (!response.body) {
if (!response.data) {
return Promise.reject(`No body: ${requestUrl}`);
}
return Promise.resolve(response.body);
return Promise.resolve(response.data);
});
}
@@ -99,10 +95,11 @@ function parseTorrentPage(body) {
reject(new Error('Failed loading body'));
}
const content = $('table[class="forum_header_border_normal"]');
const magnetLink = content.find('a[title="Magnet Link"]').attr('href');
const torrent = {
name: content.find('h1 > span').text().replace(/EZTV$/, ''),
infoHash: content.find('b:contains(\'Torrent Hash:\')')[0].nextSibling.data.trim().toLowerCase(),
magnetLink: content.find('a[title="Magnet Link"]').attr('href'),
infoHash: decode(magnetLink).infoHash,
magnetLink: magnetLink,
torrentLink: content.find('a[title="Download Torrent"]').attr('href'),
seeders: parseInt(content.find('span[class="stat_red"]').first().text(), 10) || 0,
size: parseSize(content.find('b:contains(\'Filesize:\')')[0].nextSibling.data),
@@ -113,21 +110,6 @@ function parseTorrentPage(body) {
});
}
/**
 * Convert a human-readable size string (e.g. "1.5 GB", "512 kB") to bytes.
 * @param {string} sizeText - size text, possibly with comma/apostrophe separators
 * @returns {number|undefined} whole number of bytes, or undefined for empty input
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  // Unit substring -> byte multiplier; checked in order, first match wins.
  const unitScales = [
    ['GB', 1024 ** 3],
    ['MB', 1024 ** 2],
    ['KB', 1024],
    ['kB', 1024],
  ];
  const matched = unitScales.find(([unit]) => sizeText.includes(unit));
  const scale = matched ? matched[1] : 1;
  // Strip apostrophe/comma thousand separators before parsing the number.
  return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale);
}
// Resolve after a randomised delay of minDelay..(minDelay + jitterDelay) ms.
// Used by the retry paths in this scraper to spread out repeated proxy requests.
function jitter() {
return Promises.delay(minDelay + Math.round(Math.random() * jitterDelay))
}

View File

@@ -1,5 +1,5 @@
const axios = require('axios');
const cheerio = require('cheerio');
const needle = require('needle');
const moment = require('moment');
const Promises = require('../../lib/promises');
@@ -45,8 +45,8 @@ function _getContent(endpoint, config = {},) {
? endpoint.replace(/https?:\/\/[^/]+/, baseUrl)
: `${baseUrl}${endpoint}`;
return needle('get', url, { open_timeout: timeout })
.then((response) => response.body)
return axios.get(url, { timeout: timeout })
.then((response) => response.data)
.then((body) => cheerio.load(body));
}

View File

@@ -1,8 +1,9 @@
const axios = require('axios');
const cheerio = require('cheerio');
const needle = require('needle');
const moment = require('moment');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { parseSize } = require("../scraperHelper");
const defaultProxies = [
'https://katcr.co'
@@ -28,7 +29,7 @@ function torrent(torrentId, config = {}, retries = 2) {
const proxyList = config.proxyList || defaultProxies;
return Promises.first(proxyList
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`, config)))
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`, config)))
.then((body) => parseTorrentPage(body))
.then((torrent) => ({ torrentId, ...torrent }))
.catch((err) => torrent(torrentId, config, retries - 1));
@@ -43,7 +44,7 @@ function search(keyword, config = {}, retries = 2) {
const category = config.category;
return Promises.first(proxyList
.map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/99/${category}`, config)))
.map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/99/${category}`, config)))
.then((body) => parseTableBody(body))
.catch((err) => search(keyword, config, retries - 1));
}
@@ -57,18 +58,18 @@ function browse(config = {}, retries = 2) {
const category = config.category;
return Promises.first(proxyList
.map((proxyUrl) => singleRequest(`${proxyUrl}/category/${category}/page/${page}`, config)))
.map((proxyUrl) => singleRequest(`${proxyUrl}/category/${category}/page/${page}`, config)))
.then((body) => parseTableBody(body))
.catch((err) => browse(config, retries - 1));
}
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = { open_timeout: timeout, follow: 2 };
const options = { timeout: timeout };
return needle('get', requestUrl, options)
return axios.get(requestUrl, options)
.then((response) => {
const body = response.body;
const body = response.data;
if (!body) {
throw new Error(`No body: ${requestUrl}`);
} else if (body.includes('Access Denied')) {
@@ -157,19 +158,4 @@ function parseTorrentPage(body) {
});
}
/**
 * Converts a human-readable size string (e.g. "1.5 GB", "700 MB", "512 KB")
 * into a byte count. Comma/apostrophe thousands separators are removed before
 * parsing; text without a recognized unit is treated as a plain byte count.
 * @param {string} sizeText - size text with an optional GB/MB/KB suffix.
 * @returns {number|undefined} size in bytes (floored), or undefined when the
 *   input is missing.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  // Unit lookup ordered by precedence; first match wins.
  const units = [
    ['GB', 1024 * 1024 * 1024],
    ['MB', 1024 * 1024],
    ['KB', 1024],
    ['kB', 1024],
  ];
  const matched = units.find(([unit]) => sizeText.includes(unit));
  const scale = matched ? matched[1] : 1;
  const numericText = sizeText.replace(/[',]/g, '');
  return Math.floor(parseFloat(numericText) * scale);
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,4 +1,4 @@
const needle = require("needle")
const axios = require('axios');
const moment = require("moment")
const cheerio = require("cheerio");
const decode = require('magnet-uri');
@@ -21,12 +21,11 @@ function torrent(torrentId, config = {}, retries = 2) {
if (!torrentId || retries === 0) {
return Promise.reject(new Error(`Failed ${torrentId} query`));
}
const slug = torrentId.split('?p=')[1];
return singleRequest(`${baseUrl}/?p=${slug}`, config)
return singleRequest(`${baseUrl}/${torrentId}`, config)
.then((body) => parseTorrentPage(body))
.then((torrent) => torrent.map(el => ({ torrentId: slug, ...el })))
.then((torrent) => torrent.map(el => ({ torrentId, ...el })))
.catch((err) => {
console.warn(`Failed Lapumia ${slug} request: `, err);
console.warn(`Failed Lapumia ${torrentId} request: `, err);
return torrent(torrentId, config, retries - 1)
});
}
@@ -62,11 +61,11 @@ function browse(config = {}, retries = 2) {
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
const options = { userAgent: getRandomUserAgent(), timeout: timeout, follow: 2 };
return needle('get', requestUrl, options)
return axios.get(requestUrl, options)
.then((response) => {
const body = response.body;
const body = response.data;
if (!body) {
throw new Error(`No body: ${requestUrl}`);
} else if (body.includes('502: Bad gateway') ||
@@ -74,7 +73,8 @@ function singleRequest(requestUrl, config = {}) {
throw new Error(`Invalid body contents: ${requestUrl}`);
}
return body;
});
})
.catch(error => Promise.reject(error.message || error));
}
function parseTableBody(body) {
@@ -89,10 +89,14 @@ function parseTableBody(body) {
$('div.post').each((i, element) => {
const row = $(element);
torrents.push({
name: row.find("div > a").text(),
torrentId: row.find("div > a").attr("href")
});
try {
torrents.push({
name: row.find("div > a").text(),
torrentId: row.find("div > a").attr("href").split('/')[3]
});
} catch (e) {
console.log("Failed parsing Lupumia table entry")
}
});
resolve(torrents);
});
@@ -112,7 +116,7 @@ function parseTorrentPage(body) {
.map((i, section) => $(section).attr("href")).get();
const category = parseCategory($('div.category').html());
const details = $('div.content')
const torrents = magnets.map(magnetLink => ({
const torrents = magnets.filter(magnetLink => decode(magnetLink).name).map(magnetLink => ({
title: sanitizePtName(escapeHTML(decode(magnetLink).name.replace(/\+/g, ' '))),
originalName: sanitizePtOriginalName(details.find('b:contains(\'Titulo Original:\')')[0].nextSibling.nodeValue),
year: details.find('b:contains(\'Ano de Lançamento:\')')[0].nextSibling.nodeValue.trim(),

View File

@@ -33,7 +33,8 @@ async function updateSeeders(torrent) {
async function scrapeLatestTorrents() {
const allowedCategories = [
lapumia.Categories.MOVIE
lapumia.Categories.MOVIE,
lapumia.Categories.TV
];
return Promises.sequence(allowedCategories
@@ -102,9 +103,6 @@ function typeMapping() {
}
function untilPage(category) {
if (lapumia.Categories.TV === category) {
return 5;
}
if (lapumia.Categories.ANIME === category) {
return 2;
}

View File

@@ -1,4 +1,5 @@
const { si } = require('nyaapi')
const { parseSize } = require("../scraperHelper");
const Categories = {
ANIME: {
@@ -34,7 +35,7 @@ function torrent(torrentId) {
}
function search(query) {
return si.search(query)
return si.search(query, null, { category: Categories.ANIME.ENGLISH})
.then(results => results.map(torrent => parseTorrent(torrent)));
}
@@ -62,19 +63,4 @@ function parseTorrent(torrent) {
}
}
/**
 * Converts a human-readable size string into a byte count. Accepts binary
 * prefixes ("1.5 GiB", "700 MiB", "512 KiB") and, generalized, the plain
 * forms ("GB", "MB", "kB") as well — previously a plain "GB" suffix was not
 * recognized and the value was misread as raw bytes. Comma/apostrophe
 * thousands separators are stripped before parsing.
 * @param {string} sizeText - size text with an optional unit suffix.
 * @returns {number|undefined} size in bytes (floored), or undefined when the
 *   input is missing or the numeric part cannot be parsed.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  let scale = 1;
  if (/Gi?B/.test(sizeText)) {
    scale = 1024 * 1024 * 1024;
  } else if (/Mi?B/.test(sizeText)) {
    scale = 1024 * 1024;
  } else if (/[Kk]i?B/.test(sizeText)) {
    scale = 1024;
  }
  // Strip thousands separators, e.g. "1,024 MiB".
  const value = parseFloat(sizeText.replace(/[',]/g, ''));
  // Guard against unparseable text, which previously produced NaN.
  return Number.isNaN(value) ? undefined : Math.floor(value * scale);
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -20,6 +20,10 @@ async function scrape() {
// return Promise.all(ids.map(id => limiter.schedule(() => nyaasi.torrent(id)
// .then(torrent => processTorrentRecord(torrent)))))
// .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
// const queries = ['Sagrada Reset', 'Sakurada Reset'];
// return Promise.all(queries.map(query => limiter.schedule(() => nyaasi.search(query)
// .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))))))))
// .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
return scrapeLatestTorrents()
.then(() => {
lastScrape.lastScraped = scrapeStart;

View File

@@ -1,11 +1,11 @@
const needle = require("needle")
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages } = require('../scraperHelper')
const defaultTimeout = 10000;
const defaultTimeout = 30000;
const maxSearchPage = 50
const baseUrl = 'https://ondebaixa.com';
@@ -62,11 +62,11 @@ function browse(config = {}, retries = 2) {
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
return needle('get', requestUrl, options)
return axios.get(requestUrl, options)
.then((response) => {
const body = response.body;
const body = response.data;
if (!body) {
throw new Error(`No body: ${requestUrl}`);
} else if (body.includes('502: Bad gateway') ||
@@ -74,7 +74,8 @@ function singleRequest(requestUrl, config = {}) {
throw new Error(`Invalid body contents: ${requestUrl}`);
}
return body;
});
})
.catch(error => Promise.reject(error.message || error));
}
function parseTableBody(body) {
@@ -112,12 +113,12 @@ function parseTorrentPage(body) {
const category = details.find('span:contains(\'Gêneros: \')').next().html()
const torrents = magnets.map(magnetLink => {
const decodedMagnet = decode(magnetLink);
const name = escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' ');
const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '));
const originalTitle = details.find('span:contains(\'Título Original: \')').next().text().trim();
const year = details.find('span:contains(\'Ano de Lançamento: \')').next().text().trim();
const fallbackTitle = `${originalTitle} ${year}`;
const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`;
return {
title: name ? sanitizePtName(name) : fallbackTitle,
title: name.length > 5 ? name : fallBackTitle,
originalName: sanitizePtOriginalName(originalTitle),
year: year,
infoHash: decodedMagnet.infoHash,

View File

@@ -107,12 +107,6 @@ function typeMapping() {
}
function untilPage(category) {
if (ondebaixa.Categories.DESENHOS === category) {
return 5;
}
if (ondebaixa.Categories.TV === category) {
return 5;
}
return UNTIL_PAGE;
}

View File

@@ -1,10 +1,12 @@
const needle = require('needle');
const axios = require('axios');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require("../../lib/requestHelper");
const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
const appId = 'torrentio-addon';
const defaultTimeout = 30000;
const retryDelay = 3000;
let token;
@@ -22,6 +24,7 @@ const Options = {
MOVIES_X265_4K_HDR: [52],
MOVIES_FULL_BD: [42],
MOVIES_BD_REMUX: [46],
MOVIES_HIGH_RES: [47, 50, 51, 52, 46],
TV_EPISODES: [18],
TV_UHD_EPISODES: [49],
TV_HD_EPISODES: [41],
@@ -84,33 +87,43 @@ function browse(params = {}) {
return singleRequest(parameters).then(results => parseResults(results));
}
async function singleRequest(params = {}, config = {}, retries = 10) {
async function singleRequest(params = {}, config = {}, retries = 15) {
const timeout = config.timeout || defaultTimeout;
const options = { open_timeout: timeout, follow: 2 };
const headers = {
'user-agent': getRandomUserAgent(),
'accept-encoding': 'gzip, deflate',
'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4'
};
params.token = await getToken();
params.app_id = appId;
Object.keys(params)
.filter(key => params[key] === undefined || params[key] === null)
.forEach(key => delete params[key]);
return needle('get', baseUrl, params, options)
const options = { headers, timeout, params };
return axios.get(baseUrl, options)
.then(response => {
if (response.body && response.body.error_code === 4) {
if (response.data && response.data.error_code === 4) {
// token expired
token = undefined;
return singleRequest(params, config);
}
if ((!response.body || !response.body.length || [5, 20].includes(response.body.error_code)) && retries > 0) {
if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) {
// too many requests
return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1));
return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
}
if (response.statusCode !== 200 || (response.body && response.body.error)) {
if (response.status !== 200 || (response.data && response.data.error)) {
// something went wrong
return Promise.reject(response.body || `Failed RARGB request with status=${response.statusCode}`);
return Promise.reject(response.data || `Failed RARGB request with status=${response.status}`);
}
return response.body;
return response.data;
})
.catch(error => {
if (error.response && [429].includes(error.response.status) && retries > 0) {
return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
}
return Promise.reject(error.message || error);
});
}
@@ -137,9 +150,10 @@ function parseResult(result) {
async function getToken() {
if (!token) {
const options = { open_timeout: defaultTimeout };
token = await needle('get', baseUrl, { get_token: 'get_token', app_id: appId }, options)
.then(response => response.body.token);
const params = { get_token: 'get_token', app_id: appId };
const options = { timeout: defaultTimeout, params };
token = await axios.get(baseUrl, options)
.then(response => response.data.token);
}
return token;
}

View File

@@ -12,14 +12,10 @@ const allowedCategories = [
rarbg.Options.category.MOVIES_XVID,
rarbg.Options.category.MOVIES_XVID_720P,
rarbg.Options.category.MOVIES_X265_1080P,
rarbg.Options.category.MOVIES_X265_4K,
rarbg.Options.category.MOVIES_X265_4K_HDR,
rarbg.Options.category.MOVIES_X264,
rarbg.Options.category.MOVIES_X264_720P,
rarbg.Options.category.MOVIES_X264_1080P,
rarbg.Options.category.MOVIES_X264_3D,
rarbg.Options.category.MOVIES_X264_4K,
rarbg.Options.category.MOVIES_BD_REMUX,
rarbg.Options.category.MOVIES_HIGH_RES,
rarbg.Options.category.TV_EPISODES,
rarbg.Options.category.TV_UHD_EPISODES,
rarbg.Options.category.TV_HD_EPISODES
@@ -32,8 +28,8 @@ async function scrape() {
//const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
return Promise.all(
seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
.then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
}

View File

@@ -36,21 +36,17 @@ async function scrapeLatestTorrents() {
rarbg.Options.category.MOVIES_XVID,
rarbg.Options.category.MOVIES_XVID_720P,
rarbg.Options.category.MOVIES_X265_1080P,
rarbg.Options.category.MOVIES_X265_4K,
rarbg.Options.category.MOVIES_X265_4K_HDR,
rarbg.Options.category.MOVIES_X264,
rarbg.Options.category.MOVIES_X264_720P,
rarbg.Options.category.MOVIES_X264_1080P,
rarbg.Options.category.MOVIES_X264_3D,
rarbg.Options.category.MOVIES_X264_4K,
rarbg.Options.category.MOVIES_BD_REMUX,
rarbg.Options.category.MOVIES_HIGH_RES,
rarbg.Options.category.TV_EPISODES,
rarbg.Options.category.TV_UHD_EPISODES,
rarbg.Options.category.TV_HD_EPISODES
];
return Promises.sequence(allowedCategories
.map(category => () => limiter.schedule(() => scrapeLatestTorrentsForCategory(category))))
.map(category => () => limiter.schedule(() => scrapeLatestTorrentsForCategory(category))))
.then(entries => entries.reduce((a, b) => a.concat(b), []));
}

View File

@@ -1,5 +1,5 @@
const axios = require('axios');
const cheerio = require('cheerio');
const needle = require('needle');
const moment = require('moment');
const decode = require('magnet-uri');
const { defaultOptionsWithProxy } = require('../../lib/requestHelper');
@@ -8,6 +8,7 @@ const baseUrl = 'http://www.rutor.info';
const defaultTimeout = 10000;
const Categories = {
ALL: '0',
FOREIGN_FILMS: '1',
RUSSIAN_FILMS: '5',
SCIENCE_FILMS: '12',
@@ -57,13 +58,13 @@ function files(torrentId) {
}
function singleRequest(requestUrl) {
const options = { ...defaultOptionsWithProxy(), open_timeout: defaultTimeout, follow: 2 };
const options = { ...defaultOptionsWithProxy(), timeout: defaultTimeout };
return needle('get', requestUrl, options)
return axios.get(requestUrl, options)
.then((response) => {
const body = response.body;
const body = response.data;
if (!body) {
throw new Error(`No body: ${requestUrl} with status ${response.statusCode}`);
throw new Error(`No body: ${requestUrl} with status ${response.status}`);
} else if (body.includes('Access Denied')) {
console.log(`Access Denied: ${requestUrl}`);
throw new Error(`Access Denied: ${requestUrl}`);

View File

@@ -23,4 +23,19 @@ function sanitizePtLanguages(languages) {
.trim();
}
module.exports = { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages }
/**
 * Converts a human-readable size string into a byte count. Recognizes binary
 * and decimal suffixes ("GiB"/"GB"/"Go", "MiB"/"MB"/"Mo", "KiB"/"kB"/"Ko");
 * text without a recognized unit is treated as a plain byte count.
 * Comma/apostrophe thousands separators are removed before parsing.
 * @param {string} sizeText - size text with an optional unit suffix.
 * @returns {number|undefined} size in bytes (floored), or undefined when the
 *   input is missing.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  // Unit patterns ordered by precedence; first match wins.
  const unitScales = [
    [/Gi?B|Go/, 1024 * 1024 * 1024],
    [/Mi?B|Mo/, 1024 * 1024],
    [/[Kk]i?B|Ko/, 1024],
  ];
  const matched = unitScales.find(([pattern]) => pattern.test(sizeText));
  const scale = matched ? matched[1] : 1;
  const numericText = sizeText.replace(/[',]/g, '');
  return Math.floor(parseFloat(numericText) * scale);
}
module.exports = { parseSize, isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages }

View File

@@ -1,4 +1,4 @@
const needle = require('needle');
const axios = require('axios');
const { escapeHTML } = require('../../lib/metadata');
const baseUrl = 'https://apibay.org';
@@ -102,10 +102,10 @@ function browse(config = {}, retries = 2) {
async function _request(endpoint) {
const url = `${baseUrl}/${endpoint}`;
return needle('get', url, { open_timeout: timeout })
return axios.get(url, { timeout: timeout })
.then(response => {
if (typeof response.body === 'object') {
return response.body;
if (typeof response.data === 'object') {
return response.data;
}
return Promise.reject(`Unexpected response body`);
});

View File

@@ -1,5 +1,5 @@
const axios = require('axios');
const moment = require('moment');
const needle = require('needle');
const Bottleneck = require('bottleneck');
const { ungzip } = require('node-gzip');
const LineByLineReader = require('line-by-line');
@@ -60,7 +60,7 @@ async function scrape() {
}
limiter.schedule(() => processTorrentRecord(torrent)
.catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
.catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
.then(() => limiter.empty())
.then((empty) => empty && lr.resume())
.then(() => entriesProcessed++);
@@ -160,8 +160,8 @@ function downloadDump(dump) {
}
console.log('downloading dump file...');
return needle('get', dump.url, { open_timeout: 2000, output: '/tmp/tpb_dump.gz' })
.then((response) => response.body)
return axios.get(dump.url, { timeout: 2000, responseType: 'stream' })
.then((response) => response.data)
.then((body) => {
console.log('unzipping dump file...');
return ungzip(body);

View File

@@ -7,7 +7,6 @@ const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'ThePirateBay';
const UNTIL_PAGE = 5;
const limiter = new Bottleneck({ maxConcurrent: 10 });
@@ -54,7 +53,7 @@ async function scrapeLatestTorrentsForCategory(category, page = 1) {
return Promise.resolve([]);
})
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE
.then(resolved => resolved.length > 0 && page < getUntilPage(category)
? scrapeLatestTorrentsForCategory(category, page + 1)
: Promise.resolve());
}
@@ -84,4 +83,16 @@ async function processTorrentRecord(record) {
return createTorrentEntry(torrent);
}
/**
 * Determines how many listing pages to scrape for a ThePirateBay category:
 * 3D movies get a single page, TV shows (SD and HD) get ten, and every other
 * category falls back to five.
 * @param {*} category - a thepiratebay.Categories.VIDEO value.
 * @returns {number} the last page number to scrape (inclusive).
 */
function getUntilPage(category) {
  const { MOVIES_3D, TV_SHOWS, TV_SHOWS_HD } = thepiratebay.Categories.VIDEO;
  if (category === MOVIES_3D) {
    return 1;
  }
  if (category === TV_SHOWS || category === TV_SHOWS_HD) {
    return 10;
  }
  return 5;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,9 +1,10 @@
const axios = require('axios');
const cheerio = require('cheerio');
const needle = require('needle');
const moment = require('moment');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
const defaultProxies = [
// 'https://torrentgalaxy.to',
@@ -34,7 +35,7 @@ function torrent(torrentId, config = {}, retries = 2) {
const proxyList = config.proxyList || defaultProxies;
return Promises.first(proxyList
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`)))
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`)))
.then((body) => parseTorrentPage(body))
.then((torrent) => ({ torrentId, ...torrent }))
.catch((err) => torrent(torrentId, config, retries - 1));
@@ -49,7 +50,7 @@ function search(keyword, config = {}, retries = 2) {
const category = config.category;
return Promises.first(proxyList
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}&search=${keyword}`)))
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}&search=${keyword}`)))
.then((body) => parseTableBody(body))
.catch(() => search(keyword, config, retries - 1));
}
@@ -63,19 +64,19 @@ function browse(config = {}, retries = 2, error = null) {
const category = config.category;
return Promises.first(proxyList
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`)))
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`)))
.then((body) => parseTableBody(body))
.catch((err) => browse(config, retries - 1, err));
}
function singleRequest(requestUrl) {
const options = { userAgent: getRandomUserAgent(), open_timeout: defaultTimeout, follow: 2 };
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: defaultTimeout };
return needle('get', requestUrl, options)
return axios.get(requestUrl, options)
.then((response) => {
const body = response.body;
const body = response.data;
if (!body) {
throw new Error(`No body: ${requestUrl} with status ${response.statusCode}`);
throw new Error(`No body: ${requestUrl} with status ${response.status}`);
} else if (body.includes('Access Denied')) {
console.log(`Access Denied: ${requestUrl}`);
throw new Error(`Access Denied: ${requestUrl}`);
@@ -157,21 +158,6 @@ function parseTorrentPage(body) {
});
}
function parseSize(sizeText) {
if (!sizeText) {
return undefined;
}
let scale = 1;
if (sizeText.includes('GB')) {
scale = 1024 * 1024 * 1024
} else if (sizeText.includes('MB')) {
scale = 1024 * 1024;
} else if (sizeText.includes('KB') || sizeText.includes('kB')) {
scale = 1024;
}
return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale);
}
function parseDate(dateString) {
if (dateString.includes('ago')) {
const amount = parseInt(dateString, 10);

View File

@@ -1,4 +1,4 @@
const needle = require('needle');
const axios = require('axios');
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require('./../../lib/requestHelper');
@@ -14,7 +14,7 @@ function torrent(torrentId, config = {}, retries = 2) {
}
return Promises.first(defaultProxies
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/movie_details.json?movie_id=${torrentId}`, config)))
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/movie_details.json?movie_id=${torrentId}`, config)))
.then(body => parseResults(body))
.catch(error => torrent(torrentId, config, retries - 1));
}
@@ -25,7 +25,7 @@ function search(query, config = {}, retries = 2) {
}
return Promises.first(defaultProxies
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&query_term=${query}`, config)))
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&query_term=${query}`, config)))
.then(results => parseResults(results))
.catch(error => search(query, config, retries - 1));
}
@@ -37,21 +37,27 @@ function browse(config = {}, retries = 2) {
const page = config.page || 1;
return Promises.first(defaultProxies
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&page=${page}`, config)))
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&page=${page}`, config)))
.then(results => parseResults(results))
.catch(error => browse(config, retries - 1));
}
/**
 * Queries the YTS list endpoint (via the first proxy to respond) and derives
 * the number of result pages from the reported total movie count.
 * @returns {Promise<number>} total movie count divided by the page limit,
 *   rounded; 0 when the response carries no count.
 */
function maxPage() {
  const requests = defaultProxies
    .map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}`));
  return Promises.first(requests)
    .then(results => {
      const movieCount = results?.data?.movie_count || 0;
      return Math.round(movieCount / limit);
    });
}
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
return needle('get', requestUrl, options)
return axios.get(requestUrl, options)
.then(response => {
if (!response.body) {
if (!response.data) {
return Promise.reject(`No body: ${requestUrl}`);
}
return Promise.resolve(response.body);
return Promise.resolve(response.data);
});
}
@@ -89,4 +95,4 @@ function formatType(type) {
return type.toUpperCase();
}
module.exports = { torrent, search, browse };
module.exports = { torrent, search, browse, maxPage };

View File

@@ -6,16 +6,16 @@ const repository = require('../../lib/repository');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'YTS';
const UNTIL_PAGE = 2;
const UNTIL_PAGE = 10;
const limiter = new Bottleneck({ maxConcurrent: 10 });
async function scrape() {
async function scrape(maxPage) {
const scrapeStart = moment();
const lastScrape = await repository.getProvider({ name: NAME });
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
return scrapeLatestTorrents()
return scrapeLatestTorrentsForCategory(maxPage)
.then(() => {
lastScrape.lastScraped = scrapeStart;
return lastScrape.save();
@@ -27,11 +27,7 @@ async function updateSeeders(torrent) {
return limiter.schedule(() => yts.torrent(torrent.torrentId));
}
async function scrapeLatestTorrents() {
return scrapeLatestTorrentsForCategory();
}
async function scrapeLatestTorrentsForCategory(page = 1) {
async function scrapeLatestTorrentsForCategory(maxPage = UNTIL_PAGE, page = 1) {
console.log(`Scrapping ${NAME} page ${page}`);
return yts.browse(({ page }))
.catch(error => {
@@ -39,8 +35,8 @@ async function scrapeLatestTorrentsForCategory(page = 1) {
return Promise.resolve([]);
})
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE
? scrapeLatestTorrentsForCategory(page + 1)
.then(resolved => resolved.length > 0 && page < maxPage
? scrapeLatestTorrentsForCategory(maxPage, page + 1)
: Promise.resolve());
}