Updated scrapers to latest available commit
This commit is contained in:
@@ -1,16 +1,17 @@
|
||||
const needle = require('needle');
|
||||
const axios = require('axios');
|
||||
const nameToImdb = require('name-to-imdb');
|
||||
const googleIt = require('google-it');
|
||||
const googleSr = require('google-sr');
|
||||
const bing = require('nodejs-bing');
|
||||
const he = require('he');
|
||||
const { cacheWrapImdbId, cacheWrapKitsuId, cacheWrapMetadata } = require('./cache');
|
||||
const { Type } = require('./types');
|
||||
const { getRandomUserAgent } = require('./requestHelper');
|
||||
|
||||
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
|
||||
const KITSU_URL = 'https://anime-kitsu.strem.fun';
|
||||
const TIMEOUT = 20000;
|
||||
const selectors = {
|
||||
...googleSr.defaultSelectors,
|
||||
LinkSelector: 'a:has(h3)'
|
||||
}
|
||||
|
||||
function getMetadata(id, type = Type.SERIES) {
|
||||
if (!id) {
|
||||
@@ -32,9 +33,9 @@ function getMetadata(id, type = Type.SERIES) {
|
||||
}
|
||||
|
||||
function _requestMetadata(url) {
|
||||
return needle('get', url, { open_timeout: TIMEOUT })
|
||||
return axios.get(url, { timeout: TIMEOUT })
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (body && body.meta && (body.meta.imdb_id || body.meta.kitsu_id)) {
|
||||
return {
|
||||
kitsuId: body.meta.kitsu_id,
|
||||
@@ -85,7 +86,7 @@ function escapeTitle(title) {
|
||||
.replace(/[\u0300-\u036F]/g, '')
|
||||
.replace(/&/g, 'and')
|
||||
.replace(/[;, ~./]+/g, ' ') // replace dots, commas or underscores with spaces
|
||||
.replace(/[^\w \-()+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars
|
||||
.replace(/[^\w \-()×+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars
|
||||
.replace(/^\d{1,2}[.#\s]+(?=(?:\d+[.\s]*)?[\u0400-\u04ff])/i, '') // remove russian movie numbering
|
||||
.replace(/\s{2,}/, ' ') // replace multiple spaces
|
||||
.trim();
|
||||
@@ -111,13 +112,10 @@ async function getImdbId(info, type) {
|
||||
reject(err || new Error('failed imdbId search'));
|
||||
}
|
||||
});
|
||||
// }).catch(() => googleIt({ query, userAgent: getRandomUserAgent(), disableConsole: true })
|
||||
// .then(results => results.length ? results : Promise.reject('No results'))
|
||||
}).catch(() => googleSr(query)
|
||||
.then(response => response.searchResults.length ? response.searchResults : Promise.reject('No results'))
|
||||
// .catch(() => bing.web(query))
|
||||
}).catch(() => googleSr.search(query, { selectors })
|
||||
.then(response => response.length ? response : Promise.reject('No results'))
|
||||
.then(results => results
|
||||
.map(result => result.link)
|
||||
.map(result => result.Link)
|
||||
.find(result => result.includes('imdb.com/title/')))
|
||||
.then(result => result && result.match(/imdb\.com\/title\/(tt\d+)/))
|
||||
.then(match => match && match[1])))
|
||||
@@ -132,9 +130,9 @@ async function getKitsuId(info) {
|
||||
const query = encodeURIComponent(key);
|
||||
|
||||
return cacheWrapKitsuId(key,
|
||||
() => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 })
|
||||
() => axios.get(`${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { timeout: 60000 })
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (body && body.metas && body.metas.length) {
|
||||
return body.metas[0].id.replace('kitsu:', '');
|
||||
} else {
|
||||
@@ -147,8 +145,8 @@ async function isEpisodeImdbId(imdbId) {
|
||||
if (!imdbId) {
|
||||
return false;
|
||||
}
|
||||
return needle('get', `https://www.imdb.com/title/${imdbId}/`, { open_timeout: 10000, follow: 2 })
|
||||
.then(response => !!(response.body && response.body.includes('video.episode')))
|
||||
return axios.get(`https://www.imdb.com/title/${imdbId}/`, { timeout: 10000 })
|
||||
.then(response => !!(response.data && response.data.includes('video.episode')))
|
||||
.catch((err) => false);
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ function parseSeriesVideo(video, parsedTorrentName) {
|
||||
// force episode to any found number if it was not parsed
|
||||
if (!videoInfo.episodes && !videoInfo.date) {
|
||||
const epMatcher = videoInfo.title.match(
|
||||
/(?<!season\W*|disk\W*|movie\W*|film\W*)(?:^|\W)(\d{1,4})(?:a|b|c|v\d)?(?:\W|$)(?!disk|movie|film)/i);
|
||||
/(?<!season\W*|disk\W*|movie\W*|film\W*)(?:^|\W|_)(\d{1,4})(?:a|b|c|v\d)?(?:_|\W|$)(?!disk|movie|film)/i);
|
||||
videoInfo.episodes = epMatcher && [parseInt(epMatcher[1], 10)];
|
||||
videoInfo.episode = videoInfo.episodes && videoInfo.episodes[0];
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
const moment = require('moment');
|
||||
const Promises = require('./promises')
|
||||
const { Sequelize, DataTypes, fn, col, literal } = require('sequelize');
|
||||
const Op = Sequelize.Op;
|
||||
const { Sequelize, Op, DataTypes, fn, col, literal } = require('sequelize');
|
||||
|
||||
const DATABASE_URI = process.env.DATABASE_URI;
|
||||
|
||||
@@ -177,6 +176,10 @@ function getTorrentsBasedOnQuery(where) {
|
||||
return Torrent.findAll({ where: where });
|
||||
}
|
||||
|
||||
/**
 * Fetches every `File` record that satisfies the supplied filter.
 *
 * @param {object} where - filter forwarded unchanged as the `where` option of `File.findAll`.
 * @returns {*} whatever `File.findAll` returns (presumably a Promise of matching rows — confirm against the model).
 */
function getFilesBasedOnQuery(where) {
  const query = { where };
  return File.findAll(query);
}
|
||||
|
||||
function getTorrentsWithoutSize() {
|
||||
return Torrent.findAll({
|
||||
where: literal(
|
||||
@@ -326,6 +329,7 @@ module.exports = {
|
||||
getTorrent,
|
||||
getTorrentsBasedOnTitle,
|
||||
getTorrentsBasedOnQuery,
|
||||
getFilesBasedOnQuery,
|
||||
deleteTorrent,
|
||||
getUpdateSeedersTorrents,
|
||||
getUpdateSeedersNewTorrents,
|
||||
|
||||
@@ -8,14 +8,17 @@ function getRandomUserAgent() {
|
||||
/**
 * Builds the default request options, routing through a proxy when one is configured.
 *
 * The proxy is enabled only when both `PROXY_HOST` and `PROXY_TYPE` environment variables
 * are set. `PROXY_HOST` must contain `//<host>:<port>` (a scheme and `user:pass@`
 * credentials in front of the host are tolerated and ignored).
 *
 * @returns {{headers: object, proxy?: {host: string, port: number}}} request options with a
 *     random user agent, plus the proxy host/port and a `proxy-type` header when configured.
 * @throws {TypeError} when the proxy is enabled but `PROXY_HOST` has no `//<host>:<port>` part.
 */
function defaultOptionsWithProxy() {
  const proxyUrl = process.env.PROXY_HOST;
  const proxyType = process.env.PROXY_TYPE;
  if (!proxyUrl || !proxyType) {
    return { headers: { 'user-agent': getRandomUserAgent() } };
  }

  // Skip optional credentials so "user:pass@host:port" does not leak into the host,
  // and keep a bracketed IPv6 literal together so its inner colons are not read as the port.
  const match = /\/\/(?:[^@/]*@)?(\[[^\]]+\]|[^:/@]+):(\d+)/.exec(proxyUrl);
  if (!match) {
    throw new TypeError(`PROXY_HOST must look like <scheme>://<host>:<port>, got: ${proxyUrl}`);
  }

  return {
    proxy: {
      host: match[1],
      port: Number(match[2])
    },
    headers: {
      'user-agent': getRandomUserAgent(),
      'proxy-type': proxyType
    }
  };
}
|
||||
|
||||
module.exports = { getRandomUserAgent, defaultOptionsWithProxy };
|
||||
@@ -1,5 +1,5 @@
|
||||
const torrentStream = require('torrent-stream');
|
||||
const needle = require('needle');
|
||||
const axios = require('axios');
|
||||
const parseTorrent = require('parse-torrent');
|
||||
const BTClient = require('bittorrent-tracker')
|
||||
const async = require('async');
|
||||
@@ -113,12 +113,12 @@ async function filesFromTorrentFile(torrent) {
|
||||
return Promise.reject(new Error("no torrentLink"));
|
||||
}
|
||||
|
||||
return needle('get', torrent.torrentLink, { open_timeout: 10000 })
|
||||
return axios.get(torrent.torrentLink, { timeout: 10000, responseType: 'arraybuffer' })
|
||||
.then((response) => {
|
||||
if (!response.body || response.statusCode !== 200) {
|
||||
if (!response.data || response.status !== 200) {
|
||||
throw new Error('torrent not found')
|
||||
}
|
||||
return response.body
|
||||
return response.data
|
||||
})
|
||||
.then((body) => parseTorrent(body))
|
||||
.then((info) => info.files.map((file, fileId) => ({
|
||||
@@ -197,8 +197,8 @@ async function getTorrentTrackers(torrent) {
|
||||
}
|
||||
|
||||
async function getDefaultTrackers(torrent, retry = 3) {
|
||||
return cacheTrackers(() => needle('get', TRACKERS_URL, { open_timeout: SEEDS_CHECK_TIMEOUT })
|
||||
.then(response => response.body && response.body.trim())
|
||||
return cacheTrackers(() => axios.get(TRACKERS_URL, { timeout: SEEDS_CHECK_TIMEOUT })
|
||||
.then(response => response.data && response.data.trim())
|
||||
.then(body => body && body.split('\n\n') || []))
|
||||
.catch(() => retry > 0 ? delay(5000).then(() => getDefaultTrackers(torrent, retry - 1)) : [])
|
||||
.then(trackers => trackers.concat(ADDITIONAL_TRACKERS))
|
||||
|
||||
@@ -107,7 +107,7 @@ async function checkAndUpdateTorrent(torrent) {
|
||||
if (!storedTorrent.languages && torrent.languages && storedTorrent.provider !== 'RARBG') {
|
||||
storedTorrent.languages = torrent.languages;
|
||||
storedTorrent.save();
|
||||
console.log(`Updated [${torrent.infoHash}] ${torrent.title} language to ${torrent.languages}`);
|
||||
console.log(`Updated [${storedTorrent.infoHash}] ${storedTorrent.title} language to ${torrent.languages}`);
|
||||
}
|
||||
return createTorrentContents({ ...storedTorrent.get(), torrentLink: torrent.torrentLink })
|
||||
.then(() => updateTorrentSeeders(torrent));
|
||||
|
||||
@@ -106,7 +106,7 @@ async function getSeriesTorrentContent(torrent) {
|
||||
|
||||
async function mapSeriesEpisode(file, torrent, files) {
|
||||
if (!file.episodes && !file.kitsuEpisodes) {
|
||||
if (files.some(otherFile => otherFile.episodes || otherFile.kitsuEpisodes) || parse(torrent.title).seasons) {
|
||||
if (files.length === 1 || files.some(f => f.episodes || f.kitsuEpisodes) || parse(torrent.title).seasons) {
|
||||
return Promise.resolve({
|
||||
infoHash: torrent.infoHash,
|
||||
fileIndex: file.fileIndex,
|
||||
@@ -187,7 +187,7 @@ async function decomposeEpisodes(torrent, files, metadata = { episodeCount: [] }
|
||||
decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata);
|
||||
} else if (isDateEpisodeFiles(files, metadata)) {
|
||||
decomposeDateEpisodeFiles(torrent, files, metadata);
|
||||
} else if (isAbsoluteEpisodeFiles(files, metadata)) {
|
||||
} else if (isAbsoluteEpisodeFiles(torrent, files, metadata)) {
|
||||
decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
|
||||
}
|
||||
// decomposeEpisodeTitleFiles(torrent, files, metadata);
|
||||
@@ -237,26 +237,28 @@ function isDateEpisodeFiles(files, metadata) {
|
||||
return files.every(file => (!file.season || !metadata.episodeCount[file.season - 1]) && file.date);
|
||||
}
|
||||
|
||||
/**
 * Decides whether the files of a torrent look like they use absolute episode numbering.
 *
 * Returns true when any of the following holds:
 *  - no non-movie file with episodes carries a season;
 *  - the torrent is an anime with a kitsu id and every non-movie episode file has a season
 *    beyond the number of seasons listed in `metadata.episodeCount`;
 *  - at least a fifth of the files (rounded up) have a season and only episode numbers
 *    larger than that season's count in `metadata.episodeCount`.
 *
 * @param {object} torrent - reads `type` and `kitsuId`.
 * @param {Array<object>} files - each may carry `isMovie`, `season` and `episodes`.
 * @param {object} metadata - reads `episodeCount`, indexed by `season - 1`.
 * @returns {boolean}
 */
function isAbsoluteEpisodeFiles(torrent, files, metadata) {
  const threshold = Math.ceil(files.length / 5);
  const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
  const seasonsInMetadata = metadata.episodeCount.length;

  const nonMovieEpisodes = files.filter(file => !file.isMovie && file.episodes);
  if (nonMovieEpisodes.every(file => !file.season)) {
    return true;
  }
  if (isAnime && nonMovieEpisodes.every(file => file.season > seasonsInMetadata)) {
    return true;
  }

  // A season missing from the metadata yields `undefined < ep`, which is false,
  // so such files never count towards the threshold.
  const absoluteEpisodes = files
      .filter(file => file.season && file.episodes)
      .filter(file => file.episodes.every(ep => metadata.episodeCount[file.season - 1] < ep));
  return absoluteEpisodes.length >= threshold;
}
|
||||
|
||||
/**
 * Detects a file that is probably a newly aired episode the metadata does not list yet.
 *
 * A new episode might not have been indexed by cinemeta yet. This is detected when the
 * show is still running, the file's season is at least the last season in the metadata,
 * and every episode number is larger than that season's known episode count.
 * Only applies to non-anime metas, non-movie files and seasons other than the first.
 *
 * @param {object} torrent - reads `type` and `kitsuId`.
 * @param {object} file - reads `isMovie`, `season` and `episodes`.
 * @param {object} metadata - reads `status` and `episodeCount` (indexed by `season - 1`).
 * @returns {boolean}
 */
function isNewEpisodeNotInMetadata(torrent, file, metadata) {
  const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
  if (isAnime || file.isMovie || !file.episodes || file.season === 1) {
    return false;
  }
  if (!/continuing|current/i.test(metadata.status)) {
    return false;
  }
  // A season absent from the metadata counts as having zero known episodes.
  return file.season >= metadata.episodeCount.length
      && file.episodes.every(ep => ep > (metadata.episodeCount[file.season - 1] || 0));
}
|
||||
|
||||
function decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata) {
|
||||
@@ -282,6 +284,7 @@ function decomposeAbsoluteEpisodeFiles(torrent, files, metadata) {
|
||||
}
|
||||
files
|
||||
.filter(file => file.episodes && !file.isMovie && file.season !== 0)
|
||||
.filter(file => !isNewEpisodeNotInMetadata(torrent, file, metadata))
|
||||
.filter(file => !file.season || (metadata.episodeCount[file.season - 1] || 0) < file.episodes[0])
|
||||
.forEach(file => {
|
||||
const seasonIdx = ([...metadata.episodeCount.keys()]
|
||||
|
||||
38
scraper/package-lock.json
generated
38
scraper/package-lock.json
generated
@@ -38,6 +38,7 @@
|
||||
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#d1f7eaa8593b947edbfbc8a92a176448b48ef445",
|
||||
"request-ip": "^3.3.0",
|
||||
"router": "^1.3.8",
|
||||
"rss-parser": "^3.13.0",
|
||||
"sequelize": "^6.31.1",
|
||||
"stremio-addon-sdk": "^1.6.10",
|
||||
"sugar-date": "^2.0.6",
|
||||
@@ -3236,6 +3237,23 @@
|
||||
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-3.0.0.tgz",
|
||||
"integrity": "sha512-zPMVc3ZYlGLNk4mpK1NzP2wg0ml9t7fUgDsayR5Y5rSzxQilzR9FGu/EH2jQOcKSAeAfWeylyW8juy3OkWRvNA=="
|
||||
},
|
||||
"node_modules/rss-parser": {
|
||||
"version": "3.13.0",
|
||||
"resolved": "https://registry.npmjs.org/rss-parser/-/rss-parser-3.13.0.tgz",
|
||||
"integrity": "sha512-7jWUBV5yGN3rqMMj7CZufl/291QAhvrrGpDNE4k/02ZchL0npisiYYqULF71jCEKoIiHvK/Q2e6IkDwPziT7+w==",
|
||||
"dependencies": {
|
||||
"entities": "^2.0.3",
|
||||
"xml2js": "^0.5.0"
|
||||
}
|
||||
},
|
||||
"node_modules/rss-parser/node_modules/entities": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz",
|
||||
"integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==",
|
||||
"funding": {
|
||||
"url": "https://github.com/fb55/entities?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/run-async": {
|
||||
"version": "2.4.0",
|
||||
"resolved": "https://registry.npmjs.org/run-async/-/run-async-2.4.0.tgz",
|
||||
@@ -4252,6 +4270,26 @@
|
||||
"ultron": "1.0.x"
|
||||
}
|
||||
},
|
||||
"node_modules/xml2js": {
|
||||
"version": "0.5.0",
|
||||
"resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.5.0.tgz",
|
||||
"integrity": "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==",
|
||||
"dependencies": {
|
||||
"sax": ">=0.6.0",
|
||||
"xmlbuilder": "~11.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/xmlbuilder": {
|
||||
"version": "11.0.1",
|
||||
"resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz",
|
||||
"integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==",
|
||||
"engines": {
|
||||
"node": ">=4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/xtend": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz",
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#d1f7eaa8593b947edbfbc8a92a176448b48ef445",
|
||||
"request-ip": "^3.3.0",
|
||||
"router": "^1.3.8",
|
||||
"rss-parser": "^3.13.0",
|
||||
"sequelize": "^6.31.1",
|
||||
"stremio-addon-sdk": "^1.6.10",
|
||||
"sugar-date": "^2.0.6",
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
const thepiratebayScraper = require('../scrapers/thepiratebay/thepiratebay_scraper');
|
||||
const thepiratebayFakeRemoval = require('../scrapers/thepiratebay/thepiratebay_fakes_removal');
|
||||
const ytsScraper = require('../scrapers/yts/yts_scraper');
|
||||
const ytsFullScraper = require('../scrapers/yts/yts_full_scraper');
|
||||
const eztvScraper = require('../scrapers/eztv/eztv_scraper');
|
||||
const leetxScraper = require('../scrapers/1337x/1337x_scraper');
|
||||
const kickassScraper = require('../scrapers/kickass/kickass_scraper');
|
||||
const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
|
||||
const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper');
|
||||
const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper');
|
||||
@@ -16,9 +16,11 @@ const Lapumia = require('../scrapers/lapumia/lapumia_scraper')
|
||||
const OndeBaixa = require('../scrapers/ondebaixa/ondebaixa_scraper');
|
||||
const AnimesTorrent = require('../scrapers/animestorrent/animestorrent_scraper')
|
||||
const DarkMahou = require('../scrapers/darkmahou/darkmahou_scraper')
|
||||
const torrent9Scraper = require('../scrapers/torrent9/torrent9_scraper');
|
||||
|
||||
module.exports = [
|
||||
{ scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: ytsFullScraper, name: ytsFullScraper.NAME, cron: '0 0 0 * * 0' },
|
||||
{ scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: nyaaSiScraper, name: nyaaSiScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: nyaaPantsuScraper, name: nyaaPantsuScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
@@ -28,14 +30,14 @@ module.exports = [
|
||||
{ scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 0 */12 ? * *' },
|
||||
{ scraper: torrentGalaxyScraper, name: torrentGalaxyScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
// { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: torrent9Scraper, name: torrent9Scraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: Comando, name: Comando.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: ComoEuBaixo, name: ComoEuBaixo.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: Lapumia, name: Lapumia.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: OndeBaixa, name: OndeBaixa.NAME, cron: '0 0 */4 ? * *' },
|
||||
// { scraper: AnimesTorrent, name: AnimesTorrent.NAME, cron: '0 0 */4 ? * *' },
|
||||
// { scraper: DarkMahou, name: DarkMahou.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */24 ? * *' },
|
||||
{ scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
|
||||
// { scraper: require('../scrapers/1337x/1337x_search_scraper') }
|
||||
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
|
||||
|
||||
@@ -5,6 +5,9 @@ const { updateCurrentSeeders } = require('../lib/torrent')
|
||||
const { updateTorrentSeeders } = require('../lib/torrentEntries')
|
||||
|
||||
const DELAY_MS = 0; // 0 seconds
|
||||
const DELAY_NEW_MS = 30_000; // 30 seconds
|
||||
const DELAY_NO_NEW_MS = 300_000; // 300 seconds
|
||||
const DELAY_FAILED_TORRENTS_MS = 5_000; // 5 seconds
|
||||
const updateLimiter = new Bottleneck({ maxConcurrent: 5 });
|
||||
const statistics = {};
|
||||
const statisticsNew = {};
|
||||
@@ -31,18 +34,25 @@ function scheduleUpdateSeedersForNewTorrents() {
|
||||
.then(torrents => updateStatistics(torrents, statisticsNew))
|
||||
.then(() => console.log('Finished seeders update for new torrents:', statisticsNew))
|
||||
.catch(error => console.warn('Failed seeders update for new torrents:', error))
|
||||
.then(() => delay(30_000))
|
||||
.then(() => delay(DELAY_NEW_MS))
|
||||
.then(() => scheduleUpdateSeedersForNewTorrents());
|
||||
}
|
||||
|
||||
/**
 * Loads the torrents whose seeders should be updated, retrying until the lookup succeeds.
 *
 * A failed `repository.getUpdateSeedersTorrents()` call is logged and retried after
 * `DELAY_FAILED_TORRENTS_MS`; the function never rejects because of a lookup failure.
 *
 * @returns {Promise<*>} the value resolved by `repository.getUpdateSeedersTorrents()`.
 */
async function getTorrents() {
  // Loop instead of recursing so a long outage does not build an ever-growing promise chain.
  while (true) {
    try {
      return await repository.getUpdateSeedersTorrents();
    } catch (error) {
      console.warn('Failed retrieving torrents for seeders update, retrying:', error);
      await delay(DELAY_FAILED_TORRENTS_MS);
    }
  }
}
|
||||
|
||||
/**
 * Loads newly added torrents whose seeders should be updated, waiting until some exist.
 *
 * A failed `repository.getUpdateSeedersNewTorrents()` call is logged and retried after
 * `DELAY_FAILED_TORRENTS_MS`. An empty result is logged and the lookup is repeated after
 * `DELAY_NO_NEW_MS`, so the returned list is never empty.
 *
 * @returns {Promise<Array>} a non-empty list of torrents.
 */
async function getNewTorrents() {
  // Loop instead of recursing so long idle periods do not build an ever-growing promise chain.
  while (true) {
    let torrents;
    try {
      torrents = await repository.getUpdateSeedersNewTorrents();
    } catch (error) {
      console.warn('Failed retrieving new torrents for seeders update, retrying:', error);
      await delay(DELAY_FAILED_TORRENTS_MS);
      continue;
    }
    if (torrents.length) {
      return torrents;
    }
    console.log('No new torrents to update seeders');
    await delay(DELAY_NO_NEW_MS);
  }
}
|
||||
|
||||
function updateStatistics(updatedTorrents, statisticsObject) {
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const needle = require('needle');
|
||||
const Sugar = require('sugar-date');
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
const defaultProxies = [
|
||||
'https://1337x.to'
|
||||
@@ -32,7 +33,7 @@ function torrent(torrentId, config = {}, retries = 2) {
|
||||
const slug = torrentId.startsWith('/torrent/') ? torrentId.replace('/torrent/', '') : torrentId;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${slug}`, config)))
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${slug}`, config)))
|
||||
.then((body) => parseTorrentPage(body))
|
||||
.then((torrent) => ({ torrentId: slug, ...torrent }))
|
||||
.catch((err) => torrent(slug, config, retries - 1));
|
||||
@@ -51,7 +52,7 @@ function search(keyword, config = {}, retries = 2) {
|
||||
: `${proxyUrl}/search/${keyword}/${page}/`;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map(proxyUrl => singleRequest(requestUrl(proxyUrl), config)))
|
||||
.map(proxyUrl => singleRequest(requestUrl(proxyUrl), config)))
|
||||
.then(body => parseTableBody(body))
|
||||
.then(torrents => torrents.length === 40 && page < extendToPage
|
||||
? search(keyword, { ...config, page: page + 1 }).catch(() => [])
|
||||
@@ -73,18 +74,18 @@ function browse(config = {}, retries = 2) {
|
||||
: `${proxyUrl}/cat/${category}/${page}/`;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config)))
|
||||
.map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config)))
|
||||
.then((body) => parseTableBody(body))
|
||||
.catch((err) => browse(config, retries - 1));
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
|
||||
|
||||
return needle('get', requestUrl, options)
|
||||
return axios.get(requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (body.includes('502: Bad gateway') ||
|
||||
@@ -164,19 +165,4 @@ function parseDate(dateString) {
|
||||
return Sugar.Date.create(dateString);
|
||||
}
|
||||
|
||||
/**
 * Converts a human readable size such as "1.4 GB" or "1,024 KB" into a byte count.
 *
 * Units are binary multiples (1 KB = 1024 bytes) and are matched case-insensitively;
 * both "GB" and "GiB" style spellings are accepted for KB, MB, GB and TB.
 * Text without a recognised unit is treated as a plain byte count.
 *
 * @param {string} sizeText - size text starting with a number; thousands separators (",") are ignored.
 * @returns {number|undefined} whole number of bytes, or undefined when the text is empty
 *     or does not start with a number.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  const text = String(sizeText).replace(/,/g, '');
  const value = parseFloat(text);
  if (Number.isNaN(value)) {
    return undefined;
  }

  // Largest unit first, so the first hit decides the scale.
  const units = [
    ['T', 1024 * 1024 * 1024 * 1024],
    ['G', 1024 * 1024 * 1024],
    ['M', 1024 * 1024],
    ['K', 1024],
  ];
  const upperText = text.toUpperCase();
  const unit = units.find(([prefix]) => upperText.includes(`${prefix}B`) || upperText.includes(`${prefix}IB`));
  const scale = unit ? unit[1] : 1;
  return Math.floor(value * scale);
}
|
||||
|
||||
module.exports = { torrent, search, browse, Categories };
|
||||
|
||||
@@ -92,7 +92,7 @@ async function processTorrentRecord(record) {
|
||||
function typeMapping() {
|
||||
const mapping = {};
|
||||
mapping[leetx.Categories.MOVIE] = Type.MOVIE;
|
||||
mapping[leetx.Categories.DOCUMENTARIES] = Type.SERIES;
|
||||
mapping[leetx.Categories.DOCUMENTARIES] = Type.MOVIE;
|
||||
mapping[leetx.Categories.TV] = Type.SERIES;
|
||||
mapping[leetx.Categories.ANIME] = Type.ANIME;
|
||||
return mapping;
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
const needle = require("needle");
|
||||
const axios = require('axios');
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require("magnet-uri");
|
||||
const Promises = require("../../lib/promises");
|
||||
const { getRandomUserAgent } = require("../../lib/requestHelper");
|
||||
|
||||
const defaultTimeout = 10000;
|
||||
@@ -59,14 +58,10 @@ function browse(config = {}, retries = 2) {
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = {
|
||||
userAgent: getRandomUserAgent(),
|
||||
open_timeout: timeout,
|
||||
follow: 2,
|
||||
};
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
|
||||
|
||||
return needle("get", requestUrl, options).then((response) => {
|
||||
const body = response.body;
|
||||
return axios.get(requestUrl, options).then((response) => {
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
const needle = require("needle")
|
||||
const axios = require('axios');
|
||||
const moment = require("moment")
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||
const { isPtDubbed, sanitizePtName, sanitizePtLanguages, sanitizePtOriginalName } = require('../scraperHelper')
|
||||
@@ -63,11 +62,11 @@ function browse(config = {}, retries = 2) {
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
|
||||
|
||||
return needle('get', requestUrl, options)
|
||||
return axios.get(requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (body.includes('502: Bad gateway') ||
|
||||
@@ -75,7 +74,8 @@ function singleRequest(requestUrl, config = {}) {
|
||||
throw new Error(`Invalid body contents: ${requestUrl}`);
|
||||
}
|
||||
return body;
|
||||
});
|
||||
})
|
||||
.catch(error => Promise.reject(error.message || error));
|
||||
}
|
||||
|
||||
function parseTableBody(body) {
|
||||
@@ -141,8 +141,10 @@ function parseOriginalName(originalNameElem) {
|
||||
if (!originalNameElem[0]) {
|
||||
return '';
|
||||
}
|
||||
const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue;
|
||||
return originalName.replace(/: ?/, '');
|
||||
const originalName = originalNameElem.next().text()
|
||||
|| originalNameElem[0].nextSibling.nodeValue
|
||||
|| originalNameElem.text();
|
||||
return originalName.replace(/[^:]*: ?/, '').trim();
|
||||
}
|
||||
|
||||
function parseCategory(categorys) {
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
const needle = require("needle")
|
||||
const axios = require('axios');
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require('magnet-uri');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||
const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper')
|
||||
|
||||
const defaultTimeout = 10000;
|
||||
const defaultTimeout = 30000;
|
||||
const maxSearchPage = 50
|
||||
|
||||
const baseUrl = 'https://comoeubaixo.com';
|
||||
@@ -62,11 +62,11 @@ function browse(config = {}, retries = 2) {
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
|
||||
|
||||
return needle('get', requestUrl, options)
|
||||
return axios.get(requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (!body || (Buffer.isBuffer(body) && !body.size)) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (body.includes('502: Bad gateway') ||
|
||||
@@ -74,7 +74,8 @@ function singleRequest(requestUrl, config = {}) {
|
||||
throw new Error(`Invalid body contents: ${requestUrl}`);
|
||||
}
|
||||
return body;
|
||||
});
|
||||
})
|
||||
.catch(error => Promise.reject(error.message || error));
|
||||
}
|
||||
|
||||
function parseTableBody(body) {
|
||||
@@ -113,13 +114,12 @@ function parseTorrentPage(body) {
|
||||
const category = details.find('strong:contains(\'Gêneros: \')').next().attr('href').split('/')[0]
|
||||
const torrents = magnets.map(magnetLink => {
|
||||
const decodedMagnet = decode(magnetLink);
|
||||
const name = escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' ');
|
||||
const sanitizedTitle = sanitizePtName(name);
|
||||
const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '));
|
||||
const originalTitle = details.find('strong:contains(\'Baixar\')')[0].nextSibling.nodeValue.split('-')[0];
|
||||
const year = details.find('strong:contains(\'Data de Lançamento: \')').next().text().trim();
|
||||
const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${sanitizedTitle.trim()}`;
|
||||
const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`;
|
||||
return {
|
||||
title: sanitizedTitle.length > 5 ? sanitizedTitle : fallBackTitle,
|
||||
title: name.length > 5 ? name : fallBackTitle,
|
||||
infoHash: decodedMagnet.infoHash,
|
||||
magnetLink: magnetLink,
|
||||
category: category,
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
const needle = require("needle");
|
||||
const axios = require('axios');
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require("magnet-uri");
|
||||
const Promises = require("../../lib/promises");
|
||||
const { getRandomUserAgent } = require("../../lib/requestHelper");
|
||||
|
||||
const defaultTimeout = 10000;
|
||||
@@ -60,14 +59,10 @@ function browse(config = {}, retries = 2) {
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = {
|
||||
userAgent: getRandomUserAgent(),
|
||||
open_timeout: timeout,
|
||||
follow: 2,
|
||||
};
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
|
||||
|
||||
return needle("get", requestUrl, options).then((response) => {
|
||||
const body = response.body;
|
||||
return axios.get(requestUrl, options).then((response) => {
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
const needle = require("needle");
|
||||
const axios = require('axios');
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require("magnet-uri");
|
||||
@@ -31,7 +30,7 @@ function browse(config = {}, retries = 2) {
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { userAgent: getRandomUserAgent(), timeout: timeout, follow: 2, };
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout, };
|
||||
|
||||
return axios.get(requestUrl, options).then((response) => {
|
||||
const body = response.data;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const erairaws = require('./erairaws_api');
|
||||
const erairaws = require('./erairaws_rss_api');
|
||||
const { checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'EraiRaws';
|
||||
@@ -41,7 +41,7 @@ function untilPage(category) {
|
||||
if (category === erairaws.Categories.ANIMES) {
|
||||
return 45;
|
||||
}
|
||||
return 3;
|
||||
return 1;
|
||||
}
|
||||
|
||||
module.exports = { scrape, NAME };
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const needle = require('needle');
|
||||
const moment = require('moment');
|
||||
const { decode } = require("magnet-uri");
|
||||
const Promises = require('../../lib/promises');
|
||||
const { getRandomUserAgent } = require('./../../lib/requestHelper');
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
const defaultProxies = [
|
||||
'https://eztv.re'
|
||||
@@ -19,7 +21,7 @@ function torrent(torrentId, config = {}, retries = 1) {
|
||||
}
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/ep/${torrentId}`, config)))
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/ep/${torrentId}`, config)))
|
||||
.then(body => parseTorrentPage(body))
|
||||
.then(torrent => ({ torrentId, ...torrent }))
|
||||
.catch(error => retries ? jitter().then(() => torrent(torrentId, config, retries - 1)) : Promise.reject(error));
|
||||
@@ -33,7 +35,7 @@ function search(imdbId, config = {}, retries = 1) {
|
||||
const page = config.page || 1;
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}&imdb_id=${id}`, config)))
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}&imdb_id=${id}`, config)))
|
||||
.then(results => parseResults(results))
|
||||
.then(torrents => torrents.length === limit && page < maxPage
|
||||
? search(imdbId, { ...config, page: page + 1 }).catch(() => [])
|
||||
@@ -46,27 +48,21 @@ function browse(config = {}, retries = 1) {
|
||||
const page = config.page || 1;
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}`, config)))
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}`, config)))
|
||||
.then(results => parseResults(results))
|
||||
.catch(error => retries ? jitter().then(() => browse(config, retries - 1)) : Promise.reject(error));
|
||||
}
|
||||
|
||||
/**
 * Performs one GET request against an EZTV mirror and resolves with the
 * response payload.
 *
 * The pre-migration needle options/call and the axios replacement were both
 * present here, which declared `options` twice; only the axios variant is kept.
 *
 * @param {string} requestUrl - Fully qualified URL to fetch.
 * @param {Object} [config] - Optional settings; `config.timeout` (ms) overrides `defaultTimeout`.
 * @returns {Promise<*>} Resolves with `response.data`; rejects with a
 *   `No body: <url>` string when the payload is empty.
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };

  return axios.get(requestUrl, options)
      .then(response => {
        if (!response.data) {
          // Rejection value stays a plain string, as before.
          return Promise.reject(`No body: ${requestUrl}`);
        }
        return Promise.resolve(response.data);
      });
}
|
||||
|
||||
@@ -99,10 +95,11 @@ function parseTorrentPage(body) {
|
||||
reject(new Error('Failed loading body'));
|
||||
}
|
||||
const content = $('table[class="forum_header_border_normal"]');
|
||||
const magnetLink = content.find('a[title="Magnet Link"]').attr('href');
|
||||
const torrent = {
|
||||
name: content.find('h1 > span').text().replace(/EZTV$/, ''),
|
||||
infoHash: content.find('b:contains(\'Torrent Hash:\')')[0].nextSibling.data.trim().toLowerCase(),
|
||||
magnetLink: content.find('a[title="Magnet Link"]').attr('href'),
|
||||
infoHash: decode(magnetLink).infoHash,
|
||||
magnetLink: magnetLink,
|
||||
torrentLink: content.find('a[title="Download Torrent"]').attr('href'),
|
||||
seeders: parseInt(content.find('span[class="stat_red"]').first().text(), 10) || 0,
|
||||
size: parseSize(content.find('b:contains(\'Filesize:\')')[0].nextSibling.data),
|
||||
@@ -113,21 +110,6 @@ function parseTorrentPage(body) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Converts a human-readable size such as "1.4 GB" into a number of bytes.
 *
 * Units are binary multiples (1 KB = 1024 bytes). Thousands separators
 * (`,` and `'`) are stripped before the number is parsed.
 *
 * @param {string} sizeText - Size text containing a number and optionally TB, GB, MB, KB or kB.
 * @returns {number|undefined} Whole number of bytes, or `undefined` when the
 *   text is empty or contains no leading number.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  let scale = 1;
  if (sizeText.includes('TB')) {
    // Terabyte sizes were previously read as plain bytes.
    scale = 1024 * 1024 * 1024 * 1024;
  } else if (sizeText.includes('GB')) {
    scale = 1024 * 1024 * 1024;
  } else if (sizeText.includes('MB')) {
    scale = 1024 * 1024;
  } else if (sizeText.includes('KB') || sizeText.includes('kB')) {
    scale = 1024;
  }
  const bytes = Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale);
  // Report unparseable text the same way as missing text instead of leaking NaN.
  return Number.isNaN(bytes) ? undefined : bytes;
}
|
||||
|
||||
/**
 * Waits for `minDelay` plus a random extra of up to `jitterDelay`
 * (rounded to a whole number) before resolving.
 *
 * @returns {Promise} The promise returned by `Promises.delay`.
 */
function jitter() {
  const randomExtra = Math.round(Math.random() * jitterDelay);
  return Promises.delay(minDelay + randomExtra);
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const needle = require('needle');
|
||||
const moment = require('moment');
|
||||
const Promises = require('../../lib/promises');
|
||||
|
||||
@@ -45,8 +45,8 @@ function _getContent(endpoint, config = {},) {
|
||||
? endpoint.replace(/https?:\/\/[^/]+/, baseUrl)
|
||||
: `${baseUrl}${endpoint}`;
|
||||
|
||||
return needle('get', url, { open_timeout: timeout })
|
||||
.then((response) => response.body)
|
||||
return axios.get(url, { timeout: timeout })
|
||||
.then((response) => response.data)
|
||||
.then((body) => cheerio.load(body));
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const needle = require('needle');
|
||||
const moment = require('moment');
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
const defaultProxies = [
|
||||
'https://katcr.co'
|
||||
@@ -28,7 +29,7 @@ function torrent(torrentId, config = {}, retries = 2) {
|
||||
const proxyList = config.proxyList || defaultProxies;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`, config)))
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`, config)))
|
||||
.then((body) => parseTorrentPage(body))
|
||||
.then((torrent) => ({ torrentId, ...torrent }))
|
||||
.catch((err) => torrent(torrentId, config, retries - 1));
|
||||
@@ -43,7 +44,7 @@ function search(keyword, config = {}, retries = 2) {
|
||||
const category = config.category;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/99/${category}`, config)))
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/99/${category}`, config)))
|
||||
.then((body) => parseTableBody(body))
|
||||
.catch((err) => search(keyword, config, retries - 1));
|
||||
}
|
||||
@@ -57,18 +58,18 @@ function browse(config = {}, retries = 2) {
|
||||
const category = config.category;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/category/${category}/page/${page}`, config)))
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/category/${category}/page/${page}`, config)))
|
||||
.then((body) => parseTableBody(body))
|
||||
.catch((err) => browse(config, retries - 1));
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { open_timeout: timeout, follow: 2 };
|
||||
const options = { timeout: timeout };
|
||||
|
||||
return needle('get', requestUrl, options)
|
||||
return axios.get(requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (body.includes('Access Denied')) {
|
||||
@@ -157,19 +158,4 @@ function parseTorrentPage(body) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Converts a human-readable size such as "700 MB" into a number of bytes.
 *
 * Units are binary multiples (1 KB = 1024 bytes). Thousands separators
 * (`,` and `'`) are stripped before the number is parsed.
 *
 * @param {string} sizeText - Size text containing a number and optionally TB, GB, MB, KB or kB.
 * @returns {number|undefined} Whole number of bytes, or `undefined` when the
 *   text is empty or contains no leading number.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  let scale = 1;
  if (sizeText.includes('TB')) {
    // Terabyte sizes were previously read as plain bytes.
    scale = 1024 * 1024 * 1024 * 1024;
  } else if (sizeText.includes('GB')) {
    scale = 1024 * 1024 * 1024;
  } else if (sizeText.includes('MB')) {
    scale = 1024 * 1024;
  } else if (sizeText.includes('KB') || sizeText.includes('kB')) {
    scale = 1024;
  }
  const bytes = Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale);
  // Report unparseable text the same way as missing text instead of leaking NaN.
  return Number.isNaN(bytes) ? undefined : bytes;
}
|
||||
|
||||
module.exports = { torrent, search, browse, Categories };
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
const needle = require("needle")
|
||||
const axios = require('axios');
|
||||
const moment = require("moment")
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require('magnet-uri');
|
||||
@@ -21,12 +21,11 @@ function torrent(torrentId, config = {}, retries = 2) {
|
||||
if (!torrentId || retries === 0) {
|
||||
return Promise.reject(new Error(`Failed ${torrentId} query`));
|
||||
}
|
||||
const slug = torrentId.split('?p=')[1];
|
||||
return singleRequest(`${baseUrl}/?p=${slug}`, config)
|
||||
return singleRequest(`${baseUrl}/${torrentId}`, config)
|
||||
.then((body) => parseTorrentPage(body))
|
||||
.then((torrent) => torrent.map(el => ({ torrentId: slug, ...el })))
|
||||
.then((torrent) => torrent.map(el => ({ torrentId, ...el })))
|
||||
.catch((err) => {
|
||||
console.warn(`Failed Lapumia ${slug} request: `, err);
|
||||
console.warn(`Failed Lapumia ${torrentId} request: `, err);
|
||||
return torrent(torrentId, config, retries - 1)
|
||||
});
|
||||
}
|
||||
@@ -62,11 +61,11 @@ function browse(config = {}, retries = 2) {
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
|
||||
const options = { userAgent: getRandomUserAgent(), timeout: timeout, follow: 2 };
|
||||
|
||||
return needle('get', requestUrl, options)
|
||||
return axios.get(requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (body.includes('502: Bad gateway') ||
|
||||
@@ -74,7 +73,8 @@ function singleRequest(requestUrl, config = {}) {
|
||||
throw new Error(`Invalid body contents: ${requestUrl}`);
|
||||
}
|
||||
return body;
|
||||
});
|
||||
})
|
||||
.catch(error => Promise.reject(error.message || error));
|
||||
}
|
||||
|
||||
function parseTableBody(body) {
|
||||
@@ -89,10 +89,14 @@ function parseTableBody(body) {
|
||||
|
||||
$('div.post').each((i, element) => {
|
||||
const row = $(element);
|
||||
torrents.push({
|
||||
name: row.find("div > a").text(),
|
||||
torrentId: row.find("div > a").attr("href")
|
||||
});
|
||||
try {
|
||||
torrents.push({
|
||||
name: row.find("div > a").text(),
|
||||
torrentId: row.find("div > a").attr("href").split('/')[3]
|
||||
});
|
||||
} catch (e) {
|
||||
console.log("Failed parsing Lupumia table entry")
|
||||
}
|
||||
});
|
||||
resolve(torrents);
|
||||
});
|
||||
@@ -112,7 +116,7 @@ function parseTorrentPage(body) {
|
||||
.map((i, section) => $(section).attr("href")).get();
|
||||
const category = parseCategory($('div.category').html());
|
||||
const details = $('div.content')
|
||||
const torrents = magnets.map(magnetLink => ({
|
||||
const torrents = magnets.filter(magnetLink => decode(magnetLink).name).map(magnetLink => ({
|
||||
title: sanitizePtName(escapeHTML(decode(magnetLink).name.replace(/\+/g, ' '))),
|
||||
originalName: sanitizePtOriginalName(details.find('b:contains(\'Titulo Original:\')')[0].nextSibling.nodeValue),
|
||||
year: details.find('b:contains(\'Ano de Lançamento:\')')[0].nextSibling.nodeValue.trim(),
|
||||
|
||||
@@ -33,7 +33,8 @@ async function updateSeeders(torrent) {
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
const allowedCategories = [
|
||||
lapumia.Categories.MOVIE
|
||||
lapumia.Categories.MOVIE,
|
||||
lapumia.Categories.TV
|
||||
];
|
||||
|
||||
return Promises.sequence(allowedCategories
|
||||
@@ -102,9 +103,6 @@ function typeMapping() {
|
||||
}
|
||||
|
||||
function untilPage(category) {
|
||||
if (lapumia.Categories.TV === category) {
|
||||
return 5;
|
||||
}
|
||||
if (lapumia.Categories.ANIME === category) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
const { si } = require('nyaapi')
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
const Categories = {
|
||||
ANIME: {
|
||||
@@ -34,7 +35,7 @@ function torrent(torrentId) {
|
||||
}
|
||||
|
||||
/**
 * Searches nyaa.si for `query`, restricted to the `Categories.ANIME.ENGLISH`
 * category, and maps every hit through `parseTorrent`.
 *
 * The unfiltered `si.search(query)` return that preceded this call ended the
 * function early, leaving the category filter and the result mapping
 * unreachable; only the filtered call is kept.
 *
 * @param {string} query - Search term.
 * @returns {Promise<Array>} Parsed torrent records.
 */
function search(query) {
  // NOTE(review): the `null` second argument is presumably nyaapi's result-count limit - confirm.
  return si.search(query, null, { category: Categories.ANIME.ENGLISH })
      .then(results => results.map(torrent => parseTorrent(torrent)));
}
|
||||
|
||||
@@ -62,19 +63,4 @@ function parseTorrent(torrent) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Converts a size string in IEC notation, e.g. "1.4 GiB", into a number of bytes.
 *
 * Thousands separators (`,` and `'`) are stripped before the number is parsed.
 *
 * @param {string} sizeText - Size text containing a number and optionally TiB, GiB, MiB, KiB or kB.
 * @returns {number|undefined} Whole number of bytes, or `undefined` when the
 *   text is empty or contains no leading number.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  let scale = 1;
  if (sizeText.includes('TiB')) {
    // Tebibyte sizes were previously read as plain bytes.
    scale = 1024 * 1024 * 1024 * 1024;
  } else if (sizeText.includes('GiB')) {
    scale = 1024 * 1024 * 1024;
  } else if (sizeText.includes('MiB')) {
    scale = 1024 * 1024;
  } else if (sizeText.includes('KiB') || sizeText.includes('kB')) {
    scale = 1024;
  }
  const bytes = Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale);
  // Report unparseable text the same way as missing text instead of leaking NaN.
  return Number.isNaN(bytes) ? undefined : bytes;
}
|
||||
|
||||
module.exports = { torrent, search, browse, Categories };
|
||||
|
||||
@@ -20,6 +20,10 @@ async function scrape() {
|
||||
// return Promise.all(ids.map(id => limiter.schedule(() => nyaasi.torrent(id)
|
||||
// .then(torrent => processTorrentRecord(torrent)))))
|
||||
// .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
// const queries = ['Sagrada Reset', 'Sakurada Reset'];
|
||||
// return Promise.all(queries.map(query => limiter.schedule(() => nyaasi.search(query)
|
||||
// .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))))))))
|
||||
// .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
return scrapeLatestTorrents()
|
||||
.then(() => {
|
||||
lastScrape.lastScraped = scrapeStart;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
const needle = require("needle")
|
||||
const axios = require('axios');
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require('magnet-uri');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||
const { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages } = require('../scraperHelper')
|
||||
|
||||
const defaultTimeout = 10000;
|
||||
const defaultTimeout = 30000;
|
||||
const maxSearchPage = 50
|
||||
|
||||
const baseUrl = 'https://ondebaixa.com';
|
||||
@@ -62,11 +62,11 @@ function browse(config = {}, retries = 2) {
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
|
||||
|
||||
return needle('get', requestUrl, options)
|
||||
return axios.get(requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (body.includes('502: Bad gateway') ||
|
||||
@@ -74,7 +74,8 @@ function singleRequest(requestUrl, config = {}) {
|
||||
throw new Error(`Invalid body contents: ${requestUrl}`);
|
||||
}
|
||||
return body;
|
||||
});
|
||||
})
|
||||
.catch(error => Promise.reject(error.message || error));
|
||||
}
|
||||
|
||||
function parseTableBody(body) {
|
||||
@@ -112,12 +113,12 @@ function parseTorrentPage(body) {
|
||||
const category = details.find('span:contains(\'Gêneros: \')').next().html()
|
||||
const torrents = magnets.map(magnetLink => {
|
||||
const decodedMagnet = decode(magnetLink);
|
||||
const name = escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' ');
|
||||
const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '));
|
||||
const originalTitle = details.find('span:contains(\'Título Original: \')').next().text().trim();
|
||||
const year = details.find('span:contains(\'Ano de Lançamento: \')').next().text().trim();
|
||||
const fallbackTitle = `${originalTitle} ${year}`;
|
||||
const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`;
|
||||
return {
|
||||
title: name ? sanitizePtName(name) : fallbackTitle,
|
||||
title: name.length > 5 ? name : fallBackTitle,
|
||||
originalName: sanitizePtOriginalName(originalTitle),
|
||||
year: year,
|
||||
infoHash: decodedMagnet.infoHash,
|
||||
|
||||
@@ -107,12 +107,6 @@ function typeMapping() {
|
||||
}
|
||||
|
||||
/**
 * Returns the last listing page to scrape for a category: 5 for the
 * DESENHOS and TV categories, `UNTIL_PAGE` for everything else.
 *
 * @param {*} category - One of the `ondebaixa.Categories` values.
 * @returns {number} Page limit for the category.
 */
function untilPage(category) {
  const hasReducedLimit = category === ondebaixa.Categories.DESENHOS
      || category === ondebaixa.Categories.TV;
  return hasReducedLimit ? 5 : UNTIL_PAGE;
}
|
||||
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
const needle = require('needle');
|
||||
const axios = require('axios');
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { getRandomUserAgent } = require("../../lib/requestHelper");
|
||||
|
||||
const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
|
||||
const appId = 'torrentio-addon';
|
||||
const defaultTimeout = 30000;
|
||||
const retryDelay = 3000;
|
||||
|
||||
let token;
|
||||
|
||||
@@ -22,6 +24,7 @@ const Options = {
|
||||
MOVIES_X265_4K_HDR: [52],
|
||||
MOVIES_FULL_BD: [42],
|
||||
MOVIES_BD_REMUX: [46],
|
||||
MOVIES_HIGH_RES: [47, 50, 51, 52, 46],
|
||||
TV_EPISODES: [18],
|
||||
TV_UHD_EPISODES: [49],
|
||||
TV_HD_EPISODES: [41],
|
||||
@@ -84,33 +87,43 @@ function browse(params = {}) {
|
||||
return singleRequest(parameters).then(results => parseResults(results));
|
||||
}
|
||||
|
||||
/**
 * Sends one query to the torrentapi endpoint and resolves with the response payload.
 *
 * The API token and `app_id` are added to `params` (the passed object is
 * mutated), and `undefined`/`null` entries are removed before the request.
 *
 * Retry behaviour, all sharing the same `retries` budget:
 *  - `error_code` 4 (token expired): the cached token is dropped and the request repeated;
 *  - empty payload or `error_code` 5/20 (too many requests): repeated after `retryDelay`;
 *  - HTTP 429: repeated after `retryDelay`.
 *
 * The token-expired retry used to restart with the default budget, so an API
 * that kept answering with `error_code` 4 caused unbounded recursion.
 *
 * @param {Object} [params] - Query-string parameters for the API call.
 * @param {Object} [config] - Optional settings; `config.timeout` (ms) overrides `defaultTimeout`.
 * @param {number} [retries=15] - Remaining retry attempts.
 * @returns {Promise<*>} Resolves with `response.data`; rejects with the error
 *   payload, an error message, or a status description.
 */
async function singleRequest(params = {}, config = {}, retries = 15) {
  const timeout = config.timeout || defaultTimeout;
  const headers = {
    'user-agent': getRandomUserAgent(),
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4'
  };
  params.token = await getToken();
  params.app_id = appId;

  Object.keys(params)
      .filter(key => params[key] === undefined || params[key] === null)
      .forEach(key => delete params[key]);

  const options = { headers, timeout, params };
  return axios.get(baseUrl, options)
      .then(response => {
        if (response.data && response.data.error_code === 4 && retries > 0) {
          // token expired - forget it so the next attempt fetches a fresh one
          token = undefined;
          return singleRequest(params, config, retries - 1);
        }
        if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) {
          // too many requests
          return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
        }
        if (response.status !== 200 || (response.data && response.data.error)) {
          // something went wrong
          return Promise.reject(response.data || `Failed RARGB request with status=${response.status}`);
        }

        return response.data;
      })
      .catch(error => {
        if (error.response && [429].includes(error.response.status) && retries > 0) {
          return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
        }
        return Promise.reject(error.message || error);
      });
}
|
||||
|
||||
@@ -137,9 +150,10 @@ function parseResult(result) {
|
||||
|
||||
/**
 * Returns the API token, requesting one from `baseUrl` on first use and
 * caching it in the module-level `token` variable.
 *
 * The pre-migration needle request and its axios replacement were both present
 * here, which declared `options` twice and fetched the token two times; only
 * the axios variant is kept.
 *
 * @returns {Promise<*>} The cached or freshly fetched `token` value.
 */
async function getToken() {
  if (!token) {
    const params = { get_token: 'get_token', app_id: appId };
    const options = { timeout: defaultTimeout, params };
    token = await axios.get(baseUrl, options)
        .then(response => response.data.token);
  }
  return token;
}
|
||||
|
||||
@@ -12,14 +12,10 @@ const allowedCategories = [
|
||||
rarbg.Options.category.MOVIES_XVID,
|
||||
rarbg.Options.category.MOVIES_XVID_720P,
|
||||
rarbg.Options.category.MOVIES_X265_1080P,
|
||||
rarbg.Options.category.MOVIES_X265_4K,
|
||||
rarbg.Options.category.MOVIES_X265_4K_HDR,
|
||||
rarbg.Options.category.MOVIES_X264,
|
||||
rarbg.Options.category.MOVIES_X264_720P,
|
||||
rarbg.Options.category.MOVIES_X264_1080P,
|
||||
rarbg.Options.category.MOVIES_X264_3D,
|
||||
rarbg.Options.category.MOVIES_X264_4K,
|
||||
rarbg.Options.category.MOVIES_BD_REMUX,
|
||||
rarbg.Options.category.MOVIES_HIGH_RES,
|
||||
rarbg.Options.category.TV_EPISODES,
|
||||
rarbg.Options.category.TV_UHD_EPISODES,
|
||||
rarbg.Options.category.TV_HD_EPISODES
|
||||
@@ -32,8 +28,8 @@ async function scrape() {
|
||||
//const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
|
||||
|
||||
return Promise.all(
|
||||
seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
|
||||
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
|
||||
seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
|
||||
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
|
||||
}
|
||||
|
||||
|
||||
@@ -36,21 +36,17 @@ async function scrapeLatestTorrents() {
|
||||
rarbg.Options.category.MOVIES_XVID,
|
||||
rarbg.Options.category.MOVIES_XVID_720P,
|
||||
rarbg.Options.category.MOVIES_X265_1080P,
|
||||
rarbg.Options.category.MOVIES_X265_4K,
|
||||
rarbg.Options.category.MOVIES_X265_4K_HDR,
|
||||
rarbg.Options.category.MOVIES_X264,
|
||||
rarbg.Options.category.MOVIES_X264_720P,
|
||||
rarbg.Options.category.MOVIES_X264_1080P,
|
||||
rarbg.Options.category.MOVIES_X264_3D,
|
||||
rarbg.Options.category.MOVIES_X264_4K,
|
||||
rarbg.Options.category.MOVIES_BD_REMUX,
|
||||
rarbg.Options.category.MOVIES_HIGH_RES,
|
||||
rarbg.Options.category.TV_EPISODES,
|
||||
rarbg.Options.category.TV_UHD_EPISODES,
|
||||
rarbg.Options.category.TV_HD_EPISODES
|
||||
];
|
||||
|
||||
return Promises.sequence(allowedCategories
|
||||
.map(category => () => limiter.schedule(() => scrapeLatestTorrentsForCategory(category))))
|
||||
.map(category => () => limiter.schedule(() => scrapeLatestTorrentsForCategory(category))))
|
||||
.then(entries => entries.reduce((a, b) => a.concat(b), []));
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const needle = require('needle');
|
||||
const moment = require('moment');
|
||||
const decode = require('magnet-uri');
|
||||
const { defaultOptionsWithProxy } = require('../../lib/requestHelper');
|
||||
@@ -8,6 +8,7 @@ const baseUrl = 'http://www.rutor.info';
|
||||
const defaultTimeout = 10000;
|
||||
|
||||
const Categories = {
|
||||
ALL: '0',
|
||||
FOREIGN_FILMS: '1',
|
||||
RUSSIAN_FILMS: '5',
|
||||
SCIENCE_FILMS: '12',
|
||||
@@ -57,13 +58,13 @@ function files(torrentId) {
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl) {
|
||||
const options = { ...defaultOptionsWithProxy(), open_timeout: defaultTimeout, follow: 2 };
|
||||
const options = { ...defaultOptionsWithProxy(), timeout: defaultTimeout };
|
||||
|
||||
return needle('get', requestUrl, options)
|
||||
return axios.get(requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl} with status ${response.statusCode}`);
|
||||
throw new Error(`No body: ${requestUrl} with status ${response.status}`);
|
||||
} else if (body.includes('Access Denied')) {
|
||||
console.log(`Access Denied: ${requestUrl}`);
|
||||
throw new Error(`Access Denied: ${requestUrl}`);
|
||||
|
||||
@@ -23,4 +23,19 @@ function sanitizePtLanguages(languages) {
|
||||
.trim();
|
||||
}
|
||||
|
||||
module.exports = { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages }
|
||||
/**
 * Converts a human-readable size into a number of bytes.
 *
 * Recognised unit spellings: `GB`/`GiB`/`Go`, `MB`/`MiB`/`Mo`,
 * `KB`/`kB`/`KiB`/`Ko` and `TB`/`TiB`/`To`. All are treated as binary
 * multiples (1 KB = 1024 bytes). Thousands separators (`,` and `'`) are
 * stripped before the number is parsed. Text without a known unit is read as
 * a plain byte count.
 *
 * @param {string} sizeText - Size text, e.g. "1.4 GB" or "700 MiB".
 * @returns {number|undefined} Whole number of bytes, or `undefined` when the
 *   text is empty or contains no leading number.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  let scale = 1;
  if (/Gi?B|Go/.test(sizeText)) {
    scale = 1024 * 1024 * 1024;
  } else if (/Mi?B|Mo/.test(sizeText)) {
    scale = 1024 * 1024;
  } else if (/[Kk]i?B|Ko/.test(sizeText)) {
    scale = 1024;
  } else if (/Ti?B|To\b/.test(sizeText)) {
    // Checked last so text that already matched a smaller unit keeps its result;
    // terabyte sizes were previously read as plain bytes.
    scale = 1024 * 1024 * 1024 * 1024;
  }
  const bytes = Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale);
  // Report unparseable text the same way as missing text instead of leaking NaN.
  return Number.isNaN(bytes) ? undefined : bytes;
}
|
||||
|
||||
module.exports = { parseSize, isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages }
|
||||
@@ -1,4 +1,4 @@
|
||||
const needle = require('needle');
|
||||
const axios = require('axios');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
|
||||
const baseUrl = 'https://apibay.org';
|
||||
@@ -102,10 +102,10 @@ function browse(config = {}, retries = 2) {
|
||||
|
||||
async function _request(endpoint) {
|
||||
const url = `${baseUrl}/${endpoint}`;
|
||||
return needle('get', url, { open_timeout: timeout })
|
||||
return axios.get(url, { timeout: timeout })
|
||||
.then(response => {
|
||||
if (typeof response.body === 'object') {
|
||||
return response.body;
|
||||
if (typeof response.data === 'object') {
|
||||
return response.data;
|
||||
}
|
||||
return Promise.reject(`Unexpected response body`);
|
||||
});
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
const axios = require('axios');
|
||||
const moment = require('moment');
|
||||
const needle = require('needle');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const { ungzip } = require('node-gzip');
|
||||
const LineByLineReader = require('line-by-line');
|
||||
@@ -60,7 +60,7 @@ async function scrape() {
|
||||
}
|
||||
|
||||
limiter.schedule(() => processTorrentRecord(torrent)
|
||||
.catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
|
||||
.catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
|
||||
.then(() => limiter.empty())
|
||||
.then((empty) => empty && lr.resume())
|
||||
.then(() => entriesProcessed++);
|
||||
@@ -160,8 +160,8 @@ function downloadDump(dump) {
|
||||
}
|
||||
|
||||
console.log('downloading dump file...');
|
||||
return needle('get', dump.url, { open_timeout: 2000, output: '/tmp/tpb_dump.gz' })
|
||||
.then((response) => response.body)
|
||||
return axios.get(dump.url, { timeout: 2000, responseType: 'stream' })
|
||||
.then((response) => response.data)
|
||||
.then((body) => {
|
||||
console.log('unzipping dump file...');
|
||||
return ungzip(body);
|
||||
|
||||
@@ -7,7 +7,6 @@ const Promises = require('../../lib/promises');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'ThePirateBay';
|
||||
const UNTIL_PAGE = 5;
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
@@ -54,7 +53,7 @@ async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
|
||||
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE
|
||||
.then(resolved => resolved.length > 0 && page < getUntilPage(category)
|
||||
? scrapeLatestTorrentsForCategory(category, page + 1)
|
||||
: Promise.resolve());
|
||||
}
|
||||
@@ -84,4 +83,16 @@ async function processTorrentRecord(record) {
|
||||
return createTorrentEntry(torrent);
|
||||
}
|
||||
|
||||
/**
 * Returns how many listing pages to scrape for a ThePirateBay category:
 * 1 for 3D movies, 10 for TV shows (SD and HD), 5 for anything else.
 *
 * @param {*} category - One of the `thepiratebay.Categories.VIDEO` values.
 * @returns {number} Page limit for the category.
 */
function getUntilPage(category) {
  if (category === thepiratebay.Categories.VIDEO.MOVIES_3D) {
    return 1;
  }
  if (category === thepiratebay.Categories.VIDEO.TV_SHOWS
      || category === thepiratebay.Categories.VIDEO.TV_SHOWS_HD) {
    return 10;
  }
  return 5;
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,9 +1,10 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const needle = require('needle');
|
||||
const moment = require('moment');
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
const defaultProxies = [
|
||||
// 'https://torrentgalaxy.to',
|
||||
@@ -34,7 +35,7 @@ function torrent(torrentId, config = {}, retries = 2) {
|
||||
const proxyList = config.proxyList || defaultProxies;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`)))
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`)))
|
||||
.then((body) => parseTorrentPage(body))
|
||||
.then((torrent) => ({ torrentId, ...torrent }))
|
||||
.catch((err) => torrent(torrentId, config, retries - 1));
|
||||
@@ -49,7 +50,7 @@ function search(keyword, config = {}, retries = 2) {
|
||||
const category = config.category;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}&search=${keyword}`)))
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}&search=${keyword}`)))
|
||||
.then((body) => parseTableBody(body))
|
||||
.catch(() => search(keyword, config, retries - 1));
|
||||
}
|
||||
@@ -63,19 +64,19 @@ function browse(config = {}, retries = 2, error = null) {
|
||||
const category = config.category;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`)))
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`)))
|
||||
.then((body) => parseTableBody(body))
|
||||
.catch((err) => browse(config, retries - 1, err));
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl) {
|
||||
const options = { userAgent: getRandomUserAgent(), open_timeout: defaultTimeout, follow: 2 };
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: defaultTimeout };
|
||||
|
||||
return needle('get', requestUrl, options)
|
||||
return axios.get(requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl} with status ${response.statusCode}`);
|
||||
throw new Error(`No body: ${requestUrl} with status ${response.status}`);
|
||||
} else if (body.includes('Access Denied')) {
|
||||
console.log(`Access Denied: ${requestUrl}`);
|
||||
throw new Error(`Access Denied: ${requestUrl}`);
|
||||
@@ -157,21 +158,6 @@ function parseTorrentPage(body) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Converts a human-readable size such as "2.1 GB" into a number of bytes.
 *
 * Units are binary multiples (1 KB = 1024 bytes). Thousands separators
 * (`,` and `'`) are stripped before the number is parsed.
 *
 * @param {string} sizeText - Size text containing a number and optionally TB, GB, MB, KB or kB.
 * @returns {number|undefined} Whole number of bytes, or `undefined` when the
 *   text is empty or contains no leading number.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  let scale = 1;
  if (sizeText.includes('TB')) {
    // Terabyte sizes were previously read as plain bytes.
    scale = 1024 * 1024 * 1024 * 1024;
  } else if (sizeText.includes('GB')) {
    scale = 1024 * 1024 * 1024;
  } else if (sizeText.includes('MB')) {
    scale = 1024 * 1024;
  } else if (sizeText.includes('KB') || sizeText.includes('kB')) {
    scale = 1024;
  }
  const bytes = Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale);
  // Report unparseable text the same way as missing text instead of leaking NaN.
  return Number.isNaN(bytes) ? undefined : bytes;
}
|
||||
|
||||
function parseDate(dateString) {
|
||||
if (dateString.includes('ago')) {
|
||||
const amount = parseInt(dateString, 10);
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
const needle = require('needle');
|
||||
const axios = require('axios');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { getRandomUserAgent } = require('./../../lib/requestHelper');
|
||||
|
||||
@@ -14,7 +14,7 @@ function torrent(torrentId, config = {}, retries = 2) {
|
||||
}
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/movie_details.json?movie_id=${torrentId}`, config)))
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/movie_details.json?movie_id=${torrentId}`, config)))
|
||||
.then(body => parseResults(body))
|
||||
.catch(error => torrent(torrentId, config, retries - 1));
|
||||
}
|
||||
@@ -25,7 +25,7 @@ function search(query, config = {}, retries = 2) {
|
||||
}
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&query_term=${query}`, config)))
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&query_term=${query}`, config)))
|
||||
.then(results => parseResults(results))
|
||||
.catch(error => search(query, config, retries - 1));
|
||||
}
|
||||
@@ -37,21 +37,27 @@ function browse(config = {}, retries = 2) {
|
||||
const page = config.page || 1;
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&page=${page}`, config)))
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&page=${page}`, config)))
|
||||
.then(results => parseResults(results))
|
||||
.catch(error => browse(config, retries - 1));
|
||||
}
|
||||
|
||||
function maxPage() {
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}`)))
|
||||
.then(results => Math.round((results?.data?.movie_count || 0) / limit))
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { userAgent: getRandomUserAgent(), open_timeout: timeout, follow: 2 };
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
|
||||
|
||||
return needle('get', requestUrl, options)
|
||||
return axios.get(requestUrl, options)
|
||||
.then(response => {
|
||||
if (!response.body) {
|
||||
if (!response.data) {
|
||||
return Promise.reject(`No body: ${requestUrl}`);
|
||||
}
|
||||
return Promise.resolve(response.body);
|
||||
return Promise.resolve(response.data);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -89,4 +95,4 @@ function formatType(type) {
|
||||
return type.toUpperCase();
|
||||
}
|
||||
|
||||
module.exports = { torrent, search, browse };
|
||||
module.exports = { torrent, search, browse, maxPage };
|
||||
@@ -6,16 +6,16 @@ const repository = require('../../lib/repository');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'YTS';
|
||||
const UNTIL_PAGE = 2;
|
||||
const UNTIL_PAGE = 10;
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
async function scrape() {
|
||||
async function scrape(maxPage) {
|
||||
const scrapeStart = moment();
|
||||
const lastScrape = await repository.getProvider({ name: NAME });
|
||||
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
|
||||
|
||||
return scrapeLatestTorrents()
|
||||
return scrapeLatestTorrentsForCategory(maxPage)
|
||||
.then(() => {
|
||||
lastScrape.lastScraped = scrapeStart;
|
||||
return lastScrape.save();
|
||||
@@ -27,11 +27,7 @@ async function updateSeeders(torrent) {
|
||||
return limiter.schedule(() => yts.torrent(torrent.torrentId));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
return scrapeLatestTorrentsForCategory();
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(page = 1) {
|
||||
async function scrapeLatestTorrentsForCategory(maxPage = UNTIL_PAGE, page = 1) {
|
||||
console.log(`Scrapping ${NAME} page ${page}`);
|
||||
return yts.browse(({ page }))
|
||||
.catch(error => {
|
||||
@@ -39,8 +35,8 @@ async function scrapeLatestTorrentsForCategory(page = 1) {
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
|
||||
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE
|
||||
? scrapeLatestTorrentsForCategory(page + 1)
|
||||
.then(resolved => resolved.length > 0 && page < maxPage
|
||||
? scrapeLatestTorrentsForCategory(maxPage, page + 1)
|
||||
: Promise.resolve());
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user