[scraper] implements custom rarbg api class

This commit is contained in:
TheBeastLT
2020-05-26 23:22:08 +02:00
parent 3c31c040c3
commit f0939f9943
7 changed files with 190 additions and 98 deletions

5
package-lock.json generated
View File

@@ -1949,11 +1949,6 @@
"resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.0.tgz",
"integrity": "sha1-9JvmtIeJTdxA3MlKMi9hEJLgDV4="
},
"rarbg-api": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/rarbg-api/-/rarbg-api-1.1.4.tgz",
"integrity": "sha512-BxhHwCW/h18l8m8nwONfEVcNRN0qt0mBp0eM0yecdDIG6h1VcNMdCViLqyPethZC0UExTCGsioCWvVWbCkBMhg=="
},
"raw-body": {
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.3.3.tgz",

View File

@@ -34,7 +34,6 @@
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#345c33536b2a5e7455da91cdde0146625bb9b254",
"pg": "^7.8.2",
"pg-hstore": "^2.3.2",
"rarbg-api": "^1.1.4",
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#935a5c23ae809edbcd2a111526a7f74d6767c50d",
"rutracker-api-2": "^1.10.0",
"sequelize": "^5.21.5",

View File

@@ -189,7 +189,7 @@ function getTorrentsWithoutSize() {
function getUpdateSeedersTorrents() {
const until = moment().subtract(7, 'days').format('YYYY-MM-DD');
return Torrent.findAll({
where: literal(`torrent."updatedAt" < \'${until}\' and torrent."provider" not in (\'NyaaPantsu\', \'RARBG\')`),
where: literal(`torrent."updatedAt" < \'${until}\' and torrent."provider" not in (\'NyaaPantsu\')`),
limit: 100,
order: [
['seeders', 'DESC'],

View File

@@ -10,7 +10,7 @@ module.exports = [
{ scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, cron: '0 0 */4 ? * *' },
// { scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */2 ? * *' },
{ scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */2 ? * *' },
{ scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' },

View File

@@ -0,0 +1,148 @@
const needle = require('needle');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { defaultOptionsWithProxy } = require('./../../lib/request_helper');
const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
const appId = 'node-rarbg-api';
const defaultTimeout = 30000;
let token;
/**
 * Enumerations of the values accepted by the request parameters built in this module.
 *
 * - `category`: named category filters; every entry is an array holding one numeric id.
 * - `sort`:     result ordering keys.
 * - `format`:   response format names.
 * - `ranked`:   numeric flag values for the `ranked` parameter.
 */
const Options = {
  category: {
    // movies
    MOVIES_XVID: [14],
    MOVIES_XVID_720P: [48],
    MOVIES_X264: [17],
    MOVIES_X264_1080P: [44],
    MOVIES_X264_720P: [45],
    MOVIES_X264_3D: [47],
    MOVIES_X264_4K: [50],
    MOVIES_X265_1080P: [54],
    MOVIES_X265_4K: [51],
    MOVIES_X265_4K_HDR: [52],
    MOVIES_FULL_BD: [42],
    MOVIES_BD_REMUX: [46],

    // tv
    TV_EPISODES: [18],
    TV_UHD_EPISODES: [49],
    TV_HD_EPISODES: [41],

    // music
    MUSIC_MP3: [23],
    MUSIC_FLAC: [25],

    // games
    GAMES_PC_ISO: [27],
    GAMES_PC_RIP: [28],
    GAMES_PS3: [40],
    GAMES_XBOX_360: [32],

    // everything else
    SOFTWARE_PC_ISO: [33],
    EBOOKS: [35],
    XXX: [4],
  },

  sort: {
    LAST: 'last',
    SEEDERS: 'seeders',
    LEECHERS: 'leechers',
  },

  format: {
    JSON: 'json',
    JSON_EXTENDED: 'json_extended',
  },

  ranked: {
    TRUE: 1,
    FALSE: 0,
  },
};
/**
 * Searches for torrents by IMDb id.
 *
 * @param {string} imdbId IMDb id to look up; required.
 * @param {Object} [params={}] optional overrides: `category` (array, joined with ';'),
 *   `limit` (default 100), `sort` (default Options.sort.SEEDERS), `min_seeders`,
 *   `min_leechers`, `format` (default Options.format.JSON_EXTENDED) and
 *   `ranked` (default Options.ranked.FALSE).
 * @returns {Promise} result of passing the request body through parseResults;
 *   rejects with an Error when `imdbId` is missing.
 */
function search(imdbId, params = {}) {
  if (!imdbId) {
    return Promise.reject(new Error(`Must define imdbId`));
  }

  const {
    category,
    limit,
    sort,
    min_seeders: minSeeders,
    min_leechers: minLeechers,
    format,
    ranked,
  } = params;

  // Falsy overrides fall back to the defaults; a missing or empty category list becomes null.
  const query = {
    mode: 'search',
    search_imdb: imdbId,
    category: (category && category.join(';')) || null,
    limit: limit || 100,
    sort: sort || Options.sort.SEEDERS,
    min_seeders: minSeeders || undefined,
    min_leechers: minLeechers || undefined,
    format: format || Options.format.JSON_EXTENDED,
    ranked: ranked || Options.ranked.FALSE,
  };

  return singleRequest(query).then(body => parseResults(body));
}
/**
 * Lists torrents without a search term.
 *
 * @param {Object} [params={}] optional overrides: `category` (array, joined with ';'),
 *   `limit` (default 100), `sort` (default Options.sort.LAST), `min_seeders`,
 *   `min_leechers`, `format` (default Options.format.JSON_EXTENDED) and
 *   `ranked` (default Options.ranked.FALSE).
 * @returns {Promise} result of passing the request body through parseResults.
 */
function browse(params = {}) {
  const {
    category,
    limit,
    sort,
    min_seeders: minSeeders,
    min_leechers: minLeechers,
    format,
    ranked,
  } = params;

  // Falsy overrides fall back to the defaults; a missing or empty category list becomes null.
  const query = {
    mode: 'list',
    category: (category && category.join(';')) || null,
    limit: limit || 100,
    sort: sort || Options.sort.LAST,
    min_seeders: minSeeders || undefined,
    min_leechers: minLeechers || undefined,
    format: format || Options.format.JSON_EXTENDED,
    ranked: ranked || Options.ranked.FALSE,
  };

  return singleRequest(query).then(body => parseResults(body));
}
/**
 * Sends one GET request to `baseUrl` and returns the response body, retrying when
 * the token has expired or the request was throttled.
 *
 * @param {Object} [params={}] query parameters. The object is copied, never modified;
 *   entries whose value is null or undefined are not sent. `token` and `app_id`
 *   are always added to the outgoing query.
 * @param {Object} [config={}] `config.timeout` overrides the default open timeout (ms).
 * @param {number} [retries=5] remaining attempts, shared by token refreshes and
 *   throttling retries so that neither can loop forever.
 * @returns {Promise<*>} the response body. Once retries are exhausted the last body is
 *   returned as-is (it may still carry an `error_code`).
 * @throws {Error} rejects with an Error exposing `statusCode` and `body` when the
 *   final response status is not 200.
 */
async function singleRequest(params = {}, config = {}, retries = 5) {
  const timeout = config.timeout || defaultTimeout;
  const options = { ...defaultOptionsWithProxy(), open_timeout: timeout, follow: 2 };

  // Build the outgoing query on a copy so the caller's object stays untouched
  // and a retry starts again from the original parameters.
  const candidates = { ...params, token: await getToken(), app_id: appId };
  const query = {};
  for (const [key, value] of Object.entries(candidates)) {
    if (value !== undefined && value !== null) {
      query[key] = value;
    }
  }

  const response = await needle('get', baseUrl, query, options);
  const body = response.body;

  if (body && body.error_code === 4) {
    // token expired: drop the cached token so the next getToken() fetches a new one
    token = undefined;
    if (retries > 0) {
      return singleRequest(params, config, retries - 1);
    }
  }
  if ((!body || [5, 20].includes(body.error_code)) && retries > 0) {
    // too many requests: wait before trying again
    await Promises.delay(2100);
    return singleRequest(params, config, retries - 1);
  }
  if (response.statusCode !== 200) {
    // something went wrong
    const error = new Error(`Failed RARBG request with status=${response.statusCode}`);
    error.statusCode = response.statusCode;
    error.body = body;
    throw error;
  }
  return body;
}
/**
 * Converts a raw response body into a list of parsed torrent records.
 *
 * Always synchronous: it either returns an array or throws. Callers in this module
 * invoke it inside a `.then(...)`, where a throw surfaces as a rejected promise.
 *
 * @param {Object} results response body expected to hold a `torrent_results` array.
 * @returns {Array<Object>} one record per entry, produced by parseResult.
 * @throws {Error} when `results` is missing or `torrent_results` is not an array;
 *   the message embeds the JSON-serialized body.
 */
function parseResults(results) {
  if (!results || !Array.isArray(results.torrent_results)) {
    throw new Error(`Incorrect results ${JSON.stringify(results)}`);
  }
  return results.torrent_results.map(result => parseResult(result));
}
/**
 * Maps one raw result entry onto the record shape used by this module.
 *
 * @param {Object} result raw entry; reads `title`, `download`, `seeders`, `leechers`,
 *   `category`, `size`, `pubdate` and the optional `episode_info.imdb`.
 * @returns {Object} record with `title`, `infoHash` (taken from decoding `download`),
 *   `magnetLink`, `seeders`, `leechers`, `category`, `size`, `uploadDate` (Date)
 *   and `imdbId`.
 */
function parseResult(result) {
  const {
    title,
    download,
    seeders,
    leechers,
    category,
    size,
    pubdate,
    episode_info: episodeInfo,
  } = result;
  const { infoHash } = decode(download);

  return {
    title,
    infoHash,
    magnetLink: download,
    seeders,
    leechers,
    category,
    size,
    uploadDate: new Date(pubdate),
    // stays falsy (undefined/null) when the entry carries no episode info
    imdbId: episodeInfo && episodeInfo.imdb,
  };
}
/**
 * Returns the API token, requesting one only when none is cached.
 *
 * The token is kept in the module-level `token` variable; it is fetched with a
 * `get_token` request against `baseUrl` and reused until something clears the variable.
 *
 * @returns {Promise<*>} the cached or freshly fetched `token` field of the response body.
 */
async function getToken() {
  if (token) {
    return token;
  }

  const options = { ...defaultOptionsWithProxy(), open_timeout: defaultTimeout };
  const response = await needle('get', baseUrl, { get_token: 'get_token', app_id: appId }, options);
  token = response.body.token;
  return token;
}
module.exports = { search, browse, Options };

View File

@@ -1,31 +1,29 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const rarbg = require('rarbg-api');
const decode = require('magnet-uri');
const rarbg = require('./rarbg_api');
const { Type } = require('../../lib/types');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'RARBG';
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
const entryLimiter = new Bottleneck({ maxConcurrent: 40 });
const entryLimiter = new Bottleneck({ maxConcurrent: 20 });
const allowedCategories = [
rarbg.CATEGORY.MOVIES_XVID,
rarbg.CATEGORY.MOVIES_XVID_720P,
rarbg.CATEGORY.MOVIES_X265_1080P,
rarbg.CATEGORY.MOVIES_X265_4K,
rarbg.CATEGORY.MOVIES_X265_4K_HDR,
rarbg.CATEGORY.MOVIES_X264,
rarbg.CATEGORY.MOVIES_X264_720P,
rarbg.CATEGORY.MOVIES_X264_1080P,
rarbg.CATEGORY.MOVIES_X264_3D,
rarbg.CATEGORY.MOVIES_X264_4K,
rarbg.CATEGORY.MOVIES_BD_REMUX,
rarbg.CATEGORY.TV_EPISODES,
rarbg.CATEGORY.TV_UHD_EPISODES,
rarbg.CATEGORY.TV_HD_EPISODES
rarbg.Options.category.MOVIES_XVID,
rarbg.Options.category.MOVIES_XVID_720P,
rarbg.Options.category.MOVIES_X265_1080P,
rarbg.Options.category.MOVIES_X265_4K,
rarbg.Options.category.MOVIES_X265_4K_HDR,
rarbg.Options.category.MOVIES_X264,
rarbg.Options.category.MOVIES_X264_720P,
rarbg.Options.category.MOVIES_X264_1080P,
rarbg.Options.category.MOVIES_X264_3D,
rarbg.Options.category.MOVIES_X264_4K,
rarbg.Options.category.MOVIES_BD_REMUX,
rarbg.Options.category.TV_EPISODES,
rarbg.Options.category.TV_UHD_EPISODES,
rarbg.Options.category.TV_HD_EPISODES
].reduce((a, b) => a.concat(b), [])
const searchOptions = { limit: 100, category: allowedCategories, sort: 'seeders', format: 'json_extended', ranked: 0 }
async function scrape() {
console.log(`[${moment()}] starting ${NAME} dump scrape...`);
@@ -39,28 +37,13 @@ async function scrape() {
.then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
}
async function getTorrentsForImdbId(imdbId, retries = 5) {
return rarbg.search(imdbId, searchOptions, 'imdb')
.then(torrents => torrents.map(torrent => ({
name: torrent.title,
infoHash: decode(torrent.download).infoHash,
magnetLink: torrent.download,
seeders: torrent.seeders,
leechers: torrent.leechers,
category: torrent.category,
size: torrent.size,
uploadDate: new Date(torrent.pubdate),
imdbId: torrent.episode_info && torrent.episode_info.imdb
})))
async function getTorrentsForImdbId(imdbId) {
return rarbg.search(imdbId, { category: allowedCategories })
.then(torrents => {
console.log(`Completed ${imdbId} request`);
return torrents;
})
.catch(error => {
if (retries > 0) {
console.log(`Retrying ${NAME} request for ${imdbId}...`);
return getTorrentsForImdbId(imdbId, retries - 1);
}
console.warn(`Failed ${NAME} request for ${imdbId}: `, error);
return [];
});
@@ -74,7 +57,7 @@ async function processTorrentRecord(record) {
const torrent = {
provider: NAME,
infoHash: record.infoHash,
title: record.name,
title: record.title,
type: getType(record.category),
seeders: record.seeders,
size: record.size,

View File

@@ -1,14 +1,12 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const rarbg = require('rarbg-api');
const decode = require('magnet-uri');
const rarbg = require('./rarbg_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'RARBG';
const SEARCH_OPTIONS = { limit: 100, sort: 'seeders', format: 'json_extended', ranked: 0 };
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
@@ -27,27 +25,28 @@ async function scrape() {
}
async function updateSeeders(torrent, getImdbIdsMethod) {
return getImdbIdsMethod()
.then(imdbIds => Promise.all(imdbIds.map(imdbId => limiter.schedule(() => search(imdbId)))))
.then(results => results.reduce((a, b) => a.concat(b), []));
// return getImdbIdsMethod()
// .then(imdbIds => Promise.all(imdbIds.map(imdbId => limiter.schedule(() => search(imdbId)))))
// .then(results => results.reduce((a, b) => a.concat(b), []));
return Promise.resolve([]);
}
async function scrapeLatestTorrents() {
const allowedCategories = [
rarbg.CATEGORY.MOVIES_XVID,
rarbg.CATEGORY.MOVIES_XVID_720P,
rarbg.CATEGORY.MOVIES_X265_1080P,
rarbg.CATEGORY.MOVIES_X265_4K,
rarbg.CATEGORY.MOVIES_X265_4K_HDR,
rarbg.CATEGORY.MOVIES_X264,
rarbg.CATEGORY.MOVIES_X264_720P,
rarbg.CATEGORY.MOVIES_X264_1080P,
rarbg.CATEGORY.MOVIES_X264_3D,
rarbg.CATEGORY.MOVIES_X264_4K,
rarbg.CATEGORY.MOVIES_BD_REMUX,
rarbg.CATEGORY.TV_EPISODES,
rarbg.CATEGORY.TV_UHD_EPISODES,
rarbg.CATEGORY.TV_HD_EPISODES
rarbg.Options.category.MOVIES_XVID,
rarbg.Options.category.MOVIES_XVID_720P,
rarbg.Options.category.MOVIES_X265_1080P,
rarbg.Options.category.MOVIES_X265_4K,
rarbg.Options.category.MOVIES_X265_4K_HDR,
rarbg.Options.category.MOVIES_X264,
rarbg.Options.category.MOVIES_X264_720P,
rarbg.Options.category.MOVIES_X264_1080P,
rarbg.Options.category.MOVIES_X264_3D,
rarbg.Options.category.MOVIES_X264_4K,
rarbg.Options.category.MOVIES_BD_REMUX,
rarbg.Options.category.TV_EPISODES,
rarbg.Options.category.TV_UHD_EPISODES,
rarbg.Options.category.TV_HD_EPISODES
];
return Promises.sequence(allowedCategories
@@ -55,16 +54,11 @@ async function scrapeLatestTorrents() {
.then(entries => entries.reduce((a, b) => a.concat(b), []));
}
async function scrapeLatestTorrentsForCategory(category, retries = 5) {
async function scrapeLatestTorrentsForCategory(category) {
console.log(`Scrapping ${NAME} ${category} category`);
return rarbg.list({ category: category, limit: 100, sort: 'last', format: 'json_extended', ranked: 0 })
.then(results => results.map(result => toTorrent(result)))
return rarbg.browse({ category: category })
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))
.catch(error => {
if (retries > 0) {
console.log(`Retrying ${NAME} request for ${category}...`);
return scrapeLatestTorrentsForCategory(category, retries - 1);
}
console.warn(`Failed ${NAME} scrapping for ${category} due: `, error);
return Promise.resolve([]);
});
@@ -89,33 +83,6 @@ async function processTorrentRecord(record) {
return createTorrentEntry(torrent);
}
async function search(imdbId, retries = 5) {
return rarbg.search(imdbId, SEARCH_OPTIONS, 'imdb')
.then(results => results.map(result => toTorrent(result)))
.catch(error => {
if (retries > 0) {
console.log(`Retrying ${imdbId} search...`);
return search(imdbId, retries - 1);
}
return Promise.reject(error);
});
}
function toTorrent(result) {
return {
title: result.title,
provider: NAME,
infoHash: decode(result.download).infoHash,
magnetLink: result.download,
seeders: result.seeders,
leechers: result.leechers,
category: result.category,
size: result.size,
uploadDate: new Date(result.pubdate),
imdbId: result.episode_info && result.episode_info.imdb
};
}
const seriesCategories = [
'TV Episodes',
'Movies/TV-UHD-episodes',