[scraper] implements custom rarbg api class
This commit is contained in:
5
package-lock.json
generated
5
package-lock.json
generated
@@ -1949,11 +1949,6 @@
|
||||
"resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.0.tgz",
|
||||
"integrity": "sha1-9JvmtIeJTdxA3MlKMi9hEJLgDV4="
|
||||
},
|
||||
"rarbg-api": {
|
||||
"version": "1.1.4",
|
||||
"resolved": "https://registry.npmjs.org/rarbg-api/-/rarbg-api-1.1.4.tgz",
|
||||
"integrity": "sha512-BxhHwCW/h18l8m8nwONfEVcNRN0qt0mBp0eM0yecdDIG6h1VcNMdCViLqyPethZC0UExTCGsioCWvVWbCkBMhg=="
|
||||
},
|
||||
"raw-body": {
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.3.3.tgz",
|
||||
|
||||
@@ -34,7 +34,6 @@
|
||||
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#345c33536b2a5e7455da91cdde0146625bb9b254",
|
||||
"pg": "^7.8.2",
|
||||
"pg-hstore": "^2.3.2",
|
||||
"rarbg-api": "^1.1.4",
|
||||
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#935a5c23ae809edbcd2a111526a7f74d6767c50d",
|
||||
"rutracker-api-2": "^1.10.0",
|
||||
"sequelize": "^5.21.5",
|
||||
|
||||
@@ -189,7 +189,7 @@ function getTorrentsWithoutSize() {
|
||||
function getUpdateSeedersTorrents() {
|
||||
const until = moment().subtract(7, 'days').format('YYYY-MM-DD');
|
||||
return Torrent.findAll({
|
||||
where: literal(`torrent."updatedAt" < \'${until}\' and torrent."provider" not in (\'NyaaPantsu\', \'RARBG\')`),
|
||||
where: literal(`torrent."updatedAt" < \'${until}\' and torrent."provider" not in (\'NyaaPantsu\')`),
|
||||
limit: 100,
|
||||
order: [
|
||||
['seeders', 'DESC'],
|
||||
|
||||
@@ -10,7 +10,7 @@ module.exports = [
|
||||
{ scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
// { scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */2 ? * *' },
|
||||
{ scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */2 ? * *' },
|
||||
{ scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||
|
||||
148
scraper/scrapers/rarbg/rarbg_api.js
Normal file
148
scraper/scrapers/rarbg/rarbg_api.js
Normal file
@@ -0,0 +1,148 @@
|
||||
const needle = require('needle');
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { defaultOptionsWithProxy } = require('./../../lib/request_helper');
|
||||
|
||||
// Endpoint every request of this module is sent to.
const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
// Value sent as `app_id` with every request (token and data requests alike).
const appId = 'node-rarbg-api';
// Fallback for needle's `open_timeout` option (presumably milliseconds - confirm against needle docs).
const defaultTimeout = 30000;

// Cached API token: filled lazily by getToken() and reset by singleRequest()
// when the API answers with error_code 4.
let token;

/**
 * Recursively freezes an object graph made of plain objects and arrays.
 *
 * @param {object} value - object or array to freeze in place
 * @returns {object} the same (now frozen) value
 */
function deepFreeze(value) {
  Object.values(value)
      .filter(child => child !== null && typeof child === 'object')
      .forEach(child => deepFreeze(child));
  return Object.freeze(value);
}

/**
 * Request option values accepted by `search` and `browse`.
 *
 * The tables are exported and their category arrays are handed to consumers by
 * reference, so the whole structure is frozen: an accidental `push`/assignment
 * in one scraper can no longer alter the requests of every other consumer.
 * Reading, `join` and `concat` work exactly as before.
 */
const Options = deepFreeze({
  // Category ids; each entry is an array so entries can be concatenated and
  // joined with ';' when building the `category` request parameter.
  category: {
    MOVIES_XVID: [14],
    MOVIES_XVID_720P: [48],
    MOVIES_X264: [17],
    MOVIES_X264_1080P: [44],
    MOVIES_X264_720P: [45],
    MOVIES_X264_3D: [47],
    MOVIES_X264_4K: [50],
    MOVIES_X265_1080P: [54],
    MOVIES_X265_4K: [51],
    MOVIES_X265_4K_HDR: [52],
    MOVIES_FULL_BD: [42],
    MOVIES_BD_REMUX: [46],
    TV_EPISODES: [18],
    TV_UHD_EPISODES: [49],
    TV_HD_EPISODES: [41],
    MUSIC_MP3: [23],
    MUSIC_FLAC: [25],
    GAMES_PC_ISO: [27],
    GAMES_PC_RIP: [28],
    GAMES_PS3: [40],
    GAMES_XBOX_360: [32],
    SOFTWARE_PC_ISO: [33],
    EBOOKS: [35],
    XXX: [4],
  },
  // Values for the `sort` request parameter.
  sort: {
    LAST: 'last',
    SEEDERS: 'seeders',
    LEECHERS: 'leechers'
  },
  // Values for the `format` request parameter.
  format: {
    JSON: 'json',
    JSON_EXTENDED: 'json_extended'
  },
  // Values for the `ranked` request parameter.
  ranked: {
    TRUE: 1,
    FALSE: 0
  }
});
|
||||
|
||||
/**
 * Searches torrents by IMDb id.
 *
 * Declared `async` so that every failure (missing id, malformed `params`)
 * surfaces as a rejected promise instead of a synchronous throw.
 *
 * @param {string} imdbId - IMDb id sent as `search_imdb`; required
 * @param {object} [params] - optional overrides
 * @param {Array|number|string} [params.category] - category id(s); an array is
 *     joined with ';', a single id is accepted as-is
 * @param {number} [params.limit=100]
 * @param {string} [params.sort] - defaults to `Options.sort.SEEDERS`
 * @param {number} [params.min_seeders]
 * @param {number} [params.min_leechers]
 * @param {string} [params.format] - defaults to `Options.format.JSON_EXTENDED`
 * @param {number} [params.ranked] - defaults to `Options.ranked.FALSE`
 * @returns {Promise<Array>} whatever `parseResults` produces for the response
 */
async function search(imdbId, params = {}) {
  if (!imdbId) {
    throw new Error(`Must define imdbId`);
  }

  // Falsy categories are dropped (as before); a truthy scalar is wrapped so
  // `join` never blows up on a non-array value.
  const categories = params.category ? [].concat(params.category) : [];
  const parameters = {
    mode: 'search',
    search_imdb: imdbId,
    category: categories.join(';') || null,
    limit: params.limit || 100,
    sort: params.sort || Options.sort.SEEDERS,
    min_seeders: params.min_seeders || undefined,
    min_leechers: params.min_leechers || undefined,
    format: params.format || Options.format.JSON_EXTENDED,
    ranked: params.ranked || Options.ranked.FALSE
  };

  const results = await singleRequest(parameters);
  return parseResults(results);
}
|
||||
|
||||
/**
 * Lists torrents (request mode `list`), newest first by default.
 *
 * Declared `async` so that a malformed `params` value surfaces as a rejected
 * promise instead of a synchronous throw.
 *
 * @param {object} [params] - optional overrides
 * @param {Array|number|string} [params.category] - category id(s); an array is
 *     joined with ';', a single id is accepted as-is
 * @param {number} [params.limit=100]
 * @param {string} [params.sort] - defaults to `Options.sort.LAST`
 * @param {number} [params.min_seeders]
 * @param {number} [params.min_leechers]
 * @param {string} [params.format] - defaults to `Options.format.JSON_EXTENDED`
 * @param {number} [params.ranked] - defaults to `Options.ranked.FALSE`
 * @returns {Promise<Array>} whatever `parseResults` produces for the response
 */
async function browse(params = {}) {
  // Falsy categories are dropped (as before); a truthy scalar is wrapped so
  // `join` never blows up on a non-array value.
  const categories = params.category ? [].concat(params.category) : [];
  const parameters = {
    mode: 'list',
    category: categories.join(';') || null,
    limit: params.limit || 100,
    sort: params.sort || Options.sort.LAST,
    min_seeders: params.min_seeders || undefined,
    min_leechers: params.min_leechers || undefined,
    format: params.format || Options.format.JSON_EXTENDED,
    ranked: params.ranked || Options.ranked.FALSE
  };

  const results = await singleRequest(parameters);
  return parseResults(results);
}
|
||||
|
||||
/**
 * Performs one GET request against the API, retrying on recoverable answers.
 *
 * All retry paths share the single `retries` budget. Previously the
 * token-expired path restarted with a fresh budget and no limit, so an API
 * that kept answering error_code 4 caused endless recursion.
 *
 * @param {object} [params] - query parameters; not modified (a copy is sent)
 * @param {object} [config] - `config.timeout` overrides `defaultTimeout`
 * @param {number} [retries=5] - remaining retry attempts
 * @returns {Promise<object>} the response body; once the budget is exhausted
 *     this may still be an error body
 * @throws rejects with the response body (or a message string when there is
 *     no body) if the final answer has a status code other than 200
 */
async function singleRequest(params = {}, config = {}, retries = 5) {
  const timeout = config.timeout || defaultTimeout;
  const options = { ...defaultOptionsWithProxy(), open_timeout: timeout, follow: 2 };
  // Work on a copy so the caller's object is left untouched and every retry
  // starts again from the original parameters plus a current token.
  const query = { ...params, token: await getToken(), app_id: appId };

  // Unset parameters must not be sent at all.
  Object.keys(query)
      .filter(key => query[key] === undefined || query[key] === null)
      .forEach(key => delete query[key]);

  const response = await needle('get', baseUrl, query, options);
  const body = response.body;

  if (body && body.error_code === 4 && retries > 0) {
    // token expired: drop the cached token so getToken() fetches a new one
    token = undefined;
    return singleRequest(params, config, retries - 1);
  }
  if ((!body || [5, 20].includes(body.error_code)) && retries > 0) {
    // too many requests: back off before trying again
    await Promises.delay(2100);
    return singleRequest(params, config, retries - 1);
  }
  if (response.statusCode !== 200) {
    // something went wrong
    return Promise.reject(body || `Failed RARGB request with status=${response.statusCode}`);
  }

  return body;
}
|
||||
|
||||
/**
 * Converts a raw API response body into a list of parsed torrents.
 *
 * Always synchronous: a body without a `torrent_results` array makes the
 * function throw an Error instead of returning a rejected promise carrying a
 * bare string. Inside a `.then()` callback or an async function, which is
 * where it is used, this still results in a rejected promise.
 *
 * @param {object} results - response body as returned by `singleRequest`
 * @returns {Array<object>} one `parseResult` output per entry
 * @throws {Error} when `results.torrent_results` is not an array
 */
function parseResults(results) {
  if (!results || !Array.isArray(results.torrent_results)) {
    throw new Error(`Incorrect results ${JSON.stringify(results)}`);
  }
  return results.torrent_results.map(result => parseResult(result));
}
|
||||
|
||||
/**
 * Maps one raw `torrent_results` entry onto the record shape used by the
 * scrapers.
 *
 * @param {object} result - raw entry from the API response
 * @returns {object} record with title, infoHash, magnetLink, seeders,
 *     leechers, category, size, uploadDate and imdbId
 */
function parseResult(result) {
  const {
    title,
    download,
    seeders,
    leechers,
    category,
    size,
    pubdate,
    episode_info: episodeInfo
  } = result;
  // `download` is the magnet link; its info hash is extracted via magnet-uri.
  const { infoHash } = decode(download);

  return {
    title,
    infoHash,
    magnetLink: download,
    seeders,
    leechers,
    category,
    size,
    uploadDate: new Date(pubdate),
    // imdb id is only available when the entry carries episode_info
    imdbId: episodeInfo && episodeInfo.imdb
  };
}
|
||||
|
||||
/**
 * Returns the API token, requesting one only when none is cached in the
 * module-level `token` variable.
 *
 * @returns {Promise<string>} the cached or freshly requested token
 */
async function getToken() {
  if (token) {
    return token;
  }

  const options = { ...defaultOptionsWithProxy(), open_timeout: defaultTimeout };
  const response = await needle('get', baseUrl, { get_token: 'get_token', app_id: appId }, options);
  // remember the token for subsequent requests
  token = response.body.token;
  return token;
}
|
||||
|
||||
module.exports = { search, browse, Options };
|
||||
@@ -1,31 +1,29 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const rarbg = require('rarbg-api');
|
||||
const decode = require('magnet-uri');
|
||||
const rarbg = require('./rarbg_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'RARBG';
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
|
||||
const entryLimiter = new Bottleneck({ maxConcurrent: 40 });
|
||||
const entryLimiter = new Bottleneck({ maxConcurrent: 20 });
|
||||
const allowedCategories = [
|
||||
rarbg.CATEGORY.MOVIES_XVID,
|
||||
rarbg.CATEGORY.MOVIES_XVID_720P,
|
||||
rarbg.CATEGORY.MOVIES_X265_1080P,
|
||||
rarbg.CATEGORY.MOVIES_X265_4K,
|
||||
rarbg.CATEGORY.MOVIES_X265_4K_HDR,
|
||||
rarbg.CATEGORY.MOVIES_X264,
|
||||
rarbg.CATEGORY.MOVIES_X264_720P,
|
||||
rarbg.CATEGORY.MOVIES_X264_1080P,
|
||||
rarbg.CATEGORY.MOVIES_X264_3D,
|
||||
rarbg.CATEGORY.MOVIES_X264_4K,
|
||||
rarbg.CATEGORY.MOVIES_BD_REMUX,
|
||||
rarbg.CATEGORY.TV_EPISODES,
|
||||
rarbg.CATEGORY.TV_UHD_EPISODES,
|
||||
rarbg.CATEGORY.TV_HD_EPISODES
|
||||
rarbg.Options.category.MOVIES_XVID,
|
||||
rarbg.Options.category.MOVIES_XVID_720P,
|
||||
rarbg.Options.category.MOVIES_X265_1080P,
|
||||
rarbg.Options.category.MOVIES_X265_4K,
|
||||
rarbg.Options.category.MOVIES_X265_4K_HDR,
|
||||
rarbg.Options.category.MOVIES_X264,
|
||||
rarbg.Options.category.MOVIES_X264_720P,
|
||||
rarbg.Options.category.MOVIES_X264_1080P,
|
||||
rarbg.Options.category.MOVIES_X264_3D,
|
||||
rarbg.Options.category.MOVIES_X264_4K,
|
||||
rarbg.Options.category.MOVIES_BD_REMUX,
|
||||
rarbg.Options.category.TV_EPISODES,
|
||||
rarbg.Options.category.TV_UHD_EPISODES,
|
||||
rarbg.Options.category.TV_HD_EPISODES
|
||||
].reduce((a, b) => a.concat(b), [])
|
||||
const searchOptions = { limit: 100, category: allowedCategories, sort: 'seeders', format: 'json_extended', ranked: 0 }
|
||||
|
||||
async function scrape() {
|
||||
console.log(`[${moment()}] starting ${NAME} dump scrape...`);
|
||||
@@ -39,28 +37,13 @@ async function scrape() {
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
|
||||
}
|
||||
|
||||
async function getTorrentsForImdbId(imdbId, retries = 5) {
|
||||
return rarbg.search(imdbId, searchOptions, 'imdb')
|
||||
.then(torrents => torrents.map(torrent => ({
|
||||
name: torrent.title,
|
||||
infoHash: decode(torrent.download).infoHash,
|
||||
magnetLink: torrent.download,
|
||||
seeders: torrent.seeders,
|
||||
leechers: torrent.leechers,
|
||||
category: torrent.category,
|
||||
size: torrent.size,
|
||||
uploadDate: new Date(torrent.pubdate),
|
||||
imdbId: torrent.episode_info && torrent.episode_info.imdb
|
||||
})))
|
||||
async function getTorrentsForImdbId(imdbId) {
|
||||
return rarbg.search(imdbId, { category: allowedCategories })
|
||||
.then(torrents => {
|
||||
console.log(`Completed ${imdbId} request`);
|
||||
return torrents;
|
||||
})
|
||||
.catch(error => {
|
||||
if (retries > 0) {
|
||||
console.log(`Retrying ${NAME} request for ${imdbId}...`);
|
||||
return getTorrentsForImdbId(imdbId, retries - 1);
|
||||
}
|
||||
console.warn(`Failed ${NAME} request for ${imdbId}: `, error);
|
||||
return [];
|
||||
});
|
||||
@@ -74,7 +57,7 @@ async function processTorrentRecord(record) {
|
||||
const torrent = {
|
||||
provider: NAME,
|
||||
infoHash: record.infoHash,
|
||||
title: record.name,
|
||||
title: record.title,
|
||||
type: getType(record.category),
|
||||
seeders: record.seeders,
|
||||
size: record.size,
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const rarbg = require('rarbg-api');
|
||||
const decode = require('magnet-uri');
|
||||
const rarbg = require('./rarbg_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'RARBG';
|
||||
const SEARCH_OPTIONS = { limit: 100, sort: 'seeders', format: 'json_extended', ranked: 0 };
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
|
||||
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
@@ -27,27 +25,28 @@ async function scrape() {
|
||||
}
|
||||
|
||||
async function updateSeeders(torrent, getImdbIdsMethod) {
|
||||
return getImdbIdsMethod()
|
||||
.then(imdbIds => Promise.all(imdbIds.map(imdbId => limiter.schedule(() => search(imdbId)))))
|
||||
.then(results => results.reduce((a, b) => a.concat(b), []));
|
||||
// return getImdbIdsMethod()
|
||||
// .then(imdbIds => Promise.all(imdbIds.map(imdbId => limiter.schedule(() => search(imdbId)))))
|
||||
// .then(results => results.reduce((a, b) => a.concat(b), []));
|
||||
return Promise.resolve([]);
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
const allowedCategories = [
|
||||
rarbg.CATEGORY.MOVIES_XVID,
|
||||
rarbg.CATEGORY.MOVIES_XVID_720P,
|
||||
rarbg.CATEGORY.MOVIES_X265_1080P,
|
||||
rarbg.CATEGORY.MOVIES_X265_4K,
|
||||
rarbg.CATEGORY.MOVIES_X265_4K_HDR,
|
||||
rarbg.CATEGORY.MOVIES_X264,
|
||||
rarbg.CATEGORY.MOVIES_X264_720P,
|
||||
rarbg.CATEGORY.MOVIES_X264_1080P,
|
||||
rarbg.CATEGORY.MOVIES_X264_3D,
|
||||
rarbg.CATEGORY.MOVIES_X264_4K,
|
||||
rarbg.CATEGORY.MOVIES_BD_REMUX,
|
||||
rarbg.CATEGORY.TV_EPISODES,
|
||||
rarbg.CATEGORY.TV_UHD_EPISODES,
|
||||
rarbg.CATEGORY.TV_HD_EPISODES
|
||||
rarbg.Options.category.MOVIES_XVID,
|
||||
rarbg.Options.category.MOVIES_XVID_720P,
|
||||
rarbg.Options.category.MOVIES_X265_1080P,
|
||||
rarbg.Options.category.MOVIES_X265_4K,
|
||||
rarbg.Options.category.MOVIES_X265_4K_HDR,
|
||||
rarbg.Options.category.MOVIES_X264,
|
||||
rarbg.Options.category.MOVIES_X264_720P,
|
||||
rarbg.Options.category.MOVIES_X264_1080P,
|
||||
rarbg.Options.category.MOVIES_X264_3D,
|
||||
rarbg.Options.category.MOVIES_X264_4K,
|
||||
rarbg.Options.category.MOVIES_BD_REMUX,
|
||||
rarbg.Options.category.TV_EPISODES,
|
||||
rarbg.Options.category.TV_UHD_EPISODES,
|
||||
rarbg.Options.category.TV_HD_EPISODES
|
||||
];
|
||||
|
||||
return Promises.sequence(allowedCategories
|
||||
@@ -55,16 +54,11 @@ async function scrapeLatestTorrents() {
|
||||
.then(entries => entries.reduce((a, b) => a.concat(b), []));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(category, retries = 5) {
|
||||
async function scrapeLatestTorrentsForCategory(category) {
|
||||
console.log(`Scrapping ${NAME} ${category} category`);
|
||||
return rarbg.list({ category: category, limit: 100, sort: 'last', format: 'json_extended', ranked: 0 })
|
||||
.then(results => results.map(result => toTorrent(result)))
|
||||
return rarbg.browse({ category: category })
|
||||
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))
|
||||
.catch(error => {
|
||||
if (retries > 0) {
|
||||
console.log(`Retrying ${NAME} request for ${category}...`);
|
||||
return scrapeLatestTorrentsForCategory(category, retries - 1);
|
||||
}
|
||||
console.warn(`Failed ${NAME} scrapping for ${category} due: `, error);
|
||||
return Promise.resolve([]);
|
||||
});
|
||||
@@ -89,33 +83,6 @@ async function processTorrentRecord(record) {
|
||||
return createTorrentEntry(torrent);
|
||||
}
|
||||
|
||||
async function search(imdbId, retries = 5) {
|
||||
return rarbg.search(imdbId, SEARCH_OPTIONS, 'imdb')
|
||||
.then(results => results.map(result => toTorrent(result)))
|
||||
.catch(error => {
|
||||
if (retries > 0) {
|
||||
console.log(`Retrying ${imdbId} search...`);
|
||||
return search(imdbId, retries - 1);
|
||||
}
|
||||
return Promise.reject(error);
|
||||
});
|
||||
}
|
||||
|
||||
function toTorrent(result) {
|
||||
return {
|
||||
title: result.title,
|
||||
provider: NAME,
|
||||
infoHash: decode(result.download).infoHash,
|
||||
magnetLink: result.download,
|
||||
seeders: result.seeders,
|
||||
leechers: result.leechers,
|
||||
category: result.category,
|
||||
size: result.size,
|
||||
uploadDate: new Date(result.pubdate),
|
||||
imdbId: result.episode_info && result.episode_info.imdb
|
||||
};
|
||||
}
|
||||
|
||||
const seriesCategories = [
|
||||
'TV Episodes',
|
||||
'Movies/TV-UHD-episodes',
|
||||
|
||||
Reference in New Issue
Block a user