diff --git a/package-lock.json b/package-lock.json index 1cf9dde..a891a42 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1949,11 +1949,6 @@ "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.0.tgz", "integrity": "sha1-9JvmtIeJTdxA3MlKMi9hEJLgDV4=" }, - "rarbg-api": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/rarbg-api/-/rarbg-api-1.1.4.tgz", - "integrity": "sha512-BxhHwCW/h18l8m8nwONfEVcNRN0qt0mBp0eM0yecdDIG6h1VcNMdCViLqyPethZC0UExTCGsioCWvVWbCkBMhg==" - }, "raw-body": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.3.3.tgz", diff --git a/package.json b/package.json index 831ef12..553d8f2 100644 --- a/package.json +++ b/package.json @@ -34,7 +34,6 @@ "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#345c33536b2a5e7455da91cdde0146625bb9b254", "pg": "^7.8.2", "pg-hstore": "^2.3.2", - "rarbg-api": "^1.1.4", "real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#935a5c23ae809edbcd2a111526a7f74d6767c50d", "rutracker-api-2": "^1.10.0", "sequelize": "^5.21.5", diff --git a/scraper/lib/repository.js b/scraper/lib/repository.js index ad6d06c..0c44a48 100644 --- a/scraper/lib/repository.js +++ b/scraper/lib/repository.js @@ -189,7 +189,7 @@ function getTorrentsWithoutSize() { function getUpdateSeedersTorrents() { const until = moment().subtract(7, 'days').format('YYYY-MM-DD'); return Torrent.findAll({ - where: literal(`torrent."updatedAt" < \'${until}\' and torrent."provider" not in (\'NyaaPantsu\', \'RARBG\')`), + where: literal(`torrent."updatedAt" < \'${until}\' and torrent."provider" not in (\'NyaaPantsu\')`), limit: 100, order: [ ['seeders', 'DESC'], diff --git a/scraper/scheduler/scrapers.js b/scraper/scheduler/scrapers.js index 035afdd..7bbcb4d 100644 --- a/scraper/scheduler/scrapers.js +++ b/scraper/scheduler/scrapers.js @@ -10,7 +10,7 @@ module.exports = [ { scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' }, { scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' }, { scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, cron: '0 0 */4 ? * *' }, - // { scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */2 ? * *' }, + { scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */2 ? * *' }, { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */4 ? * *' }, { scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' }, { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' }, diff --git a/scraper/scrapers/rarbg/rarbg_api.js b/scraper/scrapers/rarbg/rarbg_api.js new file mode 100644 index 0000000..07ec93b --- /dev/null +++ b/scraper/scrapers/rarbg/rarbg_api.js @@ -0,0 +1,148 @@ +const needle = require('needle'); +const decode = require('magnet-uri'); +const Promises = require('../../lib/promises'); +const { defaultOptionsWithProxy } = require('./../../lib/request_helper'); + +const baseUrl = 'https://torrentapi.org/pubapi_v2.php'; +const appId = 'node-rarbg-api'; +const defaultTimeout = 30000; + +let token; + +const Options = { + category: { + MOVIES_XVID: [14], + MOVIES_XVID_720P: [48], + MOVIES_X264: [17], + MOVIES_X264_1080P: [44], + MOVIES_X264_720P: [45], + MOVIES_X264_3D: [47], + MOVIES_X264_4K: [50], + MOVIES_X265_1080P: [54], + MOVIES_X265_4K: [51], + MOVIES_X265_4K_HDR: [52], + MOVIES_FULL_BD: [42], + MOVIES_BD_REMUX: [46], + TV_EPISODES: [18], + TV_UHD_EPISODES: [49], + TV_HD_EPISODES: [41], + MUSIC_MP3: [23], + MUSIC_FLAC: [25], + GAMES_PC_ISO: [27], + GAMES_PC_RIP: [28], + GAMES_PS3: [40], + GAMES_XBOX_360: [32], + SOFTWARE_PC_ISO: [33], + EBOOKS: [35], + XXX: [4], + }, + sort: { + LAST: 'last', + SEEDERS: 'seeders', + LEECHERS: 'leechers' + }, + format: { + JSON: 'json', + JSON_EXTENDED: 'json_extended' + }, + ranked: { + TRUE: 1, + FALSE: 0 + } +} + +function search(imdbId, params = {}) { + if (!imdbId) { + return Promise.reject(new Error(`Must define imdbId`)); + } + const parameters = { + mode: 'search', + search_imdb: imdbId, + category: params.category && params.category.join(';') || null, + limit: params.limit || 100, + sort: params.sort || Options.sort.SEEDERS, + min_seeders: params.min_seeders || undefined, + min_leechers: params.min_leechers || undefined, + format: params.format || Options.format.JSON_EXTENDED, + ranked: params.ranked || Options.ranked.FALSE + } + + return singleRequest(parameters).then(results => parseResults(results)); +} + +function browse(params = {}) { + const parameters = { + mode: 'list', + category: params.category && params.category.join(';') || null, + limit: params.limit || 100, + sort: params.sort || Options.sort.LAST, + min_seeders: params.min_seeders || undefined, + min_leechers: params.min_leechers || undefined, + format: params.format || Options.format.JSON_EXTENDED, + ranked: params.ranked || Options.ranked.FALSE + } + + return singleRequest(parameters).then(results => parseResults(results)); +} + +async function singleRequest(params = {}, config = {}, retries = 5) { + const timeout = config.timeout || defaultTimeout; + const options = { ...defaultOptionsWithProxy(), open_timeout: timeout, follow: 2 }; + params.token = await getToken(); + params.app_id = appId; + + Object.keys(params) + .filter(key => params[key] === undefined || params[key] === null) + .forEach(key => delete params[key]); + + return needle('get', baseUrl, params, options) + .then(response => { + if (response.body && response.body.error_code === 4) { + // token expired + token = undefined; + return singleRequest(params, config); + } + if ((!response.body || [5, 20].includes(response.body.error_code)) && retries > 0) { + // too many requests + return Promises.delay(2100).then(() => singleRequest(params, config, retries - 1)); + } + if (response.statusCode !== 200) { + // something went wrong + return Promise.reject(response.body || `Failed RARGB request with status=${response.statusCode}`); + } + + return response.body; + }); +} + +function parseResults(results) { + if (!results || !Array.isArray(results.torrent_results)) { + return Promise.reject(`Incorrect results ${JSON.stringify(results)}`) + } + return results.torrent_results.map(result => parseResult(result)); +} + +function parseResult(result) { + return { + title: result.title, + infoHash: decode(result.download).infoHash, + magnetLink: result.download, + seeders: result.seeders, + leechers: result.leechers, + category: result.category, + size: result.size, + uploadDate: new Date(result.pubdate), + imdbId: result.episode_info && result.episode_info.imdb + } +} + +async function getToken() { + if (!token) { + const options = { ...defaultOptionsWithProxy(), open_timeout: defaultTimeout }; + token = await needle('get', baseUrl, { get_token: 'get_token', app_id: appId }, options) + .then(response => response.body.token); + } + return token; +} + +module.exports = { search, browse, Options }; diff --git a/scraper/scrapers/rarbg/rarbg_dump_scraper.js b/scraper/scrapers/rarbg/rarbg_dump_scraper.js index 43eb1a8..a069f3d 100644 --- a/scraper/scrapers/rarbg/rarbg_dump_scraper.js +++ b/scraper/scrapers/rarbg/rarbg_dump_scraper.js @@ -1,31 +1,29 @@ const moment = require('moment'); const Bottleneck = require('bottleneck'); -const rarbg = require('rarbg-api'); -const decode = require('magnet-uri'); +const rarbg = require('./rarbg_api'); const { Type } = require('../../lib/types'); const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries'); const NAME = 'RARBG'; const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 }); -const entryLimiter = new Bottleneck({ maxConcurrent: 40 }); +const entryLimiter = new Bottleneck({ maxConcurrent: 20 }); const allowedCategories = [ - rarbg.CATEGORY.MOVIES_XVID, - rarbg.CATEGORY.MOVIES_XVID_720P, - rarbg.CATEGORY.MOVIES_X265_1080P, - rarbg.CATEGORY.MOVIES_X265_4K, - rarbg.CATEGORY.MOVIES_X265_4K_HDR, - rarbg.CATEGORY.MOVIES_X264, - rarbg.CATEGORY.MOVIES_X264_720P, - rarbg.CATEGORY.MOVIES_X264_1080P, - rarbg.CATEGORY.MOVIES_X264_3D, - rarbg.CATEGORY.MOVIES_X264_4K, - rarbg.CATEGORY.MOVIES_BD_REMUX, - rarbg.CATEGORY.TV_EPISODES, - rarbg.CATEGORY.TV_UHD_EPISODES, - rarbg.CATEGORY.TV_HD_EPISODES + rarbg.Options.category.MOVIES_XVID, + rarbg.Options.category.MOVIES_XVID_720P, + rarbg.Options.category.MOVIES_X265_1080P, + rarbg.Options.category.MOVIES_X265_4K, + rarbg.Options.category.MOVIES_X265_4K_HDR, + rarbg.Options.category.MOVIES_X264, + rarbg.Options.category.MOVIES_X264_720P, + rarbg.Options.category.MOVIES_X264_1080P, + rarbg.Options.category.MOVIES_X264_3D, + rarbg.Options.category.MOVIES_X264_4K, + rarbg.Options.category.MOVIES_BD_REMUX, + rarbg.Options.category.TV_EPISODES, + rarbg.Options.category.TV_UHD_EPISODES, + rarbg.Options.category.TV_HD_EPISODES ].reduce((a, b) => a.concat(b), []) -const searchOptions = { limit: 100, category: allowedCategories, sort: 'seeders', format: 'json_extended', ranked: 0 } async function scrape() { console.log(`[${moment()}] starting ${NAME} dump scrape...`); @@ -39,28 +37,13 @@ async function scrape() { .then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`)); } -async function getTorrentsForImdbId(imdbId, retries = 5) { - return rarbg.search(imdbId, searchOptions, 'imdb') - .then(torrents => torrents.map(torrent => ({ - name: torrent.title, - infoHash: decode(torrent.download).infoHash, - magnetLink: torrent.download, - seeders: torrent.seeders, - leechers: torrent.leechers, - category: torrent.category, - size: torrent.size, - uploadDate: new Date(torrent.pubdate), - imdbId: torrent.episode_info && torrent.episode_info.imdb - }))) +async function getTorrentsForImdbId(imdbId) { + return rarbg.search(imdbId, { category: allowedCategories }) .then(torrents => { console.log(`Completed ${imdbId} request`); return torrents; }) .catch(error => { - if (retries > 0) { - console.log(`Retrying ${NAME} request for ${imdbId}...`); - return getTorrentsForImdbId(imdbId, retries - 1); - } console.warn(`Failed ${NAME} request for ${imdbId}: `, error); return []; }); @@ -74,7 +57,7 @@ async function processTorrentRecord(record) { const torrent = { provider: NAME, infoHash: record.infoHash, - title: record.name, + title: record.title, type: getType(record.category), seeders: record.seeders, size: record.size, diff --git a/scraper/scrapers/rarbg/rarbg_scraper.js b/scraper/scrapers/rarbg/rarbg_scraper.js index 53b6ffa..c6d80ee 100644 --- a/scraper/scrapers/rarbg/rarbg_scraper.js +++ b/scraper/scrapers/rarbg/rarbg_scraper.js @@ -1,14 +1,12 @@ const moment = require('moment'); const Bottleneck = require('bottleneck'); -const rarbg = require('rarbg-api'); -const decode = require('magnet-uri'); +const rarbg = require('./rarbg_api'); const { Type } = require('../../lib/types'); const repository = require('../../lib/repository'); const Promises = require('../../lib/promises'); const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries'); const NAME = 'RARBG'; -const SEARCH_OPTIONS = { limit: 100, sort: 'seeders', format: 'json_extended', ranked: 0 }; const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 }); const entryLimiter = new Bottleneck({ maxConcurrent: 10 }); @@ -27,27 +25,28 @@ async function scrape() { } async function updateSeeders(torrent, getImdbIdsMethod) { - return getImdbIdsMethod() - .then(imdbIds => Promise.all(imdbIds.map(imdbId => limiter.schedule(() => search(imdbId))))) - .then(results => results.reduce((a, b) => a.concat(b), [])); + // return getImdbIdsMethod() + // .then(imdbIds => Promise.all(imdbIds.map(imdbId => limiter.schedule(() => search(imdbId))))) + // .then(results => results.reduce((a, b) => a.concat(b), [])); + return Promise.resolve([]); } async function scrapeLatestTorrents() { const allowedCategories = [ - rarbg.CATEGORY.MOVIES_XVID, - rarbg.CATEGORY.MOVIES_XVID_720P, - rarbg.CATEGORY.MOVIES_X265_1080P, - rarbg.CATEGORY.MOVIES_X265_4K, - rarbg.CATEGORY.MOVIES_X265_4K_HDR, - rarbg.CATEGORY.MOVIES_X264, - rarbg.CATEGORY.MOVIES_X264_720P, - rarbg.CATEGORY.MOVIES_X264_1080P, - rarbg.CATEGORY.MOVIES_X264_3D, - rarbg.CATEGORY.MOVIES_X264_4K, - rarbg.CATEGORY.MOVIES_BD_REMUX, - rarbg.CATEGORY.TV_EPISODES, - rarbg.CATEGORY.TV_UHD_EPISODES, - rarbg.CATEGORY.TV_HD_EPISODES + rarbg.Options.category.MOVIES_XVID, + rarbg.Options.category.MOVIES_XVID_720P, + rarbg.Options.category.MOVIES_X265_1080P, + rarbg.Options.category.MOVIES_X265_4K, + rarbg.Options.category.MOVIES_X265_4K_HDR, + rarbg.Options.category.MOVIES_X264, + rarbg.Options.category.MOVIES_X264_720P, + rarbg.Options.category.MOVIES_X264_1080P, + rarbg.Options.category.MOVIES_X264_3D, + rarbg.Options.category.MOVIES_X264_4K, + rarbg.Options.category.MOVIES_BD_REMUX, + rarbg.Options.category.TV_EPISODES, + rarbg.Options.category.TV_UHD_EPISODES, + rarbg.Options.category.TV_HD_EPISODES ]; return Promises.sequence(allowedCategories @@ -55,16 +54,11 @@ async function scrapeLatestTorrents() { .then(entries => entries.reduce((a, b) => a.concat(b), [])); } -async function scrapeLatestTorrentsForCategory(category, retries = 5) { +async function scrapeLatestTorrentsForCategory(category) { console.log(`Scrapping ${NAME} ${category} category`); - return rarbg.list({ category: category, limit: 100, sort: 'last', format: 'json_extended', ranked: 0 }) - .then(results => results.map(result => toTorrent(result))) + return rarbg.browse({ category: category }) .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t))))) .catch(error => { - if (retries > 0) { - console.log(`Retrying ${NAME} request for ${category}...`); - return scrapeLatestTorrentsForCategory(category, retries - 1); - } console.warn(`Failed ${NAME} scrapping for ${category} due: `, error); return Promise.resolve([]); }); @@ -89,33 +83,6 @@ async function processTorrentRecord(record) { return createTorrentEntry(torrent); } -async function search(imdbId, retries = 5) { - return rarbg.search(imdbId, SEARCH_OPTIONS, 'imdb') - .then(results => results.map(result => toTorrent(result))) - .catch(error => { - if (retries > 0) { - console.log(`Retrying ${imdbId} search...`); - return search(imdbId, retries - 1); - } - return Promise.reject(error); - }); -} - -function toTorrent(result) { - return { - title: result.title, - provider: NAME, - infoHash: decode(result.download).infoHash, - magnetLink: result.download, - seeders: result.seeders, - leechers: result.leechers, - category: result.category, - size: result.size, - uploadDate: new Date(result.pubdate), - imdbId: result.episode_info && result.episode_info.imdb - }; -} - const seriesCategories = [ 'TV Episodes', 'Movies/TV-UHD-episodes',