diff --git a/index.js b/index.js index 48eb057..468c0d0 100644 --- a/index.js +++ b/index.js @@ -6,13 +6,15 @@ const thepiratebayScraper = require('./scrapers/thepiratebay/thepiratebay_scrape const horribleSubsScraper = require('./scrapers/horriblesubs/horriblesubs_scraper'); const leetxScraper = require('./scrapers/1337x/1337x_scraper'); const kickassScraper = require('./scrapers/kickass/kickass_scraper'); +const rarbgScraper = require('./scrapers/rarbg/rarbg_scraper'); const thepiratebayDumpScraper = require('./scrapers/thepiratebay/thepiratebay_dump_scraper'); const thepiratebayUnofficialDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper'); const providers = [ // horribleSubsScraper, + rarbgScraper, // thepiratebayScraper, - kickassScraper, + // kickassScraper, // leetxScraper ]; diff --git a/package-lock.json b/package-lock.json index af7e436..9ad3b0b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1692,6 +1692,19 @@ "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.0.tgz", "integrity": "sha1-9JvmtIeJTdxA3MlKMi9hEJLgDV4=" }, + "rarbg": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/rarbg/-/rarbg-1.3.0.tgz", + "integrity": "sha512-otXBLZYk+02TQkPnxdfHlx5rzbzNZ+9dcZGkDeNKH+cLVw16yO4CCJkc9h60cGSlMlhHqdt33y9UEZc9hk0Uiw==", + "requires": { + "moment": "^2.22.2" + } + }, + "rarbg-api": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/rarbg-api/-/rarbg-api-1.1.3.tgz", + "integrity": "sha512-9pkjwIDLOOOwB4U0AUw4tN9P4vV6tDizvHtkbu5KLpbJjLHneP3FMbHJeFT50yVy/9dTOaFtxg2zD3pGFgOaOg==" + }, "raw-body": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.3.3.tgz", diff --git a/package.json b/package.json index f626a22..3280f93 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "peer-search": "^0.6.x", "pg": "^7.8.2", "pg-hstore": "^2.3.2", + "rarbg-api": "^1.1.3", "sequelize": "^4.43.0", "sugar-date": "^2.0.6", "torrent-stream": 
// scrapers/rarbg/rarbg_scraper.js -- new file added by this patch.
// (Patch context: the preceding package.json hunk ends with `"torrent-stream": "^1.1.0"`.)

const moment = require('moment');
const Bottleneck = require('bottleneck');
const rarbg = require('rarbg-api');
const decode = require('magnet-uri');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const {
  createTorrentEntry,
  getStoredTorrentEntry,
  updateTorrentSeeders
} = require('../../lib/torrentEntries');

const NAME = 'RARBG';

// Throttles the listing requests: a single job at a time with minTime 2500 ms between launches.
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
// Caps how many torrent records are processed at once.
const entryLimiter = new Bottleneck({ maxConcurrent: 40 });

/**
 * Runs one scrape pass: loads the provider row, fetches the latest torrents for
 * every allowed category, processes each record, then stores the scrape
 * timestamp on the provider row.
 *
 * @returns {Promise<void>} resolves once the provider row has been updated;
 *   rejects if any record fails to process or a repository call fails.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  const latestTorrents = await getLatestTorrents();
  await Promise.all(
    latestTorrents.map(torrent => entryLimiter.schedule(() => processTorrentRecord(torrent)))
  );

  lastScrape.lastScraped = scrapeStart;
  // NOTE(review): the records built by toTorrentRecord() carry no `torrentId`
  // property, so this evaluates to `undefined` (or `0` for an empty list).
  // Left unchanged because the expected type of `lastScrapedId` is not visible
  // from this file -- confirm and pick a real identifier (infoHash?) if needed.
  lastScrape.lastScrapedId = latestTorrents.length && latestTorrents[latestTorrents.length - 1].torrentId;
  await repository.updateProvider(lastScrape);

  console.log(`[${moment()}] finished ${NAME} scrape`);
}

/**
 * Fetches the latest torrents for all allowed movie and TV categories and
 * flattens the per-category results into one list.
 *
 * @returns {Promise<Array<Object>>} normalized torrent records.
 */
async function getLatestTorrents() {
  const allowedCategories = [
    rarbg.CATEGORY['4K_MOVIES_X264_4k'],
    rarbg.CATEGORY['4K_X265_4k'],
    rarbg.CATEGORY['4k_X264_4k_HDR'],
    rarbg.CATEGORY.MOVIES_XVID,
    rarbg.CATEGORY.MOVIES_XVID_720P,
    rarbg.CATEGORY.MOVIES_X264,
    rarbg.CATEGORY.MOVIES_X264_1080P,
    rarbg.CATEGORY.MOVIES_X264_720P,
    rarbg.CATEGORY.MOVIES_X264_3D,
    rarbg.CATEGORY.MOVIES_FULL_BD,
    rarbg.CATEGORY.MOVIES_BD_REMUX,
    rarbg.CATEGORY.TV_EPISODES,
    rarbg.CATEGORY.TV_UHD_EPISODES,
    rarbg.CATEGORY.TV_HD_EPISODES
  ];

  // Every category request goes through the shared limiter.
  const perCategory = await Promise.all(
    allowedCategories.map(category => limiter.schedule(() => getLatestTorrentsForCategory(category)))
  );
  return perCategory.reduce((all, batch) => all.concat(batch), []);
}

/**
 * Converts one raw listing entry into the record shape used by
 * processTorrentRecord().
 *
 * @param {Object} torrent - entry as returned by `rarbg.list`.
 * @returns {Object} normalized record.
 */
function toTorrentRecord(torrent) {
  return {
    name: torrent.title,
    infoHash: decode(torrent.download).infoHash,
    magnetLink: torrent.download,
    seeders: torrent.seeders,
    leechers: torrent.leechers,
    category: torrent.category,
    size: torrent.size,
    uploadDate: new Date(torrent.pubdate),
    imdbId: torrent.episode_info && torrent.episode_info.imdb
  };
}

/**
 * Lists up to 100 torrents of a single category, sorted by 'last'.
 *
 * Best effort: a failure while fetching or mapping the listing is logged and
 * yields an empty list, so one failing category does not abort the scrape.
 *
 * @param {*} category - a value taken from `rarbg.CATEGORY`.
 * @returns {Promise<Array<Object>>} normalized records, or `[]` on failure.
 */
async function getLatestTorrentsForCategory(category) {
  try {
    const torrents = await rarbg.list({ category: category, limit: 100, sort: 'last', format: 'json_extended' });
    return torrents.map(toTorrentRecord);
  } catch (err) {
    // Previously the error was dropped without a trace; keep the fallback but make it visible.
    console.warn(`[${moment()}] failed to fetch ${NAME} torrents for category ${category}:`, err);
    return [];
  }
}

/**
 * Stores a scraped record: refreshes the seeders when an entry for it is
 * already stored, otherwise creates a new torrent entry.
 *
 * @param {Object} record - normalized record from getLatestTorrents().
 * @returns {Promise<*>} result of the update or create call.
 */
async function processTorrentRecord(record) {
  if (await getStoredTorrentEntry(record)) {
    return updateTorrentSeeders(record);
  }

  const torrent = {
    provider: NAME,
    infoHash: record.infoHash,
    title: record.name,
    type: getType(record.category),
    seeders: record.seeders,
    size: record.size,
    uploadDate: record.uploadDate,
    imdbId: record.imdbId
  };

  return createTorrentEntry(torrent);
}

// Category labels that are classified as series; every other label is a movie.
const seriesCategories = [
  'TV Episodes',
  'Movies/TV-UHD-episodes',
  'TV HD Episodes',
];

/**
 * Maps a listing category label to the internal content type.
 *
 * @param {string} category - the `category` value of a listing entry.
 * @returns {*} `Type.SERIES` for the labels in `seriesCategories`, else `Type.MOVIE`.
 */
function getType(category) {
  if (seriesCategories.includes(category)) {
    return Type.SERIES;
  }
  return Type.MOVIE;
}

module.exports = { scrape };