diff --git a/addon/package-lock.json b/addon/package-lock.json index d966c0a..7f075a6 100644 --- a/addon/package-lock.json +++ b/addon/package-lock.json @@ -1845,8 +1845,8 @@ } }, "parse-torrent-title": { - "version": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2", - "from": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2", + "version": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f", + "from": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f", "requires": { "moment": "^2.24.0" } diff --git a/addon/package.json b/addon/package.json index c4f5a27..1551118 100644 --- a/addon/package.json +++ b/addon/package.json @@ -19,7 +19,7 @@ "magnet-uri": "^5.1.7", "named-queue": "^2.2.1", "needle": "^2.2.4", - "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2", + "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f", "pg": "^7.8.2", "pg-hstore": "^2.3.2", "premiumize-api": "^1.0.3", diff --git a/package-lock.json b/package-lock.json index 3045e24..ea16e75 100644 --- a/package-lock.json +++ b/package-lock.json @@ -95,6 +95,14 @@ "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz", "integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==" }, + "axios": { + "version": "0.21.4", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.4.tgz", + "integrity": "sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg==", + "requires": { + "follow-redirects": "^1.14.0" + } + }, "balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", @@ -212,6 +220,44 @@ } } }, + "bl": { + "version": "2.2.1", + "resolved": 
"https://registry.npmjs.org/bl/-/bl-2.2.1.tgz", + "integrity": "sha512-6Pesp1w0DEX1N550i/uGV/TqucVL4AM/pgThFSN/Qq9si1/DF9aIHs1BxD8V/QU0HoeHO6cQRTAuYnLPKq1e4g==", + "requires": { + "readable-stream": "^2.3.5", + "safe-buffer": "^5.1.1" + }, + "dependencies": { + "isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=" + }, + "readable-stream": { + "version": "2.3.7", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.7.tgz", + "integrity": "sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==", + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "requires": { + "safe-buffer": "~5.1.0" + } + } + } + }, "blob-to-buffer": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/blob-to-buffer/-/blob-to-buffer-1.2.8.tgz", @@ -718,6 +764,11 @@ "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=" }, + "denque": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/denque/-/denque-1.5.1.tgz", + "integrity": "sha512-XwE+iZ4D6ZUB7mfYRMb5wByE8L74HCn30FBN7sWnXksWc1LO1bPDl67pBR9o/kC4z/xSNAwkMYcGgqDV3BE3Hw==" + }, "depd": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", @@ -960,6 +1011,11 @@ "resolved": "https://registry.npmjs.org/flatten/-/flatten-0.0.1.tgz", "integrity": "sha1-VURAdm2goNYDmZ9DNFP2wvxqdcE=" }, + "follow-redirects": { + "version": "1.14.4", + 
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.4.tgz", + "integrity": "sha512-zwGkiSXC1MUJG/qmeIFH2HBJx9u0V46QGUe3YR1fXG8bXQxq7fLj0RjLZQ5nubr9qNJUZrH+xUcwXEoXNpfS+g==" + }, "forever-agent": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", @@ -1700,12 +1756,14 @@ } }, "mongodb": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.4.1.tgz", - "integrity": "sha512-juqt5/Z42J4DcE7tG7UdVaTKmUC6zinF4yioPfpeOSNBieWSK6qCY+0tfGQcHLKrauWPDdMZVROHJOa8q2pWsA==", + "version": "3.7.1", + "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.7.1.tgz", + "integrity": "sha512-iSVgexYr8ID0ieeNFUbRfQeOZxOchRck6kEDVySQRaa8VIw/1Pm+/LgcpZcl/BWV6nT0L8lP9qyl7dRPJ6mnLw==", "requires": { - "bson": "^1.1.1", - "require_optional": "^1.0.1", + "bl": "^2.2.1", + "bson": "^1.1.4", + "denque": "^1.4.1", + "optional-require": "^1.0.3", "safe-buffer": "^5.1.2", "saslprep": "^1.0.0" } @@ -1916,6 +1974,14 @@ "mimic-fn": "^2.1.0" } }, + "optional-require": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/optional-require/-/optional-require-1.1.7.tgz", + "integrity": "sha512-cIeRZocXsZnZYn+SevbtSqNlLbeoS4mLzuNn4fvXRMDRNhTGg0sxuKXl0FnZCtnew85LorNxIbZp5OeliILhMw==", + "requires": { + "require-at": "^1.0.6" + } + }, "options": { "version": "0.0.6", "resolved": "https://registry.npmjs.org/options/-/options-0.0.6.tgz", @@ -1992,8 +2058,8 @@ } }, "parse-torrent-title": { - "version": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2", - "from": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2", + "version": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f", + "from": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f", "requires": { "moment": "^2.24.0" } @@ -2358,19 +2424,10 @@ "lodash": 
"^4.17.11" } }, - "require_optional": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/require_optional/-/require_optional-1.0.1.tgz", - "integrity": "sha512-qhM/y57enGWHAe3v/NcwML6a3/vfESLe/sGM2dII+gEO0BpKRUkWZow/tyloNqJyN6kXSl3RyyM8Ll5D/sJP8g==", - "requires": { - "resolve-from": "^2.0.0", - "semver": "^5.1.0" - } - }, - "resolve-from": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-2.0.0.tgz", - "integrity": "sha1-lICrIOlP+h2egKgEx+oUdhGWa1c=" + "require-at": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/require-at/-/require-at-1.0.6.tgz", + "integrity": "sha512-7i1auJbMUrXEAZCOQ0VNJgmcT2VOKPRl2YGJwgpHpC9CE91Mv4/4UYIUm4chGJaI381ZDq1JUicFii64Hapd8g==" }, "restore-cursor": { "version": "3.1.0", @@ -2546,11 +2603,6 @@ "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz", "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==" }, - "semver": { - "version": "5.6.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-5.6.0.tgz", - "integrity": "sha512-RS9R6R35NYgQn++fkDWaOmqGoj4Ek9gGs+DPxNUZKuwE183xjJroKvyo1IzVFeXvUrvmALy6FWD5xrdJT25gMg==" - }, "send": { "version": "0.16.2", "resolved": "https://registry.npmjs.org/send/-/send-0.16.2.tgz", diff --git a/package.json b/package.json index 9fffdda..8552c07 100644 --- a/package.json +++ b/package.json @@ -13,6 +13,7 @@ "license": "MIT", "dependencies": { "async": "0.9.2", + "axios": "^0.21.4", "bittorrent-tracker": "^9.18.2", "bottleneck": "^2.16.2", "cache-manager": "^2.9.0", @@ -33,7 +34,7 @@ "nodejs-bing": "^0.1.0", "nyaapi": "^2.3.3", "parse-torrent": "^6.1.2", - "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2", + "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f", "pg": "^7.8.2", "pg-hstore": "^2.3.2", "real-debrid-api": 
"git://github.com/TheBeastLT/node-real-debrid.git#935a5c23ae809edbcd2a111526a7f74d6767c50d", diff --git a/scraper/scheduler/scrapers.js b/scraper/scheduler/scrapers.js index 4d76734..3bb6c48 100644 --- a/scraper/scheduler/scrapers.js +++ b/scraper/scheduler/scrapers.js @@ -7,6 +7,7 @@ const kickassScraper = require('../scrapers/kickass/kickass_scraper'); const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper'); const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper'); const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper'); +const erairawsScraper = require('../scrapers/erairaws/erairaws_scraper'); const torrentGalaxyScraper = require('../scrapers/torrentgalaxy/torrentgalaxy_scraper'); const rutorScraper = require('../scrapers/rutor/rutor_scraper'); const Comando = require('../scrapers/comando/comando_scraper') @@ -34,6 +35,7 @@ module.exports = [ { scraper: OndeBaixa, name: OndeBaixa.NAME, cron: '0 0 */4 ? * *' }, // { scraper: AnimesTorrent, name: AnimesTorrent.NAME, cron: '0 0 */4 ? * *' }, // { scraper: DarkMahou, name: DarkMahou.NAME, cron: '0 0 */4 ? * *' }, + { scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */24 ? 
* *' },
   // { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
   // { scraper: require('../scrapers/1337x/1337x_search_scraper') }
   // { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
diff --git a/scraper/scrapers/erairaws/erairaws_api.js b/scraper/scrapers/erairaws/erairaws_api.js
new file mode 100644
index 0000000..9535171
--- /dev/null
+++ b/scraper/scrapers/erairaws/erairaws_api.js
@@ -0,0 +1,144 @@
+const needle = require("needle");
+const axios = require('axios');
+const cheerio = require("cheerio");
+const decode = require("magnet-uri");
+const Promises = require("../../lib/promises");
+const { getRandomUserAgent } = require("../../lib/requestHelper");
+
+const defaultTimeout = 10000;
+// Redirect hops allowed per request.
+const maxRedirects = 2;
+
+const baseUrl = 'https://www.erai-raws.info';
+
+const Categories = {
+  ANIMES: 'anime',
+  EPISODES: 'episodes'
+};
+
+/**
+ * Fetches one listing page of a category, requests every anime link found on
+ * it one after another and collects the torrents parsed from those pages.
+ *
+ * @param {Object} [config] - `category` (a `Categories` value), optional `page`
+ *   (defaults to 1) and optional `timeout` in milliseconds.
+ * @param {number} [retries=2] - attempts left before giving up.
+ * @returns {Promise<Object[]>} flat list of `{ title, infoHash, trackers, languages }`.
+ */
+function browse(config = {}, retries = 2) {
+  // `<= 0` rather than `=== 0` so a negative count cannot retry forever.
+  if (retries <= 0) {
+    return Promise.reject(new Error(`Failed browse request`));
+  }
+  const page = config.page || 1;
+  const category = config.category;
+
+  return singleRequest(`${baseUrl}/${category}/page/${page}/`, config)
+    .then((body) => parseTableBody(body))
+    // Detail pages are fetched with the same config so a custom timeout applies to them too.
+    .then((animes) => Promises.sequence(animes.map((anime) => () => singleRequest(anime.animeLink, config))))
+    .then((animeBodies) => Promise.all(animeBodies.map((animeBody) => parseTorrentPage(animeBody))))
+    .then((animeInfos) => animeInfos.reduce((a, b) => a.concat(b), []))
+    .catch((err) => {
+      if (retries <= 1) {
+        // Last attempt: surface the underlying cause instead of dropping it.
+        throw new Error(`Failed browse request: ${err.message}`);
+      }
+      return browse(config, retries - 1);
+    });
+}
+
+/**
+ * Downloads `requestUrl` and returns its body as a string.
+ *
+ * @param {string} requestUrl
+ * @param {Object} [config] - only `timeout` (milliseconds) is read.
+ * @returns {Promise<string>}
+ * @throws {Error} (as a rejection) when the body is empty or is a known error page.
+ */
+function singleRequest(requestUrl, config = {}) {
+  const timeout = config.timeout || defaultTimeout;
+  // axios takes the user agent as a header and the redirect limit as `maxRedirects`.
+  const options = {
+    headers: { 'User-Agent': getRandomUserAgent() },
+    timeout: timeout,
+    maxRedirects: maxRedirects
+  };
+
+  return axios.get(requestUrl, options).then((response) => {
+    const data = response.data;
+    const body = Buffer.isBuffer(data) ? data.toString() : data;
+    if (typeof body !== 'string' || body.length === 0) {
+      throw new Error(`No body: ${requestUrl}`);
+    }
+    if (body.includes("502: Bad gateway") || body.includes("403 Forbidden")) {
+      throw new Error(`Invalid body contents: ${requestUrl}`);
+    }
+    return body;
+  });
+}
+
+/**
+ * Extracts the anime links from a listing page.
+ *
+ * @param {string} body - listing page HTML.
+ * @returns {Promise<Array<{name: string, animeLink: string}>>}
+ */
+async function parseTableBody(body) {
+  const $ = cheerio.load(body);
+
+  return $('[itemprop=\'headline\'] a, .content-area a.aa_ss_ops_new')
+    .map((i, element) => ({
+      name: $(element).text(),
+      animeLink: $(element).attr("href"),
+    }))
+    .get()
+    // An anchor without href cannot be requested; skip it rather than fail the page.
+    .filter((link) => Boolean(link.animeLink));
+}
+
+/**
+ * Extracts every magnet link of an anime page together with the languages
+ * listed in the same table.
+ *
+ * @param {string} body - anime page HTML.
+ * @returns {Promise<Object[]>} `{ title, infoHash, trackers, languages }` entries.
+ */
+async function parseTorrentPage(body) {
+  const $ = cheerio.load(body);
+
+  const entries = $('.tab-content table, .content-area table')
+    .map((i, entry) => {
+      const languages = $(entry).find('.tooltip3').map((_, l) => $(l).attr('data-title')).get().join('/');
+      const magnets = $(entry).find('a[href^="magnet"]').map((_, m) => $(m).attr('href')).get();
+      return { languages, magnets };
+    }).get();
+
+  return entries
+    .map((entry) => entry.magnets
+      .map((magnet) => decodeMagnet(magnet))
+      // Without an info hash there is nothing to identify the torrent by.
+      .filter((decodedMagnet) => decodedMagnet && decodedMagnet.infoHash)
+      .map((decodedMagnet) => ({
+        title: decodedMagnet.name,
+        infoHash: decodedMagnet.infoHash,
+        trackers: decodedMagnet.tr,
+        languages: entry.languages
+      })))
+    .reduce((a, b) => a.concat(b), []);
+}
+
+/**
+ * Decodes a magnet link, returning `null` instead of throwing so that one
+ * bad link does not discard the rest of the page.
+ */
+function decodeMagnet(magnet) {
+  try {
+    return decode(magnet);
+  } catch (error) {
+    console.warn(`Skipping undecodable magnet link: ${magnet}`, error);
+    return null;
+  }
+}
+
+module.exports = { browse, Categories };
diff --git a/scraper/scrapers/erairaws/erairaws_scraper.js b/scraper/scrapers/erairaws/erairaws_scraper.js
new file mode 100644
index 0000000..3db4474
--- /dev/null
+++ b/scraper/scrapers/erairaws/erairaws_scraper.js
@@ -0,0 +1,68 @@
+const moment = require('moment');
+const Bottleneck = require('bottleneck');
+const erairaws = require('./erairaws_api');
+const { checkAndUpdateTorrent } = require('../../lib/torrentEntries');
+
+const NAME = 'EraiRaws';
+
+const limiter = new Bottleneck({ maxConcurrent: 10 });
+
+/**
+ * Scheduler entry point: scrapes the latest episode pages, logging start and finish.
+ */
+async function scrape() {
+  const scrapeStart = moment();
+  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
+
+  await scrapeLatestTorrents();
+  console.log(`[${moment()}] finished ${NAME} scrape`);
+}
+
+async function scrapeLatestTorrents() {
+  const entries = await scrapeLatestTorrentsForCategory(erairaws.Categories.EPISODES);
+  return entries.reduce((a, b) => a.concat(b), []);
+}
+
+/**
+ * Processes one listing page of `category` and recurses into the next page
+ * while the current one produced torrents and `untilPage(category)` is not reached.
+ * A page that fails to load is logged and treated as empty, which stops paging.
+ * Resolves to an empty array once paging stops.
+ */
+async function scrapeLatestTorrentsForCategory(category, page = 1) {
+  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
+
+  let torrents;
+  try {
+    torrents = await erairaws.browse({ category, page });
+  } catch (error) {
+    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
+    torrents = [];
+  }
+
+  const resolved = await Promise.all(torrents.map((torrent) => limiter.schedule(() => processRecord(torrent))));
+  if (resolved.length > 0 && page < untilPage(category)) {
+    return scrapeLatestTorrentsForCategory(category, page + 1);
+  }
+  return [];
+}
+
+/**
+ * Passes one scraped torrent, tagged with this provider's name, to
+ * `checkAndUpdateTorrent` and resolves to the torrent itself.
+ */
+async function processRecord(foundTorrent) {
+  await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
+  return foundTorrent;
+}
+
+/**
+ * Last listing page to visit for a category.
+ */
+function untilPage(category) {
+  if (category === erairaws.Categories.ANIMES) {
+    return 45;
+  }
+  return 3;
+}
+
+module.exports = { scrape, NAME };