add erairaws language scraper

This commit is contained in:
TheBeastLT
2021-09-17 14:35:52 +02:00
committed by TheBeastLT
parent 53b03aa1eb
commit 195a45b270
7 changed files with 225 additions and 29 deletions

View File

@@ -1845,8 +1845,8 @@
}
},
"parse-torrent-title": {
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
"requires": {
"moment": "^2.24.0"
}

View File

@@ -19,7 +19,7 @@
"magnet-uri": "^5.1.7",
"named-queue": "^2.2.1",
"needle": "^2.2.4",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
"pg": "^7.8.2",
"pg-hstore": "^2.3.2",
"premiumize-api": "^1.0.3",

102
package-lock.json generated
View File

@@ -95,6 +95,14 @@
"resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz",
"integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ=="
},
"axios": {
"version": "0.21.4",
"resolved": "https://registry.npmjs.org/axios/-/axios-0.21.4.tgz",
"integrity": "sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg==",
"requires": {
"follow-redirects": "^1.14.0"
}
},
"balanced-match": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
@@ -212,6 +220,44 @@
}
}
},
"bl": {
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/bl/-/bl-2.2.1.tgz",
"integrity": "sha512-6Pesp1w0DEX1N550i/uGV/TqucVL4AM/pgThFSN/Qq9si1/DF9aIHs1BxD8V/QU0HoeHO6cQRTAuYnLPKq1e4g==",
"requires": {
"readable-stream": "^2.3.5",
"safe-buffer": "^5.1.1"
},
"dependencies": {
"isarray": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
"integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE="
},
"readable-stream": {
"version": "2.3.7",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.7.tgz",
"integrity": "sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==",
"requires": {
"core-util-is": "~1.0.0",
"inherits": "~2.0.3",
"isarray": "~1.0.0",
"process-nextick-args": "~2.0.0",
"safe-buffer": "~5.1.1",
"string_decoder": "~1.1.1",
"util-deprecate": "~1.0.1"
}
},
"string_decoder": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
"integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
"requires": {
"safe-buffer": "~5.1.0"
}
}
}
},
"blob-to-buffer": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/blob-to-buffer/-/blob-to-buffer-1.2.8.tgz",
@@ -718,6 +764,11 @@
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk="
},
"denque": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/denque/-/denque-1.5.1.tgz",
"integrity": "sha512-XwE+iZ4D6ZUB7mfYRMb5wByE8L74HCn30FBN7sWnXksWc1LO1bPDl67pBR9o/kC4z/xSNAwkMYcGgqDV3BE3Hw=="
},
"depd": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz",
@@ -960,6 +1011,11 @@
"resolved": "https://registry.npmjs.org/flatten/-/flatten-0.0.1.tgz",
"integrity": "sha1-VURAdm2goNYDmZ9DNFP2wvxqdcE="
},
"follow-redirects": {
"version": "1.14.4",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.4.tgz",
"integrity": "sha512-zwGkiSXC1MUJG/qmeIFH2HBJx9u0V46QGUe3YR1fXG8bXQxq7fLj0RjLZQ5nubr9qNJUZrH+xUcwXEoXNpfS+g=="
},
"forever-agent": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz",
@@ -1700,12 +1756,14 @@
}
},
"mongodb": {
"version": "3.4.1",
"resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.4.1.tgz",
"integrity": "sha512-juqt5/Z42J4DcE7tG7UdVaTKmUC6zinF4yioPfpeOSNBieWSK6qCY+0tfGQcHLKrauWPDdMZVROHJOa8q2pWsA==",
"version": "3.7.1",
"resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.7.1.tgz",
"integrity": "sha512-iSVgexYr8ID0ieeNFUbRfQeOZxOchRck6kEDVySQRaa8VIw/1Pm+/LgcpZcl/BWV6nT0L8lP9qyl7dRPJ6mnLw==",
"requires": {
"bson": "^1.1.1",
"require_optional": "^1.0.1",
"bl": "^2.2.1",
"bson": "^1.1.4",
"denque": "^1.4.1",
"optional-require": "^1.0.3",
"safe-buffer": "^5.1.2",
"saslprep": "^1.0.0"
}
@@ -1916,6 +1974,14 @@
"mimic-fn": "^2.1.0"
}
},
"optional-require": {
"version": "1.1.7",
"resolved": "https://registry.npmjs.org/optional-require/-/optional-require-1.1.7.tgz",
"integrity": "sha512-cIeRZocXsZnZYn+SevbtSqNlLbeoS4mLzuNn4fvXRMDRNhTGg0sxuKXl0FnZCtnew85LorNxIbZp5OeliILhMw==",
"requires": {
"require-at": "^1.0.6"
}
},
"options": {
"version": "0.0.6",
"resolved": "https://registry.npmjs.org/options/-/options-0.0.6.tgz",
@@ -1992,8 +2058,8 @@
}
},
"parse-torrent-title": {
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
"requires": {
"moment": "^2.24.0"
}
@@ -2358,19 +2424,10 @@
"lodash": "^4.17.11"
}
},
"require_optional": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/require_optional/-/require_optional-1.0.1.tgz",
"integrity": "sha512-qhM/y57enGWHAe3v/NcwML6a3/vfESLe/sGM2dII+gEO0BpKRUkWZow/tyloNqJyN6kXSl3RyyM8Ll5D/sJP8g==",
"requires": {
"resolve-from": "^2.0.0",
"semver": "^5.1.0"
}
},
"resolve-from": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-2.0.0.tgz",
"integrity": "sha1-lICrIOlP+h2egKgEx+oUdhGWa1c="
"require-at": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/require-at/-/require-at-1.0.6.tgz",
"integrity": "sha512-7i1auJbMUrXEAZCOQ0VNJgmcT2VOKPRl2YGJwgpHpC9CE91Mv4/4UYIUm4chGJaI381ZDq1JUicFii64Hapd8g=="
},
"restore-cursor": {
"version": "3.1.0",
@@ -2546,11 +2603,6 @@
"resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
"integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw=="
},
"semver": {
"version": "5.6.0",
"resolved": "https://registry.npmjs.org/semver/-/semver-5.6.0.tgz",
"integrity": "sha512-RS9R6R35NYgQn++fkDWaOmqGoj4Ek9gGs+DPxNUZKuwE183xjJroKvyo1IzVFeXvUrvmALy6FWD5xrdJT25gMg=="
},
"send": {
"version": "0.16.2",
"resolved": "https://registry.npmjs.org/send/-/send-0.16.2.tgz",

View File

@@ -13,6 +13,7 @@
"license": "MIT",
"dependencies": {
"async": "0.9.2",
"axios": "^0.21.4",
"bittorrent-tracker": "^9.18.2",
"bottleneck": "^2.16.2",
"cache-manager": "^2.9.0",
@@ -33,7 +34,7 @@
"nodejs-bing": "^0.1.0",
"nyaapi": "^2.3.3",
"parse-torrent": "^6.1.2",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
"pg": "^7.8.2",
"pg-hstore": "^2.3.2",
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#935a5c23ae809edbcd2a111526a7f74d6767c50d",

View File

@@ -7,6 +7,7 @@ const kickassScraper = require('../scrapers/kickass/kickass_scraper');
const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper');
const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper');
const erairawsScraper = require('../scrapers/erairaws/erairaws_scraper');
const torrentGalaxyScraper = require('../scrapers/torrentgalaxy/torrentgalaxy_scraper');
const rutorScraper = require('../scrapers/rutor/rutor_scraper');
const Comando = require('../scrapers/comando/comando_scraper')
@@ -34,6 +35,7 @@ module.exports = [
{ scraper: OndeBaixa, name: OndeBaixa.NAME, cron: '0 0 */4 ? * *' },
// { scraper: AnimesTorrent, name: AnimesTorrent.NAME, cron: '0 0 */4 ? * *' },
// { scraper: DarkMahou, name: DarkMahou.NAME, cron: '0 0 */4 ? * *' },
{ scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */24 ? * *' },
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
// { scraper: require('../scrapers/1337x/1337x_search_scraper') }
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }

View File

@@ -0,0 +1,94 @@
const needle = require("needle");
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require("magnet-uri");
const Promises = require("../../lib/promises");
const { getRandomUserAgent } = require("../../lib/requestHelper");
const defaultTimeout = 10000;
const baseUrl = 'https://www.erai-raws.info';
const Categories = {
ANIMES: 'anime',
EPISODES: 'episodes'
};
function browse(config = {}, retries = 2) {
if (retries === 0) {
return Promise.reject(new Error(`Failed browse request`));
}
const page = config.page || 1;
const category = config.category;
return singleRequest(`${baseUrl}/${category}/page/${page}/`, config)
.then((body) => parseTableBody(body)
.then(animes => Promises.sequence(animes.map(anime => () => singleRequest(anime.animeLink))))
.then(animeBodies => Promise.all(animeBodies.map(animeBody => parseTorrentPage(animeBody))))
.then(animeInfos => animeInfos.reduce((a, b) => a.concat(b), [])))
.catch((err) => browse(config, retries - 1));
}
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = { userAgent: getRandomUserAgent(), timeout: timeout, follow: 2, };
return axios.get(requestUrl, options).then((response) => {
const body = response.data;
if (!body || (Buffer.isBuffer(body) && !body.size)) {
throw new Error(`No body: ${requestUrl}`);
} else if (
body.includes("502: Bad gateway") ||
body.includes("403 Forbidden")
) {
throw new Error(`Invalid body contents: ${requestUrl}`);
}
return body;
});
}
function parseTableBody(body) {
return new Promise((resolve, reject) => {
const $ = cheerio.load(body);
if (!$) {
reject(new Error("Failed loading body"));
}
const links = $('[itemprop=\'headline\'] a, .content-area a.aa_ss_ops_new')
.map((i, element) => ({
name: $(element).text(),
animeLink: $(element).attr("href"),
})).get();
resolve(links);
});
}
function parseTorrentPage(body) {
return new Promise(async (resolve, reject) => {
const $ = cheerio.load(body);
if (!$) {
reject(new Error("Failed loading body"));
}
const entries = $('.tab-content table, .content-area table')
.map((i, entry) => {
const languages = $(entry).find('.tooltip3').map((_, l) => $(l).attr('data-title')).get().join('/');
const magnets = $(entry).find('a[href^="magnet"]').map((_, m) => $(m).attr('href')).get();
return { languages, magnets }
}).get();
const torrents = entries
.map(entry => entry.magnets
.map(magnet => decode(magnet))
.map(decodedMagnet => ({
title: decodedMagnet.name,
infoHash: decodedMagnet.infoHash,
trackers: decodedMagnet.tr,
languages: entry.languages
})))
.reduce((a, b) => a.concat(b), []);
resolve(torrents);
});
}
module.exports = { browse, Categories };

View File

@@ -0,0 +1,47 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const erairaws = require('./erairaws_api');
const { checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'EraiRaws';
const limiter = new Bottleneck({ maxConcurrent: 10 });
async function scrape() {
const scrapeStart = moment();
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
return scrapeLatestTorrents()
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
async function scrapeLatestTorrents() {
return scrapeLatestTorrentsForCategory(erairaws.Categories.EPISODES)
.then((entries) => entries.reduce((a, b) => a.concat(b), []));
}
async function scrapeLatestTorrentsForCategory(category, page = 1) {
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
return erairaws.browse({ category, page })
.catch((error) => {
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
return Promise.resolve([]);
})
.then((torrents) => Promise.all(torrents.map((torrent) => limiter.schedule(() => processRecord(torrent)))))
.then((resolved) => resolved.length > 0 && page < untilPage(category)
? scrapeLatestTorrentsForCategory(category, page + 1)
: Promise.resolve([]));
}
async function processRecord(foundTorrent) {
return checkAndUpdateTorrent({ provider: NAME, ...foundTorrent }).then(() => foundTorrent);
}
function untilPage(category) {
if (category === erairaws.Categories.ANIMES) {
return 45;
}
return 3;
}
module.exports = { scrape, NAME };