add erairaws language scraper
This commit is contained in:
4
addon/package-lock.json
generated
4
addon/package-lock.json
generated
@@ -1845,8 +1845,8 @@
|
||||
}
|
||||
},
|
||||
"parse-torrent-title": {
|
||||
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
|
||||
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
|
||||
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
|
||||
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
|
||||
"requires": {
|
||||
"moment": "^2.24.0"
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
"magnet-uri": "^5.1.7",
|
||||
"named-queue": "^2.2.1",
|
||||
"needle": "^2.2.4",
|
||||
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
|
||||
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
|
||||
"pg": "^7.8.2",
|
||||
"pg-hstore": "^2.3.2",
|
||||
"premiumize-api": "^1.0.3",
|
||||
|
||||
102
package-lock.json
generated
102
package-lock.json
generated
@@ -95,6 +95,14 @@
|
||||
"resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz",
|
||||
"integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ=="
|
||||
},
|
||||
"axios": {
|
||||
"version": "0.21.4",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.21.4.tgz",
|
||||
"integrity": "sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg==",
|
||||
"requires": {
|
||||
"follow-redirects": "^1.14.0"
|
||||
}
|
||||
},
|
||||
"balanced-match": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
|
||||
@@ -212,6 +220,44 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"bl": {
|
||||
"version": "2.2.1",
|
||||
"resolved": "https://registry.npmjs.org/bl/-/bl-2.2.1.tgz",
|
||||
"integrity": "sha512-6Pesp1w0DEX1N550i/uGV/TqucVL4AM/pgThFSN/Qq9si1/DF9aIHs1BxD8V/QU0HoeHO6cQRTAuYnLPKq1e4g==",
|
||||
"requires": {
|
||||
"readable-stream": "^2.3.5",
|
||||
"safe-buffer": "^5.1.1"
|
||||
},
|
||||
"dependencies": {
|
||||
"isarray": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
|
||||
"integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE="
|
||||
},
|
||||
"readable-stream": {
|
||||
"version": "2.3.7",
|
||||
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.7.tgz",
|
||||
"integrity": "sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==",
|
||||
"requires": {
|
||||
"core-util-is": "~1.0.0",
|
||||
"inherits": "~2.0.3",
|
||||
"isarray": "~1.0.0",
|
||||
"process-nextick-args": "~2.0.0",
|
||||
"safe-buffer": "~5.1.1",
|
||||
"string_decoder": "~1.1.1",
|
||||
"util-deprecate": "~1.0.1"
|
||||
}
|
||||
},
|
||||
"string_decoder": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
|
||||
"integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
|
||||
"requires": {
|
||||
"safe-buffer": "~5.1.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"blob-to-buffer": {
|
||||
"version": "1.2.8",
|
||||
"resolved": "https://registry.npmjs.org/blob-to-buffer/-/blob-to-buffer-1.2.8.tgz",
|
||||
@@ -718,6 +764,11 @@
|
||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||
"integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk="
|
||||
},
|
||||
"denque": {
|
||||
"version": "1.5.1",
|
||||
"resolved": "https://registry.npmjs.org/denque/-/denque-1.5.1.tgz",
|
||||
"integrity": "sha512-XwE+iZ4D6ZUB7mfYRMb5wByE8L74HCn30FBN7sWnXksWc1LO1bPDl67pBR9o/kC4z/xSNAwkMYcGgqDV3BE3Hw=="
|
||||
},
|
||||
"depd": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz",
|
||||
@@ -960,6 +1011,11 @@
|
||||
"resolved": "https://registry.npmjs.org/flatten/-/flatten-0.0.1.tgz",
|
||||
"integrity": "sha1-VURAdm2goNYDmZ9DNFP2wvxqdcE="
|
||||
},
|
||||
"follow-redirects": {
|
||||
"version": "1.14.4",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.4.tgz",
|
||||
"integrity": "sha512-zwGkiSXC1MUJG/qmeIFH2HBJx9u0V46QGUe3YR1fXG8bXQxq7fLj0RjLZQ5nubr9qNJUZrH+xUcwXEoXNpfS+g=="
|
||||
},
|
||||
"forever-agent": {
|
||||
"version": "0.6.1",
|
||||
"resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz",
|
||||
@@ -1700,12 +1756,14 @@
|
||||
}
|
||||
},
|
||||
"mongodb": {
|
||||
"version": "3.4.1",
|
||||
"resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.4.1.tgz",
|
||||
"integrity": "sha512-juqt5/Z42J4DcE7tG7UdVaTKmUC6zinF4yioPfpeOSNBieWSK6qCY+0tfGQcHLKrauWPDdMZVROHJOa8q2pWsA==",
|
||||
"version": "3.7.1",
|
||||
"resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.7.1.tgz",
|
||||
"integrity": "sha512-iSVgexYr8ID0ieeNFUbRfQeOZxOchRck6kEDVySQRaa8VIw/1Pm+/LgcpZcl/BWV6nT0L8lP9qyl7dRPJ6mnLw==",
|
||||
"requires": {
|
||||
"bson": "^1.1.1",
|
||||
"require_optional": "^1.0.1",
|
||||
"bl": "^2.2.1",
|
||||
"bson": "^1.1.4",
|
||||
"denque": "^1.4.1",
|
||||
"optional-require": "^1.0.3",
|
||||
"safe-buffer": "^5.1.2",
|
||||
"saslprep": "^1.0.0"
|
||||
}
|
||||
@@ -1916,6 +1974,14 @@
|
||||
"mimic-fn": "^2.1.0"
|
||||
}
|
||||
},
|
||||
"optional-require": {
|
||||
"version": "1.1.7",
|
||||
"resolved": "https://registry.npmjs.org/optional-require/-/optional-require-1.1.7.tgz",
|
||||
"integrity": "sha512-cIeRZocXsZnZYn+SevbtSqNlLbeoS4mLzuNn4fvXRMDRNhTGg0sxuKXl0FnZCtnew85LorNxIbZp5OeliILhMw==",
|
||||
"requires": {
|
||||
"require-at": "^1.0.6"
|
||||
}
|
||||
},
|
||||
"options": {
|
||||
"version": "0.0.6",
|
||||
"resolved": "https://registry.npmjs.org/options/-/options-0.0.6.tgz",
|
||||
@@ -1992,8 +2058,8 @@
|
||||
}
|
||||
},
|
||||
"parse-torrent-title": {
|
||||
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
|
||||
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
|
||||
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
|
||||
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
|
||||
"requires": {
|
||||
"moment": "^2.24.0"
|
||||
}
|
||||
@@ -2358,19 +2424,10 @@
|
||||
"lodash": "^4.17.11"
|
||||
}
|
||||
},
|
||||
"require_optional": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/require_optional/-/require_optional-1.0.1.tgz",
|
||||
"integrity": "sha512-qhM/y57enGWHAe3v/NcwML6a3/vfESLe/sGM2dII+gEO0BpKRUkWZow/tyloNqJyN6kXSl3RyyM8Ll5D/sJP8g==",
|
||||
"requires": {
|
||||
"resolve-from": "^2.0.0",
|
||||
"semver": "^5.1.0"
|
||||
}
|
||||
},
|
||||
"resolve-from": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-2.0.0.tgz",
|
||||
"integrity": "sha1-lICrIOlP+h2egKgEx+oUdhGWa1c="
|
||||
"require-at": {
|
||||
"version": "1.0.6",
|
||||
"resolved": "https://registry.npmjs.org/require-at/-/require-at-1.0.6.tgz",
|
||||
"integrity": "sha512-7i1auJbMUrXEAZCOQ0VNJgmcT2VOKPRl2YGJwgpHpC9CE91Mv4/4UYIUm4chGJaI381ZDq1JUicFii64Hapd8g=="
|
||||
},
|
||||
"restore-cursor": {
|
||||
"version": "3.1.0",
|
||||
@@ -2546,11 +2603,6 @@
|
||||
"resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
|
||||
"integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw=="
|
||||
},
|
||||
"semver": {
|
||||
"version": "5.6.0",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-5.6.0.tgz",
|
||||
"integrity": "sha512-RS9R6R35NYgQn++fkDWaOmqGoj4Ek9gGs+DPxNUZKuwE183xjJroKvyo1IzVFeXvUrvmALy6FWD5xrdJT25gMg=="
|
||||
},
|
||||
"send": {
|
||||
"version": "0.16.2",
|
||||
"resolved": "https://registry.npmjs.org/send/-/send-0.16.2.tgz",
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"async": "0.9.2",
|
||||
"axios": "^0.21.4",
|
||||
"bittorrent-tracker": "^9.18.2",
|
||||
"bottleneck": "^2.16.2",
|
||||
"cache-manager": "^2.9.0",
|
||||
@@ -33,7 +34,7 @@
|
||||
"nodejs-bing": "^0.1.0",
|
||||
"nyaapi": "^2.3.3",
|
||||
"parse-torrent": "^6.1.2",
|
||||
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#1ebaf887b6848c7648638ad1ab3c719e6cee47d2",
|
||||
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#8fd87b3bfc7aa04143edcd4f0112868bb77dce0f",
|
||||
"pg": "^7.8.2",
|
||||
"pg-hstore": "^2.3.2",
|
||||
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#935a5c23ae809edbcd2a111526a7f74d6767c50d",
|
||||
|
||||
@@ -7,6 +7,7 @@ const kickassScraper = require('../scrapers/kickass/kickass_scraper');
|
||||
const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
|
||||
const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper');
|
||||
const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper');
|
||||
const erairawsScraper = require('../scrapers/erairaws/erairaws_scraper');
|
||||
const torrentGalaxyScraper = require('../scrapers/torrentgalaxy/torrentgalaxy_scraper');
|
||||
const rutorScraper = require('../scrapers/rutor/rutor_scraper');
|
||||
const Comando = require('../scrapers/comando/comando_scraper')
|
||||
@@ -34,6 +35,7 @@ module.exports = [
|
||||
{ scraper: OndeBaixa, name: OndeBaixa.NAME, cron: '0 0 */4 ? * *' },
|
||||
// { scraper: AnimesTorrent, name: AnimesTorrent.NAME, cron: '0 0 */4 ? * *' },
|
||||
// { scraper: DarkMahou, name: DarkMahou.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */24 ? * *' },
|
||||
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
|
||||
// { scraper: require('../scrapers/1337x/1337x_search_scraper') }
|
||||
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
|
||||
|
||||
94
scraper/scrapers/erairaws/erairaws_api.js
Normal file
94
scraper/scrapers/erairaws/erairaws_api.js
Normal file
@@ -0,0 +1,94 @@
|
||||
const needle = require("needle");
|
||||
const axios = require('axios');
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require("magnet-uri");
|
||||
const Promises = require("../../lib/promises");
|
||||
const { getRandomUserAgent } = require("../../lib/requestHelper");
|
||||
|
||||
// Request timeout applied by singleRequest when the caller's config has none.
const defaultTimeout = 10 * 1000;

// Site root; browse() appends "/<category>/page/<page>/" to it.
const baseUrl = "https://www.erai-raws.info";

// Listing categories. Each value is the URL path segment used by browse().
const Categories = {
  ANIMES: "anime",
  EPISODES: "episodes",
};
|
||||
|
||||
/**
 * Fetches one listing page of a category, requests every anime link found on
 * it (one after another, via Promises.sequence) and returns the torrents
 * parsed from those pages as a single flat array.
 *
 * @param {Object} [config]
 * @param {string} [config.category] - URL path segment of the listing (see Categories).
 * @param {number} [config.page] - Listing page number; falsy values fall back to 1.
 * @param {number} [config.timeout] - Forwarded to singleRequest for every request.
 * @param {number} [retries=2] - Total number of attempts before giving up.
 * @returns {Promise<Array<Object>>} Torrent entries produced by parseTorrentPage.
 * @throws {Error} "Failed browse request" once all attempts are used up; the
 *   error of the last attempt (if any) is exposed as `cause`.
 */
async function browse(config = {}, retries = 2) {
  const page = config.page || 1;
  const category = config.category;
  let lastError;

  // Counting down with `> 0` also terminates for negative or fractional
  // `retries`, which an equality check against 0 would never reach.
  for (let remaining = retries; remaining > 0; remaining--) {
    try {
      const body = await singleRequest(`${baseUrl}/${category}/page/${page}/`, config);
      const animes = await parseTableBody(body);
      // Pass the same config on so a custom timeout also applies to detail pages.
      const animeBodies = await Promises.sequence(
        animes.map((anime) => () => singleRequest(anime.animeLink, config))
      );
      const animeInfos = await Promise.all(
        animeBodies.map((animeBody) => parseTorrentPage(animeBody))
      );
      return animeInfos.reduce((a, b) => a.concat(b), []);
    } catch (err) {
      // Remember why this attempt failed so the final error is diagnosable.
      lastError = err;
    }
  }

  const failure = new Error(`Failed browse request`);
  failure.cause = lastError;
  throw failure;
}
|
||||
|
||||
/**
 * Performs a GET request and returns the response body after basic sanity
 * checks.
 *
 * @param {string} requestUrl - Absolute URL to fetch.
 * @param {Object} [config]
 * @param {number} [config.timeout] - Request timeout; defaults to defaultTimeout.
 * @returns {Promise<*>} Resolves with `response.data` as delivered by axios.
 * @throws {Error} (as a rejection) when the body is empty or contains a known
 *   gateway/forbidden marker; rejections from axios itself pass through.
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  // axios has no `userAgent` / `follow` request options (those are needle's);
  // the equivalents are a User-Agent header and `maxRedirects`.
  const options = {
    headers: { 'User-Agent': getRandomUserAgent() },
    timeout: timeout,
    maxRedirects: 2,
  };

  return axios.get(requestUrl, options).then((response) => {
    const body = response.data;
    // Buffers expose `length`, not `size`.
    if (!body || (Buffer.isBuffer(body) && !body.length)) {
      throw new Error(`No body: ${requestUrl}`);
    }

    // Only strings and Buffers can be searched; axios may hand back an
    // already-parsed object, on which `.includes` does not exist.
    const isSearchable = typeof body === 'string' || Buffer.isBuffer(body);
    if (
        isSearchable &&
        (body.includes("502: Bad gateway") || body.includes("403 Forbidden"))
    ) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  });
}
|
||||
|
||||
/**
 * Extracts the anime/episode links from a listing page.
 *
 * Every anchor matching `[itemprop='headline'] a` or
 * `.content-area a.aa_ss_ops_new` yields one entry.
 *
 * @param {string} body - HTML of the listing page.
 * @returns {Promise<Array<{name: string, animeLink: (string|undefined)}>>}
 *   Link text and `href` of each matched anchor, in document order.
 * @throws {Error} (as a rejection) "Failed loading body" if cheerio returns
 *   nothing; exceptions raised while parsing also surface as rejections.
 */
async function parseTableBody(body) {
  const $ = cheerio.load(body);

  // Throwing stops execution here; the previous reject() call fell through
  // and kept running against the missing document.
  if (!$) {
    throw new Error("Failed loading body");
  }

  return $('[itemprop=\'headline\'] a, .content-area a.aa_ss_ops_new')
      .map((i, element) => ({
        name: $(element).text(),
        animeLink: $(element).attr("href"),
      }))
      .get();
}
|
||||
|
||||
/**
 * Extracts torrent entries from an anime/episode page.
 *
 * For every table matching `.tab-content table` or `.content-area table`, the
 * `data-title` attributes of its `.tooltip3` elements are joined with "/" into
 * a languages string, and each link whose href starts with "magnet" is decoded
 * into one torrent entry carrying that languages string.
 *
 * Declared as a plain async function rather than `new Promise(async ...)`:
 * with an async executor, an exception thrown while parsing would reject an
 * ignored inner promise and leave the returned promise pending forever.
 *
 * @param {string} body - HTML of the page.
 * @returns {Promise<Array<{title: *, infoHash: *, trackers: *, languages: string}>>}
 *   One entry per magnet link, in document order.
 * @throws {Error} (as a rejection) "Failed loading body" if cheerio returns
 *   nothing; exceptions from parsing or magnet decoding also surface as
 *   rejections.
 */
async function parseTorrentPage(body) {
  const $ = cheerio.load(body);

  if (!$) {
    throw new Error("Failed loading body");
  }

  const entries = $('.tab-content table, .content-area table')
      .map((i, entry) => {
        const languages = $(entry).find('.tooltip3').map((_, l) => $(l).attr('data-title')).get().join('/');
        const magnets = $(entry).find('a[href^="magnet"]').map((_, m) => $(m).attr('href')).get();
        return { languages, magnets };
      })
      .get();

  return entries
      .map((entry) => entry.magnets
          .map((magnet) => decode(magnet))
          .map((decodedMagnet) => ({
            title: decodedMagnet.name,
            infoHash: decodedMagnet.infoHash,
            trackers: decodedMagnet.tr,
            languages: entry.languages,
          })))
      .reduce((a, b) => a.concat(b), []);
}
|
||||
|
||||
module.exports = { browse, Categories };
|
||||
47
scraper/scrapers/erairaws/erairaws_scraper.js
Normal file
47
scraper/scrapers/erairaws/erairaws_scraper.js
Normal file
@@ -0,0 +1,47 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const erairaws = require('./erairaws_api');
|
||||
const { checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
// Provider label: stored with every torrent record and used in log lines.
const NAME = "EraiRaws";

// Shared limiter through which every processRecord call is scheduled.
const limiter = new Bottleneck({
  maxConcurrent: 10,
});
|
||||
|
||||
/**
 * Entry point of the scraper: logs the start, runs the latest-torrents scrape
 * and logs completion.
 *
 * @returns {Promise<undefined>} Settles once the scrape has finished; rejects
 *   with whatever scrapeLatestTorrents rejects with.
 */
async function scrape() {
  const startedAt = moment();
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  return console.log(`[${moment()}] finished ${NAME} scrape`);
}
|
||||
|
||||
/**
 * Scrapes the EPISODES category and flattens the result by one level.
 *
 * @returns {Promise<Array>} Flattened output of scrapeLatestTorrentsForCategory.
 */
async function scrapeLatestTorrents() {
  const entries = await scrapeLatestTorrentsForCategory(erairaws.Categories.EPISODES);

  let flattened = [];
  for (const entry of entries) {
    flattened = flattened.concat(entry);
  }
  return flattened;
}
|
||||
|
||||
/**
 * Scrapes one listing page of a category, processes every torrent found on it
 * through the shared limiter, and recurses into the next page while the page
 * produced results and the untilPage(category) limit is not reached.
 *
 * A failed browse is logged and treated as an empty page, which ends the
 * pagination.
 *
 * @param {string} category - One of erairaws.Categories.
 * @param {number} [page=1] - Listing page to scrape.
 * @returns {Promise<Array>} An empty array once pagination stops.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);

  // Start the request outside the try so only its rejection is handled here.
  const pending = erairaws.browse({ category, page });
  let torrents;
  try {
    torrents = await pending;
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    torrents = [];
  }

  const jobs = torrents.map((torrent) => limiter.schedule(() => processRecord(torrent)));
  const resolved = await Promise.all(jobs);

  const hasMorePages = resolved.length > 0 && page < untilPage(category);
  if (hasMorePages) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return [];
}
|
||||
|
||||
/**
 * Passes a scraped torrent, tagged with this provider's name, to
 * checkAndUpdateTorrent.
 *
 * @param {Object} foundTorrent - Torrent entry produced by the API module.
 * @returns {Promise<Object>} The same `foundTorrent` once the update finished.
 */
async function processRecord(foundTorrent) {
  // Spread last so fields of the torrent win over the default provider.
  const record = { provider: NAME, ...foundTorrent };
  await checkAndUpdateTorrent(record);
  return foundTorrent;
}
|
||||
|
||||
/**
 * Returns the last listing page to scrape for a category.
 *
 * @param {string} category - One of erairaws.Categories.
 * @returns {number} 45 for the ANIMES category, 3 for anything else.
 */
function untilPage(category) {
  return category === erairaws.Categories.ANIMES ? 45 : 3;
}
|
||||
|
||||
module.exports = { scrape, NAME };
|
||||
Reference in New Issue
Block a user