add erairaws language scraper
This commit is contained in:
@@ -7,6 +7,7 @@ const kickassScraper = require('../scrapers/kickass/kickass_scraper');
|
||||
const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
|
||||
const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper');
|
||||
const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper');
|
||||
const erairawsScraper = require('../scrapers/erairaws/erairaws_scraper');
|
||||
const torrentGalaxyScraper = require('../scrapers/torrentgalaxy/torrentgalaxy_scraper');
|
||||
const rutorScraper = require('../scrapers/rutor/rutor_scraper');
|
||||
const Comando = require('../scrapers/comando/comando_scraper')
|
||||
@@ -34,6 +35,7 @@ module.exports = [
|
||||
{ scraper: OndeBaixa, name: OndeBaixa.NAME, cron: '0 0 */4 ? * *' },
|
||||
// { scraper: AnimesTorrent, name: AnimesTorrent.NAME, cron: '0 0 */4 ? * *' },
|
||||
// { scraper: DarkMahou, name: DarkMahou.NAME, cron: '0 0 */4 ? * *' },
|
||||
{ scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */24 ? * *' },
|
||||
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
|
||||
// { scraper: require('../scrapers/1337x/1337x_search_scraper') }
|
||||
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
|
||||
|
||||
94
scraper/scrapers/erairaws/erairaws_api.js
Normal file
94
scraper/scrapers/erairaws/erairaws_api.js
Normal file
@@ -0,0 +1,94 @@
|
||||
const needle = require("needle");
|
||||
const axios = require('axios');
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require("magnet-uri");
|
||||
const Promises = require("../../lib/promises");
|
||||
const { getRandomUserAgent } = require("../../lib/requestHelper");
|
||||
|
||||
// Request timeout in milliseconds, used when the caller's config carries no `timeout`.
const defaultTimeout = 10 * 1000;

// Site root; listing page URLs are built on top of it.
const baseUrl = 'https://www.erai-raws.info';

// Listing sections of the site. Each value is the URL path segment placed
// right after the site root when a listing page is requested.
const Categories = { ANIMES: 'anime', EPISODES: 'episodes' };
|
||||
|
||||
/**
 * Fetches one listing page of a category and collects the torrents found on
 * every page it links to.
 *
 * The whole page fetch is attempted up to `retries` times. A zero or negative
 * `retries` performs no attempt and rejects immediately.
 *
 * @param {Object} [config]
 * @param {string} [config.category] URL path segment of the listing (see Categories).
 * @param {number} [config.page=1] Listing page number.
 * @param {number} [config.timeout] Per-request timeout forwarded to singleRequest.
 * @param {number} [retries=2] Maximum number of attempts.
 * @returns {Promise<Array>} Flat list of torrent records from all linked pages.
 * @throws {Error} "Failed browse request" once every attempt failed; the last
 *     underlying error, if any, is attached as `cause`.
 */
async function browse(config = {}, retries = 2) {
  let lastError;
  // A bounded loop (rather than recursing until `retries === 0`) also
  // terminates when a caller passes a negative retry count.
  for (let attempt = 0; attempt < retries; attempt += 1) {
    try {
      return await browsePage(config);
    } catch (err) {
      lastError = err;
    }
  }

  const failure = new Error(`Failed browse request`);
  if (lastError !== undefined) {
    // Assigned manually so this also works on runtimes without the
    // `new Error(message, { cause })` constructor option.
    failure.cause = lastError;
  }
  throw failure;
}

/**
 * Performs a single, non-retried browse of one listing page.
 *
 * @param {Object} config Same shape as the `config` argument of browse().
 * @returns {Promise<Array>} Flat list of torrent records.
 */
function browsePage(config) {
  const page = config.page || 1;
  const category = config.category;

  return singleRequest(`${baseUrl}/${category}/page/${page}/`, config)
      .then((body) => parseTableBody(body))
      // Detail pages are requested through Promises.sequence (presumably one
      // at a time - confirm in lib/promises) and reuse the caller's config so
      // a custom timeout applies to them as well.
      .then((animes) => Promises.sequence(animes.map((anime) => () => singleRequest(anime.animeLink, config))))
      .then((animeBodies) => Promise.all(animeBodies.map((animeBody) => parseTorrentPage(animeBody))))
      // Each detail page yields its own array; merge them into one list.
      .then((animeInfos) => animeInfos.reduce((a, b) => a.concat(b), []));
}
|
||||
|
||||
/**
 * Downloads a single URL and returns its body after basic sanity checks.
 *
 * @param {string} requestUrl Absolute URL to fetch.
 * @param {Object} [config]
 * @param {number} [config.timeout] Timeout in milliseconds; falls back to defaultTimeout.
 * @returns {Promise<string|Buffer>} The response body.
 * @throws {Error} "No body: <url>" when the response is empty, or
 *     "Invalid body contents: <url>" when it is not text or carries a known
 *     gateway/forbidden error page.
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  // axios takes the user agent as a request header; the needle-style
  // `userAgent` / `follow` keys are not axios options and were ignored.
  // Redirects are still followed because axios does so by default.
  const options = {
    headers: { 'User-Agent': getRandomUserAgent() },
    timeout: timeout,
  };

  return axios.get(requestUrl, options).then((response) => {
    const body = response.data;
    // Buffers expose `length` (there is no `size`), so test that for emptiness.
    if (!body || (Buffer.isBuffer(body) && !body.length)) {
      throw new Error(`No body: ${requestUrl}`);
    }

    // Anything that is neither text nor bytes (e.g. a payload axios already
    // parsed into an object) cannot be an HTML page.
    const isText = typeof body === 'string' || Buffer.isBuffer(body);
    if (!isText || body.includes("502: Bad gateway") || body.includes("403 Forbidden")) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  });
}
|
||||
|
||||
/**
 * Extracts the links found on a listing page.
 *
 * @param {string|Buffer} body HTML of the listing page.
 * @returns {Promise<Array<{name: string, animeLink: string}>>} One entry per
 *     matching anchor that carries an href; anchors without one are dropped
 *     because there is nothing to request for them.
 * @throws {Error} Rejects when the body cannot be loaded or parsed.
 */
async function parseTableBody(body) {
  const $ = cheerio.load(body);

  if (!$) {
    throw new Error("Failed loading body");
  }

  return $("[itemprop='headline'] a, .content-area a.aa_ss_ops_new")
      .map((i, element) => {
        const anchor = $(element);
        return { name: anchor.text(), animeLink: anchor.attr("href") };
      })
      .get()
      .filter((link) => Boolean(link.animeLink));
}
|
||||
|
||||
/**
 * Extracts torrent records from a detail page.
 *
 * Every matching table contributes its language labels (joined with "/") and
 * one record per magnet link found inside it.
 *
 * Written as an async function instead of `new Promise(async ...)`: an error
 * thrown inside an async executor never settles the outer promise, which
 * would leave the caller waiting forever.
 *
 * @param {string|Buffer} body HTML of the detail page.
 * @returns {Promise<Array<{title: string, infoHash: string, trackers: *, languages: string}>>}
 *     Flat list of torrents; magnets that decode without an info hash are skipped.
 * @throws {Error} Rejects when the body cannot be loaded or a magnet cannot be decoded.
 */
async function parseTorrentPage(body) {
  const $ = cheerio.load(body);

  if (!$) {
    throw new Error("Failed loading body");
  }

  const entries = $('.tab-content table, .content-area table')
      .map((i, entry) => {
        const table = $(entry);
        const languages = table.find('.tooltip3').map((_, l) => $(l).attr('data-title')).get().join('/');
        const magnets = table.find('a[href^="magnet"]').map((_, m) => $(m).attr('href')).get();
        return { languages, magnets };
      })
      .get();

  return entries
      .map((entry) => entry.magnets
          .map((magnet) => decode(magnet))
          // Without an info hash the record cannot identify a torrent.
          .filter((decodedMagnet) => Boolean(decodedMagnet && decodedMagnet.infoHash))
          .map((decodedMagnet) => ({
            title: decodedMagnet.name,
            infoHash: decodedMagnet.infoHash,
            trackers: decodedMagnet.tr,
            languages: entry.languages
          })))
      // One array per table; merge them into a single list.
      .reduce((a, b) => a.concat(b), []);
}
|
||||
|
||||
module.exports = { browse, Categories };
|
||||
47
scraper/scrapers/erairaws/erairaws_scraper.js
Normal file
47
scraper/scrapers/erairaws/erairaws_scraper.js
Normal file
@@ -0,0 +1,47 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const erairaws = require('./erairaws_api');
|
||||
const { checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
// Provider label: written into every processed record, printed in log lines,
// and exported next to `scrape`.
const NAME = 'EraiRaws';

// Shared limiter through which all processRecord jobs are scheduled.
const limiter = new Bottleneck({
  maxConcurrent: 10,
});
|
||||
|
||||
/**
 * Entry point of the scraper: runs one full scrape and logs when it starts
 * and when it finishes.
 *
 * @returns {Promise<undefined>} Settles once the scrape has completed.
 */
async function scrape() {
  const startedAt = moment();
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();

  // A fresh timestamp is taken here so the line reflects the finish time.
  return console.log(`[${moment()}] finished ${NAME} scrape`);
}
|
||||
|
||||
/**
 * Scrapes the episodes category and flattens whatever it resolves to by one level.
 *
 * @returns {Promise<Array>} The flattened result of the category scrape.
 */
async function scrapeLatestTorrents() {
  const entries = await scrapeLatestTorrentsForCategory(erairaws.Categories.EPISODES);

  let flattened = [];
  entries.forEach((chunk) => {
    flattened = flattened.concat(chunk);
  });
  return flattened;
}
|
||||
|
||||
/**
 * Scrapes one listing page of a category, processes every torrent found on
 * it, then moves on to the next page.
 *
 * Paging stops when a page produced no torrents (a failed browse counts as
 * an empty page) or when the page limit given by untilPage() is reached.
 *
 * @param {string} category Category to scrape (see erairaws.Categories).
 * @param {number} [page=1] Page to start from.
 * @returns {Promise<Array>} Always resolves to an empty array once paging ends.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);

  const pendingPage = erairaws.browse({ category, page });
  let torrents;
  try {
    torrents = await pendingPage;
  } catch (error) {
    // A failed page is logged and treated as empty, which also ends paging.
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    torrents = [];
  }

  // Every record goes through the shared limiter.
  const jobs = torrents.map((torrent) => limiter.schedule(() => processRecord(torrent)));
  const resolved = await Promise.all(jobs);

  const hasNextPage = resolved.length > 0 && page < untilPage(category);
  if (!hasNextPage) {
    return [];
  }
  return scrapeLatestTorrentsForCategory(category, page + 1);
}
|
||||
|
||||
/**
 * Hands one scraped torrent to checkAndUpdateTorrent, tagged with this provider.
 *
 * @param {Object} foundTorrent Torrent record produced by the API module.
 * @returns {Promise<Object>} The same `foundTorrent` object, once the update has finished.
 */
async function processRecord(foundTorrent) {
  // The provider goes first so a `provider` already on the record takes precedence.
  const record = { provider: NAME, ...foundTorrent };
  await checkAndUpdateTorrent(record);
  return foundTorrent;
}
|
||||
|
||||
/**
 * Gives the last listing page to scrape for a category.
 *
 * @param {string} category Category being scraped.
 * @returns {number} 45 for the animes category, 3 for anything else.
 */
function untilPage(category) {
  return category === erairaws.Categories.ANIMES ? 45 : 3;
}
|
||||
|
||||
module.exports = { scrape, NAME };
|
||||
Reference in New Issue
Block a user