remove scraper module
This commit is contained in:
@@ -1,93 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require("magnet-uri");
|
||||
const Promises = require("../../lib/promises");
|
||||
const { getRandomUserAgent } = require("../../lib/requestHelper");
|
||||
|
||||
const defaultTimeout = 10000;
|
||||
|
||||
const baseUrl = 'https://www.erai-raws.info';
|
||||
|
||||
const Categories = {
|
||||
ANIMES: 'anime',
|
||||
EPISODES: 'episodes'
|
||||
};
|
||||
|
||||
function browse(config = {}, retries = 2) {
|
||||
if (retries === 0) {
|
||||
return Promise.reject(new Error(`Failed browse request`));
|
||||
}
|
||||
const page = config.page || 1;
|
||||
const category = config.category;
|
||||
|
||||
return singleRequest(`${baseUrl}/${category}/page/${page}/`, config)
|
||||
.then((body) => parseTableBody(body)
|
||||
.then(animes => Promises.sequence(animes.map(anime => () => singleRequest(anime.animeLink))))
|
||||
.then(animeBodies => Promise.all(animeBodies.map(animeBody => parseTorrentPage(animeBody))))
|
||||
.then(animeInfos => animeInfos.reduce((a, b) => a.concat(b), [])))
|
||||
.catch((err) => browse(config, retries - 1));
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout, };
|
||||
|
||||
return axios.get(requestUrl, options).then((response) => {
|
||||
const body = response.data;
|
||||
if (!body || (Buffer.isBuffer(body) && !body.size)) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (
|
||||
body.includes("502: Bad gateway") ||
|
||||
body.includes("403 Forbidden")
|
||||
) {
|
||||
throw new Error(`Invalid body contents: ${requestUrl}`);
|
||||
}
|
||||
return body;
|
||||
});
|
||||
}
|
||||
|
||||
function parseTableBody(body) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error("Failed loading body"));
|
||||
}
|
||||
|
||||
const links = $('[itemprop=\'headline\'] a, .content-area a.aa_ss_ops_new')
|
||||
.map((i, element) => ({
|
||||
name: $(element).text(),
|
||||
animeLink: $(element).attr("href"),
|
||||
})).get();
|
||||
resolve(links);
|
||||
});
|
||||
}
|
||||
|
||||
function parseTorrentPage(body) {
|
||||
return new Promise(async (resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error("Failed loading body"));
|
||||
}
|
||||
const entries = $('.tab-content table, .content-area table')
|
||||
.map((i, entry) => {
|
||||
const languages = $(entry).find('.tooltip3').map((_, l) => $(l).attr('data-title')).get().join('/');
|
||||
const magnets = $(entry).find('a[href^="magnet"]').map((_, m) => $(m).attr('href')).get();
|
||||
return { languages, magnets }
|
||||
}).get();
|
||||
const torrents = entries
|
||||
.map(entry => entry.magnets
|
||||
.map(magnet => decode(magnet))
|
||||
.map(decodedMagnet => ({
|
||||
title: decodedMagnet.name,
|
||||
infoHash: decodedMagnet.infoHash,
|
||||
trackers: decodedMagnet.tr,
|
||||
languages: entry.languages
|
||||
})))
|
||||
.reduce((a, b) => a.concat(b), []);
|
||||
resolve(torrents);
|
||||
});
|
||||
}
|
||||
|
||||
module.exports = { browse, Categories };
|
||||
@@ -1,50 +0,0 @@
|
||||
const Parser = require('rss-parser');
|
||||
const decode = require("magnet-uri");
|
||||
|
||||
const parser = new Parser({
|
||||
customFields: {
|
||||
item: [['erai:subtitles', 'subtitles']]
|
||||
}
|
||||
});
|
||||
const baseUrl = 'https://www.erai-raws.info';
|
||||
const rssKey = process.env.ERAI_RSS_KEY;
|
||||
|
||||
const Categories = {
|
||||
ANIMES: 'anime',
|
||||
EPISODES: 'episodes'
|
||||
};
|
||||
|
||||
function browse() {
|
||||
return parser.parseURL(`${baseUrl}/feed/?type=magnet&${rssKey}`)
|
||||
.then(result => result.items
|
||||
.map(item => {
|
||||
const decodedMagnet = decode(item.link);
|
||||
const languages = parseLanguages(item.subtitles);
|
||||
return {
|
||||
title: decodedMagnet.name,
|
||||
infoHash: decodedMagnet.infoHash,
|
||||
trackers: decodedMagnet.tr,
|
||||
languages: languages
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
const languageMapping = {
|
||||
'us': 'English',
|
||||
'br': 'Portuguese(Brazil)',
|
||||
'mx': 'Spanish(Latin_America)',
|
||||
'es': 'Spanish',
|
||||
'sa': 'Arabic',
|
||||
'fr': 'French',
|
||||
'de': 'German',
|
||||
'it': 'Italian',
|
||||
'ru': 'Russian'
|
||||
}
|
||||
function parseLanguages(languages) {
|
||||
return languages.split('][')
|
||||
.map(lang => lang.replace(/[\[\]]/g, ''))
|
||||
.map(lang => languageMapping[lang] || lang)
|
||||
.join('/');
|
||||
}
|
||||
|
||||
module.exports = { browse, Categories };
|
||||
@@ -1,47 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const erairaws = require('./erairaws_rss_api');
|
||||
const { checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'EraiRaws';
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
async function scrape() {
|
||||
const scrapeStart = moment();
|
||||
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
|
||||
|
||||
return scrapeLatestTorrents()
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
return scrapeLatestTorrentsForCategory(erairaws.Categories.EPISODES)
|
||||
.then((entries) => entries.reduce((a, b) => a.concat(b), []));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
||||
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
|
||||
return erairaws.browse({ category, page })
|
||||
.catch((error) => {
|
||||
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then((torrents) => Promise.all(torrents.map((torrent) => limiter.schedule(() => processRecord(torrent)))))
|
||||
.then((resolved) => resolved.length > 0 && page < untilPage(category)
|
||||
? scrapeLatestTorrentsForCategory(category, page + 1)
|
||||
: Promise.resolve([]));
|
||||
}
|
||||
|
||||
async function processRecord(foundTorrent) {
|
||||
return checkAndUpdateTorrent({ provider: NAME, ...foundTorrent }).then(() => foundTorrent);
|
||||
}
|
||||
|
||||
function untilPage(category) {
|
||||
if (category === erairaws.Categories.ANIMES) {
|
||||
return 45;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
module.exports = { scrape, NAME };
|
||||
Reference in New Issue
Block a user