small adjustments to the scrapers

TheBeastLT
2021-11-11 21:12:14 +01:00
parent 7835ade720
commit 91eac9f8c6
8 changed files with 25 additions and 25 deletions

View File

@@ -141,8 +141,10 @@ function parseOriginalName(originalNameElem) {
   if (!originalNameElem[0]) {
     return '';
   }
-  const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue;
-  return originalName.replace(/: ?/, '');
+  const originalName = originalNameElem.next().text()
+      || originalNameElem[0].nextSibling.nodeValue
+      || originalNameElem.text();
+  return originalName.replace(/[^:]*: ?/, '').trim();
 }

 function parseCategory(categorys) {
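
A concrete input makes the regex change easier to follow; the label below is illustrative, not taken from the scraped site:

// old /: ?/ removed only the first colon; new /[^:]*: ?/ also drops the
// leading label, and the added .trim() handles stray whitespace
const raw = 'Título Original: The Matrix ';
raw.replace(/: ?/, '');             // 'Título OriginalThe Matrix ' (label survives)
raw.replace(/[^:]*: ?/, '').trim(); // 'The Matrix' (label and colon stripped)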

View File

@@ -102,9 +102,6 @@ function typeMapping() {
 }

 function untilPage(category) {
-  if (lapumia.Categories.TV === category) {
-    return 5;
-  }
   if (lapumia.Categories.ANIME === category) {
     return 2;
   }

View File

@@ -107,12 +107,6 @@ function typeMapping() {
 }

 function untilPage(category) {
-  if (ondebaixa.Categories.DESENHOS === category) {
-    return 5;
-  }
-  if (ondebaixa.Categories.TV === category) {
-    return 5;
-  }
   return UNTIL_PAGE;
 }
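
Together with the lapumia change above, the overrides that merely duplicated the default are gone; a sketch of the consolidated shape, assuming lapumia's UNTIL_PAGE default is also 5, which is what made its TV branch redundant:

// only depths that differ from the shared default keep a branch
function untilPage(category) {
  if (lapumia.Categories.ANIME === category) {
    return 2; // anime listings stay shallow
  }
  return UNTIL_PAGE; // TV and everything else crawl the default 5 pages
}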

View File

@@ -6,6 +6,7 @@ const { getRandomUserAgent } = require("../../lib/requestHelper");
 const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
 const appId = 'torrentio-addon';
 const defaultTimeout = 30000;
+const retryDelay = 3000;

 let token;
@@ -109,7 +110,7 @@ async function singleRequest(params = {}, config = {}, retries = 15) {
       }
       if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) {
         // too many requests
-        return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1));
+        return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
       }
if (response.status !== 200 || (response.data && response.data.error)) {
// something went wrong
@@ -120,7 +121,7 @@ async function singleRequest(params = {}, config = {}, retries = 15) {
     })
     .catch(error => {
       if (error.response && [429].includes(error.response.status) && retries > 0) {
-        return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1));
+        return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
       }
       return Promise.reject(error.message || error);
     });
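
Naming the 3000ms constant keeps the two retry paths in sync. A self-contained sketch of the same delay-then-retry shape, with a minimal stand-in for the project's Promises.delay helper:

// delay stands in for Promises.delay from ../../lib/promises;
// the defaults mirror the constants in this file
const delay = ms => new Promise(resolve => setTimeout(resolve, ms));

async function withRetries(fn, retries = 15, retryDelay = 3000) {
  try {
    return await fn();
  } catch (error) {
    if (retries > 0) {
      await delay(retryDelay); // back off before hitting the API again
      return withRetries(fn, retries - 1, retryDelay);
    }
    throw error;
  }
}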

View File

@@ -12,14 +12,10 @@ const allowedCategories = [
rarbg.Options.category.MOVIES_XVID,
rarbg.Options.category.MOVIES_XVID_720P,
rarbg.Options.category.MOVIES_X265_1080P,
rarbg.Options.category.MOVIES_X265_4K,
rarbg.Options.category.MOVIES_X265_4K_HDR,
rarbg.Options.category.MOVIES_X264,
rarbg.Options.category.MOVIES_X264_720P,
rarbg.Options.category.MOVIES_X264_1080P,
rarbg.Options.category.MOVIES_X264_3D,
rarbg.Options.category.MOVIES_X264_4K,
rarbg.Options.category.MOVIES_BD_REMUX,
rarbg.Options.category.MOVIES_HIGH_RES,
rarbg.Options.category.TV_EPISODES,
rarbg.Options.category.TV_UHD_EPISODES,
rarbg.Options.category.TV_HD_EPISODES
@@ -32,8 +28,8 @@ async function scrape() {
   //const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
   return Promise.all(
-    seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
-        .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
+      seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
+          .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
       .then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
 }
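
The scrape funnels every request through two Bottleneck limiters: the outer one paces the imdbId lookups, the inner one paces per-torrent processing. A runnable sketch of that two-level throttling (the maxConcurrent values are illustrative; the real limiter construction is not part of this hunk):

const Bottleneck = require('bottleneck');

const limiter = new Bottleneck({ maxConcurrent: 5 });       // imdbId to torrents
const entryLimiter = new Bottleneck({ maxConcurrent: 20 }); // torrent to db entry

function scrapeAll(imdbIds, getTorrentsForImdbId, processTorrentRecord) {
  return Promise.all(imdbIds.map(imdbId =>
      limiter.schedule(() => getTorrentsForImdbId(imdbId))
          .then(torrents => Promise.all(
              torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))));
}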

View File

@@ -8,6 +8,7 @@ const baseUrl = 'http://www.rutor.info';
 const defaultTimeout = 10000;

 const Categories = {
   ALL: '0',
+  FOREIGN_FILMS: '1',
   RUSSIAN_FILMS: '5',
   SCIENCE_FILMS: '12',

View File

@@ -7,7 +7,6 @@ const Promises = require('../../lib/promises');
 const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');

 const NAME = 'ThePirateBay';
-const UNTIL_PAGE = 5;

 const limiter = new Bottleneck({ maxConcurrent: 10 });
@@ -54,7 +53,7 @@ async function scrapeLatestTorrentsForCategory(category, page = 1) {
         return Promise.resolve([]);
       })
       .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
-      .then(resolved => resolved.length > 0 && page < UNTIL_PAGE
+      .then(resolved => resolved.length > 0 && page < getUntilPage(category)
           ? scrapeLatestTorrentsForCategory(category, page + 1)
           : Promise.resolve());
 }
@@ -84,4 +83,16 @@ async function processTorrentRecord(record) {
   return createTorrentEntry(torrent);
 }

+function getUntilPage(category) {
+  switch (category) {
+    case thepiratebay.Categories.VIDEO.MOVIES_3D:
+      return 1;
+    case thepiratebay.Categories.VIDEO.TV_SHOWS:
+    case thepiratebay.Categories.VIDEO.TV_SHOWS_HD:
+      return 10;
+    default:
+      return 5;
+  }
+}
+
 module.exports = { scrape, updateSeeders, NAME };
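
getUntilPage replaces the single UNTIL_PAGE constant with per-category crawl depths, which the recursive scrape above consults on every page. The same loop as a self-contained sketch (fetchPage and process are hypothetical placeholders):

// walk a category page by page until a page comes back empty
// or the category's depth from getUntilPage is reached
function scrapeCategory(fetchPage, process, category, page = 1) {
  return fetchPage(category, page)
      .then(torrents => Promise.all(torrents.map(process)))
      .then(resolved => resolved.length > 0 && page < getUntilPage(category)
          ? scrapeCategory(fetchPage, process, category, page + 1)
          : Promise.resolve());
}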

View File

@@ -72,14 +72,12 @@ function parseTableBody(body) {
   $('tr').each((i, element) => {
     const row = $(element);
     const titleElement = row.find('td a');
-    try {
+    if (titleElement.length) {
       torrents.push({
         title: titleElement.attr('title').trim(),
         torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
         seeders: parseInt(row.find('span.seed_ok').first().text()),
       });
-    } catch (e) {
-      console.error('Failed parsing TorrentGalaxy row: ', e);
     }
   });
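
The length check skips rows that simply have no link (headers, ads) instead of treating every malformed row as a thrown error. A runnable sketch of the guarded parse with a made-up HTML fragment:

const cheerio = require('cheerio');

// two rows: one without a link (skipped), one well-formed (parsed)
const $ = cheerio.load(
    '<table><tr><td>header row, no link</td></tr>' +
    '<tr><td><a title=" Some.Show.S01 " href="/torrent/123/some-show">x</a>' +
    '<span class="seed_ok">42</span></td></tr></table>');

const torrents = [];
$('tr').each((i, element) => {
  const row = $(element);
  const titleElement = row.find('td a');
  if (titleElement.length) { // guard instead of try/catch
    torrents.push({
      title: titleElement.attr('title').trim(),
      torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
      seeders: parseInt(row.find('span.seed_ok').first().text()),
    });
  }
});
// torrents: [{ title: 'Some.Show.S01', torrentId: '123/some-show', seeders: 42 }]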