small adjustments to the scrapers

This commit is contained in:
TheBeastLT
2021-11-11 21:12:14 +01:00
parent 7835ade720
commit 91eac9f8c6
8 changed files with 25 additions and 25 deletions

View File

@@ -141,8 +141,10 @@ function parseOriginalName(originalNameElem) {
if (!originalNameElem[0]) { if (!originalNameElem[0]) {
return ''; return '';
} }
const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue; const originalName = originalNameElem.next().text()
return originalName.replace(/: ?/, ''); || originalNameElem[0].nextSibling.nodeValue
|| originalNameElem.text();
return originalName.replace(/[^:]*: ?/, '').trim();
} }
function parseCategory(categorys) { function parseCategory(categorys) {

View File

@@ -102,9 +102,6 @@ function typeMapping() {
} }
function untilPage(category) { function untilPage(category) {
if (lapumia.Categories.TV === category) {
return 5;
}
if (lapumia.Categories.ANIME === category) { if (lapumia.Categories.ANIME === category) {
return 2; return 2;
} }

View File

@@ -107,12 +107,6 @@ function typeMapping() {
} }
function untilPage(category) { function untilPage(category) {
if (ondebaixa.Categories.DESENHOS === category) {
return 5;
}
if (ondebaixa.Categories.TV === category) {
return 5;
}
return UNTIL_PAGE; return UNTIL_PAGE;
} }

View File

@@ -6,6 +6,7 @@ const { getRandomUserAgent } = require("../../lib/requestHelper");
const baseUrl = 'https://torrentapi.org/pubapi_v2.php'; const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
const appId = 'torrentio-addon'; const appId = 'torrentio-addon';
const defaultTimeout = 30000; const defaultTimeout = 30000;
const retryDelay = 3000;
let token; let token;
@@ -109,7 +110,7 @@ async function singleRequest(params = {}, config = {}, retries = 15) {
} }
if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) { if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) {
// too many requests // too many requests
return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1)); return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
} }
if (response.status !== 200 || (response.data && response.data.error)) { if (response.status !== 200 || (response.data && response.data.error)) {
// something went wrong // something went wrong
@@ -120,7 +121,7 @@ async function singleRequest(params = {}, config = {}, retries = 15) {
}) })
.catch(error => { .catch(error => {
if (error.response && [429].includes(error.response.status) && retries > 0) { if (error.response && [429].includes(error.response.status) && retries > 0) {
return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1)); return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
} }
return Promise.reject(error.message || error); return Promise.reject(error.message || error);
}); });

View File

@@ -12,14 +12,10 @@ const allowedCategories = [
rarbg.Options.category.MOVIES_XVID, rarbg.Options.category.MOVIES_XVID,
rarbg.Options.category.MOVIES_XVID_720P, rarbg.Options.category.MOVIES_XVID_720P,
rarbg.Options.category.MOVIES_X265_1080P, rarbg.Options.category.MOVIES_X265_1080P,
rarbg.Options.category.MOVIES_X265_4K,
rarbg.Options.category.MOVIES_X265_4K_HDR,
rarbg.Options.category.MOVIES_X264, rarbg.Options.category.MOVIES_X264,
rarbg.Options.category.MOVIES_X264_720P, rarbg.Options.category.MOVIES_X264_720P,
rarbg.Options.category.MOVIES_X264_1080P, rarbg.Options.category.MOVIES_X264_1080P,
rarbg.Options.category.MOVIES_X264_3D, rarbg.Options.category.MOVIES_HIGH_RES,
rarbg.Options.category.MOVIES_X264_4K,
rarbg.Options.category.MOVIES_BD_REMUX,
rarbg.Options.category.TV_EPISODES, rarbg.Options.category.TV_EPISODES,
rarbg.Options.category.TV_UHD_EPISODES, rarbg.Options.category.TV_UHD_EPISODES,
rarbg.Options.category.TV_HD_EPISODES rarbg.Options.category.TV_HD_EPISODES
@@ -32,8 +28,8 @@ async function scrape() {
//const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds); //const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
return Promise.all( return Promise.all(
seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId)) seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t))))))) .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
.then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`)); .then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
} }

View File

@@ -8,6 +8,7 @@ const baseUrl = 'http://www.rutor.info';
const defaultTimeout = 10000; const defaultTimeout = 10000;
const Categories = { const Categories = {
ALL: '0',
FOREIGN_FILMS: '1', FOREIGN_FILMS: '1',
RUSSIAN_FILMS: '5', RUSSIAN_FILMS: '5',
SCIENCE_FILMS: '12', SCIENCE_FILMS: '12',

View File

@@ -7,7 +7,6 @@ const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries'); const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'ThePirateBay'; const NAME = 'ThePirateBay';
const UNTIL_PAGE = 5;
const limiter = new Bottleneck({ maxConcurrent: 10 }); const limiter = new Bottleneck({ maxConcurrent: 10 });
@@ -54,7 +53,7 @@ async function scrapeLatestTorrentsForCategory(category, page = 1) {
return Promise.resolve([]); return Promise.resolve([]);
}) })
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))))) .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE .then(resolved => resolved.length > 0 && page < getUntilPage(category)
? scrapeLatestTorrentsForCategory(category, page + 1) ? scrapeLatestTorrentsForCategory(category, page + 1)
: Promise.resolve()); : Promise.resolve());
} }
@@ -84,4 +83,16 @@ async function processTorrentRecord(record) {
return createTorrentEntry(torrent); return createTorrentEntry(torrent);
} }
/**
 * Resolves the last page number to scrape for a given category.
 *
 * The category is compared by strict equality against the entries of
 * `thepiratebay.Categories.VIDEO`:
 *   - MOVIES_3D               -> 1
 *   - TV_SHOWS / TV_SHOWS_HD  -> 10
 *   - anything else           -> 5
 *
 * @param {*} category - category identifier to look up.
 * @returns {number} the page limit for that category.
 */
function getUntilPage(category) {
  // 3D movies get the smallest limit.
  if (category === thepiratebay.Categories.VIDEO.MOVIES_3D) {
    return 1;
  }

  // Both TV show categories share the largest limit.
  const isTvShows = category === thepiratebay.Categories.VIDEO.TV_SHOWS
      || category === thepiratebay.Categories.VIDEO.TV_SHOWS_HD;
  if (isTvShows) {
    return 10;
  }

  // Fallback for every other category.
  return 5;
}
module.exports = { scrape, updateSeeders, NAME }; module.exports = { scrape, updateSeeders, NAME };

View File

@@ -72,14 +72,12 @@ function parseTableBody(body) {
$('tr').each((i, element) => { $('tr').each((i, element) => {
const row = $(element); const row = $(element);
const titleElement = row.find('td a'); const titleElement = row.find('td a');
try { if (titleElement.length) {
torrents.push({ torrents.push({
title: titleElement.attr('title').trim(), title: titleElement.attr('title').trim(),
torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1], torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
seeders: parseInt(row.find('span.seed_ok').first().text()), seeders: parseInt(row.find('span.seed_ok').first().text()),
}); });
} catch (e) {
console.error('Failed parsing TorrentGalaxy row: ', e);
} }
}); });