mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
small adjustments to the scrapers
This commit is contained in:
@@ -141,8 +141,10 @@ function parseOriginalName(originalNameElem) {
|
||||
if (!originalNameElem[0]) {
|
||||
return '';
|
||||
}
|
||||
const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue;
|
||||
return originalName.replace(/: ?/, '');
|
||||
const originalName = originalNameElem.next().text()
|
||||
|| originalNameElem[0].nextSibling.nodeValue
|
||||
|| originalNameElem.text();
|
||||
return originalName.replace(/[^:]*: ?/, '').trim();
|
||||
}
|
||||
|
||||
function parseCategory(categorys) {
|
||||
|
||||
@@ -102,9 +102,6 @@ function typeMapping() {
|
||||
}
|
||||
|
||||
function untilPage(category) {
|
||||
if (lapumia.Categories.TV === category) {
|
||||
return 5;
|
||||
}
|
||||
if (lapumia.Categories.ANIME === category) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
@@ -107,12 +107,6 @@ function typeMapping() {
|
||||
}
|
||||
|
||||
function untilPage(category) {
|
||||
if (ondebaixa.Categories.DESENHOS === category) {
|
||||
return 5;
|
||||
}
|
||||
if (ondebaixa.Categories.TV === category) {
|
||||
return 5;
|
||||
}
|
||||
return UNTIL_PAGE;
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ const { getRandomUserAgent } = require("../../lib/requestHelper");
|
||||
const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
|
||||
const appId = 'torrentio-addon';
|
||||
const defaultTimeout = 30000;
|
||||
const retryDelay = 3000;
|
||||
|
||||
let token;
|
||||
|
||||
@@ -109,7 +110,7 @@ async function singleRequest(params = {}, config = {}, retries = 15) {
|
||||
}
|
||||
if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) {
|
||||
// too many requests
|
||||
return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1));
|
||||
return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
|
||||
}
|
||||
if (response.status !== 200 || (response.data && response.data.error)) {
|
||||
// something went wrong
|
||||
@@ -120,7 +121,7 @@ async function singleRequest(params = {}, config = {}, retries = 15) {
|
||||
})
|
||||
.catch(error => {
|
||||
if (error.response && [429].includes(error.response.status) && retries > 0) {
|
||||
return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1));
|
||||
return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
|
||||
}
|
||||
return Promise.reject(error.message || error);
|
||||
});
|
||||
|
||||
@@ -12,14 +12,10 @@ const allowedCategories = [
|
||||
rarbg.Options.category.MOVIES_XVID,
|
||||
rarbg.Options.category.MOVIES_XVID_720P,
|
||||
rarbg.Options.category.MOVIES_X265_1080P,
|
||||
rarbg.Options.category.MOVIES_X265_4K,
|
||||
rarbg.Options.category.MOVIES_X265_4K_HDR,
|
||||
rarbg.Options.category.MOVIES_X264,
|
||||
rarbg.Options.category.MOVIES_X264_720P,
|
||||
rarbg.Options.category.MOVIES_X264_1080P,
|
||||
rarbg.Options.category.MOVIES_X264_3D,
|
||||
rarbg.Options.category.MOVIES_X264_4K,
|
||||
rarbg.Options.category.MOVIES_BD_REMUX,
|
||||
rarbg.Options.category.MOVIES_HIGH_RES,
|
||||
rarbg.Options.category.TV_EPISODES,
|
||||
rarbg.Options.category.TV_UHD_EPISODES,
|
||||
rarbg.Options.category.TV_HD_EPISODES
|
||||
@@ -32,8 +28,8 @@ async function scrape() {
|
||||
//const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
|
||||
|
||||
return Promise.all(
|
||||
seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
|
||||
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
|
||||
seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
|
||||
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ const baseUrl = 'http://www.rutor.info';
|
||||
const defaultTimeout = 10000;
|
||||
|
||||
const Categories = {
|
||||
ALL: '0',
|
||||
FOREIGN_FILMS: '1',
|
||||
RUSSIAN_FILMS: '5',
|
||||
SCIENCE_FILMS: '12',
|
||||
|
||||
@@ -7,7 +7,6 @@ const Promises = require('../../lib/promises');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'ThePirateBay';
|
||||
const UNTIL_PAGE = 5;
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
@@ -54,7 +53,7 @@ async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
|
||||
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE
|
||||
.then(resolved => resolved.length > 0 && page < getUntilPage(category)
|
||||
? scrapeLatestTorrentsForCategory(category, page + 1)
|
||||
: Promise.resolve());
|
||||
}
|
||||
@@ -84,4 +83,16 @@ async function processTorrentRecord(record) {
|
||||
return createTorrentEntry(torrent);
|
||||
}
|
||||
|
||||
function getUntilPage(category) {
|
||||
switch (category) {
|
||||
case thepiratebay.Categories.VIDEO.MOVIES_3D:
|
||||
return 1;
|
||||
case thepiratebay.Categories.VIDEO.TV_SHOWS:
|
||||
case thepiratebay.Categories.VIDEO.TV_SHOWS_HD:
|
||||
return 10;
|
||||
default:
|
||||
return 5;
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -72,14 +72,12 @@ function parseTableBody(body) {
|
||||
$('tr').each((i, element) => {
|
||||
const row = $(element);
|
||||
const titleElement = row.find('td a');
|
||||
try {
|
||||
if (titleElement.length) {
|
||||
torrents.push({
|
||||
title: titleElement.attr('title').trim(),
|
||||
torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
|
||||
seeders: parseInt(row.find('span.seed_ok').first().text()),
|
||||
});
|
||||
} catch (e) {
|
||||
console.error('Failed parsing TorrentGalaxy row: ', e);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user