mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
small adjustments to the scrapers
This commit is contained in:
@@ -141,8 +141,10 @@ function parseOriginalName(originalNameElem) {
|
|||||||
if (!originalNameElem[0]) {
|
if (!originalNameElem[0]) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue;
|
const originalName = originalNameElem.next().text()
|
||||||
return originalName.replace(/: ?/, '');
|
|| originalNameElem[0].nextSibling.nodeValue
|
||||||
|
|| originalNameElem.text();
|
||||||
|
return originalName.replace(/[^:]*: ?/, '').trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseCategory(categorys) {
|
function parseCategory(categorys) {
|
||||||
|
|||||||
@@ -102,9 +102,6 @@ function typeMapping() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function untilPage(category) {
|
function untilPage(category) {
|
||||||
if (lapumia.Categories.TV === category) {
|
|
||||||
return 5;
|
|
||||||
}
|
|
||||||
if (lapumia.Categories.ANIME === category) {
|
if (lapumia.Categories.ANIME === category) {
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -107,12 +107,6 @@ function typeMapping() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function untilPage(category) {
|
function untilPage(category) {
|
||||||
if (ondebaixa.Categories.DESENHOS === category) {
|
|
||||||
return 5;
|
|
||||||
}
|
|
||||||
if (ondebaixa.Categories.TV === category) {
|
|
||||||
return 5;
|
|
||||||
}
|
|
||||||
return UNTIL_PAGE;
|
return UNTIL_PAGE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ const { getRandomUserAgent } = require("../../lib/requestHelper");
|
|||||||
const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
|
const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
|
||||||
const appId = 'torrentio-addon';
|
const appId = 'torrentio-addon';
|
||||||
const defaultTimeout = 30000;
|
const defaultTimeout = 30000;
|
||||||
|
const retryDelay = 3000;
|
||||||
|
|
||||||
let token;
|
let token;
|
||||||
|
|
||||||
@@ -109,7 +110,7 @@ async function singleRequest(params = {}, config = {}, retries = 15) {
|
|||||||
}
|
}
|
||||||
if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) {
|
if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) {
|
||||||
// too many requests
|
// too many requests
|
||||||
return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1));
|
return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
|
||||||
}
|
}
|
||||||
if (response.status !== 200 || (response.data && response.data.error)) {
|
if (response.status !== 200 || (response.data && response.data.error)) {
|
||||||
// something went wrong
|
// something went wrong
|
||||||
@@ -120,7 +121,7 @@ async function singleRequest(params = {}, config = {}, retries = 15) {
|
|||||||
})
|
})
|
||||||
.catch(error => {
|
.catch(error => {
|
||||||
if (error.response && [429].includes(error.response.status) && retries > 0) {
|
if (error.response && [429].includes(error.response.status) && retries > 0) {
|
||||||
return Promises.delay(3000).then(() => singleRequest(params, config, retries - 1));
|
return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
|
||||||
}
|
}
|
||||||
return Promise.reject(error.message || error);
|
return Promise.reject(error.message || error);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -12,14 +12,10 @@ const allowedCategories = [
|
|||||||
rarbg.Options.category.MOVIES_XVID,
|
rarbg.Options.category.MOVIES_XVID,
|
||||||
rarbg.Options.category.MOVIES_XVID_720P,
|
rarbg.Options.category.MOVIES_XVID_720P,
|
||||||
rarbg.Options.category.MOVIES_X265_1080P,
|
rarbg.Options.category.MOVIES_X265_1080P,
|
||||||
rarbg.Options.category.MOVIES_X265_4K,
|
|
||||||
rarbg.Options.category.MOVIES_X265_4K_HDR,
|
|
||||||
rarbg.Options.category.MOVIES_X264,
|
rarbg.Options.category.MOVIES_X264,
|
||||||
rarbg.Options.category.MOVIES_X264_720P,
|
rarbg.Options.category.MOVIES_X264_720P,
|
||||||
rarbg.Options.category.MOVIES_X264_1080P,
|
rarbg.Options.category.MOVIES_X264_1080P,
|
||||||
rarbg.Options.category.MOVIES_X264_3D,
|
rarbg.Options.category.MOVIES_HIGH_RES,
|
||||||
rarbg.Options.category.MOVIES_X264_4K,
|
|
||||||
rarbg.Options.category.MOVIES_BD_REMUX,
|
|
||||||
rarbg.Options.category.TV_EPISODES,
|
rarbg.Options.category.TV_EPISODES,
|
||||||
rarbg.Options.category.TV_UHD_EPISODES,
|
rarbg.Options.category.TV_UHD_EPISODES,
|
||||||
rarbg.Options.category.TV_HD_EPISODES
|
rarbg.Options.category.TV_HD_EPISODES
|
||||||
@@ -32,8 +28,8 @@ async function scrape() {
|
|||||||
//const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
|
//const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
|
||||||
|
|
||||||
return Promise.all(
|
return Promise.all(
|
||||||
seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
|
seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
|
||||||
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
|
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
|
||||||
.then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
|
.then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ const baseUrl = 'http://www.rutor.info';
|
|||||||
const defaultTimeout = 10000;
|
const defaultTimeout = 10000;
|
||||||
|
|
||||||
const Categories = {
|
const Categories = {
|
||||||
|
ALL: '0',
|
||||||
FOREIGN_FILMS: '1',
|
FOREIGN_FILMS: '1',
|
||||||
RUSSIAN_FILMS: '5',
|
RUSSIAN_FILMS: '5',
|
||||||
SCIENCE_FILMS: '12',
|
SCIENCE_FILMS: '12',
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ const Promises = require('../../lib/promises');
|
|||||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||||
|
|
||||||
const NAME = 'ThePirateBay';
|
const NAME = 'ThePirateBay';
|
||||||
const UNTIL_PAGE = 5;
|
|
||||||
|
|
||||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||||
|
|
||||||
@@ -54,7 +53,7 @@ async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
|||||||
return Promise.resolve([]);
|
return Promise.resolve([]);
|
||||||
})
|
})
|
||||||
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
|
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
|
||||||
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE
|
.then(resolved => resolved.length > 0 && page < getUntilPage(category)
|
||||||
? scrapeLatestTorrentsForCategory(category, page + 1)
|
? scrapeLatestTorrentsForCategory(category, page + 1)
|
||||||
: Promise.resolve());
|
: Promise.resolve());
|
||||||
}
|
}
|
||||||
@@ -84,4 +83,16 @@ async function processTorrentRecord(record) {
|
|||||||
return createTorrentEntry(torrent);
|
return createTorrentEntry(torrent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function getUntilPage(category) {
|
||||||
|
switch (category) {
|
||||||
|
case thepiratebay.Categories.VIDEO.MOVIES_3D:
|
||||||
|
return 1;
|
||||||
|
case thepiratebay.Categories.VIDEO.TV_SHOWS:
|
||||||
|
case thepiratebay.Categories.VIDEO.TV_SHOWS_HD:
|
||||||
|
return 10;
|
||||||
|
default:
|
||||||
|
return 5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
module.exports = { scrape, updateSeeders, NAME };
|
module.exports = { scrape, updateSeeders, NAME };
|
||||||
@@ -72,14 +72,12 @@ function parseTableBody(body) {
|
|||||||
$('tr').each((i, element) => {
|
$('tr').each((i, element) => {
|
||||||
const row = $(element);
|
const row = $(element);
|
||||||
const titleElement = row.find('td a');
|
const titleElement = row.find('td a');
|
||||||
try {
|
if (titleElement.length) {
|
||||||
torrents.push({
|
torrents.push({
|
||||||
title: titleElement.attr('title').trim(),
|
title: titleElement.attr('title').trim(),
|
||||||
torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
|
torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
|
||||||
seeders: parseInt(row.find('span.seed_ok').first().text()),
|
seeders: parseInt(row.find('span.seed_ok').first().text()),
|
||||||
});
|
});
|
||||||
} catch (e) {
|
|
||||||
console.error('Failed parsing TorrentGalaxy row: ', e);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user