mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
[scraper] clean up torrent galaxy provider
This commit is contained in:
@@ -4,7 +4,7 @@ const Sugar = require('sugar-date');
|
|||||||
const decode = require('magnet-uri');
|
const decode = require('magnet-uri');
|
||||||
const Promises = require('../../lib/promises');
|
const Promises = require('../../lib/promises');
|
||||||
const { escapeHTML } = require('../../lib/metadata');
|
const { escapeHTML } = require('../../lib/metadata');
|
||||||
const { getRandomUserAgent, defaultOptionsWithProxy } = require('../../lib/requestHelper');
|
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||||
|
|
||||||
const defaultProxies = [
|
const defaultProxies = [
|
||||||
'https://1337x.to'
|
'https://1337x.to'
|
||||||
|
|||||||
@@ -3,11 +3,12 @@ const needle = require('needle');
|
|||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
const decode = require('magnet-uri');
|
const decode = require('magnet-uri');
|
||||||
const Promises = require('../../lib/promises');
|
const Promises = require('../../lib/promises');
|
||||||
|
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||||
|
|
||||||
const defaultProxies = [
|
const defaultProxies = [
|
||||||
'https://torrentgalaxy.to',
|
// 'https://torrentgalaxy.to',
|
||||||
// 'https://torrentgalaxy.mx',
|
// 'https://torrentgalaxy.mx',
|
||||||
// 'https://torrentgalaxy.su'
|
'https://torrentgalaxy.su'
|
||||||
];
|
];
|
||||||
const defaultTimeout = 10000;
|
const defaultTimeout = 10000;
|
||||||
|
|
||||||
@@ -53,9 +54,9 @@ function search(keyword, config = {}, retries = 2) {
|
|||||||
.catch(() => search(keyword, config, retries - 1));
|
.catch(() => search(keyword, config, retries - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
function browse(config = {}, retries = 2) {
|
function browse(config = {}, retries = 2, error = null) {
|
||||||
if (retries === 0) {
|
if (retries === 0) {
|
||||||
return Promise.reject(new Error(`Failed browse request`));
|
return Promise.reject(error || new Error(`Failed browse request`));
|
||||||
}
|
}
|
||||||
const proxyList = config.proxyList || defaultProxies;
|
const proxyList = config.proxyList || defaultProxies;
|
||||||
const page = config.page || 1;
|
const page = config.page || 1;
|
||||||
@@ -64,17 +65,17 @@ function browse(config = {}, retries = 2) {
|
|||||||
return Promises.first(proxyList
|
return Promises.first(proxyList
|
||||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`)))
|
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`)))
|
||||||
.then((body) => parseTableBody(body))
|
.then((body) => parseTableBody(body))
|
||||||
.catch(() => browse(config, retries - 1));
|
.catch((err) => browse(config, retries - 1, err));
|
||||||
}
|
}
|
||||||
|
|
||||||
function singleRequest(requestUrl) {
|
function singleRequest(requestUrl) {
|
||||||
const options = { open_timeout: defaultTimeout, follow: 2 };
|
const options = { userAgent: getRandomUserAgent(), open_timeout: defaultTimeout, follow: 2 };
|
||||||
|
|
||||||
return needle('get', requestUrl, options)
|
return needle('get', requestUrl, options)
|
||||||
.then((response) => {
|
.then((response) => {
|
||||||
const body = response.body;
|
const body = response.body;
|
||||||
if (!body) {
|
if (!body) {
|
||||||
throw new Error(`No body: ${requestUrl}`);
|
throw new Error(`No body: ${requestUrl} with status ${response.statusCode}`);
|
||||||
} else if (body.includes('Access Denied')) {
|
} else if (body.includes('Access Denied')) {
|
||||||
console.log(`Access Denied: ${requestUrl}`);
|
console.log(`Access Denied: ${requestUrl}`);
|
||||||
throw new Error(`Access Denied: ${requestUrl}`);
|
throw new Error(`Access Denied: ${requestUrl}`);
|
||||||
@@ -102,21 +103,25 @@ function parseTableBody(body) {
|
|||||||
const row = $(element);
|
const row = $(element);
|
||||||
const magnetLink = row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(2)').attr('href');
|
const magnetLink = row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(2)').attr('href');
|
||||||
const imdbIdMatch = row.html().match(/search=(tt\d+)/i);
|
const imdbIdMatch = row.html().match(/search=(tt\d+)/i);
|
||||||
torrents.push({
|
try {
|
||||||
name: row.find('.tgxtablecell div a[title]').first().text(),
|
torrents.push({
|
||||||
infoHash: decode(magnetLink).infoHash,
|
name: row.find('.tgxtablecell div a[title]').first().text(),
|
||||||
magnetLink: magnetLink,
|
infoHash: decode(magnetLink).infoHash,
|
||||||
torrentLink: row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(1)').first().attr('href'),
|
magnetLink: magnetLink,
|
||||||
torrentId: row.find('.tgxtablecell div a[title]').first().attr('href').match(/torrent\/(\d+)/)[1],
|
torrentLink: row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(1)').first().attr('href'),
|
||||||
verified: !!row.find('i.fa-check').length,
|
torrentId: row.find('.tgxtablecell div a[title]').first().attr('href').match(/torrent\/(\d+)/)[1],
|
||||||
category: row.find('div:nth-of-type(n+2) .shrink a').first().attr('href').match(/cat=(\d+)$/)[1],
|
verified: !!row.find('i.fa-check').length,
|
||||||
seeders: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'green\'] b').first().text()),
|
category: row.find('div:nth-of-type(n+2) .shrink a').first().attr('href').match(/cat=(\d+)$/)[1],
|
||||||
leechers: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'#ff0000\'] b').first().text()),
|
seeders: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'green\'] b').first().text()),
|
||||||
languages: row.find('.tgxtablecell img[title]').first().attr('title'),
|
leechers: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'#ff0000\'] b').first().text()),
|
||||||
size: parseSize(row.find('.collapsehide span.badge-secondary').first().text()),
|
languages: row.find('.tgxtablecell img[title]').first().attr('title'),
|
||||||
uploadDate: parseDate(row.find('div.collapsehide:nth-of-type(12)').first().text()),
|
size: parseSize(row.find('.collapsehide span.badge-secondary').first().text()),
|
||||||
imdbId: imdbIdMatch && imdbIdMatch[1],
|
uploadDate: parseDate(row.find('div.collapsehide:nth-of-type(12)').first().text()),
|
||||||
});
|
imdbId: imdbIdMatch && imdbIdMatch[1],
|
||||||
|
});
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed parsing TorrentGalaxy row: ', e);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
resolve(torrents);
|
resolve(torrents);
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrent
|
|||||||
const NAME = 'TorrentGalaxy';
|
const NAME = 'TorrentGalaxy';
|
||||||
const TYPE_MAPPING = typeMapping();
|
const TYPE_MAPPING = typeMapping();
|
||||||
|
|
||||||
|
const api_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 5000 });
|
||||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||||
const allowedCategories = [
|
const allowedCategories = [
|
||||||
torrentGalaxy.Categories.ANIME,
|
torrentGalaxy.Categories.ANIME,
|
||||||
@@ -21,7 +22,6 @@ const allowedCategories = [
|
|||||||
torrentGalaxy.Categories.TV_SD,
|
torrentGalaxy.Categories.TV_SD,
|
||||||
torrentGalaxy.Categories.TV_HD,
|
torrentGalaxy.Categories.TV_HD,
|
||||||
torrentGalaxy.Categories.TV_PACKS,
|
torrentGalaxy.Categories.TV_PACKS,
|
||||||
torrentGalaxy.Categories.TV_SPORT,
|
|
||||||
torrentGalaxy.Categories.DOCUMENTARIES,
|
torrentGalaxy.Categories.DOCUMENTARIES,
|
||||||
];
|
];
|
||||||
const packCategories = [
|
const packCategories = [
|
||||||
@@ -57,7 +57,7 @@ async function scrapeLatestTorrents() {
|
|||||||
|
|
||||||
async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
||||||
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
|
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
|
||||||
return torrentGalaxy.browse(({ category, page }))
|
return api_limiter.schedule(() => torrentGalaxy.browse({ category, page }))
|
||||||
.catch(error => {
|
.catch(error => {
|
||||||
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
|
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
|
||||||
return Promise.resolve([]);
|
return Promise.resolve([]);
|
||||||
@@ -73,10 +73,6 @@ async function processTorrentRecord(record) {
|
|||||||
return Promise.resolve('Invalid torrent record');
|
return Promise.resolve('Invalid torrent record');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (await checkAndUpdateTorrent(record)) {
|
|
||||||
return record;
|
|
||||||
}
|
|
||||||
|
|
||||||
const torrent = {
|
const torrent = {
|
||||||
provider: NAME,
|
provider: NAME,
|
||||||
infoHash: record.infoHash,
|
infoHash: record.infoHash,
|
||||||
@@ -92,6 +88,15 @@ async function processTorrentRecord(record) {
|
|||||||
languages: !(record.languages || '').includes('Other') ? record.languages : undefined
|
languages: !(record.languages || '').includes('Other') ? record.languages : undefined
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (await checkAndUpdateTorrent(torrent)) {
|
||||||
|
return torrent;
|
||||||
|
}
|
||||||
|
const isOld = moment(torrent.uploadDate).isBefore(moment().subtract(18, 'month'));
|
||||||
|
if (torrent.seeders === 0 && isOld && !torrent.pack) {
|
||||||
|
console.log(`Skipping old unseeded torrent [${torrent.infoHash}] ${torrent.title}`)
|
||||||
|
return torrent;
|
||||||
|
}
|
||||||
|
|
||||||
return createTorrentEntry(torrent).then(() => torrent);
|
return createTorrentEntry(torrent).then(() => torrent);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -116,7 +121,6 @@ function getMaxPage(category) {
|
|||||||
switch (category) {
|
switch (category) {
|
||||||
case torrentGalaxy.Categories.TV_SD:
|
case torrentGalaxy.Categories.TV_SD:
|
||||||
case torrentGalaxy.Categories.TV_HD:
|
case torrentGalaxy.Categories.TV_HD:
|
||||||
return 10;
|
|
||||||
case torrentGalaxy.Categories.MOVIE_SD:
|
case torrentGalaxy.Categories.MOVIE_SD:
|
||||||
case torrentGalaxy.Categories.MOVIE_HD:
|
case torrentGalaxy.Categories.MOVIE_HD:
|
||||||
return 5;
|
return 5;
|
||||||
|
|||||||
Reference in New Issue
Block a user