[scraper] clean up torrent galaxy provider

This commit is contained in:
TheBeastLT
2021-02-01 19:46:46 +01:00
parent 473747bd33
commit 31b0c85fcf
3 changed files with 39 additions and 30 deletions

View File

@@ -4,7 +4,7 @@ const Sugar = require('sugar-date');
const decode = require('magnet-uri'); const decode = require('magnet-uri');
const Promises = require('../../lib/promises'); const Promises = require('../../lib/promises');
const { escapeHTML } = require('../../lib/metadata'); const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent, defaultOptionsWithProxy } = require('../../lib/requestHelper'); const { getRandomUserAgent } = require('../../lib/requestHelper');
const defaultProxies = [ const defaultProxies = [
'https://1337x.to' 'https://1337x.to'

View File

@@ -3,11 +3,12 @@ const needle = require('needle');
const moment = require('moment'); const moment = require('moment');
const decode = require('magnet-uri'); const decode = require('magnet-uri');
const Promises = require('../../lib/promises'); const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const defaultProxies = [ const defaultProxies = [
'https://torrentgalaxy.to', // 'https://torrentgalaxy.to',
// 'https://torrentgalaxy.mx', // 'https://torrentgalaxy.mx',
// 'https://torrentgalaxy.su' 'https://torrentgalaxy.su'
]; ];
const defaultTimeout = 10000; const defaultTimeout = 10000;
@@ -53,9 +54,9 @@ function search(keyword, config = {}, retries = 2) {
.catch(() => search(keyword, config, retries - 1)); .catch(() => search(keyword, config, retries - 1));
} }
function browse(config = {}, retries = 2) { function browse(config = {}, retries = 2, error = null) {
if (retries === 0) { if (retries === 0) {
return Promise.reject(new Error(`Failed browse request`)); return Promise.reject(error || new Error(`Failed browse request`));
} }
const proxyList = config.proxyList || defaultProxies; const proxyList = config.proxyList || defaultProxies;
const page = config.page || 1; const page = config.page || 1;
@@ -64,17 +65,17 @@ function browse(config = {}, retries = 2) {
return Promises.first(proxyList return Promises.first(proxyList
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`))) .map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`)))
.then((body) => parseTableBody(body)) .then((body) => parseTableBody(body))
.catch(() => browse(config, retries - 1)); .catch((err) => browse(config, retries - 1, err));
} }
function singleRequest(requestUrl) { function singleRequest(requestUrl) {
const options = { open_timeout: defaultTimeout, follow: 2 }; const options = { userAgent: getRandomUserAgent(), open_timeout: defaultTimeout, follow: 2 };
return needle('get', requestUrl, options) return needle('get', requestUrl, options)
.then((response) => { .then((response) => {
const body = response.body; const body = response.body;
if (!body) { if (!body) {
throw new Error(`No body: ${requestUrl}`); throw new Error(`No body: ${requestUrl} with status ${response.statusCode}`);
} else if (body.includes('Access Denied')) { } else if (body.includes('Access Denied')) {
console.log(`Access Denied: ${requestUrl}`); console.log(`Access Denied: ${requestUrl}`);
throw new Error(`Access Denied: ${requestUrl}`); throw new Error(`Access Denied: ${requestUrl}`);
@@ -102,21 +103,25 @@ function parseTableBody(body) {
const row = $(element); const row = $(element);
const magnetLink = row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(2)').attr('href'); const magnetLink = row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(2)').attr('href');
const imdbIdMatch = row.html().match(/search=(tt\d+)/i); const imdbIdMatch = row.html().match(/search=(tt\d+)/i);
torrents.push({ try {
name: row.find('.tgxtablecell div a[title]').first().text(), torrents.push({
infoHash: decode(magnetLink).infoHash, name: row.find('.tgxtablecell div a[title]').first().text(),
magnetLink: magnetLink, infoHash: decode(magnetLink).infoHash,
torrentLink: row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(1)').first().attr('href'), magnetLink: magnetLink,
torrentId: row.find('.tgxtablecell div a[title]').first().attr('href').match(/torrent\/(\d+)/)[1], torrentLink: row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(1)').first().attr('href'),
verified: !!row.find('i.fa-check').length, torrentId: row.find('.tgxtablecell div a[title]').first().attr('href').match(/torrent\/(\d+)/)[1],
category: row.find('div:nth-of-type(n+2) .shrink a').first().attr('href').match(/cat=(\d+)$/)[1], verified: !!row.find('i.fa-check').length,
seeders: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'green\'] b').first().text()), category: row.find('div:nth-of-type(n+2) .shrink a').first().attr('href').match(/cat=(\d+)$/)[1],
leechers: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'#ff0000\'] b').first().text()), seeders: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'green\'] b').first().text()),
languages: row.find('.tgxtablecell img[title]').first().attr('title'), leechers: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'#ff0000\'] b').first().text()),
size: parseSize(row.find('.collapsehide span.badge-secondary').first().text()), languages: row.find('.tgxtablecell img[title]').first().attr('title'),
uploadDate: parseDate(row.find('div.collapsehide:nth-of-type(12)').first().text()), size: parseSize(row.find('.collapsehide span.badge-secondary').first().text()),
imdbId: imdbIdMatch && imdbIdMatch[1], uploadDate: parseDate(row.find('div.collapsehide:nth-of-type(12)').first().text()),
}); imdbId: imdbIdMatch && imdbIdMatch[1],
});
} catch (e) {
console.error('Failed parsing TorrentGalaxy row: ', e);
}
}); });
resolve(torrents); resolve(torrents);

View File

@@ -9,6 +9,7 @@ const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrent
const NAME = 'TorrentGalaxy'; const NAME = 'TorrentGalaxy';
const TYPE_MAPPING = typeMapping(); const TYPE_MAPPING = typeMapping();
const api_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 5000 });
const limiter = new Bottleneck({ maxConcurrent: 10 }); const limiter = new Bottleneck({ maxConcurrent: 10 });
const allowedCategories = [ const allowedCategories = [
torrentGalaxy.Categories.ANIME, torrentGalaxy.Categories.ANIME,
@@ -21,7 +22,6 @@ const allowedCategories = [
torrentGalaxy.Categories.TV_SD, torrentGalaxy.Categories.TV_SD,
torrentGalaxy.Categories.TV_HD, torrentGalaxy.Categories.TV_HD,
torrentGalaxy.Categories.TV_PACKS, torrentGalaxy.Categories.TV_PACKS,
torrentGalaxy.Categories.TV_SPORT,
torrentGalaxy.Categories.DOCUMENTARIES, torrentGalaxy.Categories.DOCUMENTARIES,
]; ];
const packCategories = [ const packCategories = [
@@ -57,7 +57,7 @@ async function scrapeLatestTorrents() {
async function scrapeLatestTorrentsForCategory(category, page = 1) { async function scrapeLatestTorrentsForCategory(category, page = 1) {
console.log(`Scrapping ${NAME} ${category} category page ${page}`); console.log(`Scrapping ${NAME} ${category} category page ${page}`);
return torrentGalaxy.browse(({ category, page })) return api_limiter.schedule(() => torrentGalaxy.browse({ category, page }))
.catch(error => { .catch(error => {
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error); console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
return Promise.resolve([]); return Promise.resolve([]);
@@ -73,10 +73,6 @@ async function processTorrentRecord(record) {
return Promise.resolve('Invalid torrent record'); return Promise.resolve('Invalid torrent record');
} }
if (await checkAndUpdateTorrent(record)) {
return record;
}
const torrent = { const torrent = {
provider: NAME, provider: NAME,
infoHash: record.infoHash, infoHash: record.infoHash,
@@ -92,6 +88,15 @@ async function processTorrentRecord(record) {
languages: !(record.languages || '').includes('Other') ? record.languages : undefined languages: !(record.languages || '').includes('Other') ? record.languages : undefined
}; };
if (await checkAndUpdateTorrent(torrent)) {
return torrent;
}
const isOld = moment(torrent.uploadDate).isBefore(moment().subtract(18, 'month'));
if (torrent.seeders === 0 && isOld && !torrent.pack) {
console.log(`Skipping old unseeded torrent [${torrent.infoHash}] ${torrent.title}`)
return torrent;
}
return createTorrentEntry(torrent).then(() => torrent); return createTorrentEntry(torrent).then(() => torrent);
} }
@@ -116,7 +121,6 @@ function getMaxPage(category) {
switch (category) { switch (category) {
case torrentGalaxy.Categories.TV_SD: case torrentGalaxy.Categories.TV_SD:
case torrentGalaxy.Categories.TV_HD: case torrentGalaxy.Categories.TV_HD:
return 10;
case torrentGalaxy.Categories.MOVIE_SD: case torrentGalaxy.Categories.MOVIE_SD:
case torrentGalaxy.Categories.MOVIE_HD: case torrentGalaxy.Categories.MOVIE_HD:
return 5; return 5;