fix comando api scraping with different formatting

This commit is contained in:
TheBeastLT
2021-09-15 10:01:50 +02:00
committed by TheBeastLT
parent 0bda72e19c
commit 211003a1dc
3 changed files with 10 additions and 9 deletions

View File

@@ -5,7 +5,7 @@ const decode = require('magnet-uri');
const Promises = require('../../lib/promises'); const Promises = require('../../lib/promises');
const { escapeHTML } = require('../../lib/metadata'); const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper'); const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper') const { isPtDubbed, sanitizePtName, sanitizePtLanguages, sanitizePtOriginalName } = require('../scraperHelper')
const defaultTimeout = 30000; const defaultTimeout = 30000;
const maxSearchPage = 50 const maxSearchPage = 50
@@ -111,22 +111,24 @@ function parseTorrentPage(body) {
.map((i, elem) => $(elem).nextUntil('h2, hr')) .map((i, elem) => $(elem).nextUntil('h2, hr'))
.map((i, elem) => $(elem).find('a[href^="magnet"]')) .map((i, elem) => $(elem).find('a[href^="magnet"]'))
.map((i, section) => $(section).attr("href")).get(); .map((i, section) => $(section).attr("href")).get();
const details = $('b:contains(\'Original\'), strong:contains(\'Original\')').parent() const details = $('b:contains(\'Servidor\'), b:contains(\'Original\')').parent()
const imdbIdMatch = details.find('a[href*="imdb.com"]').attr('href') const imdbIdMatch = details.find('a[href*="imdb.com"]').attr('href')
const torrents = magnets.map(magnetLink => { const torrents = magnets.map(magnetLink => {
const originalName = details.find('strong:contains(\'Original\')').next().text().trim() || const originalNameElem = details.find('strong, b')
details.find('b:contains(\'Original\'), strong:contains(\'Original\')')[0].nextSibling.nodeValue; .filter((i, elem) => $(elem).text().match(/Baixar|Orig(?:\.|inal)/));
const languagesElem = details.find('strong, b')
.filter((i, elem) => $(elem).text().match(/^\s*(Idioma|[AÁ]udio)/));
const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue;
return { return {
title: sanitizePtName(escapeHTML(decode(magnetLink).name.replace(/\+/g, ' '))), title: sanitizePtName(escapeHTML(decode(magnetLink).name.replace(/\+/g, ' '))),
originalName: originalName.replace(/: ?/, '').trim(), originalName: sanitizePtOriginalName(originalName.replace(/: ?/, '')),
year: details.find('a[href*="comando.to/category/"]').text(), year: details.find('a[href*="comando.to/category/"]').text(),
infoHash: decode(magnetLink).infoHash, infoHash: decode(magnetLink).infoHash,
magnetLink: magnetLink, magnetLink: magnetLink,
category: parseCategory($('div.entry-categories').html()), category: parseCategory($('div.entry-categories').html()),
uploadDate: new Date(moment($('a.updated').text(), 'LL', 'pt-br').format()), uploadDate: new Date(moment($('a.updated').text(), 'LL', 'pt-br').format()),
imdbId: imdbIdMatch ? imdbIdMatch.split('/')[4] : null, imdbId: imdbIdMatch ? imdbIdMatch.split('/')[4] : null,
languages: sanitizePtLanguages(details.find( languages: sanitizePtLanguages(languagesElem[0].nextSibling.nodeValue)
'b:contains(\'Idioma\'), b:contains(\'Audio\'), b:contains(\'Áudio\')')[0].nextSibling.nodeValue)
} }
}); });
resolve(torrents.filter((x) => x)); resolve(torrents.filter((x) => x));

View File

@@ -1,7 +1,6 @@
const needle = require("needle") const needle = require("needle")
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const decode = require('magnet-uri'); const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { escapeHTML } = require('../../lib/metadata'); const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper'); const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper') const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper')

View File

@@ -11,7 +11,7 @@ function sanitizePtName(name) {
} }
function sanitizePtOriginalName(name) { function sanitizePtOriginalName(name) {
return name.trim().replace(/S\d+$/, ''); return name.trim().replace(/S\d+$|\d.\s?[Tt]emporada/, '');
} }
function sanitizePtLanguages(languages) { function sanitizePtLanguages(languages) {