fix comando api scraping with different formatting
This commit is contained in:
@@ -5,7 +5,7 @@ const decode = require('magnet-uri');
|
|||||||
const Promises = require('../../lib/promises');
|
const Promises = require('../../lib/promises');
|
||||||
const { escapeHTML } = require('../../lib/metadata');
|
const { escapeHTML } = require('../../lib/metadata');
|
||||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||||
const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper')
|
const { isPtDubbed, sanitizePtName, sanitizePtLanguages, sanitizePtOriginalName } = require('../scraperHelper')
|
||||||
|
|
||||||
const defaultTimeout = 30000;
|
const defaultTimeout = 30000;
|
||||||
const maxSearchPage = 50
|
const maxSearchPage = 50
|
||||||
@@ -111,22 +111,24 @@ function parseTorrentPage(body) {
|
|||||||
.map((i, elem) => $(elem).nextUntil('h2, hr'))
|
.map((i, elem) => $(elem).nextUntil('h2, hr'))
|
||||||
.map((i, elem) => $(elem).find('a[href^="magnet"]'))
|
.map((i, elem) => $(elem).find('a[href^="magnet"]'))
|
||||||
.map((i, section) => $(section).attr("href")).get();
|
.map((i, section) => $(section).attr("href")).get();
|
||||||
const details = $('b:contains(\'Original\'), strong:contains(\'Original\')').parent()
|
const details = $('b:contains(\'Servidor\'), b:contains(\'Original\')').parent()
|
||||||
const imdbIdMatch = details.find('a[href*="imdb.com"]').attr('href')
|
const imdbIdMatch = details.find('a[href*="imdb.com"]').attr('href')
|
||||||
const torrents = magnets.map(magnetLink => {
|
const torrents = magnets.map(magnetLink => {
|
||||||
const originalName = details.find('strong:contains(\'Original\')').next().text().trim() ||
|
const originalNameElem = details.find('strong, b')
|
||||||
details.find('b:contains(\'Original\'), strong:contains(\'Original\')')[0].nextSibling.nodeValue;
|
.filter((i, elem) => $(elem).text().match(/Baixar|Orig(?:\.|inal)/));
|
||||||
|
const languagesElem = details.find('strong, b')
|
||||||
|
.filter((i, elem) => $(elem).text().match(/^\s*(Idioma|[AÁ]udio)/));
|
||||||
|
const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue;
|
||||||
return {
|
return {
|
||||||
title: sanitizePtName(escapeHTML(decode(magnetLink).name.replace(/\+/g, ' '))),
|
title: sanitizePtName(escapeHTML(decode(magnetLink).name.replace(/\+/g, ' '))),
|
||||||
originalName: originalName.replace(/: ?/, '').trim(),
|
originalName: sanitizePtOriginalName(originalName.replace(/: ?/, '')),
|
||||||
year: details.find('a[href*="comando.to/category/"]').text(),
|
year: details.find('a[href*="comando.to/category/"]').text(),
|
||||||
infoHash: decode(magnetLink).infoHash,
|
infoHash: decode(magnetLink).infoHash,
|
||||||
magnetLink: magnetLink,
|
magnetLink: magnetLink,
|
||||||
category: parseCategory($('div.entry-categories').html()),
|
category: parseCategory($('div.entry-categories').html()),
|
||||||
uploadDate: new Date(moment($('a.updated').text(), 'LL', 'pt-br').format()),
|
uploadDate: new Date(moment($('a.updated').text(), 'LL', 'pt-br').format()),
|
||||||
imdbId: imdbIdMatch ? imdbIdMatch.split('/')[4] : null,
|
imdbId: imdbIdMatch ? imdbIdMatch.split('/')[4] : null,
|
||||||
languages: sanitizePtLanguages(details.find(
|
languages: sanitizePtLanguages(languagesElem[0].nextSibling.nodeValue)
|
||||||
'b:contains(\'Idioma\'), b:contains(\'Audio\'), b:contains(\'Áudio\')')[0].nextSibling.nodeValue)
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
resolve(torrents.filter((x) => x));
|
resolve(torrents.filter((x) => x));
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
const needle = require("needle")
|
const needle = require("needle")
|
||||||
const cheerio = require("cheerio");
|
const cheerio = require("cheerio");
|
||||||
const decode = require('magnet-uri');
|
const decode = require('magnet-uri');
|
||||||
const Promises = require('../../lib/promises');
|
|
||||||
const { escapeHTML } = require('../../lib/metadata');
|
const { escapeHTML } = require('../../lib/metadata');
|
||||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||||
const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper')
|
const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper')
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ function sanitizePtName(name) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function sanitizePtOriginalName(name) {
|
function sanitizePtOriginalName(name) {
|
||||||
return name.trim().replace(/S\d+$/, '');
|
return name.trim().replace(/S\d+$|\d.\s?[Tt]emporada/, '');
|
||||||
}
|
}
|
||||||
|
|
||||||
function sanitizePtLanguages(languages) {
|
function sanitizePtLanguages(languages) {
|
||||||
|
|||||||
Reference in New Issue
Block a user