diff --git a/scraper/scrapers/comando/comando_api.js b/scraper/scrapers/comando/comando_api.js index 2256e63..ca05705 100644 --- a/scraper/scrapers/comando/comando_api.js +++ b/scraper/scrapers/comando/comando_api.js @@ -29,7 +29,7 @@ function torrent(torrentId, config = {}, retries = 2) { .then((torrent) => torrent.map(el => ({ torrentId: slug, ...el }))) .catch((err) => { console.warn(`Failed Comando ${slug} request: `, err); - return torrent(slug, config, retries - 1) + return torrent(torrentId, config, retries - 1) }); } @@ -116,14 +116,14 @@ function parseTorrentPage(body) { const torrents = magnets.map(magnetLink => { const decodedMagnet = decode(magnetLink); const originalNameElem = details.find('strong, b') - .filter((i, elem) => $(elem).text().match(/Baixar|Orig(?:\.|inal)/)); + .filter((i, elem) => $(elem).text().match(/Baixar|Orig(?:\.|inal)|^Título:/)); const languagesElem = details.find('strong, b') .filter((i, elem) => $(elem).text().match(/^\s*([IÍ]dioma|[AÁ]udio)/)); - const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue; + const originalName = parseOriginalName(originalNameElem); const title = decodedMagnet.name && escapeHTML(decodedMagnet.name.replace(/\+/g, ' ')); return { - title: title ? sanitizePtName(title) : originalName.replace(/: ?/, ''), - originalName: sanitizePtOriginalName(originalName.replace(/: ?/, '')), + title: title ? 
sanitizePtName(title) : originalName, + originalName: sanitizePtOriginalName(originalName), year: details.find('a[href*="comando.to/category/"]').text(), infoHash: decodedMagnet.infoHash, magnetLink: magnetLink, @@ -137,6 +137,14 @@ function parseTorrentPage(body) { }); } +function parseOriginalName(originalNameElem) { + if (!originalNameElem[0]) { + return ''; + } + const originalName = originalNameElem.next().text().trim() || originalNameElem[0].nextSibling.nodeValue; + return originalName.replace(/: ?/, ''); +} + function parseCategory(categorys) { const $ = cheerio.load(categorys) if ($('a:contains(\'animes\')').text()) { diff --git a/scraper/scrapers/comando/comando_scraper.js b/scraper/scrapers/comando/comando_scraper.js index 377c7ce..6062871 100644 --- a/scraper/scrapers/comando/comando_scraper.js +++ b/scraper/scrapers/comando/comando_scraper.js @@ -72,7 +72,7 @@ async function processTorrentRecord(foundTorrent) { if (!Number.isInteger(foundTorrent.seeders)) { await updateCurrentSeeders(foundTorrent); } - if (!foundTorrent.imdbId) { + if (!foundTorrent.imdbId && foundTorrent.originalName) { const info = { title: foundTorrent.originalName, year: foundTorrent.year }; foundTorrent.imdbId = await getImdbId(info, TYPE_MAPPING[foundTorrent.category]).catch(() => undefined); } diff --git a/scraper/scrapers/comoeubaixo/comoeubaixo_api.js b/scraper/scrapers/comoeubaixo/comoeubaixo_api.js index 9a315a8..14cbe6e 100644 --- a/scraper/scrapers/comoeubaixo/comoeubaixo_api.js +++ b/scraper/scrapers/comoeubaixo/comoeubaixo_api.js @@ -21,13 +21,13 @@ function torrent(torrentId, config = {}, retries = 2) { if (!torrentId || retries === 0) { return Promise.reject(new Error(`Failed ${torrentId} query`)); } - const slug = torrentId.split("/")[3]; + const slug = encodeURIComponent(torrentId.split("/")[3]); return singleRequest(`${baseUrl}/${slug}/`, config) .then((body) => parseTorrentPage(body)) .then((torrent) => torrent.map(el => ({ torrentId: slug, ...el }))) .catch((err) 
=> { - console.warn(`Failed ComoEuBaixo ${slug} request: `, err); - return torrent(slug, config, retries - 1) + console.warn(`Failed ComoEuBaixo ${torrentId} request: `, err); + return torrent(torrentId, config, retries - 1) }); } @@ -67,7 +67,7 @@ function singleRequest(requestUrl, config = {}) { return needle('get', requestUrl, options) .then((response) => { const body = response.body; - if (!body) { + if (!body || (Buffer.isBuffer(body) && !body.length)) { throw new Error(`No body: ${requestUrl}`); } else if (body.includes('502: Bad gateway') || body.includes('403 Forbidden')) { @@ -118,7 +118,7 @@ function parseTorrentPage(body) { const year = details.find('strong:contains(\'Data de Lançamento: \')').next().text().trim(); const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${sanitizedTitle.trim()}`; return { - title: sanitizedTitle.length > 4 ? sanitizedTitle : fallBackTitle, + title: sanitizedTitle.length > 5 ? sanitizedTitle : fallBackTitle, infoHash: decode(magnetLink).infoHash, magnetLink: magnetLink, category: category, diff --git a/scraper/scrapers/lapumia/lapumia_api.js b/scraper/scrapers/lapumia/lapumia_api.js index 4b4c238..66a16de 100644 --- a/scraper/scrapers/lapumia/lapumia_api.js +++ b/scraper/scrapers/lapumia/lapumia_api.js @@ -27,7 +27,7 @@ function torrent(torrentId, config = {}, retries = 2) { .then((torrent) => torrent.map(el => ({ torrentId: slug, ...el }))) .catch((err) => { console.warn(`Failed Lapumia ${slug} request: `, err); - return torrent(slug, config, retries - 1) + return torrent(torrentId, config, retries - 1) }); } diff --git a/scraper/scrapers/ondebaixa/ondebaixa_api.js b/scraper/scrapers/ondebaixa/ondebaixa_api.js index 09ae911..d7268b1 100644 --- a/scraper/scrapers/ondebaixa/ondebaixa_api.js +++ b/scraper/scrapers/ondebaixa/ondebaixa_api.js @@ -27,7 +27,7 @@ function torrent(torrentId, config = {}, retries = 2) { .then((torrent) => torrent.map(el => ({ torrentId: slug, ...el }))) .catch((err) => { console.warn(`Failed 
OndeBaixo ${slug} request: `, err); - return torrent(slug, config, retries - 1) + return torrent(torrentId, config, retries - 1) }); } diff --git a/scraper/scrapers/scraperHelper.js b/scraper/scrapers/scraperHelper.js index 46d997d..2df9f78 100644 --- a/scraper/scrapers/scraperHelper.js +++ b/scraper/scrapers/scraperHelper.js @@ -5,8 +5,9 @@ function isPtDubbed(name) { function sanitizePtName(name) { return name .replace(/(.*)\b(\d{3,4}P)\b(?!.*\d{3,4}[Pp])(.*)/, '$1$3 $2') // add resolution to the end if missing - .replace(/^[[{]?(?:ACESSE.*|WWW\.)?[A-Z]+\.(COM|ORG|TO|TV)\b\s?[-\]}]+[\s.]*/i, '') // replace watermarks + .replace(/^[[{]?(?:ACESSE.*|WWW\.)?[A-Z]+\.(COM|NET|ORG|TO|TV|ME)\b\s*[-\]}]+[\s.]*/i, '') // replace watermarks .replace(/^(\d*(?:\.\d{1,2})?(?:[4A-Z-]{3,}|P)[-.]+)+/, '') // replace metadata prefixes + .replace(/^(COM|NET|ORG|TO|TV|ME)\b\s*-+[\s.]*/, '') // replace dangling site endings .trim(); }