From 10615e4aed4dc2946cd165504b895e9f98f2a1ad Mon Sep 17 00:00:00 2001 From: TheBeastLT Date: Sat, 25 Apr 2020 10:29:10 +0200 Subject: [PATCH] [scraper] updates 1337x search api to extend results --- scraper/scrapers/1337x/1337x_api.js | 14 ++++++++++++-- scraper/scrapers/1337x/1337x_scraper.js | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/scraper/scrapers/1337x/1337x_api.js b/scraper/scrapers/1337x/1337x_api.js index a1d36ba..7e5fa45 100644 --- a/scraper/scrapers/1337x/1337x_api.js +++ b/scraper/scrapers/1337x/1337x_api.js @@ -9,6 +9,7 @@ const defaultProxies = [ 'https://1337x.to' ]; const defaultTimeout = 10000; +const maxSearchPage = 50; const Categories = { MOVIE: 'Movies', @@ -42,10 +43,19 @@ function search(keyword, config = {}, retries = 2) { } const proxyList = config.proxyList || defaultProxies; const page = config.page || 1; + const category = config.category; + const extendToPage = Math.min(maxSearchPage, (config.extendToPage || 1)) + const requestUrl = proxyUrl => category + ? `${proxyUrl}/category-search/${keyword}/${category}/${page}/` + : `${proxyUrl}/search/${keyword}/${page}/`; return Promises.first(proxyList - .map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/`, config))) - .then((body) => parseTableBody(body)) + .map(proxyUrl => singleRequest(requestUrl(proxyUrl), config))) + .then(body => parseTableBody(body)) + .then(torrents => torrents.length === 40 && page < extendToPage + ? search(keyword, { ...config, page: page + 1 }).catch(() => []) + .then(nextTorrents => torrents.concat(nextTorrents)) + : torrents) .catch((err) => search(keyword, config, retries - 1)); } diff --git a/scraper/scrapers/1337x/1337x_scraper.js b/scraper/scrapers/1337x/1337x_scraper.js index ab9f81d..f7cd29a 100644 --- a/scraper/scrapers/1337x/1337x_scraper.js +++ b/scraper/scrapers/1337x/1337x_scraper.js @@ -73,7 +73,7 @@ async function processTorrentRecord(record) { infoHash: torrentFound.infoHash, provider: NAME, torrentId: torrentFound.torrentId, - title: torrentFound.name.replace(/\t|\s+/g, ' '), + title: torrentFound.name.replace(/\t|\s+/g, ' ').trim(), type: TYPE_MAPPING[torrentFound.category], size: torrentFound.size, seeders: torrentFound.seeders,