diff --git a/scraper/scrapers/darkmahou/darkmahou_api.js b/scraper/scrapers/darkmahou/darkmahou_api.js index e45c54e..dc96576 100644 --- a/scraper/scrapers/darkmahou/darkmahou_api.js +++ b/scraper/scrapers/darkmahou/darkmahou_api.js @@ -8,13 +8,11 @@ const { getRandomUserAgent } = require("../../lib/requestHelper"); const defaultTimeout = 10000; const maxSearchPage = 50; -const limiter = new Bottleneck({ maxConcurrent: 10 }); - -const defaultProxies = ["https://darkmahou.com"]; +const defaultProxies = ['https://darkmahou.com']; const Categories = { - MOVIE: "movie", - ANIME: "tv", + MOVIE: 'movie', + ANIME: 'tv', OVA: 'ova' }; @@ -25,11 +23,11 @@ function torrent(torrentId, config = {}, retries = 2) { const proxyList = config.proxyList || defaultProxies; const slug = torrentId.split("/")[3]; return Promises.first( - proxyList.map((proxyUrl) => singleRequest(`${proxyUrl}/${slug}`, config)) - ) - .then((body) => parseTorrentPage(body)) - .then((torrent) => torrent.map((el) => ({ torrentId: slug, ...el }))) - .catch((err) => torrent(slug, config, retries - 1)); + proxyList.map((proxyUrl) => singleRequest(`${proxyUrl}/${slug}`, config)) + ) + .then((body) => parseTorrentPage(body)) + .then((torrent) => torrent.map((el) => ({ torrentId: slug, ...el }))) + .catch((err) => torrent(slug, config, retries - 1)); } function search(keyword, config = {}, retries = 2) { @@ -42,17 +40,17 @@ function search(keyword, config = {}, retries = 2) { const requestUrl = (proxyUrl) => `${proxyUrl}/page/${page}/?s=${keyword}`; return Promises.first( - proxyList.map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config)) - ) - .then((body) => parseTableBody(body)) - .then((torrents) => - torrents.length === 40 && page < extendToPage - ? search(keyword, { ...config, page: page + 1 }) - .catch(() => []) - .then((nextTorrents) => torrents.concat(nextTorrents)) - : torrents - ) - .catch((err) => search(keyword, config, retries - 1)); + proxyList.map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config)) + ) + .then((body) => parseTableBody(body)) + .then((torrents) => + torrents.length === 40 && page < extendToPage + ? search(keyword, { ...config, page: page + 1 }) + .catch(() => []) + .then((nextTorrents) => torrents.concat(nextTorrents)) + : torrents + ) + .catch((err) => search(keyword, config, retries - 1)); } function browse(config = {}, retries = 2) { @@ -63,15 +61,15 @@ function browse(config = {}, retries = 2) { const page = config.page || 1; const category = config.category; const requestUrl = (proxyUrl) => - category - ? `${proxyUrl}/category/${category}/page/${page}/` - : `${proxyUrl}/page/${page}/`; + category + ? `${proxyUrl}/category/${category}/page/${page}/` + : `${proxyUrl}/page/${page}/`; return Promises.first( - proxyList.map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config)) - ) - .then((body) => parseTableBody(body)) - .catch((err) => browse(config, retries - 1)); + proxyList.map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config)) + ) + .then((body) => parseTableBody(body)) + .catch((err) => browse(config, retries - 1)); } function singleRequest(requestUrl, config = {}) { @@ -87,8 +85,8 @@ function singleRequest(requestUrl, config = {}) { if (!body) { throw new Error(`No body: ${requestUrl}`); } else if ( - body.includes("502: Bad gateway") || - body.includes("403 Forbidden") + body.includes("502: Bad gateway") || + body.includes("403 Forbidden") ) { throw new Error(`Invalid body contents: ${requestUrl}`); } @@ -118,7 +116,7 @@ function parseTableBody(body) { } function parseTorrentPage(body) { - return new Promise(async(resolve, reject) => { + return new Promise(async (resolve, reject) => { const $ = cheerio.load(body); if (!$) { @@ -133,8 +131,8 @@ function parseTorrentPage(body) { const torrent = magnets.map((magnetLink) => { return { title: decode(magnetLink).name, - original_name: details.find('h1.entry-title').text(), - year: details.find('b:contains(\'Lançado:\')')[0].nextSibling.nodeValue || '', + originalName: details.find('h1.entry-title').text(), + year: details.find('b:contains(\'Lançado:\')')[0].nextSibling.nodeValue || '', infoHash: decode(magnetLink).infoHash, magnetLink: magnetLink, category: details.find('b:contains(\'Tipo:\')').next().attr('href').split('/')[4], diff --git a/scraper/scrapers/darkmahou/darkmahou_scraper.js b/scraper/scrapers/darkmahou/darkmahou_scraper.js index a6f40c4..2168b0e 100644 --- a/scraper/scrapers/darkmahou/darkmahou_scraper.js +++ b/scraper/scrapers/darkmahou/darkmahou_scraper.js @@ -1,16 +1,15 @@ const moment = require("moment"); const Bottleneck = require("bottleneck"); -const leetx = require("./darkmahou_api"); +const darkmahou = require("./darkmahou_api"); const { Type } = require("../../lib/types"); const repository = require("../../lib/repository"); const Promises = require("../../lib/promises"); const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries"); const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent"); -const { getImdbId } = require("../../lib/metadata"); +const { getKitsuId } = require("../../lib/metadata"); const NAME = "DarkMahou"; const UNTIL_PAGE = 5; -const TYPE_MAPPING = typeMapping(); const limiter = new Bottleneck({ maxConcurrent: 5 }); @@ -20,109 +19,87 @@ async function scrape() { console.log(`[${scrapeStart}] starting ${NAME} scrape...`); return scrapeLatestTorrents() - .then(() => { - lastScrape.lastScraped = scrapeStart; - return lastScrape.save(); - }) - .then(() => console.log(`[${moment()}] finished ${NAME} scrape`)); + .then(() => { + lastScrape.lastScraped = scrapeStart; + return lastScrape.save(); + }) + .then(() => console.log(`[${moment()}] finished ${NAME} scrape`)); } async function updateSeeders(torrent) { - return limiter.schedule(() => leetx.torrent(torrent.torrentId)); + return limiter.schedule(() => darkmahou.torrent(torrent.torrentId)); } async function scrapeLatestTorrents() { const allowedCategories = [ - leetx.Categories.MOVIE, - leetx.Categories.ANIME, - leetx.Categories.OVA + darkmahou.Categories.MOVIE, + darkmahou.Categories.ANIME, + darkmahou.Categories.OVA ]; - return Promises.sequence( - allowedCategories.map( - (category) => () => scrapeLatestTorrentsForCategory(category) - ) - ).then((entries) => entries.reduce((a, b) => a.concat(b), [])); + return Promises.sequence(allowedCategories + .map((category) => () => scrapeLatestTorrentsForCategory(category))) + .then((entries) => entries.reduce((a, b) => a.concat(b), [])); } async function scrapeLatestTorrentsForCategory(category, page = 1) { - console.log({Scraper: `Scrapping ${NAME} ${category} category page ${page}`}); - return leetx - .browse({ category, page }) - .catch((error) => { - console.warn( - `Failed ${NAME} scrapping for [${page}] ${category} due: `, - error - ); - return Promise.resolve([]); - }) - .then((torrents) => Promise.all(torrents.map((torrent) => limiter.schedule(() => processTorrentRecord(torrent))))) - .then((resolved) => resolved.length > 0 && page < untilPage(category) ? scrapeLatestTorrentsForCategory(category, page + 1) : Promise.resolve()); + console.log(`Scrapping ${NAME} ${category} category page ${page}`); + return darkmahou + .browse({ category, page }) + .catch((error) => { + console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error); + return Promise.resolve([]); + }) + .then((torrents) => Promise.all(torrents.map((torrent) => limiter.schedule(() => processEntry(torrent))))) + .then((resolved) => resolved.length > 0 && page < untilPage(category) + ? scrapeLatestTorrentsForCategory(category, page + 1) + : Promise.resolve()); } -async function processTorrentRecord(record) { - if (await checkAndUpdateTorrent({ provider: NAME, ...record })) { - return record; - } - const torrentEntrys = await leetx - .torrent(record.torrentId) - .catch(() => undefined); - if (torrentEntrys === undefined) { - return Promise.resolve([]) - } - return Promise.allSettled( - torrentEntrys.map(async (torrentFound) => { - if (!torrentFound || !TYPE_MAPPING[torrentFound.category]) { - return Promise.resolve("Invalid torrent record"); - } - if (isNaN(torrentFound.uploadDate)) { - console.warn( - `Incorrect upload date for [${torrentFound.infoHash}] ${torrentFound.name}` - ); - return; - } - if (await checkAndUpdateTorrent(torrentFound)) { - return torrentFound; - } - if (!torrentFound.size) { - await updateTorrentSize(torrentFound) - .catch((err) => Promise.resolve(err)) - } - if (!torrentFound.seeders) { - await updateCurrentSeeders(torrentFound) - .then(response => response.seeders === 0 ? delete response.seeders : response) - } - if (!torrentFound.imdbId) { - torrentFound.imdbId = await getImdbId(torrentFound.original_name, torrentFound.year, TYPE_MAPPING[torrentFound.category]) - } - const torrent = { - infoHash: torrentFound.infoHash, - provider: NAME, - torrentId: torrentFound.torrentId, - title: torrentFound.title.replace(/\t|\s+/g, " ").trim(), - type: Type.ANIME, - imdbId: torrentFound.imdbId, - uploadDate: torrentFound.uploadDate, - seeders: torrentFound.seeders, - }; - return createTorrentEntry(torrent); - }) - ); +async function processEntry(entry) { + return darkmahou.torrent(entry.torrentId) + .then(records => Promises.sequence(records.map(record => () => processTorrentRecord(record)))) + .catch(() => undefined); } -function typeMapping() { - const mapping = {}; - mapping[leetx.Categories.MOVIE] = Type.MOVIE; - mapping[leetx.Categories.ANIME] = Type.SERIES; - mapping[leetx.Categories.OVA] = Type.ANIME - return mapping; +async function processTorrentRecord(foundTorrent) { + if (await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent })) { + return foundTorrent; + } + + if (!foundTorrent.size) { + await updateTorrentSize(foundTorrent); + } + if (!Number.isInteger(foundTorrent.seeders)) { + await updateCurrentSeeders(foundTorrent); + } + if (!foundTorrent.imdbId && !foundTorrent.kitsuId) { + const info = { title: foundTorrent.originalName, year: foundTorrent.year }; + foundTorrent.kitsuId = await getKitsuId(info).catch(() => undefined); + } + + const torrent = { + infoHash: foundTorrent.infoHash, + provider: NAME, + torrentId: foundTorrent.torrentId, + title: foundTorrent.title, + type: Type.ANIME, + imdbId: foundTorrent.imdbId, + kitsuId: foundTorrent.kitsuId, + uploadDate: foundTorrent.uploadDate, + seeders: foundTorrent.seeders, + size: foundTorrent.size, + files: foundTorrent.files, + languages: foundTorrent.languages + }; + return createTorrentEntry(torrent); } function untilPage(category) { - if (leetx.Categories.ANIME === category) { + if (darkmahou.Categories.ANIME === category) { return 5; } - if (leetx.Categories.OVA === category) { + if (darkmahou.Categories.OVA === category) { return 4; } return UNTIL_PAGE;