diff --git a/addon/lib/filter.js b/addon/lib/filter.js index 47ba88b..fb03031 100644 --- a/addon/lib/filter.js +++ b/addon/lib/filter.js @@ -68,6 +68,11 @@ const Providers = { key: 'ondebaixa', label: 'OndeBaixa', foreign: true + }, + { + key: 'torrent9', + label: 'Torrent9', + foreign: true } ] }; diff --git a/scraper/lib/repository.js b/scraper/lib/repository.js index 29c6ea8..07183c5 100644 --- a/scraper/lib/repository.js +++ b/scraper/lib/repository.js @@ -1,7 +1,6 @@ const moment = require('moment'); const Promises = require('./promises') -const { Sequelize, DataTypes, fn, col, literal } = require('sequelize'); -const Op = Sequelize.Op; +const { Sequelize, Op, DataTypes, fn, col, literal } = require('sequelize'); const DATABASE_URI = process.env.DATABASE_URI; diff --git a/scraper/scheduler/scrapers.js b/scraper/scheduler/scrapers.js index 3bb6c48..860cfe4 100644 --- a/scraper/scheduler/scrapers.js +++ b/scraper/scheduler/scrapers.js @@ -3,7 +3,6 @@ const thepiratebayFakeRemoval = require('../scrapers/thepiratebay/thepiratebay_f const ytsScraper = require('../scrapers/yts/yts_scraper'); const eztvScraper = require('../scrapers/eztv/eztv_scraper'); const leetxScraper = require('../scrapers/1337x/1337x_scraper'); -const kickassScraper = require('../scrapers/kickass/kickass_scraper'); const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper'); const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper'); const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper'); @@ -16,6 +15,7 @@ const Lapumia = require('../scrapers/lapumia/lapumia_scraper') const OndeBaixa = require('../scrapers/ondebaixa/ondebaixa_scraper'); const AnimesTorrent = require('../scrapers/animestorrent/animestorrent_scraper') const DarkMahou = require('../scrapers/darkmahou/darkmahou_scraper') +const torrent9Scraper = require('../scrapers/torrent9/torrent9_scraper'); module.exports = [ { scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? 
* *' }, @@ -28,7 +28,7 @@ module.exports = [ { scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 0 */12 ? * *' }, { scraper: torrentGalaxyScraper, name: torrentGalaxyScraper.NAME, cron: '0 0 */4 ? * *' }, { scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' }, - // { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' }, + { scraper: torrent9Scraper, name: torrent9Scraper.NAME, cron: '0 0 */4 ? * *' }, { scraper: Comando, name: Comando.NAME, cron: '0 0 */4 ? * *' }, { scraper: ComoEuBaixo, name: ComoEuBaixo.NAME, cron: '0 0 */4 ? * *' }, { scraper: Lapumia, name: Lapumia.NAME, cron: '0 0 */4 ? * *' }, diff --git a/scraper/scrapers/1337x/1337x_api.js b/scraper/scrapers/1337x/1337x_api.js index f1c379a..fa9dad6 100644 --- a/scraper/scrapers/1337x/1337x_api.js +++ b/scraper/scrapers/1337x/1337x_api.js @@ -5,6 +5,7 @@ const decode = require('magnet-uri'); const Promises = require('../../lib/promises'); const { escapeHTML } = require('../../lib/metadata'); const { getRandomUserAgent } = require('../../lib/requestHelper'); +const { parseSize } = require("../scraperHelper"); const defaultProxies = [ 'https://1337x.to' @@ -164,19 +165,4 @@ function parseDate(dateString) { return Sugar.Date.create(dateString); } -function parseSize(sizeText) { - if (!sizeText) { - return undefined; - } - let scale = 1; - if (sizeText.includes('GB')) { - scale = 1024 * 1024 * 1024 - } else if (sizeText.includes('MB')) { - scale = 1024 * 1024; - } else if (sizeText.includes('KB')) { - scale = 1024; - } - return Math.floor(parseFloat(sizeText.replace(/,/g, '')) * scale); -} - module.exports = { torrent, search, browse, Categories }; diff --git a/scraper/scrapers/eztv/eztv_api.js b/scraper/scrapers/eztv/eztv_api.js index 42ddff7..0e8ec2c 100644 --- a/scraper/scrapers/eztv/eztv_api.js +++ b/scraper/scrapers/eztv/eztv_api.js @@ -1,8 +1,10 @@ const axios = require('axios'); const cheerio = require('cheerio'); const moment = 
require('moment'); +const { decode } = require("magnet-uri"); const Promises = require('../../lib/promises'); const { getRandomUserAgent } = require('./../../lib/requestHelper'); +const { parseSize } = require("../scraperHelper"); const defaultProxies = [ 'https://eztv.re' @@ -93,10 +95,11 @@ function parseTorrentPage(body) { reject(new Error('Failed loading body')); } const content = $('table[class="forum_header_border_normal"]'); + const magnetLink = content.find('a[title="Magnet Link"]').attr('href'); const torrent = { name: content.find('h1 > span').text().replace(/EZTV$/, ''), - infoHash: content.find('b:contains(\'Torrent Hash:\')')[0].nextSibling.data.trim().toLowerCase(), - magnetLink: content.find('a[title="Magnet Link"]').attr('href'), + infoHash: decode(magnetLink).infoHash, + magnetLink: magnetLink, torrentLink: content.find('a[title="Download Torrent"]').attr('href'), seeders: parseInt(content.find('span[class="stat_red"]').first().text(), 10) || 0, size: parseSize(content.find('b:contains(\'Filesize:\')')[0].nextSibling.data), @@ -107,21 +110,6 @@ function parseTorrentPage(body) { }); } -function parseSize(sizeText) { - if (!sizeText) { - return undefined; - } - let scale = 1; - if (sizeText.includes('GB')) { - scale = 1024 * 1024 * 1024 - } else if (sizeText.includes('MB')) { - scale = 1024 * 1024; - } else if (sizeText.includes('KB') || sizeText.includes('kB')) { - scale = 1024; - } - return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale); -} - function jitter() { return Promises.delay(minDelay + Math.round(Math.random() * jitterDelay)) } diff --git a/scraper/scrapers/kickass/kickass_api.js b/scraper/scrapers/kickass/kickass_api.js index 462c555..43fb061 100644 --- a/scraper/scrapers/kickass/kickass_api.js +++ b/scraper/scrapers/kickass/kickass_api.js @@ -3,6 +3,7 @@ const cheerio = require('cheerio'); const moment = require('moment'); const decode = require('magnet-uri'); const Promises = require('../../lib/promises'); +const { 
/**
 * Converts a human-readable size label into a whole number of bytes.
 *
 * Recognised unit markers (all treated as powers of 1024):
 *   - GB / GiB / Go
 *   - MB / MiB / Mo
 *   - KB / kB / KiB / Ko
 * Text without any of these markers is read as a plain byte count.
 * Apostrophes and commas are stripped before the number is parsed, so
 * "1,024 MB" is read as 1024 MB.
 *
 * @param {string} sizeText - raw size text, e.g. "1.4 GB" or "700 Mo".
 * @returns {number|undefined} size in bytes rounded down, or undefined when
 *     the text is empty or does not start with a number.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  // Coerce so that a non-string truthy value cannot break `.replace` below.
  const text = String(sizeText);

  let scale = 1;
  if (/Gi?B|Go/.test(text)) {
    scale = 1024 * 1024 * 1024;
  } else if (/Mi?B|Mo/.test(text)) {
    scale = 1024 * 1024;
  } else if (/[Kk]i?B|Ko/.test(text)) {
    scale = 1024;
  }

  const amount = parseFloat(text.replace(/[',]/g, ''));
  // parseFloat yields NaN for text with no leading number; report that the
  // same way as missing input instead of leaking NaN to callers.
  if (Number.isNaN(amount)) {
    return undefined;
  }
  return Math.floor(amount * scale);
}
/**
 * Extracts the torrent rows from a Torrent9 listing page.
 *
 * Every `tbody tr` row is inspected; the anchor found under `td a` supplies
 * the name (its text) and the torrent id (the part of its href that follows
 * "torrent/"). Rows where that lookup throws (for example, no anchor or an
 * href without "torrent/") are logged and skipped.
 *
 * @param {string} body - HTML of the listing page.
 * @returns {Promise<Array<{name: string, torrentId: string, seeders: number}>>}
 *     resolves with the rows that could be parsed; `seeders` is NaN when the
 *     `span.seed_ok` text is not numeric.
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      // Stop here: without a loaded document there is nothing to iterate.
      return reject(new Error('Failed loading body'));
    }

    const torrents = [];

    $('tbody tr').each((i, element) => {
      const row = $(element);
      const titleElement = row.find('td a');
      try {
        torrents.push({
          name: titleElement.text().trim(),
          torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
          seeders: parseInt(row.find('span.seed_ok').first().text(), 10),
        });
      } catch (e) {
        console.error('Failed parsing Torrent9 row: ', e);
      }
    });

    return resolve(torrents);
  });
}
/**
 * Runs one full scrape of the provider.
 *
 * Loads the provider record by NAME, scrapes the latest torrents, then stores
 * the time the scrape started in the record's `lastScraped` field and saves it.
 * Start and finish are logged to the console.
 *
 * @returns {Promise<void>} settles once the provider record has been saved.
 */
async function scrape() {
  const startedAt = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();

  // Record the start time (not the finish time) as the last scrape marker.
  provider.lastScraped = startedAt;
  await provider.save();

  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Scrapes one listing page of a category with the given client and then
 * continues with the next page until `getUntilPage(category)` is reached or
 * a page yields no records.
 *
 * A failed browse request is logged and treated as an empty page. Each record
 * of the page is handed to `processTorrentRecord` through `limiter`.
 *
 * @param {object} client - API module exposing `browse` (and `torrent`).
 * @param {string} category - category key passed to `client.browse`.
 * @param {number} [page=1] - page number to scrape.
 * @returns {Promise<Array>} resolves with an empty array after the last page.
 */
async function scrapeLatestTorrentsForCategory(client, category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);

  let records;
  try {
    records = await api_limiter.schedule(() => client.browse({ category, page }));
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    records = [];
  }

  const processed = await Promise.all(
      records.map((record) => limiter.schedule(() => processTorrentRecord(client, record))));

  const hasMorePages = processed.length > 0 && page < getUntilPage(category);
  if (hasMorePages) {
    return scrapeLatestTorrentsForCategory(client, category, page + 1);
  }
  return [];
}
/**
 * Fetches and parses a single torrent page, retrying on failure.
 *
 * The page at `${baseUrl}/torrent/${torrentId}` is requested with
 * `singleRequest` and parsed with `parseTorrentPage`; the resolved object is
 * the parsed page merged with `torrentId`. Each failure is logged and the
 * request is retried with one fewer attempt left.
 *
 * @param {string} torrentId - id segment of the torrent page url.
 * @param {object} [config={}] - passed through unchanged to retries.
 * @param {number} [retries=2] - attempts left; zero or less rejects at once.
 * @param {Error|string|null} [error=null] - failure from the previous attempt,
 *     appended to the rejection message once attempts run out.
 * @returns {Promise<object>} parsed torrent details including `torrentId`.
 *     Rejects with an Error when `torrentId` is missing or attempts run out.
 */
function torrent(torrentId, config = {}, retries = 2, error = null) {
  // `<= 0` rather than `=== 0` so a negative count cannot recurse forever.
  if (!torrentId || retries <= 0) {
    const reason = error ? `: ${error.message || error}` : '';
    return Promise.reject(new Error(`Failed ${torrentId} search${reason}`));
  }

  return singleRequest(`${baseUrl}/torrent/${torrentId}`)
      .then((body) => parseTorrentPage(body))
      .then((details) => ({ torrentId, ...details }))
      .catch((err) => {
        console.warn(`Failed Torrent9 ${torrentId} request: `, err);
        // Carry the cause along, the same way `browse` does for its retries.
        return torrent(torrentId, config, retries - 1, err);
      });
}
/**
 * Extracts the torrent rows from a Torrent9 (v2 site) listing page.
 *
 * Every `tr` row is inspected; the anchor found under `td a` supplies the
 * title (its `title` attribute) and the torrent id (the part of its href that
 * follows "torrent/"). Rows where that lookup throws (for example, rows with
 * no such anchor) are logged and skipped.
 *
 * @param {string} body - HTML of the listing page.
 * @returns {Promise<Array<{title: string, torrentId: string, seeders: number}>>}
 *     resolves with the rows that could be parsed; `seeders` is NaN when the
 *     `span.seed_ok` text is not numeric.
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      // Stop here: without a loaded document there is nothing to iterate.
      return reject(new Error('Failed loading body'));
    }

    const torrents = [];

    $('tr').each((i, element) => {
      const row = $(element);
      const titleElement = row.find('td a');
      try {
        torrents.push({
          title: titleElement.attr('title').trim(),
          torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
          seeders: parseInt(row.find('span.seed_ok').first().text(), 10),
        });
      } catch (e) {
        console.error('Failed parsing Torrent9 row: ', e);
      }
    });

    return resolve(torrents);
  });
}
/**
 * Derives the torrent name from the details section of a torrent page.
 *
 * When exactly one `p strong` element exists, the name is the concatenated
 * text of its direct text nodes. Otherwise the text of the parent of the
 * matched elements is searched for a dot-separated release-style name.
 *
 * @param {object} details - cheerio selection of the details container.
 * @returns {string|null} the name, or null when the fallback pattern does
 *     not match.
 */
function getName(details) {
  const strongTags = details.find('p strong');

  if (strongTags.length !== 1) {
    // Fallback: pull a release-like token out of the surrounding description.
    const releasePattern = /(?:[A-Z]+|[0-9]+)[^A-Z ]*\.([\w-]+\.){3,}\w+(?:-\w+)?(?=[A-Z])/;
    const found = strongTags.parent().text().match(releasePattern);
    return found === null ? null : found[0];
  }

  const isTextNode = (_, node) => node.type === 'text';
  return strongTags.contents().filter(isTextNode).text();
}