From 30421815d7c4c228f30cfe210c961db661e01f64 Mon Sep 17 00:00:00 2001 From: TheBeastLT Date: Mon, 17 Feb 2020 14:26:27 +0100 Subject: [PATCH] updates horriblesubs api to filter entries based on title --- lib/repository.js | 53 ++++++++++++------- scrapers/horriblesubs/horriblesubs_api.js | 21 ++++---- scrapers/horriblesubs/horriblesubs_scraper.js | 7 ++- 3 files changed, 52 insertions(+), 29 deletions(-) diff --git a/lib/repository.js b/lib/repository.js index 2280a1e..53ace44 100644 --- a/lib/repository.js +++ b/lib/repository.js @@ -1,4 +1,4 @@ -const { Sequelize }= require('sequelize'); +const { Sequelize } = require('sequelize'); const Op = Sequelize.Op; const POSTGRES_URI = process.env.POSTGRES_URI || 'postgres://torrentio:postgres@localhost:5432/torrentio'; @@ -6,7 +6,7 @@ const POSTGRES_URI = process.env.POSTGRES_URI || 'postgres://torrentio:postgres@ const database = new Sequelize(POSTGRES_URI, { logging: false }); const Provider = database.define('provider', { - name: { type: Sequelize.STRING(32), primaryKey: true}, + name: { type: Sequelize.STRING(32), primaryKey: true }, lastScraped: { type: Sequelize.DATE } }); @@ -17,13 +17,19 @@ const Torrent = database.define('torrent', { size: { type: Sequelize.BIGINT }, type: { type: Sequelize.STRING(16), allowNull: false }, uploadDate: { type: Sequelize.DATE, allowNull: false }, - seeders: { type: Sequelize.SMALLINT } + seeders: { type: Sequelize.SMALLINT }, + trackers: { type: Sequelize.STRING(4096) } }); const File = database.define('file', { id: { type: Sequelize.BIGINT, autoIncrement: true, primaryKey: true }, - infoHash: { type: Sequelize.STRING(64), allowNull: false, references: { model: Torrent, key: 'infoHash' }, onDelete: 'CASCADE' }, + infoHash: { + type: Sequelize.STRING(64), + allowNull: false, + references: { model: Torrent, key: 'infoHash' }, + onDelete: 'CASCADE' + }, fileIndex: { type: Sequelize.INTEGER }, title: { type: Sequelize.STRING(256), allowNull: false }, size: { type: Sequelize.BIGINT }, @@ -34,21 +40,21 @@ const File = database.define('file', kitsuEpisode: { type: Sequelize.INTEGER } }, { - indexes:[ - { unique: true, fields:['infoHash'], where: { fileIndex: { [Op.eq]: null } } }, - { unique: true, fields:['infoHash', 'fileIndex', 'imdbEpisode'] }, - { unique: false, fields:['imdbId', 'imdbSeason', 'imdbEpisode'] }, - { unique: false, fields:['kitsuId', 'kitsuEpisode'] } - ] - } + indexes: [ + { unique: true, fields: ['infoHash'], where: { fileIndex: { [Op.eq]: null } } }, + { unique: true, fields: ['infoHash', 'fileIndex', 'imdbEpisode'] }, + { unique: false, fields: ['imdbId', 'imdbSeason', 'imdbEpisode'] }, + { unique: false, fields: ['kitsuId', 'kitsuEpisode'] } + ] + } ); const SkipTorrent = database.define('skip_torrent', { - infoHash: {type: Sequelize.STRING(64), primaryKey: true}, + infoHash: { type: Sequelize.STRING(64), primaryKey: true }, }); const FailedImdbTorrent = database.define('failed_imdb_torrent', { - infoHash: {type: Sequelize.STRING(64), primaryKey: true}, + infoHash: { type: Sequelize.STRING(64), primaryKey: true }, title: { type: Sequelize.STRING(256), allowNull: false } }); @@ -57,7 +63,7 @@ function connect() { } function getProvider(provider) { - return Provider.findOrCreate({ where: { name: { [Op.eq]: provider.name }}, defaults: provider }); + return Provider.findOrCreate({ where: { name: { [Op.eq]: provider.name } }, defaults: provider }); } function updateProvider(provider) { @@ -66,7 +72,7 @@ function updateProvider(provider) { function getTorrent(torrent) { return Torrent.findByPk(torrent.infoHash) - .then((result) =>{ + .then((result) => { if (!result) { throw new Error(`torrent not found: ${torrent.infoHash}`); } @@ -88,7 +94,7 @@ function getFiles(torrent) { function getSkipTorrent(torrent) { return SkipTorrent.findByPk(torrent.infoHash) - .then((result) =>{ + .then((result) => { if (!result) { return getFailedImdbTorrent(torrent); } @@ -102,7 +108,7 @@ function createSkipTorrent(torrent) { function getFailedImdbTorrent(torrent) { return FailedImdbTorrent.findByPk(torrent.infoHash) - .then((result) =>{ + .then((result) => { if (!result) { throw new Error(`torrent not found: ${torrent.infoHash}`); } @@ -114,4 +120,15 @@ function createFailedImdbTorrent(torrent) { return FailedImdbTorrent.upsert(torrent); } -module.exports = { connect, getProvider, updateProvider, getTorrent, createTorrent, createFile, getFiles, getSkipTorrent, createSkipTorrent, createFailedImdbTorrent }; \ No newline at end of file +module.exports = { + connect, + getProvider, + updateProvider, + getTorrent, + createTorrent, + createFile, + getFiles, + getSkipTorrent, + createSkipTorrent, + createFailedImdbTorrent +}; \ No newline at end of file diff --git a/scrapers/horriblesubs/horriblesubs_api.js b/scrapers/horriblesubs/horriblesubs_api.js index 0ba0202..485e51d 100644 --- a/scrapers/horriblesubs/horriblesubs_api.js +++ b/scrapers/horriblesubs/horriblesubs_api.js @@ -1,7 +1,6 @@ const cheerio = require('cheerio'); const needle = require('needle'); const moment = require('moment'); -const decode = require('magnet-uri'); const defaultUrl = 'https://horriblesubs.info'; const defaultTimeout = 5000; @@ -18,10 +17,10 @@ function allShows(config = {}) { async function showData(showInfo, config = {}) { const showEndpoint = (showInfo.url || showInfo).match(/\/show.+/)[0]; + const title = showInfo.title; const showId = await _getShowId(showEndpoint); - const packEntries = await _getAllEntries(showId, 'batch', config); - const singleEntries = await _getAllEntries(showId, 'show', config); - const title = showInfo.title || singleEntries[0] && singleEntries[0].title; + const packEntries = await _getShowEntries(showId, title, 'batch', config); + const singleEntries = await _getShowEntries(showId, title, 'show', config); return { title: title, @@ -55,12 +54,17 @@ function _getShowId(showEndpoint) { .then($ => $('div.entry-content').find('script').html().match(/var hs_showid = (\d+)/)[1]); } +function _getShowEntries(animeId, animeTitle, type, config) { + return _getAllEntries(animeId, type, config) + .then((entries) => entries.filter((entry) => entry.title === animeTitle)); +} + function _getAllEntries(animeId, type, config, page = 0, autoExtend = true) { const entriesEndpoint = `/api.php?method=getshows&type=${type}&showid=${animeId}&nextid=${page}`; return _getEntries(entriesEndpoint, config) - .then((entries) => !autoExtend || entries.length < 12 ? entries : + .then((entries) => !autoExtend || !entries.length ? entries : _getAllEntries(animeId, type, config, page + 1) - .then((nextEntries) => entries.concat(nextEntries))) + .then((nextEntries) => entries.concat(nextEntries))); } function _getEntries(endpoint, config) { @@ -74,11 +78,10 @@ function _getEntries(endpoint, config) { mirrors: $(element).find('div[class="rls-links-container"]').children() .map((indexLink, elementLink) => ({ resolution: $(elementLink).attr('id').match(/\d+p$/)[0], - infoHash: decode($(elementLink).find('a[title="Magnet Link"]').attr('href')).infoHash, magnetLink: $(elementLink).find('a[title="Magnet Link"]').attr('href'), torrentLink: $(elementLink).find('a[title="Torrent Link"]').attr('href') })).get() - })).get()) + })).get()); } function _getAllLatestEntries(config, page = 0) { @@ -93,7 +96,7 @@ function _getAllLatestEntries(config, page = 0) { .then((entries) => entries.length < 12 ? entries : _getAllLatestEntries(config, page + 1) - .then((nextEntries) => entries.concat(nextEntries))) + .then((nextEntries) => entries.concat(nextEntries))); } async function _findLatestEntry(entry, config) { diff --git a/scrapers/horriblesubs/horriblesubs_scraper.js b/scrapers/horriblesubs/horriblesubs_scraper.js index 879c77e..b2566ea 100644 --- a/scrapers/horriblesubs/horriblesubs_scraper.js +++ b/scrapers/horriblesubs/horriblesubs_scraper.js @@ -1,6 +1,7 @@ const fs = require('fs'); const Bottleneck = require('bottleneck'); const { parse } = require('parse-torrent-title'); +const decode = require('magnet-uri'); const horriblesubs = require('./horriblesubs_api.js'); const repository = require('../../lib/repository'); const { Type } = require('../../lib/types'); @@ -12,7 +13,7 @@ const showMappings = require('./horriblesubs_mapping.json'); const NAME = 'HorribleSubs'; const limiter = new Bottleneck({ maxConcurrent: 5 }); -const entryLimiter = new Bottleneck({ maxConcurrent: 20 }); +const entryLimiter = new Bottleneck({ maxConcurrent: 10 }); async function scrape() { const lastScraped = await repository.getProvider({ name: NAME }); @@ -28,7 +29,7 @@ async function _scrapeAllShows() { const shows = await horriblesubs.allShows(); return Promise.all(shows - .slice(0, 6) + .slice(0, 5) .map((show) => limiter.schedule(() => horriblesubs.showData(show) .then((showData) => _parseShowData(showData)) .catch((err) => console.log(err))))); @@ -86,6 +87,8 @@ async function _parseShowData(showData) { .map((mirror) => ({ provider: NAME, ...mirror, + infoHash: decode(mirror.magnetLink).infoHash, + trackers: decode(mirror.magnetLink).tr.join(','), title: `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`, size: 300000000, type: Type.ANIME,