diff --git a/scraper/lib/repository.js b/scraper/lib/repository.js
index e557b3d..6e34462 100644
--- a/scraper/lib/repository.js
+++ b/scraper/lib/repository.js
@@ -1,4 +1,4 @@
-const { Sequelize, fn, col } = require('sequelize');
+const { Sequelize, fn, col, literal } = require('sequelize');
 const Op = Sequelize.Op;

 const DATABASE_URI = process.env.DATABASE_URI;
@@ -115,8 +115,16 @@ function getTorrentsBasedOnTitle(titleQuery, type) {
   return Torrent.findAll({ where: { title: { [Op.regexp]: `${titleQuery}` }, type: type } });
 }

-function getTorrentsWithoutId(provider) {
-  return Torrent.findAll({ where: { provider: provider, torrentId: { [Op.is]: null } }, limit: 100 });
+function getTorrentsWithoutSize() {
+  // 300000000 is the placeholder size previously assigned when the real size was unknown.
+  return Torrent.findAll({
+    where: literal(
+        'exists (select 1 from files where files."infoHash" = torrent."infoHash" and files.size = 300000000)'),
+    order: [
+      ['seeders', 'DESC']
+    ],
+    limit: 1000
+  });
 }

 function getTorrentsUpdatedBetween(provider, startDate, endDate) {
@@ -185,6 +193,6 @@ module.exports = {
   getSkipTorrent,
   createSkipTorrent,
   createFailedImdbTorrent,
-  getTorrentsWithoutId,
+  getTorrentsWithoutSize,
   getTorrentsUpdatedBetween
 };
\ No newline at end of file
diff --git a/scraper/lib/torrent.js b/scraper/lib/torrent.js
index c20bbf9..d9e9f98 100644
--- a/scraper/lib/torrent.js
+++ b/scraper/lib/torrent.js
@@ -34,6 +34,57 @@ module.exports.updateCurrentSeeders = function (torrent) {
   }).then((seeders) => ({ ...torrent, seeders: torrent.seeders || seeders }));
 };

+module.exports.updateTorrentSize = function (torrent) {
+  if (!torrent.infoHash && !torrent.magnetLink) {
+    return Promise.reject(new Error('no infoHash or magnetLink'));
+  }
+  const magnetLink = torrent.magnetLink || decode.encode({ infoHash: torrent.infoHash });
+  return new Promise((resolve, reject) => {
+    const engine = new torrentStream(magnetLink, { connections: MAX_PEER_CONNECTIONS });
+    // Reject if no peer delivers the torrent metadata within the timeout.
+    const timeoutId = setTimeout(() => {
+      engine.destroy();
+      reject(new Error('No available connections for torrent!'));
+    }, SEEDS_CHECK_TIMEOUT);
+
+    engine.ready(() => {
+      clearTimeout(timeoutId);
+      const size = engine.torrent.length;
+      engine.destroy();
+      resolve(size);
+    });
+  }).then((size) => ({ ...torrent, size }));
+};
+
+module.exports.sizeAndFiles = function (torrent) {
+  if (!torrent.infoHash && !torrent.magnetLink) {
+    return Promise.reject(new Error('no infoHash or magnetLink'));
+  }
+  // const magnet = decode.encode({ infoHash: torrent.infoHash, announce: torrent.trackers });
+  return new Promise((resolve, reject) => {
+    const engine = new torrentStream(torrent.infoHash, { connections: MAX_PEER_CONNECTIONS });
+    const timeoutId = setTimeout(() => {
+      engine.destroy();
+      reject(new Error('No available connections for torrent!'));
+    }, 20000);
+
+    engine.ready(() => {
+      clearTimeout(timeoutId);
+      const files = engine.files
+          .map((file, fileId) => ({
+            fileIndex: fileId,
+            name: file.name,
+            path: file.path.replace(/^[^\/]+\//, ''),
+            size: file.length
+          }));
+      const size = engine.torrent.length;
+
+      engine.destroy();
+      resolve({ files, size });
+    });
+  });
+};
+
 module.exports.torrentFiles = function (torrent) {
   return getFilesFromObject(torrent)
       .catch(() => filesFromTorrentFile(torrent))
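Taken together, getTorrentsWithoutSize and updateTorrentSize look like the two halves of a backfill pass over torrents stored with the 300000000 placeholder size. The driver that ties them together is not part of this diff, so the following is only a sketch of how they could be wired up; the script name, the concurrency limit, and the use of Bottleneck (the scrapers' entryLimiter suggests one is already in use) are assumptions:

// sizeUpdater.js (hypothetical driver, not included in this diff)
const Bottleneck = require('bottleneck');
const repository = require('./lib/repository');
const { updateTorrentSize } = require('./lib/torrent');

const limiter = new Bottleneck({ maxConcurrent: 5 }); // assumed limit

function backfillSizes() {
  // Best-seeded torrents first, since they are the most likely to respond in time.
  return repository.getTorrentsWithoutSize()
      .then((torrents) => Promise.all(torrents.map((stored) => limiter.schedule(() =>
          updateTorrentSize(stored.get()) // plain object, so the spread in updateTorrentSize works
              .then((updated) => stored.update({ size: updated.size }))
              .catch((error) => console.warn(`Failed to update size for ${stored.title}:`, error))))));
}

backfillSizes().then(() => console.log('Finished size backfill'));

A variant of the same loop could call sizeAndFiles instead, to also refresh the placeholder sizes on the associated file rows, which is what the exists-subquery in getTorrentsWithoutSize actually matches on.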
diff --git a/scraper/lib/torrentEntries.js b/scraper/lib/torrentEntries.js
index 2e690af..393575d 100644
--- a/scraper/lib/torrentEntries.js
+++ b/scraper/lib/torrentEntries.js
@@ -62,8 +62,9 @@ async function updateTorrentSeeders(torrent) {
   return repository.getTorrent(torrent)
       .catch(() => undefined)
       .then(stored => {
-        if (stored && stored.seeders !== torrent.seeders) {
+        if (stored) {
           stored.seeders = torrent.seeders;
+          stored.changed('updatedAt', true); // force updatedAt refresh even if seeders is unchanged
           return stored.save()
         }
       })
diff --git a/scraper/scrapers/horriblesubs/horriblesubs_scraper.js b/scraper/scrapers/horriblesubs/horriblesubs_scraper.js
index db24a4a..4383d7f 100644
--- a/scraper/scrapers/horriblesubs/horriblesubs_scraper.js
+++ b/scraper/scrapers/horriblesubs/horriblesubs_scraper.js
@@ -5,7 +5,7 @@ const decode = require('magnet-uri');
 const horriblesubs = require('./horriblesubs_api.js');
 const repository = require('../../lib/repository');
 const { Type } = require('../../lib/types');
-const { updateCurrentSeeders } = require('../../lib/torrent');
+const { updateCurrentSeeders, updateTorrentSize } = require('../../lib/torrent');
 const { parseTorrentFiles } = require('../../lib/torrentFiles');
 const { getMetadata, getKitsuId } = require('../../lib/metadata');
 const showMappings = require('./horriblesubs_mapping.json');
@@ -153,7 +153,6 @@ async function _parseShowData(showData) {
             infoHash: decode(mirror.magnetLink).infoHash,
             trackers: decode(mirror.magnetLink).tr.join(','),
             title: formatTitle(episodeInfo, mirror),
-            size: 300000000,
             type: Type.ANIME,
             kitsuId: getKitsuId(episodeInfo.episode),
             uploadDate: episodeInfo.uploadDate,
@@ -161,12 +160,14 @@ async function _parseShowData(showData) {
       .reduce((a, b) => a.concat(b), [])
       .filter((incompleteTorrent) => incompleteTorrent.kitsuId)
       .map((incompleteTorrent) => entryLimiter.schedule(() => checkIfExists(incompleteTorrent)
+          .then((torrent) => torrent && updateTorrentSize(torrent))
           .then((torrent) => torrent && updateCurrentSeeders(torrent))
           .then((torrent) => torrent && parseTorrentFiles(torrent)
              .then((files) => verifyFiles(torrent, files))
              .then((files) => repository.createTorrent(torrent)
                  .then(() => files.forEach(file => repository.createFile(file)))
-                  .then(() => console.log(`Created entry for ${torrent.title}`)))))))
+                  .then(() => console.log(`Created entry for ${torrent.title}`))))
+          .catch(error => console.warn(`Failed creating entry for ${incompleteTorrent.title}:`, error)))))
      .then(() => console.log(`${NAME}: finished scraping ${showData.title} data`));
 }
diff --git a/scraper/scrapers/thepiratebay/thepiratebay_api.js b/scraper/scrapers/thepiratebay/thepiratebay_api.js
index ec98eaf..9580a74 100644
--- a/scraper/scrapers/thepiratebay/thepiratebay_api.js
+++ b/scraper/scrapers/thepiratebay/thepiratebay_api.js
@@ -6,14 +6,12 @@ const Promises = require('../../lib/promises');

 const defaultProxies = [
   // 'https://thepiratebay.org',
-  // 'https://proxybay.pro',
-  // 'https://ukpiratebayproxy.com',
-  // 'https://thepiratebayproxy.info',
-  // 'https://mypiratebay.co',
-  'https://thepiratebay.asia',
-  'https://thepiratebay10.org',
-  'https://thepiratebay0.org',
-  'https://proxtpb.art',
+  'https://proxybay.pro',
+  'https://ukpiratebayproxy.com',
+  'https://thepiratebayproxy.info',
+  'https://mypiratebay.co',
+  // 'https://thepiratebay10.org',
+  // 'https://thepiratebay0.org',
 ];
 const dumpUrl = '/static/dump/csv/';
 const defaultTimeout = 10000;
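A closing note on the torrentEntries.js change: with the stored.seeders !== torrent.seeders guard removed, stored.save() now runs even when the seeder count is unchanged, but Sequelize only issues an UPDATE for attributes it considers dirty, so such a save would silently no-op and leave updatedAt stale. Marking updatedAt as changed forces the write, so the timestamp always records the last seeder check and queries like getTorrentsUpdatedBetween keep working. A minimal illustration of the pattern, assuming a Sequelize Torrent model with default timestamps:

// Forcing a timestamp-only save in Sequelize (hypothetical helper).
async function touchSeeders(Torrent, infoHash, newSeeders) {
  const stored = await Torrent.findOne({ where: { infoHash } });
  stored.seeders = newSeeders;        // may equal the old value: no dirty attributes then
  stored.changed('updatedAt', true);  // mark updatedAt dirty so save() always issues an UPDATE
  return stored.save();               // bumps updatedAt even when seeders did not change
}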