diff --git a/package-lock.json b/package-lock.json
index 3a0f231..873b257 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1870,8 +1870,8 @@
       }
     },
     "parse-torrent-title": {
-      "version": "git://github.com/TheBeastLT/parse-torrent-title.git#ea1c878bbae48e47e97eee1ac4870431bf424244",
-      "from": "git://github.com/TheBeastLT/parse-torrent-title.git#ea1c878bbae48e47e97eee1ac4870431bf424244",
+      "version": "git://github.com/TheBeastLT/parse-torrent-title.git#50f366c14a4aaffbef7a3ad1b31830cf6d1bbac6",
+      "from": "git://github.com/TheBeastLT/parse-torrent-title.git#50f366c14a4aaffbef7a3ad1b31830cf6d1bbac6",
       "requires": {
         "moment": "^2.24.0"
       }
diff --git a/package.json b/package.json
index 0757f91..5214bbc 100644
--- a/package.json
+++ b/package.json
@@ -32,7 +32,7 @@
     "nodejs-bing": "^0.1.0",
     "nyaapi": "^2.3.3",
     "parse-torrent": "^6.1.2",
-    "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#ea1c878bbae48e47e97eee1ac4870431bf424244",
+    "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#50f366c14a4aaffbef7a3ad1b31830cf6d1bbac6",
     "pg": "^7.8.2",
     "pg-hstore": "^2.3.2",
     "real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#935a5c23ae809edbcd2a111526a7f74d6767c50d",
diff --git a/scraper/lib/repository.js b/scraper/lib/repository.js
index 77ab7c3..e9bde11 100644
--- a/scraper/lib/repository.js
+++ b/scraper/lib/repository.js
@@ -183,14 +183,14 @@ function getTorrentsWithoutSize() {
   });
 }
 
-function getUpdateSeedersTorrents() {
+function getUpdateSeedersTorrents(limit = 100) {
   const until = moment().subtract(7, 'days').format('YYYY-MM-DD');
   return Torrent.findAll({
     where: literal(`torrent."updatedAt" < \'${until}\'`),
-    limit: 100,
+    limit: limit,
     order: [
       ['seeders', 'DESC'],
-      ['uploadDate', 'DESC']
+      ['updatedAt', 'ASC']
     ]
   });
 }
diff --git a/scraper/lib/torrent.js b/scraper/lib/torrent.js
index 2246ec5..423e1f9 100644
--- a/scraper/lib/torrent.js
+++ b/scraper/lib/torrent.js
@@ -8,9 +8,9 @@ const { Type } = require('./types');
 const { isVideo, isSubtitle } = require('./extension');
 const { cacheTrackers } = require('./cache');
 
-const TRACKERS_URL = 'https://ngosang.github.io/trackerslist/trackers_best.txt';
+const TRACKERS_URL = 'https://ngosang.github.io/trackerslist/trackers_all.txt';
 const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20;
-const SEEDS_CHECK_TIMEOUT = process.env.SEEDS_CHECK_TIMEOUT || 10 * 1000; // 10 secs
+const SEEDS_CHECK_TIMEOUT = 30 * 1000; // 30 secs
 const ANIME_TRACKERS = [
   "http://nyaa.tracker.wf:7777/announce",
   "http://anidex.moe:6969/announce",
@@ -18,34 +18,41 @@ const ANIME_TRACKERS = [
   "udp://tracker.uw0.xyz:6969/announce"
 ];
 
-async function updateCurrentSeeders(torrent) {
+async function updateCurrentSeeders(torrentsInput) {
   return new Promise(async (resolve) => {
-    if (!torrent.magnetLink && !torrent.infoHash) {
-      return resolve(0);
-    }
-
-    const seeders = {};
-    const magnetTrackers = torrent.magnetLink && decode(torrent.magnetLink).tr;
-    const torrentTrackers = torrent.trackers && torrent.trackers.split(',');
-    const trackers = magnetTrackers || torrentTrackers || await getDefaultTrackers(torrent);
-    const callback = () => resolve(seeders);
+    const torrents = Array.isArray(torrentsInput) ? torrentsInput : [torrentsInput];
+    const perTorrentResults = Object.fromEntries(new Map(torrents.map(torrent => [torrent.infoHash, {}])));
+    const perTrackerInfoHashes = await Promise.all(torrents.map(torrent => getTorrentTrackers(torrent)
+        .then(torrentTrackers => ({ infoHash: torrent.infoHash, trackers: torrentTrackers }))))
+        .then(allTorrentTrackers => allTorrentTrackers
+            .reduce((allTrackersMap, torrentTrackers) => {
+              torrentTrackers.trackers.forEach(tracker =>
+                  allTrackersMap[tracker] = (allTrackersMap[tracker] || []).concat(torrentTrackers.infoHash));
+              return allTrackersMap;
+            }, {}));
+    const callback = () => resolve(perTorrentResults);
 
     setTimeout(callback, SEEDS_CHECK_TIMEOUT);
 
-    async.each(trackers, function (tracker, ready) {
-      BTClient.scrape({ infoHash: torrent.infoHash, announce: tracker }, (_, results) => {
+    async.each(Object.keys(perTrackerInfoHashes), function (tracker, ready) {
+      BTClient.scrape({ infoHash: perTrackerInfoHashes[tracker], announce: tracker }, (_, results) => {
         if (results) {
-          seeders[tracker] = [results.complete, results.incomplete];
+          Object.entries(results)
+              .filter(([infoHash]) => perTorrentResults[infoHash])
+              .forEach(([infoHash, seeders]) =>
+                  perTorrentResults[infoHash][tracker] = [seeders.complete, seeders.incomplete])
         }
         ready();
       })
     }, callback);
-  }).then(seeders => {
-    if (!Object.values(seeders).length) {
-      console.log(`Retrying seeders update for [${torrent.infoHash}] ${torrent.title || torrent.name}`)
-      return updateCurrentSeeders(torrent);
-    }
-    torrent.seeders = Math.max(...Object.values(seeders).map(values => values[0]).concat(0));
-    return torrent;
+  }).then(perTorrentResults => {
+    const torrents = Array.isArray(torrentsInput) ? torrentsInput : [torrentsInput];
+    torrents.forEach(torrent => {
+      const results = perTorrentResults[torrent.infoHash];
+      const newSeeders = Math.max(...Object.values(results).map(values => values[0]).concat(0));
+      console.log(`Updating seeders for [${torrent.infoHash}] ${torrent.title} - ${torrent.seeders} -> ${newSeeders}`)
+      torrent.seeders = newSeeders;
+    })
+    return torrentsInput;
   });
 }
@@ -158,11 +165,17 @@ function filterSubtitles(files) {
   return files.filter(file => isSubtitle(file.path));
 }
 
+async function getTorrentTrackers(torrent) {
+  const magnetTrackers = torrent.magnetLink && decode(torrent.magnetLink).tr;
+  const torrentTrackers = torrent.trackers && torrent.trackers.split(',');
+  return magnetTrackers || torrentTrackers || getDefaultTrackers(torrent);
+}
+
 async function getDefaultTrackers(torrent) {
   return cacheTrackers(() => needle('get', TRACKERS_URL, { open_timeout: SEEDS_CHECK_TIMEOUT })
       .then(response => response.body && response.body.trim())
       .then(body => body && body.split('\n\n') || []))
-      .then(trackers => torrent.type === Type.ANIME ? trackers.concat(ANIME_TRACKERS) : trackers);
+      .then(trackers => torrent.type === Type.ANIME ? Array.from(new Set(trackers.concat(ANIME_TRACKERS))) : trackers);
 }
 
 module.exports = { updateCurrentSeeders, updateTorrentSize, sizeAndFiles, torrentFiles }
diff --git a/scraper/scheduler/seeders.js b/scraper/scheduler/seeders.js
index 6beaa63..9a94686 100644
--- a/scraper/scheduler/seeders.js
+++ b/scraper/scheduler/seeders.js
@@ -1,22 +1,19 @@
 const Bottleneck = require('bottleneck');
-const scrapers = require('./scrapers');
 const repository = require('../lib/repository')
-const { delay, timeout } = require('../lib/promises')
+const { delay } = require('../lib/promises')
 const { updateCurrentSeeders } = require('../lib/torrent')
 const { updateTorrentSeeders } = require('../lib/torrentEntries')
 
 const DELAY_MS = 15 * 1000; // 15 seconds
-const TIMEOUT_MS = 30 * 1000 // 30 seconds
-const FALLBACK_SCRAPER = { updateSeeders: () => [] };
-const limiter = new Bottleneck({ maxConcurrent: 20, minTime: 250 });
 const updateLimiter = new Bottleneck({ maxConcurrent: 5 });
-const forceSeedersLimiter = new Bottleneck({ maxConcurrent: 5 });
 const statistics = {};
 
 function scheduleUpdateSeeders() {
   console.log('Starting seeders update...')
-  return repository.getUpdateSeedersTorrents()
-      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => _updateSeeders(torrent)))))
+  return repository.getUpdateSeedersTorrents(50)
+      .then(torrents => updateCurrentSeeders(torrents))
+      .then(updatedTorrents => Promise.all(
+          updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated)))))
       .then(torrents => updateStatistics(torrents))
       .then(() => console.log('Finished seeders update:', statistics))
       .catch(error => console.warn('Failed seeders update:', error))
@@ -24,31 +21,6 @@ function scheduleUpdateSeeders() {
       .then(() => scheduleUpdateSeeders());
 }
 
-async function _updateSeeders(torrent) {
-  const provider = await scrapers.find(provider => provider.name === torrent.provider);
-  const scraper = provider ? provider.scraper : FALLBACK_SCRAPER;
-
-  const updatedTorrents = await timeout(TIMEOUT_MS, scraper.updateSeeders(torrent, getImdbIdsMethod(torrent)))
-      .then(updated => Array.isArray(updated) ? updated : [updated])
-      .catch(error => {
-        console.warn(`Failed seeders update ${torrent.provider} [${torrent.infoHash}]: `, error)
-        return []
-      });
-
-  if (!updatedTorrents.find(updated => updated.infoHash === torrent.infoHash)) {
-    await forceSeedersLimiter.schedule(() => updateCurrentSeeders(torrent))
-        .then(updated => updatedTorrents.push(updated));
-  }
-
-  return Promise.all(updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated))));
-}
-
-function getImdbIdsMethod(torrent) {
-  return () => repository.getFiles(torrent)
-      .then(files => files.map(file => file.imdbId).filter(id => id))
-      .then(ids => Array.from(new Set(ids)));
-}
-
 function updateStatistics(updatedTorrents) {
   const totalTorrents = updatedTorrents.map(nested => nested.length).reduce((a, b) => a + b, 0);
   const date = new Date().toISOString().replace(/T.*/, '');