[scraper] update seeders from trackers directly

This commit is contained in:
TheBeastLT
2021-02-07 18:42:21 +01:00
parent 63ffa653a5
commit fd2f3cc5b7
5 changed files with 47 additions and 62 deletions

4
package-lock.json generated
View File

@@ -1870,8 +1870,8 @@
} }
}, },
"parse-torrent-title": { "parse-torrent-title": {
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#ea1c878bbae48e47e97eee1ac4870431bf424244", "version": "git://github.com/TheBeastLT/parse-torrent-title.git#50f366c14a4aaffbef7a3ad1b31830cf6d1bbac6",
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#ea1c878bbae48e47e97eee1ac4870431bf424244", "from": "git://github.com/TheBeastLT/parse-torrent-title.git#50f366c14a4aaffbef7a3ad1b31830cf6d1bbac6",
"requires": { "requires": {
"moment": "^2.24.0" "moment": "^2.24.0"
} }

View File

@@ -32,7 +32,7 @@
"nodejs-bing": "^0.1.0", "nodejs-bing": "^0.1.0",
"nyaapi": "^2.3.3", "nyaapi": "^2.3.3",
"parse-torrent": "^6.1.2", "parse-torrent": "^6.1.2",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#ea1c878bbae48e47e97eee1ac4870431bf424244", "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#50f366c14a4aaffbef7a3ad1b31830cf6d1bbac6",
"pg": "^7.8.2", "pg": "^7.8.2",
"pg-hstore": "^2.3.2", "pg-hstore": "^2.3.2",
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#935a5c23ae809edbcd2a111526a7f74d6767c50d", "real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#935a5c23ae809edbcd2a111526a7f74d6767c50d",

View File

@@ -183,14 +183,14 @@ function getTorrentsWithoutSize() {
}); });
} }
/**
 * Selects the torrents whose seeder counts are due for a refresh.
 *
 * A torrent qualifies when its `updatedAt` is older than `staleDays` days. The most seeded
 * torrents come first; ties are broken by the oldest `updatedAt`.
 *
 * @param {number} [limit=100] maximum number of torrents to return
 * @param {number} [staleDays=7] minimum age, in days, of the last update
 * @returns whatever `Torrent.findAll` returns (presumably a promise of rows - confirm against the model)
 */
function getUpdateSeedersTorrents(limit = 100, staleDays = 7) {
  // The cut-off is produced locally in a fixed date format, so it is safe to embed in the raw SQL literal.
  const until = moment().subtract(staleDays, 'days').format('YYYY-MM-DD');
  return Torrent.findAll({
    where: literal(`torrent."updatedAt" < '${until}'`),
    limit,
    order: [
      ['seeders', 'DESC'],
      ['updatedAt', 'ASC']
    ]
  });
}

View File

@@ -8,9 +8,9 @@ const { Type } = require('./types');
const { isVideo, isSubtitle } = require('./extension'); const { isVideo, isSubtitle } = require('./extension');
const { cacheTrackers } = require('./cache'); const { cacheTrackers } = require('./cache');
// Public tracker list fetched by getDefaultTrackers.
const TRACKERS_URL = 'https://ngosang.github.io/trackerslist/trackers_all.txt';
// Environment variables are always strings, so the override is parsed to an integer;
// the default of 20 applies when it is unset, zero or not numeric.
const MAX_PEER_CONNECTIONS = Number.parseInt(process.env.MAX_PEER_CONNECTIONS, 10) || 20;
const SEEDS_CHECK_TIMEOUT = 30 * 1000; // 30 secs
const ANIME_TRACKERS = [ const ANIME_TRACKERS = [
"http://nyaa.tracker.wf:7777/announce", "http://nyaa.tracker.wf:7777/announce",
"http://anidex.moe:6969/announce", "http://anidex.moe:6969/announce",
@@ -18,34 +18,41 @@ const ANIME_TRACKERS = [
"udp://tracker.uw0.xyz:6969/announce" "udp://tracker.uw0.xyz:6969/announce"
]; ];
/**
 * Refreshes the `seeders` field of one torrent or a batch of torrents by scraping trackers.
 *
 * Info hashes are grouped per tracker so that each tracker is scraped once for all the torrents
 * that list it. Scraping stops when every tracker has answered or after SEEDS_CHECK_TIMEOUT ms,
 * whichever comes first. Each torrent then gets the highest `complete` count reported by any
 * tracker, or 0 when no tracker answered for it.
 *
 * @param {Object|Object[]} torrentsInput torrent(s) to update; mutated in place
 * @returns {Promise<Object|Object[]>} the same `torrentsInput` value that was passed in
 */
async function updateCurrentSeeders(torrentsInput) {
  const torrents = Array.isArray(torrentsInput) ? torrentsInput : [torrentsInput];
  const perTorrentResults = Object.fromEntries(torrents.map(torrent => [torrent.infoHash, {}]));
  const isRequested = infoHash => Object.prototype.hasOwnProperty.call(perTorrentResults, infoHash);

  // Awaited outside of any Promise executor: a failing tracker lookup now rejects the returned
  // promise instead of leaving it pending forever.
  const trackerLists = await Promise.all(torrents.map(torrent => getTorrentTrackers(torrent)));
  const perTrackerInfoHashes = {};
  trackerLists.forEach((trackers, index) => {
    // A lone tracker may arrive as a plain string; normalise to a de-duplicated list.
    new Set([].concat(trackers || [])).forEach(tracker => {
      perTrackerInfoHashes[tracker] = (perTrackerInfoHashes[tracker] || []).concat(torrents[index].infoHash);
    });
  });

  await new Promise(resolve => {
    let timer = null;
    const finish = () => {
      clearTimeout(timer); // do not keep the event loop alive once every tracker has answered
      resolve();
    };
    timer = setTimeout(finish, SEEDS_CHECK_TIMEOUT);

    async.each(Object.keys(perTrackerInfoHashes), (tracker, ready) => {
      BTClient.scrape({ infoHash: perTrackerInfoHashes[tracker], announce: tracker }, (_, response) => {
        // When only one result comes back the scrape callback receives the bare stats object
        // rather than a map keyed by info hash (bittorrent-tracker behaviour - assumes BTClient
        // is that library's client), so wrap it to handle both shapes the same way.
        const results = response && typeof response.infoHash === 'string'
            ? { [response.infoHash]: response }
            : response;
        Object.entries(results || {})
            .filter(([infoHash]) => isRequested(infoHash))
            .forEach(([infoHash, stats]) => {
              perTorrentResults[infoHash][tracker] = [stats.complete, stats.incomplete];
            });
        ready();
      });
    }, finish);
  });

  torrents.forEach(torrent => {
    // A missing `complete` count is treated as 0 so that it cannot turn the maximum into NaN.
    const seedCounts = Object.values(perTorrentResults[torrent.infoHash])
        .map(([complete]) => Number(complete) || 0);
    const newSeeders = Math.max(0, ...seedCounts);
    console.log(`Updating seeders for [${torrent.infoHash}] ${torrent.title} - ${torrent.seeders} -> ${newSeeders}`);
    torrent.seeders = newSeeders;
  });
  return torrentsInput;
}
@@ -158,11 +165,17 @@ function filterSubtitles(files) {
return files.filter(file => isSubtitle(file.path)); return files.filter(file => isSubtitle(file.path));
} }
/**
 * Resolves the list of tracker URLs to use for a torrent.
 *
 * Sources are tried in order: the `tr` parameters of the magnet link, the comma separated
 * `trackers` field, and finally the default tracker list. An empty source is skipped.
 *
 * @param {Object} torrent torrent with optional `magnetLink` and `trackers` fields
 * @returns {Promise<string[]>} tracker URLs, always as an array
 */
async function getTorrentTrackers(torrent) {
  // A magnet link with a single `tr` parameter decodes to a plain string (magnet-uri behaviour -
  // assumes `decode` comes from that package), so normalise to an array before use.
  const magnetTrackers = torrent.magnetLink ? [].concat(decode(torrent.magnetLink).tr || []) : [];
  if (magnetTrackers.length > 0) {
    return magnetTrackers;
  }
  const torrentTrackers = torrent.trackers ? torrent.trackers.split(',').filter(tracker => tracker) : [];
  if (torrentTrackers.length > 0) {
    return torrentTrackers;
  }
  return getDefaultTrackers(torrent);
}
/**
 * Loads the default tracker list from TRACKERS_URL through the `cacheTrackers` wrapper.
 *
 * The downloaded body is split on any run of whitespace, so the list parses the same whether
 * entries are separated by blank lines, single newlines or CRLF line endings. For anime torrents
 * ANIME_TRACKERS are appended and duplicates removed.
 *
 * @param {Object} torrent torrent whose `type` decides whether anime trackers are added
 * @returns {Promise<string[]>} tracker URLs; empty when the response has no body
 */
async function getDefaultTrackers(torrent) {
  return cacheTrackers(() => needle('get', TRACKERS_URL, { open_timeout: SEEDS_CHECK_TIMEOUT })
      .then(response => response.body && response.body.trim())
      .then(body => body ? body.split(/\s+/).filter(line => line) : []))
      .then(trackers => torrent.type === Type.ANIME
          ? Array.from(new Set(trackers.concat(ANIME_TRACKERS)))
          : trackers);
}
module.exports = { updateCurrentSeeders, updateTorrentSize, sizeAndFiles, torrentFiles } module.exports = { updateCurrentSeeders, updateTorrentSize, sizeAndFiles, torrentFiles }

View File

@@ -1,22 +1,19 @@
const Bottleneck = require('bottleneck'); const Bottleneck = require('bottleneck');
const scrapers = require('./scrapers');
const repository = require('../lib/repository') const repository = require('../lib/repository')
const { delay, timeout } = require('../lib/promises') const { delay } = require('../lib/promises')
const { updateCurrentSeeders } = require('../lib/torrent') const { updateCurrentSeeders } = require('../lib/torrent')
const { updateTorrentSeeders } = require('../lib/torrentEntries') const { updateTorrentSeeders } = require('../lib/torrentEntries')
const DELAY_MS = 15 * 1000; // 15 seconds const DELAY_MS = 15 * 1000; // 15 seconds
const TIMEOUT_MS = 30 * 1000 // 30 seconds
const FALLBACK_SCRAPER = { updateSeeders: () => [] };
const limiter = new Bottleneck({ maxConcurrent: 20, minTime: 250 });
const updateLimiter = new Bottleneck({ maxConcurrent: 5 }); const updateLimiter = new Bottleneck({ maxConcurrent: 5 });
const forceSeedersLimiter = new Bottleneck({ maxConcurrent: 5 });
const statistics = {}; const statistics = {};
function scheduleUpdateSeeders() { function scheduleUpdateSeeders() {
console.log('Starting seeders update...') console.log('Starting seeders update...')
return repository.getUpdateSeedersTorrents() return repository.getUpdateSeedersTorrents(50)
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => _updateSeeders(torrent))))) .then(torrents => updateCurrentSeeders(torrents))
.then(updatedTorrents => Promise.all(
updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated)))))
.then(torrents => updateStatistics(torrents)) .then(torrents => updateStatistics(torrents))
.then(() => console.log('Finished seeders update:', statistics)) .then(() => console.log('Finished seeders update:', statistics))
.catch(error => console.warn('Failed seeders update:', error)) .catch(error => console.warn('Failed seeders update:', error))
@@ -24,31 +21,6 @@ function scheduleUpdateSeeders() {
.then(() => scheduleUpdateSeeders()); .then(() => scheduleUpdateSeeders());
} }
/**
 * Updates seeders for a torrent through the scraper of its provider and stores the results.
 *
 * The provider scraper (or FALLBACK_SCRAPER when no provider matches) is given TIMEOUT_MS to
 * report updated torrents; an asynchronous failure is logged and treated as "nothing updated".
 * When the reported torrents do not include the requested one, its seeders are fetched via
 * `updateCurrentSeeders` instead. Every resulting torrent is passed to `updateTorrentSeeders`.
 *
 * @param {Object} torrent torrent to refresh
 * @returns {Promise<Array>} results of `updateTorrentSeeders` for each updated torrent
 */
async function _updateSeeders(torrent) {
  const matchingProvider = await scrapers.find(candidate => candidate.name === torrent.provider);
  const providerScraper = matchingProvider ? matchingProvider.scraper : FALLBACK_SCRAPER;
  // Started outside the try block on purpose: only asynchronous failures are downgraded to a warning.
  const pendingUpdate = timeout(TIMEOUT_MS, providerScraper.updateSeeders(torrent, getImdbIdsMethod(torrent)));

  let refreshed;
  try {
    const scraped = await pendingUpdate;
    refreshed = Array.isArray(scraped) ? scraped : [scraped];
  } catch (error) {
    console.warn(`Failed seeders update ${torrent.provider} [${torrent.infoHash}]: `, error)
    refreshed = [];
  }

  const coversTorrent = refreshed.some(updated => updated.infoHash === torrent.infoHash);
  if (!coversTorrent) {
    const forced = await forceSeedersLimiter.schedule(() => updateCurrentSeeders(torrent));
    refreshed.push(forced);
  }

  const saves = refreshed.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated)));
  return Promise.all(saves);
}
/**
 * Builds a lazy lookup for the IMDb ids attached to the files of a torrent.
 *
 * @param {Object} torrent torrent handed to `repository.getFiles`
 * @returns {function(): Promise<Array>} function resolving to the distinct, non-empty
 *   `imdbId` values of the files, in first-seen order
 */
function getImdbIdsMethod(torrent) {
  return () => repository.getFiles(torrent).then(files => {
    const imdbIds = new Set();
    for (const file of files) {
      if (file.imdbId) {
        imdbIds.add(file.imdbId);
      }
    }
    return Array.from(imdbIds);
  });
}
function updateStatistics(updatedTorrents) { function updateStatistics(updatedTorrents) {
const totalTorrents = updatedTorrents.map(nested => nested.length).reduce((a, b) => a + b, 0); const totalTorrents = updatedTorrents.map(nested => nested.length).reduce((a, b) => a + b, 0);
const date = new Date().toISOString().replace(/T.*/, ''); const date = new Date().toISOString().replace(/T.*/, '');