[scraper] update seeders from trackers directly

TheBeastLT
2021-02-07 18:42:21 +01:00
parent 63ffa653a5
commit fd2f3cc5b7
5 changed files with 47 additions and 62 deletions
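The gist of the change: instead of asking each provider's scraper to refresh seeder counts torrent by torrent, the scraper now queries the trackers directly, sending one scrape request per tracker for a whole batch of info hashes. A rough sketch of that idea, with illustrative names only; it assumes, as the updated code below does, that bittorrent-tracker's static Client.scrape accepts an array of info hashes and keys its results by hash:

// Sketch, not the project's code: ask each tracker once about a batch of
// info hashes and keep the best "complete" (seeder) figure seen per hash.
const Client = require('bittorrent-tracker');

function scrapeSeedersByTracker(infoHashes, trackers) {
  const best = Object.fromEntries(infoHashes.map(hash => [hash, 0]));
  return Promise.all(trackers.map(tracker => new Promise(resolve => {
    Client.scrape({ infoHash: infoHashes, announce: tracker }, (error, results) => {
      if (!error && results) {
        Object.entries(results).forEach(([hash, stats]) => {
          if (hash in best) {
            best[hash] = Math.max(best[hash], stats.complete || 0);
          }
        });
      }
      resolve(); // an unreachable tracker simply contributes nothing
    });
  }))).then(() => best); // { infoHash: seeders }
}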

package-lock.json (generated)

@@ -1870,8 +1870,8 @@
}
},
"parse-torrent-title": {
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#ea1c878bbae48e47e97eee1ac4870431bf424244",
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#ea1c878bbae48e47e97eee1ac4870431bf424244",
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#50f366c14a4aaffbef7a3ad1b31830cf6d1bbac6",
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#50f366c14a4aaffbef7a3ad1b31830cf6d1bbac6",
"requires": {
"moment": "^2.24.0"
}


@@ -32,7 +32,7 @@
"nodejs-bing": "^0.1.0",
"nyaapi": "^2.3.3",
"parse-torrent": "^6.1.2",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#ea1c878bbae48e47e97eee1ac4870431bf424244",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#50f366c14a4aaffbef7a3ad1b31830cf6d1bbac6",
"pg": "^7.8.2",
"pg-hstore": "^2.3.2",
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#935a5c23ae809edbcd2a111526a7f74d6767c50d",


@@ -183,14 +183,14 @@ function getTorrentsWithoutSize() {
});
}
-function getUpdateSeedersTorrents() {
+function getUpdateSeedersTorrents(limit = 100) {
const until = moment().subtract(7, 'days').format('YYYY-MM-DD');
return Torrent.findAll({
where: literal(`torrent."updatedAt" < \'${until}\'`),
-limit: 100,
+limit: limit,
order: [
['seeders', 'DESC'],
-['uploadDate', 'DESC']
+['updatedAt', 'ASC']
]
});
}
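The scheduler (last file in this commit) now drives this query with an explicit batch size. A hedged usage sketch, wired together only from functions that appear in this diff:

// Illustrative wiring only: pull up to 50 torrents whose seeder counts were
// last refreshed more than 7 days ago and update them in a single pass.
const repository = require('../lib/repository');
const { updateCurrentSeeders } = require('../lib/torrent');
const { updateTorrentSeeders } = require('../lib/torrentEntries');

repository.getUpdateSeedersTorrents(50)
    .then(torrents => updateCurrentSeeders(torrents))
    .then(torrents => Promise.all(torrents.map(torrent => updateTorrentSeeders(torrent))))
    .catch(error => console.warn('Failed seeders update:', error));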


@@ -8,9 +8,9 @@ const { Type } = require('./types');
const { isVideo, isSubtitle } = require('./extension');
const { cacheTrackers } = require('./cache');
-const TRACKERS_URL = 'https://ngosang.github.io/trackerslist/trackers_best.txt';
+const TRACKERS_URL = 'https://ngosang.github.io/trackerslist/trackers_all.txt';
const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20;
-const SEEDS_CHECK_TIMEOUT = process.env.SEEDS_CHECK_TIMEOUT || 10 * 1000; // 10 secs
+const SEEDS_CHECK_TIMEOUT = 30 * 1000; // 30 secs
const ANIME_TRACKERS = [
"http://nyaa.tracker.wf:7777/announce",
"http://anidex.moe:6969/announce",
@@ -18,34 +18,41 @@ const ANIME_TRACKERS = [
"udp://tracker.uw0.xyz:6969/announce"
];
-async function updateCurrentSeeders(torrent) {
+async function updateCurrentSeeders(torrentsInput) {
return new Promise(async (resolve) => {
-if (!torrent.magnetLink && !torrent.infoHash) {
-return resolve(0);
-}
-const seeders = {};
-const magnetTrackers = torrent.magnetLink && decode(torrent.magnetLink).tr;
-const torrentTrackers = torrent.trackers && torrent.trackers.split(',');
-const trackers = magnetTrackers || torrentTrackers || await getDefaultTrackers(torrent);
-const callback = () => resolve(seeders);
+const torrents = Array.isArray(torrentsInput) ? torrentsInput : [torrentsInput];
+const perTorrentResults = Object.fromEntries(new Map(torrents.map(torrent => [torrent.infoHash, {}])));
+const perTrackerInfoHashes = await Promise.all(torrents.map(torrent => getTorrentTrackers(torrent)
+.then(torrentTrackers => ({ infoHash: torrent.infoHash, trackers: torrentTrackers }))))
+.then(allTorrentTrackers => allTorrentTrackers
+.reduce((allTrackersMap, torrentTrackers) => {
+torrentTrackers.trackers.forEach(tracker =>
+allTrackersMap[tracker] = (allTrackersMap[tracker] || []).concat(torrentTrackers.infoHash));
+return allTrackersMap;
+}, {}));
+const callback = () => resolve(perTorrentResults);
setTimeout(callback, SEEDS_CHECK_TIMEOUT);
-async.each(trackers, function (tracker, ready) {
-BTClient.scrape({ infoHash: torrent.infoHash, announce: tracker }, (_, results) => {
+async.each(Object.keys(perTrackerInfoHashes), function (tracker, ready) {
+BTClient.scrape({ infoHash: perTrackerInfoHashes[tracker], announce: tracker }, (_, results) => {
if (results) {
-seeders[tracker] = [results.complete, results.incomplete];
+Object.entries(results)
+.filter(([infoHash]) => perTorrentResults[infoHash])
+.forEach(([infoHash, seeders]) =>
+perTorrentResults[infoHash][tracker] = [seeders.complete, seeders.incomplete])
}
ready();
})
}, callback);
-}).then(seeders => {
-if (!Object.values(seeders).length) {
-console.log(`Retrying seeders update for [${torrent.infoHash}] ${torrent.title || torrent.name}`)
-return updateCurrentSeeders(torrent);
-}
-torrent.seeders = Math.max(...Object.values(seeders).map(values => values[0]).concat(0));
-return torrent;
+}).then(perTorrentResults => {
+const torrents = Array.isArray(torrentsInput) ? torrentsInput : [torrentsInput];
+torrents.forEach(torrent => {
+const results = perTorrentResults[torrent.infoHash];
+const newSeeders = Math.max(...Object.values(results).map(values => values[0]).concat(0));
+console.log(`Updating seeders for [${torrent.infoHash}] ${torrent.title} - ${torrent.seeders} -> ${newSeeders}`)
+torrent.seeders = newSeeders;
+})
+return torrentsInput;
});
}
@@ -158,11 +165,17 @@ function filterSubtitles(files) {
return files.filter(file => isSubtitle(file.path));
}
+async function getTorrentTrackers(torrent) {
+const magnetTrackers = torrent.magnetLink && decode(torrent.magnetLink).tr;
+const torrentTrackers = torrent.trackers && torrent.trackers.split(',');
+return magnetTrackers || torrentTrackers || getDefaultTrackers(torrent);
+}
async function getDefaultTrackers(torrent) {
return cacheTrackers(() => needle('get', TRACKERS_URL, { open_timeout: SEEDS_CHECK_TIMEOUT })
.then(response => response.body && response.body.trim())
.then(body => body && body.split('\n\n') || []))
-.then(trackers => torrent.type === Type.ANIME ? trackers.concat(ANIME_TRACKERS) : trackers);
+.then(trackers => torrent.type === Type.ANIME ? Array.from(new Set(trackers.concat(ANIME_TRACKERS))) : trackers);
}
module.exports = { updateCurrentSeeders, updateTorrentSize, sizeAndFiles, torrentFiles }
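To make the intermediate shapes in updateCurrentSeeders easier to follow, here is a small walk-through with made-up hashes and trackers; every value below is hypothetical:

// perTrackerInfoHashes groups each tracker to the info hashes it should be
// asked about, so every tracker is scraped exactly once per batch, e.g.:
//   { 'udp://tracker-a.example.org:1337/announce': ['hashA', 'hashB'],
//     'udp://tracker-b.example.org:6969/announce': ['hashA'] }
// perTorrentResults then maps each info hash to tracker -> [complete, incomplete]:
//   { 'hashA': { 'udp://tracker-a...': [12, 3], 'udp://tracker-b...': [9, 1] },
//     'hashB': { 'udp://tracker-a...': [0, 0] } }
// The persisted seeder count is the best "complete" figure seen across trackers:
const results = { 'udp://tracker-a': [12, 3], 'udp://tracker-b': [9, 1] };
const newSeeders = Math.max(...Object.values(results).map(values => values[0]).concat(0));
console.log(newSeeders); // 12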


@@ -1,22 +1,19 @@
const Bottleneck = require('bottleneck');
const scrapers = require('./scrapers');
const repository = require('../lib/repository')
-const { delay, timeout } = require('../lib/promises')
+const { delay } = require('../lib/promises')
const { updateCurrentSeeders } = require('../lib/torrent')
const { updateTorrentSeeders } = require('../lib/torrentEntries')
const DELAY_MS = 15 * 1000; // 15 seconds
-const TIMEOUT_MS = 30 * 1000 // 30 seconds
-const FALLBACK_SCRAPER = { updateSeeders: () => [] };
-const limiter = new Bottleneck({ maxConcurrent: 20, minTime: 250 });
const updateLimiter = new Bottleneck({ maxConcurrent: 5 });
-const forceSeedersLimiter = new Bottleneck({ maxConcurrent: 5 });
const statistics = {};
function scheduleUpdateSeeders() {
console.log('Starting seeders update...')
-return repository.getUpdateSeedersTorrents()
-.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => _updateSeeders(torrent)))))
+return repository.getUpdateSeedersTorrents(50)
+.then(torrents => updateCurrentSeeders(torrents))
+.then(updatedTorrents => Promise.all(
+updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated)))))
.then(torrents => updateStatistics(torrents))
.then(() => console.log('Finished seeders update:', statistics))
.catch(error => console.warn('Failed seeders update:', error))
@@ -24,31 +21,6 @@ function scheduleUpdateSeeders() {
.then(() => scheduleUpdateSeeders());
}
-async function _updateSeeders(torrent) {
-const provider = await scrapers.find(provider => provider.name === torrent.provider);
-const scraper = provider ? provider.scraper : FALLBACK_SCRAPER;
-const updatedTorrents = await timeout(TIMEOUT_MS, scraper.updateSeeders(torrent, getImdbIdsMethod(torrent)))
-.then(updated => Array.isArray(updated) ? updated : [updated])
-.catch(error => {
-console.warn(`Failed seeders update ${torrent.provider} [${torrent.infoHash}]: `, error)
-return []
-});
-if (!updatedTorrents.find(updated => updated.infoHash === torrent.infoHash)) {
-await forceSeedersLimiter.schedule(() => updateCurrentSeeders(torrent))
-.then(updated => updatedTorrents.push(updated));
-}
-return Promise.all(updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated))));
-}
-function getImdbIdsMethod(torrent) {
-return () => repository.getFiles(torrent)
-.then(files => files.map(file => file.imdbId).filter(id => id))
-.then(ids => Array.from(new Set(ids)));
-}
function updateStatistics(updatedTorrents) {
const totalTorrents = updatedTorrents.map(nested => nested.length).reduce((a, b) => a + b, 0);
const date = new Date().toISOString().replace(/T.*/, '');