diff --git a/scraper/lib/cache.js b/scraper/lib/cache.js
index d1870ce..4ea7a14 100644
--- a/scraper/lib/cache.js
+++ b/scraper/lib/cache.js
@@ -24,6 +24,7 @@ function initiateRemoteCache() {
       options: {
         collection: 'torrentio_scraper_collection',
         useUnifiedTopology: true,
+        poolSize: 1
       },
       ttl: GLOBAL_TTL,
       ignoreCacheErrors: true
diff --git a/scraper/lib/repository.js b/scraper/lib/repository.js
index 1dc431b..29c6ea8 100644
--- a/scraper/lib/repository.js
+++ b/scraper/lib/repository.js
@@ -187,7 +187,7 @@ function getTorrentsWithoutSize() {
   });
 }
 
-function getUpdateSeedersTorrents(limit = 100) {
+function getUpdateSeedersTorrents(limit = 50) {
   const until = moment().subtract(7, 'days').format('YYYY-MM-DD');
   return Torrent.findAll({
     where: literal(`torrent."updatedAt" < \'${until}\'`),
@@ -199,6 +199,25 @@ function getUpdateSeedersTorrents(limit = 100) {
   });
 }
 
+/**
+ * Finds torrents created within the last 4 days whose row was last updated more than 12 hours ago.
+ * @param {number} [limit=50] maximum number of rows to return
+ * @returns result of Torrent.findAll, ordered by seeders ascending, then updatedAt ascending
+ */
+function getUpdateSeedersNewTorrents(limit = 50) {
+  // Keep the time part: a date-only string would move the 12 hour cutoff back to the start of that day.
+  const lastUpdate = moment().subtract(12, 'hours').format('YYYY-MM-DD HH:mm:ss');
+  const createdAfter = moment().subtract(4, 'days').format('YYYY-MM-DD');
+  return Torrent.findAll({
+    where: literal(`torrent."updatedAt" < \'${lastUpdate}\' AND torrent."createdAt" > \'${createdAfter}\'`),
+    limit: limit,
+    order: [
+      ['seeders', 'ASC'],
+      ['updatedAt', 'ASC']
+    ]
+  });
+}
+
 function getNoContentsTorrents() {
   return Torrent.findAll({
     where: { opened: false, seeders: { [Op.gte]: 1 } },
@@ -315,6 +334,7 @@ module.exports = {
   getTorrentsBasedOnQuery,
   deleteTorrent,
   getUpdateSeedersTorrents,
+  getUpdateSeedersNewTorrents,
   getNoContentsTorrents,
   createFile,
   getFiles,
diff --git a/scraper/scheduler/scheduler.js b/scraper/scheduler/scheduler.js
index fd4563d..b9816a5 100644
--- a/scraper/scheduler/scheduler.js
+++ b/scraper/scheduler/scheduler.js
@@ -1,10 +1,11 @@
 const { scheduleScraping, scrapeAll } = require('./scraper')
-const { scheduleUpdateSeeders } = require('./seeders')
+const { scheduleUpdateSeeders, scheduleUpdateSeedersForNewTorrents } = require('./seeders')
 
 function startScraper() {
   if (process.env.ENABLE_SCHEDULING) {
     scheduleScraping();
     scheduleUpdateSeeders();
+    scheduleUpdateSeedersForNewTorrents();
   } else {
     scrapeAll()
   }
diff --git a/scraper/scheduler/seeders.js b/scraper/scheduler/seeders.js
index 3b51dfb..9fae79a 100644
--- a/scraper/scheduler/seeders.js
+++ b/scraper/scheduler/seeders.js
@@ -7,29 +7,48 @@ const { updateTorrentSeeders } = require('../lib/torrentEntries')
 const DELAY_MS = 0; // 0 seconds
 const updateLimiter = new Bottleneck({ maxConcurrent: 5 });
 const statistics = {};
+const statisticsNew = {};
 
 function scheduleUpdateSeeders() {
   console.log('Starting seeders update...')
-  return getTorrents()
+  getTorrents()
       .then(torrents => updateCurrentSeeders(torrents))
       .then(updatedTorrents => Promise.all(
           updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated)))))
-      .then(torrents => updateStatistics(torrents))
+      .then(torrents => updateStatistics(torrents, statistics))
       .then(() => console.log('Finished seeders update:', statistics))
       .catch(error => console.warn('Failed seeders update:', error))
       .then(() => delay(DELAY_MS))
       .then(() => scheduleUpdateSeeders());
 }
 
+function scheduleUpdateSeedersForNewTorrents() {
+  console.log('Starting seeders update for new torrents...')
+  getNewTorrents()
+      .then(torrents => updateCurrentSeeders(torrents))
+      .then(updatedTorrents => Promise.all(
+          updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated)))))
+      .then(torrents => updateStatistics(torrents, statisticsNew))
+      .then(() => console.log('Finished seeders update for new torrents:', statisticsNew))
+      .catch(error => console.warn('Failed seeders update for new torrents:', error))
+      .then(() => delay(30_000))
+      .then(() => scheduleUpdateSeedersForNewTorrents());
+}
+
 async function getTorrents() {
-  return repository.getUpdateSeedersTorrents(50)
+  return repository.getUpdateSeedersTorrents()
       .catch(() => delay(5000).then(() => getTorrents()))
 }
 
-function updateStatistics(updatedTorrents) {
-  const totalTorrents = updatedTorrents.map(nested => nested.length).reduce((a, b) => a + b, 0);
-  const date = new Date().toISOString().replace(/T.*/, '');
-  statistics[date] = (statistics[date] || 0) + totalTorrents;
+async function getNewTorrents() {
+  return repository.getUpdateSeedersNewTorrents()
+      .catch(() => delay(5000).then(() => getNewTorrents()))
 }
 
-module.exports = { scheduleUpdateSeeders }
\ No newline at end of file
+function updateStatistics(updatedTorrents, statisticsObject) {
+  const totalTorrents = updatedTorrents.map(nested => nested.length).reduce((a, b) => a + b, 0);
+  const date = new Date().toISOString().replace(/T.*/, '');
+  statisticsObject[date] = (statisticsObject[date] || 0) + totalTorrents;
+}
+
+module.exports = { scheduleUpdateSeeders, scheduleUpdateSeedersForNewTorrents }
\ No newline at end of file