[scraper] adds update seeders cron

TheBeastLT committed 2020-04-08 15:53:02 +02:00
parent d3cc8c6b57
commit ce0e330172
10 changed files with 118 additions and 52 deletions
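
Note: the cron wiring itself sits in one of the changed files not shown below. As a rough sketch only, assuming node-schedule, an hourly cron expression, and hypothetical require paths (none of which are confirmed by this commit), the new per-scraper updateSeeders(torrent) exports could be driven like this:

// Sketch only: scheduler library, schedule expression and paths are assumptions.
// Each scraper below now exports updateSeeders(torrent) alongside scrape and NAME.
const schedule = require('node-schedule');
const moment = require('moment');
const repository = require('./lib/repository');

const scrapers = [
  require('./scrapers/1337x/1337x_scraper'),
  require('./scrapers/horriblesubs/horriblesubs_scraper'),
  require('./scrapers/kickass/kickass_scraper'),
  require('./scrapers/rarbg/rarbg_scraper'),
  require('./scrapers/thepiratebay/thepiratebay_scraper')
];

async function updateSeedersJob() {
  // 1-to-7-day window, mirroring the per-scraper updateSeeders this commit removes from 1337x
  const startDate = moment().subtract(7, 'day').toDate();
  const endDate = moment().subtract(1, 'day').toDate();
  for (const scraper of scrapers) {
    const torrents = await repository.getTorrentsUpdatedBetween(scraper.NAME, startDate, endDate);
    await Promise.all(torrents.map(torrent => scraper.updateSeeders(torrent)));
  }
}

schedule.scheduleJob('0 * * * *', updateSeedersJob); // hourly; the real schedule is not shown here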

View File

@@ -4,6 +4,7 @@ const leetx = require('./1337x_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
+const { updateCurrentSeeders } = require('../../lib/torrent');
const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
const NAME = '1337x';
@@ -25,6 +26,13 @@ async function scrape() {
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
+async function updateSeeders(torrent) {
+return limiter.schedule(() => leetx.torrent(torrent.torrentId)
+.then(record => (torrent.seeders = record.seeders, torrent))
+.catch(() => updateCurrentSeeders(torrent))
+.then(updated => updateTorrentSeeders(updated)));
+}
async function scrapeLatestTorrents() {
const allowedCategories = [
leetx.Categories.MOVIE,
@@ -80,15 +88,6 @@ async function processTorrentRecord(record) {
return createTorrentEntry(torrent);
}
-async function updateSeeders() {
-const startDate = moment().subtract(7, 'day').toDate();
-const endDate = moment().subtract(1, 'day').toDate();
-return repository.getTorrentsUpdatedBetween(NAME, startDate, endDate)
-.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => leetx.torrent(torrent.torrentId)
-.then(foundTorrent => updateTorrentSeeders(foundTorrent))
-.catch(error => console.warn(error))))))
-}
function typeMapping() {
const mapping = {};
mapping[leetx.Categories.MOVIE] = Type.MOVIE;
@@ -98,4 +97,4 @@ function typeMapping() {
return mapping;
}
-module.exports = { scrape, NAME };
+module.exports = { scrape, updateSeeders, NAME };

View File

@@ -7,6 +7,7 @@ const repository = require('../../lib/repository');
const { Type } = require('../../lib/types');
const { updateCurrentSeeders, updateTorrentSize } = require('../../lib/torrent');
const { parseTorrentFiles } = require('../../lib/torrentFiles');
+const { updateTorrentSeeders } = require('../../lib/torrentEntries');
const { getMetadata, getKitsuId } = require('../../lib/metadata');
const showMappings = require('./horriblesubs_mapping.json');
@@ -36,6 +37,11 @@ async function scrape() {
}
}
+async function updateSeeders(torrent) {
+return entryLimiter.schedule(() => updateCurrentSeeders(torrent)
+.then(updated => updateTorrentSeeders(updated)));
+}
async function _scrapeLatestEntries() {
const latestEntries = await horriblesubs.getLatestEntries();
@@ -209,4 +215,4 @@ async function checkIfExists(torrent) {
return { ...torrent, size: existingTorrent.size, seeders: existingTorrent.seeders };
}
-module.exports = { scrape, NAME };
+module.exports = { scrape, updateSeeders, NAME };

View File

@@ -4,6 +4,7 @@ const kickass = require('./kickass_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
+const { updateCurrentSeeders } = require('../../lib/torrent');
const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
const NAME = 'KickassTorrents';
@@ -25,6 +26,13 @@ async function scrape() {
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
+async function updateSeeders(torrent) {
+return limiter.schedule(() => kickass.torrent(torrent.torrentId)
+.then(record => (torrent.seeders = record.seeders, torrent))
+.catch(() => updateCurrentSeeders(torrent))
+.then(updated => updateTorrentSeeders(updated)));
+}
async function scrapeLatestTorrents() {
const allowedCategories = [
kickass.Categories.MOVIE,
@@ -83,4 +91,4 @@ function typeMapping() {
return mapping;
}
-module.exports = { scrape, NAME };
+module.exports = { scrape, updateSeeders, NAME };

View File

@@ -8,6 +8,7 @@ const Promises = require('../../lib/promises');
const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
const NAME = 'RARBG';
+const SEARCH_OPTIONS = { limit: 100, sort: 'seeders', format: 'json_extended', ranked: 0 };
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
const entryLimiter = new Bottleneck({ maxConcurrent: 40 });
@@ -25,6 +26,17 @@ async function scrape() {
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
+async function updateSeeders(torrent) {
+const imdbIds = await repository.getFiles(torrent)
+.then(files => files.map(file => file.imdbId))
+.then(ids => Array.from(new Set(ids)));
+return Promise.all(imdbIds.map(imdbId => limiter.schedule(() => rarbg.search(imdbId, SEARCH_OPTIONS, 'imdb'))))
+.then(results => results.reduce((a, b) => a.concat(b), []))
+.then(results => results.map(result => toTorrent(result)))
+.then(torrents => Promise.all(torrents.map(updated => updateTorrentSeeders(updated))));
+}
async function scrapeLatestTorrents() {
const allowedCategories = [
rarbg.CATEGORY['4K_MOVIES_X264_4k'],
@@ -51,17 +63,7 @@ async function scrapeLatestTorrents() {
async function scrapeLatestTorrentsForCategory(category) {
console.log(`Scrapping ${NAME} ${category} category`);
return rarbg.list({ category: category, limit: 100, sort: 'last', format: 'json_extended', ranked: 0 })
-.then(torrents => torrents.map(torrent => ({
-name: torrent.title,
-infoHash: decode(torrent.download).infoHash,
-magnetLink: torrent.download,
-seeders: torrent.seeders,
-leechers: torrent.leechers,
-category: torrent.category,
-size: torrent.size,
-uploadDate: new Date(torrent.pubdate),
-imdbId: torrent.episode_info && torrent.episode_info.imdb
-})))
+.then(results => results.map(result => toTorrent(result)))
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))
.catch(error => {
console.warn(`Failed ${NAME} scrapping for ${category} due: `, error);
@@ -77,7 +79,7 @@ async function processTorrentRecord(record) {
const torrent = {
provider: NAME,
infoHash: record.infoHash,
-title: record.name,
+title: record.title,
type: getType(record.category),
seeders: record.seeders,
size: record.size,
@@ -88,6 +90,21 @@ async function processTorrentRecord(record) {
return createTorrentEntry(torrent);
}
+function toTorrent(result) {
+return {
+title: result.title,
+provider: NAME,
+infoHash: decode(result.download).infoHash,
+magnetLink: result.download,
+seeders: result.seeders,
+leechers: result.leechers,
+category: result.category,
+size: result.size,
+uploadDate: new Date(result.pubdate),
+imdbId: result.episode_info && result.episode_info.imdb
+};
+}
const seriesCategories = [
'TV Episodes',
'Movies/TV-UHD-episodes',
@@ -101,4 +118,4 @@ function getType(category) {
return Type.MOVIE;
}
-module.exports = { scrape, NAME };
+module.exports = { scrape, updateSeeders, NAME };

View File

@@ -4,6 +4,7 @@ const thepiratebay = require('./thepiratebay_api.js');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
+const { updateCurrentSeeders } = require('../../lib/torrent');
const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
const NAME = 'ThePirateBay';
@@ -37,6 +38,13 @@ async function scrape() {
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
+async function updateSeeders(torrent) {
+return limiter.schedule(() => thepiratebay.torrent(torrent.torrentId)
+.then(record => (torrent.seeders = record.seeders, torrent))
+.catch(() => updateCurrentSeeders(torrent))
+.then(updated => updateTorrentSeeders(updated)));
+}
async function scrapeLatestTorrents() {
return Promises.sequence(allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category)))
.then(entries => entries.reduce((a, b) => a.concat(b), []));
@@ -81,4 +89,4 @@ async function processTorrentRecord(record) {
return createTorrentEntry(torrent);
}
-module.exports = { scrape, NAME };
+module.exports = { scrape, updateSeeders, NAME };