From 97cbe5d6c11dcb65d6e5e63bf2d991a8ebdfb890 Mon Sep 17 00:00:00 2001 From: TheBeastLT Date: Mon, 30 Mar 2020 14:42:00 +0200 Subject: [PATCH] [scraper] fixes update seeders method --- scraper/lib/metadata.js | 2 +- scraper/lib/repository.js | 14 ++++++++++++-- scraper/lib/torrentEntries.js | 17 ++++++++++++++--- scraper/manual/manual.js | 4 ++-- scraper/scrapers/1337x/1337x_api.js | 9 ++++++++- scraper/scrapers/1337x/1337x_scraper.js | 9 +++++++++ .../horriblesubs/horriblesubs_scraper.js | 2 +- .../scrapers/thepiratebay/thepiratebay_api.js | 9 ++++++--- 8 files changed, 53 insertions(+), 13 deletions(-) diff --git a/scraper/lib/metadata.js b/scraper/lib/metadata.js index fcb6397..5cf5330 100644 --- a/scraper/lib/metadata.js +++ b/scraper/lib/metadata.js @@ -83,7 +83,7 @@ function escapeTitle(title) { .normalize('NFKD') // normalize non-ASCII characters .replace(/[\u0300-\u036F]/g, '') .replace(/&/g, 'and') - .replace(/[;, ~]+/g, ' ') // replace dots, commas or underscores with spaces + .replace(/[;, ~.]+/g, ' ') // replace dots, commas or underscores with spaces .replace(/[^\w \-()+#@!']+/g, '') // remove all non-alphanumeric chars .trim(); } diff --git a/scraper/lib/repository.js b/scraper/lib/repository.js index 785e7c5..e557b3d 100644 --- a/scraper/lib/repository.js +++ b/scraper/lib/repository.js @@ -107,7 +107,7 @@ function getTorrent(torrent) { if (!result) { throw new Error(`torrent not found: ${torrent.infoHash}`); } - return result.dataValues; + return result; }) } @@ -115,6 +115,14 @@ function getTorrentsBasedOnTitle(titleQuery, type) { return Torrent.findAll({ where: { title: { [Op.regexp]: `${titleQuery}` }, type: type } }); } +function getTorrentsWithoutId(provider) { + return Torrent.findAll({ where: { provider: provider, torrentId: { [Op.is]: null } }, limit: 100 }); +} + +function getTorrentsUpdatedBetween(provider, startDate, endDate) { + return Torrent.findAll({ where: { provider: provider, updatedAt: { [Op.gte]: startDate, [Op.lte]: endDate } } }); +} + function createTorrent(torrent) { return Torrent.upsert(torrent); } @@ -176,5 +184,7 @@ module.exports = { deleteFile, getSkipTorrent, createSkipTorrent, - createFailedImdbTorrent + createFailedImdbTorrent, + getTorrentsWithoutId, + getTorrentsUpdatedBetween }; \ No newline at end of file diff --git a/scraper/lib/torrentEntries.js b/scraper/lib/torrentEntries.js index 5e3079b..9e0a53a 100644 --- a/scraper/lib/torrentEntries.js +++ b/scraper/lib/torrentEntries.js @@ -60,10 +60,21 @@ async function updateTorrentSeeders(torrent) { } return repository.getTorrent(torrent) + .catch(() => undefined) .then(stored => { - stored.seeders = torrent.seeders; - return stored.save(); - }).catch(() => undefined); + if (stored && stored.seeders !== torrent.seeders) { + stored.seeders = torrent.seeders; + return stored.save() + } + }) + .then(updated => { + console.log(`Updated [${torrent.infoHash}] ${torrent.name || torrent.title} to ${torrent.seeders} seeders`); + return updated; + }) + .catch(error => { + console.warn('Failed updating seeders:', error); + return undefined; + }); } module.exports = { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders }; diff --git a/scraper/manual/manual.js b/scraper/manual/manual.js index aa8866f..5dc8716 100644 --- a/scraper/manual/manual.js +++ b/scraper/manual/manual.js @@ -37,7 +37,7 @@ async function updateMovieCollections() { .then(files => files.filter(file => parse(file.title).complete)); collectionFiles.map(original => repository.getTorrent({ infoHash: original.infoHash }) - .then(torrent => parseTorrentFiles({ ...torrent, imdbId: original.imdbId })) + .then(torrent => parseTorrentFiles({ ...torrent.get(), imdbId: original.imdbId })) .then(files => Promise.all(files.map(file => { console.log(file); return repository.createFile(file) @@ -90,7 +90,7 @@ async function reapplyEpisodeDecomposing(infoHash, includeSourceFiles = true) { })); const imdbId = storedFiles.length && storedFiles[0].imdbId || await getImdbId(parse(torrent.title)); - return parseTorrentFiles({ ...torrent, imdbId, files }) + return parseTorrentFiles({ ...torrent.get(), imdbId, files }) .then(newFiles => newFiles.map(file => { const fileIndex = file.fileIndex !== undefined ? file.fileIndex : null; const mapping = fileIndexMap[fileIndex]; diff --git a/scraper/scrapers/1337x/1337x_api.js b/scraper/scrapers/1337x/1337x_api.js index 28e8626..295bec5 100644 --- a/scraper/scrapers/1337x/1337x_api.js +++ b/scraper/scrapers/1337x/1337x_api.js @@ -130,7 +130,7 @@ function parseTorrentPage(body) { category: details.find('strong:contains(\'Category\')').next().text(), language: details.find('strong:contains(\'Language\')').next().text(), size: parseSize(details.find('strong:contains(\'Total size\')').next().text()), - uploadDate: Sugar.Date.create(details.find('strong:contains(\'Date uploaded\')').next().text()), + uploadDate: parseDate(details.find('strong:contains(\'Date uploaded\')').next().text()), imdbId: imdbIdMatch && imdbIdMatch[1], files: details.find('div[id=\'files\']').first().find('li') .map((i, elem) => $(elem).text()) @@ -145,6 +145,13 @@ function parseTorrentPage(body) { }); } +function parseDate(dateString) { + if (/decade.*ago/i.test(dateString)) { + return Sugar.Date.create('10 years ago'); + } + return Sugar.Date.create(dateString); +} + function parseSize(sizeText) { if (!sizeText) { return undefined; diff --git a/scraper/scrapers/1337x/1337x_scraper.js b/scraper/scrapers/1337x/1337x_scraper.js index 839a05e..81a95c0 100644 --- a/scraper/scrapers/1337x/1337x_scraper.js +++ b/scraper/scrapers/1337x/1337x_scraper.js @@ -80,6 +80,15 @@ async function processTorrentRecord(record) { return createTorrentEntry(torrent); } +async function updateSeeders() { + const startDate = moment().subtract(7, 'day').toDate(); + const endDate = moment().subtract(1, 'day').toDate(); + return repository.getTorrentsUpdatedBetween(NAME, startDate, endDate) + .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => leetx.torrent(torrent.torrentId) + .then(foundTorrent => updateTorrentSeeders(foundTorrent)) + .catch(error => console.warn(error)))))) +} + function typeMapping() { const mapping = {}; mapping[leetx.Categories.MOVIE] = Type.MOVIE; diff --git a/scraper/scrapers/horriblesubs/horriblesubs_scraper.js b/scraper/scrapers/horriblesubs/horriblesubs_scraper.js index 6fce3b0..db24a4a 100644 --- a/scraper/scrapers/horriblesubs/horriblesubs_scraper.js +++ b/scraper/scrapers/horriblesubs/horriblesubs_scraper.js @@ -203,7 +203,7 @@ async function checkIfExists(torrent) { } else if (existingTorrent.provider === NAME) { return undefined; // torrent by this provider already exists } - return { ...torrent, size: existingTorrent.size, seeders: existingTorrent.seeders }; + return { ...torrent.get(), size: existingTorrent.size, seeders: existingTorrent.seeders }; } module.exports = { scrape, NAME }; \ No newline at end of file diff --git a/scraper/scrapers/thepiratebay/thepiratebay_api.js b/scraper/scrapers/thepiratebay/thepiratebay_api.js index 5b38406..ec98eaf 100644 --- a/scraper/scrapers/thepiratebay/thepiratebay_api.js +++ b/scraper/scrapers/thepiratebay/thepiratebay_api.js @@ -7,10 +7,13 @@ const Promises = require('../../lib/promises'); const defaultProxies = [ // 'https://thepiratebay.org', // 'https://proxybay.pro', - 'https://ukpiratebayproxy.com', - 'https://thepiratebayproxy.info', - 'https://mypiratebay.co', + // 'https://ukpiratebayproxy.com', + // 'https://thepiratebayproxy.info', + // 'https://mypiratebay.co', 'https://thepiratebay.asia', + 'https://thepiratebay10.org', + 'https://thepiratebay0.org', + 'https://proxtpb.art', ]; const dumpUrl = '/static/dump/csv/'; const defaultTimeout = 10000;