diff --git a/scraper/lib/promises.js b/scraper/lib/promises.js
index 04deb6e..620aea9 100644
--- a/scraper/lib/promises.js
+++ b/scraper/lib/promises.js
@@ -33,4 +33,11 @@ async function delay(duration) {
   return new Promise((resolve) => setTimeout(resolve, duration));
 }
 
-module.exports = { sequence, first, delay };
\ No newline at end of file
+/**
+ * Return most common value from given array.
+ */
+function mostCommonValue(array) {
+  return array.sort((a, b) => array.filter(v => v === a).length - array.filter(v => v === b).length).pop();
+}
+
+module.exports = { sequence, first, delay, mostCommonValue };
\ No newline at end of file
diff --git a/scraper/lib/repository.js b/scraper/lib/repository.js
index 5aab304..b176f00 100644
--- a/scraper/lib/repository.js
+++ b/scraper/lib/repository.js
@@ -198,6 +198,17 @@ function getUpdateSeedersTorrents() {
     });
 }
 
+function getNoContentsTorrents() {
+  return Torrent.findAll({
+    where: { opened: false },
+    limit: 500,
+    order: [
+      ['seeders', 'DESC'],
+      ['uploadDate', 'DESC']
+    ]
+  });
+}
+
 function createTorrent(torrent) {
   return Torrent.upsert(torrent)
     .then(() => createContents(torrent.infoHash, torrent.contents))
@@ -213,7 +224,8 @@ function setTorrentSeeders(infoHash, seeders) {
 function createFile(file) {
   if (file.id) {
-    return File.upsert(file).then(() => upsertSubtitles(file.id, file.subtitles));
+    return (file.dataValues ? file.save() : File.upsert(file))
+        .then(() => upsertSubtitles(file, file.subtitles));
   }
   if (file.subtitles && file.subtitles.length) {
     file.subtitles = file.subtitles.map(subtitle => ({ infoHash: file.infoHash, title: subtitle.path, ...subtitle }));
   }
@@ -249,7 +261,7 @@ function upsertSubtitles(file, subtitles) {
           subtitle.title = subtitle.title || subtitle.path;
           return subtitle;
         })
-        .map(subtitle => () => subtitle.dataValues ? subtitle.save() : Subtitle.upsert(subtitle)));
+        .map(subtitle => () => subtitle.dataValues ? subtitle.save() : Subtitle.create(subtitle)));
   }
   return Promise.resolve();
 }
@@ -296,6 +308,7 @@ module.exports = {
   getTorrent,
   getTorrentsBasedOnTitle,
   getUpdateSeedersTorrents,
+  getNoContentsTorrents,
   createFile,
   getFiles,
   getFilesBasedOnTitle,
diff --git a/scraper/lib/torrentEntries.js b/scraper/lib/torrentEntries.js
index fcbdcd8..075e5eb 100644
--- a/scraper/lib/torrentEntries.js
+++ b/scraper/lib/torrentEntries.js
@@ -92,6 +92,63 @@ async function getStoredTorrentEntry(torrent) {
       .catch(() => undefined);
 }
+async function checkAndUpdateTorrent(torrent) {
+  const storedTorrent = torrent.dataValues
+      ? torrent
+      : await repository.getTorrent(torrent).catch(() => undefined);
+  if (!storedTorrent) {
+    return false;
+  }
+  return createTorrentContents(storedTorrent)
+      .then(() => updateTorrentSeeders(torrent));
+}
+
+async function createTorrentContents(torrent) {
+  if (torrent.opened) {
+    return;
+  }
+  const storedVideos = await repository.getFiles(torrent).catch(() => []);
+  if (!storedVideos || !storedVideos.length) {
+    return;
+  }
+  const notOpenedVideo = storedVideos.length === 1 && !Number.isInteger(storedVideos[0].fileIndex);
+  const imdbId = Promises.mostCommonValue(storedVideos.map(stored => stored.imdbId));
+
+  const { contents, videos, subtitles } = await parseTorrentFiles({ ...torrent.get(), imdbId })
+      .then(torrentContents => notOpenedVideo ? torrentContents : { ...torrentContents, videos: storedVideos })
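+      // assign the parsed subtitle files to their corresponding videos before anything is persisted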
+      .then(torrentContents => assignSubtitles(torrentContents))
+      .catch(error => {
+        console.log(`Failed getting contents for [${torrent.infoHash}] ${torrent.title}`, error.message);
+        return {};
+      });
+
+  if (!contents || !contents.length) {
+    return;
+  }
+  if (notOpenedVideo && videos.length === 1) {
+    // both have a single video and the stored one was never opened, so update the stored entry with the parsed metadata and reuse it
+    storedVideos[0].fileIndex = videos[0].fileIndex;
+    storedVideos[0].title = videos[0].title;
+    storedVideos[0].size = videos[0].size;
+    storedVideos[0].subtitles = videos[0].subtitles;
+    videos[0] = storedVideos[0];
+  }
+  // if the parse produced no videos or more than one, none of them reuse the stored entry, so the old unopened one has to be deleted
+  const shouldDeleteOld = notOpenedVideo && videos.every(video => !video.id);
+
+  return repository.createTorrent({ ...torrent.get(), contents, subtitles })
+      .then(() => {
+        if (shouldDeleteOld) {
+          console.error(`Deleting old video for [${torrent.infoHash}] ${torrent.title}`);
+          return storedVideos[0].destroy();
+        }
+        return Promise.resolve();
+      })
+      .then(() => Promises.sequence(videos.map(video => () => repository.createFile(video))))
+      .then(() => console.log(`Created contents for ${torrent.provider} [${torrent.infoHash}] ${torrent.title}`))
+      .catch(error => console.error(`Failed saving contents for [${torrent.infoHash}] ${torrent.title}`, error));
+}
+
 async function updateTorrentSeeders(torrent) {
   if (!torrent.infoHash || !Number.isInteger(torrent.seeders)) {
     return;
   }
@@ -104,4 +161,11 @@ async function updateTorrentSeeders(torrent) {
   });
 }
 
-module.exports = { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders };
+module.exports = {
+  createTorrentEntry,
+  createTorrentContents,
+  createSkipTorrentEntry,
+  getStoredTorrentEntry,
+  updateTorrentSeeders,
+  checkAndUpdateTorrent
+};
diff --git a/scraper/manual/manual.js b/scraper/manual/manual.js
index 906c719..354c4d1 100644
--- a/scraper/manual/manual.js
+++ b/scraper/manual/manual.js
@@ -1,8 +1,10 @@
 const Bottleneck = require('bottleneck');
 const { parse } = require('parse-torrent-title');
+const Promises = require('../lib/promises');
 const repository = require('../lib/repository');
 const { getImdbId } = require('../lib/metadata');
 const { parseTorrentFiles } = require('../lib/torrentFiles');
+const { createTorrentContents } = require('../lib/torrentEntries');
 const { assignSubtitles } = require('../lib/torrentSubtitles');
 const { Type } = require('../lib/types');
 
@@ -110,6 +112,14 @@ async function assignSubs() {
     }));
 }
 
+async function openTorrentContents() {
+  const limiter = new Bottleneck({ maxConcurrent: 5 });
+  const unopenedTorrents = await repository.getNoContentsTorrents();
+
+  return Promise.all(unopenedTorrents.map(torrent => limiter.schedule(() => createTorrentContents(torrent))))
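+      // a full batch of 500 means more unopened torrents may remain, so run another pass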
+      .then(() => unopenedTorrents.length === 500 ? openTorrentContents() : Promise.resolve());
+}
+
 async function findAllFiles() {
   /* Test cases */
   /* Anime Season and absolute episodes */
@@ -185,8 +195,9 @@ async function findAllFiles() {
 
 //findAllFiles().then(() => console.log('Finished'));
 //updateMovieCollections().then(() => console.log('Finished'));
-reapplyEpisodeDecomposing('0b6c0f0692bdb151efb87e3de90e46e3b177444e', false).then(() => console.log('Finished'));
+// reapplyEpisodeDecomposing('0b6c0f0692bdb151efb87e3de90e46e3b177444e', false).then(() => console.log('Finished'));
 //reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
 //reapplyDecomposingToTorrentsOnRegex('.*Boku no Hero Academia.*').then(() => console.log('Finished'));
 //reapplyManualHashes().then(() => console.log('Finished'));
-// assignSubs().then(() => console.log('Finished'));
\ No newline at end of file
+// assignSubs().then(() => console.log('Finished'));
+openTorrentContents().then(() => console.log('Finished'));
\ No newline at end of file
diff --git a/scraper/scrapers/1337x/1337x_scraper.js b/scraper/scrapers/1337x/1337x_scraper.js
index b9daff6..e84638d 100644
--- a/scraper/scrapers/1337x/1337x_scraper.js
+++ b/scraper/scrapers/1337x/1337x_scraper.js
@@ -4,7 +4,7 @@ const leetx = require('./1337x_api');
 const { Type } = require('../../lib/types');
 const repository = require('../../lib/repository');
 const Promises = require('../../lib/promises');
-const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
+const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
 
 const NAME = '1337x';
 const UNTIL_PAGE = 10;
@@ -65,8 +65,8 @@ async function processTorrentRecord(record) {
     console.warn(`Incorrect upload date for [${torrentFound.infoHash}] ${torrentFound.name}`);
     return;
   }
-  if (await getStoredTorrentEntry(torrentFound)) {
-    return updateTorrentSeeders(torrentFound);
+  if (await checkAndUpdateTorrent(torrentFound)) {
+    return torrentFound;
   }
 
   const torrent = {
diff --git a/scraper/scrapers/eztv/eztv_scraper.js b/scraper/scrapers/eztv/eztv_scraper.js
index 527bfdf..bb19264 100644
--- a/scraper/scrapers/eztv/eztv_scraper.js
+++ b/scraper/scrapers/eztv/eztv_scraper.js
@@ -3,7 +3,7 @@ const Bottleneck = require('bottleneck');
 const eztv = require('./eztv_api');
 const { Type } = require('../../lib/types');
 const repository = require('../../lib/repository');
-const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
+const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
 
 const NAME = 'EZTV';
 const UNTIL_PAGE = 10;
@@ -50,8 +50,8 @@ async function scrapeLatestTorrentsForCategory(page = 1) {
 }
 
 async function processTorrentRecord(record) {
-  if (await getStoredTorrentEntry(record)) {
-    return updateTorrentSeeders(record);
+  if (await checkAndUpdateTorrent(record)) {
+    return record;
  }
 
  if (!record || !record.size) {
diff --git a/scraper/scrapers/horriblesubs/horriblesubs_scraper.js b/scraper/scrapers/horriblesubs/horriblesubs_scraper.js
index ee05485..91d9a81 100644
--- a/scraper/scrapers/horriblesubs/horriblesubs_scraper.js
+++ b/scraper/scrapers/horriblesubs/horriblesubs_scraper.js
@@ -6,7 +6,7 @@ const horriblesubs = require('./horriblesubs_api.js');
 const repository = require('../../lib/repository');
 const { Type } = require('../../lib/types');
 const { updateCurrentSeeders, updateTorrentSize } = require('../../lib/torrent');
-const { createTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
+const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
 const { getMetadata, getKitsuId } = require('../../lib/metadata');
 const showMappings = require('./horriblesubs_mapping.json');
 
@@ -172,7 +172,7 @@ async function processTorrentRecord(torrent, updateSeeders = true) {
 
   if (existingTorrent && existingTorrent.provider === NAME) {
     if (updateSeeders) {
-      return updateCurrentSeeders(torrent).then(updatedSeeders => updateTorrentSeeders(updatedSeeders))
+      return updateCurrentSeeders(torrent).then(updatedSeeders => checkAndUpdateTorrent(updatedSeeders))
     }
     return Promise.resolve(torrent)
   }
diff --git a/scraper/scrapers/kickass/kickass_scraper.js b/scraper/scrapers/kickass/kickass_scraper.js
index 2df1308..f23e423 100644
--- a/scraper/scrapers/kickass/kickass_scraper.js
+++ b/scraper/scrapers/kickass/kickass_scraper.js
@@ -4,7 +4,7 @@ const kickass = require('./kickass_api');
 const { Type } = require('../../lib/types');
 const repository = require('../../lib/repository');
 const Promises = require('../../lib/promises');
-const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
+const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
 
 const NAME = 'KickassTorrents';
 const UNTIL_PAGE = 10;
@@ -54,8 +54,8 @@ async function scrapeLatestTorrentsForCategory(category, page = 1) {
 }
 
 async function processTorrentRecord(record) {
-  if (await getStoredTorrentEntry(record)) {
-    return updateTorrentSeeders(record);
+  if (await checkAndUpdateTorrent(record)) {
+    return record;
   }
 
   const torrentFound = await kickass.torrent(record.torrentId).catch(() => undefined);
diff --git a/scraper/scrapers/rarbg/rarbg_dump_scraper.js b/scraper/scrapers/rarbg/rarbg_dump_scraper.js
index d4d4494..f3e1b7e 100644
--- a/scraper/scrapers/rarbg/rarbg_dump_scraper.js
+++ b/scraper/scrapers/rarbg/rarbg_dump_scraper.js
@@ -3,12 +3,7 @@ const Bottleneck = require('bottleneck');
 const rarbg = require('rarbg-api');
 const decode = require('magnet-uri');
 const { Type } = require('../../lib/types');
-const {
-  createTorrentEntry,
-  getStoredTorrentEntry,
-  updateTorrentSeeders
-} = require('../../lib/torrentEntries');
-
+const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
 const NAME = 'RARBG';
 
 const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
@@ -54,8 +49,8 @@ async function getTorrentsForImdbId(imdbId, retries = 5) {
 }
 
 async function processTorrentRecord(record) {
-  if (await getStoredTorrentEntry(record)) {
-    return updateTorrentSeeders(record);
+  if (await checkAndUpdateTorrent(record)) {
+    return record;
   }
 
   const torrent = {
diff --git a/scraper/scrapers/rarbg/rarbg_scraper.js b/scraper/scrapers/rarbg/rarbg_scraper.js
index e5346a8..0108682 100644
--- a/scraper/scrapers/rarbg/rarbg_scraper.js
+++ b/scraper/scrapers/rarbg/rarbg_scraper.js
@@ -5,7 +5,7 @@ const decode = require('magnet-uri');
 const { Type } = require('../../lib/types');
 const repository = require('../../lib/repository');
 const Promises = require('../../lib/promises');
-const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
+const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
 
 const NAME = 'RARBG';
 const SEARCH_OPTIONS = { limit: 100, sort: 'seeders', format: 'json_extended', ranked: 0 };
@@ -70,8 +70,8 @@ async function scrapeLatestTorrentsForCategory(category, retries = 5) {
 }
 
 async function processTorrentRecord(record) {
-  if (await getStoredTorrentEntry(record)) {
-    return updateTorrentSeeders(record);
+  if (await checkAndUpdateTorrent(record)) {
+    return record;
   }
 
   const torrent = {
diff --git a/scraper/scrapers/thepiratebay/thepiratebay_scraper.js b/scraper/scrapers/thepiratebay/thepiratebay_scraper.js
index 6a3dc9c..7a10c8d 100644
--- a/scraper/scrapers/thepiratebay/thepiratebay_scraper.js
+++ b/scraper/scrapers/thepiratebay/thepiratebay_scraper.js
@@ -4,7 +4,7 @@ const thepiratebay = require('./thepiratebay_api.js');
 const { Type } = require('../../lib/types');
 const repository = require('../../lib/repository');
 const Promises = require('../../lib/promises');
-const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
+const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
 
 const NAME = 'ThePirateBay';
 const UNTIL_PAGE = 5;
@@ -60,8 +60,8 @@ async function scrapeLatestTorrentsForCategory(category, page = 1) {
 }
 
 async function processTorrentRecord(record) {
-  if (await getStoredTorrentEntry(record)) {
-    return updateTorrentSeeders(record);
+  if (await checkAndUpdateTorrent(record)) {
+    return record;
   }
 
   const torrentFound = await thepiratebay.torrent(record.torrentId).catch(() => undefined);
diff --git a/scraper/scrapers/yts/yts_scraper.js b/scraper/scrapers/yts/yts_scraper.js
index a169ae3..d9df186 100644
--- a/scraper/scrapers/yts/yts_scraper.js
+++ b/scraper/scrapers/yts/yts_scraper.js
@@ -3,12 +3,12 @@ const Bottleneck = require('bottleneck');
 const yts = require('./yts_api');
 const { Type } = require('../../lib/types');
 const repository = require('../../lib/repository');
-const { createTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders } = require('../../lib/torrentEntries');
+const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
 
 const NAME = 'YTS';
 const UNTIL_PAGE = 2;
 
-const limiter = new Bottleneck({ maxConcurrent: 20 });
+const limiter = new Bottleneck({ maxConcurrent: 10 });
 
 async function scrape() {
   const scrapeStart = moment();
@@ -45,8 +45,8 @@ async function scrapeLatestTorrentsForCategory(page = 1) {
 }
 
 async function processTorrentRecord(record) {
-  if (await getStoredTorrentEntry(record)) {
-    return updateTorrentSeeders(record);
+  if (await checkAndUpdateTorrent(record)) {
+    return record;
   }
 
   if (!record || !record.size) {