From 4507b1bd1d7d7ad183f00500816434720035aca3 Mon Sep 17 00:00:00 2001 From: TheBeastLT Date: Sat, 2 May 2020 19:48:02 +0200 Subject: [PATCH] [scraper] assign subtitles to videos --- package-lock.json | 4 +-- package.json | 2 +- scraper/lib/repository.js | 18 ++++++++-- scraper/lib/torrentEntries.js | 4 ++- scraper/lib/torrentSubtitles.js | 63 +++++++++++++++++++++++++++++++++ scraper/manual/manual.js | 49 ++++++++++++------------- 6 files changed, 108 insertions(+), 32 deletions(-) create mode 100644 scraper/lib/torrentSubtitles.js diff --git a/package-lock.json b/package-lock.json index bf6f8cc..d9c27f0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1687,8 +1687,8 @@ } }, "parse-torrent-title": { - "version": "git://github.com/TheBeastLT/parse-torrent-title.git#213d188496d5645bcbfff5c3b5b3839df486260d", - "from": "git://github.com/TheBeastLT/parse-torrent-title.git#213d188496d5645bcbfff5c3b5b3839df486260d", + "version": "git://github.com/TheBeastLT/parse-torrent-title.git#49be4a2b4ab14e26fca4e52de82f6ad08948fdc7", + "from": "git://github.com/TheBeastLT/parse-torrent-title.git#49be4a2b4ab14e26fca4e52de82f6ad08948fdc7", "requires": { "moment": "^2.24.0" } diff --git a/package.json b/package.json index 933e348..3f3196d 100644 --- a/package.json +++ b/package.json @@ -31,7 +31,7 @@ "node-schedule": "^1.3.2", "nodejs-bing": "^0.1.0", "parse-torrent": "^6.1.2", - "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#213d188496d5645bcbfff5c3b5b3839df486260d", + "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#49be4a2b4ab14e26fca4e52de82f6ad08948fdc7", "pg": "^7.8.2", "pg-hstore": "^2.3.2", "rarbg-api": "^1.1.3", diff --git a/scraper/lib/repository.js b/scraper/lib/repository.js index 1108b3e..c1f503f 100644 --- a/scraper/lib/repository.js +++ b/scraper/lib/repository.js @@ -204,6 +204,9 @@ function createFile(file) { if (file.id) { return File.upsert(file).then(() => upsertSubtitles(file.id, file.subtitles)); } + if (file.subtitles && file.subtitles.length) { + file.subtitles = file.subtitles.map(subtitle => ({ infoHash: file.infoHash, title: subtitle.path, ...subtitle })); + } return File.create(file, { include: [Subtitle] }); } @@ -229,8 +232,13 @@ function createSubtitles(infoHash, subtitles) { function upsertSubtitles(file, subtitles) { if (file.id && subtitles && subtitles.length) { return Promises.sequence(subtitles - .map(subtitle => ({ fileId: file.id, infoHash: file.infoHash, title: subtitle.path, ...subtitle })) - .map(subtitle => () => Subtitle.upsert(subtitle))); + .map(subtitle => { + subtitle.fileId = file.id; + subtitle.infoHash = subtitle.infoHash || file.infoHash; + subtitle.title = subtitle.title || subtitle.path; + return subtitle; + }) + .map(subtitle => () => subtitle.dataValues ? subtitle.save() : Subtitle.upsert(subtitle))); } return Promise.resolve(); } @@ -239,6 +247,10 @@ function getSubtitles(torrent) { return Subtitle.findAll({ where: { infoHash: torrent.infoHash } }); } +function getUnassignedSubtitles() { + return Subtitle.findAll({ where: { fileId: null } }); +} + function createContents(infoHash, contents) { if (contents && contents.length) { return Content.bulkCreate(contents.map(content => ({ infoHash, ...content }))) @@ -278,7 +290,9 @@ module.exports = { getFilesBasedOnTitle, deleteFile, createSubtitles, + upsertSubtitles, getSubtitles, + getUnassignedSubtitles, createContents, getContents, getSkipTorrent, diff --git a/scraper/lib/torrentEntries.js b/scraper/lib/torrentEntries.js index 1d88b95..47c5f85 100644 --- a/scraper/lib/torrentEntries.js +++ b/scraper/lib/torrentEntries.js @@ -3,6 +3,7 @@ const { Type } = require('./types'); const repository = require('./repository'); const { getImdbId, getKitsuId } = require('./metadata'); const { parseTorrentFiles } = require('./torrentFiles'); +const { assignSubtitles } = require('./torrentSubtitles'); async function createTorrentEntry(torrent, overwrite = false) { const titleInfo = parse(torrent.title); @@ -35,6 +36,7 @@ async function createTorrentEntry(torrent, overwrite = false) { const { contents, videos, subtitles } = await parseTorrentFiles(torrent) .then(torrentContents => overwrite ? overwriteExistingFiles(torrent, torrentContents) : torrentContents) + .then(torrentContents => assignSubtitles(torrentContents)) .catch(error => { console.log(`Failed getting files for ${torrent.title}`, error.message); return {}; @@ -68,7 +70,7 @@ async function overwriteExistingFiles(torrent, torrentContents) { : existingFiles[file.fileIndex !== undefined ? file.fileIndex : null]; if (mapping) { const originalFile = mapping.shift(); - return { ...file, id: originalFile.id, size: originalFile.size || file.size }; + return { id: originalFile.id, ...file }; } return file; }); diff --git a/scraper/lib/torrentSubtitles.js b/scraper/lib/torrentSubtitles.js new file mode 100644 index 0000000..2a0125c --- /dev/null +++ b/scraper/lib/torrentSubtitles.js @@ -0,0 +1,63 @@ +const { parse } = require('parse-torrent-title'); + +function assignSubtitles({ contents, videos, subtitles }) { + if (videos && videos.length && subtitles && subtitles.length) { + if (videos.length === 1) { + videos[0].subtitles = subtitles; + return { contents, videos, subtitles: [] }; + } + + const parsedVideos = videos + .map(video => _parseVideo(video)); + const assignedSubs = subtitles + .map(subtitle => ({ subtitle, video: _mostProbableSubtitleVideo(subtitle, parsedVideos) })); + const unassignedSubs = assignedSubs.filter(assignedSub => !assignedSub.video); + + assignedSubs + .filter(assignedSub => assignedSub.video) + .forEach(assignedSub => + assignedSub.video.subtitles = (assignedSub.video.subtitles || []).concat(assignedSub.subtitle)) + return { contents, videos, subtitles: unassignedSubs }; + } + return { contents, videos, subtitles }; +} + +function _parseVideo(video) { + return { + videoFile: video, + fileName: video.title.replace(/\.(\w{2,4})$/, ''), + folderName: video.title.replace(/\/?[^/]+$/, ''), + ...parse(video.title) + }; +} + +function _mostProbableSubtitleVideo(subtitle, parsedVideos) { + const subTitle = subtitle.title || subtitle.path; + const parsedSub = parse(subTitle); + const byFileName = parsedVideos.filter(video => subTitle.includes(video.fileName)); + if (byFileName.length === 1) { + return byFileName[0].videoFile; + } + const byTitleSeasonEpisode = parsedVideos.filter(video => parsedSub.title.includes(video.title) + && video.seasons === parsedSub.seasons + && JSON.stringify(video.episodes) === JSON.stringify(parsedSub.episodes)); + if (byTitleSeasonEpisode.length === 1) { + return byTitleSeasonEpisode[0].videoFile; + } + const bySeasonEpisode = parsedVideos.filter(video => video.seasons === parsedSub.seasons + && video.episodes === parsedSub.episodes); + if (bySeasonEpisode.length === 1) { + return bySeasonEpisode[0].videoFile; + } + const byTitle = parsedVideos.filter(video => parsedSub.title.includes(video.title)); + if (byTitle.length === 1) { + return byTitle[0].videoFile; + } + const byEpisode = parsedVideos.filter(video => JSON.stringify(video.episodes) === JSON.stringify(parsedSub.episodes)); + if (byEpisode.length === 1) { + return byEpisode[0].videoFile; + } + return undefined; +} + +module.exports = { assignSubtitles } \ No newline at end of file diff --git a/scraper/manual/manual.js b/scraper/manual/manual.js index 18e0936..906c719 100644 --- a/scraper/manual/manual.js +++ b/scraper/manual/manual.js @@ -3,33 +3,11 @@ const { parse } = require('parse-torrent-title'); const repository = require('../lib/repository'); const { getImdbId } = require('../lib/metadata'); const { parseTorrentFiles } = require('../lib/torrentFiles'); +const { assignSubtitles } = require('../lib/torrentSubtitles'); const { Type } = require('../lib/types'); const limiter = new Bottleneck({ maxConcurrent: 40 }); -async function addMissingEpisodes() { - const torrent = { infoHash: '0ec780c2c7f8d5b38e61827f0b53c77c3d22f955' }; - const torrentFiles = await require('../lib/torrent').torrentFiles(torrent); - const storedFiles = await repository.getFiles(torrent) - .then((files) => files.reduce((map, next) => (map[next.fileIndex] = next, map), {})); - const imdbId = Object.values(storedFiles)[0].imdbId; - - torrentFiles - .filter((file) => !storedFiles[file.fileIndex !== undefined ? file.fileIndex : null]) - .map((file) => ({ - infoHash: torrent.infoHash, - fileIndex: file.fileIndex, - title: file.name, - size: file.size, - imdbId: imdbId, - imdbSeason: parse(file.name).season, - imdbEpisode: parse(file.name).episode, - // imdbSeason: parseInt(file.name.match(/(\d+)[ .]?-[ .]?\d+/)[1], 10), - // imdbEpisode: parseInt(file.name.match(/\d+[ .]?-[ .]?(\d+)/)[1], 10), - })) - .forEach((file) => repository.createFile(file)); -} - async function updateMovieCollections() { const collectionFiles = await repository.getFilesBasedOnTitle('logy') .then(files => files.filter(file => file.fileIndex === null)) @@ -90,6 +68,7 @@ async function reapplyEpisodeDecomposing(infoHash, includeSourceFiles = true) { const imdbId = storedFiles.length && storedFiles[0].imdbId || await getImdbId(parse(torrent.title)); return parseTorrentFiles({ ...torrent.get(), imdbId, files }) + .then(torrentContents => torrentContents.videos) .then(newFiles => newFiles.map(file => { const fileIndex = file.fileIndex !== undefined ? file.fileIndex : null; const mapping = fileIndexMap[fileIndex]; @@ -113,6 +92,24 @@ async function reapplyEpisodeDecomposing(infoHash, includeSourceFiles = true) { .then(() => console.log(`Updated files for ${torrent.title}`)); } +async function assignSubs() { + const unassignedSubs = await repository.getUnassignedSubtitles() + .then(subs => subs.reduce((map, sub) => { + map[sub.infoHash] = (map[sub.infoHash] || []).concat(sub); + return map; + }, {})); + const infoHashes = Object.keys(unassignedSubs); + + return Promise.all(infoHashes.map(async infoHash => { + const videos = await repository.getFiles({ infoHash }); + const subtitles = unassignedSubs[infoHash]; + const assignedContents = assignSubtitles({ videos, subtitles }); + return Promise.all(assignedContents.videos + .filter(video => video.subtitles) + .map(video => repository.upsertSubtitles(video, video.subtitles))); + })); +} + async function findAllFiles() { /* Test cases */ /* Anime Season and absolute episodes */ @@ -183,13 +180,13 @@ async function findAllFiles() { // }; return parseTorrentFiles(torrent) - .then((files) => console.log(files)); + .then((files) => console.log(files.videos)); } -//addMissingEpisodes().then(() => console.log('Finished')); //findAllFiles().then(() => console.log('Finished')); //updateMovieCollections().then(() => console.log('Finished')); reapplyEpisodeDecomposing('0b6c0f0692bdb151efb87e3de90e46e3b177444e', false).then(() => console.log('Finished')); //reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished')); //reapplyDecomposingToTorrentsOnRegex('.*Boku no Hero Academia.*').then(() => console.log('Finished')); -//reapplyManualHashes().then(() => console.log('Finished')); \ No newline at end of file +//reapplyManualHashes().then(() => console.log('Finished')); +// assignSubs().then(() => console.log('Finished')); \ No newline at end of file