updates episode decompose

This commit is contained in:
TheBeastLT
2020-02-02 21:16:30 +01:00
parent 051c50de3f
commit 30419f3c64
6 changed files with 249 additions and 97 deletions

View File

@@ -7,59 +7,63 @@ const { Type } = require('./types');
const CINEMETA_URL = 'https://v3-cinemeta.strem.io'; const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
const KITSU_URL = 'https://anime-kitsu.now.sh'; const KITSU_URL = 'https://anime-kitsu.now.sh';
function getMetadata(id, type = Type.SERIES ) { function getMetadata(id, type = Type.SERIES) {
const key = id.match(/^\d+$/) ? `kitsu:${id}` : id; const key = id.match(/^\d+$/) ? `kitsu:${id}` : id;
const metaType = type === Type.MOVIE ? Type.MOVIE : Type.SERIES;
return cacheWrapMetadata(key, return cacheWrapMetadata(key,
() => needle('get', `${KITSU_URL}/meta/series/${key}.json`, { open_timeout: 60000 }) () => needle('get', `${KITSU_URL}/meta/${metaType}/${key}.json`, { open_timeout: 60000 })
.then((response) => { .then((response) => {
const body = response.body; const body = response.body;
if (body && body.meta && body.meta.id) { if (body && body.meta && body.meta.id) {
return { return {
kitsuId: body.kitsu_id, kitsuId: body.meta.kitsu_id,
imdbId: body.imdb_id, imdbId: body.meta.imdb_id,
title: body.meta.name, title: body.meta.name,
year: body.meta.year, year: body.meta.year,
genres: body.meta.genres, genres: body.meta.genres,
videos: body.meta.videos && body.meta.videos videos: (body.meta.videos || [])
.map((video) => video.imdbSeason .map((video) => video.imdbSeason
? { ? {
episode: video.episode, season: video.season,
imdbSeason: video.imdbSeason, episode: video.episode,
imdbEpisode: video.imdbEpisode imdbSeason: video.imdbSeason,
} imdbEpisode: video.imdbEpisode
: { }
season: video.season, : {
episode: video.episode, season: video.season,
kitsuId: video.kitsu_id, episode: video.episode,
kitsuEpisode: video.kitsuEpisode, kitsuId: video.kitsu_id,
released: video.released kitsuEpisode: video.kitsuEpisode,
} released: video.released
), }
episodeCount: body.meta.videos && Object.values(body.meta.videos ),
.filter((entry) => entry.season !== 0) episodeCount: Object.values((body.meta.videos || [])
.sort((a, b) => a.season - b.season) .filter((entry) => entry.season !== 0)
.reduce((map, next) => { .sort((a, b) => a.season - b.season)
map[next.season] = map[next.season] + 1 || 1; .reduce((map, next) => {
return map; map[next.season] = map[next.season] + 1 || 1;
}, {})) return map;
}; }, {})),
} else { totalCount: body.meta.videos && body.meta.videos
throw new Error('No search results'); .filter((entry) => entry.season !== 0).length
} };
}) } else {
.catch((error) => { throw new Error('No search results');
throw new Error(`failed kitsu query ${kitsuId} due: ${error.message}`); }
})); })
.catch((error) => {
throw new Error(`failed kitsu query ${kitsuId} due: ${error.message}`);
}));
} }
function escapeTitle(title, hyphenEscape = true) { function escapeTitle(title, hyphenEscape = true) {
return title.toLowerCase() return title.toLowerCase()
.normalize('NFKD') // normalize non-ASCII characters .normalize('NFKD') // normalize non-ASCII characters
.replace(/[\u0300-\u036F]/g, '') .replace(/[\u0300-\u036F]/g, '')
.replace(/&/g, 'and') .replace(/&/g, 'and')
.replace(hyphenEscape ? /[.,_+ -]+/g : /[.,_+ ]+/g, ' ') // replace dots, commas or underscores with spaces .replace(hyphenEscape ? /[.,_+ -]+/g : /[.,_+ ]+/g, ' ') // replace dots, commas or underscores with spaces
.replace(/[^\w- ()]/gi, '') // remove all non-alphanumeric chars .replace(/[^\w- ()]/gi, '') // remove all non-alphanumeric chars
.trim(); .trim();
} }
async function getImdbId(info) { async function getImdbId(info) {
@@ -67,7 +71,7 @@ async function getImdbId(info) {
return cacheWrapImdbId(key, return cacheWrapImdbId(key,
() => new Promise((resolve, reject) => { () => new Promise((resolve, reject) => {
nameToImdb(info, function(err, res) { nameToImdb(info, function (err, res) {
if (res) { if (res) {
resolve(res); resolve(res);
} else { } else {
@@ -85,14 +89,14 @@ async function getKitsuId(title) {
const query = title.replace(/[;]+/g, ' ').replace(/[,%']+/g, ''); const query = title.replace(/[;]+/g, ' ').replace(/[,%']+/g, '');
return cacheWrapImdbId(query, return cacheWrapImdbId(query,
() => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 }) () => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 })
.then((response) => { .then((response) => {
const body = response.body; const body = response.body;
if (body && body.metas && body.metas.length) { if (body && body.metas && body.metas.length) {
return body.metas[0].id.replace('kitsu:', ''); return body.metas[0].id.replace('kitsu:', '');
} else { } else {
throw new Error('No search results'); throw new Error('No search results');
} }
})); }));
} }
module.exports = { escapeTitle, getMetadata, getImdbId, getKitsuId }; module.exports = { escapeTitle, getMetadata, getImdbId, getKitsuId };

View File

@@ -6,11 +6,11 @@ const { retrieveTorrentFiles } = require('./cache');
const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20; const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20;
const EXTENSIONS = ["3g2", "3gp", "avi", "flv", "mkv", "mov", "mp2", "mp4", "mpe", "mpeg", "mpg", "mpv", "webm", "wmv"]; const EXTENSIONS = ["3g2", "3gp", "avi", "flv", "mkv", "mov", "mp2", "mp4", "mpe", "mpeg", "mpg", "mpv", "webm", "wmv"];
module.exports.updateCurrentSeeders = function(torrent) { module.exports.updateCurrentSeeders = function (torrent) {
return Promise.resolve({ ...torrent, seeders: 0}); return Promise.resolve({ ...torrent, seeders: torrent.seeders || 0 });
}; };
module.exports.torrentFiles = function(torrent) { module.exports.torrentFiles = function (torrent) {
return filesFromTorrentFile(torrent) return filesFromTorrentFile(torrent)
.catch(() => filesFromTorrentStream(torrent)) .catch(() => filesFromTorrentStream(torrent))
.catch(() => filesFromCache(torrent.infoHash)) .catch(() => filesFromCache(torrent.infoHash))

View File

@@ -10,9 +10,9 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) {
parsedTorrentName.hasMovies = parsedTorrentName.complete || !!torrent.title.match(/movies?(?:\W|$)/); parsedTorrentName.hasMovies = parsedTorrentName.complete || !!torrent.title.match(/movies?(?:\W|$)/);
const metadata = await getMetadata(kitsuId || imdbId, torrent.type || Type.MOVIE).catch(() => undefined); const metadata = await getMetadata(kitsuId || imdbId, torrent.type || Type.MOVIE).catch(() => undefined);
if (metadata && metadata.type !== torrent.type && torrent.type !== Type.ANIME) { // if (metadata && metadata.type !== torrent.type && torrent.type !== Type.ANIME) {
throw new Error(`Mismatching entry type for ${torrent.name}: ${torrent.type}!=${metadata.type}`); // throw new Error(`Mismatching entry type for ${torrent.name}: ${torrent.type}!=${metadata.type}`);
} // }
if (torrent.type === Type.MOVIE) { if (torrent.type === Type.MOVIE) {
if (parsedTorrentName.complete) { if (parsedTorrentName.complete) {
@@ -33,23 +33,24 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) {
}); });
} }
return [ { return [{
infoHash: torrent.infoHash, infoHash: torrent.infoHash,
title: torrent.title, title: torrent.title,
size: torrent.size, size: torrent.size,
imdbId: imdbId || metadata && metadata.imdb_id, imdbId: imdbId || metadata && metadata.imdb_id,
kitsuId: kitsuId || metadata && metadata.kitsu_id kitsuId: kitsuId || metadata && metadata.kitsu_id
} ]; }];
} }
return getSeriesFiles(torrent, parsedTorrentName) return getSeriesFiles(torrent, parsedTorrentName)
.then((files) => files .then((files) => files
.filter((file) => file.size > MIN_SIZE) .filter((file) => file.size > MIN_SIZE)
.map((file) => parseSeriesFile(file, parsedTorrentName))) .map((file) => parseSeriesFile(file, parsedTorrentName)))
.then((files) => decomposeAbsoluteEpisodes(files, metadata)) .then((files) => decomposeEpisodes(torrent, files, metadata))
.then((files) => assignKitsuOrImdbEpisodes(files, metadata))
.then((files) => Promise.all(files.map(file => file.isMovie .then((files) => Promise.all(files.map(file => file.isMovie
? mapSeriesMovie(file, torrent.infoHash) ? mapSeriesMovie(file, torrent.infoHash)
: mapSeriesEpisode(file, torrent.infoHash, imdbId)))) : mapSeriesEpisode(file, torrent.infoHash, imdbId, kitsuId))))
.then((files) => files.reduce((a, b) => a.concat(b), [])) .then((files) => files.reduce((a, b) => a.concat(b), []))
.catch((error) => { .catch((error) => {
console.log(`Failed getting files for ${torrent.title}`, error.message); console.log(`Failed getting files for ${torrent.title}`, error.message);
@@ -58,40 +59,43 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) {
} }
async function getSeriesFiles(torrent, parsedTorrentName) { async function getSeriesFiles(torrent, parsedTorrentName) {
if (parsedTorrentName.episode || parsedTorrentName.date) { if (parsedTorrentName.episode || (!parsedTorrentName.episodes && parsedTorrentName.date)) {
return [ { return [{
name: torrent.title, name: torrent.title,
path: torrent.title, path: torrent.title,
size: torrent.size size: torrent.size
} ]; }];
} }
return torrentFiles(torrent); return torrentFiles(torrent);
} }
async function mapSeriesEpisode(file, infoHash, imdbId) { async function mapSeriesEpisode(file, infoHash, imdbId, kitsuId) {
if (!file.episodes) { if (!file.episodes && !file.kitsuEpisodes) {
return Promise.resolve([]); return Promise.resolve([]);
} }
return Promise.resolve(file.episodes.map(episode => ({ const episodeIndexes = [...(file.episodes || file.kitsuEpisodes).keys()];
return Promise.resolve(episodeIndexes.map((index) => ({
infoHash: infoHash, infoHash: infoHash,
fileIndex: file.fileIndex, fileIndex: file.fileIndex,
title: file.path || file.name, title: file.path || file.name,
size: file.size, size: file.size,
imdbId: imdbId, imdbId: imdbId || file.imdbId,
imdbSeason: file.season, imdbSeason: file.season,
imdbEpisode: episode imdbEpisode: file.episodes && file.episodes[index],
kitsuId: kitsuId || file.kitsuId,
kitsuEpisode: file.kitsuEpisodes && file.kitsuEpisodes[index]
}))) })))
} }
async function mapSeriesMovie(file, infoHash) { async function mapSeriesMovie(file, infoHash) {
return findMovieImdbId(file).then((imdbId) => [ { return findMovieImdbId(file).then((imdbId) => [{
infoHash: infoHash, infoHash: infoHash,
fileIndex: file.fileIndex, fileIndex: file.fileIndex,
title: file.name, title: file.name,
size: file.size, size: file.size,
imdbId: imdbId imdbId: imdbId
} ]) }])
} }
function parseSeriesFile(file, parsedTorrentName) { function parseSeriesFile(file, parsedTorrentName) {
@@ -104,12 +108,121 @@ function parseSeriesFile(file, parsedTorrentName) {
const pathInfo = parse(folders[folders.length - 2]); const pathInfo = parse(folders[folders.length - 2]);
fileInfo.season = pathInfo.season; fileInfo.season = pathInfo.season;
} }
fileInfo.isMovie = parsedTorrentName.hasMovies && !fileInfo.season && fileInfo.isMovie = (parsedTorrentName.hasMovies && !fileInfo.season && (!fileInfo.episodes || !!fileInfo.year))
(!fileInfo.episodes || !!fileInfo.year || !!file.name.match(/\b(?:\d+[ .]movie|movie[ .]\d+)\b/i)); || (!fileInfo.season && !!file.name.match(/\b(?:\d+[ .]movie|movie[ .]\d+)\b/i));
return { ...file, ...fileInfo }; return { ...file, ...fileInfo };
} }
/**
 * Normalises the season/episode numbering of parsed series files.
 *
 * Anime torrents get season 1 on every file that has episodes. For other
 * torrents the episode numbers are inspected and, when they match one of the
 * supported patterns, rewritten in place by
 * `decomposeConcatSeasonAndEpisodeFiles` (ex. 101 = S01E01) and/or
 * `decomposeAbsoluteEpisodeFiles` (absolute numbering across seasons).
 *
 * @param {object} torrent - torrent entry; only `type` is read here.
 * @param {object[]} files - parsed files (`season`, `episodes`, `isMovie` are read).
 * @param {object} [metadata] - series metadata; `episodeCount` is read as an
 *   array of per-season episode counts (index 0 = season 1).
 * @returns {Promise<object[]>} the same `files` array that was passed in.
 */
async function decomposeEpisodes(torrent, files, metadata = { episodeCount: [] }) {
  if (files.every(file => !file.episodes)) {
    return files;
  }
  // for anime type episodes are always absolute and for a single season
  if (torrent.type === Type.ANIME) {
    files
      .filter(file => file.episodes)
      .forEach(file => {
        file.season = 1;
      });
    return files;
  }

  // Both decompositions rely on per-season episode counts. Without them
  // (metadata lookup failed or returned no videos) leave the files untouched
  // instead of failing on a non-array `episodeCount`.
  const episodeCount = metadata && metadata.episodeCount;
  if (!Array.isArray(episodeCount) || !episodeCount.length) {
    return files;
  }

  const sortedEpisodes = files
    .map(file => !file.isMovie && file.episodes || [])
    .reduce((a, b) => a.concat(b), [])
    .sort((a, b) => a - b);

  // Concatenated season+episode numbers: every number is above 100, there is
  // at least one jump larger than 10, every number fits inside the episode
  // count of the season it encodes, and it agrees with any season already
  // known for the file. Files with a season but no episodes are ignored.
  if (sortedEpisodes.every(ep => ep > 100)
    && sortedEpisodes.slice(1).some((ep, index) => ep - sortedEpisodes[index] > 10)
    && sortedEpisodes.every(ep => episodeCount[div100(ep) - 1] >= mod100(ep))
    && files.every(file => !file.season || !file.episodes
      || file.episodes.every(ep => div100(ep) === file.season))) {
    decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata);
  }

  // Absolute numbering: no file has a season, or some file's episodes all
  // exceed the episode count of its season, and the numbers are (almost)
  // consecutive. Evaluated after the step above because that step may have
  // rewritten seasons/episodes.
  if ((files.every(file => !file.season) || files.some(file => file.season && file.episodes
    && file.episodes.every(ep => episodeCount[file.season - 1] < ep)))
    && (sortedEpisodes.length <= 1 || sortedEpisodes.slice(1).every((ep, i) => ep - sortedEpisodes[i] <= 2))) {
    decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
  }

  return files;
}
/**
 * Splits concatenated season+episode numbers (ex. 101 = S01E01) in place.
 *
 * A file is rewritten when:
 *  - all of its episode numbers are above 100,
 *  - the season it belongs to (its own season, or the one encoded in the first
 *    episode number) has fewer than 100 episodes according to
 *    `metadata.episodeCount`, and
 *  - it either has no season yet, or every episode number encodes that same
 *    season (ex. path "Season 5/511 - Prize Fighters.avi").
 *
 * @param {object} torrent - not used by this function.
 * @param {object[]} files - parsed files, mutated in place.
 * @param {object} metadata - `episodeCount` is indexed by season - 1.
 */
function decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata) {
  // Select first, mutate afterwards, so selection always sees the original values.
  const concatenated = [];
  for (const file of files) {
    const numbers = file.episodes;
    if (!numbers || !numbers.every(ep => ep > 100)) {
      continue;
    }
    const seasonNumber = file.season || div100(numbers[0]);
    if (!(metadata.episodeCount[seasonNumber - 1] < 100)) {
      continue;
    }
    if (file.season && !numbers.every(ep => div100(ep) === file.season)) {
      continue;
    }
    concatenated.push(file);
  }

  for (const file of concatenated) {
    file.season = div100(file.episodes[0]);
    file.episodes = file.episodes.map(ep => mod100(ep));
  }
}
/**
 * Converts absolute episode numbers into season-relative ones, in place.
 *
 * The season is the first one whose cumulative episode count reaches the
 * file's first episode number; if the number lies beyond all known seasons the
 * last season is used. Every episode number of the file is then reduced by the
 * total episode count of the preceding seasons. Files without episodes and
 * files flagged `isMovie` are left untouched.
 *
 * @param {object} torrent - not used by this function.
 * @param {object[]} files - parsed files, mutated in place.
 * @param {object} metadata - `episodeCount` must be an array of per-season
 *   episode counts (index 0 = season 1); when it is missing, not an array or
 *   empty nothing is changed.
 */
function decomposeAbsoluteEpisodeFiles(torrent, files, metadata) {
  const episodeCount = metadata && metadata.episodeCount;
  // Without season sizes there is nothing to map against; previously this
  // threw on non-array values and assigned season 0 for an empty array.
  if (!Array.isArray(episodeCount) || !episodeCount.length) {
    return;
  }

  // seasonEnds[i] = absolute number of the last episode of season i + 1
  const seasonEnds = [];
  let total = 0;
  for (const count of episodeCount) {
    total += count;
    seasonEnds.push(total);
  }

  files
    .filter(file => file.episodes && !file.isMovie)
    .forEach(file => {
      const matchedIdx = seasonEnds.findIndex(end => end >= file.episodes[0]);
      const seasonIdx = matchedIdx >= 0 ? matchedIdx : seasonEnds.length - 1;
      const offset = seasonIdx > 0 ? seasonEnds[seasonIdx - 1] : 0;
      file.season = seasonIdx + 1;
      file.episodes = file.episodes.map(ep => ep - offset);
    });
}
/**
 * Cross-references file season/episode numbers with the metadata videos and
 * attaches the counterpart numbering to each file, in place.
 *
 * - When any video carries `imdbSeason`, or the metadata has no `imdbId`, the
 *   file numbers are treated as kitsu numbers: they are copied to
 *   `kitsuEpisodes` and `season`/`episodes` are replaced with the imdb mapping
 *   (or cleared when the first episode has no imdb mapping).
 * - Otherwise, when any video carries `kitsuEpisode`, the file numbers are
 *   treated as imdb numbers and `kitsuId`/`kitsuEpisodes` are added.
 *
 * Only files having both `season` and `episodes` are considered.
 *
 * @param {object[]} files - parsed files, mutated in place.
 * @param {object} [metadata] - `videos` and `imdbId` are read.
 * @returns {object[]} the same `files` array.
 */
function assignKitsuOrImdbEpisodes(files, metadata) {
  const videos = metadata && metadata.videos;
  if (!videos || !videos.length) {
    return files;
  }

  // season -> episode -> video lookup
  const videosBySeason = {};
  for (const video of videos) {
    const seasonVideos = videosBySeason[video.season] || {};
    seasonVideos[video.episode] = video;
    videosBySeason[video.season] = seasonVideos;
  }

  const numberedFiles = () => files.filter(file => file.season && file.episodes);

  if (videos.some(video => video.imdbSeason) || !metadata.imdbId) {
    // kitsu episode info is the base
    for (const file of numberedFiles()) {
      const seasonVideos = videosBySeason[file.season];
      const firstVideo = seasonVideos && seasonVideos[file.episodes[0]];
      file.kitsuEpisodes = file.episodes;
      if (firstVideo && firstVideo.imdbSeason) {
        file.imdbId = metadata.imdbId;
        file.season = firstVideo.imdbSeason;
        file.episodes = file.episodes.map(ep => seasonVideos[ep] && seasonVideos[ep].imdbEpisode);
      } else {
        // no imdb mapping available for episode
        file.season = undefined;
        file.episodes = undefined;
      }
    }
    return files;
  }

  if (videos.some(video => video.kitsuEpisode)) {
    // imdb episode info is base
    for (const file of numberedFiles()) {
      const seasonVideos = videosBySeason[file.season];
      const firstVideo = seasonVideos && seasonVideos[file.episodes[0]];
      if (firstVideo && firstVideo.kitsuId) {
        file.kitsuId = firstVideo.kitsuId;
        file.kitsuEpisodes = file.episodes.map(ep => seasonVideos[ep] && seasonVideos[ep].kitsuEpisode);
      }
    }
  }
  return files;
}
function findMovieImdbId(title) { function findMovieImdbId(title) {
const parsedTitle = typeof title === 'string' ? parse(title) : title; const parsedTitle = typeof title === 'string' ? parse(title) : title;
const searchQuery = { const searchQuery = {
@@ -120,20 +233,12 @@ function findMovieImdbId(title) {
return getImdbId(searchQuery).catch((error) => undefined); return getImdbId(searchQuery).catch((error) => undefined);
} }
async function decomposeAbsoluteEpisodes(files, metadata) { function div100(episode) {
if (files.every((file) => !file.episodes || file.episodes.every((ep) => ep < 100))) { return (episode / 100 >> 0); // floor to nearest int
return files; // nothing to decompose }
}
// decompose if season is inside path, but individual files are concatenated ex. 101 (S01E01) function mod100(episode) {
files return episode % 100;
.filter(file => file.season && metadata.episodeCount[file.season] < 100)
.filter(file => file.episodes && file.episodes.every(ep => ep / 100 === file.season))
.forEach(file => file.episodes = file.episodes.map(ep => ep % 100));
// decompose if no season info is available, but individual files are concatenated ex. 101 (S01E01)
// based on total episodes count per season
return files;
} }
module.exports = { parseTorrentFiles }; module.exports = { parseTorrentFiles };

View File

@@ -25,14 +25,46 @@ async function addMissingEpisodes() {
} }
async function findAllFiles() { async function findAllFiles() {
/* Test cases */
/* Anime Season and absolute episodes */
const torrent = { const torrent = {
infoHash: '6b95e5cfde9aaa71970a14f6bb6b9de19e2cbfa1', infoHash: '6b95e5cfde9aaa71970a14f6bb6b9de19e2cbfa1',
title: '[OMDA] Bleach + Filmes + Ovas (480p-720p x264 AAC-MP3) [rich_jc]', title: '[OMDA] Bleach + Filmes + Ovas (480p-720p x264 AAC-MP3) [rich_jc]',
type: Type.SERIES type: Type.SERIES
}; };
const imdbId = 'tt0434665'; const imdbId = 'tt0434665';
/* Season and concat episodes */
// const torrent = {
// infoHash: '235e8ed73b6cc9679b0842c39e17223c47b51f68',
// title: 'Daria - The Complete Animated Series [2010] DVDRip',
// type: Type.SERIES
// };
// const imdbId = 'tt0118298';
/* Series Season and absolute episodes */
// const torrent = {
// infoHash: '16b4560beb05397c0eeb35487a997caf789243ea',
// title: 'Seinfeld - Complete Collection',
// type: Type.SERIES
// };
// const imdbId = 'tt0098904';
/* Series Season and episodes */
// const torrent = {
// infoHash: 'd0f120c1bbfb988eb35b648e1c78ca3e5d45ef39',
// title: 'Seinfeld Complete Series-720p WEBrip EN-SUB x264-[MULVAcoded]',
// type: Type.SERIES
// };
// const imdbId = 'tt0098904';
/* Anime single absolute episode */
// const torrent = {
// infoHash: 'e81e12880980086c476aa8bfdd22bed9d41b1dfe',
// title: '[Vision] Naruto Shippuuden - 451 (1080p x264 AAC) [rich_jc].mp4',
// size: 467361138,
// type: Type.SERIES
// };
// const imdbId = 'tt0988824';
return parseTorrentFiles(torrent, imdbId).then((files) => console.log(files)); return parseTorrentFiles(torrent, imdbId)
.then((files) => console.log(files));
} }
//addMissingEpisodes().then(() => console.log('Finished')); //addMissingEpisodes().then(() => console.log('Finished'));

2
package-lock.json generated
View File

@@ -1714,7 +1714,7 @@
} }
}, },
"parse-torrent-title": { "parse-torrent-title": {
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#9f6abe9c606cc48d78962fe9496f2c34f1f7ea5c", "version": "git://github.com/TheBeastLT/parse-torrent-title.git#e05b0e0121a944c1ab399e767640a0c8f9300e8e",
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#master" "from": "git://github.com/TheBeastLT/parse-torrent-title.git#master"
}, },
"parseurl": { "parseurl": {

View File

@@ -28,7 +28,7 @@ async function _scrapeAllShows() {
const shows = await horriblesubs.allShows(); const shows = await horriblesubs.allShows();
return Promise.all(shows return Promise.all(shows
.slice(0, 20) .slice(0, 6)
.map((show) => limiter.schedule(() => horriblesubs.showData(show) .map((show) => limiter.schedule(() => horriblesubs.showData(show)
.then((showData) => _parseShowData(showData)) .then((showData) => _parseShowData(showData))
.catch((err) => console.log(err))))); .catch((err) => console.log(err)))));
@@ -86,7 +86,7 @@ async function _parseShowData(showData) {
.map((mirror) => ({ .map((mirror) => ({
provider: NAME, provider: NAME,
...mirror, ...mirror,
title: `${episodeInfo.title} ${episodeInfo.episode} [${mirror.resolution}]`, title: `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`,
size: 300000000, size: 300000000,
type: Type.ANIME, type: Type.ANIME,
uploadDate: episodeInfo.uploadDate, uploadDate: episodeInfo.uploadDate,
@@ -95,15 +95,26 @@ async function _parseShowData(showData) {
.map((incompleteTorrent) => entryLimiter.schedule(() => checkIfExists(incompleteTorrent) .map((incompleteTorrent) => entryLimiter.schedule(() => checkIfExists(incompleteTorrent)
.then((torrent) => torrent && updateCurrentSeeders(torrent)) .then((torrent) => torrent && updateCurrentSeeders(torrent))
.then((torrent) => torrent && parseTorrentFiles(torrent, undefined, kitsuId) .then((torrent) => torrent && parseTorrentFiles(torrent, undefined, kitsuId)
.then((files) => verifyFiles(files)) .then((files) => verifyFiles(torrent, files))
.then((files) => repository.createTorrent(torrent) .then((files) => repository.createTorrent(torrent)
.then(() => files.forEach(file => repository.createFile(file))) .then(() => files.forEach(file => repository.createFile(file)))
.then(() => console.log(`Created entry for ${torrent.title}`))))))) .then(() => console.log(`Created entry for ${torrent.title}`)))))))
.then(() => console.log(`${NAME}: finished scrapping ${showData.title} data`)); .then(() => console.log(`${NAME}: finished scrapping ${showData.title} data`));
} }
function verifyFiles(files) { async function verifyFiles(torrent, files) {
if (files && files.length) { if (files && files.length) {
const existingFiles = await repository.getFiles({ infoHash: files[0].infoHash })
.then((existing) => existing.reduce((map, file) => (map[file.fileIndex] = file, map), {}))
.catch(() => undefined);
if (existingFiles && Object.keys(existingFiles).length) {
return files
.map(file => ({
...file,
id: existingFiles[file.fileIndex] && existingFiles[file.fileIndex].id,
size: existingFiles[file.fileIndex] && existingFiles[file.fileIndex].size || file.size
}))
}
return files; return files;
} }
throw new Error(`No video files found for: ${torrent.title}`); throw new Error(`No video files found for: ${torrent.title}`);