diff --git a/lib/metadata.js b/lib/metadata.js index 655712b..5a1f479 100644 --- a/lib/metadata.js +++ b/lib/metadata.js @@ -7,59 +7,63 @@ const { Type } = require('./types'); const CINEMETA_URL = 'https://v3-cinemeta.strem.io'; const KITSU_URL = 'https://anime-kitsu.now.sh'; -function getMetadata(id, type = Type.SERIES ) { +function getMetadata(id, type = Type.SERIES) { const key = id.match(/^\d+$/) ? `kitsu:${id}` : id; + const metaType = type === Type.MOVIE ? Type.MOVIE : Type.SERIES; return cacheWrapMetadata(key, - () => needle('get', `${KITSU_URL}/meta/series/${key}.json`, { open_timeout: 60000 }) - .then((response) => { - const body = response.body; - if (body && body.meta && body.meta.id) { - return { - kitsuId: body.kitsu_id, - imdbId: body.imdb_id, - title: body.meta.name, - year: body.meta.year, - genres: body.meta.genres, - videos: body.meta.videos && body.meta.videos - .map((video) => video.imdbSeason - ? { - episode: video.episode, - imdbSeason: video.imdbSeason, - imdbEpisode: video.imdbEpisode - } - : { - season: video.season, - episode: video.episode, - kitsuId: video.kitsu_id, - kitsuEpisode: video.kitsuEpisode, - released: video.released - } - ), - episodeCount: body.meta.videos && Object.values(body.meta.videos - .filter((entry) => entry.season !== 0) - .sort((a, b) => a.season - b.season) - .reduce((map, next) => { - map[next.season] = map[next.season] + 1 || 1; - return map; - }, {})) - }; - } else { - throw new Error('No search results'); - } - }) - .catch((error) => { - throw new Error(`failed kitsu query ${kitsuId} due: ${error.message}`); - })); + () => needle('get', `${KITSU_URL}/meta/${metaType}/${key}.json`, { open_timeout: 60000 }) + .then((response) => { + const body = response.body; + if (body && body.meta && body.meta.id) { + return { + kitsuId: body.meta.kitsu_id, + imdbId: body.meta.imdb_id, + title: body.meta.name, + year: body.meta.year, + genres: body.meta.genres, + videos: (body.meta.videos || []) + .map((video) => video.imdbSeason + ? { + season: video.season, + episode: video.episode, + imdbSeason: video.imdbSeason, + imdbEpisode: video.imdbEpisode + } + : { + season: video.season, + episode: video.episode, + kitsuId: video.kitsu_id, + kitsuEpisode: video.kitsuEpisode, + released: video.released + } + ), + episodeCount: Object.values((body.meta.videos || []) + .filter((entry) => entry.season !== 0) + .sort((a, b) => a.season - b.season) + .reduce((map, next) => { + map[next.season] = map[next.season] + 1 || 1; + return map; + }, {})), + totalCount: body.meta.videos && body.meta.videos + .filter((entry) => entry.season !== 0).length + }; + } else { + throw new Error('No search results'); + } + }) + .catch((error) => { + throw new Error(`failed kitsu query ${kitsuId} due: ${error.message}`); + })); } function escapeTitle(title, hyphenEscape = true) { return title.toLowerCase() - .normalize('NFKD') // normalize non-ASCII characters - .replace(/[\u0300-\u036F]/g, '') - .replace(/&/g, 'and') - .replace(hyphenEscape ? /[.,_+ -]+/g : /[.,_+ ]+/g, ' ') // replace dots, commas or underscores with spaces - .replace(/[^\w- ()]/gi, '') // remove all non-alphanumeric chars - .trim(); + .normalize('NFKD') // normalize non-ASCII characters + .replace(/[\u0300-\u036F]/g, '') + .replace(/&/g, 'and') + .replace(hyphenEscape ? /[.,_+ -]+/g : /[.,_+ ]+/g, ' ') // replace dots, commas or underscores with spaces + .replace(/[^\w- ()]/gi, '') // remove all non-alphanumeric chars + .trim(); } async function getImdbId(info) { @@ -67,7 +71,7 @@ async function getImdbId(info) { return cacheWrapImdbId(key, () => new Promise((resolve, reject) => { - nameToImdb(info, function(err, res) { + nameToImdb(info, function (err, res) { if (res) { resolve(res); } else { @@ -85,14 +89,14 @@ async function getKitsuId(title) { const query = title.replace(/[;]+/g, ' ').replace(/[,%']+/g, ''); return cacheWrapImdbId(query, () => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 }) - .then((response) => { - const body = response.body; - if (body && body.metas && body.metas.length) { - return body.metas[0].id.replace('kitsu:', ''); - } else { - throw new Error('No search results'); - } - })); + .then((response) => { + const body = response.body; + if (body && body.metas && body.metas.length) { + return body.metas[0].id.replace('kitsu:', ''); + } else { + throw new Error('No search results'); + } + })); } module.exports = { escapeTitle, getMetadata, getImdbId, getKitsuId }; diff --git a/lib/torrent.js b/lib/torrent.js index 49865f2..f94fc47 100644 --- a/lib/torrent.js +++ b/lib/torrent.js @@ -6,11 +6,11 @@ const { retrieveTorrentFiles } = require('./cache'); const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20; const EXTENSIONS = ["3g2", "3gp", "avi", "flv", "mkv", "mov", "mp2", "mp4", "mpe", "mpeg", "mpg", "mpv", "webm", "wmv"]; -module.exports.updateCurrentSeeders = function(torrent) { - return Promise.resolve({ ...torrent, seeders: 0}); +module.exports.updateCurrentSeeders = function (torrent) { + return Promise.resolve({ ...torrent, seeders: torrent.seeders || 0 }); }; -module.exports.torrentFiles = function(torrent) { +module.exports.torrentFiles = function (torrent) { return filesFromTorrentFile(torrent) .catch(() => filesFromTorrentStream(torrent)) .catch(() => filesFromCache(torrent.infoHash)) diff --git a/lib/torrentFiles.js b/lib/torrentFiles.js index 4ceaa34..3d28497 100644 --- a/lib/torrentFiles.js +++ b/lib/torrentFiles.js @@ -10,9 +10,9 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) { parsedTorrentName.hasMovies = parsedTorrentName.complete || !!torrent.title.match(/movies?(?:\W|$)/); const metadata = await getMetadata(kitsuId || imdbId, torrent.type || Type.MOVIE).catch(() => undefined); - if (metadata && metadata.type !== torrent.type && torrent.type !== Type.ANIME) { - throw new Error(`Mismatching entry type for ${torrent.name}: ${torrent.type}!=${metadata.type}`); - } + // if (metadata && metadata.type !== torrent.type && torrent.type !== Type.ANIME) { + // throw new Error(`Mismatching entry type for ${torrent.name}: ${torrent.type}!=${metadata.type}`); + // } if (torrent.type === Type.MOVIE) { if (parsedTorrentName.complete) { @@ -33,23 +33,24 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) { }); } - return [ { + return [{ infoHash: torrent.infoHash, title: torrent.title, size: torrent.size, imdbId: imdbId || metadata && metadata.imdb_id, kitsuId: kitsuId || metadata && metadata.kitsu_id - } ]; + }]; } return getSeriesFiles(torrent, parsedTorrentName) .then((files) => files .filter((file) => file.size > MIN_SIZE) .map((file) => parseSeriesFile(file, parsedTorrentName))) - .then((files) => decomposeAbsoluteEpisodes(files, metadata)) + .then((files) => decomposeEpisodes(torrent, files, metadata)) + .then((files) => assignKitsuOrImdbEpisodes(files, metadata)) .then((files) => Promise.all(files.map(file => file.isMovie ? mapSeriesMovie(file, torrent.infoHash) - : mapSeriesEpisode(file, torrent.infoHash, imdbId)))) + : mapSeriesEpisode(file, torrent.infoHash, imdbId, kitsuId)))) .then((files) => files.reduce((a, b) => a.concat(b), [])) .catch((error) => { console.log(`Failed getting files for ${torrent.title}`, error.message); @@ -58,40 +59,43 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) { } async function getSeriesFiles(torrent, parsedTorrentName) { - if (parsedTorrentName.episode || parsedTorrentName.date) { - return [ { + if (parsedTorrentName.episode || (!parsedTorrentName.episodes && parsedTorrentName.date)) { + return [{ name: torrent.title, path: torrent.title, size: torrent.size - } ]; + }]; } return torrentFiles(torrent); } -async function mapSeriesEpisode(file, infoHash, imdbId) { - if (!file.episodes) { +async function mapSeriesEpisode(file, infoHash, imdbId, kitsuId) { + if (!file.episodes && !file.kitsuEpisodes) { return Promise.resolve([]); } - return Promise.resolve(file.episodes.map(episode => ({ + const episodeIndexes = [...(file.episodes || file.kitsuEpisodes).keys()]; + return Promise.resolve(episodeIndexes.map((index) => ({ infoHash: infoHash, fileIndex: file.fileIndex, title: file.path || file.name, size: file.size, - imdbId: imdbId, + imdbId: imdbId || file.imdbId, imdbSeason: file.season, - imdbEpisode: episode + imdbEpisode: file.episodes && file.episodes[index], + kitsuId: kitsuId || file.kitsuId, + kitsuEpisode: file.kitsuEpisodes && file.kitsuEpisodes[index] }))) } async function mapSeriesMovie(file, infoHash) { - return findMovieImdbId(file).then((imdbId) => [ { + return findMovieImdbId(file).then((imdbId) => [{ infoHash: infoHash, fileIndex: file.fileIndex, title: file.name, size: file.size, imdbId: imdbId - } ]) + }]) } function parseSeriesFile(file, parsedTorrentName) { @@ -104,12 +108,121 @@ function parseSeriesFile(file, parsedTorrentName) { const pathInfo = parse(folders[folders.length - 2]); fileInfo.season = pathInfo.season; } - fileInfo.isMovie = parsedTorrentName.hasMovies && !fileInfo.season && - (!fileInfo.episodes || !!fileInfo.year || !!file.name.match(/\b(?:\d+[ .]movie|movie[ .]\d+)\b/i)); + fileInfo.isMovie = (parsedTorrentName.hasMovies && !fileInfo.season && (!fileInfo.episodes || !!fileInfo.year)) + || (!fileInfo.season && !!file.name.match(/\b(?:\d+[ .]movie|movie[ .]\d+)\b/i)); return { ...file, ...fileInfo }; } +async function decomposeEpisodes(torrent, files, metadata = { episodeCount: {} }) { + if (files.every(file => !file.episodes)) { + return files; + } + // for anime type episodes are always absolute and for a single season + if (torrent.type === Type.ANIME) { + files + .filter(file => file.episodes) + .forEach(file => file.season = 1); + return files; + } + + const sortedEpisodes = files + .map(file => !file.isMovie && file.episodes || []) + .reduce((a, b) => a.concat(b), []) + .sort((a, b) => a - b); + + if (sortedEpisodes.every(ep => ep > 100) + && sortedEpisodes.slice(1).some((ep, index) => ep - sortedEpisodes[index] > 10) + && sortedEpisodes.every(ep => metadata.episodeCount[div100(ep) - 1] >= mod100(ep)) + && files.every(file => !file.season || file.episodes.every(ep => div100(ep) === file.season))) { + decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata); + } + + if ((files.every(file => !file.season) || files.some(file => file.season && file.episodes + && file.episodes.every(ep => metadata.episodeCount[file.season - 1] < ep))) + && (sortedEpisodes.length <= 1 || sortedEpisodes.slice(1).every((ep, i) => ep - sortedEpisodes[i] <= 2))) { + decomposeAbsoluteEpisodeFiles(torrent, files, metadata); + } + + return files; +} + +function decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata) { + // decompose concat season and episode files (ex. 101=S01E01) in case: + // 1. file has a season, but individual files are concatenated with that season (ex. path Season 5/511 - Prize + // Fighters.avi) + // 2. file does not have a season and the episode does not go out of range for the concat season + // episode count + files + .filter(file => file.episodes && file.episodes.every(ep => ep > 100)) + .filter(file => metadata.episodeCount[(file.season || div100(file.episodes[0])) - 1] < 100) + .filter(file => file.season && file.episodes.every(ep => div100(ep) === file.season) || !file.season) + .forEach(file => { + file.season = div100(file.episodes[0]); + file.episodes = file.episodes.map(ep => mod100(ep)) + }); + +} + +function decomposeAbsoluteEpisodeFiles(torrent, files, metadata) { + files + .filter(file => file.episodes && !file.isMovie) + .forEach(file => { + const seasonIdx = ([...metadata.episodeCount.keys()] + .find((i) => metadata.episodeCount.slice(0, i + 1).reduce((a, b) => a + b) >= file.episodes[0]) + + 1 || metadata.episodeCount.length) - 1; + + file.season = seasonIdx + 1; + file.episodes = file.episodes + .map(ep => ep - metadata.episodeCount.slice(0, seasonIdx).reduce((a, b) => a + b, 0)) + }); +} + +function assignKitsuOrImdbEpisodes(files, metadata) { + if (!metadata || !metadata.videos || !metadata.videos.length) { + return files; + } + + const seriesMapping = metadata.videos + .reduce((map, video) => { + const episodeMap = map[video.season] || {}; + episodeMap[video.episode] = video; + map[video.season] = episodeMap; + return map; + }, {}); + + if (metadata.videos.some(video => video.imdbSeason) || !metadata.imdbId) { + // kitsu episode info is the base + files + .filter(file => file.season && file.episodes) + .map(file => { + const seasonMapping = seriesMapping[file.season]; + file.kitsuEpisodes = file.episodes; + if (seasonMapping && seasonMapping[file.episodes[0]] && seasonMapping[file.episodes[0]].imdbSeason) { + file.imdbId = metadata.imdbId; + file.season = seasonMapping[file.episodes[0]].imdbSeason; + file.episodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].imdbEpisode); + } else { + // no imdb mapping available for episode + file.season = undefined; + file.episodes = undefined; + } + }) + } else if (metadata.videos.some(video => video.kitsuEpisode)) { + // imdb episode info is base + files + .filter(file => file.season && file.episodes) + .forEach(file => { + const seasonMapping = seriesMapping[file.season]; + if (seasonMapping && seasonMapping[file.episodes[0]] && seasonMapping[file.episodes[0]].kitsuId) { + file.kitsuId = seasonMapping[file.episodes[0]].kitsuId; + file.kitsuEpisodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].kitsuEpisode); + } + }) + } + return files; +} + function findMovieImdbId(title) { const parsedTitle = typeof title === 'string' ? parse(title) : title; const searchQuery = { @@ -120,20 +233,12 @@ function findMovieImdbId(title) { return getImdbId(searchQuery).catch((error) => undefined); } -async function decomposeAbsoluteEpisodes(files, metadata) { - if (files.every((file) => !file.episodes || file.episodes.every((ep) => ep < 100))) { - return files; // nothing to decompose - } +function div100(episode) { + return (episode / 100 >> 0); // floor to nearest int +} - // decompose if season is inside path, but individual files are concatenated ex. 101 (S01E01) - files - .filter(file => file.season && metadata.episodeCount[file.season] < 100) - .filter(file => file.episodes && file.episodes.every(ep => ep / 100 === file.season)) - .forEach(file => file.episodes = file.episodes.map(ep => ep % 100)); - // decompose if no season info is available, but individual files are concatenated ex. 101 (S01E01) - // based on total episodes count per season - - return files; +function mod100(episode) { + return episode % 100; } module.exports = { parseTorrentFiles }; \ No newline at end of file diff --git a/manual/manual.js b/manual/manual.js index 71f3ecb..9084cd3 100644 --- a/manual/manual.js +++ b/manual/manual.js @@ -25,14 +25,46 @@ async function addMissingEpisodes() { } async function findAllFiles() { + /* Test cases */ + /* Anime Season and absolute episodes */ const torrent = { infoHash: '6b95e5cfde9aaa71970a14f6bb6b9de19e2cbfa1', title: '[OMDA] Bleach + Filmes + Ovas (480p-720p x264 AAC-MP3) [rich_jc]', type: Type.SERIES }; const imdbId = 'tt0434665'; + /* Season and concat episodes */ + // const torrent = { + // infoHash: '235e8ed73b6cc9679b0842c39e17223c47b51f68', + // title: 'Daria - The Complete Animated Series [2010] DVDRip', + // type: Type.SERIES + // }; + // const imdbId = 'tt0118298'; + /* Series Season and absolute episodes */ + // const torrent = { + // infoHash: '16b4560beb05397c0eeb35487a997caf789243ea', + // title: 'Seinfeld - Complete Collection', + // type: Type.SERIES + // }; + // const imdbId = 'tt0098904'; + /* Series Season and episodes */ + // const torrent = { + // infoHash: 'd0f120c1bbfb988eb35b648e1c78ca3e5d45ef39', + // title: 'Seinfeld Complete Series-720p WEBrip EN-SUB x264-[MULVAcoded]', + // type: Type.SERIES + // }; + // const imdbId = 'tt0098904'; + /* Anime single absolute episode */ + // const torrent = { + // infoHash: 'e81e12880980086c476aa8bfdd22bed9d41b1dfe', + // title: '[Vision] Naruto Shippuuden - 451 (1080p x264 AAC) [rich_jc].mp4', + // size: 467361138, + // type: Type.SERIES + // }; + // const imdbId = 'tt0988824'; - return parseTorrentFiles(torrent, imdbId).then((files) => console.log(files)); + return parseTorrentFiles(torrent, imdbId) + .then((files) => console.log(files)); } //addMissingEpisodes().then(() => console.log('Finished')); diff --git a/package-lock.json b/package-lock.json index f41df8b..deb4906 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1714,7 +1714,7 @@ } }, "parse-torrent-title": { - "version": "git://github.com/TheBeastLT/parse-torrent-title.git#9f6abe9c606cc48d78962fe9496f2c34f1f7ea5c", + "version": "git://github.com/TheBeastLT/parse-torrent-title.git#e05b0e0121a944c1ab399e767640a0c8f9300e8e", "from": "git://github.com/TheBeastLT/parse-torrent-title.git#master" }, "parseurl": { diff --git a/scrapers/horriblesubs/horriblesubs_scraper.js b/scrapers/horriblesubs/horriblesubs_scraper.js index a809427..17506ee 100644 --- a/scrapers/horriblesubs/horriblesubs_scraper.js +++ b/scrapers/horriblesubs/horriblesubs_scraper.js @@ -28,7 +28,7 @@ async function _scrapeAllShows() { const shows = await horriblesubs.allShows(); return Promise.all(shows - .slice(0, 20) + .slice(0, 6) .map((show) => limiter.schedule(() => horriblesubs.showData(show) .then((showData) => _parseShowData(showData)) .catch((err) => console.log(err))))); @@ -86,7 +86,7 @@ async function _parseShowData(showData) { .map((mirror) => ({ provider: NAME, ...mirror, - title: `${episodeInfo.title} ${episodeInfo.episode} [${mirror.resolution}]`, + title: `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`, size: 300000000, type: Type.ANIME, uploadDate: episodeInfo.uploadDate, @@ -95,15 +95,26 @@ async function _parseShowData(showData) { .map((incompleteTorrent) => entryLimiter.schedule(() => checkIfExists(incompleteTorrent) .then((torrent) => torrent && updateCurrentSeeders(torrent)) .then((torrent) => torrent && parseTorrentFiles(torrent, undefined, kitsuId) - .then((files) => verifyFiles(files)) + .then((files) => verifyFiles(torrent, files)) .then((files) => repository.createTorrent(torrent) .then(() => files.forEach(file => repository.createFile(file))) .then(() => console.log(`Created entry for ${torrent.title}`))))))) .then(() => console.log(`${NAME}: finished scrapping ${showData.title} data`)); } -function verifyFiles(files) { +async function verifyFiles(torrent, files) { if (files && files.length) { + const existingFiles = await repository.getFiles({ infoHash: files[0].infoHash }) + .then((existing) => existing.reduce((map, file) => (map[file.fileIndex] = file, map), {})) + .catch(() => undefined); + if (existingFiles && Object.keys(existingFiles).length) { + return files + .map(file => ({ + ...file, + id: existingFiles[file.fileIndex] && existingFiles[file.fileIndex].id, + size: existingFiles[file.fileIndex] && existingFiles[file.fileIndex].size || file.size + })) + } return files; } throw new Error(`No video files found for: ${torrent.title}`);