Updates the package structure and the initial HorribleSubs scraper (WIP)

This commit is contained in:
TheBeastLT
2020-01-07 13:33:46 +01:00
parent 643917939b
commit 051c50de3f
12 changed files with 264 additions and 280 deletions

View File

@@ -11,6 +11,21 @@ const GLOBAL_TTL = process.env.METADATA_TTL || 7 * 24 * 60 * 60; // 7 days
const MONGO_URI = process.env.MONGODB_URI;
const cache = initiateCache();
const torrentFilesCache = initiateTorrentFilesCache();
/**
 * Builds the cache that is queried for torrent file lists.
 *
 * When MONGO_URI is set, the cache uses `mangodbStore` with the
 * 'cacheManager' collection. When it is not set, an in-memory store is
 * returned instead, so callers always receive an object exposing `get`.
 *
 * @returns {object} cache instance created by `cacheManager.caching`
 */
function initiateTorrentFilesCache() {
  if (!MONGO_URI) {
    // Previously this path fell through and returned undefined, which made
    // the first `torrentFilesCache.get(...)` call throw a TypeError instead
    // of reporting a cache miss.
    // NOTE(review): assumes `cacheManager` is node-cache-manager, whose
    // built-in store is selected with `store: 'memory'` - confirm.
    return cacheManager.caching({
      store: 'memory',
      ttl: GLOBAL_TTL
    });
  }

  return cacheManager.caching({
    store: mangodbStore,
    uri: MONGO_URI,
    options: {
      collection: 'cacheManager',
    },
    ttl: GLOBAL_TTL,
    ignoreCacheErrors: true
  });
}
function initiateCache() {
if (MONGO_URI) {
@@ -18,7 +33,7 @@ function initiateCache() {
store: mangodbStore,
uri: MONGO_URI,
options: {
collection: 'cacheManager',
collection: 'torrentio_scraper_collection',
},
ttl: GLOBAL_TTL,
ignoreCacheErrors: true
@@ -32,7 +47,7 @@ function initiateCache() {
}
function retrieveTorrentFiles(infoHash) {
return cache.get(`${TORRENT_FILES_KEY_PREFIX}:${infoHash}`)
return torrentFilesCache.get(`${TORRENT_FILES_KEY_PREFIX}:${infoHash}`)
.then((results) => {
if (!results) {
throw new Error('No cached files found');

View File

@@ -2,52 +2,46 @@ const needle = require('needle');
const nameToImdb = require('name-to-imdb');
const bing = require('nodejs-bing');
const { cacheWrapImdbId, cacheWrapMetadata } = require('./cache');
const { Type } = require('./types');
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
const KITSU_URL = 'https://anime-kitsu.now.sh';
/**
 * Looks up metadata for an IMDb id through the Cinemeta endpoint, going
 * through `cacheWrapMetadata` keyed by the id.
 *
 * @param {string} imdbId id used both as cache key and in the request path
 * @param {string} type meta type segment of the request path
 * @returns {Promise<object>} whatever `cacheWrapMetadata` returns; the wrapped
 *   loader yields { imdbId, title, year, genres, totalEpisodes, episodeCount }
 *   and rejects with a "failed cinemeta query" error on any failure
 */
function getMetadata(imdbId, type) {
  // Number of videos per season (season 0 excluded), listed in the order the
  // seasons end up in after sorting by season number.
  const episodesPerSeason = (videos) => {
    const counts = {};
    const regularVideos = videos
        .filter((entry) => entry.season !== 0)
        .sort((a, b) => a.season - b.season);
    for (const entry of regularVideos) {
      counts[entry.season] = (counts[entry.season] || 0) + 1;
    }
    return Object.values(counts);
  };

  const queryCinemeta = async () => {
    try {
      const response = await needle('get', `${CINEMETA_URL}/meta/${type}/${imdbId}.json`, { open_timeout: 60000 });
      const meta = response.body && response.body.meta;
      if (!meta || !meta.name) {
        throw new Error('No search results');
      }

      const videos = meta.videos;
      return {
        imdbId: imdbId,
        title: meta.name,
        year: meta.year,
        genres: meta.genres,
        totalEpisodes: videos && videos.filter((video) => video.season > 0).length,
        episodeCount: videos && episodesPerSeason(videos)
      };
    } catch (error) {
      // Every failure (request, missing result) is reported under one message.
      throw new Error(`failed cinemeta query ${imdbId} due: ${error.message}`);
    }
  };

  return cacheWrapMetadata(imdbId, queryCinemeta);
}
function getKitsuMetadata(kitsuId) {
const key = kitsuId.startsWith('kitsu:') ? kitsuId : `kitsu:${kitsuId}`;
function getMetadata(id, type = Type.SERIES ) {
const key = id.match(/^\d+$/) ? `kitsu:${id}` : id;
return cacheWrapMetadata(key,
() => needle('get', `${KITSU_URL}/meta/series/${key}.json`, { open_timeout: 60000 })
.then((response) => {
const body = response.body;
if (body && body.meta && body.meta.id) {
return {
...body.meta,
videos: undefined,
totalEpisodes: body.meta.videos && body.meta.videos
.filter(video => video.season > 0).length
kitsuId: body.kitsu_id,
imdbId: body.imdb_id,
title: body.meta.name,
year: body.meta.year,
genres: body.meta.genres,
videos: body.meta.videos && body.meta.videos
.map((video) => video.imdbSeason
? {
episode: video.episode,
imdbSeason: video.imdbSeason,
imdbEpisode: video.imdbEpisode
}
: {
season: video.season,
episode: video.episode,
kitsuId: video.kitsu_id,
kitsuEpisode: video.kitsuEpisode,
released: video.released
}
),
episodeCount: body.meta.videos && Object.values(body.meta.videos
.filter((entry) => entry.season !== 0)
.sort((a, b) => a.season - b.season)
.reduce((map, next) => {
map[next.season] = map[next.season] + 1 || 1;
return map;
}, {}))
};
} else {
throw new Error('No search results');
@@ -101,4 +95,4 @@ async function getKitsuId(title) {
}));
}
module.exports = { escapeTitle, getMetadata, getImdbId, getKitsuMetadata, getKitsuId };
module.exports = { escapeTitle, getMetadata, getImdbId, getKitsuId };

View File

@@ -6,6 +6,10 @@ const { retrieveTorrentFiles } = require('./cache');
const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20;
const EXTENSIONS = ["3g2", "3gp", "avi", "flv", "mkv", "mov", "mp2", "mp4", "mpe", "mpeg", "mpg", "mpv", "webm", "wmv"];
/**
 * Resolves with a shallow copy of the given torrent whose `seeders` is 0.
 * NOTE(review): no seeder lookup happens here; looks like a placeholder - confirm.
 *
 * @param {object} torrent torrent entry to copy
 * @returns {Promise<object>} copy of `torrent` with `seeders: 0`
 */
module.exports.updateCurrentSeeders = (torrent) => {
  const withResetSeeders = Object.assign({}, torrent, { seeders: 0 });
  return Promise.resolve(withResetSeeders);
};
module.exports.torrentFiles = function(torrent) {
return filesFromTorrentFile(torrent)
.catch(() => filesFromTorrentStream(torrent))
@@ -28,7 +32,7 @@ async function filesFromTorrentFile(torrent) {
return Promise.reject(new Error("no torrentLink"));
}
needle('get', torrent.torrentLink, { open_timeout: 2000 })
return needle('get', torrent.torrentLink, { open_timeout: 10000 })
.then((response) => {
if (!response.body || response.statusCode !== 200) {
throw new Error('torrent not found')

View File

@@ -5,8 +5,14 @@ const { Type } = require('./types');
const MIN_SIZE = 20 * 1024 * 1024; // 20 MB
async function parseTorrentFiles(torrent, imdbId) {
async function parseTorrentFiles(torrent, imdbId, kitsuId) {
const parsedTorrentName = parse(torrent.title);
parsedTorrentName.hasMovies = parsedTorrentName.complete || !!torrent.title.match(/movies?(?:\W|$)/);
const metadata = await getMetadata(kitsuId || imdbId, torrent.type || Type.MOVIE).catch(() => undefined);
if (metadata && metadata.type !== torrent.type && torrent.type !== Type.ANIME) {
throw new Error(`Mismatching entry type for ${torrent.name}: ${torrent.type}!=${metadata.type}`);
}
if (torrent.type === Type.MOVIE) {
if (parsedTorrentName.complete) {
@@ -27,31 +33,20 @@ async function parseTorrentFiles(torrent, imdbId) {
});
}
return [{
return [ {
infoHash: torrent.infoHash,
title: torrent.title,
size: torrent.size,
imdbId: imdbId,
}];
imdbId: imdbId || metadata && metadata.imdb_id,
kitsuId: kitsuId || metadata && metadata.kitsu_id
} ];
}
if (parsedTorrentName.season && parsedTorrentName.episode) {
return [{
infoHash: torrent.infoHash,
title: torrent.title,
size: torrent.size,
imdbId: imdbId,
imdbSeason: parsedTorrentName.season,
imdbEpisode: parsedTorrentName.episode
}];
}
parsedTorrentName.hasMovies = parsedTorrentName.complete || !!torrent.title.match(/movies?(?:\W|$)/);
return torrentFiles(torrent)
return getSeriesFiles(torrent, parsedTorrentName)
.then((files) => files
.filter((file) => file.size > MIN_SIZE)
.map((file) => parseSeriesFile(file, parsedTorrentName)))
.then((files) => decomposeAbsoluteEpisodes(files, torrent, imdbId))
.then((files) => decomposeAbsoluteEpisodes(files, metadata))
.then((files) => Promise.all(files.map(file => file.isMovie
? mapSeriesMovie(file, torrent.infoHash)
: mapSeriesEpisode(file, torrent.infoHash, imdbId))))
@@ -62,6 +57,18 @@ async function parseTorrentFiles(torrent, imdbId) {
});
}
/**
 * Resolves the list of files to parse for a series torrent.
 *
 * If the parsed torrent name carries an episode or a date, the torrent itself
 * is returned as a single file entry built from its title and size; otherwise
 * the lookup is delegated to `torrentFiles`.
 *
 * @param {object} torrent torrent entry (reads `title` and `size`)
 * @param {object} parsedTorrentName parsed title (reads `episode` and `date`)
 * @returns {Promise<Array<object>>} file entries for the torrent
 */
async function getSeriesFiles(torrent, parsedTorrentName) {
  const isSingleRelease = Boolean(parsedTorrentName.episode || parsedTorrentName.date);
  if (!isSingleRelease) {
    return torrentFiles(torrent);
  }

  const { title, size } = torrent;
  return [{ name: title, path: title, size: size }];
}
async function mapSeriesEpisode(file, infoHash, imdbId) {
if (!file.episodes) {
return Promise.resolve([]);
@@ -69,21 +76,22 @@ async function mapSeriesEpisode(file, infoHash, imdbId) {
return Promise.resolve(file.episodes.map(episode => ({
infoHash: infoHash,
fileIndex: file.fileIndex,
title: file.name,
title: file.path || file.name,
size: file.size,
imdbId: imdbId,
imdbSeason: file.season,
imdbEpisode: episode})))
imdbEpisode: episode
})))
}
async function mapSeriesMovie(file, infoHash) {
return findMovieImdbId(file).then((imdbId) => [{
return findMovieImdbId(file).then((imdbId) => [ {
infoHash: infoHash,
fileIndex: file.fileIndex,
title: file.name,
size: file.size,
imdbId: imdbId
}])
} ])
}
function parseSeriesFile(file, parsedTorrentName) {
@@ -96,7 +104,8 @@ function parseSeriesFile(file, parsedTorrentName) {
const pathInfo = parse(folders[folders.length - 2]);
fileInfo.season = pathInfo.season;
}
fileInfo.isMovie = parsedTorrentName.hasMovies && !fileInfo.season && !fileInfo.episodes || !!fileInfo.year;
fileInfo.isMovie = parsedTorrentName.hasMovies && !fileInfo.season &&
(!fileInfo.episodes || !!fileInfo.year || !!file.name.match(/\b(?:\d+[ .]movie|movie[ .]\d+)\b/i));
return { ...file, ...fileInfo };
}
@@ -111,12 +120,11 @@ function findMovieImdbId(title) {
return getImdbId(searchQuery).catch((error) => undefined);
}
async function decomposeAbsoluteEpisodes(files, torrent, imdbId) {
async function decomposeAbsoluteEpisodes(files, metadata) {
if (files.every((file) => !file.episodes || file.episodes.every((ep) => ep < 100))) {
return files; // nothing to decompose
}
const metadata = await getMetadata(imdbId, torrent.type || Type.MOVIE);
// decompose if season is inside path, but individual files are concatenated ex. 101 (S01E01)
files
.filter(file => file.season && metadata.episodeCount[file.season] < 100)