Updates TPB dump scraper

This commit is contained in:
TheBeastLT
2019-12-29 20:07:15 +01:00
parent 7925f18064
commit e7f46d2adc
12 changed files with 689 additions and 165 deletions

View File

@@ -1,32 +1,38 @@
const _ = require('lodash');
const needle = require('needle');
const nameToImdb = require('name-to-imdb');
const bing = require('nodejs-bing');
const { cacheWrapImdbId, cacheWrapMetadata } = require('./cache');
// Base URL of the Cinemeta service; getMetadata appends `/meta/<type>/<imdbId>.json` to it.
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
/**
 * Looks up title metadata for an IMDb id on Cinemeta.
 *
 * NOTE(review): this function contains two complete bodies back to back. As written,
 * the first `return` always exits, so everything from the second `return` onward is
 * unreachable. This looks like the pre-change and post-change versions of the body
 * placed one after the other - confirm which one is meant to be live before editing.
 *
 * @param {string} imdbId - IMDb id interpolated into the request path.
 * @param {string} type - Cinemeta meta type path segment; callers in this file pass 'series' or 'movie'.
 * @returns {Promise<Object>} Resolves (first body) to { imdbId, title, year, genres, episodeCount }.
 *   Rejects with an Error when the response has no `body.meta.name`.
 */
function getMetadata(imdbId, type) {
// --- First body (the one that actually runs): direct request, open_timeout of 1000 ms. ---
return needle('get', `${CINEMETA_URL}/meta/${type}/${imdbId}.json`, { open_timeout: 1000 })
.then((response) => response.body)
.then((body) => {
// A usable answer must at least carry a name; anything else is treated as a failed query.
if (body && body.meta && body.meta.name) {
return {
imdbId: imdbId,
title: body.meta.name,
year: body.meta.year,
genres: body.meta.genres,
// Episode counts per season: group videos by season, drop season '0',
// order by numeric season and keep only the counts.
// Stays undefined/falsy when the entry has no `videos`.
episodeCount: body.meta.videos && _.chain(body.meta.videos)
.countBy('season')
.toPairs()
.filter((pair) => pair[0] !== '0')
.sortBy((pair) => parseInt(pair[0], 10))
.map((pair) => pair[1])
.value()
};
} else {
console.log(`failed cinemeta query: Empty Body`);
throw new Error('failed cinemeta query');
}
});
// --- Second body: unreachable as written (see NOTE(review) above). ---
// Routes the same request through cacheWrapMetadata with imdbId as the first argument
// (presumably a cache key - see ./cache) and uses an open_timeout of 60000 ms.
return cacheWrapMetadata(imdbId,
() => needle('get', `${CINEMETA_URL}/meta/${type}/${imdbId}.json`, { open_timeout: 60000 })
.then((response) => {
const body = response.body;
if (body && body.meta && body.meta.name) {
return {
imdbId: imdbId,
title: body.meta.name,
year: body.meta.year,
genres: body.meta.genres,
// Number of videos whose season is greater than 0.
totalEpisodes: body.meta.videos && body.meta.videos
.filter(video => video.season > 0).length,
// Per-season counts: build a season -> count map and return its values.
episodeCount: body.meta.videos && Object.values(body.meta.videos
.filter((entry) => entry.season !== 0)
.sort((a, b) => a.season - b.season)
.reduce((map, next) => {
// `undefined + 1` is NaN (falsy), so the first hit for a season falls back to 1.
map[next.season] = map[next.season] + 1 || 1;
return map;
}, {}))
};
} else {
throw new Error('No search results');
}
})
// Re-throw any failure with the IMDb id and the original message in the text.
.catch((error) => {
throw new Error(`failed cinemeta query ${imdbId} due: ${error.message}`);
}));
}
function escapeTitle(title, hyphenEscape = true) {
@@ -39,48 +45,23 @@ function escapeTitle(title, hyphenEscape = true) {
.trim();
}
/**
 * Builds the descriptor of a single series episode from an id of the form
 * `<imdbId>:<season>:<episode>`.
 *
 * NOTE(review): `hardcodedTitles` and `hasEpisodeCount` are not defined inside this
 * function - confirm they exist at module scope, otherwise this throws a ReferenceError.
 *
 * @param {string} id - Colon separated id; the season and episode parts are parsed as base-10 integers.
 * @returns {Promise<Object>} Resolves to
 *   { imdb, title, season, episode, absoluteEpisode, genres, isAnime }.
 */
async function seriesMetadata(id) {
  const [imdbId, seasonPart, episodePart] = id.split(':');
  const season = parseInt(seasonPart, 10);
  const episode = parseInt(episodePart, 10);

  const metadata = await getMetadata(imdbId, 'series');
  const title = escapeTitle(metadata.title);
  // Metadata may come without genres; treat that like an empty list instead of
  // crashing on `undefined.length`.
  const genres = metadata.genres || [];

  return {
    imdb: imdbId,
    // A hardcoded title, when present for this id, wins over the escaped metadata title.
    title: hardcodedTitles[imdbId] || title,
    season,
    episode,
    // Episode number counted across seasons: the episode plus the sizes of all earlier seasons.
    absoluteEpisode: hasEpisodeCount
      && metadata.episodeCount.slice(0, season - 1).reduce((a, b) => a + b, episode),
    genres: metadata.genres,
    // No genre information at all is treated the same as 'Animation'.
    isAnime: !genres.length || genres.includes('Animation')
  };
}
/**
 * Builds the descriptor of a movie from its IMDb id.
 *
 * @param {string} id - IMDb id, passed to getMetadata with type 'movie'.
 * @returns {Promise<Object>} Resolves to { imdb, title, year, genres, isAnime }.
 */
async function movieMetadata(id) {
  const metadata = await getMetadata(id, 'movie');
  // Metadata may come without genres; treat that like an empty list instead of
  // crashing on `undefined.length`.
  const genres = metadata.genres || [];

  return {
    imdb: id,
    title: escapeTitle(metadata.title),
    year: metadata.year,
    genres: metadata.genres,
    // No genre information at all is treated the same as 'Animation'.
    isAnime: !genres.length || genres.includes('Animation')
  };
}
/**
 * Resolves an IMDb id for a title description via the name-to-imdb package.
 *
 * NOTE(review): this function contains two complete bodies back to back. As written,
 * the first `return` always exits, so everything from `const key = ...` onward is
 * unreachable. This looks like the pre-change and post-change versions of the body
 * placed one after the other - confirm which one is meant to be live before editing.
 *
 * @param {Object} info - Passed as-is to nameToImdb; the second body also reads
 *   `info.name`, `info.year` and `info.type`.
 * @returns {Promise<*>} Resolves with whatever nameToImdb reports as its result
 *   (presumably the IMDb id string - confirm against the package docs); rejects with
 *   the callback error or Error('failed imdbId search') when there is no result.
 */
async function getImdbId(info) {
// --- First body (the one that actually runs): adapt the nameToImdb callback to a promise. ---
return new Promise((resolve, reject) => {
nameToImdb(info, function(err, res) {
// Only a truthy result counts as success; an empty result without an error
// still rejects, with a generic Error.
if (res) {
resolve(res);
} else {
reject(err || new Error('failed imdbId search'));
}
});
});
// --- Second body: unreachable as written (see NOTE(review) above). ---
// Key handed to cacheWrapImdbId (presumably a cache key - see ./cache), built from name, year and type.
const key = `${info.name}_${info.year}_${info.type}`;
return cacheWrapImdbId(key,
() => new Promise((resolve, reject) => {
nameToImdb(info, function(err, res) {
if (res) {
resolve(res);
} else {
reject(err || new Error('failed imdbId search'));
}
});
// Fallback when nameToImdb fails: run a Bing web search and take the id from the
// first result link containing 'imdb.com/title/'.
// NOTE(review): `.find(...)` returns undefined when no link matches and `.match(...)`
// returns null when the pattern does not match; either case ends in a TypeError
// rather than a descriptive error.
}).catch(() => bing.web(`${info.name} ${info.year || ''} ${info.type} imdb`)
.then((results) => results
.map((result) => result.link)
.find(result => result.includes('imdb.com/title/'))
.match(/imdb\.com\/title\/(tt\d+)/)[1])));
}
// Public API of this module.
// NOTE(review): there are two consecutive assignments and the second one replaces the
// first, so as written only escapeTitle, getMetadata and getImdbId end up exported
// (movieMetadata and seriesMetadata do not). Confirm which export list is intended.
module.exports = { escapeTitle, getMetadata, movieMetadata, seriesMetadata, getImdbId };
module.exports = { escapeTitle, getMetadata, getImdbId };