adds kitsu mapping for horrible subs WIP

This commit is contained in:
TheBeastLT
2019-12-30 18:35:02 +01:00
parent e7f46d2adc
commit a73cdb6b54
5 changed files with 10525 additions and 14 deletions

10432
horrible_subs_mapping.json Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -5,7 +5,7 @@ const { connect } = require('./lib/repository');
const tpbDump = require('./scrapers/piratebay_dump');
const horribleSubsScraper = require('./scrapers/horiblesubs_scraper');
const providers = [tpbDump];
const providers = [horribleSubsScraper];
async function scrape() {
providers.forEach((provider) => provider.scrape());

View File

@@ -4,6 +4,7 @@ const bing = require('nodejs-bing');
const { cacheWrapImdbId, cacheWrapMetadata } = require('./cache');
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
const KITSU_URL = 'https://anime-kitsu.now.sh';
function getMetadata(imdbId, type) {
return cacheWrapMetadata(imdbId,
@@ -35,6 +36,28 @@ function getMetadata(imdbId, type) {
}));
}
/**
 * Fetches series metadata for a Kitsu id from the anime-kitsu addon, going through the metadata cache.
 *
 * @param {string} kitsuId - Kitsu id, with or without the `kitsu:` prefix.
 * @returns {Promise<Object>} the addon's `meta` object with `videos` blanked out and a
 *   `totalEpisodes` count (videos whose season is greater than 0) added.
 * @throws {Error} (as a rejection) `failed kitsu query <id> due: <reason>` when the request fails
 *   or the response carries no usable meta.
 */
function getKitsuMetadata(kitsuId) {
  // The prefixed id is used both as the cache key and inside the request path.
  const cacheKey = kitsuId.startsWith('kitsu:') ? kitsuId : `kitsu:${kitsuId}`;

  const fetchMeta = async () => {
    try {
      const response = await needle('get', `${KITSU_URL}/meta/series/${cacheKey}.json`, { open_timeout: 60000 });
      const meta = response.body && response.body.meta;
      if (!meta || !meta.id) {
        throw new Error('No search results');
      }
      // Only entries with season > 0 are counted.
      const episodeCount = meta.videos && meta.videos.filter((video) => video.season > 0).length;
      return { ...meta, videos: undefined, totalEpisodes: episodeCount };
    } catch (error) {
      throw new Error(`failed kitsu query ${kitsuId} due: ${error.message}`);
    }
  };

  return cacheWrapMetadata(cacheKey, fetchMeta);
}
function escapeTitle(title, hyphenEscape = true) {
return title.toLowerCase()
.normalize('NFKD') // normalize non-ASCII characters
@@ -64,4 +87,17 @@ async function getImdbId(info) {
.match(/imdb\.com\/title\/(tt\d+)/)[1])));
}
module.exports = { escapeTitle, getMetadata, getImdbId };
/**
 * Looks up a title in the anime-kitsu addon search catalog and returns the id of the first hit.
 *
 * @param {string} title - search text (the scraper passes a show slug).
 * @returns {Promise<string>} Kitsu id of the first result, without the `kitsu:` prefix.
 * @throws {Error} (as a rejection) `No search results` when the catalog returns no metas.
 */
async function getKitsuId(title) {
  // The search text sits inside a URL path segment, so characters such as '/', '?', '#'
  // or spaces must be percent-encoded or the request targets a different resource.
  const searchTerm = encodeURIComponent(title);
  // NOTE(review): results are cached through cacheWrapImdbId under the raw title;
  // confirm this key cannot collide with keys used for IMDb id lookups.
  return cacheWrapImdbId(title,
      () => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${searchTerm}.json`, { open_timeout: 60000 })
          .then((response) => {
            const body = response.body;
            if (body && body.metas && body.metas.length) {
              return body.metas[0].id.replace('kitsu:', '');
            }
            throw new Error('No search results');
          }));
}
module.exports = { escapeTitle, getMetadata, getImdbId, getKitsuMetadata, getKitsuId };

View File

@@ -40,8 +40,11 @@ async function getLatestEntries(config = {}) {
/**
 * Downloads a page and returns it loaded into cheerio.
 *
 * @param {string} endpoint - either a path appended to the base url, or an absolute
 *   http(s) url whose origin is swapped for the base url.
 * @param {Object} [config]
 * @param {string} [config.proxyUrl] - base url used instead of the default one.
 * @param {number} [config.timeout] - open timeout passed to needle instead of the default.
 * @returns {Promise<Object>} result of `cheerio.load` on the response body.
 */
function _getContent(endpoint, config = {}) {
  const baseUrl = config.proxyUrl || defaultUrl;
  const timeout = config.timeout || defaultTimeout;
  // Absolute links keep their path but are re-pointed at the configured base url,
  // so they are never glued onto the base url as if they were paths.
  const url = endpoint.startsWith('http')
      ? endpoint.replace(/https?:\/\/[^/]+/, baseUrl)
      : `${baseUrl}${endpoint}`;

  return needle('get', url, { open_timeout: timeout, follow: 2 })
      .then((response) => response.body)
      .then((body) => cheerio.load(body));
}
@@ -115,5 +118,5 @@ function _parseDate(date) {
return moment(date, 'MM/DD/YYYY').toDate();
}
module.exports = { allShows, showData, getLatestEntries };
module.exports = { allShows, showData, getLatestEntries, _getShowId };

View File

@@ -1,4 +1,5 @@
const moment = require('moment');
const fs = require('fs');
const needle = require('needle');
const Bottleneck = require('bottleneck');
const { parse } = require('parse-torrent-title');
@@ -7,11 +8,11 @@ const horriblesubs = require('./api/horriblesubs');
const { Type } = require('../lib/types');
const { torrentFiles, currentSeeders } = require('../lib/torrent');
const repository = require('../lib/repository');
const { getImdbId, getMetadata } = require('../lib/metadata');
const { getImdbId, getMetadata, getKitsuId, getKitsuMetadata } = require('../lib/metadata');
const NAME = 'HorribleSubs';
const limiter = new Bottleneck({maxConcurrent: 1});
const limiter = new Bottleneck({maxConcurrent: 5});
const entryLimiter = new Bottleneck({maxConcurrent: 20});
async function scrape() {
@@ -24,15 +25,54 @@ async function scrape() {
}
/**
 * Fetches the list of all HorribleSubs shows, kicks off the kitsu mapping and
 * schedules parsing of show data for the first 20 shows.
 *
 * The returned promise resolves as soon as the show list has been fetched and the
 * work below has been started; it does not wait for the mapping or the parsing.
 */
async function _scrapeAllShows() {
console.log(`${NAME}: getting all shows...`);
const shows = await horriblesubs.allShows();
// Started without await: it runs concurrently with the scheduling below and a
// rejection from it is not handled here.
initMapping();
// NOTE(review): the commented-out lines here and at the end of this function duplicate
// the live statements; presumably only one of the two copies is meant to stay - confirm.
// console.log(`${NAME}: getting all shows...`);
// const shows = await horriblesubs.allShows();
// The combined promise is neither awaited nor returned, so callers cannot observe completion.
Promise.all(shows
// Only the first 20 shows are processed.
.slice(0, 20)
//.filter(show => show.url.includes('piece'))
// Each show is fetched through the shared limiter; failures are logged and do not reject.
.map((show) => limiter.schedule(() => horriblesubs.showData(show)
.then((showData) => _parseShowData(showData))
.catch((err) => console.log(err)))));
// Promise.all(shows
// .slice(0, 20)
// //.filter(show => show.url.includes('piece'))
// .map((show) => limiter.schedule(() => horriblesubs.showData(show)
// .then((showData) => _parseShowData(showData))
// .catch((err) => console.log(err)))));
}
/**
 * Builds the show -> kitsu mapping for every HorribleSubs show and writes it to
 * ./horrible_subs_mapping.json as a JSON object keyed by showId.
 *
 * The returned promise settles only after the file write has completed. A failed
 * write is logged and does not reject.
 *
 * @returns {Promise<void>}
 */
async function initMapping() {
  console.log(`${NAME}: initiating kitsu mapping...`);
  const allShows = await horriblesubs.allShows();
  const enrichedShows = await Promise.all(
      allShows.map((show) => limiter.schedule(() => enrichShow(show))));

  // Index by showId; when two shows share an id the later one wins.
  const shows = {};
  for (const show of enrichedShows) {
    shows[show.showId] = show;
  }

  const kitsuIds = Object.values(shows).map((show) => show.kitsu_id);
  console.log(JSON.stringify(kitsuIds));

  try {
    // Wait for the write so that "finished" is only reported once the file is on disk.
    await new Promise((resolve, reject) => fs.writeFile(
        "./horrible_subs_mapping.json",
        JSON.stringify(shows),
        'utf8',
        (err) => (err ? reject(err) : resolve())));
  } catch (err) {
    console.log(`An error occurred while writing JSON Object to File: ${err.message}`);
    return;
  }
  console.log(`${NAME}: finished kitsu mapping`);
}
/**
 * Adds a HorribleSubs show id and kitsu details to a show entry.
 *
 * The show id falls back to the show title when it cannot be resolved; the kitsu
 * fields are left undefined when the kitsu lookup fails (the failure is logged).
 *
 * @param {{title: string, url: string}} show - show entry; all of its properties are copied over.
 * @returns {Promise<Object>} the show with `showId`, `kitsu_id`, `kitsuTitle`, `kitsuSlug` and `imdb_id`.
 */
async function enrichShow(show) {
  console.log(`${NAME}: getting show info for ${show.title}...`);
  const showId = await horriblesubs._getShowId(show.url).catch(() => show.title);

  // The last path segment of the show url is used as the kitsu search text.
  const slug = show.url.replace(/^.*\//, '');
  let kitsuMeta;
  try {
    const kitsuId = await getKitsuId(slug);
    kitsuMeta = await getKitsuMetadata(kitsuId);
  } catch (error) {
    console.log(`Failed getting kitsu meta: ${error.message}`);
    kitsuMeta = {};
  }

  const { kitsu_id, name: kitsuTitle, slug: kitsuSlug, imdb_id } = kitsuMeta;
  return { showId, ...show, kitsu_id, kitsuTitle, kitsuSlug, imdb_id };
}
const hardcodedShows = {