adds kitsu mapping for horrible subs WIP
This commit is contained in:
10432
horrible_subs_mapping.json
Normal file
10432
horrible_subs_mapping.json
Normal file
File diff suppressed because it is too large
Load Diff
2
index.js
2
index.js
@@ -5,7 +5,7 @@ const { connect } = require('./lib/repository');
|
||||
const tpbDump = require('./scrapers/piratebay_dump');
|
||||
const horribleSubsScraper = require('./scrapers/horiblesubs_scraper');
|
||||
|
||||
// Scrapers run by scrape(). Only the HorribleSubs scraper is enabled here; the earlier
// `[tpbDump]` declaration was dropped because declaring `providers` twice is a SyntaxError.
const providers = [horribleSubsScraper];
|
||||
|
||||
async function scrape() {
|
||||
providers.forEach((provider) => provider.scrape());
|
||||
|
||||
@@ -4,6 +4,7 @@ const bing = require('nodejs-bing');
|
||||
const { cacheWrapImdbId, cacheWrapMetadata } = require('./cache');
|
||||
|
||||
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
|
||||
const KITSU_URL = 'https://anime-kitsu.now.sh';
|
||||
|
||||
function getMetadata(imdbId, type) {
|
||||
return cacheWrapMetadata(imdbId,
|
||||
@@ -35,6 +36,28 @@ function getMetadata(imdbId, type) {
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Looks up series metadata for a Kitsu id via the Kitsu addon endpoint, wrapped in the
 * metadata cache under the key `kitsu:<id>`.
 *
 * @param {string|number} kitsuId - Kitsu id, with or without the `kitsu:` prefix.
 * @returns {*} whatever `cacheWrapMetadata` returns for the lookup; the lookup itself yields
 *   the addon's `meta` object with `videos` blanked out and `totalEpisodes` set to the number
 *   of videos whose `season` is greater than 0 (undefined when the meta has no videos).
 * @throws {TypeError} when `kitsuId` is null or undefined.
 */
function getKitsuMetadata(kitsuId) {
  if (kitsuId === null || kitsuId === undefined) {
    throw new TypeError('kitsuId is required');
  }
  // Coerce first: numeric ids have no startsWith() and used to crash here.
  const id = String(kitsuId);
  const key = id.startsWith('kitsu:') ? id : `kitsu:${id}`;

  return cacheWrapMetadata(key,
      () => needle('get', `${KITSU_URL}/meta/series/${key}.json`, { open_timeout: 60000 })
          .then((response) => {
            const body = response.body;
            if (!(body && body.meta && body.meta.id)) {
              throw new Error('No search results');
            }
            return {
              ...body.meta,
              // The full episode list is dropped; only its size is kept.
              videos: undefined,
              totalEpisodes: body.meta.videos && body.meta.videos
                  .filter((video) => video.season > 0).length
            };
          })
          .catch((error) => {
            // Re-throw with the requested id so the failing lookup can be identified.
            throw new Error(`failed kitsu query ${kitsuId} due: ${error.message}`);
          }));
}
|
||||
|
||||
function escapeTitle(title, hyphenEscape = true) {
|
||||
return title.toLowerCase()
|
||||
.normalize('NFKD') // normalize non-ASCII characters
|
||||
@@ -64,4 +87,17 @@ async function getImdbId(info) {
|
||||
.match(/imdb\.com\/title\/(tt\d+)/)[1])));
|
||||
}
|
||||
|
||||
module.exports = { escapeTitle, getMetadata, getImdbId };
|
||||
/**
 * Searches the Kitsu addon catalog for a title and returns the id of the first hit.
 *
 * @param {string} title - title (or slug) to search for.
 * @returns {Promise<string>} the first result's id with the `kitsu:` prefix removed.
 *   Rejects with `No search results` when the catalog returns no metas
 *   (assuming `cacheWrapImdbId` passes the lookup's promise through — confirm in ./cache).
 */
async function getKitsuId(title) {
  // The search term is part of the URL path, so it must be encoded; raw spaces, '&', '/',
  // '#' or '?' in a title would otherwise change or truncate the request.
  const query = encodeURIComponent(title);
  const searchUrl = `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`;

  // NOTE(review): this reuses the IMDb-id cache wrapper with the bare title as key; verify in
  // ./cache that a Kitsu id cannot be served back for an IMDb lookup using the same key.
  return cacheWrapImdbId(title,
      () => needle('get', searchUrl, { open_timeout: 60000 })
          .then((response) => {
            const body = response.body;
            if (!(body && body.metas && body.metas.length)) {
              throw new Error('No search results');
            }
            return body.metas[0].id.replace('kitsu:', '');
          }));
}
|
||||
|
||||
module.exports = { escapeTitle, getMetadata, getImdbId, getKitsuMetadata, getKitsuId };
|
||||
|
||||
@@ -40,8 +40,11 @@ async function getLatestEntries(config = {}) {
|
||||
/**
 * Downloads a page and parses it with cheerio.
 *
 * @param {string} endpoint - either a path appended to the base URL, or an absolute
 *   http(s) URL whose scheme and host are swapped for the base URL.
 * @param {Object} [config] - optional settings.
 * @param {string} [config.proxyUrl] - base URL used instead of `defaultUrl`.
 * @param {number} [config.timeout] - open timeout used instead of `defaultTimeout`.
 * @returns {Promise} resolves to the result of `cheerio.load` on the response body.
 */
function _getContent(endpoint, config = {}) {
  const baseUrl = config.proxyUrl || defaultUrl;
  const timeout = config.timeout || defaultTimeout;
  // Absolute links keep their path but are re-pointed at the configured base URL.
  const url = endpoint.startsWith('http')
      ? endpoint.replace(/https?:\/\/[^/]+/, baseUrl)
      : `${baseUrl}${endpoint}`;

  // A single request against the resolved url; the earlier `${baseUrl}${endpoint}` return
  // ignored `url` and made this one unreachable.
  return needle('get', url, { open_timeout: timeout, follow: 2 })
      .then((response) => response.body)
      .then((body) => cheerio.load(body));
}
|
||||
@@ -115,5 +118,5 @@ function _parseDate(date) {
|
||||
return moment(date, 'MM/DD/YYYY').toDate();
|
||||
}
|
||||
|
||||
module.exports = { allShows, showData, getLatestEntries };
|
||||
module.exports = { allShows, showData, getLatestEntries, _getShowId };
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
const moment = require('moment');
|
||||
const fs = require('fs');
|
||||
const needle = require('needle');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const { parse } = require('parse-torrent-title');
|
||||
@@ -7,11 +8,11 @@ const horriblesubs = require('./api/horriblesubs');
|
||||
const { Type } = require('../lib/types');
|
||||
const { torrentFiles, currentSeeders } = require('../lib/torrent');
|
||||
const repository = require('../lib/repository');
|
||||
const { getImdbId, getMetadata } = require('../lib/metadata');
|
||||
const { getImdbId, getMetadata, getKitsuId, getKitsuMetadata } = require('../lib/metadata');
|
||||
|
||||
const NAME = 'HorribleSubs';
|
||||
|
||||
// Limits how many show-level jobs (scheduled through limiter.schedule) run concurrently.
// The earlier `maxConcurrent: 1` declaration was dropped because declaring `limiter` twice
// is a SyntaxError.
const limiter = new Bottleneck({ maxConcurrent: 5 });
// Separate, wider limiter; presumably for per-entry work — its usage is outside this view.
const entryLimiter = new Bottleneck({ maxConcurrent: 20 });
|
||||
|
||||
async function scrape() {
|
||||
@@ -24,15 +25,54 @@ async function scrape() {
|
||||
}
|
||||
|
||||
/**
 * Entry point for a full scrape. While the Kitsu mapping is being built (WIP) it only runs
 * initMapping(); the regular per-show scrape is disabled below.
 *
 * @returns {Promise<void>} settles once the mapping run has finished, so a failure reaches
 *   the caller instead of becoming an unhandled rejection.
 */
async function _scrapeAllShows() {
  await initMapping();
  // console.log(`${NAME}: getting all shows...`);
  // const shows = await horriblesubs.allShows();

  // Promise.all(shows
  //     .slice(0, 20)
  //     //.filter(show => show.url.includes('piece'))
  //     .map((show) => limiter.schedule(() => horriblesubs.showData(show)
  //         .then((showData) => _parseShowData(showData))
  //         .catch((err) => console.log(err)))));
}
|
||||
|
||||
/**
 * Builds the show-to-Kitsu mapping: fetches every show, enriches each one through
 * enrichShow (scheduled via `limiter`), indexes the results by `showId`, and writes the
 * index as JSON to ./horrible_subs_mapping.json.
 *
 * A failed write is logged and does not reject, as before; the difference is that the
 * function now waits for the write to finish before logging completion and resolving.
 *
 * @returns {Promise<void>}
 */
async function initMapping() {
  console.log(`${NAME}: initiating kitsu mapping...`);
  const allShows = await horriblesubs.allShows();
  const enrichedShows = await Promise.all(
      allShows.map((show) => limiter.schedule(() => enrichShow(show))));

  // Index by showId; a later show with the same id replaces an earlier one.
  const mapping = {};
  for (const show of enrichedShows) {
    mapping[show.showId] = show;
  }

  const kitsuIds = Object.values(mapping).map((show) => show.kitsu_id);
  console.log(JSON.stringify(kitsuIds));

  // fs.writeFile is callback-based; wrap it so completion can be awaited.
  await new Promise((resolve) => {
    fs.writeFile("./horrible_subs_mapping.json", JSON.stringify(mapping), 'utf8', (err) => {
      if (err) {
        console.log("An error occurred while writing JSON Object to File.", err);
      }
      resolve();
    });
  });
  console.log(`${NAME}: finished kitsu mapping`);
}
|
||||
|
||||
/**
 * Adds a show id and Kitsu details to a show entry.
 *
 * @param {Object} show - show entry; `title` and `url` are read here.
 * @returns {Promise<Object>} the show's own fields plus `showId` (the show title when the id
 *   lookup fails) and `kitsu_id`, `kitsuTitle`, `kitsuSlug`, `imdb_id` taken from the Kitsu
 *   metadata (all undefined when the Kitsu lookup fails).
 */
async function enrichShow(show) {
  console.log(`${NAME}: getting show info for ${show.title}...`);

  let showId;
  try {
    showId = await horriblesubs._getShowId(show.url);
  } catch (error) {
    // Best effort: keep going with the title as the id, but leave a trace of the failure.
    console.log(`Failed getting show id for ${show.title}: ${error.message}`);
    showId = show.title;
  }

  // Last path segment of the url; trailing slashes are dropped first so a url such as
  // `/shows/name/` does not produce an empty search term.
  const slug = show.url.replace(/\/+$/, '').replace(/^.*\//, '');

  let metadata;
  try {
    const kitsuId = await getKitsuId(slug);
    metadata = await getKitsuMetadata(kitsuId);
  } catch (error) {
    console.log(`Failed getting kitsu meta: ${error.message}`);
    metadata = {};
  }

  return {
    showId: showId,
    ...show,
    kitsu_id: metadata.kitsu_id,
    kitsuTitle: metadata.name,
    kitsuSlug: metadata.slug,
    imdb_id: metadata.imdb_id
  };
}
|
||||
|
||||
const hardcodedShows = {
|
||||
|
||||
Reference in New Issue
Block a user