// Mirror of https://github.com/knightcrawler-stremio/knightcrawler.git
// Synced 2024-12-20 03:29:51 +00:00 - 172 lines - 6.3 KiB - JavaScript
const moment = require('moment');
|
|
const fs = require('fs');
|
|
const needle = require('needle');
|
|
const Bottleneck = require('bottleneck');
|
|
const { parse } = require('parse-torrent-title');
|
|
const decode = require('magnet-uri');
|
|
const horriblesubs = require('./api/horriblesubs');
|
|
const { Type } = require('../lib/types');
|
|
const { torrentFiles, currentSeeders } = require('../lib/torrent');
|
|
const repository = require('../lib/repository');
|
|
const { getImdbId, getMetadata, getKitsuId, getKitsuMetadata } = require('../lib/metadata');
|
|
|
|
// Provider name: used as the repository lookup key, as the `provider` field
// of scraped entries and as the prefix of this module's log messages.
const NAME = 'HorribleSubs';

// Caps the number of show-level jobs running at once (see initMapping).
const limiter = new Bottleneck({ maxConcurrent: 5 });

// Caps the number of torrent entries constructed at once (see _parseShowData).
const entryLimiter = new Bottleneck({ maxConcurrent: 20 });
|
|
|
|
/**
 * Entry point of the scraper.
 *
 * Looks up this provider's record in the repository and, when it carries no
 * `lastScraped` value, runs the full scrape. Nothing happens otherwise.
 *
 * @returns {Promise<void>}
 */
async function scrape() {
  const provider = await repository.getProvider({ name: NAME });

  // Already scraped at least once: nothing to do here.
  if (provider.lastScraped) {
    return;
  }

  console.log(`${NAME}: no previous scrapping exist`);
  await _scrapeAllShows();
}
|
|
|
|
/**
 * Runs a full scrape. Currently this only (re)builds the kitsu mapping; the
 * per-show scraping step is disabled and kept below for reference.
 *
 * @returns {Promise<void>} resolves once initMapping() has completed and
 *     rejects if it fails
 */
async function _scrapeAllShows() {
  // Await the mapping so callers observe its completion and a failure rejects
  // this promise instead of surfacing as an unhandled rejection.
  await initMapping();

  // TODO(review): show scraping is currently disabled.
  // console.log(`${NAME}: getting all shows...`);
  // const shows = await horriblesubs.allShows();
  //
  // Promise.all(shows
  //     .slice(0, 20)
  //     //.filter(show => show.url.includes('piece'))
  //     .map((show) => limiter.schedule(() => horriblesubs.showData(show)
  //         .then((showData) => _parseShowData(showData))
  //         .catch((err) => console.log(err)))));
}
|
|
|
|
/**
 * Extends the stored HorribleSubs -> kitsu mapping with every show that is
 * not mapped yet and writes the merged mapping back to disk.
 *
 * Shows whose title already appears in the stored mapping are skipped; the
 * remaining ones are enriched through enrichShow() (throttled by `limiter`)
 * and added to the mapping keyed by their title.
 *
 * Writing the file is best-effort: a write error is logged and does not
 * reject the returned promise.
 *
 * @returns {Promise<void>} resolves after the write attempt has finished
 */
async function initMapping() {
  console.log(`${NAME}: initiating kitsu mapping...`);
  // NOTE(review): the mapping is loaded relative to this module but written
  // relative to the working directory - confirm both point to the same file.
  const currentMapping = require('../horrible_subs_mapping');
  // Set lookup instead of scanning all mappings for every show.
  const mappedTitles = new Set(Object.values(currentMapping).map((mapping) => mapping.title));

  const allShows = await horriblesubs.allShows();
  const unmappedShows = allShows.filter((show) => !mappedTitles.has(show.title));
  const enrichedShows = await Promise.all(
      unmappedShows.map((show) => limiter.schedule(() => enrichShow(show))));

  // The loaded mapping object is extended in place, as before.
  for (const show of enrichedShows) {
    currentMapping[show.title] = show;
  }

  // Wait for the write to finish so that "finished" is only logged (and the
  // function only resolves) once the file has actually been handled.
  await new Promise((resolve) => {
    fs.writeFile("./horrible_subs_mapping.json", JSON.stringify(currentMapping), 'utf8', (err) => {
      if (err) {
        console.log("An error occurred while writing JSON Object to File.", err);
      }
      resolve();
    });
  });
  console.log(`${NAME}: finished kitsu mapping`);
}
|
|
|
|
/**
 * Builds a mapping record for a single show.
 *
 * Resolves the HorribleSubs show id from `show.url` (falling back to the show
 * title when that lookup rejects) and the kitsu metadata for `show.title`
 * (falling back to an empty object, with a log line, when that rejects).
 *
 * @param {Object} show - show record; `title` and `url` are read
 * @returns {Promise<Object>} `showId`, then all properties of `show`, then
 *     `kitsu_id`, `kitsuTitle`, `kitsuSlug` and `imdb_id` from the metadata
 */
async function enrichShow(show) {
  console.log(`${NAME}: getting show info for ${show.title}...`);

  const useTitleAsId = () => show.title;
  const useEmptyMetadata = (error) => {
    console.log(`Failed getting kitsu meta: ${error.message}`);
    return {};
  };

  const showId = await horriblesubs._getShowId(show.url).catch(useTitleAsId);
  const kitsuMeta = await getKitsuId(show.title)
      .then((kitsuId) => getKitsuMetadata(kitsuId))
      .catch(useEmptyMetadata);
  const { kitsu_id, name: kitsuTitle, slug: kitsuSlug, imdb_id } = kitsuMeta;

  // Later sources win: properties of `show` override `showId`, and the kitsu
  // fields override same-named properties of `show`.
  return Object.assign({ showId }, show, { kitsu_id, kitsuTitle, kitsuSlug, imdb_id });
}
|
|
|
|
/**
 * Turns the scraped data of one show into torrent entries and stores them.
 *
 * The imdb id comes from `hardcodedShows` when the show id is listed there,
 * otherwise from getImdbId() using the normalised show title; a failed lookup
 * yields no id, in which case an empty metadata object is used.
 *
 * @param {Object} showData - scraped show; `title`, `showId`,
 *     `singleEpisodes[].mirrors` and `packEpisodes[].mirrors` are read
 * @returns {Promise<void>} resolves once every entry has been passed to
 *     repository.updateTorrent() and those calls have settled successfully
 */
async function _parseShowData(showData) {
  console.log(`${NAME}: scrapping ${showData.title} data...`);
  // NOTE(review): `hardcodedShows` is not defined in the visible part of this
  // file - confirm it is declared before this function is used.
  const imdbId = hardcodedShows[showData.showId] || await getImdbId({
    name: showData.title.replace(/\W+/g, ' ').toLowerCase(),
    type: 'series'
  }).catch(() => undefined);
  const metadata = (imdbId && await getMetadata(imdbId, 'series')) || {};

  // One throttled job per (episode, mirror) and per (pack, mirror) pair.
  const pendingEntries = [];
  for (const episode of showData.singleEpisodes) {
    for (const mirror of episode.mirrors) {
      pendingEntries.push(
          entryLimiter.schedule(() => _constructSingleEntry(metadata, episode, mirror)));
    }
  }
  for (const pack of showData.packEpisodes) {
    for (const mirror of pack.mirrors) {
      pendingEntries.push(
          entryLimiter.schedule(() => _constructPackEntry(metadata, pack, mirror)));
    }
  }

  const torrentEntries = await Promise.all(pendingEntries);
  // Wait for the updates so the caller sees completion and failures reject
  // this promise instead of being left as unhandled rejections.
  await Promise.all(torrentEntries.map((torrent) => repository.updateTorrent(torrent)));
}
|
|
|
|
/**
 * Builds the torrent entry for one mirror of a single-episode release.
 *
 * @param {Object} metadata - show metadata; `imdbId` is read here and the
 *     object is forwarded to actualSeasonEpisode()
 * @param {Object} single - episode record; `title`, `episode` and
 *     `uploadDate` are read
 * @param {Object} mirror - mirror record; `magnetLink` and `resolution` are
 *     read and `infoHash` is assigned on it
 * @returns {Promise<Object>} torrent entry with exactly one file
 */
async function _constructSingleEntry(metadata, single, mirror) {
  // The info hash is stored on the mirror before it is handed to currentSeeders().
  mirror.infoHash = decode(mirror.magnetLink).infoHash;
  const seeders = await currentSeeders(mirror);

  // A trailing 1-2 digit number in the title (optionally prefixed with S/s)
  // is treated as the season.
  const seasonMatch = single.title.match(/[Ss]?(\d{1,2})\W*$/);
  const xSeason = seasonMatch && parseInt(seasonMatch[1], 10); // could have a season
  const xEpisode = parseInt(single.episode, 10); // could be a seasonal or absolute episode
  const { season, episode, absoluteEpisode } = actualSeasonEpisode(metadata, xSeason, xEpisode);

  const title = `${single.title} ${single.episode} [${mirror.resolution}]`;
  const file = { title, season, episode, absoluteEpisode };

  return {
    infoHash: mirror.infoHash,
    provider: NAME,
    title,
    type: Type.ANIME,
    imdbId: metadata.imdbId,
    uploadDate: single.uploadDate,
    seeders,
    files: [file]
  };
}
|
|
|
|
/**
 * Builds the torrent entry for one mirror of a batch (pack) release.
 *
 * The files of the torrent are fetched through torrentFiles(); every episode
 * parsed from a file name becomes one file record. When the file listing
 * cannot be obtained the entry is still returned, with an empty `files` list.
 *
 * @param {Object} metadata - show metadata; `imdbId` is read here and the
 *     object is forwarded to actualSeasonEpisode()
 * @param {Object} pack - pack record; `title`, `episode` and `uploadDate`
 *     are read
 * @param {Object} mirror - mirror record; `magnetLink` and `resolution` are
 *     read and `infoHash` is assigned on it
 * @returns {Promise<Object>} torrent entry
 */
async function _constructPackEntry(metadata, pack, mirror) {
  // The info hash is stored on the mirror before it is handed to the helpers.
  mirror.infoHash = decode(mirror.magnetLink).infoHash;
  const seeders = await currentSeeders(mirror);

  // A trailing 1-2 digit number in the title (optionally prefixed with S/s)
  // is treated as the season.
  const seasonMatch = pack.title.match(/[Ss]?(\d{1,2})\W*$/);
  const xSeason = seasonMatch && parseInt(seasonMatch[1], 10);

  let files;
  try {
    const torrentContents = await torrentFiles(mirror);
    files = torrentContents
        .map((file) => {
          // Last path segment is the file name.
          const nameMatch = file.path.match(/[^\/]+$/);
          if (!nameMatch) {
            return [];
          }
          const title = nameMatch[0];
          const titleInfo = parse(title.replace(pack.title, ''));
          // A file without parsed episodes contributes no records; it must not
          // throw, or every other file of the pack would be discarded below.
          return (titleInfo.episodes || [])
              .map((xEpisode) => actualSeasonEpisode(metadata, xSeason, xEpisode))
              .map((actual) => ({
                title,
                season: actual.season,
                episode: actual.episode,
                absoluteEpisode: actual.absoluteEpisode
              }));
        })
        .reduce((a, b) => a.concat(b), []);
  } catch (error) {
    // Best-effort: keep the entry, but make the failure visible.
    console.log(`${NAME}: failed getting files for ${mirror.infoHash}: ${error.message}`);
    files = [];
  }

  return {
    infoHash: mirror.infoHash,
    provider: NAME,
    title: `${pack.title} ${pack.episode} [${mirror.resolution}]`,
    // Same constant as _constructSingleEntry (assumes Type.ANIME === 'anime' - confirm).
    type: Type.ANIME,
    imdbId: metadata.imdbId,
    uploadDate: pack.uploadDate,
    seeders,
    files
  };
}
|
|
|
|
/**
 * Resolves season, seasonal episode and absolute episode numbers.
 *
 * - With a season: season and episode are returned as given; the absolute
 *   episode is the episode plus the episode counts of all earlier seasons
 *   (undefined when `metadata.episodeCount` is missing).
 * - Without a season but with `metadata.episodeCount`: the episode is treated
 *   as absolute and converted by subtracting season lengths until it fits
 *   into a season.
 * - Otherwise: season 1, with the episode used as both seasonal and absolute.
 *
 * @param {Object} metadata - show metadata; `episodeCount` (episodes per
 *     season, in season order) is read when present
 * @param {?number} xSeason - season number, or a falsy value when unknown
 * @param {number} xEpisode - seasonal or absolute episode number
 * @returns {{season: number, episode: number, absoluteEpisode: (number|undefined)}}
 */
function actualSeasonEpisode(metadata, xSeason, xEpisode) {
  const episodeCount = metadata.episodeCount;

  if (xSeason) {
    return {
      season: xSeason,
      episode: xEpisode,
      absoluteEpisode: episodeCount && episodeCount
          .slice(0, xSeason - 1)
          .reduce((a, b) => a + b, xEpisode),
    };
  }

  if (episodeCount) {
    let season = 1;
    let episode = xEpisode;
    for (const count of episodeCount) {
      if (episode > count) {
        season += 1;
        episode -= count;
      } else {
        // The episode fits into this season; later (possibly shorter)
        // seasons must not shift it any further.
        break;
      }
    }
    return { season, episode, absoluteEpisode: xEpisode };
  }

  return { season: 1, episode: xEpisode, absoluteEpisode: xEpisode };
}
|
|
|
|
// Only the scrape entry point is exposed.
module.exports = { scrape };