mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
189 lines
7.7 KiB
JavaScript
189 lines
7.7 KiB
JavaScript
const fs = require('fs');
|
|
const moment = require('moment');
|
|
const Bottleneck = require('bottleneck');
|
|
const decode = require('magnet-uri');
|
|
const horriblesubs = require('./horriblesubs_api.js');
|
|
const repository = require('../../lib/repository');
|
|
const { Type } = require('../../lib/types');
|
|
const { updateCurrentSeeders } = require('../../lib/torrent');
|
|
const { parseTorrentFiles } = require('../../lib/torrentFiles');
|
|
const { getMetadata, getKitsuId } = require('../../lib/metadata');
|
|
const showMappings = require('./horriblesubs_mapping.json');
|
|
|
|
// Provider name: used as the repository lookup key, as the `provider` field of
// created torrents and as the prefix of log messages.
const NAME = 'HorribleSubs';

// Minimum age, in seconds, of the last full scrape before another one is run (3 days).
const NEXT_FULL_SCRAPE_OFFSET = 60 * 60 * 24 * 3;

// Limits how many show-level jobs run concurrently.
const limiter = new Bottleneck({ maxConcurrent: 5 });

// Limits how many torrent-entry jobs run concurrently.
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
|
|
|
|
/**
 * Entry point of the scraper.
 *
 * Runs a full scrape of all shows when the provider has no `lastScraped` value
 * or when that value is more than NEXT_FULL_SCRAPE_OFFSET seconds older than
 * the start of this run; otherwise only the latest entries are scraped.
 * After a full scrape the provider record is updated with the start time.
 *
 * NOTE(review): `lastScraped` is read through `moment.unix()` but written back
 * as a moment object - confirm `repository.updateProvider` stores it in the
 * unit the read side expects.
 *
 * @returns {Promise<void>} settles once the selected scrape has finished.
 */
async function scrape() {
  const scrapeStart = moment();
  const provider = await repository.getProvider({ name: NAME });
  const previousFullScrape = provider.lastScraped && moment.unix(provider.lastScraped);
  const isFullScrapeDue = !previousFullScrape
      || previousFullScrape.add(NEXT_FULL_SCRAPE_OFFSET, 'seconds') < scrapeStart;

  if (!isFullScrapeDue) {
    console.log(`[${scrapeStart}] scrapping latest ${NAME} entries...`);
    await _scrapeLatestEntries();
    console.log(`[${moment()}] finished scrapping latest ${NAME} entries`);
    return;
  }

  console.log(`[${scrapeStart}] scrapping all ${NAME} shows...`);
  await _scrapeAllShows();
  provider.lastScraped = scrapeStart;
  await repository.updateProvider(provider);
  console.log(`[${moment()}] finished scrapping all ${NAME} shows`);
}
|
|
|
|
/**
 * Scrapes the entries returned by `horriblesubs.getLatestEntries()`.
 *
 * Each entry is parsed through the shared `limiter`; a failure while parsing
 * one entry is logged and does not reject the overall result.
 *
 * @returns {Promise<Array>} settles when every scheduled entry has been handled.
 */
async function _scrapeLatestEntries() {
  const latestEntries = await horriblesubs.getLatestEntries();
  const logFailure = (err) => console.log(err);
  const parseEntry = (entryData) => _parseShowData(entryData).catch(logFailure);
  const jobs = latestEntries.map((entryData) => limiter.schedule(() => parseEntry(entryData)));

  return Promise.all(jobs);
}
|
|
|
|
/**
 * Scrapes every show returned by `horriblesubs.allShows()`.
 *
 * For each show the show data is fetched and parsed through the shared
 * `limiter`; a failure for one show is logged and does not reject the
 * overall result.
 *
 * @returns {Promise<Array>} settles when every scheduled show has been handled.
 */
async function _scrapeAllShows() {
  const shows = await horriblesubs.allShows();
  const scrapeShow = (show) => horriblesubs.showData(show)
      .then((showData) => _parseShowData(showData))
      .catch((err) => console.log(err));
  const jobs = shows.map((show) => limiter.schedule(() => scrapeShow(show)));

  return Promise.all(jobs);
}
|
|
|
|
/**
 * Diagnostic helper: enriches shows through `enrichShow` and logs the ones
 * whose resolved `kitsu_id` differs from the one stored in `showMappings`,
 * followed by the ratio of mismatches to checked shows.
 *
 * NOTE(review): only the first show of the list is checked (`slice(0, 1)`);
 * this looks like a debugging leftover - confirm before relying on the ratio.
 *
 * @returns {Promise<void>}
 */
async function compareSearchKitsuIds() {
  console.log(`${NAME}: initiating kitsu compare...`);
  const allShows = await horriblesubs.allShows();
  const checkedShows = allShows.slice(0, 1);
  const shows = await Promise.all(
      checkedShows.map((show) => limiter.schedule(() => enrichShow(show))));

  const incorrect = shows.filter((show) => {
    const mapping = showMappings[show.title];
    return mapping && mapping.kitsu_id !== show.kitsu_id;
  });
  const incorrectRatio = incorrect.length / shows.length;

  console.log(incorrect);
  console.log(`Ratio: ${incorrectRatio}`);
}
|
|
|
|
/**
 * Builds kitsu mappings for every show that has no entry in `showMappings`
 * yet and writes the complete mapping to the mapping JSON file.
 *
 * Newly enriched shows are added to the in-memory `showMappings` object
 * (keyed by show title) before the whole object is serialized.
 *
 * The returned promise settles only after the file write has completed.
 * A write failure is logged together with the underlying error and does
 * not reject the promise.
 *
 * @returns {Promise<void>}
 */
async function initMapping() {
  console.log(`${NAME}: initiating kitsu mapping...`);
  const allShows = await horriblesubs.allShows();
  const unmappedShows = allShows.filter((show) => !showMappings[show.title]);
  const enrichedShows = await Promise.all(
      unmappedShows.map((show) => limiter.schedule(() => enrichShow(show))));

  // The shared mapping is extended in place, exactly as before, so that the
  // file always contains the old and the new entries.
  enrichedShows.forEach((show) => {
    showMappings[show.title] = show;
  });

  // fs.writeFile is callback based: wrap it so callers can await the write.
  await new Promise((resolve) => {
    fs.writeFile("./scrapers/horriblesubs/horriblesubs_mapping.json", JSON.stringify(showMappings), 'utf8', (err) => {
      if (err) {
        console.log("An error occurred while writing JSON Object to File.", err);
      } else {
        console.log(`${NAME}: finished kitsu mapping`);
      }
      resolve();
    });
  });
}
|
|
|
|
/**
 * Enriches a show with its show id and kitsu metadata.
 *
 * The show id falls back to the show title when it cannot be resolved, and
 * the metadata falls back to an empty object (after logging the error) when
 * the kitsu lookup fails.
 *
 * @param {Object} show show entry; its `title` and `url` are read here.
 * @returns {Promise<Object>} the show's own properties plus `showId`,
 *   `kitsu_id`, `kitsuTitle` and `imdb_id`. Properties named `showId` or
 *   `kitsu_id` already present on `show` take precedence over the resolved
 *   ones; `kitsuTitle` and `imdb_id` always come from the metadata.
 */
async function enrichShow(show) {
  const { title, url } = show;
  console.log(`${NAME}: getting show info for ${title}...`);

  const useTitleAsId = () => title;
  const useEmptyMetadata = (error) => {
    console.log(`Failed getting kitsu meta: ${error.message}`);
    return {};
  };

  const showId = await horriblesubs._getShowId(url).catch(useTitleAsId);
  const metadata = await getKitsuId({ title })
      .then((kitsuId) => getMetadata(kitsuId))
      .catch(useEmptyMetadata);

  // Same key order and override rules as a literal with `...show` in the middle.
  return Object.assign(
      { showId, kitsu_id: metadata.kitsuId },
      show,
      { kitsuTitle: metadata.title, imdb_id: metadata.imdbId });
}
|
|
|
|
/**
 * Turns the scraped data of one show into torrent and file entries.
 *
 * Every mirror of every single and pack episode becomes one torrent
 * candidate. Candidates for which `checkIfExists` or `updateCurrentSeeders`
 * yields a falsy value are skipped. For the rest the torrent files are parsed
 * and verified, and the torrent and all of its files are stored through the
 * repository.
 *
 * Changes compared to the previous implementation:
 *  - file creation is awaited, so "Created entry" is logged only after all
 *    files were stored and a failing `createFile` rejects the returned
 *    promise instead of becoming an unhandled rejection;
 *  - magnet links with a single tracker (string) or without trackers no
 *    longer throw while building the `trackers` field;
 *  - each magnet link is decoded once instead of twice.
 *
 * @param {Object} showData scraped show; `title`, `singleEpisodes` and
 *   `packEpisodes` are read here.
 * @returns {Promise<void>} settles when every candidate has been processed.
 * @throws {Error} when `showMappings` has no entry, or no `kitsu_id`, for the
 *   show title.
 */
async function _parseShowData(showData) {
  console.log(`${NAME}: scrapping ${showData.title} data...`);
  const showMapping = showMappings[showData.title];
  if (!showMapping) {
    throw new Error(`No kitsu mapping found for ${showData.title}`);
  }
  const kitsuId = showMapping.kitsu_id;
  if (!kitsuId) {
    throw new Error(`No kitsuId found for ${showData.title}`);
  }

  // sometimes horriblesubs entry contains multiple season in it, so need to split it per kitsu season entry:
  // absolute episode number -> { kitsuId, episode within that kitsu entry, title }
  const kitsuIdsMapping = Array.isArray(kitsuId)
      ? await Promise.all(kitsuId.map((id) => getMetadata(id)))
          .then((metas) => metas.reduce((map, meta) => {
            // episodes of this entry continue after those already mapped
            const epOffset = Object.keys(map).length;
            [...Array(meta.totalCount).keys()]
                .map((ep) => ep + 1)
                .forEach((ep) => {
                  map[ep + epOffset] = { kitsuId: meta.kitsuId, episode: ep, title: meta.title };
                });
            return map;
          }, {}))
      : {};

  const formatTitle = (episodeInfo, mirror) => {
    // the mapping is keyed by episode number without leading zeros
    const mapping = kitsuIdsMapping[episodeInfo.episode.replace(/^0+/, '')];
    if (mapping) {
      return `${mapping.title} - ${mapping.episode} [${mirror.resolution}]`;
    }
    return `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`;
  };

  const resolveKitsuId = (inputEpisode) => {
    // for an episode range such as "01-12" the first episode decides
    const episodeString = inputEpisode.includes('-') && inputEpisode.split('-')[0] || inputEpisode;
    const episode = parseInt(episodeString, 10);
    return kitsuIdsMapping[episode] && kitsuIdsMapping[episode].kitsuId || kitsuId;
  };

  const toTorrent = (episodeInfo, mirror) => {
    const magnet = decode(mirror.magnetLink);
    return {
      provider: NAME,
      ...mirror,
      infoHash: magnet.infoHash,
      // `tr` may be an array, a single string or missing altogether
      trackers: [].concat(magnet.tr || []).join(','),
      title: formatTitle(episodeInfo, mirror),
      // fixed value; presumably a placeholder until the real size is known - TODO confirm
      size: 300000000,
      type: Type.ANIME,
      kitsuId: resolveKitsuId(episodeInfo.episode),
      uploadDate: episodeInfo.uploadDate,
    };
  };

  const storeTorrent = async (incompleteTorrent) => {
    const candidate = await checkIfExists(incompleteTorrent);
    const torrent = candidate && await updateCurrentSeeders(candidate);
    if (!torrent) {
      return;
    }
    const parsedFiles = await parseTorrentFiles(torrent);
    const files = await verifyFiles(torrent, parsedFiles);
    await repository.createTorrent(torrent);
    await Promise.all(files.map((file) => repository.createFile(file)));
    console.log(`Created entry for ${torrent.title}`);
  };

  const incompleteTorrents = [].concat(showData.singleEpisodes).concat(showData.packEpisodes)
      .map((episodeInfo) => episodeInfo.mirrors.map((mirror) => toTorrent(episodeInfo, mirror)))
      .reduce((a, b) => a.concat(b), []);

  await Promise.all(incompleteTorrents
      .map((incompleteTorrent) => entryLimiter.schedule(() => storeTorrent(incompleteTorrent))));
  console.log(`${NAME}: finished scrapping ${showData.title} data`);
}
|
|
|
|
/**
 * Checks that a torrent has files and merges them with files already stored
 * for the same info hash.
 *
 * When the repository already holds files for the info hash of the first
 * file, each given file takes over the stored `id` and, when set, the stored
 * `size` of the file with the same `fileIndex`. When nothing is stored, or the
 * repository lookup fails, the given files are returned unchanged.
 *
 * @param {Object} torrent torrent the files belong to; only `title` is read,
 *   for the error message.
 * @param {Array<Object>} files parsed files of the torrent.
 * @returns {Promise<Array<Object>>} the files to store.
 * @throws {Error} when `files` is missing or empty. (Previously this rejected
 *   with a bare string; an Error with the same message is used now, in line
 *   with the other errors raised in this file.)
 */
async function verifyFiles(torrent, files) {
  if (!files || !files.length) {
    throw new Error(`No video files found for: ${torrent.title}`);
  }

  // Best effort: a failing lookup is treated the same as "nothing stored yet".
  const existingFiles = await repository.getFiles({ infoHash: files[0].infoHash })
      .then((existing) => existing.reduce((map, file) => (map[file.fileIndex] = file, map), {}))
      .catch(() => undefined);

  if (!existingFiles || !Object.keys(existingFiles).length) {
    return files;
  }

  return files.map((file) => {
    const stored = existingFiles[file.fileIndex];
    return {
      ...file,
      id: stored && stored.id,
      size: stored && stored.size || file.size,
    };
  });
}
|
|
|
|
/**
 * Decides whether a torrent candidate still needs to be stored.
 *
 * @param {Object} torrent torrent candidate, passed to `repository.getTorrent`.
 * @returns {Promise<Object|undefined>}
 *   - the candidate itself when no stored torrent is found (a failing lookup
 *     counts as "not found");
 *   - `undefined` when the stored torrent was created by this provider;
 *   - otherwise a copy of the candidate carrying the stored `size` and `seeders`.
 */
async function checkIfExists(torrent) {
  const stored = await repository.getTorrent(torrent).catch(() => undefined);

  if (stored && stored.provider === NAME) {
    // torrent by this provider already exists
    return undefined;
  }
  if (stored) {
    // known from another provider: reuse its size and seeders
    return { ...torrent, size: stored.size, seeders: stored.seeders };
  }
  // no torrent exists yet
  return torrent;
}
|
|
|
|
// Public API of this module: only the scrape entry point is exported.
module.exports = { scrape };