mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
updates horriblesubs scraper to split multiple season entries
This commit is contained in:
2
index.js
2
index.js
@@ -6,7 +6,7 @@ const thepiratebayScraper = require('./scrapers/thepiratebay/thepiratebay_dump_s
|
|||||||
const horribleSubsScraper = require('./scrapers/horriblesubs/horriblesubs_scraper');
|
const horribleSubsScraper = require('./scrapers/horriblesubs/horriblesubs_scraper');
|
||||||
const thepiratebayDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper');
|
const thepiratebayDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper');
|
||||||
|
|
||||||
const providers = [thepiratebayScraper];
|
const providers = [horribleSubsScraper];
|
||||||
|
|
||||||
async function scrape() {
|
async function scrape() {
|
||||||
providers.forEach((provider) => provider.scrape());
|
providers.forEach((provider) => provider.scrape());
|
||||||
|
|||||||
@@ -67,18 +67,18 @@ function _requestMetadata(url) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function escapeTitle(title, hyphenEscape = true) {
|
function escapeTitle(title) {
|
||||||
return title.toLowerCase()
|
return title.toLowerCase()
|
||||||
.normalize('NFKD') // normalize non-ASCII characters
|
.normalize('NFKD') // normalize non-ASCII characters
|
||||||
.replace(/[\u0300-\u036F]/g, '')
|
.replace(/[\u0300-\u036F]/g, '')
|
||||||
.replace(/&/g, 'and')
|
.replace(/&/g, 'and')
|
||||||
.replace(hyphenEscape ? /[.,_+ -]+/g : /[.,_+ ]+/g, ' ') // replace dots, commas or underscores with spaces
|
.replace(/[;, ~\-]+/g, ' ') // replace dots, commas or underscores with spaces
|
||||||
.replace(/[^\w- ()]/gi, '') // remove all non-alphanumeric chars
|
.replace(/[^\w ()+#@!']+/g, '') // remove all non-alphanumeric chars
|
||||||
.trim();
|
.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getImdbId(info, type) {
|
async function getImdbId(info, type) {
|
||||||
const name = escapeTitle(info.title).toLowerCase();
|
const name = escapeTitle(info.title);
|
||||||
const year = info.year || info.date && info.date.slice(0, 4);
|
const year = info.year || info.date && info.date.slice(0, 4);
|
||||||
const key = `${name}_${year}_${type}`;
|
const key = `${name}_${year}_${type}`;
|
||||||
|
|
||||||
@@ -100,7 +100,7 @@ async function getImdbId(info, type) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function getKitsuId(info) {
|
async function getKitsuId(info) {
|
||||||
const title = escapeTitle(info.title).toLowerCase().replace(/[;]+/g, ' ').replace(/[,%']+/g, '');
|
const title = escapeTitle(info.title);
|
||||||
const season = info.season > 1 ? ` S${info.season}` : '';
|
const season = info.season > 1 ? ` S${info.season}` : '';
|
||||||
const query = `${title}${season}`;
|
const query = `${title}${season}`;
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ const { torrentFiles } = require('../lib/torrent');
|
|||||||
const { escapeTitle, getMetadata, getImdbId } = require('../lib/metadata');
|
const { escapeTitle, getMetadata, getImdbId } = require('../lib/metadata');
|
||||||
const { Type } = require('./types');
|
const { Type } = require('./types');
|
||||||
|
|
||||||
const MIN_SIZE = 10 * 1024 * 1024; // 20 MB
|
const MIN_SIZE = 10 * 1024 * 1024; // 10 MB
|
||||||
|
|
||||||
async function parseTorrentFiles(torrent) {
|
async function parseTorrentFiles(torrent) {
|
||||||
const parsedTorrentName = parse(torrent.title);
|
const parsedTorrentName = parse(torrent.title);
|
||||||
|
|||||||
2
package-lock.json
generated
2
package-lock.json
generated
@@ -1714,7 +1714,7 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"parse-torrent-title": {
|
"parse-torrent-title": {
|
||||||
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#6767960f4c40c7c97fd2f8e191cf099ab7dcd1dd",
|
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#ddd5037820289d35e600baec9d8e730935d261af",
|
||||||
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#master"
|
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#master"
|
||||||
},
|
},
|
||||||
"parseurl": {
|
"parseurl": {
|
||||||
|
|||||||
@@ -68,8 +68,7 @@
|
|||||||
"kitsu_id": "6508"
|
"kitsu_id": "6508"
|
||||||
},
|
},
|
||||||
"Anohana (Live Action)": {
|
"Anohana (Live Action)": {
|
||||||
"showId": "18",
|
"showId": "18"
|
||||||
"kitsu_id": "5981"
|
|
||||||
},
|
},
|
||||||
"Another": {
|
"Another": {
|
||||||
"showId": "19",
|
"showId": "19",
|
||||||
@@ -153,7 +152,7 @@
|
|||||||
},
|
},
|
||||||
"Binan Koukou Chikyuu Bouei-bu Love!": {
|
"Binan Koukou Chikyuu Bouei-bu Love!": {
|
||||||
"showId": "38",
|
"showId": "38",
|
||||||
"kitsu_id": "11329"
|
"kitsu_id": "9173"
|
||||||
},
|
},
|
||||||
"Black Bullet": {
|
"Black Bullet": {
|
||||||
"showId": "39",
|
"showId": "39",
|
||||||
|
|||||||
@@ -34,6 +34,18 @@ async function _scrapeAllShows() {
|
|||||||
.catch((err) => console.log(err)))));
|
.catch((err) => console.log(err)))));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function compareSearchKitsuIds() {
|
||||||
|
console.log(`${NAME}: initiating kitsu compare...`);
|
||||||
|
const shows = await horriblesubs.allShows()
|
||||||
|
.then((shows) => Promise.all(shows.slice(0, 1).map((show) => limiter.schedule(() => enrichShow(show)))));
|
||||||
|
|
||||||
|
const incorrect = shows.filter(
|
||||||
|
(show) => showMappings[show.title] && showMappings[show.title].kitsu_id !== show.kitsu_id);
|
||||||
|
const incorrectRatio = incorrect.length / shows.length;
|
||||||
|
console.log(incorrect);
|
||||||
|
console.log(`Ratio: ${incorrectRatio}`);
|
||||||
|
}
|
||||||
|
|
||||||
async function initMapping() {
|
async function initMapping() {
|
||||||
console.log(`${NAME}: initiating kitsu mapping...`);
|
console.log(`${NAME}: initiating kitsu mapping...`);
|
||||||
const shows = await horriblesubs.allShows()
|
const shows = await horriblesubs.allShows()
|
||||||
@@ -81,6 +93,28 @@ async function _parseShowData(showData) {
|
|||||||
throw new Error(`No kitsuId found for ${showData.title}`);
|
throw new Error(`No kitsuId found for ${showData.title}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sometimes horriblesubs entry contains multiple season in it, so need to split it per kitsu season entry
|
||||||
|
const kitsuIdsMapping = kitsuId.length && await Promise.all(kitsuId.map(kitsuId => getMetadata(kitsuId)))
|
||||||
|
.then((metas) => metas.reduce((map, meta) => {
|
||||||
|
const epOffset = Object.keys(map).length;
|
||||||
|
[...Array(meta.totalCount).keys()]
|
||||||
|
.map(ep => ep + 1)
|
||||||
|
.forEach(ep => map[ep + epOffset] = { kitsuId: meta.kitsuId, episode: ep, title: meta.title });
|
||||||
|
return map;
|
||||||
|
}, {})) || {};
|
||||||
|
const formatTitle = (episodeInfo, mirror) => {
|
||||||
|
const mapping = kitsuIdsMapping[episodeInfo.episode.replace(/^0+/, '')];
|
||||||
|
if (mapping) {
|
||||||
|
return `${mapping.title} - ${mapping.episode} [${mirror.resolution}]`;
|
||||||
|
}
|
||||||
|
return `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`;
|
||||||
|
};
|
||||||
|
const getKitsuId = inputEpisode => {
|
||||||
|
const episodeString = inputEpisode.includes('-') && inputEpisode.split('-')[0] || inputEpisode;
|
||||||
|
const episode = parseInt(episodeString, 10);
|
||||||
|
return kitsuIdsMapping[episode] && kitsuIdsMapping[episode].kitsuId || kitsuId;
|
||||||
|
};
|
||||||
|
|
||||||
return Promise.all([].concat(showData.singleEpisodes).concat(showData.packEpisodes)
|
return Promise.all([].concat(showData.singleEpisodes).concat(showData.packEpisodes)
|
||||||
.map((episodeInfo) => episodeInfo.mirrors
|
.map((episodeInfo) => episodeInfo.mirrors
|
||||||
.map((mirror) => ({
|
.map((mirror) => ({
|
||||||
@@ -88,10 +122,10 @@ async function _parseShowData(showData) {
|
|||||||
...mirror,
|
...mirror,
|
||||||
infoHash: decode(mirror.magnetLink).infoHash,
|
infoHash: decode(mirror.magnetLink).infoHash,
|
||||||
trackers: decode(mirror.magnetLink).tr.join(','),
|
trackers: decode(mirror.magnetLink).tr.join(','),
|
||||||
title: `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`,
|
title: formatTitle(episodeInfo, mirror),
|
||||||
size: 300000000,
|
size: 300000000,
|
||||||
type: Type.ANIME,
|
type: Type.ANIME,
|
||||||
kitsuId: kitsuId,
|
kitsuId: getKitsuId(episodeInfo.episode),
|
||||||
uploadDate: episodeInfo.uploadDate,
|
uploadDate: episodeInfo.uploadDate,
|
||||||
})))
|
})))
|
||||||
.reduce((a, b) => a.concat(b), [])
|
.reduce((a, b) => a.concat(b), [])
|
||||||
@@ -120,7 +154,7 @@ async function verifyFiles(torrent, files) {
|
|||||||
}
|
}
|
||||||
return files;
|
return files;
|
||||||
}
|
}
|
||||||
throw new Error(`No video files found for: ${torrent.title}`);
|
return Promise.reject(`No video files found for: ${torrent.title}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function checkIfExists(torrent) {
|
async function checkIfExists(torrent) {
|
||||||
|
|||||||
Reference in New Issue
Block a user