mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
Updated scrapers to latest available commit
This commit is contained in:
@@ -1,16 +1,17 @@
|
||||
const needle = require('needle');
|
||||
const axios = require('axios');
|
||||
const nameToImdb = require('name-to-imdb');
|
||||
const googleIt = require('google-it');
|
||||
const googleSr = require('google-sr');
|
||||
const bing = require('nodejs-bing');
|
||||
const he = require('he');
|
||||
const { cacheWrapImdbId, cacheWrapKitsuId, cacheWrapMetadata } = require('./cache');
|
||||
const { Type } = require('./types');
|
||||
const { getRandomUserAgent } = require('./requestHelper');
|
||||
|
||||
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
|
||||
const KITSU_URL = 'https://anime-kitsu.strem.fun';
|
||||
const TIMEOUT = 20000;
|
||||
// Selector set handed to googleSr.search() as `{ selectors }`.
// Starts from the library's defaultSelectors and overrides LinkSelector with
// 'a:has(h3)', i.e. only anchors that contain an <h3> element.
// NOTE(review): presumably this targets result-title links — confirm against google-sr.
const selectors = {
|
||||
...googleSr.defaultSelectors,
|
||||
LinkSelector: 'a:has(h3)'
|
||||
}
|
||||
|
||||
function getMetadata(id, type = Type.SERIES) {
|
||||
if (!id) {
|
||||
@@ -32,9 +33,9 @@ function getMetadata(id, type = Type.SERIES) {
|
||||
}
|
||||
|
||||
function _requestMetadata(url) {
|
||||
return needle('get', url, { open_timeout: TIMEOUT })
|
||||
return axios.get(url, { timeout: TIMEOUT })
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (body && body.meta && (body.meta.imdb_id || body.meta.kitsu_id)) {
|
||||
return {
|
||||
kitsuId: body.meta.kitsu_id,
|
||||
@@ -85,7 +86,7 @@ function escapeTitle(title) {
|
||||
.replace(/[\u0300-\u036F]/g, '')
|
||||
.replace(/&/g, 'and')
|
||||
.replace(/[;, ~./]+/g, ' ') // replace dots, commas or underscores with spaces
|
||||
.replace(/[^\w \-()+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars
|
||||
.replace(/[^\w \-()×+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars
|
||||
.replace(/^\d{1,2}[.#\s]+(?=(?:\d+[.\s]*)?[\u0400-\u04ff])/i, '') // remove russian movie numbering
|
||||
.replace(/\s{2,}/, ' ') // replace multiple spaces
|
||||
.trim();
|
||||
@@ -111,13 +112,10 @@ async function getImdbId(info, type) {
|
||||
reject(err || new Error('failed imdbId search'));
|
||||
}
|
||||
});
|
||||
// }).catch(() => googleIt({ query, userAgent: getRandomUserAgent(), disableConsole: true })
|
||||
// .then(results => results.length ? results : Promise.reject('No results'))
|
||||
}).catch(() => googleSr(query)
|
||||
.then(response => response.searchResults.length ? response.searchResults : Promise.reject('No results'))
|
||||
// .catch(() => bing.web(query))
|
||||
}).catch(() => googleSr.search(query, { selectors })
|
||||
.then(response => response.length ? response : Promise.reject('No results'))
|
||||
.then(results => results
|
||||
.map(result => result.link)
|
||||
.map(result => result.Link)
|
||||
.find(result => result.includes('imdb.com/title/')))
|
||||
.then(result => result && result.match(/imdb\.com\/title\/(tt\d+)/))
|
||||
.then(match => match && match[1])))
|
||||
@@ -132,9 +130,9 @@ async function getKitsuId(info) {
|
||||
const query = encodeURIComponent(key);
|
||||
|
||||
return cacheWrapKitsuId(key,
|
||||
() => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 })
|
||||
() => axios.get(`${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { timeout: 60000 })
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
const body = response.data;
|
||||
if (body && body.metas && body.metas.length) {
|
||||
return body.metas[0].id.replace('kitsu:', '');
|
||||
} else {
|
||||
@@ -147,8 +145,8 @@ async function isEpisodeImdbId(imdbId) {
|
||||
if (!imdbId) {
|
||||
return false;
|
||||
}
|
||||
return needle('get', `https://www.imdb.com/title/${imdbId}/`, { open_timeout: 10000, follow: 2 })
|
||||
.then(response => !!(response.body && response.body.includes('video.episode')))
|
||||
return axios.get(`https://www.imdb.com/title/${imdbId}/`, { timeout: 10000 })
|
||||
.then(response => !!(response.data && response.data.includes('video.episode')))
|
||||
.catch((err) => false);
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ function parseSeriesVideo(video, parsedTorrentName) {
|
||||
// force episode to any found number if it was not parsed
|
||||
if (!videoInfo.episodes && !videoInfo.date) {
|
||||
const epMatcher = videoInfo.title.match(
|
||||
/(?<!season\W*|disk\W*|movie\W*|film\W*)(?:^|\W)(\d{1,4})(?:a|b|c|v\d)?(?:\W|$)(?!disk|movie|film)/i);
|
||||
/(?<!season\W*|disk\W*|movie\W*|film\W*)(?:^|\W|_)(\d{1,4})(?:a|b|c|v\d)?(?:_|\W|$)(?!disk|movie|film)/i);
|
||||
videoInfo.episodes = epMatcher && [parseInt(epMatcher[1], 10)];
|
||||
videoInfo.episode = videoInfo.episodes && videoInfo.episodes[0];
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
const moment = require('moment');
|
||||
const Promises = require('./promises')
|
||||
const { Sequelize, DataTypes, fn, col, literal } = require('sequelize');
|
||||
const Op = Sequelize.Op;
|
||||
const { Sequelize, Op, DataTypes, fn, col, literal } = require('sequelize');
|
||||
|
||||
const DATABASE_URI = process.env.DATABASE_URI;
|
||||
|
||||
@@ -177,6 +176,10 @@ function getTorrentsBasedOnQuery(where) {
|
||||
return Torrent.findAll({ where: where });
|
||||
}
|
||||
|
||||
// Looks up File entries matching a caller-supplied filter.
// `where` is passed through unchanged as the `where` option of File.findAll.
// Returns whatever File.findAll returns.
// NOTE(review): File is not defined in this view; presumably a Sequelize model,
// in which case the result is a Promise of the matching rows — confirm.
function getFilesBasedOnQuery(where) {
|
||||
return File.findAll({ where: where });
|
||||
}
|
||||
|
||||
function getTorrentsWithoutSize() {
|
||||
return Torrent.findAll({
|
||||
where: literal(
|
||||
@@ -326,6 +329,7 @@ module.exports = {
|
||||
getTorrent,
|
||||
getTorrentsBasedOnTitle,
|
||||
getTorrentsBasedOnQuery,
|
||||
getFilesBasedOnQuery,
|
||||
deleteTorrent,
|
||||
getUpdateSeedersTorrents,
|
||||
getUpdateSeedersNewTorrents,
|
||||
|
||||
@@ -8,14 +8,17 @@ function getRandomUserAgent() {
|
||||
// Builds a default request-options object.
// When both PROXY_HOST and PROXY_TYPE environment variables are set, the result
// carries proxy settings plus headers with a random user agent and the proxy type;
// otherwise it carries only a random user agent.
// NOTE(review): this span lists two `proxy:` entries and two fallback `return`
// statements back to back; it appears to show both the previous and the updated
// line of a diff — confirm which one applies.
function defaultOptionsWithProxy() {
|
||||
if (process.env.PROXY_HOST && process.env.PROXY_TYPE) {
|
||||
return {
|
||||
proxy: process.env.PROXY_HOST,
|
||||
proxy: {
|
||||
// Host is whatever sits between '//' and the last ':' (the `.*` is greedy).
// String.prototype.match returns null when nothing matches, so indexing [1]
// throws a TypeError if PROXY_HOST has no '//' followed later by a ':'.
host: process.env.PROXY_HOST.match(/\/\/(.*):/)[1],
|
||||
// Port is the first run of digits directly following a ':'. The capture is a
// string, and [1] throws a TypeError when PROXY_HOST contains no ':<digits>'.
// NOTE(review): confirm the consumer of these options accepts a string port.
port: process.env.PROXY_HOST.match(/:(\d+)/)[1]
|
||||
},
|
||||
headers: {
|
||||
'user-agent': getRandomUserAgent(),
|
||||
'proxy-type': process.env.PROXY_TYPE
|
||||
}
|
||||
}
|
||||
}
|
||||
// No proxy configured: only a random user agent is supplied.
return { userAgent: getRandomUserAgent() };
|
||||
return { headers: { 'user-agent': getRandomUserAgent() } };
|
||||
}
|
||||
|
||||
module.exports = { getRandomUserAgent, defaultOptionsWithProxy };
|
||||
@@ -1,5 +1,5 @@
|
||||
const torrentStream = require('torrent-stream');
|
||||
const needle = require('needle');
|
||||
const axios = require('axios');
|
||||
const parseTorrent = require('parse-torrent');
|
||||
const BTClient = require('bittorrent-tracker')
|
||||
const async = require('async');
|
||||
@@ -113,12 +113,12 @@ async function filesFromTorrentFile(torrent) {
|
||||
return Promise.reject(new Error("no torrentLink"));
|
||||
}
|
||||
|
||||
return needle('get', torrent.torrentLink, { open_timeout: 10000 })
|
||||
return axios.get(torrent.torrentLink, { timeout: 10000, responseType: 'arraybuffer' })
|
||||
.then((response) => {
|
||||
if (!response.body || response.statusCode !== 200) {
|
||||
if (!response.data || response.status !== 200) {
|
||||
throw new Error('torrent not found')
|
||||
}
|
||||
return response.body
|
||||
return response.data
|
||||
})
|
||||
.then((body) => parseTorrent(body))
|
||||
.then((info) => info.files.map((file, fileId) => ({
|
||||
@@ -197,8 +197,8 @@ async function getTorrentTrackers(torrent) {
|
||||
}
|
||||
|
||||
async function getDefaultTrackers(torrent, retry = 3) {
|
||||
return cacheTrackers(() => needle('get', TRACKERS_URL, { open_timeout: SEEDS_CHECK_TIMEOUT })
|
||||
.then(response => response.body && response.body.trim())
|
||||
return cacheTrackers(() => axios.get(TRACKERS_URL, { timeout: SEEDS_CHECK_TIMEOUT })
|
||||
.then(response => response.data && response.data.trim())
|
||||
.then(body => body && body.split('\n\n') || []))
|
||||
.catch(() => retry > 0 ? delay(5000).then(() => getDefaultTrackers(torrent, retry - 1)) : [])
|
||||
.then(trackers => trackers.concat(ADDITIONAL_TRACKERS))
|
||||
|
||||
@@ -107,7 +107,7 @@ async function checkAndUpdateTorrent(torrent) {
|
||||
if (!storedTorrent.languages && torrent.languages && storedTorrent.provider !== 'RARBG') {
|
||||
storedTorrent.languages = torrent.languages;
|
||||
storedTorrent.save();
|
||||
console.log(`Updated [${torrent.infoHash}] ${torrent.title} language to ${torrent.languages}`);
|
||||
console.log(`Updated [${storedTorrent.infoHash}] ${storedTorrent.title} language to ${torrent.languages}`);
|
||||
}
|
||||
return createTorrentContents({ ...storedTorrent.get(), torrentLink: torrent.torrentLink })
|
||||
.then(() => updateTorrentSeeders(torrent));
|
||||
|
||||
@@ -106,7 +106,7 @@ async function getSeriesTorrentContent(torrent) {
|
||||
|
||||
async function mapSeriesEpisode(file, torrent, files) {
|
||||
if (!file.episodes && !file.kitsuEpisodes) {
|
||||
if (files.some(otherFile => otherFile.episodes || otherFile.kitsuEpisodes) || parse(torrent.title).seasons) {
|
||||
if (files.length === 1 || files.some(f => f.episodes || f.kitsuEpisodes) || parse(torrent.title).seasons) {
|
||||
return Promise.resolve({
|
||||
infoHash: torrent.infoHash,
|
||||
fileIndex: file.fileIndex,
|
||||
@@ -187,7 +187,7 @@ async function decomposeEpisodes(torrent, files, metadata = { episodeCount: [] }
|
||||
decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata);
|
||||
} else if (isDateEpisodeFiles(files, metadata)) {
|
||||
decomposeDateEpisodeFiles(torrent, files, metadata);
|
||||
} else if (isAbsoluteEpisodeFiles(files, metadata)) {
|
||||
} else if (isAbsoluteEpisodeFiles(torrent, files, metadata)) {
|
||||
decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
|
||||
}
|
||||
// decomposeEpisodeTitleFiles(torrent, files, metadata);
|
||||
@@ -237,26 +237,28 @@ function isDateEpisodeFiles(files, metadata) {
|
||||
return files.every(file => (!file.season || !metadata.episodeCount[file.season - 1]) && file.date);
|
||||
}
|
||||
|
||||
// Decides whether the files' episode numbers should be handled as absolute
// numbering instead of per-season numbering.
// NOTE(review): two signatures and two return expressions are listed back to back
// here; this looks like the previous and the updated version of a diff. The
// description below follows the three-argument signature and the second return —
// confirm which version applies.
// The result is truthy when any of the following holds:
//  - no non-movie file with episodes carries a season;
//  - the torrent is anime with a kitsuId and every such file's season is greater
//    than metadata.episodeCount.length;
//  - the number of files that have a season and whose episodes are all greater than
//    metadata.episodeCount[season - 1] reaches the threshold.
function isAbsoluteEpisodeFiles(files, metadata) {
|
||||
function isAbsoluteEpisodeFiles(torrent, files, metadata) {
|
||||
// Threshold is one fifth of the file count, rounded up.
const threshold = Math.ceil(files.length / 5);
|
||||
const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
|
||||
const nonMovieEpisodes = files
|
||||
.filter(file => !file.isMovie && file.episodes);
|
||||
const absoluteEpisodes = files
|
||||
.filter(file => file.season && file.episodes)
|
||||
// A missing episodeCount entry is undefined, and `undefined < ep` is false,
// so files whose season is absent from the metadata are not counted here.
.filter(file => file.episodes.every(ep => metadata.episodeCount[file.season - 1] < ep))
|
||||
return nonMovieEpisodes.every(file => !file.season || file.season > metadata.episodeCount.length)
|
||||
|| absoluteEpisodes.length >= threshold
|
||||
// && !isNewEpisodesNotInMetadata(files, metadata);
|
||||
return nonMovieEpisodes.every(file => !file.season)
|
||||
|| (isAnime && nonMovieEpisodes.every(file => file.season > metadata.episodeCount.length))
|
||||
|| absoluteEpisodes.length >= threshold;
|
||||
}
|
||||
|
||||
// Tells whether an episode looks newer than anything the metadata lists.
// NOTE(review): two signatures (a files-based and a single-file one) and two return
// expressions are listed back to back here; this looks like the previous and the
// updated version of a diff — confirm which version applies.
// Both variants require metadata.status to match /continuing|current/i, the season
// to be at least metadata.episodeCount.length, and every episode number to exceed
// the count recorded for that season.
function isNewEpisodesNotInMetadata(files, metadata) {
|
||||
function isNewEpisodeNotInMetadata(torrent, file, metadata) {
|
||||
// a new episode might not have been indexed by cinemeta yet.
|
||||
// detect this when the episode number is larger than the last known episode or the season is larger than the last one
|
||||
return files.length === 1
|
||||
// only for non-anime metas
|
||||
const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
|
||||
return !isAnime && !file.isMovie && file.episodes && file.season !== 1
|
||||
&& /continuing|current/i.test(metadata.status)
|
||||
&& files.filter(file => !file.isMovie && file.episodes)
|
||||
.every(file => file.season >= metadata.episodeCount.length
|
||||
&& file.episodes.every(ep => ep > metadata.episodeCount[file.season - 1]))
|
||||
&& file.season >= metadata.episodeCount.length
|
||||
// `|| 0` makes a season missing from episodeCount compare against 0.
&& file.episodes.every(ep => ep > (metadata.episodeCount[file.season - 1] || 0));
|
||||
}
|
||||
|
||||
function decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata) {
|
||||
@@ -282,6 +284,7 @@ function decomposeAbsoluteEpisodeFiles(torrent, files, metadata) {
|
||||
}
|
||||
files
|
||||
.filter(file => file.episodes && !file.isMovie && file.season !== 0)
|
||||
.filter(file => !isNewEpisodeNotInMetadata(torrent, file, metadata))
|
||||
.filter(file => !file.season || (metadata.episodeCount[file.season - 1] || 0) < file.episodes[0])
|
||||
.forEach(file => {
|
||||
const seasonIdx = ([...metadata.episodeCount.keys()]
|
||||
|
||||
Reference in New Issue
Block a user