Updated scrapers to latest available commit

This commit is contained in:
Gabisonfire
2024-01-17 16:43:58 -05:00
parent cab7f38c66
commit 909ade0d8e
39 changed files with 311 additions and 299 deletions

View File

@@ -1,16 +1,17 @@
const needle = require('needle');
const axios = require('axios');
const nameToImdb = require('name-to-imdb');
const googleIt = require('google-it');
const googleSr = require('google-sr');
const bing = require('nodejs-bing');
const he = require('he');
const { cacheWrapImdbId, cacheWrapKitsuId, cacheWrapMetadata } = require('./cache');
const { Type } = require('./types');
const { getRandomUserAgent } = require('./requestHelper');
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
const KITSU_URL = 'https://anime-kitsu.strem.fun';
const TIMEOUT = 20000;
const selectors = {
...googleSr.defaultSelectors,
LinkSelector: 'a:has(h3)'
}
function getMetadata(id, type = Type.SERIES) {
if (!id) {
@@ -32,9 +33,9 @@ function getMetadata(id, type = Type.SERIES) {
}
function _requestMetadata(url) {
return needle('get', url, { open_timeout: TIMEOUT })
return axios.get(url, { timeout: TIMEOUT })
.then((response) => {
const body = response.body;
const body = response.data;
if (body && body.meta && (body.meta.imdb_id || body.meta.kitsu_id)) {
return {
kitsuId: body.meta.kitsu_id,
@@ -85,7 +86,7 @@ function escapeTitle(title) {
.replace(/[\u0300-\u036F]/g, '')
.replace(/&/g, 'and')
.replace(/[;, ~./]+/g, ' ') // replace dots, commas or underscores with spaces
.replace(/[^\w \-()+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars
.replace(/[^\w \-()×+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars
.replace(/^\d{1,2}[.#\s]+(?=(?:\d+[.\s]*)?[\u0400-\u04ff])/i, '') // remove russian movie numbering
.replace(/\s{2,}/, ' ') // replace multiple spaces
.trim();
@@ -111,13 +112,10 @@ async function getImdbId(info, type) {
reject(err || new Error('failed imdbId search'));
}
});
// }).catch(() => googleIt({ query, userAgent: getRandomUserAgent(), disableConsole: true })
// .then(results => results.length ? results : Promise.reject('No results'))
}).catch(() => googleSr(query)
.then(response => response.searchResults.length ? response.searchResults : Promise.reject('No results'))
// .catch(() => bing.web(query))
}).catch(() => googleSr.search(query, { selectors })
.then(response => response.length ? response : Promise.reject('No results'))
.then(results => results
.map(result => result.link)
.map(result => result.Link)
.find(result => result.includes('imdb.com/title/')))
.then(result => result && result.match(/imdb\.com\/title\/(tt\d+)/))
.then(match => match && match[1])))
@@ -132,9 +130,9 @@ async function getKitsuId(info) {
const query = encodeURIComponent(key);
return cacheWrapKitsuId(key,
() => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 })
() => axios.get(`${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { timeout: 60000 })
.then((response) => {
const body = response.body;
const body = response.data;
if (body && body.metas && body.metas.length) {
return body.metas[0].id.replace('kitsu:', '');
} else {
@@ -147,8 +145,8 @@ async function isEpisodeImdbId(imdbId) {
if (!imdbId) {
return false;
}
return needle('get', `https://www.imdb.com/title/${imdbId}/`, { open_timeout: 10000, follow: 2 })
.then(response => !!(response.body && response.body.includes('video.episode')))
return axios.get(`https://www.imdb.com/title/${imdbId}/`, { timeout: 10000 })
.then(response => !!(response.data && response.data.includes('video.episode')))
.catch((err) => false);
}