Updated scrapers to latest available commit

This commit is contained in:
Gabisonfire
2024-01-17 16:43:58 -05:00
parent cab7f38c66
commit 909ade0d8e
39 changed files with 311 additions and 299 deletions

View File

@@ -1,16 +1,17 @@
const needle = require('needle');
const axios = require('axios');
const nameToImdb = require('name-to-imdb');
const googleIt = require('google-it');
const googleSr = require('google-sr');
const bing = require('nodejs-bing');
const he = require('he');
const { cacheWrapImdbId, cacheWrapKitsuId, cacheWrapMetadata } = require('./cache');
const { Type } = require('./types');
const { getRandomUserAgent } = require('./requestHelper');
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
const KITSU_URL = 'https://anime-kitsu.strem.fun';
const TIMEOUT = 20000;
const selectors = {
...googleSr.defaultSelectors,
LinkSelector: 'a:has(h3)'
}
function getMetadata(id, type = Type.SERIES) {
if (!id) {
@@ -32,9 +33,9 @@ function getMetadata(id, type = Type.SERIES) {
}
function _requestMetadata(url) {
return needle('get', url, { open_timeout: TIMEOUT })
return axios.get(url, { timeout: TIMEOUT })
.then((response) => {
const body = response.body;
const body = response.data;
if (body && body.meta && (body.meta.imdb_id || body.meta.kitsu_id)) {
return {
kitsuId: body.meta.kitsu_id,
@@ -85,7 +86,7 @@ function escapeTitle(title) {
.replace(/[\u0300-\u036F]/g, '')
.replace(/&/g, 'and')
.replace(/[;, ~./]+/g, ' ') // replace dots, commas or underscores with spaces
.replace(/[^\w \-()+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars
.replace(/[^\w \-()×+#@!'\u0400-\u04ff]+/g, '') // remove all non-alphanumeric chars
.replace(/^\d{1,2}[.#\s]+(?=(?:\d+[.\s]*)?[\u0400-\u04ff])/i, '') // remove russian movie numbering
.replace(/\s{2,}/, ' ') // replace multiple spaces
.trim();
@@ -111,13 +112,10 @@ async function getImdbId(info, type) {
reject(err || new Error('failed imdbId search'));
}
});
// }).catch(() => googleIt({ query, userAgent: getRandomUserAgent(), disableConsole: true })
// .then(results => results.length ? results : Promise.reject('No results'))
}).catch(() => googleSr(query)
.then(response => response.searchResults.length ? response.searchResults : Promise.reject('No results'))
// .catch(() => bing.web(query))
}).catch(() => googleSr.search(query, { selectors })
.then(response => response.length ? response : Promise.reject('No results'))
.then(results => results
.map(result => result.link)
.map(result => result.Link)
.find(result => result.includes('imdb.com/title/')))
.then(result => result && result.match(/imdb\.com\/title\/(tt\d+)/))
.then(match => match && match[1])))
@@ -132,9 +130,9 @@ async function getKitsuId(info) {
const query = encodeURIComponent(key);
return cacheWrapKitsuId(key,
() => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 })
() => axios.get(`${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { timeout: 60000 })
.then((response) => {
const body = response.body;
const body = response.data;
if (body && body.metas && body.metas.length) {
return body.metas[0].id.replace('kitsu:', '');
} else {
@@ -147,8 +145,8 @@ async function isEpisodeImdbId(imdbId) {
if (!imdbId) {
return false;
}
return needle('get', `https://www.imdb.com/title/${imdbId}/`, { open_timeout: 10000, follow: 2 })
.then(response => !!(response.body && response.body.includes('video.episode')))
return axios.get(`https://www.imdb.com/title/${imdbId}/`, { timeout: 10000 })
.then(response => !!(response.data && response.data.includes('video.episode')))
.catch((err) => false);
}

View File

@@ -43,7 +43,7 @@ function parseSeriesVideo(video, parsedTorrentName) {
// force episode to any found number if it was not parsed
if (!videoInfo.episodes && !videoInfo.date) {
const epMatcher = videoInfo.title.match(
/(?<!season\W*|disk\W*|movie\W*|film\W*)(?:^|\W)(\d{1,4})(?:a|b|c|v\d)?(?:\W|$)(?!disk|movie|film)/i);
/(?<!season\W*|disk\W*|movie\W*|film\W*)(?:^|\W|_)(\d{1,4})(?:a|b|c|v\d)?(?:_|\W|$)(?!disk|movie|film)/i);
videoInfo.episodes = epMatcher && [parseInt(epMatcher[1], 10)];
videoInfo.episode = videoInfo.episodes && videoInfo.episodes[0];
}

View File

@@ -1,7 +1,6 @@
const moment = require('moment');
const Promises = require('./promises')
const { Sequelize, DataTypes, fn, col, literal } = require('sequelize');
const Op = Sequelize.Op;
const { Sequelize, Op, DataTypes, fn, col, literal } = require('sequelize');
const DATABASE_URI = process.env.DATABASE_URI;
@@ -177,6 +176,10 @@ function getTorrentsBasedOnQuery(where) {
return Torrent.findAll({ where: where });
}
/**
 * Looks up file records that satisfy an arbitrary filter.
 *
 * @param {object} where - filter forwarded untouched as the `where` option
 *   of `File.findAll` (presumably a Sequelize where-clause; `File` is
 *   defined outside this view).
 * @returns {*} whatever `File.findAll` returns for that filter.
 */
function getFilesBasedOnQuery(where) {
  const findOptions = { where };
  return File.findAll(findOptions);
}
function getTorrentsWithoutSize() {
return Torrent.findAll({
where: literal(
@@ -326,6 +329,7 @@ module.exports = {
getTorrent,
getTorrentsBasedOnTitle,
getTorrentsBasedOnQuery,
getFilesBasedOnQuery,
deleteTorrent,
getUpdateSeedersTorrents,
getUpdateSeedersNewTorrents,

View File

@@ -8,14 +8,17 @@ function getRandomUserAgent() {
/**
 * Splits a proxy address such as `http://user:pass@host:8080` into the
 * `{ host, port, auth? }` object used for the request `proxy` option.
 *
 * A bare `host:port` value is accepted and treated as `http://host:port`.
 * The URL parser drops a port that equals the scheme default, so for
 * http/https the default (80/443) is restored explicitly.
 *
 * @param {string} address - raw PROXY_HOST value.
 * @returns {{host: string, port: number, auth?: {username: string, password: string}}}
 * @throws {Error} when no host or port can be determined. The message never
 *   echoes the address because it may embed credentials.
 */
function parseProxyAddress(address) {
  const DEFAULT_PORTS = { 'http:': 80, 'https:': 443 };
  const malformed = 'Invalid PROXY_HOST: expected <scheme>://<host>:<port>';
  const normalized = address.includes('://') ? address : `http://${address}`;

  let url;
  try {
    url = new URL(normalized);
  } catch (err) {
    throw new Error(malformed, { cause: err });
  }

  const port = url.port ? Number(url.port) : DEFAULT_PORTS[url.protocol];
  if (!url.hostname || !port) {
    throw new Error(malformed);
  }

  const proxy = { host: url.hostname, port };
  if (url.username) {
    // URL keeps credentials percent-encoded; decode them before handing them on.
    proxy.auth = {
      username: decodeURIComponent(url.username),
      password: decodeURIComponent(url.password)
    };
  }
  return proxy;
}

/**
 * Builds the default request options: a random user-agent header and, when
 * both PROXY_HOST and PROXY_TYPE are set in the environment, the parsed
 * proxy settings plus a `proxy-type` header.
 *
 * Both branches return the same `headers`-based shape, so the user agent is
 * always sent as a header rather than as a top-level `userAgent` option.
 *
 * @returns {{headers: object, proxy?: object}} request options.
 * @throws {Error} when PROXY_HOST is set but cannot be parsed.
 */
function defaultOptionsWithProxy() {
  const { PROXY_HOST, PROXY_TYPE } = process.env;
  const headers = { 'user-agent': getRandomUserAgent() };

  if (PROXY_HOST && PROXY_TYPE) {
    return {
      proxy: parseProxyAddress(PROXY_HOST),
      headers: { ...headers, 'proxy-type': PROXY_TYPE }
    };
  }
  return { headers };
}
module.exports = { getRandomUserAgent, defaultOptionsWithProxy };

View File

@@ -1,5 +1,5 @@
const torrentStream = require('torrent-stream');
const needle = require('needle');
const axios = require('axios');
const parseTorrent = require('parse-torrent');
const BTClient = require('bittorrent-tracker')
const async = require('async');
@@ -113,12 +113,12 @@ async function filesFromTorrentFile(torrent) {
return Promise.reject(new Error("no torrentLink"));
}
return needle('get', torrent.torrentLink, { open_timeout: 10000 })
return axios.get(torrent.torrentLink, { timeout: 10000, responseType: 'arraybuffer' })
.then((response) => {
if (!response.body || response.statusCode !== 200) {
if (!response.data || response.status !== 200) {
throw new Error('torrent not found')
}
return response.body
return response.data
})
.then((body) => parseTorrent(body))
.then((info) => info.files.map((file, fileId) => ({
@@ -197,8 +197,8 @@ async function getTorrentTrackers(torrent) {
}
async function getDefaultTrackers(torrent, retry = 3) {
return cacheTrackers(() => needle('get', TRACKERS_URL, { open_timeout: SEEDS_CHECK_TIMEOUT })
.then(response => response.body && response.body.trim())
return cacheTrackers(() => axios.get(TRACKERS_URL, { timeout: SEEDS_CHECK_TIMEOUT })
.then(response => response.data && response.data.trim())
.then(body => body && body.split('\n\n') || []))
.catch(() => retry > 0 ? delay(5000).then(() => getDefaultTrackers(torrent, retry - 1)) : [])
.then(trackers => trackers.concat(ADDITIONAL_TRACKERS))

View File

@@ -107,7 +107,7 @@ async function checkAndUpdateTorrent(torrent) {
if (!storedTorrent.languages && torrent.languages && storedTorrent.provider !== 'RARBG') {
storedTorrent.languages = torrent.languages;
storedTorrent.save();
console.log(`Updated [${torrent.infoHash}] ${torrent.title} language to ${torrent.languages}`);
console.log(`Updated [${storedTorrent.infoHash}] ${storedTorrent.title} language to ${torrent.languages}`);
}
return createTorrentContents({ ...storedTorrent.get(), torrentLink: torrent.torrentLink })
.then(() => updateTorrentSeeders(torrent));

View File

@@ -106,7 +106,7 @@ async function getSeriesTorrentContent(torrent) {
async function mapSeriesEpisode(file, torrent, files) {
if (!file.episodes && !file.kitsuEpisodes) {
if (files.some(otherFile => otherFile.episodes || otherFile.kitsuEpisodes) || parse(torrent.title).seasons) {
if (files.length === 1 || files.some(f => f.episodes || f.kitsuEpisodes) || parse(torrent.title).seasons) {
return Promise.resolve({
infoHash: torrent.infoHash,
fileIndex: file.fileIndex,
@@ -187,7 +187,7 @@ async function decomposeEpisodes(torrent, files, metadata = { episodeCount: [] }
decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata);
} else if (isDateEpisodeFiles(files, metadata)) {
decomposeDateEpisodeFiles(torrent, files, metadata);
} else if (isAbsoluteEpisodeFiles(files, metadata)) {
} else if (isAbsoluteEpisodeFiles(torrent, files, metadata)) {
decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
}
// decomposeEpisodeTitleFiles(torrent, files, metadata);
@@ -237,26 +237,28 @@ function isDateEpisodeFiles(files, metadata) {
return files.every(file => (!file.season || !metadata.episodeCount[file.season - 1]) && file.date);
}
// NOTE(review): this span looks like a rendered diff with the removed and
// added lines interleaved (two signatures, two return statements). Confirm
// which lines are current before relying on it; the code is left untouched.
//
// Predicate over a torrent's parsed files; the caller uses a truthy result
// to run decomposeAbsoluteEpisodeFiles, so it presumably detects files
// numbered with absolute (series-wide) episode numbers.
function isAbsoluteEpisodeFiles(files, metadata) {
function isAbsoluteEpisodeFiles(torrent, files, metadata) {
// one fifth of the file count, rounded up
const threshold = Math.ceil(files.length / 5);
// truthy only for torrents typed as anime that also carry a kitsuId
const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
// files with parsed episodes that are not flagged as movies
const nonMovieEpisodes = files
.filter(file => !file.isMovie && file.episodes);
// files with a season whose every episode number exceeds
// metadata.episodeCount for that season (index season - 1)
const absoluteEpisodes = files
.filter(file => file.season && file.episodes)
.filter(file => file.episodes.every(ep => metadata.episodeCount[file.season - 1] < ep))
// variant 1: a season beyond metadata.episodeCount.length counts for any torrent
return nonMovieEpisodes.every(file => !file.season || file.season > metadata.episodeCount.length)
|| absoluteEpisodes.length >= threshold
// && !isNewEpisodesNotInMetadata(files, metadata);
// variant 2: the "season beyond metadata" rule is applied only when isAnime is truthy
return nonMovieEpisodes.every(file => !file.season)
|| (isAnime && nonMovieEpisodes.every(file => file.season > metadata.episodeCount.length))
|| absoluteEpisodes.length >= threshold;
}
// NOTE(review): this span looks like a rendered diff with the removed and
// added lines interleaved: a plural-named, files-based variant and a
// singular-named, per-file variant. Confirm which lines are current; the
// code is left untouched.
function isNewEpisodesNotInMetadata(files, metadata) {
function isNewEpisodeNotInMetadata(torrent, file, metadata) {
// a new episode might not yet have been indexed by cinemeta.
// detect this if the episode number is larger than the last episode or the season is larger than the last one
// files-based variant: only considered when the torrent holds exactly one file
return files.length === 1
// only for non anime metas
const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
// per-file variant: skips anime, movie files, files without episodes and season 1
return !isAnime && !file.isMovie && file.episodes && file.season !== 1
// metadata.status must contain "continuing" or "current" (case-insensitive)
&& /continuing|current/i.test(metadata.status)
&& files.filter(file => !file.isMovie && file.episodes)
.every(file => file.season >= metadata.episodeCount.length
&& file.episodes.every(ep => ep > metadata.episodeCount[file.season - 1]))
// season is at or past the last season listed in metadata.episodeCount
&& file.season >= metadata.episodeCount.length
// every episode number exceeds the listed count for that season (0 when the season is not listed)
&& file.episodes.every(ep => ep > (metadata.episodeCount[file.season - 1] || 0));
}
function decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata) {
@@ -282,6 +284,7 @@ function decomposeAbsoluteEpisodeFiles(torrent, files, metadata) {
}
files
.filter(file => file.episodes && !file.isMovie && file.season !== 0)
.filter(file => !isNewEpisodeNotInMetadata(torrent, file, metadata))
.filter(file => !file.season || (metadata.episodeCount[file.season - 1] || 0) < file.episodes[0])
.forEach(file => {
const seasonIdx = ([...metadata.episodeCount.keys()]