refactors scrapers and adds kat and unofficial tpb dump scrapers

This commit is contained in:
TheBeastLT
2020-02-23 21:10:35 +01:00
parent 30421815d7
commit 0f91c98b84
14 changed files with 403 additions and 114 deletions

View File

@@ -6,57 +6,63 @@ const { Type } = require('./types');
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
const KITSU_URL = 'https://anime-kitsu.now.sh';
const TIMEOUT = 20000;
/**
 * Looks up metadata for a title, trying the Kitsu addon first and falling back to Cinemeta.
 *
 * A purely numeric `id` is treated as a Kitsu id and prefixed with `kitsu:`; any other id is
 * used as given. Only `Type.MOVIE` is passed through as the meta type; every other type is
 * queried as `Type.SERIES`.
 *
 * @param {string} id - Numeric Kitsu id or an already prefixed id.
 * @param {string} [type=Type.SERIES] - Content type of the title.
 * @returns {Promise<Object>} Whatever `cacheWrapMetadata` yields for the lookup
 *     (presumably the cached or freshly requested metadata object - confirm against the cache module).
 * @throws {Error} `failed metadata query <key> due: <reason>` when both sources fail.
 */
function getMetadata(id, type = Type.SERIES) {
  const key = id.match(/^\d+$/) ? `kitsu:${id}` : id;
  const metaType = type === Type.MOVIE ? Type.MOVIE : Type.SERIES;
  return cacheWrapMetadata(key,
      () => _requestMetadata(`${KITSU_URL}/meta/${metaType}/${key}.json`)
          // Kitsu failed or had no entry: retry the same key against Cinemeta.
          .catch(() => _requestMetadata(`${CINEMETA_URL}/meta/${metaType}/${key}.json`))
          .catch((error) => {
            // Report the key that was actually queried; no `kitsuId` binding exists in this scope.
            throw new Error(`failed metadata query ${key} due: ${error.message}`);
          }));
}
/**
 * Requests a meta document from `url` and reshapes it into the internal metadata object.
 *
 * @param {string} url - Full URL of the `.json` meta resource.
 * @returns {Promise<Object>} Resolves with `kitsuId`, `imdbId`, `title`, `year`, `country`,
 *     `genres`, `videos`, `episodeCount` (episodes per season, season 0 excluded) and
 *     `totalCount` (number of non season 0 videos; left as-is when the response has no videos).
 * @throws {Error} `No search results` (as a rejection) when the response carries no `meta.id`.
 */
function _requestMetadata(url) {
  // Season 0 entries are left out of every count (presumably specials - confirm with the addon).
  const isCountedSeason = (entry) => entry.season !== 0;

  // Videos carrying an imdb season keep the imdb mapping, all others keep the kitsu mapping.
  const toVideo = (video) => {
    if (video.imdbSeason) {
      return {
        season: video.season,
        episode: video.episode,
        imdbSeason: video.imdbSeason,
        imdbEpisode: video.imdbEpisode
      };
    }
    return {
      season: video.season,
      episode: video.episode,
      kitsuId: video.kitsu_id,
      kitsuEpisode: video.kitsuEpisode,
      released: video.released
    };
  };

  // Tallies episodes per season and returns the tallies as a plain list.
  const toEpisodeCounts = (videos) => {
    const perSeason = {};
    videos
        .filter(isCountedSeason)
        .sort((first, second) => first.season - second.season)
        .forEach((entry) => {
          perSeason[entry.season] = perSeason[entry.season] + 1 || 1;
        });
    return Object.values(perSeason);
  };

  const toMetadata = (response) => {
    const body = response.body;
    const meta = body && body.meta;
    if (!meta || !meta.id) {
      throw new Error('No search results');
    }
    const videos = meta.videos || [];
    return {
      kitsuId: meta.kitsu_id,
      imdbId: meta.imdb_id,
      title: meta.name,
      year: meta.year,
      country: meta.country,
      genres: meta.genres,
      videos: videos.map(toVideo),
      episodeCount: toEpisodeCounts(videos),
      totalCount: meta.videos && meta.videos.filter(isCountedSeason).length
    };
  };

  return needle('get', url, { open_timeout: TIMEOUT }).then(toMetadata);
}
function escapeTitle(title, hyphenEscape = true) {
return title.toLowerCase()
.normalize('NFKD') // normalize non-ASCII characters
@@ -86,7 +92,8 @@ async function getImdbId(info) {
.match(/imdb\.com\/title\/(tt\d+)/)[1])));
}
async function getKitsuId(title) {
async function getKitsuId(info) {
const title = info.season > 1 ? `${info.name} S${info.season}` : info.name;
const query = title.replace(/[;]+/g, ' ').replace(/[,%']+/g, '');
return cacheWrapImdbId(query,
() => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 })

View File

@@ -13,6 +13,7 @@ const Provider = database.define('provider', {
const Torrent = database.define('torrent', {
infoHash: { type: Sequelize.STRING(64), primaryKey: true },
provider: { type: Sequelize.STRING(32), allowNull: false },
torrentId: { type: Sequelize.STRING(128) },
title: { type: Sequelize.STRING(256), allowNull: false },
size: { type: Sequelize.BIGINT },
type: { type: Sequelize.STRING(16), allowNull: false },
@@ -42,7 +43,7 @@ const File = database.define('file',
{
indexes: [
{ unique: true, fields: ['infoHash'], where: { fileIndex: { [Op.eq]: null } } },
{ unique: true, fields: ['infoHash', 'fileIndex', 'imdbEpisode'] },
{ unique: true, fields: ['infoHash', 'fileIndex', 'imdbSeason', 'imdbEpisode'] },
{ unique: false, fields: ['imdbId', 'imdbSeason', 'imdbEpisode'] },
{ unique: false, fields: ['kitsuId', 'kitsuEpisode'] }
]

View File

@@ -76,6 +76,9 @@ async function filesFromTorrentStream(torrent) {
if (!torrent.infoHash && !torrent.magnetLink) {
return Promise.reject(new Error("no infoHash or magnetLink"));
}
if (torrent.seeders === 0) {
return Promise.reject(new Error("no seeders for the torrent"));
}
return new Promise((resolve, rejected) => {
const engine = new torrentStream(torrent.magnetLink || torrent.infoHash, { connections: MAX_PEER_CONNECTIONS });

47
lib/torrentEntries.js Normal file
View File

@@ -0,0 +1,47 @@
const { parse } = require('parse-torrent-title');
const { Type } = require('./types');
const repository = require('./repository');
const { getImdbId, getKitsuId, escapeTitle } = require('./metadata');
const { parseTorrentFiles } = require('./torrentFiles');
/**
 * Resolves the missing imdb/kitsu id of a scraped torrent, parses its video files and stores
 * the torrent together with those files.
 *
 * The id lookups are best effort: a failed lookup leaves the id unset. When no id could be
 * resolved and the parsed title is not flagged `complete`, the torrent is recorded through
 * `repository.createFailedImdbTorrent` and nothing else is stored.
 *
 * The returned promise settles only after every repository write has finished, so a failed
 * write reaches the caller as a rejection rather than an unhandled one.
 *
 * @param {Object} torrent - Scraped torrent; `title` and `type` are read, and `imdbId` /
 *     `kitsuId` are filled in on this same object when they can be resolved.
 * @returns {Promise<void>}
 */
async function createTorrentEntry(torrent) {
  const titleInfo = parse(torrent.title);
  const searchTitle = escapeTitle(titleInfo.title).toLowerCase();

  if (!torrent.imdbId && torrent.type !== Type.ANIME) {
    torrent.imdbId = await getImdbId({ name: searchTitle, year: titleInfo.year, type: torrent.type })
        .catch(() => undefined);
  }
  if (!torrent.kitsuId && torrent.type === Type.ANIME) {
    torrent.kitsuId = await getKitsuId({ name: searchTitle, season: titleInfo.season })
        .catch(() => undefined);
  }

  if (!torrent.imdbId && !torrent.kitsuId && !titleInfo.complete) {
    console.log(`imdbId or kitsuId not found: ${torrent.title}`);
    // Awaited so the failure record is stored before the caller moves on.
    await repository.createFailedImdbTorrent(torrent);
    return;
  }

  const files = await parseTorrentFiles(torrent);
  if (!files || !files.length) {
    console.log(`no video files found: ${torrent.title}`);
    return;
  }

  // The torrent is stored first, then all of its files concurrently; the success message is
  // logged only once all of those writes have completed.
  await repository.createTorrent(torrent);
  await Promise.all(files.map((file) => repository.createFile(file)));
  console.log(`Created entry for ${torrent.title}`);
}
/**
 * Stores the torrent through `repository.createSkipTorrent`.
 *
 * @param {Object} torrent - Torrent passed on unchanged to the repository.
 * @returns {Promise<*>} Settles with whatever `repository.createSkipTorrent` settles with.
 */
async function createSkipTorrentEntry(torrent) {
  const skipEntry = await repository.createSkipTorrent(torrent);
  return skipEntry;
}
/**
 * Finds a previously stored entry for the torrent.
 *
 * `repository.getSkipTorrent` is asked first; if it rejects, `repository.getTorrent` is asked;
 * if that rejects too, the result is `undefined` rather than a rejection.
 *
 * @param {Object} torrent - Torrent passed on unchanged to both repository lookups.
 * @returns {Promise<*>} The stored entry from either lookup, or `undefined` when both reject.
 */
async function getStoredTorrentEntry(torrent) {
  const lookupTorrent = () => repository.getTorrent(torrent);
  const giveUp = () => undefined;

  return repository.getSkipTorrent(torrent)
      .catch(lookupTorrent)
      .catch(giveUp);
}
module.exports = { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry };

View File

@@ -116,7 +116,7 @@ function parseSeriesFile(file, parsedTorrentName) {
return { ...file, ...fileInfo };
}
async function decomposeEpisodes(torrent, files, metadata = { episodeCount: {} }) {
async function decomposeEpisodes(torrent, files, metadata = { episodeCount: [] }) {
if (files.every(file => !file.episodes && !file.date)) {
return files;
}
@@ -142,7 +142,7 @@ async function decomposeEpisodes(torrent, files, metadata = { episodeCount: {} }
&& file.episodes.every(ep => metadata.episodeCount[file.season - 1] < ep)))
&& (sortedEpisodes.length <= 1 || sortedEpisodes.slice(1).every((ep, i) => ep - sortedEpisodes[i] <= 2))) {
decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
} else if (files.every(file => !file.season && file.date)) {
} else if (files.every(file => (!file.season || !metadata.episodeCount[file.season - 1]) && file.date)) {
decomposeDateEpisodeFiles(torrent, files, metadata);
}
@@ -167,6 +167,14 @@ function decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata) {
}
function decomposeAbsoluteEpisodeFiles(torrent, files, metadata) {
if (metadata.episodeCount.length === 0) {
files
.filter(file => !file.season && file.episodes && !file.isMovie)
.forEach(file => {
file.season = 1;
});
return;
}
files
.filter(file => file.episodes && !file.isMovie)
.forEach(file => {