Big rewrite: distributed (scalable) consumers for ingestion/scraping, with a single producer written in C#.

Changed from page scraping to rss xml scraping
Includes RealDebridManager hashlist decoding (requires a github readonly PAT as requests must be authenticated) - This allows ingestion of 200k+ entries in a few hours.
Simplifies a lot of torrentio to deal with new data
This commit is contained in:
iPromKnight
2024-02-01 16:38:45 +00:00
parent 6fb4ddcf23
commit ab17ef81be
255 changed files with 18489 additions and 69074 deletions

View File

@@ -0,0 +1,2 @@
build.sh
node_modules/

View File

@@ -0,0 +1,16 @@
env:
es2021: true
node: true
extends: eslint:recommended
plugins:
- import
rules:
import/no-unresolved: 2
import/no-commonjs: 2
import/extensions:
- 2
- ignorePackages
parserOptions:
ecmaVersion: latest
sourceType: module

View File

@@ -0,0 +1,18 @@
# Runtime image for the Node.js queue consumer.
FROM node:lts-buster-slim

# Alpine equivalent of the package install below, kept for reference:
# RUN apk update && apk upgrade && \
# apk add --no-cache git curl

# curl is used by check-ip.sh; git is presumably needed for git-hosted npm dependencies — TODO confirm.
RUN apt-get update && \
    apt-get install -y curl git && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /home/node/app

# Copy the manifests first so the dependency layer is cached independently of source changes.
COPY package*.json ./

# `--only-production` is not an npm option (npm ignores unknown flags, so devDependencies
# were being installed); `--omit=dev` is the supported way to skip them.
RUN npm ci --omit=dev

COPY . .

RUN chmod a+x ./check-ip.sh

CMD [ "node", "--no-warnings=ExperimentalWarning", "index.js" ]

View File

@@ -0,0 +1,5 @@
#!/bin/sh
# Prints the public IP address seen by the outside world for this host/container,
# as reported by the whatismyip.akamai.com endpoint.

# -s keeps curl silent so only the response body is captured.
CURRENT_IP="$(curl -s http://whatismyip.akamai.com)"
# NOTE(review): `clear` may fail or print noise when no terminal is attached — confirm this is only run interactively.
clear
echo "Current IP: $CURRENT_IP"

View File

@@ -0,0 +1,11 @@
import { getTrackers } from "./lib/trackerService.js";
import { connect } from './lib/repository.js';
import { listenToQueue } from './jobs/processTorrents.js';
import { jobConfig } from "./lib/config.js";
// Service start-up sequence (top-level await, so the module finishes loading only when these complete).
// getTrackers() is awaited first — presumably to warm the tracker list cache; verify in trackerService.js.
await getTrackers();
// connect() synchronises the database schema when databaseConfig.ENABLE_SYNC is set.
await connect();
// Queue consumption is optional and controlled by jobConfig.JOBS_ENABLED.
if (jobConfig.JOBS_ENABLED) {
await listenToQueue();
}

View File

@@ -0,0 +1,34 @@
import { rabbitConfig, jobConfig } from '../lib/config.js'
import { processTorrentRecord } from "../lib/ingestedTorrent.js";
import amqp from 'amqplib'
import Promise from 'bluebird'
// Options passed to channel.assertQueue: the queue is declared durable.
const assertQueueOptions = { durable: true }
// Options passed to channel.consume: noAck is off, so every message must be acknowledged explicitly.
const consumeQueueOptions = { noAck: false }
/**
 * Runs one queue message through the ingestion pipeline.
 *
 * @param {object} msg - Raw queue message whose `content` holds the JSON payload.
 * @returns {Promise<object>} Resolves with the same `msg` once the decoded torrent record
 *   has been processed (so the caller can acknowledge it); rejects if decoding or processing fails.
 */
const processMessage = msg => {
  const decoded = Promise.resolve(getMessageAsJson(msg));
  const processed = decoded.then(torrent => processTorrentRecord(torrent));
  return processed.then(() => Promise.resolve(msg));
};
/**
 * Decodes a queue message body and extracts its `message` property.
 *
 * @param {object} msg - Queue message; `msg.content` is converted with `toString()` and parsed as JSON.
 * @returns {Promise<*>} Promise resolved with the `message` field of the parsed envelope.
 * @throws {SyntaxError} Synchronously, when the content is not valid JSON.
 */
const getMessageAsJson = msg => {
  const { message } = JSON.parse(msg.content.toString());
  return Promise.resolve(message);
};
/**
 * Declares the work queue, applies the prefetch limit and starts consuming.
 *
 * Every delivery is settled exactly once: acked after successful processing, or
 * nacked without requeue when processing fails. Previously a failed message was
 * only logged and stayed unacknowledged, permanently occupying a slot of the
 * prefetch window (with a prefetch of 1 the worker stopped receiving messages).
 * Not requeueing avoids a redelivery loop for messages that can never be
 * processed; the broker discards or dead-letters them depending on queue setup.
 *
 * @param {object} channel - amqplib channel.
 * @returns {Promise<object>} Result of `channel.consume`.
 */
const assertAndConsumeQueue = channel => {
  console.log('Worker is running! Waiting for new torrents...');

  const rejectMessage = msg => {
    try {
      channel.nack(msg, false, false);
    } catch (nackError) {
      // e.g. the channel was closed meanwhile; nothing more can be done for this delivery
      console.error('Failed rejecting torrent message', nackError);
    }
  };

  const ackMsg = msg => {
    if (msg === null || msg === undefined) {
      // amqplib delivers null when the consumer has been cancelled by the broker
      console.error('Consumer was cancelled by the broker');
      return Promise.resolve();
    }
    return Promise.resolve(msg)
      .then(received => processMessage(received))
      .then(processed => channel.ack(processed))
      .catch(error => {
        console.error('Failed processing torrent', error);
        rejectMessage(msg);
      });
  };

  return channel.assertQueue(rabbitConfig.QUEUE_NAME, assertQueueOptions)
    .then(() => channel.prefetch(jobConfig.JOB_CONCURRENCY))
    .then(() => channel.consume(rabbitConfig.QUEUE_NAME, ackMsg, consumeQueueOptions));
};
/**
 * Connects to the broker at `rabbitConfig.URI`, opens a channel and starts consuming.
 *
 * @returns {Promise<*>} Whatever `assertAndConsumeQueue` resolves with.
 */
export const listenToQueue = () => {
  const connecting = amqp.connect(rabbitConfig.URI);
  const openingChannel = connecting.then(connection => connection.createChannel());
  return openingChannel.then(channel => assertAndConsumeQueue(channel));
};

View File

@@ -0,0 +1,72 @@
import { cacheConfig } from './config.js';
import cacheManager from 'cache-manager';
import mangodbStore from 'cache-manager-mongodb';
// Key prefixes: every cache key written by this module starts with GLOBAL_KEY_PREFIX,
// followed by a per-kind segment.
const GLOBAL_KEY_PREFIX = 'selfhostio-consumer';
const IMDB_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|imdb_id`;
const KITSU_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|kitsu_id`;
const METADATA_PREFIX = `${GLOBAL_KEY_PREFIX}|metadata`;
const TRACKERS_KEY_PREFIX = `${GLOBAL_KEY_PREFIX}|trackers`;
// TTLs, expressed in seconds per the inline arithmetic.
// NOTE(review): GLOBAL_TTL and MEMORY_TTL both read METADATA_TTL, so setting that variable gives
// both the same value; the environment value is also a string rather than a number — confirm intended.
const GLOBAL_TTL = process.env.METADATA_TTL || 7 * 24 * 60 * 60; // 7 days
const MEMORY_TTL = process.env.METADATA_TTL || 2 * 60 * 60; // 2 hours
const TRACKERS_TTL = 2 * 24 * 60 * 60; // 2 days
// The initiate* functions are function declarations, hence hoisted and callable here.
const memoryCache = initiateMemoryCache();
const remoteCache = initiateRemoteCache();
/**
 * Creates the cache used for long-lived lookups.
 *
 * @returns {object|null} `null` when caching is disabled; a MongoDB-backed cache when
 *   `cacheConfig.MONGO_URI` is set; otherwise an in-memory cache using MEMORY_TTL.
 */
function initiateRemoteCache() {
  if (cacheConfig.NO_CACHE) {
    return null;
  }
  if (!cacheConfig.MONGO_URI) {
    return cacheManager.caching({ store: 'memory', ttl: MEMORY_TTL });
  }
  const mongoOptions = {
    collection: cacheConfig.COLLECTION_NAME,
    socketTimeoutMS: 120000,
    useNewUrlParser: true,
    useUnifiedTopology: false,
    ttl: GLOBAL_TTL
  };
  return cacheManager.caching({
    store: mangodbStore,
    uri: cacheConfig.MONGO_URI,
    options: mongoOptions,
    ttl: GLOBAL_TTL,
    ignoreCacheErrors: true
  });
}
/**
 * Creates the in-process cache: memory store, MEMORY_TTL expiry and no entry limit.
 *
 * @returns {object} cache-manager cache instance.
 */
function initiateMemoryCache() {
  const options = {
    store: 'memory',
    ttl: MEMORY_TTL,
    max: Infinity // infinite LRU cache size
  };
  return cacheManager.caching(options);
}
/**
 * Calls `method` through `cache.wrap`, or directly when caching is disabled or no cache exists.
 *
 * @param {object|null} cache - Cache instance exposing `wrap`, or a falsy value.
 * @param {string} key - Cache key.
 * @param {Function} method - Producer of the value.
 * @param {object} options - Options forwarded to `cache.wrap`.
 * @returns {*} Result of `method()` or of `cache.wrap(...)`.
 */
function cacheWrap(cache, key, method, options) {
  const bypassCache = cacheConfig.NO_CACHE || !cache;
  return bypassCache ? method() : cache.wrap(key, method, options);
}
/** Caches an IMDb id lookup in the remote cache under the IMDb prefix, with GLOBAL_TTL. */
export function cacheWrapImdbId(key, method) {
  const cacheKey = `${IMDB_ID_PREFIX}:${key}`;
  return cacheWrap(remoteCache, cacheKey, method, { ttl: GLOBAL_TTL });
}

/** Caches a Kitsu id lookup in the remote cache under the Kitsu prefix, with GLOBAL_TTL. */
export function cacheWrapKitsuId(key, method) {
  const cacheKey = `${KITSU_ID_PREFIX}:${key}`;
  return cacheWrap(remoteCache, cacheKey, method, { ttl: GLOBAL_TTL });
}

/** Caches a metadata lookup in the memory cache under the metadata prefix, with MEMORY_TTL. */
export function cacheWrapMetadata(id, method) {
  const cacheKey = `${METADATA_PREFIX}:${id}`;
  return cacheWrap(memoryCache, cacheKey, method, { ttl: MEMORY_TTL });
}

/** Caches the tracker list in the memory cache under a single fixed key, with TRACKERS_TTL. */
export function cacheTrackers(method) {
  const cacheKey = `${TRACKERS_KEY_PREFIX}`;
  return cacheWrap(memoryCache, cacheKey, method, { ttl: TRACKERS_TTL });
}

View File

@@ -0,0 +1,45 @@
// RabbitMQ connection: broker URI (RABBIT_URI) and the queue to consume (QUEUE_NAME).
export const rabbitConfig = {
URI: process.env.RABBIT_URI || 'amqp://localhost',
QUEUE_NAME: process.env.QUEUE_NAME || 'test-queue'
}
// Cache settings: MongoDB URI and collection for the remote cache, plus a switch (NO_CACHE) that disables caching.
// NOTE(review): the fallback URI embeds default credentials — confirm it is only meant for local development.
export const cacheConfig = {
MONGO_URI: process.env.MONGODB_URI || 'mongodb://mongo:mongo@localhost:27017/selfhostio?authSource=admin',
NO_CACHE: parseBool(process.env.NO_CACHE, false),
COLLECTION_NAME: process.env.MONGODB_COLLECTION || 'selfhostio_consumer_collection'
}
// PostgreSQL settings: connection URI and whether the schema is synchronised at start-up (ENABLE_SYNC).
// NOTE(review): the fallback URI embeds default credentials — confirm it is only meant for local development.
export const databaseConfig = {
DATABASE_URI: process.env.POSTGRES_DATABASE_URI || 'postgres://postgres:postgres@localhost:5432/selfhostio',
ENABLE_SYNC: parseBool(process.env.ENABLE_SYNC, true)
}
/**
 * Queue-consumer settings.
 * - JOB_CONCURRENCY: prefetch count applied to the channel (env JOB_CONCURRENCY, default 1).
 * - JOBS_ENABLED: whether the queue is consumed at all (env JOBS_ENABLED, default true).
 *
 * The raw env value and the default are passed to parseBool separately: the previous
 * `parseBool(env || true)` handed parseBool a boolean and no default, which made the
 * flag `undefined` (i.e. disabled) whenever the variable was not set.
 */
export const jobConfig = {
  JOB_CONCURRENCY: Number.parseInt(process.env.JOB_CONCURRENCY || '1', 10),
  JOBS_ENABLED: parseBool(process.env.JOBS_ENABLED, true)
};
/**
 * IMDb lookup settings: IMDB_CONCURRENT and IMDB_INTERVAL_MS, both read from the
 * environment with defaults of 1 and 1000.
 */
export const metadataConfig = {
  IMDB_CONCURRENT: Number.parseInt(process.env.IMDB_CONCURRENT || '1', 10),
  IMDB_INTERVAL_MS: Number.parseInt(process.env.IMDB_INTERVAL_MS || '1000', 10),
};

/**
 * Tracker list settings: source URL of the tracker list and whether UDP trackers are enabled.
 * The env value and the default are passed to parseBool separately so the flag is a real
 * boolean (`false`) when UDP_TRACKERS_ENABLED is unset, instead of `undefined`.
 */
export const trackerConfig = {
  TRACKERS_URL: process.env.TRACKERS_URL || 'https://ngosang.github.io/trackerslist/trackers_all.txt',
  UDP_ENABLED: parseBool(process.env.UDP_TRACKERS_ENABLED, false),
};

/**
 * Torrent inspection settings: connection cap per torrent and a timeout (30000 by default).
 */
export const torrentConfig = {
  MAX_CONNECTIONS_PER_TORRENT: Number.parseInt(process.env.MAX_SINGLE_TORRENT_CONNECTIONS || '20', 10),
  TIMEOUT: Number.parseInt(process.env.TORRENT_TIMEOUT || '30000', 10),
};
/**
 * Parses a boolean flag, typically an environment variable.
 *
 * Fixes over the previous version: the string 'false' now yields `false` (it used to
 * return the default, so a flag defaulting to true could never be switched off), and
 * a value that is already a boolean is returned as-is instead of being discarded.
 *
 * @param {string|boolean|undefined} boolString - Raw value; 'true' / 'false' are matched
 *   case-insensitively, ignoring surrounding whitespace.
 * @param {boolean} [defaultValue=false] - Returned for missing or unrecognised values.
 * @returns {boolean} Parsed flag.
 */
function parseBool(boolString, defaultValue = false) {
  if (typeof boolString === 'boolean') {
    return boolString;
  }
  const isString = typeof boolString === 'string' || boolString instanceof String;
  if (!isString) {
    return defaultValue;
  }
  const normalized = boolString.trim().toLowerCase();
  if (normalized === 'true') {
    return true;
  }
  if (normalized === 'false') {
    return false;
  }
  return defaultValue;
}

View File

@@ -0,0 +1,62 @@
// File extensions (lower-case, without the dot) treated as video files by isVideo.
const VIDEO_EXTENSIONS = [
"3g2",
"3gp",
"avi",
"flv",
"mkv",
"mk3d",
"mov",
"mp2",
"mp4",
"m4v",
"mpe",
"mpeg",
"mpg",
"mpv",
"webm",
"wmv",
"ogm",
"divx"
];
// File extensions (lower-case, without the dot) treated as subtitle files by isSubtitle.
const SUBTITLE_EXTENSIONS = [
"aqt",
"gsub",
"jss",
"sub",
"ttxt",
"pjs",
"psb",
"rt",
"smi",
"slt",
"ssf",
"srt",
"ssa",
"ass",
"usf",
"idx",
"vtt"
];
// File extensions (lower-case, without the dot) treated as disk images/streams by isDisk.
const DISK_EXTENSIONS = [
"iso",
"m2ts",
"ts",
"vob"
]
/** @returns {boolean|null} Whether `filename` ends with a known video extension (see isExtension). */
export function isVideo(filename) {
  return isExtension(filename, VIDEO_EXTENSIONS);
}

/** @returns {boolean|null} Whether `filename` ends with a known subtitle extension (see isExtension). */
export function isSubtitle(filename) {
  return isExtension(filename, SUBTITLE_EXTENSIONS);
}

/** @returns {boolean|null} Whether `filename` ends with a known disk extension (see isExtension). */
export function isDisk(filename) {
  return isExtension(filename, DISK_EXTENSIONS);
}

/**
 * Checks the trailing extension of `filename` (2 to 4 word characters after the last dot)
 * against `extensions`, case-insensitively.
 *
 * @param {string} filename - File name or path.
 * @param {string[]} extensions - Lower-case extensions without the dot.
 * @returns {boolean|null} `true`/`false` when an extension is present, `null` when the
 *   name has no 2-4 character extension at all.
 */
export function isExtension(filename, extensions) {
  const found = filename.match(/\.(\w{2,4})$/);
  if (!found) {
    return found;
  }
  const extension = found[1].toLowerCase();
  return extensions.includes(extension);
}

View File

@@ -0,0 +1,45 @@
import { Type } from './types.js';
import { createTorrentEntry, checkAndUpdateTorrent } from './torrentEntries.js';
import {getTrackers} from "./trackerService.js";
/**
 * Processes one ingested torrent record: maps it to a torrent entry, then either
 * updates the existing entry or creates a new one.
 *
 * @param {object} torrent - Ingested record; `category === 'tv'` is treated as a series,
 *   anything else as a movie.
 * @returns {Promise<*>} The parsed torrent info when `checkAndUpdateTorrent` reports it
 *   handled the torrent, otherwise the result of `createTorrentEntry`.
 */
export async function processTorrentRecord(torrent) {
  const isSeries = torrent.category === 'tv';
  const torrentInfo = await parseTorrent(torrent, isSeries ? Type.SERIES : Type.MOVIE);
  console.log(`Processing torrent ${torrentInfo.title} with infoHash ${torrentInfo.infoHash}`)

  const alreadyHandled = await checkAndUpdateTorrent(torrentInfo);
  return alreadyHandled ? torrentInfo : createTorrentEntry(torrentInfo);
}
/**
 * Produces the tracker string stored with a torrent.
 *
 * @returns {Promise<string>} The list from `getTrackers()` joined with commas.
 */
async function assignTorrentTrackers() {
  return (await getTrackers()).join(',');
}
/**
 * Maps an ingested torrent record to the shape expected by the torrent entry functions.
 *
 * @param {object} torrent - Ingested record (name, infoHash, size, createdAt, source, imdb).
 * @param {string} category - Content type stored as `type`.
 * @returns {Promise<object>} Torrent info; the info hash is trimmed and lower-cased,
 *   `seeders` is fixed at 100 and `trackers` is the comma-joined tracker list.
 */
async function parseTorrent(torrent, category) {
  const infoHash = torrent.infoHash?.trim().toLowerCase()
  const { name, size, createdAt, source } = torrent;
  const imdbId = parseImdbId(torrent);
  const trackers = await assignTorrentTrackers();

  return {
    title: name,
    torrentId: `${name}_${infoHash}`,
    infoHash,
    seeders: 100,
    size,
    uploadDate: createdAt,
    imdbId,
    type: category,
    provider: source,
    trackers,
  };
}
/**
 * Reads the IMDb id from an ingested record.
 *
 * @param {object} torrent - Ingested record.
 * @returns {*} `torrent.imdb`, with both `null` and `undefined` normalised to `undefined`.
 */
function parseImdbId(torrent) {
  return torrent.imdb ?? undefined;
}

View File

@@ -0,0 +1,165 @@
import axios from 'axios';
import nameToImdb from 'name-to-imdb';
import { search } from 'google-sr';
import { cacheWrapImdbId, cacheWrapKitsuId, cacheWrapMetadata } from './cache.js';
import { Type } from './types.js';
// Base URLs of the two metadata services queried by this module.
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
const KITSU_URL = 'https://anime-kitsu.strem.fun';
// Timeout passed to axios for metadata requests (milliseconds, per axios' `timeout` option).
const TIMEOUT = 20000;
/**
 * Fetches (and caches) metadata for an id.
 *
 * Purely numeric ids are turned into `kitsu:<id>` keys; anything else is used as-is.
 * Sources are tried in order: Kitsu with the requested type, Cinemeta with the requested
 * type, then Cinemeta with the opposite type in case the type was wrong.
 *
 * @param {string|number} id - IMDb or Kitsu id.
 * @param {string} [type=Type.SERIES] - Anything other than Type.MOVIE is treated as series.
 * @returns {Promise<object>} Metadata; rejects with the string "no valid id provided" for a
 *   falsy id, or with an Error when every source fails.
 */
export function getMetadata(id, type = Type.SERIES) {
  if (!id) {
    return Promise.reject("no valid id provided");
  }

  const isKitsuId = Number.isInteger(id) || id.match(/^\d+$/);
  const key = isKitsuId ? `kitsu:${id}` : id;
  const metaType = type === Type.MOVIE ? Type.MOVIE : Type.SERIES;

  const fromKitsu = () => _requestMetadata(`${KITSU_URL}/meta/${metaType}/${key}.json`);
  const fromCinemeta = () => _requestMetadata(`${CINEMETA_URL}/meta/${metaType}/${key}.json`);
  const fromCinemetaOtherType = () => {
    // try different type in case there was a mismatch
    const otherType = metaType === Type.MOVIE ? Type.SERIES : Type.MOVIE;
    return _requestMetadata(`${CINEMETA_URL}/meta/${otherType}/${key}.json`)
  };
  const failQuery = (error) => {
    throw new Error(`failed metadata query ${key} due: ${error.message}`);
  };

  return cacheWrapMetadata(key, () => fromKitsu()
      .catch(fromCinemeta)
      .catch(fromCinemetaOtherType)
      .catch(failQuery));
}
/**
 * Requests a meta document from `url` and maps it to the internal metadata shape.
 *
 * @param {string} url - Full URL of the meta JSON document.
 * @returns {Promise<object>} Mapped metadata (ids, type, title, year, country, genres, status,
 *   videos, episodeCount, totalCount).
 * @throws {Error} 'No search results' (as a rejection) when the response has no `meta`
 *   carrying an `imdb_id` or `kitsu_id`; request failures also reject.
 */
function _requestMetadata(url) {
return axios.get(url, { timeout: TIMEOUT })
.then((response) => {
const body = response.data;
// Only responses identifying the title by IMDb or Kitsu id are accepted.
if (body && body.meta && (body.meta.imdb_id || body.meta.kitsu_id)) {
return {
kitsuId: body.meta.kitsu_id,
imdbId: body.meta.imdb_id,
type: body.meta.type,
title: body.meta.name,
year: body.meta.year,
country: body.meta.country,
genres: body.meta.genres,
status: body.meta.status,
// Videos carrying an integer imdbSeason get the IMDb mapping fields; all others get the Kitsu fields.
videos: (body.meta.videos || [])
.map((video) => Number.isInteger(video.imdbSeason)
? {
name: video.name || video.title,
season: video.season,
episode: video.episode,
imdbSeason: video.imdbSeason,
imdbEpisode: video.imdbEpisode
}
: {
name: video.name || video.title,
season: video.season,
episode: video.episode,
kitsuId: video.kitsu_id,
kitsuEpisode: video.kitsuEpisode,
released: video.released
}
),
// Number of videos per season, as an array of counts; entries with season 0 or episode 0 are excluded.
episodeCount: Object.values((body.meta.videos || [])
.filter((entry) => entry.season !== 0 && entry.episode !== 0)
.sort((a, b) => a.season - b.season)
.reduce((map, next) => {
map[next.season] = map[next.season] + 1 || 1;
return map;
}, {})),
// Total number of videos with the same exclusions; undefined when the meta has no `videos`.
totalCount: body.meta.videos && body.meta.videos
.filter((entry) => entry.season !== 0 && entry.episode !== 0).length
};
} else {
throw new Error('No search results');
}
});
}
/**
 * Normalises a title for use in search queries and cache keys.
 *
 * @param {string} title - Raw title.
 * @returns {string} Lower-cased title with diacritics stripped, '&' spelled out, separators
 *   turned into spaces, unsupported characters removed and whitespace collapsed.
 */
export function escapeTitle(title) {
  return title.toLowerCase()
      .normalize('NFKD') // decompose accented characters into base letter + combining mark
      .replace(/[\u0300-\u036F]/g, '') // drop the combining marks
      .replace(/&/g, 'and')
      .replace(/[;, ~./]+/g, ' ') // replace semicolons, commas, spaces, tildes, dots and slashes with a space
      .replace(/[^\w \-()×+#@!'\u0400-\u04ff]+/g, '') // remove everything except word chars, Cyrillic and a few symbols
      .replace(/^\d{1,2}[.#\s]+(?=(?:\d+[.\s]*)?[\u0400-\u04ff])/i, '') // remove russian movie numbering
      // global flag: removing characters above can leave several gaps, and all of them
      // must be collapsed (previously only the first one was)
      .replace(/\s{2,}/g, ' ')
      .trim();
}
/**
 * Resolves the IMDb id for a parsed title.
 *
 * The cached name-to-imdb lookup is tried first; if it fails, a Google search is used
 * instead. The resulting id is normalised to `tt` followed by at least 7 digits.
 *
 * @param {object} info - Parsed title info (`title`, optional `year` / `date`).
 * @param {string} type - Content type, used in the lookup, the cache key and the search query.
 * @returns {Promise<string|undefined>} Normalised IMDb id, or a falsy value when none was found.
 */
export async function getImdbId(info, type) {
  const name = escapeTitle(info.title);
  const year = info.year || (info.date && info.date.slice(0, 4));
  const key = `${name}_${year || 'NA'}_${type}`;
  const googleQuery = year ? `${name} ${year} ${type} imdb` : `${name} ${type} imdb`;
  const toCanonicalId = id => id && 'tt' + id.replace(/tt0*([1-9][0-9]*)$/, '$1').padStart(7, '0');

  try {
    const imdbId = await cacheWrapImdbId(key, () => getIMDbIdFromNameToImdb(name, info.year, type));
    return toCanonicalId(imdbId);
  } catch (error) {
    return toCanonicalId(await getIMDbIdFromGoogle(googleQuery));
  }
}
/**
 * Promise wrapper around the callback-based `nameToImdb` lookup.
 *
 * @param {string} name - Title to look up.
 * @param {number|string|undefined} year - Release year, if known.
 * @param {string} type - Content type.
 * @returns {Promise<*>} Resolves with the lookup result; rejects with the callback error, or
 *   with 'Failed IMDbId search' when there is neither a result nor an error.
 */
function getIMDbIdFromNameToImdb(name, year, type) {
  return new Promise((resolve, reject) => {
    nameToImdb({ name, year, type }, (err, res) => {
      if (!res) {
        reject(err || new Error('Failed IMDbId search'));
        return;
      }
      resolve(res);
    });
  });
}
/**
 * Finds an IMDb id by searching Google and scanning the result links.
 *
 * Changes over the previous version: the original failure is kept as `cause` on the
 * thrown error (it used to be discarded, hiding why the search failed), and a result
 * without a `link` is skipped instead of aborting the whole search.
 *
 * @param {string} query - Search query.
 * @returns {Promise<string|undefined>} First `tt…` id found in an imdb.com title link, or
 *   `undefined` when no result links to IMDb.
 * @throws {Error} 'Failed to find IMDb ID from Google search' when the search itself fails.
 */
async function getIMDbIdFromGoogle(query) {
  try {
    const searchResults = await search({ query: query });
    for (const result of searchResults) {
      const match = result.link?.match(/imdb\.com\/title\/(tt\d+)/);
      if (match) {
        return match[1];
      }
    }
    return undefined;
  } catch (error) {
    throw new Error('Failed to find IMDb ID from Google search', { cause: error });
  }
}
/**
 * Resolves the Kitsu id for a parsed anime title via the Kitsu catalog search (cached).
 *
 * @param {object} info - Parsed title info (`title`, optional `year`, `season`); anything
 *   after " | " in the title is ignored, and the season is only included when above 1.
 * @returns {Promise<string>} Id of the first search hit with the `kitsu:` prefix removed;
 *   rejects with 'No search results' when the search returns nothing.
 */
export async function getKitsuId(info) {
  const title = escapeTitle(info.title.replace(/\s\|\s.*/, ''));
  const yearSuffix = info.year ? ` ${info.year}` : '';
  const seasonSuffix = info.season > 1 ? ` S${info.season}` : '';
  const key = `${title}${yearSuffix}${seasonSuffix}`;
  const searchUrl = `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${encodeURIComponent(key)}.json`;

  const searchKitsu = async () => {
    const { data: body } = await axios.get(searchUrl, { timeout: 60000 });
    const hasResults = body && body.metas && body.metas.length;
    if (!hasResults) {
      throw new Error('No search results');
    }
    return body.metas[0].id.replace('kitsu:', '');
  };

  return cacheWrapKitsuId(key, searchKitsu);
}
/**
 * Tells whether an IMDb id belongs to a single episode, by checking whether its IMDb
 * title page contains the text 'video.episode'.
 *
 * @param {string} imdbId - IMDb id.
 * @returns {Promise<boolean>} `false` for a missing id or on any request/processing failure.
 */
export async function isEpisodeImdbId(imdbId) {
  if (!imdbId) {
    return false;
  }
  try {
    const response = await axios.get(`https://www.imdb.com/title/${imdbId}/`, { timeout: 10000 });
    return !!(response.data && response.data.includes('video.episode'));
  } catch {
    return false;
  }
}

View File

@@ -0,0 +1,98 @@
import { parse } from 'parse-torrent-title';
import { Type } from './types.js';
const MULTIPLE_FILES_SIZE = 4 * 1024 * 1024 * 1024; // 4 GB
/**
 * Parses season/episode information for every video of a series torrent and flags
 * the videos that look like movies.
 *
 * @param {object} torrent - Torrent with `title` and `type`.
 * @param {object[]} videos - Video files of the torrent.
 * @returns {object[]} Parsed videos, each extended with an `isMovie` flag.
 */
export function parseSeriesVideos(torrent, videos) {
  const parsedTorrentName = parse(torrent.title);
  const hasMovies = parsedTorrentName.complete || !!torrent.title.match(/movies?(?:\W|$)/i);
  const parsedVideos = videos.map(video => parseSeriesVideo(video, parsedTorrentName));

  const withMovieFlag = video => {
    const isMovie = isMovieVideo(video, parsedVideos, torrent.type, hasMovies);
    return { ...video, isMovie };
  };
  return parsedVideos.map(withMovieFlag);
}
/**
 * Parses a single video file name and fills in missing season/episode/date information
 * from the containing folder, the torrent title and a few regex fallbacks.
 * The fallbacks are applied in order; each only runs while the value is still missing.
 *
 * @param {object} video - Video file with `name` and `path`.
 * @param {object} parsedTorrentName - Result of parsing the torrent title.
 * @returns {object} The video merged with the parsed (and completed) information.
 */
function parseSeriesVideo(video, parsedTorrentName) {
const videoInfo = parse(video.name);
// the episode may be in a folder containing season number
if (!Number.isInteger(videoInfo.season) && video.path.includes('/')) {
const folders = video.path.split('/');
// the second-to-last path segment is the folder directly containing the file
const pathInfo = parse(folders[folders.length - 2]);
videoInfo.season = pathInfo.season;
}
// fall back to the season parsed from the torrent title
if (!Number.isInteger(videoInfo.season) && parsedTorrentName.season) {
videoInfo.season = parsedTorrentName.season;
}
if (!Number.isInteger(videoInfo.season) && videoInfo.seasons && videoInfo.seasons.length > 1) {
// in case single file was interpreted as having multiple seasons
videoInfo.season = videoInfo.seasons[0];
}
if (!Number.isInteger(videoInfo.season) && video.path.includes('/') && parsedTorrentName.seasons
&& parsedTorrentName.seasons.length > 1) {
// russian season are usually named with 'series name-2` i.e. Улицы разбитых фонарей-6/22. Одиночный выстрел.mkv
const folderPathSeasonMatch = video.path.match(/[\u0400-\u04ff]-(\d{1,2})(?=.*\/)/);
videoInfo.season = folderPathSeasonMatch && parseInt(folderPathSeasonMatch[1], 10) || undefined;
}
// sometimes video file does not have correct date format as in torrent title
if (!videoInfo.episodes && !videoInfo.date && parsedTorrentName.date) {
videoInfo.date = parsedTorrentName.date;
}
// limit number of episodes in case of incorrect parsing
if (videoInfo.episodes && videoInfo.episodes.length > 20) {
videoInfo.episodes = [videoInfo.episodes[0]];
videoInfo.episode = videoInfo.episodes[0];
}
// force episode to any found number if it was not parsed
if (!videoInfo.episodes && !videoInfo.date) {
const epMatcher = videoInfo.title.match(
/(?<!season\W*|disk\W*|movie\W*|film\W*)(?:^|\W|_)(\d{1,4})(?:a|b|c|v\d)?(?:_|\W|$)(?!disk|movie|film)/i);
videoInfo.episodes = epMatcher && [parseInt(epMatcher[1], 10)];
videoInfo.episode = videoInfo.episodes && videoInfo.episodes[0];
}
// last resort: a number following "(<year>)" or "part" in the raw file name
if (!videoInfo.episodes && !videoInfo.date) {
const epMatcher = video.name.match(new RegExp(`(?:\\(${videoInfo.year}\\)|part)[._ ]?(\\d{1,3})(?:\\b|_)`, "i"));
videoInfo.episodes = epMatcher && [parseInt(epMatcher[1], 10)];
videoInfo.episode = videoInfo.episodes && videoInfo.episodes[0];
}
// parsed fields override the raw file fields of the same name
return { ...video, ...videoInfo };
}
/**
 * Decides whether a video inside a series torrent is actually a movie.
 *
 * @param {object} video - Parsed video (name, season, episodes, title, year).
 * @param {object[]} otherVideos - All parsed videos of the torrent (including `video`).
 * @param {string} type - Torrent type.
 * @param {boolean} hasMovies - Whether the torrent title suggests it contains movies.
 * @returns {boolean} `true` when the video should be treated as a movie.
 */
function isMovieVideo(video, otherVideos, type, hasMovies) {
  if (Number.isInteger(video.season) && Array.isArray(video.episodes)) {
    // not movie if video has season
    return false;
  }
  if (video.name.match(/\b(?:\d+[ .]movie|movie[ .]\d+)\b/i)) {
    // movie if video explicitly has numbered movie keyword in the name, ie. 1 Movie or Movie 1
    return true;
  }
  if (!hasMovies && type !== Type.ANIME) {
    // not movie if torrent name does not contain movies keyword or is not a pack torrent and is not anime
    return false;
  }
  if (!video.episodes) {
    // movie if there's no episode info it could be a movie
    return true;
  }
  // movie if contains year info and there aren't more than 3 video with same title and year
  // as some series titles might contain year in it.
  // The count must be taken from `.length`: comparing the filtered array itself with a number
  // coerces the array to NaN (or 0 when empty), so the check was wrong for any non-empty match.
  const sameTitleAndYear = otherVideos
      .filter(other => other.title === video.title && other.year === video.year)
      .length;
  return !!video.year
      && otherVideos.length > 3
      && sameTitleAndYear < 3;
}
/**
 * Tells whether a torrent is a pack (multiple movies, or multiple episodes/seasons).
 *
 * @param {object} torrent - Torrent with `title`, `type`, `size` and an optional `pack` flag.
 * @returns {*} Truthy for a pack. The value is the result of short-circuit expressions, so it
 *   is not always a strict boolean.
 */
export function isPackTorrent(torrent) {
// an explicit flag on the torrent wins
if (torrent.pack) {
return true;
}
const parsedInfo = parse(torrent.title);
if (torrent.type === Type.MOVIE) {
// NOTE(review): a string-typed `year` presumably denotes a year range (e.g. a collection) — confirm against parse-torrent-title.
return parsedInfo.complete || typeof parsedInfo.year === 'string' || /movies/i.test(torrent.title);
}
// series: large size, several seasons/episodes, or a season without episodes suggest a pack
const hasMultipleEpisodes = parsedInfo.complete ||
torrent.size > MULTIPLE_FILES_SIZE ||
(parsedInfo.seasons && parsedInfo.seasons.length > 1) ||
(parsedInfo.episodes && parsedInfo.episodes.length > 1) ||
(parsedInfo.seasons && !parsedInfo.episodes);
// a single parsed episode number, or a date without episodes, marks a single-episode torrent
const hasSingleEpisode = Number.isInteger(parsedInfo.episode) || (!parsedInfo.episodes && parsedInfo.date);
return hasMultipleEpisodes && !hasSingleEpisode;
}

View File

@@ -0,0 +1,55 @@
/**
 * Runs promise-returning functions strictly one after another.
 *
 * @param {Array<function(): Promise>} promises - Functions that each start one task.
 * @returns {Promise<Array>} Results in order; array results are flattened one level
 *   because they are appended with `concat`.
 */
export async function sequence(promises) {
  let chain = Promise.resolve([]);
  for (const startTask of promises) {
    chain = chain.then(collected => startTask().then(value => collected.concat(value)));
  }
  return chain;
}
/**
 * Resolves with the value of the first promise that fulfils.
 *
 * Works by inverting every promise (fulfilment becomes rejection and vice versa) so that
 * `Promise.all` bails out on the first success and only completes when everything failed.
 *
 * @param {Promise[]} promises - Promises to race for the first success.
 * @returns {Promise<*>} First fulfilled value; rejects with the array of all errors when
 *   every promise rejects.
 */
export async function first(promises) {
  const inverted = promises.map(promise => promise.then(
      value => { throw value; }, // a success aborts Promise.all immediately
      error => error // a failure keeps waiting for the others
  ));
  return Promise.all(inverted).then(
      errors => { throw errors; }, // everything failed
      winner => winner // the "rejection" is the value we were after
  );
}
/**
 * Waits for the given time.
 *
 * @param {number} duration - Delay in milliseconds.
 * @returns {Promise<undefined>} Resolves (with no value) once the delay has elapsed.
 */
export async function delay(duration) {
  await new Promise((done) => {
    setTimeout(done, duration);
  });
}
/**
 * Rejects if `promise` does not settle within `timeoutMs`.
 *
 * The timer is cleared as soon as the race is decided; previously it kept running until
 * it fired, holding a pending timer (and keeping the event loop alive) for every call
 * even after the promise had settled.
 *
 * @param {number} timeoutMs - Time limit in milliseconds.
 * @param {Promise} promise - Promise to guard.
 * @param {*} [message='Timed out'] - Rejection reason used on timeout (passed through as-is).
 * @returns {Promise<*>} Settles like `promise`, or rejects with `message` on timeout.
 */
export async function timeout(timeoutMs, promise, message = 'Timed out') {
  let timer;
  const expiry = new Promise((resolve, reject) => {
    timer = setTimeout(() => reject(message), timeoutMs);
  });
  try {
    return await Promise.race([promise, expiry]);
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Returns the most common value of an array.
 *
 * The input is no longer modified: the previous implementation sorted the caller's array
 * in place and removed its last element, and re-counted occurrences inside the sort
 * comparator. On ties the result is unchanged: the tied value that appears last wins.
 * Values are counted with Map semantics (SameValueZero).
 *
 * @param {Array} array - Values to inspect.
 * @returns {*} Most frequent value, or `undefined` for an empty array.
 */
export function mostCommonValue(array) {
  const counts = new Map();
  let highest = 0;
  for (const value of array) {
    const count = (counts.get(value) ?? 0) + 1;
    counts.set(value, count);
    if (count > highest) {
      highest = count;
    }
  }
  for (let index = array.length - 1; index >= 0; index -= 1) {
    if (counts.get(array[index]) === highest) {
      return array[index];
    }
  }
  return undefined;
}

View File

@@ -0,0 +1,379 @@
import moment from 'moment';
import * as Promises from './promises.js';
import { Sequelize, Op, DataTypes, fn, col, literal } from 'sequelize';
import { databaseConfig } from './config.js';
// Sequelize connection used by all models below; SQL logging is switched off.
const database = new Sequelize(
databaseConfig.DATABASE_URI,
{
logging: false
}
);
// Scraping state per provider, keyed by provider name.
const Provider = database.define('provider', {
name: { type: DataTypes.STRING(32), primaryKey: true },
lastScraped: { type: DataTypes.DATE },
lastScrapedId: { type: DataTypes.STRING(128) }
});
// Raw ingested torrent records; (source, info_hash) is unique and `processed` starts as false.
const IngestedTorrent = database.define('ingested_torrent', {
id: { type: DataTypes.BIGINT, autoIncrement: true, primaryKey: true },
name: DataTypes.STRING,
source: DataTypes.STRING,
category: DataTypes.STRING,
info_hash: DataTypes.STRING,
size: DataTypes.STRING,
seeders: DataTypes.INTEGER,
leechers: DataTypes.INTEGER,
imdb: DataTypes.STRING,
processed: {
type: DataTypes.BOOLEAN,
defaultValue: false
}},
{
indexes: [
{
unique: true,
fields: ['source', 'info_hash']
}
]
})
// Pages already ingested, unique by URL. The model is defined but not referenced elsewhere in this file,
// hence the lint suppression around it.
/* eslint-disable no-unused-vars */
const IngestedPage = database.define('ingested_page', {
id: { type: DataTypes.BIGINT, autoIncrement: true, primaryKey: true },
url: { type: DataTypes.STRING, allowNull: false },
},
{
indexes: [
{
unique: true,
fields: ['url']
}
]
})
/* eslint-enable no-unused-vars */
// A torrent, keyed by info hash. `opened` is set to true once its contents have been stored (see createContents).
const Torrent = database.define('torrent',
{
infoHash: { type: DataTypes.STRING(64), primaryKey: true },
provider: { type: DataTypes.STRING(32), allowNull: false },
torrentId: { type: DataTypes.STRING(512) },
title: { type: DataTypes.STRING(512), allowNull: false },
size: { type: DataTypes.BIGINT },
type: { type: DataTypes.STRING(16), allowNull: false },
uploadDate: { type: DataTypes.DATE, allowNull: false },
seeders: { type: DataTypes.SMALLINT },
trackers: { type: DataTypes.STRING(8000) },
languages: { type: DataTypes.STRING(4096) },
resolution: { type: DataTypes.STRING(16) },
reviewed: { type: DataTypes.BOOLEAN, allowNull: false, defaultValue: false },
opened: { type: DataTypes.BOOLEAN, allowNull: false, defaultValue: false }
}
);
// A video file of a torrent, mapped to an IMDb and/or Kitsu episode. Deleted together with its torrent.
const File = database.define('file',
{
id: { type: DataTypes.BIGINT, autoIncrement: true, primaryKey: true },
infoHash: {
type: DataTypes.STRING(64),
allowNull: false,
references: { model: Torrent, key: 'infoHash' },
onDelete: 'CASCADE'
},
fileIndex: { type: DataTypes.INTEGER },
title: { type: DataTypes.STRING(512), allowNull: false },
size: { type: DataTypes.BIGINT },
imdbId: { type: DataTypes.STRING(32) },
imdbSeason: { type: DataTypes.INTEGER },
imdbEpisode: { type: DataTypes.INTEGER },
kitsuId: { type: DataTypes.INTEGER },
kitsuEpisode: { type: DataTypes.INTEGER }
},
{
indexes: [
{
// COALESCE substitutes a fixed value for NULL in each nullable column of the unique index.
unique: true,
name: 'files_unique_file_constraint',
fields: [
col('infoHash'),
fn('COALESCE', (col('fileIndex')), -1),
fn('COALESCE', (col('imdbId')), 'null'),
fn('COALESCE', (col('imdbSeason')), -1),
fn('COALESCE', (col('imdbEpisode')), -1),
fn('COALESCE', (col('kitsuId')), -1),
fn('COALESCE', (col('kitsuEpisode')), -1)
]
},
{ unique: false, fields: ['imdbId', 'imdbSeason', 'imdbEpisode'] },
{ unique: false, fields: ['kitsuId', 'kitsuEpisode'] }
]
}
);
// A subtitle file of a torrent, optionally linked to a video file; the link is nulled if that file is deleted.
const Subtitle = database.define('subtitle',
{
infoHash: {
type: DataTypes.STRING(64),
allowNull: false,
references: { model: Torrent, key: 'infoHash' },
onDelete: 'CASCADE'
},
fileIndex: {
type: DataTypes.INTEGER,
allowNull: false
},
fileId: {
type: DataTypes.BIGINT,
allowNull: true,
references: { model: File, key: 'id' },
onDelete: 'SET NULL'
},
title: { type: DataTypes.STRING(512), allowNull: false },
},
{
timestamps: false,
indexes: [
{
unique: true,
name: 'subtitles_unique_subtitle_constraint',
fields: [
col('infoHash'),
col('fileIndex'),
fn('COALESCE', (col('fileId')), -1)
]
},
{ unique: false, fields: ['fileId'] }
]
}
);
// Every file contained in a torrent (path and size), keyed by (infoHash, fileIndex).
const Content = database.define('content',
{
infoHash: {
type: DataTypes.STRING(64),
primaryKey: true,
allowNull: false,
references: { model: Torrent, key: 'infoHash' },
onDelete: 'CASCADE'
},
fileIndex: {
type: DataTypes.INTEGER,
primaryKey: true,
allowNull: false
},
path: { type: DataTypes.STRING(512), allowNull: false },
size: { type: DataTypes.BIGINT },
},
{
timestamps: false,
}
);
// Info hashes recorded as "skip" entries (see getSkipTorrent / createSkipTorrent).
const SkipTorrent = database.define('skip_torrent', {
infoHash: { type: DataTypes.STRING(64), primaryKey: true },
});
// Associations are declared with `constraints: false`; the foreign keys themselves come from
// the `references` entries in the model definitions above.
Torrent.hasMany(File, { foreignKey: 'infoHash', constraints: false });
File.belongsTo(Torrent, { foreignKey: 'infoHash', constraints: false });
Torrent.hasMany(Content, { foreignKey: 'infoHash', constraints: false });
Content.belongsTo(Torrent, { foreignKey: 'infoHash', constraints: false });
File.hasMany(Subtitle, { foreignKey: 'fileId', constraints: false });
Subtitle.belongsTo(File, { foreignKey: 'fileId', constraints: false });
/**
 * Prepares the database: when `databaseConfig.ENABLE_SYNC` is set, synchronises the schema
 * with the model definitions (`alter: true`); otherwise does nothing.
 *
 * @returns {Promise<*>} Result of the sync, or an already-resolved promise when sync is disabled.
 *   Sync failures are logged and re-thrown.
 */
export function connect() {
  if (!databaseConfig.ENABLE_SYNC) {
    return Promise.resolve();
  }
  const logAndRethrow = error => {
    console.error('Failed syncing database: ', error);
    throw error;
  };
  return database.sync({ alter: true }).catch(logAndRethrow);
}
/**
 * Loads the stored provider with the given name, creating it from `provider` if missing.
 *
 * @param {object} provider - Provider values; `name` identifies the record.
 * @returns {Promise<object>} The stored record, or the passed-in `provider` when the lookup fails.
 */
export function getProvider(provider) {
  const query = {
    where: { name: { [Op.eq]: provider.name } },
    defaults: provider
  };
  return Provider.findOrCreate(query)
      .then(found => found[0])
      .catch(() => provider);
}
/**
 * Finds a stored torrent by info hash, or by provider + torrentId when no info hash is given.
 *
 * @param {object} torrent - Object with `infoHash`, or with `provider` and `torrentId`.
 * @returns {Promise<object|null>} Result of `Torrent.findOne`.
 */
export function getTorrent(torrent) {
  const { infoHash, provider, torrentId } = torrent;
  const where = infoHash ? { infoHash } : { provider, torrentId };
  return Torrent.findOne({ where });
}
/** Finds torrents of the given type whose title matches the regular expression `titleQuery`. */
export function getTorrentsBasedOnTitle(titleQuery, type) {
  const titleCondition = { [Op.regexp]: `${titleQuery}` };
  return getTorrentsBasedOnQuery({ title: titleCondition, type });
}

/** Finds torrents matching an arbitrary Sequelize `where` clause. */
export function getTorrentsBasedOnQuery(where) {
  return Torrent.findAll({ where });
}

/** Finds files matching an arbitrary Sequelize `where` clause. */
export function getFilesBasedOnQuery(where) {
  return File.findAll({ where });
}
/**
 * Loads ingested torrents that have not been processed yet and whose category is
 * 'tv' or 'movies'.
 *
 * @returns {Promise<object[]>} Matching ingested torrent records.
 */
export function getUnprocessedIngestedTorrents() {
  const supportedCategories = { [Op.or]: ['tv', 'movies'] };
  const where = { processed: false, category: supportedCategories };
  return IngestedTorrent.findAll({ where });
}
/**
 * Marks ingested torrents as processed and saves them one after another.
 *
 * @param {object[]} ingestedTorrents - Model instances exposing `save()`.
 * @returns {Promise<Array>} Results of the sequential saves.
 */
export function setIngestedTorrentsProcessed(ingestedTorrents) {
  const toSaveTask = ingestedTorrent => () => {
    ingestedTorrent.processed = true;
    return ingestedTorrent.save();
  };
  return Promises.sequence(ingestedTorrents.map(toSaveTask));
}
/**
 * Finds torrents that have at least one file whose size is exactly 300000000, ordered by seeders (highest first).
 * NOTE(review): 300000000 looks like a placeholder size used when the real size is unknown — confirm.
 */
export function getTorrentsWithoutSize() {
return Torrent.findAll({
where: literal(
'exists (select 1 from files where files."infoHash" = torrent."infoHash" and files.size = 300000000)'),
order: [
['seeders', 'DESC']
]
});
}
/**
 * Finds torrents last updated more than 7 days ago, most seeded first, then oldest update first.
 *
 * @param {number} [limit=50] - Maximum number of torrents returned.
 */
export function getUpdateSeedersTorrents(limit = 50) {
// only the date part is used in the comparison
const until = moment().subtract(7, 'days').format('YYYY-MM-DD');
return Torrent.findAll({
where: literal(`torrent."updatedAt" < '${until}'`),
limit: limit,
order: [
['seeders', 'DESC'],
['updatedAt', 'ASC']
]
});
}
/**
 * Finds recently created torrents (created within roughly the last 4 days) that have not been
 * updated recently, least seeded first, then oldest update first.
 *
 * @param {number} [limit=50] - Maximum number of torrents returned.
 */
export function getUpdateSeedersNewTorrents(limit = 50) {
// NOTE(review): both values are formatted as dates only, so the "12 hours" cut-off is
// effectively truncated to a day boundary — confirm this precision is intended.
const lastUpdate = moment().subtract(12, 'hours').format('YYYY-MM-DD');
const createdAfter = moment().subtract(4, 'days').format('YYYY-MM-DD');
return Torrent.findAll({
where: literal(`torrent."updatedAt" < '${lastUpdate}' AND torrent."createdAt" > '${createdAfter}'`),
limit: limit,
order: [
['seeders', 'ASC'],
['updatedAt', 'ASC']
]
});
}
/**
 * Picks up to 500 random torrents that have at least one seeder and whose contents
 * have not been stored yet (`opened` is false).
 */
export function getNoContentsTorrents() {
return Torrent.findAll({
where: { opened: false, seeders: { [Op.gte]: 1 } },
limit: 500,
order: [[fn('RANDOM')]]
});
}
/**
 * Stores a torrent (insert or update), then its contents, then its subtitles.
 *
 * @param {object} torrent - Torrent values plus optional `contents` and `subtitles` arrays.
 * @returns {Promise<*>} Result of storing the subtitles.
 */
export function createTorrent(torrent) {
  const storeContents = () => createContents(torrent.infoHash, torrent.contents);
  const storeSubtitles = () => createSubtitles(torrent.infoHash, torrent.subtitles);
  return Torrent.upsert(torrent)
      .then(storeContents)
      .then(storeSubtitles);
}
/**
 * Updates the seeder count of a torrent, identified by info hash or, failing that,
 * by provider + torrentId.
 *
 * @param {object} torrent - Object with `infoHash`, or with `provider` and `torrentId`.
 * @param {number} seeders - New seeder count.
 * @returns {Promise<*>} Result of `Torrent.update`.
 */
export function setTorrentSeeders(torrent, seeders) {
  const { infoHash, provider, torrentId } = torrent;
  const where = infoHash ? { infoHash } : { provider, torrentId };
  return Torrent.update({ seeders }, { where });
}

/**
 * Deletes the torrent with the given info hash.
 *
 * @param {object} torrent - Object with `infoHash`.
 * @returns {Promise<*>} Result of `Torrent.destroy`.
 */
export function deleteTorrent(torrent) {
  const { infoHash } = torrent;
  return Torrent.destroy({ where: { infoHash } })
}
/**
 * Stores a file.
 *
 * A file that already has an id is saved (model instance) or upserted (plain object) and its
 * subtitles are upserted afterwards. A new file is created together with its subtitles, each
 * of which gets the file's info hash and a title defaulting to its path.
 *
 * @param {object} file - File values or model instance; `subtitles` is rewritten in place for new files.
 * @returns {Promise<*>} Result of the subtitle upsert or of `File.create`.
 */
export function createFile(file) {
  if (file.id) {
    const stored = file.dataValues ? file.save() : File.upsert(file);
    return stored.then(() => upsertSubtitles(file, file.subtitles));
  }

  const hasSubtitles = file.subtitles && file.subtitles.length;
  if (hasSubtitles) {
    const withDefaults = subtitle => ({ infoHash: file.infoHash, title: subtitle.path, ...subtitle });
    file.subtitles = file.subtitles.map(withDefaults);
  }
  return File.create(file, { include: [Subtitle], ignoreDuplicates: true });
}
/** Loads all files belonging to the torrent's info hash. */
export function getFiles(torrent) {
  const { infoHash } = torrent;
  return File.findAll({ where: { infoHash } });
}

/** Loads all files whose title matches the regular expression `titleQuery`. */
export function getFilesBasedOnTitle(titleQuery) {
  const title = { [Op.regexp]: `${titleQuery}` };
  return File.findAll({ where: { title } });
}

/** Deletes the file with the given id. */
export function deleteFile(file) {
  const { id } = file;
  return File.destroy({ where: { id } })
}
/**
 * Bulk-inserts the subtitles of a torrent; does nothing for a missing or empty list.
 *
 * @param {string} infoHash - Info hash assigned to subtitles that do not carry their own.
 * @param {object[]} [subtitles] - Subtitles; `title` defaults to the subtitle's `path`.
 * @returns {Promise<*>} Result of the bulk insert, or an already-resolved promise.
 */
export function createSubtitles(infoHash, subtitles) {
  if (!subtitles || !subtitles.length) {
    return Promise.resolve();
  }
  const rows = subtitles.map(subtitle => ({ infoHash, title: subtitle.path, ...subtitle }));
  return Subtitle.bulkCreate(rows);
}
/**
 * Links subtitles to a stored file and saves them one after another.
 * Each subtitle is updated in place: it gets the file's id, and the file's info hash and
 * its own path are used where `infoHash` / `title` are missing.
 *
 * @param {object} file - Stored file (must have an `id`).
 * @param {object[]} [subtitles] - Subtitle model instances or plain objects.
 * @returns {Promise<*>} Results of the sequential saves, or an already-resolved promise when
 *   there is nothing to do.
 */
export function upsertSubtitles(file, subtitles) {
  const nothingToStore = !file.id || !subtitles || !subtitles.length;
  if (nothingToStore) {
    return Promise.resolve();
  }

  const saveTasks = subtitles.map(subtitle => {
    subtitle.fileId = file.id;
    subtitle.infoHash = subtitle.infoHash || file.infoHash;
    subtitle.title = subtitle.title || subtitle.path;
    return () => (subtitle.dataValues ? subtitle.save() : Subtitle.create(subtitle));
  });
  return Promises.sequence(saveTasks);
}
/** Loads all subtitles belonging to the torrent's info hash. */
export function getSubtitles(torrent) {
  const { infoHash } = torrent;
  return Subtitle.findAll({ where: { infoHash } });
}

/** Loads all subtitles that are not linked to a file (`fileId` is null). */
export function getUnassignedSubtitles() {
  const where = { fileId: null };
  return Subtitle.findAll({ where });
}
/**
 * Stores the file listing of a torrent and then flags the torrent as opened.
 * Duplicate rows are ignored; the flag update is silent (no timestamp change). Nothing is done
 * for a missing or empty listing.
 *
 * @param {string} infoHash - Info hash of the torrent.
 * @param {object[]} [contents] - Files contained in the torrent.
 * @returns {Promise<*>} Result of the torrent update, or an already-resolved promise.
 */
export function createContents(infoHash, contents) {
  if (!contents || !contents.length) {
    return Promise.resolve();
  }
  const rows = contents.map(content => ({ infoHash, ...content }));
  const markOpened = () => Torrent.update({ opened: true }, { where: { infoHash: infoHash }, silent: true });
  return Content.bulkCreate(rows, { ignoreDuplicates: true }).then(markOpened);
}
/** Loads the stored file listing belonging to the torrent's info hash. */
export function getContents(torrent) {
  const { infoHash } = torrent;
  return Content.findAll({ where: { infoHash } });
}

/**
 * Loads the skip entry of a torrent.
 *
 * @param {object} torrent - Object with `infoHash`.
 * @returns {Promise<object>} Plain values of the skip entry; rejects with
 *   `torrent not found: <infoHash>` when there is none.
 */
export function getSkipTorrent(torrent) {
  const toValuesOrFail = (entry) => {
    if (entry) {
      return entry.dataValues;
    }
    throw new Error(`torrent not found: ${torrent.infoHash}`);
  };
  return SkipTorrent.findByPk(torrent.infoHash).then(toValuesOrFail)
}

/** Records (insert or update) the torrent's info hash as a skip entry. */
export function createSkipTorrent(torrent) {
  const { infoHash } = torrent;
  return SkipTorrent.upsert({ infoHash });
}

View File

@@ -0,0 +1,82 @@
import torrentStream from 'torrent-stream';
import {isSubtitle, isVideo} from './extension.js';
import { torrentConfig } from './config.js';
import { decode } from 'magnet-uri';
/**
 * Retrieves the file listing of a torrent and splits it into videos and subtitles.
 *
 * @param {object} torrent - Torrent with `infoHash` (and trackers).
 * @param {number} [timeout] - Time limit passed on to the torrent lookup.
 * @returns {Promise<{contents: object[], videos: object[], subtitles: object[]}>} All files,
 *   the relevant video files and the subtitle files.
 */
export async function torrentFiles(torrent, timeout) {
  const files = await filesFromTorrentStream(torrent, timeout);
  return {
    contents: files,
    videos: filterVideos(files),
    subtitles: filterSubtitles(files)
  };
}

/**
 * Retrieves only the file listing of a torrent.
 *
 * @returns {Promise<object[]>} The `files` part of the lookup result.
 */
async function filesFromTorrentStream(torrent, timeout) {
  const { files } = await filesAndSizeFromTorrentStream(torrent, timeout);
  return files;
}
// Options passed to every torrent-stream engine created in this module.
// The engine is only used to read the file listing: uploads are set to 0, verification and
// DHT are switched off, tracker use is switched on, and the connection count comes from config.
const engineOptions = {
connections: torrentConfig.MAX_CONNECTIONS_PER_TORRENT,
uploads: 0,
verify: false,
dht: false,
tracker: true
}
/**
 * Opens the torrent through a torrent-stream engine and reads its file listing and total size.
 *
 * Changes over the previous version:
 * - `torrent.trackers` is stored as one comma-joined string; it is now split into separate
 *   announce URLs instead of being handed to the magnet encoder as a single entry.
 * - the engine is created before the timeout timer is armed, so the timer callback can never
 *   touch an uninitialised `engine` if the constructor throws.
 * - the timer is cleared as soon as the engine is ready.
 *
 * @param {object} torrent - Torrent with `infoHash` and optional `trackers` (comma-joined
 *   string or array of announce URLs).
 * @param {number} [timeout=30000] - Milliseconds to wait for the engine to become ready.
 * @returns {Promise<{files: object[], size: number}>} Files (`fileIndex`, `name`, `path` without
 *   the leading folder, `size`) and the total torrent size; rejects when the info hash is
 *   missing or the engine is not ready in time.
 */
function filesAndSizeFromTorrentStream(torrent, timeout = 30000) {
  if (!torrent.infoHash) {
    return Promise.reject(new Error("no infoHash..."));
  }
  const announce = typeof torrent.trackers === 'string'
      ? torrent.trackers.split(',').map(tracker => tracker.trim()).filter(Boolean)
      : torrent.trackers;
  const magnet = decode.encode({ infoHash: torrent.infoHash, announce });
  return new Promise((resolve, rejected) => {
    const engine = new torrentStream(magnet, engineOptions);
    const timeoutId = setTimeout(() => {
      engine.destroy();
      rejected(new Error('No available connections for torrent!'));
    }, timeout);

    engine.ready(() => {
      clearTimeout(timeoutId);
      const files = engine.files
          .map((file, fileId) => ({
            fileIndex: fileId,
            name: file.name,
            path: file.path.replace(/^[^/]+\//, ''),
            size: file.length
          }));
      const size = engine.torrent.length;
      resolve({ files, size });
      engine.destroy();
    });
  });
}
/**
 * Selects the relevant video files of a torrent, dropping samples, extras, anime
 * openings/endings, redundant small files and watermark-like files.
 * Most exclusions compare a file's size with the largest video in the torrent.
 *
 * @param {object[]} files - Files with `path`, `size` and `fileIndex`.
 * @returns {object[]} Relevant videos; a single file without an integer `fileIndex` is
 *   returned untouched.
 */
function filterVideos(files) {
  if (files.length === 1 && !Number.isInteger(files[0].fileIndex)) {
    return files;
  }

  const videos = files.filter(file => isVideo(file.path));
  const fewVideos = videos.length <= 3;
  const maxSize = Math.max(...videos.map(video => video.size));
  const minSampleRatio = fewVideos ? 3 : 10;
  const minAnimeExtraRatio = 5;
  const minRedundantRatio = fewVideos ? 30 : Number.MAX_VALUE;
  // how many times smaller than the largest video this one is
  const sizeRatio = video => maxSize / parseInt(video.size);

  const exclusionRules = [
    // sample
    video => video.path.match(/sample|bonus|promo/i) && sizeRatio(video) > minSampleRatio,
    // extra
    video => video.path.match(/extras?\//i),
    // anime extra
    video => video.path.match(/(?:\b|_)(?:NC)?(?:ED|OP|PV)(?:v?\d\d?)?(?:\b|_)/i)
        && sizeRatio(video) > minAnimeExtraRatio,
    // redundant
    video => sizeRatio(video) > minRedundantRatio,
    // watermark
    video => video.path.match(/^[A-Z-]+(?:\.[A-Z]+)?\.\w{3,4}$/)
        && sizeRatio(video) > minAnimeExtraRatio,
  ];

  return videos.filter(video => !exclusionRules.some(isExcluded => isExcluded(video)));
}
/**
 * Selects the entries whose path is recognised as a subtitle file.
 *
 * @param {Array<object>} files - entries carrying a `path`.
 * @returns {Array<object>} the matching entries, in their original order.
 */
function filterSubtitles(files) {
  const hasSubtitlePath = ({ path }) => isSubtitle(path);
  return files.filter(hasSubtitlePath);
}

View File

@@ -0,0 +1,172 @@
import { parse } from 'parse-torrent-title';
import { Type } from './types.js';
import * as Promises from './promises.js';
import * as repository from './repository.js';
import { getImdbId, getKitsuId } from './metadata.js';
import { parseTorrentFiles } from './torrentFiles.js';
import { assignSubtitles } from './torrentSubtitles.js';
import { isPackTorrent } from './parseHelper.js';
/**
 * Resolves ids for a scraped torrent, parses its files and stores the torrent with its videos.
 *
 * Nothing is stored when neither an imdb nor a kitsu id can be found (unless the torrent
 * is a pack) or when no video files come out of parsing.
 *
 * @param {object} torrent - scraped torrent; `imdbId` / `kitsuId` are filled in on this object.
 * @param {boolean} [overwrite=false] - reuse ids of already stored files for the same torrent.
 * @returns {Promise<undefined>}
 */
export async function createTorrentEntry(torrent, overwrite = false) {
  const titleInfo = parse(torrent.title);

  if (!torrent.imdbId && torrent.type !== Type.ANIME) {
    torrent.imdbId = await getImdbId(titleInfo, torrent.type)
      .catch(() => undefined);
  }
  if (torrent.imdbId && torrent.imdbId.length < 9) {
    // left-pad the numeric part to the canonical seven digits
    const digits = torrent.imdbId.replace('tt', '');
    torrent.imdbId = `tt${digits.padStart(7, '0')}`;
  }
  if (torrent.imdbId && torrent.imdbId.length > 9 && torrent.imdbId.startsWith('tt0')) {
    // strip superfluous leading zeros
    torrent.imdbId = torrent.imdbId.replace(/tt0+([0-9]{7,})$/, 'tt$1');
  }
  if (!torrent.kitsuId && torrent.type === Type.ANIME) {
    torrent.kitsuId = await getKitsuId(titleInfo)
      .catch(() => undefined);
  }

  const hasKnownId = torrent.imdbId || torrent.kitsuId;
  if (!hasKnownId && !isPackTorrent(torrent)) {
    console.log(`imdbId or kitsuId not found: ${torrent.provider} ${torrent.title}`);
    return;
  }

  let parsed;
  try {
    const torrentContents = await parseTorrentFiles(torrent);
    const merged = overwrite
      ? await overwriteExistingFiles(torrent, torrentContents)
      : torrentContents;
    parsed = await assignSubtitles(merged);
  } catch (error) {
    console.log(`Failed getting files for ${torrent.title}`, error.message);
    parsed = {};
  }
  const { contents, videos, subtitles } = parsed;

  if (!videos || !videos.length) {
    console.log(`no video files found for ${torrent.provider} [${torrent.infoHash}] ${torrent.title}`);
    return;
  }

  await repository.createTorrent({ ...torrent, contents, subtitles });
  // store the files one after another, never in parallel
  await Promises.sequence(videos.map(video => () => repository.createFile(video)));
  console.log(`Created ${torrent.provider} entry for [${torrent.infoHash}] ${torrent.title}`);
}
/**
 * Re-uses the ids of files already stored for this torrent so that saving the freshly
 * parsed videos overwrites the stored rows instead of duplicating them.
 *
 * Stored files are bucketed by `fileIndex` (`null` when missing). Each parsed video takes
 * the next stored file from its bucket; when a bucket is exhausted the video is kept as-is.
 *
 * @param {object} torrent - only `title` is read, for the error message.
 * @param {{videos?: Array<object>}} torrentContents - parsed contents of the torrent.
 * @returns {Promise<object>} `torrentContents`, with `videos` carrying stored ids where matched.
 * @throws {Error} when `torrentContents` has no videos.
 */
async function overwriteExistingFiles(torrent, torrentContents) {
  const videos = torrentContents && torrentContents.videos;
  if (!videos || !videos.length) {
    // A real Error (not a bare string) so that callers logging `error.message` get the text.
    throw new Error(`No video files found for: ${torrent.title}`);
  }

  const existingFiles = await repository.getFiles({ infoHash: videos[0].infoHash })
    .then((existing) => existing
      .reduce((map, next) => {
        const fileIndex = next.fileIndex !== undefined ? next.fileIndex : null;
        map[fileIndex] = (map[fileIndex] || []).concat(next);
        return map;
      }, {}))
    // a failed lookup is treated as "nothing stored"
    .catch(() => undefined);
  if (!existingFiles || !Object.keys(existingFiles).length) {
    return torrentContents;
  }

  // A single parsed video always maps onto a single stored bucket, whatever its fileIndex.
  const singleToSingle = videos.length === 1 && Object.keys(existingFiles).length === 1;
  const overwrittenVideos = videos
    .map(file => {
      const mapping = singleToSingle
        ? Object.values(existingFiles)[0]
        : existingFiles[file.fileIndex !== undefined ? file.fileIndex : null];
      // The bucket may already be drained when several parsed entries share one fileIndex.
      if (mapping && mapping.length) {
        const originalFile = mapping.shift();
        return { id: originalFile.id, ...file };
      }
      return file;
    });
  return { ...torrentContents, videos: overwrittenVideos };
}
/**
 * Stores the torrent as a "skip" entry through the repository.
 *
 * @param {object} torrent - torrent to record.
 * @returns {Promise<*>} whatever `repository.createSkipTorrent` resolves with.
 */
export async function createSkipTorrentEntry(torrent) {
  const skipEntry = await repository.createSkipTorrent(torrent);
  return skipEntry;
}
/**
 * Looks a torrent up among the skip entries first and the regular torrents second.
 *
 * @param {object} torrent - lookup key handed to the repository.
 * @returns {Promise<*>} the first record found, or `undefined` when both lookups fail.
 */
export async function getStoredTorrentEntry(torrent) {
  const tryRegularTorrent = () => repository.getTorrent(torrent);
  const giveUp = () => undefined;
  return repository.getSkipTorrent(torrent)
    .catch(tryRegularTorrent)
    .catch(giveUp);
}
/**
 * Refreshes a torrent that is already stored: fills in missing languages, then re-checks
 * its contents and updates its seeders.
 *
 * Resolves to `false` when no stored record is found, to `true` for stored RARBG records
 * (which are left untouched), and otherwise to the result of `updateTorrentSeeders`.
 *
 * `torrent` is either the stored record itself (recognised by having `dataValues`) or a
 * scraped description that is used to look the stored record up.
 */
export async function checkAndUpdateTorrent(torrent) {
// An argument carrying `dataValues` is used as the stored record; a failed lookup counts as "not stored".
const storedTorrent = torrent.dataValues
? torrent
: await repository.getTorrent(torrent).catch(() => undefined);
if (!storedTorrent) {
return false;
}
if (storedTorrent.provider === 'RARBG') {
return true;
}
// Records stored under KickassTorrents take over the provider and id of the incoming torrent.
// NOTE(review): this function only calls save() inside the languages branch below — confirm the
// provider change is persisted elsewhere when that branch is skipped.
if (storedTorrent.provider === 'KickassTorrents' && torrent.provider) {
storedTorrent.provider = torrent.provider;
storedTorrent.torrentId = torrent.torrentId;
}
// The RARBG re-check matters because the provider may just have been replaced above.
if (!storedTorrent.languages && torrent.languages && storedTorrent.provider !== 'RARBG') {
storedTorrent.languages = torrent.languages;
await storedTorrent.save();
console.log(`Updated [${storedTorrent.infoHash}] ${storedTorrent.title} language to ${torrent.languages}`);
}
// Contents are checked on a plain copy from get(), with the incoming torrentLink added; seeders use the incoming torrent.
return createTorrentContents({ ...storedTorrent.get(), torrentLink: torrent.torrentLink })
.then(() => updateTorrentSeeders(torrent));
}
/**
 * Parses the contents of an already stored torrent that has not been opened yet, then
 * saves the torrent with its contents and subtitles and saves its video files.
 *
 * Returns early, without saving, when the torrent is flagged `opened`, when it has no
 * stored files, or when parsing yields no contents. Save failures are logged, not thrown.
 */
export async function createTorrentContents(torrent) {
if (torrent.opened) {
return;
}
// A failed file lookup is treated the same as "no stored files".
const storedVideos = await repository.getFiles(torrent).catch(() => []);
if (!storedVideos || !storedVideos.length) {
return;
}
// A single stored file without an integer fileIndex is taken to be a placeholder for an unopened torrent.
const notOpenedVideo = storedVideos.length === 1 && !Number.isInteger(storedVideos[0].fileIndex);
// Parse with the ids that occur most often among the stored files.
const imdbId = Promises.mostCommonValue(storedVideos.map(stored => stored.imdbId));
const kitsuId = Promises.mostCommonValue(storedVideos.map(stored => stored.kitsuId));
const { contents, videos, subtitles } = await parseTorrentFiles({ ...torrent, imdbId, kitsuId })
// Freshly parsed videos are used only for the placeholder case; otherwise the stored videos are kept.
.then(torrentContents => notOpenedVideo ? torrentContents : { ...torrentContents, videos: storedVideos })
.then(torrentContents => assignSubtitles(torrentContents))
.catch(error => {
console.log(`Failed getting contents for [${torrent.infoHash}] ${torrent.title}`, error.message);
return {};
});
if (!contents || !contents.length) {
return;
}
if (notOpenedVideo && videos.length === 1) {
// if both have a single video and stored one was not opened, update stored one to true metadata and use that
storedVideos[0].fileIndex = videos[0].fileIndex;
storedVideos[0].title = videos[0].title;
storedVideos[0].size = videos[0].size;
storedVideos[0].subtitles = videos[0].subtitles;
videos[0] = storedVideos[0];
}
// no videos available or more than one new videos were in the torrent
const shouldDeleteOld = notOpenedVideo && videos.every(video => !video.id);
return repository.createTorrent({ ...torrent, contents, subtitles })
.then(() => {
if (shouldDeleteOld) {
console.error(`Deleting old video for [${torrent.infoHash}] ${torrent.title}`)
return storedVideos[0].destroy();
}
return Promise.resolve();
})
// Files are saved one at a time, after the placeholder (if any) has been removed.
.then(() => Promises.sequence(videos.map(video => () => repository.createFile(video))))
.then(() => console.log(`Created contents for ${torrent.provider} [${torrent.infoHash}] ${torrent.title}`))
.catch(error => console.error(`Failed saving contents for [${torrent.infoHash}] ${torrent.title}`, error));
}
/**
 * Stores the current seeder count of a torrent.
 *
 * @param {object} torrent - needs `infoHash` or both `provider` and `torrentId`, plus an
 *   integer `seeders`.
 * @returns {Promise<*>} the torrent itself when it cannot be identified or has no integer
 *   seeder count; otherwise the repository result, or `undefined` when the update fails.
 */
export async function updateTorrentSeeders(torrent) {
  const isIdentifiable = Boolean(torrent.infoHash || (torrent.provider && torrent.torrentId));
  const hasSeederCount = Number.isInteger(torrent.seeders);
  if (!isIdentifiable || !hasSeederCount) {
    return torrent;
  }
  const logFailure = error => {
    console.warn('Failed updating seeders:', error);
    return undefined;
  };
  return repository.setTorrentSeeders(torrent, torrent.seeders).catch(logFailure);
}

View File

@@ -0,0 +1,512 @@
import moment from 'moment';
import Bottleneck from 'bottleneck';
import distance from 'jaro-winkler';
import { parse } from 'parse-torrent-title';
import * as Promises from './promises.js';
import { getMetadata, getImdbId, getKitsuId } from './metadata.js';
import { parseSeriesVideos, isPackTorrent } from './parseHelper.js';
import { Type } from './types.js';
import { isDisk } from './extension.js';
import {torrentFiles} from "./torrent.js";
import { metadataConfig } from './config.js';
const MIN_SIZE = 5 * 1024 * 1024; // 5 MB
const imdb_limiter = new Bottleneck({ maxConcurrent: metadataConfig.IMDB_CONCURRENT, minTime: metadataConfig.IMDB_INTERVAL_MS });
/**
 * Entry point of file parsing: fetches metadata for the torrent and dispatches to the
 * movie or the series parser.
 *
 * For non-anime torrents, `torrent.type` is overwritten when the metadata reports a
 * different type.
 *
 * @param {object} torrent - torrent with `title`, `type` and an imdb or kitsu id.
 * @returns {Promise<{contents: *, videos: Array<object>, subtitles: *}>}
 */
export async function parseTorrentFiles(torrent) {
  const parsedTorrentName = parse(torrent.title);
  const metadataId = torrent.kitsuId || torrent.imdbId;
  // work on a shallow copy of the metadata; a failed lookup simply means "no metadata"
  const metadata = await getMetadata(metadataId, torrent.type || Type.MOVIE)
    .then(meta => ({ ...meta }))
    .catch(() => undefined);

  const metadataType = metadata && metadata.type;
  if (torrent.type !== Type.ANIME && metadataType && metadataType !== torrent.type) {
    // metadata knows better whether this is a movie or a series
    torrent.type = metadataType;
  }

  if (torrent.type !== Type.MOVIE) {
    return parseSeriesFiles(torrent, parsedTorrentName, metadata);
  }
  // a movie title may still parse as season 5, episode 1 or 5; those stay movies
  const looksLikeMovie = !parsedTorrentName.seasons
    || parsedTorrentName.season === 5 && [1, 5].includes(parsedTorrentName.episode);
  return looksLikeMovie
    ? parseMovieFiles(torrent, parsedTorrentName, metadata)
    : parseSeriesFiles(torrent, parsedTorrentName, metadata);
}
/**
 * Builds the video entries of a movie torrent.
 *
 * A torrent holding one movie (optionally split in two parts) gets the torrent's own ids
 * on every video. A torrent holding several movies gets an imdb id looked up per file.
 *
 * @param {object} torrent - torrent being parsed.
 * @param {object} parsedName - parsed torrent title (not used here).
 * @param {object|undefined} metadata - metadata of the torrent, when available.
 * @returns {Promise<{contents: *, videos: Array<object>, subtitles: *}>}
 */
async function parseMovieFiles(torrent, parsedName, metadata) {
  const { contents, videos, subtitles } = await getMoviesTorrentContent(torrent);
  const candidates = videos.filter(video => video.size > MIN_SIZE && !isFeaturette(video));

  if (isSingleMovie(candidates)) {
    const imdbId = torrent.imdbId || metadata && metadata.imdbId;
    const kitsuId = torrent.kitsuId || metadata && metadata.kitsuId;
    const singleMovieVideos = candidates.map(video => ({
      infoHash: torrent.infoHash,
      fileIndex: video.fileIndex,
      title: video.path || torrent.title,
      size: video.size || torrent.size,
      imdbId,
      kitsuId
    }));
    return { contents, videos: singleMovieVideos, subtitles };
  }

  // several movies in one torrent: resolve an imdb id for each file, one lookup at a time
  const lookups = candidates.map(video => () => isFeaturette(video)
    ? Promise.resolve(video)
    : findMovieImdbId(video.name).then(imdbId => ({ ...video, imdbId })));
  const identified = await Promises.sequence(lookups);
  const parsedVideos = identified.map(video => ({
    infoHash: torrent.infoHash,
    fileIndex: video.fileIndex,
    title: video.path || video.name,
    size: video.size,
    imdbId: video.imdbId,
  }));
  return { contents, videos: parsedVideos, subtitles };
}
/**
 * Builds the video entries of a series torrent: parses season/episode info per file,
 * normalises the numbering against the metadata and maps every file to its episode entries.
 *
 * @param {object} torrent - torrent being parsed.
 * @param {object} parsedName - parsed torrent title (not used here).
 * @param {object|undefined} metadata - metadata of the series, when available.
 * @returns {Promise<{contents: *, videos: Array<object>, subtitles: *}>}
 */
async function parseSeriesFiles(torrent, parsedName, metadata) {
  const { contents, videos, subtitles } = await getSeriesTorrentContent(torrent);

  // tiny files are dropped unless the torrent consists of a single video
  const sizable = videos.filter(video => videos.length === 1 || video.size > MIN_SIZE);
  const parsed = await parseSeriesVideos(torrent, sizable);
  const decomposed = await decomposeEpisodes(torrent, parsed, metadata);
  const assigned = await assignKitsuOrImdbEpisodes(torrent, decomposed, metadata);
  const mapped = await Promise.all(assigned.map(video => video.isMovie
    ? mapSeriesMovie(video, torrent)
    : mapSeriesEpisode(video, torrent, assigned)));

  // each mapping yields one entry or a list of entries; flatten them into a single list
  const parsedVideos = [].concat(...mapped)
    .map(video => isFeaturette(video) ? clearInfoFields(video) : video);
  return { contents, videos: parsedVideos, subtitles };
}
/**
 * Loads the file listing of a movie torrent.
 *
 * When the listing cannot be loaded and the torrent is not a pack, a single placeholder
 * video named after the torrent is returned instead. The same placeholder is used when
 * the listing holds a disk image but no videos.
 *
 * @param {object} torrent - torrent with `title` and `size`.
 * @returns {Promise<object>} the listing (`contents`, `videos`, `subtitles`) or the placeholder.
 */
async function getMoviesTorrentContent(torrent) {
  const placeholderVideos = () => [{ name: torrent.title, path: torrent.title, size: torrent.size }];
  const pending = torrentFiles(torrent);
  let files;
  try {
    files = await pending;
  } catch (error) {
    if (isPackTorrent(torrent)) {
      throw error;
    }
    files = { videos: placeholderVideos() };
  }
  const hasContents = files.contents && files.contents.length;
  if (hasContents && !files.videos.length && isDiskTorrent(files.contents)) {
    files.videos = placeholderVideos();
  }
  return files;
}
/**
 * Loads the file listing of a series torrent.
 *
 * When the listing cannot be loaded and the torrent is not a pack, a single placeholder
 * video named after the torrent is returned; for packs the failure is passed on.
 *
 * @param {object} torrent - torrent with `title` and `size`.
 * @returns {Promise<object>} the listing or the placeholder.
 */
async function getSeriesTorrentContent(torrent) {
  const pending = torrentFiles(torrent);
  try {
    return await pending;
  } catch (error) {
    if (isPackTorrent(torrent)) {
      throw error;
    }
    return { videos: [{ name: torrent.title, path: torrent.title, size: torrent.size }] };
  }
}
/**
 * Turns one parsed series file into its stored video entries.
 *
 * A file with episode numbers yields one entry per episode. A file without any yields a
 * single un-numbered entry when it is the only file, when other files do have episodes,
 * or when the torrent title names seasons; otherwise it yields nothing.
 *
 * @param {object} file - parsed file (`episodes` / `kitsuEpisodes`, `season`, ids, ...).
 * @param {object} torrent - owning torrent.
 * @param {Array<object>} files - all parsed files of the torrent.
 * @returns {Promise<object|Array<object>>}
 */
async function mapSeriesEpisode(file, torrent, files) {
  const title = file.path || file.name;

  if (!file.episodes && !file.kitsuEpisodes) {
    const keepUnnumbered = files.length === 1
      || files.some(f => f.episodes || f.kitsuEpisodes)
      || parse(torrent.title).seasons;
    if (!keepUnnumbered) {
      return [];
    }
    return {
      infoHash: torrent.infoHash,
      fileIndex: file.fileIndex,
      title,
      size: file.size,
      imdbId: torrent.imdbId || file.imdbId,
    };
  }

  const numbering = file.episodes || file.kitsuEpisodes;
  return Array.from(numbering.keys(), index => ({
    infoHash: torrent.infoHash,
    fileIndex: file.fileIndex,
    title,
    size: file.size,
    imdbId: file.imdbId || torrent.imdbId,
    imdbSeason: file.season,
    imdbEpisode: file.episodes && file.episodes[index],
    kitsuId: file.kitsuId || torrent.kitsuId,
    kitsuEpisode: file.kitsuEpisodes && file.kitsuEpisodes[index]
  }));
}
/**
 * Maps a file inside a series torrent that was recognised as a movie.
 *
 * The movie id is looked up on kitsu for anime torrents and on imdb otherwise, then the
 * matching metadata is used to fill season/episode fields where it lists videos.
 *
 * @param {object} file - parsed file flagged as a movie.
 * @param {object} torrent - owning torrent.
 * @returns {Promise<Array<object>>} a one-element list with the video entry.
 */
async function mapSeriesMovie(file, torrent) {
  const kitsuId = torrent.type === Type.ANIME ? await findMovieKitsuId(file) : undefined;
  const imdbId = !kitsuId ? await findMovieImdbId(file) : undefined;
  const metadata = await getMetadata(kitsuId || imdbId, Type.MOVIE).catch(() => ({}));

  const listed = metadata.videos;
  const hasEpisode = listed && listed.length && (file.episode || listed.length === 1);
  const episodeVideo = hasEpisode && listed[(file.episode || 1) - 1];
  const viaImdb = episodeVideo && metadata.imdbId;
  const viaKitsu = episodeVideo && metadata.kitsuId;

  const entry = {
    infoHash: torrent.infoHash,
    fileIndex: file.fileIndex,
    title: file.path || file.name,
    size: file.size,
    imdbId: metadata.imdbId || imdbId,
    kitsuId: metadata.kitsuId || kitsuId,
    imdbSeason: viaImdb ? episodeVideo.imdbSeason : undefined,
    imdbEpisode: viaImdb ? episodeVideo.imdbEpisode || episodeVideo.episode : undefined,
    kitsuEpisode: viaKitsu ? episodeVideo.kitsuEpisode || episodeVideo.episode : undefined
  };
  return [entry];
}
/**
 * Normalises the season/episode numbering of parsed files, in place, and returns `files`.
 *
 * Files are returned untouched when none of them has `episodes` or a `date`. Otherwise
 * one of three rewrites is applied, chosen by the is*Files checks below: concatenated
 * numbering, date based numbering, or absolute numbering.
 *
 * `metadata` defaults to an object with an empty `episodeCount` when it is undefined.
 */
async function decomposeEpisodes(torrent, files, metadata = { episodeCount: [] }) {
if (files.every(file => !file.episodes && !file.date)) {
return files;
}
// Rewrites files named with a 0 episode before any other check runs.
preprocessEpisodes(files);
if (torrent.type === Type.ANIME && torrent.kitsuId) {
if (needsCinemetaMetadataForAnime(files, metadata)) {
// In some cases anime could be resolved to wrong kitsuId
// because of imdb season naming/absolute per series naming/multiple seasons
// So in these cases we need to fetch cinemeta based metadata and decompose episodes using that
// (this call replaces videos / episodeCount / totalCount on the metadata object itself)
await updateToCinemetaMetadata(metadata);
if (files.some(file => Number.isInteger(file.season))) {
// sometimes multi season anime torrents don't include season 1 naming
files
.filter(file => !Number.isInteger(file.season) && file.episodes)
.forEach(file => file.season = 1);
}
} else {
// otherwise for anime type episodes are always absolute and for a single season
files
.filter(file => file.episodes && file.season !== 0)
.forEach(file => file.season = 1);
return files;
}
}
// Every episode number of the non-movie files, ascending; only the concat check uses it.
const sortedEpisodes = files
.map(file => !file.isMovie && file.episodes || [])
.reduce((a, b) => a.concat(b), [])
.sort((a, b) => a - b);
// The checks are mutually exclusive here: the first one that matches decides the rewrite.
if (isConcatSeasonAndEpisodeFiles(files, sortedEpisodes, metadata)) {
decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata);
} else if (isDateEpisodeFiles(files, metadata)) {
decomposeDateEpisodeFiles(torrent, files, metadata);
} else if (isAbsoluteEpisodeFiles(torrent, files, metadata)) {
decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
}
// decomposeEpisodeTitleFiles(torrent, files, metadata);
return files;
}
/**
 * Rewrites, in place, files named with a 0 episode (e.g. S02E00): the season number
 * becomes the episode number and the season is set to 0.
 *
 * @param {Array<object>} files - parsed files.
 * @returns {undefined}
 */
function preprocessEpisodes(files) {
  const zeroEpisodeFiles = files.filter(file => Number.isInteger(file.season) && file.episode === 0);
  for (const file of zeroEpisodeFiles) {
    const { season } = file;
    file.episode = season;
    file.episodes = [season];
    file.season = 0;
  }
}
/**
 * Detects torrents whose files use concatenated season+episode numbers (e.g. 101 = S01E01).
 *
 * This is assumed when either:
 *  - enough episode numbers above 100 fit a season of the metadata once split, and enough
 *    files either have no season or have episodes whose hundreds match their season; or
 *  - enough files only have episode numbers above 100 that also exceed the total episode count.
 * Metadata carrying a kitsu id never qualifies.
 *
 * @param {Array<object>} files - parsed files.
 * @param {Array<number>} sortedEpisodes - every episode number of the non-movie files.
 * @param {object} metadata - needs `episodeCount`, `totalCount`, optionally `kitsuId`.
 * @returns {boolean}
 */
function isConcatSeasonAndEpisodeFiles(files, sortedEpisodes, metadata) {
  if (metadata.kitsuId !== undefined) {
    // anime hardly ever uses this naming scheme
    return false;
  }
  const fileCount = files.length;
  const thresholdAbove = Math.max(Math.ceil(fileCount * 0.05), 5);
  const thresholdSorted = Math.max(Math.ceil(fileCount * 0.8), 8);
  const threshold = Math.max(Math.ceil(fileCount * 0.8), 5);

  // episode count of the season an episode number would belong to once split
  const seasonSize = ep => metadata.episodeCount[div100(ep) - 1];
  const concatEpisodeCount = sortedEpisodes
    .filter(ep => ep > 100 && seasonSize(ep) < ep && seasonSize(ep) >= mod100(ep))
    .length;

  const episodic = files.filter(file => !file.isMovie && file.episodes);
  const concatFileCount = episodic
    .filter(file => !file.season || file.episodes.every(ep => div100(ep) === file.season))
    .length;
  const aboveTotalCount = episodic
    .filter(file => file.episodes.every(ep => ep > 100))
    .filter(file => file.episodes.every(ep => ep > metadata.totalCount))
    .length;

  return concatEpisodeCount >= thresholdSorted && concatFileCount >= threshold
    || aboveTotalCount >= thresholdAbove;
}
/**
 * Tells whether every file is identified by a date rather than by a season that exists
 * in the metadata.
 *
 * @param {Array<object>} files - parsed files.
 * @param {object} metadata - needs `episodeCount`.
 * @returns {boolean}
 */
function isDateEpisodeFiles(files, metadata) {
  const isDatedOnly = file => {
    const hasKnownSeason = file.season && metadata.episodeCount[file.season - 1];
    return !hasKnownSeason && file.date;
  };
  return files.every(isDatedOnly);
}
/**
 * Detects absolute episode numbering (episodes counted across seasons).
 *
 * True when no episodic file has a season, when an anime torrent only names seasons the
 * metadata does not have, or when at least a fifth of the files carry episode numbers
 * larger than the episode count of their own season.
 *
 * @param {object} torrent - owning torrent (`type`, `kitsuId`).
 * @param {Array<object>} files - parsed files.
 * @param {object} metadata - needs `episodeCount`.
 * @returns {boolean}
 */
function isAbsoluteEpisodeFiles(torrent, files, metadata) {
  const minimumBeyondSeason = Math.ceil(files.length / 5);
  const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
  const episodic = files.filter(file => !file.isMovie && file.episodes);
  const beyondSeason = files.filter(file => file.season && file.episodes
    && file.episodes.every(ep => metadata.episodeCount[file.season - 1] < ep));

  if (episodic.every(file => !file.season)) {
    return true;
  }
  if (isAnime && episodic.every(file => file.season > metadata.episodeCount.length)) {
    return true;
  }
  return beyondSeason.length >= minimumBeyondSeason;
}
/**
 * Checks whether a file looks like a freshly aired episode that the metadata does not list yet.
 *
 * The result is truthy only when all of these hold: the torrent is not an anime with a
 * kitsu id, the file is not a movie, it has episodes, its season is not 1, the metadata
 * status matches "continuing" or "current", the season is at least the number of seasons
 * in the metadata, and every episode number exceeds the episode count known for that season.
 *
 * The value is the result of an `&&` chain, so it may be any falsy value rather than
 * strictly `false`; callers should only test it for truthiness.
 */
function isNewEpisodeNotInMetadata(torrent, file, metadata) {
// new episode might not yet been indexed by cinemeta.
// detect this if episode number is larger than the last episode or season is larger than the last one
// only for non anime metas
const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
return !isAnime && !file.isMovie && file.episodes && file.season !== 1
&& /continuing|current/i.test(metadata.status)
&& file.season >= metadata.episodeCount.length
// a season missing from episodeCount counts as having 0 known episodes
&& file.episodes.every(ep => ep > (metadata.episodeCount[file.season - 1] || 0));
}
/**
 * Splits concatenated numbers (e.g. 511) into season (5) and episode (11), in place.
 *
 * Only files whose episodes are all above 100 are touched, and only when the target
 * season has fewer than 100 episodes and, if the file names a season, the hundreds of
 * every episode number match it. Season 0 files are left alone.
 *
 * @param {object} torrent - owning torrent (not used here).
 * @param {Array<object>} files - parsed files, mutated in place.
 * @param {object} metadata - needs `episodeCount`.
 * @returns {undefined}
 */
function decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata) {
  const isConcatNamed = file => {
    if (!file.episodes || file.season === 0 || !file.episodes.every(ep => ep > 100)) {
      return false;
    }
    const season = file.season || div100(file.episodes[0]);
    if (!(metadata.episodeCount[season - 1] < 100)) {
      return false;
    }
    return !file.season || file.episodes.every(ep => div100(ep) === file.season);
  };
  // select first, mutate afterwards, so that selection never sees half-rewritten files
  const concatNamed = files.filter(isConcatNamed);
  for (const file of concatNamed) {
    file.season = div100(file.episodes[0]);
    file.episodes = file.episodes.map(ep => mod100(ep));
  }
}
/**
 * Converts absolute episode numbers into season + episode, in place, using the
 * per-season episode counts of the metadata.
 *
 * Without any season counts, files that have episodes but no season simply get season 1.
 * Otherwise the season is the first one whose cumulative episode count reaches the
 * file's first episode (or the last season when none does), and every episode number is
 * reduced by the number of episodes in the preceding seasons.
 *
 * @param {object} torrent - owning torrent.
 * @param {Array<object>} files - parsed files, mutated in place.
 * @param {object} metadata - needs `episodeCount`.
 * @returns {undefined}
 */
function decomposeAbsoluteEpisodeFiles(torrent, files, metadata) {
  if (metadata.episodeCount.length === 0) {
    const seasonless = files
      .filter(file => !Number.isInteger(file.season) && file.episodes && !file.isMovie);
    for (const file of seasonless) {
      file.season = 1;
    }
    return;
  }

  const episodesThrough = seasonIndex => metadata.episodeCount
    .slice(0, seasonIndex + 1)
    .reduce((a, b) => a + b);
  const episodesBefore = seasonIndex => metadata.episodeCount
    .slice(0, seasonIndex)
    .reduce((a, b) => a + b, 0);

  const absoluteFiles = files
    .filter(file => file.episodes && !file.isMovie && file.season !== 0)
    .filter(file => !isNewEpisodeNotInMetadata(torrent, file, metadata))
    .filter(file => !file.season || (metadata.episodeCount[file.season - 1] || 0) < file.episodes[0]);

  for (const file of absoluteFiles) {
    const firstEpisode = file.episodes[0];
    const reached = [...metadata.episodeCount.keys()]
      .find(i => episodesThrough(i) >= firstEpisode);
    // fall back to the last season when the number exceeds every cumulative count
    const seasonIdx = reached === undefined ? metadata.episodeCount.length - 1 : reached;
    const offset = episodesBefore(seasonIdx);
    file.season = seasonIdx + 1;
    file.episodes = file.episodes.map(ep => ep - offset);
  }
}
/**
 * Assigns season and episode to date-named files by matching the file date against the
 * release dates listed in the metadata. Files are mutated in place.
 *
 * Release dates are shifted by the offset returned for the metadata's country before
 * being formatted as YYYY-MM-DD. Nothing happens when the metadata lists no videos.
 *
 * @param {object} torrent - owning torrent (not used here).
 * @param {Array<object>} files - parsed files; those with a `date` are candidates.
 * @param {object|undefined} metadata - needs `videos` and `country`.
 * @returns {undefined}
 */
function decomposeDateEpisodeFiles(torrent, files, metadata) {
  const releasedVideos = metadata && metadata.videos;
  if (!releasedVideos || !releasedVideos.length) {
    return;
  }
  const timeZoneOffset = getTimeZoneOffset(metadata.country);
  const videoByReleaseDate = {};
  releasedVideos.forEach(video => {
    const releaseDate = moment(video.released).utcOffset(timeZoneOffset).format('YYYY-MM-DD');
    // when two videos share a date the later one in the list wins
    videoByReleaseDate[releaseDate] = video;
  });

  files.forEach(file => {
    const video = file.date && videoByReleaseDate[file.date];
    if (!video) {
      return;
    }
    file.season = video.season;
    file.episodes = [video.episode];
  });
}
/* eslint-disable no-unused-vars */
/**
 * Matches every file to the metadata episode whose name is closest to the episode title
 * taken from the file name, and writes that episode's season/episode onto the file.
 *
 * Currently unused; the only call site is commented out.
 *
 * @param {object} torrent - owning torrent (not used here).
 * @param {Array<object>} files - parsed files, mutated in place.
 * @param {object} metadata - needs `videos` with `name`, `season`, `episode`.
 * @returns {undefined}
 */
function decomposeEpisodeTitleFiles(torrent, files, metadata) {
  const closestFirst = (a, b) => b.distance - a.distance;
  files.forEach(file => {
    // whatever follows the episode marker (or " - ") and precedes the file extension
    const episodeTitle = file.name.replace('_', ' ')
      .replace(/^.*(?:E\d+[abc]?|- )\s?(.+)\.\w{1,4}$/, '$1')
      .trim();
    const ranked = metadata.videos
      .map(video => ({ ...video, distance: distance(episodeTitle, video.name) }))
      .sort(closestFirst);
    const foundEpisode = ranked[0];
    if (!foundEpisode) {
      return;
    }
    file.isMovie = false;
    file.season = foundEpisode.season;
    file.episodes = [foundEpisode.episode];
  });
}
/* eslint-enable no-unused-vars */
/**
 * Returns the UTC offset used when comparing release dates for a country.
 *
 * @param {string} country - country name as given by the metadata.
 * @returns {string} '-08:00' for the United States, '00:00' for everything else.
 */
function getTimeZoneOffset(country) {
  const unitedStatesNames = ['United States', 'USA'];
  return unitedStatesNames.includes(country) ? '-08:00' : '00:00';
}
/**
 * Cross-maps the season/episode numbers of parsed files between imdb and kitsu
 * numbering, based on the videos listed in the metadata. Files are mutated in place.
 *
 * - Without metadata videos: for anime torrents the parsed episodes become kitsu
 *   episodes, and when the metadata says the entry is a movie its imdb id is put on
 *   files that have none. Other torrents are returned untouched.
 * - When the metadata videos carry an integer `imdbSeason` (or there is no imdb id),
 *   file numbering is treated as kitsu numbering and translated to imdb.
 * - Otherwise, when the metadata videos carry `kitsuEpisode`, file numbering is treated
 *   as imdb numbering and translated to kitsu.
 *
 * @param {object} torrent - owning torrent; only `type` is read.
 * @param {Array<object>} files - parsed files, mutated in place.
 * @param {object|undefined} metadata - metadata of the series; may be undefined.
 * @returns {Array<object>} the same `files` array.
 */
function assignKitsuOrImdbEpisodes(torrent, files, metadata) {
  if (!metadata || !metadata.videos || !metadata.videos.length) {
    if (torrent.type === Type.ANIME) {
      // assign episodes as kitsu episodes for anime when no metadata available for imdb mapping
      files
        .filter(file => file.season && file.episodes)
        .forEach(file => {
          file.kitsuEpisodes = file.episodes;
          file.season = undefined;
          file.episodes = undefined;
        });
      // `metadata` itself may be undefined in this branch, so it has to be checked first
      if (metadata && metadata.type === Type.MOVIE && files.every(file => !file.imdbId)) {
        // sometimes a movie has episode naming, thus not recognized as a movie and imdbId not assigned
        files.forEach(file => file.imdbId = metadata.imdbId);
      }
    }
    return files;
  }

  // season -> episode -> metadata video
  const seriesMapping = metadata.videos
    .reduce((map, video) => {
      const episodeMap = map[video.season] || {};
      episodeMap[video.episode] = video;
      map[video.season] = episodeMap;
      return map;
    }, {});

  if (metadata.videos.some(video => Number.isInteger(video.imdbSeason)) || !metadata.imdbId) {
    // kitsu episode info is the base
    files
      .filter(file => Number.isInteger(file.season) && file.episodes)
      .forEach(file => {
        const seasonMapping = seriesMapping[file.season];
        const episodeMapping = seasonMapping && seasonMapping[file.episodes[0]];
        file.kitsuEpisodes = file.episodes;
        if (episodeMapping && Number.isInteger(episodeMapping.imdbSeason)) {
          file.imdbId = metadata.imdbId;
          file.season = episodeMapping.imdbSeason;
          file.episodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].imdbEpisode);
        } else {
          // no imdb mapping available for episode
          file.season = undefined;
          file.episodes = undefined;
        }
      });
  } else if (metadata.videos.some(video => video.kitsuEpisode)) {
    // imdb episode info is base
    files
      .filter(file => Number.isInteger(file.season) && file.episodes)
      .forEach(file => {
        if (seriesMapping[file.season]) {
          const seasonMapping = seriesMapping[file.season];
          file.imdbId = metadata.imdbId;
          file.kitsuId = seasonMapping[file.episodes[0]] && seasonMapping[file.episodes[0]].kitsuId;
          file.kitsuEpisodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].kitsuEpisode);
        } else if (seriesMapping[file.season - 1]) {
          // sometimes a second season might be a continuation of the previous season
          const seasonMapping = seriesMapping[file.season - 1];
          const episodes = Object.values(seasonMapping);
          const firstKitsuId = episodes.length && episodes[0].kitsuId;
          const differentTitlesCount = new Set(episodes.map(ep => ep.kitsuId)).size;
          const skippedCount = episodes.filter(ep => ep.kitsuId === firstKitsuId).length;
          const seasonEpisodes = files
            .filter(otherFile => otherFile.season === file.season)
            .reduce((a, b) => a.concat(b.episodes), []);
          const isAbsoluteOrder = seasonEpisodes.every(ep => ep > skippedCount && ep <= episodes.length);
          const isNormalOrder = seasonEpisodes.every(ep => ep + skippedCount <= episodes.length);
          if (differentTitlesCount >= 1 && (isAbsoluteOrder || isNormalOrder)) {
            file.imdbId = metadata.imdbId;
            file.season = file.season - 1;
            file.episodes = file.episodes.map(ep => isAbsoluteOrder ? ep : ep + skippedCount);
            // the season may not list this exact episode number, so guard the lookup
            file.kitsuId = seasonMapping[file.episodes[0]] && seasonMapping[file.episodes[0]].kitsuId;
            file.kitsuEpisodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].kitsuEpisode);
          }
        } else if (Object.values(seriesMapping).length === 1 && seriesMapping[1]) {
          // sometimes series might be named with sequel season but it's not a season on imdb and a new title
          const seasonMapping = seriesMapping[1];
          file.imdbId = metadata.imdbId;
          file.season = 1;
          // the season may not list this exact episode number, so guard the lookup
          file.kitsuId = seasonMapping[file.episodes[0]] && seasonMapping[file.episodes[0]].kitsuId;
          file.kitsuEpisodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].kitsuEpisode);
        }
      });
  }
  return files;
}
/**
 * Decides whether the kitsu based metadata of an anime cannot be trusted for numbering.
 *
 * That is the case when the metadata videos span more than one imdb season, or when some
 * episodic file names a season outside the range of imdb seasons, or only has episode
 * numbers above the total episode count.
 *
 * @param {Array<object>} files - parsed files.
 * @param {object|undefined} metadata - needs `imdbId`, `videos`, `totalCount`.
 * @returns {boolean}
 */
function needsCinemetaMetadataForAnime(files, metadata) {
  const hasImdbVideos = metadata && metadata.imdbId && metadata.videos && metadata.videos.length;
  if (!hasImdbVideos) {
    return false;
  }
  const imdbSeasons = metadata.videos.map(video => video.imdbSeason);
  const minSeason = Math.min(...imdbSeasons) || Number.MAX_VALUE;
  const maxSeason = Math.max(...imdbSeasons) || Number.MAX_VALUE;
  const differentSeasons = new Set(imdbSeasons.filter(season => Number.isInteger(season))).size;
  const total = metadata.totalCount || Number.MAX_VALUE;

  if (differentSeasons > 1) {
    return true;
  }
  const isOutOfRange = file => file.season < minSeason
    || file.season > maxSeason
    || file.episodes.every(ep => ep > total);
  return files
    .filter(file => !file.isMovie && file.episodes)
    .some(isOutOfRange);
}
/**
 * Re-fetches metadata by imdb id and copies its `videos`, `episodeCount` and
 * `totalCount` onto the given metadata object, in place.
 *
 * When the fetched metadata lists no videos, the object keeps its own values.
 *
 * @param {object} metadata - metadata to refresh; needs `imdbId` and `type`.
 * @returns {Promise<object|undefined>} the same object, or `undefined` when the fetch
 *   fails (the failure is only logged).
 */
async function updateToCinemetaMetadata(metadata) {
  const pending = getMetadata(metadata.imdbId, metadata.type);
  try {
    const fetched = await pending;
    const hasVideos = fetched.videos && fetched.videos.length;
    const source = hasVideos ? fetched : metadata;
    metadata.videos = source.videos;
    metadata.episodeCount = source.episodeCount;
    metadata.totalCount = source.totalCount;
    return metadata;
  } catch (error) {
    console.warn(`Failed ${metadata.imdbId} metadata cinemeta update due: ${error.message}`);
    return undefined;
  }
}
/**
 * Looks up the imdb id of a movie through the rate limited imdb scheduler.
 *
 * @param {string|object} title - a raw title (parsed first) or an already parsed title.
 * @returns {Promise<string|undefined>} the imdb id, or `undefined` when the lookup fails.
 */
function findMovieImdbId(title) {
  let parsedTitle = title;
  if (typeof title === 'string') {
    parsedTitle = parse(title);
  }
  console.log(`Finding movie imdbId for ${title}`);
  const lookup = () => getImdbId(parsedTitle, Type.MOVIE).catch(() => undefined);
  return imdb_limiter.schedule(lookup);
}
/**
 * Looks up the kitsu id of a movie.
 *
 * @param {string|object} title - a raw title (parsed first) or an already parsed title.
 * @returns {Promise<*>} the kitsu id, or `undefined` when the lookup fails.
 */
function findMovieKitsuId(title) {
  let parsedTitle = title;
  if (typeof title === 'string') {
    parsedTitle = parse(title);
  }
  const ignoreFailure = () => undefined;
  return getKitsuId(parsedTitle, Type.MOVIE).catch(ignoreFailure);
}
/**
 * Tells whether any entry of the torrent contents is a disk image file.
 *
 * @param {Array<object>} contents - entries carrying a `path`.
 * @returns {boolean}
 */
function isDiskTorrent(contents) {
  const isDiskEntry = ({ path }) => isDisk(path);
  return contents.some(isDiskEntry);
}
/**
 * Tells whether the videos make up a single movie: either exactly one video, or exactly
 * two videos named as part/disc/cd 1 and 2.
 *
 * @param {Array<object>} videos - entries carrying a `path`.
 * @returns {*} `true` for one video; for two videos the "part 2" entry when both parts
 *   are present and `undefined` otherwise; `false` for any other count. Only the
 *   truthiness is meaningful.
 */
function isSingleMovie(videos) {
  if (videos.length === 1) {
    return true;
  }
  if (videos.length !== 2) {
    return false;
  }
  const firstPart = videos.find(v => /\b(?:part|disc|cd)[ ._-]?0?1\b|^0?1\.\w{2,4}$/i.test(v.path));
  if (!firstPart) {
    return firstPart;
  }
  return videos.find(v => /\b(?:part|disc|cd)[ ._-]?0?2\b|^0?2\.\w{2,4}$/i.test(v.path));
}
/**
 * Tells whether a video lives in a featurette folder or carries the "extras-grym" tag.
 *
 * @param {object} video - entry carrying a `path`.
 * @returns {boolean}
 */
function isFeaturette(video) {
  const featurettePattern = /featurettes?\/|extras-grym/i;
  return featurettePattern.test(video.path);
}
/**
 * Blanks every imdb/kitsu identification field of a video, in place.
 *
 * @param {object} video - video entry to strip.
 * @returns {object} the same object.
 */
function clearInfoFields(video) {
  const infoFields = ['imdbId', 'imdbSeason', 'imdbEpisode', 'kitsuId', 'kitsuEpisode'];
  for (const field of infoFields) {
    video[field] = undefined;
  }
  return video;
}
/**
 * Integer part of `episode / 100`, i.e. the season of a concatenated number (511 -> 5).
 *
 * @param {number} episode - concatenated season+episode number.
 * @returns {number}
 */
function div100(episode) {
  const hundreds = episode / 100;
  return hundreds | 0; // bitwise OR with 0 drops the fraction
}
/**
 * Last two digits of a number, i.e. the episode of a concatenated number (511 -> 11).
 *
 * @param {number} episode - concatenated season+episode number.
 * @returns {number}
 */
function mod100(episode) {
  const episodesPerHundred = 100;
  return episode % episodesPerHundred;
}

View File

@@ -0,0 +1,89 @@
import { parse } from 'parse-torrent-title';
/**
 * Attaches subtitle files to the videos they most probably belong to.
 *
 * With a single video every subtitle goes to it. With several videos each subtitle is
 * matched individually; subtitles that cannot be matched are returned as leftovers.
 * Videos are mutated in place (`video.subtitles`).
 *
 * @param {{contents: *, videos: Array<object>, subtitles: Array<object>}} torrentContents
 * @returns {{contents: *, videos: Array<object>, subtitles: Array<object>}} the same
 *   contents and videos, with `subtitles` reduced to the unassigned ones.
 */
export function assignSubtitles({ contents, videos, subtitles }) {
  const hasBoth = videos && videos.length && subtitles && subtitles.length;
  if (!hasBoth) {
    return { contents, videos, subtitles };
  }
  if (videos.length === 1) {
    videos[0].subtitles = subtitles;
    return { contents, videos, subtitles: [] };
  }

  const parsedVideos = videos.map(video => _parseVideo(video));
  // match everything first, assign afterwards
  const matches = subtitles
    .map(subtitle => [subtitle, _mostProbableSubtitleVideos(subtitle, parsedVideos)]);

  const unassignedSubs = [];
  for (const [subtitle, targets] of matches) {
    if (!targets) {
      unassignedSubs.push(subtitle);
      continue;
    }
    targets.forEach(video => {
      video.subtitles = (video.subtitles || []).concat(subtitle);
    });
  }
  return { contents, videos, subtitles: unassignedSubs };
}
/**
 * Prepares a video for subtitle matching: keeps a reference to the video, extracts its
 * extension-less file name and its folder, and adds the info parsed from its title.
 *
 * @param {object} video - video entry with a path-like `title`.
 * @returns {object} `{ videoFile, fileName, folderName, ...parsed title info }`.
 */
function _parseVideo(video) {
  const pathParts = video.title.split('/');
  const baseName = pathParts.pop();
  return {
    videoFile: video,
    fileName: baseName.replace(/\.(\w{2,4})$/, ''),
    folderName: video.title.replace(/\/?[^/]+$/, ''),
    ...parseFilename(video.title)
  };
}
/**
 * Finds the video(s) a subtitle file most probably belongs to.
 *
 * Strategies are tried in order and the first conclusive one wins:
 *  1. exactly one video whose file name is contained in the subtitle file name;
 *  2. same title, seasons and episodes;
 *  3. same seasons and episodes;
 *  4. same title;
 *  5. same episodes.
 * Strategies 2-5 are conclusive when all their matches point at one file index.
 *
 * @param {object} subtitle - subtitle entry with `title` or `path`.
 * @param {Array<object>} parsedVideos - videos prepared by `_parseVideo`.
 * @returns {Array<object>|undefined} the matched video entries, or `undefined`.
 */
function _mostProbableSubtitleVideos(subtitle, parsedVideos) {
  const subtitlePath = subtitle.title || subtitle.path;
  const subTitle = subtitlePath.split('/').pop().replace(/\.(\w{2,4})$/, '');
  const parsedSub = parsePath(subtitle.title || subtitle.path);
  const toVideoFiles = matches => matches.map(v => v.videoFile);

  const byFileName = parsedVideos.filter(video => subTitle.includes(video.fileName));
  if (byFileName.length === 1) {
    return toVideoFiles(byFileName);
  }

  const sameSeasons = video => arrayEquals(video.seasons, parsedSub.seasons);
  const sameEpisodes = video => arrayEquals(video.episodes, parsedSub.episodes);
  const strategies = [
    video => video.title === parsedSub.title && sameSeasons(video) && sameEpisodes(video),
    video => sameSeasons(video) && sameEpisodes(video),
    video => video.title && video.title === parsedSub.title,
    video => sameEpisodes(video),
  ];
  for (const matchesSubtitle of strategies) {
    const candidates = parsedVideos.filter(video => matchesSubtitle(video));
    if (singleVideoFile(candidates)) {
      return toVideoFiles(candidates);
    }
  }
  return undefined;
}
/**
 * Tells whether all matched videos point at exactly one file index.
 *
 * @param {Array<{videoFile: {fileIndex: *}}>} videos - parsed video matches.
 * @returns {boolean} `false` for an empty list.
 */
function singleVideoFile(videos) {
  const fileIndexes = new Set();
  videos.forEach(({ videoFile }) => fileIndexes.add(videoFile.fileIndex));
  return fileIndexes.size === 1;
}
/**
 * Parses every segment of a path and returns the first segment that yields both a
 * season and episodes, falling back to the parse of the last segment.
 *
 * @param {string} path - slash separated path.
 * @returns {object} parsed info of the chosen segment.
 */
function parsePath(path) {
  const parsedSegments = path.split('/').map(part => parseFilename(part));
  for (const parsed of parsedSegments) {
    if (parsed.season && parsed.episodes) {
      return parsed;
    }
  }
  return parsedSegments[parsedSegments.length - 1];
}
/**
 * Parses a file name and, when no episodes were recognised, treats a number at the very
 * end of the parsed title as the episode number.
 *
 * @param {string} filename - file or folder name.
 * @returns {object} parsed info, possibly with `episodes` filled in from the title.
 */
function parseFilename(filename) {
  const parsedInfo = parse(filename);
  const [, trailingDigits] = parsedInfo.title.match(/(\d+)$/) || [];
  if (!parsedInfo.episodes && trailingDigits) {
    parsedInfo.episodes = [parseInt(trailingDigits, 10)];
  }
  return parsedInfo;
}
/**
 * Shallow, ordered equality of two arrays.
 *
 * When either argument is missing, the two are equal only if they are strictly the same
 * value (e.g. both `undefined`).
 *
 * @param {Array|undefined} array1
 * @param {Array|undefined} array2
 * @returns {boolean}
 */
function arrayEquals(array1, array2) {
  if (!array1 || !array2) {
    return array1 === array2;
  }
  if (array1.length !== array2.length) {
    return false;
  }
  return !array1.some((value, index) => value !== array2[index]);
}

View File

@@ -0,0 +1,25 @@
import axios from 'axios';
import {cacheTrackers} from "./cache.js";
import { trackerConfig } from './config.js';
/**
 * Downloads the tracker list and returns its non-blank lines, one tracker per line.
 * UDP trackers are dropped unless they are enabled in the tracker configuration.
 *
 * @returns {Promise<Array<string>>} tracker urls.
 */
const downloadTrackers = async () => {
  const { data: trackersListText } = await axios.get(trackerConfig.TRACKERS_URL);

  // one tracker per line, blank lines ignored
  const listedTrackers = trackersListText
    .split("\n")
    .filter(line => line.trim() !== '');
  const urlTrackers = trackerConfig.UDP_ENABLED
    ? listedTrackers
    : listedTrackers.filter(line => !line.startsWith('udp://'));

  console.log(`Trackers updated at ${Date.now()}: ${urlTrackers.length} trackers`);
  return urlTrackers;
};
/**
 * Returns the tracker list through the tracker cache, passing `downloadTrackers` as the
 * loader.
 *
 * @returns {Promise<*>} whatever `cacheTrackers` resolves with.
 */
export const getTrackers = async () => {
  const trackers = await cacheTrackers(downloadTrackers);
  return trackers;
};

View File

@@ -0,0 +1,6 @@
// Content categories a torrent can be tagged with. Code elsewhere compares
// `torrent.type` against these string values (e.g. `torrent.type === Type.ANIME`).
export const Type = {
MOVIE: 'movie',
SERIES: 'series',
ANIME: 'anime',
PORN: 'xxx',
};

5736
src/node/consumer/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,34 @@
{
"name": "consumer",
"version": "1.0.0",
"exports": "./index.js",
"type": "module",
"scripts": {
"start": "node index.js",
"lint": "eslint . --ext .js"
},
"author": "A Dude",
"license": "MIT",
"dependencies": {
"amqplib": "^0.10.3",
"axios": "^1.6.1",
"bluebird": "^3.7.2",
"bottleneck": "^2.19.5",
"cache-manager": "^3.4.4",
"cache-manager-mongodb": "^0.3.0",
"google-sr": "^3.2.1",
"jaro-winkler": "^0.2.8",
"magnet-uri": "^6.2.0",
"moment": "^2.30.1",
"name-to-imdb": "^3.0.4",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#022408972c2a040f846331a912a6a8487746a654",
"pg": "^8.11.3",
"sequelize": "^6.31.1",
"torrent-stream": "^1.2.1",
"user-agents": "^1.0.1444"
},
"devDependencies": {
"eslint": "^8.56.0",
"eslint-plugin-import": "^2.29.1"
}
}