mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
updates tpb dump scraper
This commit is contained in:
32
lib/cache.js
Normal file
32
lib/cache.js
Normal file
@@ -0,0 +1,32 @@
|
||||
const cacheManager = require('cache-manager');
|
||||
|
||||
// Every cache key written by this module starts with this prefix.
const GLOBAL_KEY_PREFIX = 'stremio-torrentio';
// Key namespaces for the two kinds of cached lookups.
const IMDB_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|imdb_id`;
const METADATA_PREFIX = `${GLOBAL_KEY_PREFIX}|metadata`;

const DEFAULT_TTL = 7 * 24 * 60 * 60; // 7 days

/**
 * Turns the raw METADATA_TTL environment value into a numeric TTL.
 *
 * Environment variables are always strings, so the value is converted
 * explicitly instead of being handed to the cache as text.
 *
 * @param {string|undefined} rawValue - value read from the environment
 * @returns {number} the parsed TTL, or DEFAULT_TTL when the value is missing,
 *   blank, not a finite number, or negative
 */
function parseTtlSeconds(rawValue) {
  if (rawValue === undefined || rawValue === null || String(rawValue).trim() === '') {
    return DEFAULT_TTL;
  }
  const parsed = Number(rawValue);
  if (!Number.isFinite(parsed) || parsed < 0) {
    return DEFAULT_TTL;
  }
  return parsed;
}

const GLOBAL_TTL = parseTtlSeconds(process.env.METADATA_TTL);
|
||||
|
||||
|
||||
/**
 * Builds the cache instance used by this module.
 *
 * @returns {object} whatever cacheManager.caching returns for a 'memory'
 *   store configured with GLOBAL_TTL as its ttl
 */
function initiateCache() {
  const storeOptions = { store: 'memory', ttl: GLOBAL_TTL };
  return cacheManager.caching(storeOptions);
}

// Single module-wide cache instance shared by the wrappers below.
const cache = initiateCache();
|
||||
|
||||
/**
 * Forwards a wrap request to the module-wide cache instance.
 *
 * @param {string} key - full cache key (already prefixed by the caller)
 * @param {Function} method - producer handed to cache.wrap
 * @param {object} options - options handed to cache.wrap (callers pass `{ ttl }`)
 * @returns {*} the result of cache.wrap
 */
function cacheWrap(key, method, options) {
  const wrapped = cache.wrap(key, method, options);
  return wrapped;
}
|
||||
|
||||
/**
 * Wraps an IMDb id lookup, storing it under the imdb_id key namespace.
 *
 * @param {string} key - lookup key; it is appended to IMDB_ID_PREFIX
 * @param {Function} method - producer passed on to cacheWrap
 * @returns {*} the result of cacheWrap
 */
function cacheWrapImdbId(key, method) {
  const cacheKey = `${IMDB_ID_PREFIX}:${key}`;
  const options = { ttl: GLOBAL_TTL };
  return cacheWrap(cacheKey, method, options);
}
|
||||
|
||||
/**
 * Wraps a metadata lookup, storing it under the metadata key namespace.
 *
 * @param {string} id - identifier; it is appended to METADATA_PREFIX
 * @param {Function} method - producer passed on to cacheWrap
 * @returns {*} the result of cacheWrap
 */
function cacheWrapMetadata(id, method) {
  const cacheKey = `${METADATA_PREFIX}:${id}`;
  const options = { ttl: GLOBAL_TTL };
  return cacheWrap(cacheKey, method, options);
}
|
||||
|
||||
module.exports = { cacheWrapImdbId, cacheWrapMetadata };
|
||||
|
||||
111
lib/metadata.js
111
lib/metadata.js
@@ -1,32 +1,38 @@
|
||||
const _ = require('lodash');
|
||||
const needle = require('needle');
|
||||
const nameToImdb = require('name-to-imdb');
|
||||
const bing = require('nodejs-bing');
|
||||
const { cacheWrapImdbId, cacheWrapMetadata } = require('./cache');
|
||||
|
||||
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
|
||||
|
||||
function getMetadata(imdbId, type) {
|
||||
return needle('get', `${CINEMETA_URL}/meta/${type}/${imdbId}.json`, { open_timeout: 1000 })
|
||||
.then((response) => response.body)
|
||||
.then((body) => {
|
||||
if (body && body.meta && body.meta.name) {
|
||||
return {
|
||||
imdbId: imdbId,
|
||||
title: body.meta.name,
|
||||
year: body.meta.year,
|
||||
genres: body.meta.genres,
|
||||
episodeCount: body.meta.videos && _.chain(body.meta.videos)
|
||||
.countBy('season')
|
||||
.toPairs()
|
||||
.filter((pair) => pair[0] !== '0')
|
||||
.sortBy((pair) => parseInt(pair[0], 10))
|
||||
.map((pair) => pair[1])
|
||||
.value()
|
||||
};
|
||||
} else {
|
||||
console.log(`failed cinemeta query: Empty Body`);
|
||||
throw new Error('failed cinemeta query');
|
||||
}
|
||||
});
|
||||
return cacheWrapMetadata(imdbId,
|
||||
() => needle('get', `${CINEMETA_URL}/meta/${type}/${imdbId}.json`, { open_timeout: 60000 })
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
if (body && body.meta && body.meta.name) {
|
||||
return {
|
||||
imdbId: imdbId,
|
||||
title: body.meta.name,
|
||||
year: body.meta.year,
|
||||
genres: body.meta.genres,
|
||||
totalEpisodes: body.meta.videos && body.meta.videos
|
||||
.filter(video => video.season > 0).length,
|
||||
episodeCount: body.meta.videos && Object.values(body.meta.videos
|
||||
.filter((entry) => entry.season !== 0)
|
||||
.sort((a, b) => a.season - b.season)
|
||||
.reduce((map, next) => {
|
||||
map[next.season] = map[next.season] + 1 || 1;
|
||||
return map;
|
||||
}, {}))
|
||||
};
|
||||
} else {
|
||||
throw new Error('No search results');
|
||||
}
|
||||
})
|
||||
.catch((error) => {
|
||||
throw new Error(`failed cinemeta query ${imdbId} due: ${error.message}`);
|
||||
}));
|
||||
}
|
||||
|
||||
function escapeTitle(title, hyphenEscape = true) {
|
||||
@@ -39,48 +45,23 @@ function escapeTitle(title, hyphenEscape = true) {
|
||||
.trim();
|
||||
}
|
||||
|
||||
async function seriesMetadata(id) {
|
||||
const idInfo = id.split(':');
|
||||
const imdbId = idInfo[0];
|
||||
const season = parseInt(idInfo[1], 10);
|
||||
const episode = parseInt(idInfo[2], 10);
|
||||
|
||||
const metadata = await getMetadata(imdbId, 'series');
|
||||
const title = escapeTitle(metadata.title);
|
||||
|
||||
return {
|
||||
imdb: imdbId,
|
||||
title: hardcodedTitles[imdbId] || title,
|
||||
season: season,
|
||||
episode: episode,
|
||||
absoluteEpisode: hasEpisodeCount && metadata.episodeCount.slice(0, season - 1).reduce((a, b) => a + b, episode),
|
||||
genres: metadata.genres,
|
||||
isAnime: !metadata.genres.length || metadata.genres.includes('Animation')
|
||||
};
|
||||
}
|
||||
|
||||
async function movieMetadata(id) {
|
||||
const metadata = await getMetadata(id, 'movie');
|
||||
|
||||
return {
|
||||
imdb: id,
|
||||
title: escapeTitle(metadata.title),
|
||||
year: metadata.year,
|
||||
genres: metadata.genres,
|
||||
isAnime: !metadata.genres.length || metadata.genres.includes('Animation')
|
||||
};
|
||||
}
|
||||
|
||||
async function getImdbId(info) {
|
||||
return new Promise((resolve, reject) => {
|
||||
nameToImdb(info, function(err, res) {
|
||||
if (res) {
|
||||
resolve(res);
|
||||
} else {
|
||||
reject(err || new Error('failed imdbId search'));
|
||||
}
|
||||
});
|
||||
});
|
||||
const key = `${info.name}_${info.year}_${info.type}`;
|
||||
|
||||
return cacheWrapImdbId(key,
|
||||
() => new Promise((resolve, reject) => {
|
||||
nameToImdb(info, function(err, res) {
|
||||
if (res) {
|
||||
resolve(res);
|
||||
} else {
|
||||
reject(err || new Error('failed imdbId search'));
|
||||
}
|
||||
});
|
||||
}).catch(() => bing.web(`${info.name} ${info.year || ''} ${info.type} imdb`)
|
||||
.then((results) => results
|
||||
.map((result) => result.link)
|
||||
.find(result => result.includes('imdb.com/title/'))
|
||||
.match(/imdb\.com\/title\/(tt\d+)/)[1])));
|
||||
}
|
||||
|
||||
module.exports = { escapeTitle, getMetadata, movieMetadata, seriesMetadata, getImdbId };
|
||||
module.exports = { escapeTitle, getMetadata, getImdbId };
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
const { Sequelize }= require('sequelize');
|
||||
const Op = Sequelize.Op;
|
||||
|
||||
const POSTGRES_URI = process.env.POSTGRES_URI || 'postgres://torrentio:postgres@localhost:5432/torrentio';
|
||||
|
||||
@@ -13,20 +14,42 @@ const Torrent = database.define('torrent', {
|
||||
infoHash: { type: Sequelize.STRING(64), primaryKey: true },
|
||||
provider: { type: Sequelize.STRING(32), allowNull: false },
|
||||
title: { type: Sequelize.STRING(128), allowNull: false },
|
||||
size: { type: Sequelize.BIGINT },
|
||||
type: { type: Sequelize.STRING(16), allowNull: false },
|
||||
imdbId: { type: Sequelize.STRING(12) },
|
||||
kitsuId: { type: Sequelize.INTEGER },
|
||||
uploadDate: { type: Sequelize.DATE, allowNull: false },
|
||||
seeders: { type: Sequelize.SMALLINT },
|
||||
files: { type: Sequelize.JSONB }
|
||||
seeders: { type: Sequelize.SMALLINT }
|
||||
});
|
||||
|
||||
const File = database.define('file',
|
||||
{
|
||||
id: { type: Sequelize.BIGINT, autoIncrement: true, primaryKey: true },
|
||||
infoHash: { type: Sequelize.STRING(64), allowNull: false, references: { model: Torrent, key: 'infoHash' }, onDelete: 'CASCADE' },
|
||||
fileIndex: { type: Sequelize.INTEGER },
|
||||
title: { type: Sequelize.STRING(128), allowNull: false },
|
||||
size: { type: Sequelize.BIGINT },
|
||||
imdbId: { type: Sequelize.STRING(12) },
|
||||
imdbSeason: { type: Sequelize.INTEGER },
|
||||
imdbEpisode: { type: Sequelize.INTEGER },
|
||||
kitsuId: { type: Sequelize.INTEGER },
|
||||
kitsuEpisode: { type: Sequelize.INTEGER }
|
||||
},
|
||||
{
|
||||
indexes:[
|
||||
{ unique: true, fields:['infoHash'], where: { fileIndex: { [Op.eq]: null } } },
|
||||
{ unique: true, fields:['infoHash', 'fileIndex', 'imdbEpisode'] },
|
||||
{ unique: false, fields:['imdbId', 'imdbSeason', 'imdbEpisode'] },
|
||||
{ unique: false, fields:['kitsuId', 'kitsuEpisode'] }
|
||||
]
|
||||
}
|
||||
);
|
||||
|
||||
const SkipTorrent = database.define('skip_torrent', {
|
||||
infoHash: {type: Sequelize.STRING(64), primaryKey: true},
|
||||
});
|
||||
|
||||
const FailedImdbTorrent = database.define('failed_imdb_torrent', {
|
||||
infoHash: {type: Sequelize.STRING(64), primaryKey: true},
|
||||
title: { type: Sequelize.STRING(128), allowNull: false }
|
||||
});
|
||||
|
||||
function connect() {
|
||||
@@ -34,7 +57,7 @@ function connect() {
|
||||
}
|
||||
|
||||
function getProvider(provider) {
|
||||
return Provider.findOrCreate({ where: { name: provider.name }, defaults: provider });
|
||||
return Provider.findOrCreate({ where: { name: { [Op.eq]: provider.name }}, defaults: provider });
|
||||
}
|
||||
|
||||
function updateProvider(provider) {
|
||||
@@ -51,10 +74,14 @@ function getTorrent(torrent) {
|
||||
})
|
||||
}
|
||||
|
||||
function updateTorrent(torrent) {
|
||||
function createTorrent(torrent) {
|
||||
return Torrent.upsert(torrent);
|
||||
}
|
||||
|
||||
function createFile(file) {
|
||||
return File.upsert(file);
|
||||
}
|
||||
|
||||
function getSkipTorrent(torrent) {
|
||||
return SkipTorrent.findByPk(torrent.infoHash)
|
||||
.then((result) =>{
|
||||
@@ -80,7 +107,7 @@ function getFailedImdbTorrent(torrent) {
|
||||
}
|
||||
|
||||
function createFailedImdbTorrent(torrent) {
|
||||
return FailedImdbTorrent.upsert({ infoHash: torrent.infoHash });
|
||||
return FailedImdbTorrent.upsert(torrent);
|
||||
}
|
||||
|
||||
module.exports = { connect, getProvider, updateProvider, getTorrent, updateTorrent, getSkipTorrent, createSkipTorrent, createFailedImdbTorrent };
|
||||
module.exports = { connect, getProvider, updateProvider, getTorrent, createTorrent, createFile, getSkipTorrent, createSkipTorrent, createFailedImdbTorrent };
|
||||
@@ -46,7 +46,7 @@ function filesFromKat(infoHash) {
|
||||
if (!infoHash) {
|
||||
return Promise.reject(new Error("no infoHash"));
|
||||
}
|
||||
const url = `http://kat.rip/torrent/${infoHash}.html`;
|
||||
const url = `https://kat.rip/torrent/${infoHash}.html`;
|
||||
return needle('get', url, { open_timeout: 2000 })
|
||||
.then((response) => {
|
||||
if (!response.body || response.statusCode !== 200) {
|
||||
@@ -61,6 +61,7 @@ function filesFromKat(infoHash) {
|
||||
$('table[id=\'ul_top\'] tr').each((index, row) => {
|
||||
files.push({
|
||||
fileIndex: index,
|
||||
name: $(row).find('td[class=\'torFileName\']').text().replace(/.*\//, ''),
|
||||
path: $(row).find('td[class=\'torFileName\']').text(),
|
||||
size: convertToBytes($(row).find('td[class=\'torFileSize\']').text())
|
||||
});
|
||||
@@ -139,24 +140,22 @@ function convertToBytes(sizeString) {
|
||||
|
||||
function dynamicTimeout(torrent) {
|
||||
if (torrent.seeders < 5) {
|
||||
return 3000;
|
||||
} else if (torrent.seeders < 10) {
|
||||
return 4000;
|
||||
} else if (torrent.seeders < 20) {
|
||||
return 5000;
|
||||
} else if (torrent.seeders < 30) {
|
||||
} else if (torrent.seeders < 10) {
|
||||
return 7000;
|
||||
} else if (torrent.seeders < 50) {
|
||||
return 9000;
|
||||
} else if (torrent.seeders < 100) {
|
||||
return 12000;
|
||||
} else {
|
||||
} else if (torrent.seeders < 20) {
|
||||
return 10000;
|
||||
} else if (torrent.seeders < 30) {
|
||||
return 15000;
|
||||
} else if (torrent.seeders < 50) {
|
||||
return 20000;
|
||||
} else {
|
||||
return 30000;
|
||||
}
|
||||
}
|
||||
|
||||
function getTrackerList() {
|
||||
return needle('get', 'https://torrents.me/tracker-list/', { open_timeout: 2000 })
|
||||
return needle('get', 'https://torrents.me/tracker-list/', { open_timeout: 2000, follow_max: 2 })
|
||||
.then((response) => {
|
||||
if (!response.body || response.statusCode !== 200) {
|
||||
throw new Error('tracker list not found')
|
||||
|
||||
82
lib/torrentFiles.js
Normal file
82
lib/torrentFiles.js
Normal file
@@ -0,0 +1,82 @@
|
||||
const { torrentFiles } = require('../lib/torrent');
|
||||
const { getMetadata } = require('../lib/metadata');
|
||||
const { parse } = require('parse-torrent-title');
|
||||
const { Type } = require('./types');
|
||||
|
||||
// Files at or below this size are dropped before parsing.
const MIN_SIZE = 20 * 1024 * 1024; // 20 MB

/**
 * Produces the file records for a torrent.
 *
 * - Movie torrents yield a single record describing the torrent itself.
 * - Torrents whose title parses to both a season and an episode yield a
 *   single record for that episode.
 * - Otherwise the torrent's file list is fetched, files not larger than
 *   MIN_SIZE are dropped, each file name is parsed, and one record is
 *   produced per (file, episode) pair.
 *
 * @param {object} torrent - torrent with infoHash, title, size and type
 * @param {string} imdbId - IMDb id attached to every produced record
 * @returns {Promise<object[]>} file records; an empty array when fetching or
 *   parsing the file list fails (the failure is logged)
 */
async function parseTorrentFiles(torrent, imdbId) {
  const { infoHash, title, size } = torrent;

  if (torrent.type === Type.MOVIE) {
    return [{ infoHash, title, size, imdbId }];
  }

  const titleInfo = parse(title);
  if (titleInfo.season && titleInfo.episode) {
    return [{
      infoHash,
      title,
      size,
      imdbId,
      imdbSeason: titleInfo.season,
      imdbEpisode: titleInfo.episode
    }];
  }

  // Started outside the try block so that only asynchronous failures are
  // turned into an empty result.
  const pendingFiles = torrentFiles(torrent);
  try {
    const fetched = await pendingFiles;
    const parsedFiles = fetched
        .filter((candidate) => candidate.size > MIN_SIZE)
        .map((candidate) => parseFile(candidate, titleInfo));
    const decomposed = await decomposeAbsoluteEpisodes(parsedFiles, torrent, imdbId);
    const usable = decomposed
        .filter((entry) => entry.season && entry.episodes && entry.episodes.length);

    const records = [];
    for (const entry of usable) {
      for (const episode of entry.episodes) {
        records.push({
          infoHash,
          fileIndex: entry.fileIndex,
          title: entry.name,
          size: entry.size,
          imdbId,
          imdbSeason: entry.season,
          imdbEpisode: episode
        });
      }
    }
    return records;
  } catch (error) {
    console.log(`Failed getting files for ${torrent.title}`, error.message);
    return [];
  }
}
|
||||
|
||||
/**
 * Parses a file name and merges the parsed fields over the file entry.
 *
 * When the file name itself carries no season, the season is taken from the
 * parsed torrent title, or failing that from the folder that directly
 * contains the file.
 *
 * @param {object} file - file entry with `name` and `path`
 * @param {object} parsedTorrentName - result of parsing the torrent title
 * @returns {object} copy of `file` with the parsed fields applied on top
 */
function parseFile(file, parsedTorrentName) {
  const parsedName = parse(file.name);

  if (!parsedName.season) {
    if (parsedTorrentName.season) {
      parsedName.season = parsedTorrentName.season;
    } else if (file.path.includes('/')) {
      // the episode may be in a folder containing season number
      const segments = file.path.split('/');
      const parentFolder = segments[segments.length - 2];
      parsedName.season = parse(parentFolder).season;
    }
  }

  return { ...file, ...parsedName };
}
|
||||
|
||||
/**
 * Rewrites concatenated episode numbers (e.g. 101 meaning S01E01) into plain
 * episode numbers, mutating the `episodes` array of the affected files.
 *
 * Metadata is only fetched when at least one file has an episode number of
 * 100 or more. A file is rewritten only when its season is known, that season
 * has fewer than 100 episodes according to the metadata, and every one of its
 * episode numbers carries the season as its hundreds part.
 *
 * @param {object[]} files - parsed file entries (`season`, optional `episodes`)
 * @param {object} torrent - torrent whose `type` selects the metadata lookup
 * @param {string} imdbId - id used for the metadata lookup
 * @returns {Promise<object[]>} the same `files` array, always returned so the
 *   caller can keep chaining on it
 */
async function decomposeAbsoluteEpisodes(files, torrent, imdbId) {
  // Files without parsed episodes have nothing to decompose; treat as empty.
  const episodesOf = (file) => (Array.isArray(file.episodes) ? file.episodes : []);

  if (files.every((file) => episodesOf(file).every((ep) => ep < 100))) {
    return files; // nothing to decompose
  }

  const metadata = await getMetadata(imdbId, torrent.type || Type.MOVIE);
  // episodeCount is a 0-based list ordered by season, so season N is at N - 1.
  // It is absent when the metadata has no videos.
  const episodeCount = (metadata && metadata.episodeCount) || [];

  // decompose if season is inside path, but individual files are concatenated ex. 101 (S01E01)
  files
      .filter((file) => file.season && episodeCount[file.season - 1] < 100)
      .filter((file) => episodesOf(file).length > 0)
      .filter((file) => file.episodes.every((ep) => Math.floor(ep / 100) === file.season))
      .forEach((file) => {
        file.episodes = file.episodes.map((ep) => ep % 100);
      });
  // TODO: decompose if no season info is available, but individual files are
  // concatenated ex. 101 (S01E01), based on total episodes count per season

  return files;
}
|
||||
|
||||
module.exports = { parseTorrentFiles };
|
||||
5
lib/types.js
Normal file
5
lib/types.js
Normal file
@@ -0,0 +1,5 @@
|
||||
// Content type identifiers exported for use by other modules.
const Type = {
  MOVIE: 'movie',
  SERIES: 'series',
  ANIME: 'anime'
};

exports.Type = Type;
|
||||
Reference in New Issue
Block a user