updates tpb dump scraper

This commit is contained in:
TheBeastLT
2019-12-29 20:07:15 +01:00
parent 7925f18064
commit e7f46d2adc
12 changed files with 689 additions and 165 deletions

32
lib/cache.js Normal file
View File

@@ -0,0 +1,32 @@
const cacheManager = require('cache-manager');
// Every cache key written by this module starts with this prefix.
const GLOBAL_KEY_PREFIX = 'stremio-torrentio';
const IMDB_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|imdb_id`;
const METADATA_PREFIX = `${GLOBAL_KEY_PREFIX}|metadata`;
const DEFAULT_TTL = 7 * 24 * 60 * 60; // 7 days

/**
 * Turns a raw TTL setting into a number.
 *
 * Environment variables are always strings, so the value is parsed before it
 * is handed to the cache; anything that is missing, non-numeric or negative
 * falls back to the 7 day default.
 *
 * @param {string|undefined} rawValue - value read from the environment.
 * @returns {number} the parsed TTL, or DEFAULT_TTL when rawValue is unusable.
 */
function parseTtl(rawValue) {
  const parsed = Number.parseInt(rawValue, 10);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : DEFAULT_TTL;
}

const GLOBAL_TTL = parseTtl(process.env.METADATA_TTL);
// Module-wide cache instance shared by every wrapper below.
const cache = initiateCache();

/**
 * Creates the cache used by this module.
 *
 * @returns {object} the result of `cacheManager.caching` for the 'memory'
 *   store, configured with `GLOBAL_TTL` as its ttl.
 */
function initiateCache() {
  const options = { store: 'memory', ttl: GLOBAL_TTL };
  return cacheManager.caching(options);
}
/**
 * Forwards its three arguments unchanged to `cache.wrap` and returns the result.
 *
 * @param {string} key - cache key (callers in this file pass an already prefixed key).
 * @param {Function} method - producer handed to `cache.wrap`.
 *   NOTE(review): presumably only invoked on a cache miss — confirm against cache-manager.
 * @param {object} options - options handed to `cache.wrap` (callers here pass `{ ttl }`).
 * @returns whatever `cache.wrap` returns.
 */
function cacheWrap(key, method, options) {
return cache.wrap(key, method, options);
}
/**
 * Wraps `method` in the cache under the imdb-id namespace.
 *
 * @param {string} key - lookup key; stored as `${IMDB_ID_PREFIX}:${key}`.
 * @param {Function} method - producer passed through to `cacheWrap`.
 * @returns whatever `cacheWrap` returns.
 */
function cacheWrapImdbId(key, method) {
  const namespacedKey = `${IMDB_ID_PREFIX}:${key}`;
  const options = { ttl: GLOBAL_TTL };
  return cacheWrap(namespacedKey, method, options);
}
/**
 * Wraps `method` in the cache under the metadata namespace.
 *
 * @param {string} id - identifier; stored as `${METADATA_PREFIX}:${id}`.
 * @param {Function} method - producer passed through to `cacheWrap`.
 * @returns whatever `cacheWrap` returns.
 */
function cacheWrapMetadata(id, method) {
  const namespacedKey = `${METADATA_PREFIX}:${id}`;
  const options = { ttl: GLOBAL_TTL };
  return cacheWrap(namespacedKey, method, options);
}
// Only the namespaced wrappers are exported; `cacheWrap` and the cache instance stay module-private.
module.exports = { cacheWrapImdbId, cacheWrapMetadata };

View File

@@ -1,32 +1,38 @@
const _ = require('lodash');
const needle = require('needle');
const nameToImdb = require('name-to-imdb');
const bing = require('nodejs-bing');
const { cacheWrapImdbId, cacheWrapMetadata } = require('./cache');
// Base URL of the Cinemeta service; `getMetadata` appends `/meta/<type>/<imdbId>.json` to it.
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
/**
 * Counts episodes per season.
 *
 * @param {Array<{season: number}>} episodes - entries that already passed the season filter.
 * @returns {number[]} episode counts ordered by ascending season number.
 */
function countEpisodesBySeason(episodes) {
  const counts = {};
  for (const { season } of episodes) {
    counts[season] = (counts[season] || 0) + 1;
  }
  return Object.keys(counts)
    .map(Number)
    .sort((a, b) => a - b)
    .map((season) => counts[season]);
}

/**
 * Fetches metadata for a title from Cinemeta, going through the metadata cache.
 *
 * The previous un-cached implementation used to return first, which left the
 * cached request unreachable; only the cached request remains.
 *
 * @param {string} imdbId - IMDb id; also used as the cache key.
 * @param {string} type - Cinemeta type segment of the URL (e.g. 'movie', 'series').
 * @returns {Promise<{imdbId: string, title: string, year: *, genres: *,
 *   totalEpisodes: (number|undefined), episodeCount: (number[]|undefined)}>}
 *   `totalEpisodes` / `episodeCount` are only set when the response lists videos;
 *   both consider only entries with `season > 0`, so they always agree.
 * @throws {Error} (as a rejection) `failed cinemeta query <imdbId> due: <reason>`
 *   when the request fails or the response has no named meta.
 */
function getMetadata(imdbId, type) {
  const url = `${CINEMETA_URL}/meta/${type}/${imdbId}.json`;
  return cacheWrapMetadata(imdbId, () => needle('get', url, { open_timeout: 60000 })
    .then((response) => {
      const body = response.body;
      const meta = body && body.meta;
      if (!meta || !meta.name) {
        throw new Error('No search results');
      }
      // NOTE(review): season 0 is excluded; presumably it holds specials - confirm.
      const episodes = meta.videos && meta.videos.filter((video) => video.season > 0);
      return {
        imdbId: imdbId,
        title: meta.name,
        year: meta.year,
        genres: meta.genres,
        totalEpisodes: episodes && episodes.length,
        episodeCount: episodes && countEpisodesBySeason(episodes)
      };
    })
    .catch((error) => {
      throw new Error(`failed cinemeta query ${imdbId} due: ${error.message}`);
    }));
}
function escapeTitle(title, hyphenEscape = true) {
@@ -39,48 +45,23 @@ function escapeTitle(title, hyphenEscape = true) {
.trim();
}
/**
 * Builds the metadata object for one series episode.
 *
 * @param {string} id - colon separated id; the code reads it as `<imdbId>:<season>:<episode>`.
 * @returns {Promise<object>} `{ imdb, title, season, episode, absoluteEpisode, genres, isAnime }`.
 *
 * NOTE(review): `hardcodedTitles` and `hasEpisodeCount` are not defined anywhere in the
 * visible part of this file - confirm they exist at module level, otherwise this throws
 * a ReferenceError.
 * NOTE(review): the last `module.exports` assignment in this file does not export this function.
 */
async function seriesMetadata(id) {
const idInfo = id.split(':');
const imdbId = idInfo[0];
const season = parseInt(idInfo[1], 10);
const episode = parseInt(idInfo[2], 10);
const metadata = await getMetadata(imdbId, 'series');
const title = escapeTitle(metadata.title);
return {
imdb: imdbId,
// a hardcoded title, when present for this id, wins over the escaped Cinemeta title
title: hardcodedTitles[imdbId] || title,
season: season,
episode: episode,
// episode number plus the episode counts of all seasons before `season`
absoluteEpisode: hasEpisodeCount && metadata.episodeCount.slice(0, season - 1).reduce((a, b) => a + b, episode),
genres: metadata.genres,
// titles with an empty genre list are also flagged as anime
isAnime: !metadata.genres.length || metadata.genres.includes('Animation')
};
}
/**
 * Builds the metadata object for a movie.
 *
 * @param {string} id - id passed to `getMetadata` with type 'movie'.
 * @returns {Promise<object>} `{ imdb, title, year, genres, isAnime }`, where `title`
 *   went through `escapeTitle` and `isAnime` is true when the genre list is empty
 *   or contains 'Animation'.
 */
async function movieMetadata(id) {
  const { title, year, genres } = await getMetadata(id, 'movie');
  const escapedTitle = escapeTitle(title);
  const isAnime = !genres.length || genres.includes('Animation');
  return {
    imdb: id,
    title: escapedTitle,
    year,
    genres,
    isAnime
  };
}
/**
 * Looks the title up with `name-to-imdb`.
 *
 * @param {object} info - query passed unchanged to `nameToImdb`.
 * @returns {Promise<*>} resolves with the lookup result; rejects with the lookup
 *   error, or 'failed imdbId search' when there is neither a result nor an error.
 */
function searchImdbIdByName(info) {
  return new Promise((resolve, reject) => {
    nameToImdb(info, (err, res) => {
      if (res) {
        resolve(res);
      } else {
        reject(err || new Error('failed imdbId search'));
      }
    });
  });
}

/**
 * Fallback lookup: searches Bing and extracts the id from the first result link
 * that points at an IMDb title page.
 *
 * @param {object} info - `name`, optional `year` and `type` are put into the query.
 * @returns {Promise<string>} the `tt…` id; rejects with 'failed imdbId search'
 *   when no result links to an IMDb title (instead of an opaque TypeError).
 */
function searchImdbIdWithBing(info) {
  return bing.web(`${info.name} ${info.year || ''} ${info.type} imdb`)
    .then((results) => {
      for (const result of results || []) {
        const match = String(result.link || '').match(/imdb\.com\/title\/(tt\d+)/);
        if (match) {
          return match[1];
        }
      }
      throw new Error('failed imdbId search');
    });
}

/**
 * Resolves the IMDb id for a title, going through the imdb-id cache.
 *
 * The previous un-cached lookup used to return first, which left the cache and
 * the Bing fallback unreachable; only the cached lookup remains.
 *
 * @param {object} info - `{ name, year, type }`; the three fields form the cache key.
 * @returns {Promise<*>} result of the name lookup, or the id found through Bing
 *   when the name lookup fails.
 */
async function getImdbId(info) {
  const key = `${info.name}_${info.year}_${info.type}`;
  return cacheWrapImdbId(key, () => searchImdbIdByName(info).catch(() => searchImdbIdWithBing(info)));
}
// Public API of this module. The earlier assignment that also listed
// `movieMetadata` and `seriesMetadata` was overwritten by this one and is removed.
module.exports = { escapeTitle, getMetadata, getImdbId };

View File

@@ -1,4 +1,5 @@
const { Sequelize }= require('sequelize');
// Shorthand for Sequelize's operators; used as computed keys in `where` clauses in this file.
const Op = Sequelize.Op;
// Database connection string; falls back to a localhost URI when POSTGRES_URI is not set.
const POSTGRES_URI = process.env.POSTGRES_URI || 'postgres://torrentio:postgres@localhost:5432/torrentio';
@@ -13,20 +14,42 @@ const Torrent = database.define('torrent', {
infoHash: { type: Sequelize.STRING(64), primaryKey: true },
provider: { type: Sequelize.STRING(32), allowNull: false },
title: { type: Sequelize.STRING(128), allowNull: false },
size: { type: Sequelize.BIGINT },
type: { type: Sequelize.STRING(16), allowNull: false },
imdbId: { type: Sequelize.STRING(12) },
kitsuId: { type: Sequelize.INTEGER },
uploadDate: { type: Sequelize.DATE, allowNull: false },
seeders: { type: Sequelize.SMALLINT },
files: { type: Sequelize.JSONB }
seeders: { type: Sequelize.SMALLINT }
});
// `file` model: one row per entry of a torrent, linked to `Torrent` through
// `infoHash` and carrying the imdb / kitsu ids it was matched to.
const File = database.define(
  'file',
  {
    id: { type: Sequelize.BIGINT, autoIncrement: true, primaryKey: true },
    // rows are removed together with their torrent (ON DELETE CASCADE)
    infoHash: {
      type: Sequelize.STRING(64),
      allowNull: false,
      references: { model: Torrent, key: 'infoHash' },
      onDelete: 'CASCADE'
    },
    fileIndex: { type: Sequelize.INTEGER },
    title: { type: Sequelize.STRING(128), allowNull: false },
    size: { type: Sequelize.BIGINT },
    imdbId: { type: Sequelize.STRING(12) },
    imdbSeason: { type: Sequelize.INTEGER },
    imdbEpisode: { type: Sequelize.INTEGER },
    kitsuId: { type: Sequelize.INTEGER },
    kitsuEpisode: { type: Sequelize.INTEGER }
  },
  {
    indexes: [
      // at most one row without a fileIndex per torrent
      {
        unique: true,
        fields: ['infoHash'],
        where: { fileIndex: { [Op.eq]: null } }
      },
      { unique: true, fields: ['infoHash', 'fileIndex', 'imdbEpisode'] },
      // non-unique lookup indexes
      { unique: false, fields: ['imdbId', 'imdbSeason', 'imdbEpisode'] },
      { unique: false, fields: ['kitsuId', 'kitsuEpisode'] }
    ]
  }
);
// `skip_torrent` model: stores nothing but the info hash, which is the primary key.
const SkipTorrent = database.define('skip_torrent', {
  infoHash: {
    type: Sequelize.STRING(64),
    primaryKey: true
  }
});
// `failed_imdb_torrent` model: info hash (primary key) plus a mandatory title.
const FailedImdbTorrent = database.define('failed_imdb_torrent', {
  infoHash: {
    type: Sequelize.STRING(64),
    primaryKey: true
  },
  title: {
    type: Sequelize.STRING(128),
    allowNull: false
  }
});
function connect() {
@@ -34,7 +57,7 @@ function connect() {
}
/**
 * Finds the provider row with the given name, creating it when it does not exist.
 *
 * The name is matched with an explicit `Op.eq` so it is always compared as a
 * plain value. The earlier implicit-equality query used to return first and
 * made this one unreachable; it is removed.
 *
 * @param {object} provider - must carry `name`; the whole object is used as the
 *   defaults for a newly created row.
 * @returns whatever `Provider.findOrCreate` returns.
 */
function getProvider(provider) {
  const where = { name: { [Op.eq]: provider.name } };
  return Provider.findOrCreate({ where, defaults: provider });
}
function updateProvider(provider) {
@@ -51,10 +74,14 @@ function getTorrent(torrent) {
})
}
/**
 * Inserts or updates a row of the `Torrent` model.
 *
 * @param {object} torrent - attribute values passed unchanged to `Torrent.upsert`.
 * @returns whatever `Torrent.upsert` returns.
 */
function createTorrent(torrent) {
  return Torrent.upsert(torrent);
}

/**
 * Former name of `createTorrent`, kept as an alias so existing references to
 * `updateTorrent` keep working.
 *
 * @deprecated use `createTorrent`.
 */
function updateTorrent(torrent) {
  return createTorrent(torrent);
}
/**
 * Inserts or updates a row of the `File` model.
 *
 * @param {object} file - attribute values passed unchanged to `File.upsert`.
 * @returns whatever `File.upsert` returns.
 */
function createFile(file) {
return File.upsert(file);
}
function getSkipTorrent(torrent) {
return SkipTorrent.findByPk(torrent.infoHash)
.then((result) =>{
@@ -80,7 +107,7 @@ function getFailedImdbTorrent(torrent) {
}
/**
 * Records a torrent whose IMDb lookup failed.
 *
 * Both columns of `FailedImdbTorrent` are written: `title` is declared
 * `allowNull: false`, so the earlier upsert that only passed `infoHash` (and
 * returned before the intended one) is removed.
 *
 * @param {object} torrent - must carry `infoHash` and `title`; other fields are ignored.
 * @returns whatever `FailedImdbTorrent.upsert` returns.
 */
function createFailedImdbTorrent(torrent) {
  const { infoHash, title } = torrent;
  return FailedImdbTorrent.upsert({ infoHash, title });
}
// Public API of this module. The earlier assignment that listed `updateTorrent`
// instead of `createTorrent` / `createFile` was overwritten by this one and is removed.
module.exports = { connect, getProvider, updateProvider, getTorrent, createTorrent, createFile, getSkipTorrent, createSkipTorrent, createFailedImdbTorrent };

View File

@@ -46,7 +46,7 @@ function filesFromKat(infoHash) {
if (!infoHash) {
return Promise.reject(new Error("no infoHash"));
}
const url = `http://kat.rip/torrent/${infoHash}.html`;
const url = `https://kat.rip/torrent/${infoHash}.html`;
return needle('get', url, { open_timeout: 2000 })
.then((response) => {
if (!response.body || response.statusCode !== 200) {
@@ -61,6 +61,7 @@ function filesFromKat(infoHash) {
$('table[id=\'ul_top\'] tr').each((index, row) => {
files.push({
fileIndex: index,
name: $(row).find('td[class=\'torFileName\']').text().replace(/.*\//, ''),
path: $(row).find('td[class=\'torFileName\']').text(),
size: convertToBytes($(row).find('td[class=\'torFileSize\']').text())
});
@@ -139,24 +140,22 @@ function convertToBytes(sizeString) {
/**
 * Picks a timeout based on how many seeders a torrent has: the more seeders,
 * the longer the timeout.
 *
 * The previous tiers (4000 / 5000 / 9000 / 12000) were interleaved with the
 * current ones, which left duplicate thresholds and an `else` followed by
 * `else if`; only the current tiers remain.
 *
 * @param {{seeders: number}} torrent
 * @returns {number} timeout value (presumably milliseconds - confirm at the call site);
 *   30000 when `seeders` is 50 or more, or is not a number.
 */
function dynamicTimeout(torrent) {
  // [exclusive upper bound on seeders, timeout]
  const tiers = [
    [5, 3000],
    [10, 7000],
    [20, 10000],
    [30, 15000],
    [50, 20000]
  ];
  const tier = tiers.find(([maxSeeders]) => torrent.seeders < maxSeeders);
  return tier ? tier[1] : 30000;
}
function getTrackerList() {
return needle('get', 'https://torrents.me/tracker-list/', { open_timeout: 2000 })
return needle('get', 'https://torrents.me/tracker-list/', { open_timeout: 2000, follow_max: 2 })
.then((response) => {
if (!response.body || response.statusCode !== 200) {
throw new Error('tracker list not found')

82
lib/torrentFiles.js Normal file
View File

@@ -0,0 +1,82 @@
const { torrentFiles } = require('../lib/torrent');
const { getMetadata } = require('../lib/metadata');
const { parse } = require('parse-torrent-title');
const { Type } = require('./types');
// Size threshold: `parseTorrentFiles` only keeps files whose `size` is greater than this.
const MIN_SIZE = 20 * 1024 * 1024; // 20 MB
/**
 * Turns a torrent into the list of file entries to store for it.
 *
 * - movie torrents and torrents whose title names a single season + episode
 *   produce one entry for the whole torrent (no `fileIndex`);
 * - anything else is expanded through `torrentFiles`: files larger than
 *   `MIN_SIZE` are parsed, passed through `decomposeAbsoluteEpisodes`, and every
 *   episode of every file with a season becomes its own entry.
 *
 * @param {object} torrent - reads `type`, `infoHash`, `title`, `size`.
 * @param {string} imdbId - id copied into every entry.
 * @returns {Promise<object[]>} the entries; an empty array (after logging) when
 *   listing or processing the files fails.
 */
async function parseTorrentFiles(torrent, imdbId) {
  const { infoHash, title, size } = torrent;

  if (torrent.type === Type.MOVIE) {
    return [{ infoHash, title, size, imdbId }];
  }

  const parsedTorrentName = parse(title);
  if (parsedTorrentName.season && parsedTorrentName.episode) {
    const imdbSeason = parsedTorrentName.season;
    const imdbEpisode = parsedTorrentName.episode;
    return [{ infoHash, title, size, imdbId, imdbSeason, imdbEpisode }];
  }

  // started outside the try block so that only failures of the returned promise are swallowed
  const pendingFiles = torrentFiles(torrent);
  try {
    const listedFiles = await pendingFiles;
    const parsedFiles = listedFiles
      .filter((file) => file.size > MIN_SIZE)
      .map((file) => parseFile(file, parsedTorrentName));
    const decomposedFiles = await decomposeAbsoluteEpisodes(parsedFiles, torrent, imdbId);
    const usableFiles = decomposedFiles
      .filter((file) => file.season && file.episodes && file.episodes.length);

    const entries = [];
    for (const file of usableFiles) {
      for (const episode of file.episodes) {
        entries.push({
          infoHash: torrent.infoHash,
          fileIndex: file.fileIndex,
          title: file.name,
          size: file.size,
          imdbId: imdbId,
          imdbSeason: file.season,
          imdbEpisode: episode
        });
      }
    }
    return entries;
  } catch (error) {
    console.log(`Failed getting files for ${torrent.title}`, error.message);
    return [];
  }
}
/**
 * Parses a file name and merges the parsed fields into the file entry.
 *
 * When the file name itself carries no season, the season is taken from the
 * torrent title, or failing that from the name of the folder that directly
 * contains the file.
 *
 * @param {object} file - reads `name`, and `path` when no other season is found.
 * @param {object} parsedTorrentName - parsed torrent title; only `season` is read.
 * @returns {object} a new object with the file's fields overlaid by the parsed fields.
 */
function parseFile(file, parsedTorrentName) {
  const fileInfo = parse(file.name);
  const merged = () => ({ ...file, ...fileInfo });

  if (fileInfo.season) {
    return merged();
  }
  if (parsedTorrentName.season) {
    fileInfo.season = parsedTorrentName.season;
    return merged();
  }
  if (file.path.includes('/')) {
    // the episode may be in a folder containing season number
    const segments = file.path.split('/');
    const [parentFolder] = segments.slice(-2);
    fileInfo.season = parse(parentFolder).season;
  }
  return merged();
}
/**
 * Rewrites episode numbers that have the season concatenated in front
 * (e.g. 101 meaning S01E01) back to plain episode numbers.
 *
 * A file is rewritten only when it has a season, that season has fewer than
 * 100 episodes according to the metadata, and every one of its episode numbers
 * starts with that season number. Matching files are updated in place.
 *
 * Fixes over the previous version:
 * - always resolves with `files` (the early exit used to resolve with
 *   `undefined`, which made the caller fail on every ordinary torrent);
 * - compares `Math.floor(ep / 100)` with the season (`ep / 100` is 1.01 for 101
 *   and never equalled the season);
 * - reads the season's count at `episodeCount[season - 1]`, since the array is
 *   ordered by season starting at index 0;
 * - tolerates files without an `episodes` array.
 *
 * @param {object[]} files - parsed files; reads `season` and `episodes`.
 * @param {object} torrent - only `type` is read (falls back to `Type.MOVIE`).
 * @param {string} imdbId - id used to fetch the metadata.
 * @returns {Promise<object[]>} the same `files` array.
 */
async function decomposeAbsoluteEpisodes(files, torrent, imdbId) {
  const episodesOf = (file) => file.episodes || [];

  if (files.every((file) => episodesOf(file).every((ep) => ep < 100))) {
    return files; // nothing to decompose
  }

  const metadata = await getMetadata(imdbId, torrent.type || Type.MOVIE);
  const episodeCount = metadata.episodeCount || [];

  // decompose if season is inside path, but individual files are concatenated ex. 101 (S01E01)
  files
    .filter((file) => file.season && file.episodes && episodeCount[file.season - 1] < 100)
    .filter((file) => file.episodes.every((ep) => Math.floor(ep / 100) === file.season))
    .forEach((file) => {
      file.episodes = file.episodes.map((ep) => ep % 100);
    });

  return files;
}
// `parseFile` and `decomposeAbsoluteEpisodes` are internal helpers and are not exported.
module.exports = { parseTorrentFiles };

5
lib/types.js Normal file
View File

@@ -0,0 +1,5 @@
// Content types a torrent can be classified as.
const Type = {
  MOVIE: 'movie',
  SERIES: 'series',
  ANIME: 'anime'
};

exports.Type = Type;