updating to use tpb cached torrent files

This commit is contained in:
TheBeastLT
2019-12-31 19:32:51 +01:00
parent 7aa0572fb8
commit 5cfc82134a
11 changed files with 147 additions and 23 deletions

3
.gitignore vendored
View File

@@ -1,2 +1,3 @@
/.idea
/node_modules
/node_modules
**.env

View File

@@ -1568,7 +1568,7 @@
},
"Recorder to Randoseru": {
"showId": "391",
"kitsu_id": "7143"
"kitsu_id": "6519"
},
"Recorder to Randoseru Mi": {
"showId": "392",

View File

@@ -1,3 +1,4 @@
require('dotenv').config();
const express = require("express");
const server = express();
const { init } = require('./lib/torrent');
@@ -5,7 +6,7 @@ const { connect } = require('./lib/repository');
const tpbDump = require('./scrapers/piratebay_dump');
const horribleSubsScraper = require('./scrapers/horiblesubs_scraper');
const providers = [horribleSubsScraper];
const providers = [tpbDump];
async function scrape() {
providers.forEach((provider) => provider.scrape());

View File

@@ -1,19 +1,45 @@
const cacheManager = require('cache-manager');
const mangodbStore = require('cache-manager-mongodb');
// Namespace from which the IMDb-id and metadata key prefixes below are derived.
const GLOBAL_KEY_PREFIX = 'stremio-torrentio';
const IMDB_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|imdb_id`;
const METADATA_PREFIX = `${GLOBAL_KEY_PREFIX}|metadata`;
// Torrent file listings are looked up under their own 'stremio-tpb' namespace, not GLOBAL_KEY_PREFIX.
// NOTE(review): presumably these entries are written by a different service - confirm.
const TORRENT_FILES_KEY_PREFIX = `stremio-tpb|files`;
// Cache entry lifetime: METADATA_TTL from the environment when set (environment values are strings),
// otherwise the numeric default below.
const GLOBAL_TTL = process.env.METADATA_TTL || 7 * 24 * 60 * 60; // 7 days
// MongoDB connection string from the environment; undefined when MONGODB_URI is not set.
const MONGO_URI = process.env.MONGODB_URI;
// Module-wide cache instance, created once when this module is loaded.
const cache = initiateCache();
/**
 * Builds the cache instance used by this module.
 *
 * Selects the MongoDB-backed store when MONGO_URI is configured and the
 * in-process memory store otherwise. Both variants use GLOBAL_TTL.
 *
 * Previously an unconditional memory-store return preceded the MONGO_URI
 * check, which made the MongoDB branch unreachable.
 *
 * @returns {object} the value returned by cacheManager.caching() for the chosen store
 */
function initiateCache() {
  if (!MONGO_URI) {
    return cacheManager.caching({
      store: 'memory',
      ttl: GLOBAL_TTL
    });
  }

  return cacheManager.caching({
    store: mangodbStore,
    uri: MONGO_URI,
    options: {
      collection: 'cacheManager',
    },
    ttl: GLOBAL_TTL,
    // NOTE(review): presumably lets callers proceed when the store itself errors - confirm in cache-manager docs.
    ignoreCacheErrors: true
  });
}
/**
 * Looks up the cached file listing for a torrent.
 *
 * @param {string} infoHash torrent info hash; appended to TORRENT_FILES_KEY_PREFIX to form the cache key
 * @returns {Promise<*>} resolves with the cached value, rejects with an Error when the lookup yields a falsy value
 */
function retrieveTorrentFiles(infoHash) {
  const cacheKey = `${TORRENT_FILES_KEY_PREFIX}:${infoHash}`;

  // A miss is reported as a rejection so callers can chain a fallback with .catch().
  const failOnMiss = (cachedFiles) => {
    if (cachedFiles) {
      return cachedFiles;
    }
    throw new Error('No cached files found');
  };

  return cache.get(cacheKey).then(failOnMiss);
}
function cacheWrap(key, method, options) {
@@ -28,5 +54,5 @@ function cacheWrapMetadata(id, method) {
return cacheWrap(`${METADATA_PREFIX}:${id}`, method, { ttl: GLOBAL_TTL });
}
module.exports = { cacheWrapImdbId, cacheWrapMetadata };
module.exports = { cacheWrapImdbId, cacheWrapMetadata, retrieveTorrentFiles };

View File

@@ -13,7 +13,7 @@ const Provider = database.define('provider', {
const Torrent = database.define('torrent', {
infoHash: { type: Sequelize.STRING(64), primaryKey: true },
provider: { type: Sequelize.STRING(32), allowNull: false },
title: { type: Sequelize.STRING(128), allowNull: false },
title: { type: Sequelize.STRING(256), allowNull: false },
size: { type: Sequelize.BIGINT },
type: { type: Sequelize.STRING(16), allowNull: false },
uploadDate: { type: Sequelize.DATE, allowNull: false },
@@ -25,9 +25,9 @@ const File = database.define('file',
id: { type: Sequelize.BIGINT, autoIncrement: true, primaryKey: true },
infoHash: { type: Sequelize.STRING(64), allowNull: false, references: { model: Torrent, key: 'infoHash' }, onDelete: 'CASCADE' },
fileIndex: { type: Sequelize.INTEGER },
title: { type: Sequelize.STRING(128), allowNull: false },
title: { type: Sequelize.STRING(256), allowNull: false },
size: { type: Sequelize.BIGINT },
imdbId: { type: Sequelize.STRING(12) },
imdbId: { type: Sequelize.STRING(32) },
imdbSeason: { type: Sequelize.INTEGER },
imdbEpisode: { type: Sequelize.INTEGER },
kitsuId: { type: Sequelize.INTEGER },
@@ -49,7 +49,7 @@ const SkipTorrent = database.define('skip_torrent', {
// Model for the 'failed_imdb_torrent' table, keyed by info hash.
// The superseded STRING(128) `title` entry was removed: the object literal declared `title`
// twice and only the later STRING(256) definition ever took effect.
const FailedImdbTorrent = database.define('failed_imdb_torrent', {
  infoHash: { type: Sequelize.STRING(64), primaryKey: true },
  title: { type: Sequelize.STRING(256), allowNull: false }
});
function connect() {

View File

@@ -3,6 +3,7 @@ const cheerio = require('cheerio');
const needle = require('needle');
const parseTorrent = require('parse-torrent');
const Tracker = require("peer-search/tracker");
const { retrieveTorrentFiles } = require('./cache');
const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20;
const EXTENSIONS = ["3g2", "3gp", "avi", "flv", "mkv", "mov", "mp2", "mp4", "mpe", "mpeg", "mpg", "mpv", "webm", "wmv"];
@@ -16,6 +17,7 @@ module.exports.torrentFiles = function(torrent) {
return filesFromTorrentFile(torrent)
.catch(() => filesFromKat(torrent.infoHash))
.catch(() => filesFromTorrentStream(torrent))
.catch(() => filesFromCache(torrent.infoHash))
.then((files) => files.filter((file) => isVideo(file)));
};
@@ -42,6 +44,16 @@ module.exports.currentSeeders = function (torrent) {
// .then((match) => JSON.parse(match).props.pageProps.result.torrent.files)
// }
/**
 * Builds a file listing for a torrent from its cached entries.
 *
 * Each cached entry is expected to be a string of the form `<fileIndex>@@<path>`.
 * Entries that are not strings or lack that prefix are skipped, so a single
 * malformed entry no longer rejects the whole listing with a TypeError.
 *
 * @param {string} infoHash torrent info hash, passed to retrieveTorrentFiles
 * @returns {Promise<Array<{fileIndex: number, name: string, path: string, size: number}>>}
 *   resolves with one object per well-formed entry; rejects when retrieveTorrentFiles rejects
 */
function filesFromCache(infoHash) {
  // Every file gets the same fixed size.
  // NOTE(review): presumably a placeholder because cached entries carry no size - confirm.
  const PLACEHOLDER_SIZE = 300000000;
  const ENTRY_PREFIX = /^(\d+)@@/;

  return retrieveTorrentFiles(infoHash)
    .then((entries) => entries.reduce((files, entry) => {
      const prefix = typeof entry === 'string' ? ENTRY_PREFIX.exec(entry) : null;
      if (!prefix) {
        return files; // malformed entry, skip it
      }
      // Everything after the `<index>@@` marker is the path; the name is its last segment.
      const path = entry.slice(prefix[0].length);
      files.push({
        fileIndex: parseInt(prefix[1], 10),
        name: path.slice(path.lastIndexOf('/') + 1),
        path,
        size: PLACEHOLDER_SIZE
      });
      return files;
    }, []));
}
function filesFromKat(infoHash) {
if (!infoHash) {
return Promise.reject(new Error("no infoHash"));
@@ -100,7 +112,7 @@ async function filesFromTorrentStream(torrent) {
return Promise.reject(new Error("no infoHash or magnetLink"));
}
return new Promise((resolve, rejected) => {
const engine = new torrentStream(torrent.magnetLink || torrent.infoHash, { connections: MAX_PEER_CONNECTIONS, trackers: TRACKERS });
const engine = new torrentStream(torrent.magnetLink || torrent.infoHash, { connections: MAX_PEER_CONNECTIONS });
engine.ready(() => {
const files = engine.files
@@ -117,7 +129,7 @@ async function filesFromTorrentStream(torrent) {
setTimeout(() => {
engine.destroy();
rejected(new Error('No available connections for torrent!'));
}, dynamicTimeout(torrent));
}, 30000);
});
}

View File

@@ -63,15 +63,15 @@ function parseFile(file, parsedTorrentName) {
}
async function decomposeAbsoluteEpisodes(files, torrent, imdbId) {
if (files.every((file) => file.episodes.every((ep) => ep < 100))) {
return; // nothing to decompose
if (files.every((file) => !file.episodes || file.episodes.every((ep) => ep < 100))) {
return files; // nothing to decompose
}
const metadata = await getMetadata(imdbId, torrent.type || Type.MOVIE);
// decompose if season is inside path, but individual files are concatenated ex. 101 (S01E01)
files
.filter(file => file.season && metadata.episodeCount[file.season] < 100)
.filter(file => file.episodes.every(ep => ep / 100 === file.season))
.filter(file => file.episodes && file.episodes.every(ep => ep / 100 === file.season))
.forEach(file => file.episodes = file.episodes.map(ep => ep % 100));
// decompose if no season info is available, but individual files are concatenated ex. 101 (S01E01)
// based on total episodes count per season

77
package-lock.json generated
View File

@@ -292,6 +292,11 @@
"concat-map": "0.0.1"
}
},
"bson": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/bson/-/bson-1.1.3.tgz",
"integrity": "sha512-TdiJxMVnodVS7r0BdL42y/pqC9cL2iKynVwA0Ho3qbsQYr428veL3l7BQyuqiw+Q5SqqoT0m4srSY/BlZ9AxXg=="
},
"buffer-alloc": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/buffer-alloc/-/buffer-alloc-1.2.0.tgz",
@@ -357,6 +362,24 @@
}
}
},
"cache-manager-mongodb": {
"version": "0.2.2",
"resolved": "https://registry.npmjs.org/cache-manager-mongodb/-/cache-manager-mongodb-0.2.2.tgz",
"integrity": "sha512-qLplYPc6SXkYAT9t0AdFN9kc6uy7OMgc4Xb5jY9GuseywXvPnQYZMBCTCRfQIjOHHYJ59OvS5MNgE8xRBlpVyg==",
"requires": {
"bluebird": "^3.5.3",
"cache-manager": "^2.9.0",
"lodash": "^4.17.15",
"mongodb": "^3.1.13"
},
"dependencies": {
"lodash": {
"version": "4.17.15",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
"integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A=="
}
}
},
"caseless": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz",
@@ -564,6 +587,11 @@
"domelementtype": "1"
}
},
"dotenv": {
"version": "8.2.0",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-8.2.0.tgz",
"integrity": "sha512-8sJ78ElpbDJBHNeBzUbUVLsqKdccaa/BXF1uPTw3GrvQTBgrQrtObr2mUrE38vzYd8cEv+m/JBfDLioYcfXoaw=="
},
"dottie": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/dottie/-/dottie-2.0.1.tgz",
@@ -1427,6 +1455,12 @@
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
"integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g="
},
"memory-pager": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/memory-pager/-/memory-pager-1.5.0.tgz",
"integrity": "sha512-ZS4Bp4r/Zoeq6+NLJpP+0Zzm0pR8whtGPf1XExKLJBAczGMnSi3It14OiNCStjQjM6NU1okjQGSxgEZN8eBYKg==",
"optional": true
},
"merge-descriptors": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
@@ -1491,6 +1525,17 @@
"moment": ">= 2.9.0"
}
},
"mongodb": {
"version": "3.4.1",
"resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.4.1.tgz",
"integrity": "sha512-juqt5/Z42J4DcE7tG7UdVaTKmUC6zinF4yioPfpeOSNBieWSK6qCY+0tfGQcHLKrauWPDdMZVROHJOa8q2pWsA==",
"requires": {
"bson": "^1.1.1",
"require_optional": "^1.0.1",
"safe-buffer": "^5.1.2",
"saslprep": "^1.0.0"
}
},
"ms": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz",
@@ -2042,6 +2087,20 @@
"lodash": "^4.17.11"
}
},
"require_optional": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/require_optional/-/require_optional-1.0.1.tgz",
"integrity": "sha512-qhM/y57enGWHAe3v/NcwML6a3/vfESLe/sGM2dII+gEO0BpKRUkWZow/tyloNqJyN6kXSl3RyyM8Ll5D/sJP8g==",
"requires": {
"resolve-from": "^2.0.0",
"semver": "^5.1.0"
}
},
"resolve-from": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-2.0.0.tgz",
"integrity": "sha1-lICrIOlP+h2egKgEx+oUdhGWa1c="
},
"retry-as-promised": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/retry-as-promised/-/retry-as-promised-2.3.2.tgz",
@@ -2099,6 +2158,15 @@
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
},
"saslprep": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/saslprep/-/saslprep-1.0.3.tgz",
"integrity": "sha512-/MY/PEMbk2SuY5sScONwhUDsV2p77Znkb/q3nSVstq/yQzYJOH/Azh29p9oJLsl3LnQwSvZDKagDGBsBwSooag==",
"optional": true,
"requires": {
"sparse-bitfield": "^3.0.3"
}
},
"sax": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
@@ -2353,6 +2421,15 @@
"hoek": "0.9.x"
}
},
"sparse-bitfield": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/sparse-bitfield/-/sparse-bitfield-3.0.3.tgz",
"integrity": "sha1-/0rm5oZWBWuks+eSqzM004JzyhE=",
"optional": true,
"requires": {
"memory-pager": "^1.0.2"
}
},
"speedometer": {
"version": "0.1.4",
"resolved": "https://registry.npmjs.org/speedometer/-/speedometer-0.1.4.tgz",

View File

@@ -12,11 +12,13 @@
"author": "TheBeastLT <pauliox@beyond.lt>",
"license": "MIT",
"dependencies": {
"cache-manager": "^2.9.0",
"bottleneck": "^2.16.2",
"cache-manager": "^2.9.0",
"cache-manager-mongodb": "^0.2.1",
"cheerio": "^0.22.0",
"cloudscraper": "^3.0.0",
"ddg-scraper": "^1.0.2",
"dotenv": "^8.2.0",
"express": "^4.16.4",
"google-search-scraper": "^0.1.0",
"imdb": "^1.1.0",

View File

@@ -4,10 +4,9 @@ const moment = require('moment');
// Base URLs of the sites to query. Entries carry no trailing slash.
// NOTE(review): presumably paths such as '/static/dump/csv/' are appended directly to these - confirm against the caller.
// The superseded proxies (and the duplicate array terminator they left behind) were dropped,
// and the trailing slash on the piratebays.icu entry was removed to match the other entries.
const defaultProxies = [
  'https://thepiratebay.org',
  'https://thepiratebay.vip',
  'https://piratebays.life',
  'https://piratebays.icu',
  'https://piratebays.cool'];
const dumpUrl = '/static/dump/csv/';
const defaultTimeout = 30000;

View File

@@ -20,6 +20,7 @@ const limiter = new Bottleneck({maxConcurrent: 40});
async function scrape() {
const lastScraped = await repository.getProvider({ name: NAME });
const lastDump = { updatedAt: 2147000000 };
const checkPoint = moment('2019-03-30 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
//const lastDump = await pirata.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
if (!lastScraped.lastScraped || lastScraped.lastScraped < lastDump.updatedAt) {
@@ -51,6 +52,11 @@ async function scrape() {
size: parseInt(row[3], 10)
};
if (torrent.uploadDate > checkPoint) {
entriesProcessed++;
return;
}
if (lastScraped.lastScraped && lastScraped.lastScraped > torrent.uploadDate) {
// torrent was already scraped previously, skipping
return;
@@ -177,7 +183,7 @@ async function findTorrentViaBing(record) {
function downloadDump(dump) {
console.log('downloading dump file...');
return needle('get', dump.url, { open_timeout: 2000, output: '/home/paulius/Downloads/tpb_dump.gz' })
return needle('get', dump.url, { open_timeout: 2000, output: '/tmp/tpb_dump.gz' })
.then((response) => response.body)
.then((body) => { console.log('unzipping dump file...'); return ungzip(body); })
.then((unzipped) => { console.log('writing dump file...'); return fs.promises.writeFile(CSV_FILE_PATH, unzipped); })