From 5cfc82134a087308cca4b364f8305a36884c4477 Mon Sep 17 00:00:00 2001 From: TheBeastLT Date: Tue, 31 Dec 2019 19:32:51 +0100 Subject: [PATCH] updating to use tpb cached torrent files --- .gitignore | 3 +- horrible_subs_mapping.json | 2 +- index.js | 3 +- lib/cache.js | 36 ++++++++++++++--- lib/repository.js | 8 ++-- lib/torrent.js | 16 +++++++- lib/torrentFiles.js | 6 +-- package-lock.json | 77 ++++++++++++++++++++++++++++++++++++ package.json | 4 +- scrapers/api/thepiratebay.js | 7 ++-- scrapers/piratebay_dump.js | 8 +++- 11 files changed, 147 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index e0b850e..7a1e801 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /.idea -/node_modules \ No newline at end of file +/node_modules +**.env \ No newline at end of file diff --git a/horrible_subs_mapping.json b/horrible_subs_mapping.json index 26c73e9..a4e82e2 100644 --- a/horrible_subs_mapping.json +++ b/horrible_subs_mapping.json @@ -1568,7 +1568,7 @@ }, "Recorder to Randoseru": { "showId": "391", - "kitsu_id": "7143" + "kitsu_id": "6519" }, "Recorder to Randoseru Mi": { "showId": "392", diff --git a/index.js b/index.js index 2fe3842..01a1b39 100644 --- a/index.js +++ b/index.js @@ -1,3 +1,4 @@ +require('dotenv').config(); const express = require("express"); const server = express(); const { init } = require('./lib/torrent'); @@ -5,7 +6,7 @@ const { connect } = require('./lib/repository'); const tpbDump = require('./scrapers/piratebay_dump'); const horribleSubsScraper = require('./scrapers/horiblesubs_scraper'); -const providers = [horribleSubsScraper]; +const providers = [tpbDump]; async function scrape() { providers.forEach((provider) => provider.scrape()); diff --git a/lib/cache.js b/lib/cache.js index 23ad848..557721d 100644 --- a/lib/cache.js +++ b/lib/cache.js @@ -1,19 +1,45 @@ + const cacheManager = require('cache-manager'); +const mangodbStore = require('cache-manager-mongodb'); const GLOBAL_KEY_PREFIX = 'stremio-torrentio'; const 
IMDB_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|imdb_id`; const METADATA_PREFIX = `${GLOBAL_KEY_PREFIX}|metadata`; +const TORRENT_FILES_KEY_PREFIX = `stremio-tpb|files`; const GLOBAL_TTL = process.env.METADATA_TTL || 7 * 24 * 60 * 60; // 7 days +const MONGO_URI = process.env.MONGODB_URI; const cache = initiateCache(); function initiateCache() { - return cacheManager.caching({ - store: 'memory', - ttl: GLOBAL_TTL - }); + if (MONGO_URI) { + return cacheManager.caching({ + store: mangodbStore, + uri: MONGO_URI, + options: { + collection: 'cacheManager', + }, + ttl: GLOBAL_TTL, + ignoreCacheErrors: true + }); + } else { + return cacheManager.caching({ + store: 'memory', + ttl: GLOBAL_TTL + }); + } +} + +function retrieveTorrentFiles(infoHash) { + return cache.get(`${TORRENT_FILES_KEY_PREFIX}:${infoHash}`) + .then((results) => { + if (!results) { + throw new Error('No cached files found'); + } + return results; + }); } function cacheWrap(key, method, options) { @@ -28,5 +54,5 @@ function cacheWrapMetadata(id, method) { return cacheWrap(`${METADATA_PREFIX}:${id}`, method, { ttl: GLOBAL_TTL }); } -module.exports = { cacheWrapImdbId, cacheWrapMetadata }; +module.exports = { cacheWrapImdbId, cacheWrapMetadata, retrieveTorrentFiles }; diff --git a/lib/repository.js b/lib/repository.js index a2217d7..34cb0f3 100644 --- a/lib/repository.js +++ b/lib/repository.js @@ -13,7 +13,7 @@ const Provider = database.define('provider', { const Torrent = database.define('torrent', { infoHash: { type: Sequelize.STRING(64), primaryKey: true }, provider: { type: Sequelize.STRING(32), allowNull: false }, - title: { type: Sequelize.STRING(128), allowNull: false }, + title: { type: Sequelize.STRING(256), allowNull: false }, size: { type: Sequelize.BIGINT }, type: { type: Sequelize.STRING(16), allowNull: false }, uploadDate: { type: Sequelize.DATE, allowNull: false }, @@ -25,9 +25,9 @@ const File = database.define('file', id: { type: Sequelize.BIGINT, autoIncrement: true, primaryKey: true }, infoHash: { 
type: Sequelize.STRING(64), allowNull: false, references: { model: Torrent, key: 'infoHash' }, onDelete: 'CASCADE' }, fileIndex: { type: Sequelize.INTEGER }, - title: { type: Sequelize.STRING(128), allowNull: false }, + title: { type: Sequelize.STRING(256), allowNull: false }, size: { type: Sequelize.BIGINT }, - imdbId: { type: Sequelize.STRING(12) }, + imdbId: { type: Sequelize.STRING(32) }, imdbSeason: { type: Sequelize.INTEGER }, imdbEpisode: { type: Sequelize.INTEGER }, kitsuId: { type: Sequelize.INTEGER }, @@ -49,7 +49,7 @@ const SkipTorrent = database.define('skip_torrent', { const FailedImdbTorrent = database.define('failed_imdb_torrent', { infoHash: {type: Sequelize.STRING(64), primaryKey: true}, - title: { type: Sequelize.STRING(128), allowNull: false } + title: { type: Sequelize.STRING(256), allowNull: false } }); function connect() { diff --git a/lib/torrent.js b/lib/torrent.js index 03af28a..591a536 100644 --- a/lib/torrent.js +++ b/lib/torrent.js @@ -3,6 +3,7 @@ const cheerio = require('cheerio'); const needle = require('needle'); const parseTorrent = require('parse-torrent'); const Tracker = require("peer-search/tracker"); +const { retrieveTorrentFiles } = require('./cache'); const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20; const EXTENSIONS = ["3g2", "3gp", "avi", "flv", "mkv", "mov", "mp2", "mp4", "mpe", "mpeg", "mpg", "mpv", "webm", "wmv"]; @@ -16,6 +17,7 @@ module.exports.torrentFiles = function(torrent) { return filesFromTorrentFile(torrent) .catch(() => filesFromKat(torrent.infoHash)) .catch(() => filesFromTorrentStream(torrent)) + .catch(() => filesFromCache(torrent.infoHash)) .then((files) => files.filter((file) => isVideo(file))); }; @@ -42,6 +44,16 @@ module.exports.currentSeeders = function (torrent) { // .then((match) => JSON.parse(match).props.pageProps.result.torrent.files) // } +function filesFromCache(infoHash) { + return retrieveTorrentFiles(infoHash) + .then((files) => files.map((file) => ({ + fileIndex: 
parseInt(file.match(/^(\d+)@@/)[1]), + name: file.replace(/.+\/|^\d+@@/, ''), + path: file.replace(/^\d+@@/, ''), + size: 300000000 + }))); +} + function filesFromKat(infoHash) { if (!infoHash) { return Promise.reject(new Error("no infoHash")); @@ -100,7 +112,7 @@ async function filesFromTorrentStream(torrent) { return Promise.reject(new Error("no infoHash or magnetLink")); } return new Promise((resolve, rejected) => { - const engine = new torrentStream(torrent.magnetLink || torrent.infoHash, { connections: MAX_PEER_CONNECTIONS, trackers: TRACKERS }); + const engine = new torrentStream(torrent.magnetLink || torrent.infoHash, { connections: MAX_PEER_CONNECTIONS }); engine.ready(() => { const files = engine.files @@ -117,7 +129,7 @@ async function filesFromTorrentStream(torrent) { setTimeout(() => { engine.destroy(); rejected(new Error('No available connections for torrent!')); - }, dynamicTimeout(torrent)); + }, 30000); }); } diff --git a/lib/torrentFiles.js b/lib/torrentFiles.js index 7318664..c4aba6b 100644 --- a/lib/torrentFiles.js +++ b/lib/torrentFiles.js @@ -63,15 +63,15 @@ function parseFile(file, parsedTorrentName) { } async function decomposeAbsoluteEpisodes(files, torrent, imdbId) { - if (files.every((file) => file.episodes.every((ep) => ep < 100))) { - return; // nothing to decompose + if (files.every((file) => !file.episodes || file.episodes.every((ep) => ep < 100))) { + return files; // nothing to decompose } const metadata = await getMetadata(imdbId, torrent.type || Type.MOVIE); // decompose if season is inside path, but individual files are concatenated ex. 
101 (S01E01) files .filter(file => file.season && metadata.episodeCount[file.season] < 100) - .filter(file => file.episodes.every(ep => ep / 100 === file.season)) + .filter(file => file.episodes && file.episodes.every(ep => Math.floor(ep / 100) === file.season)) .forEach(file => file.episodes = file.episodes.map(ep => ep % 100)); // decompose if no season info is available, but individual files are concatenated ex. 101 (S01E01) // based on total episodes count per season diff --git a/package-lock.json b/package-lock.json index f2146a3..fdd6a1b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -292,6 +292,11 @@ "concat-map": "0.0.1" } }, + "bson": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/bson/-/bson-1.1.3.tgz", + "integrity": "sha512-TdiJxMVnodVS7r0BdL42y/pqC9cL2iKynVwA0Ho3qbsQYr428veL3l7BQyuqiw+Q5SqqoT0m4srSY/BlZ9AxXg==" + }, "buffer-alloc": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/buffer-alloc/-/buffer-alloc-1.2.0.tgz", @@ -357,6 +362,24 @@ } } }, + "cache-manager-mongodb": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/cache-manager-mongodb/-/cache-manager-mongodb-0.2.2.tgz", + "integrity": "sha512-qLplYPc6SXkYAT9t0AdFN9kc6uy7OMgc4Xb5jY9GuseywXvPnQYZMBCTCRfQIjOHHYJ59OvS5MNgE8xRBlpVyg==", + "requires": { + "bluebird": "^3.5.3", + "cache-manager": "^2.9.0", + "lodash": "^4.17.15", + "mongodb": "^3.1.13" + }, + "dependencies": { + "lodash": { + "version": "4.17.15", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz", + "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==" + } + } + }, "caseless": { "version": "0.12.0", "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", @@ -564,6 +587,11 @@ "domelementtype": "1" } }, + "dotenv": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-8.2.0.tgz", + "integrity":
"sha512-8sJ78ElpbDJBHNeBzUbUVLsqKdccaa/BXF1uPTw3GrvQTBgrQrtObr2mUrE38vzYd8cEv+m/JBfDLioYcfXoaw==" + }, "dottie": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/dottie/-/dottie-2.0.1.tgz", @@ -1427,6 +1455,12 @@ "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=" }, + "memory-pager": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/memory-pager/-/memory-pager-1.5.0.tgz", + "integrity": "sha512-ZS4Bp4r/Zoeq6+NLJpP+0Zzm0pR8whtGPf1XExKLJBAczGMnSi3It14OiNCStjQjM6NU1okjQGSxgEZN8eBYKg==", + "optional": true + }, "merge-descriptors": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", @@ -1491,6 +1525,17 @@ "moment": ">= 2.9.0" } }, + "mongodb": { + "version": "3.4.1", + "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.4.1.tgz", + "integrity": "sha512-juqt5/Z42J4DcE7tG7UdVaTKmUC6zinF4yioPfpeOSNBieWSK6qCY+0tfGQcHLKrauWPDdMZVROHJOa8q2pWsA==", + "requires": { + "bson": "^1.1.1", + "require_optional": "^1.0.1", + "safe-buffer": "^5.1.2", + "saslprep": "^1.0.0" + } + }, "ms": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", @@ -2042,6 +2087,20 @@ "lodash": "^4.17.11" } }, + "require_optional": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/require_optional/-/require_optional-1.0.1.tgz", + "integrity": "sha512-qhM/y57enGWHAe3v/NcwML6a3/vfESLe/sGM2dII+gEO0BpKRUkWZow/tyloNqJyN6kXSl3RyyM8Ll5D/sJP8g==", + "requires": { + "resolve-from": "^2.0.0", + "semver": "^5.1.0" + } + }, + "resolve-from": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-2.0.0.tgz", + "integrity": "sha1-lICrIOlP+h2egKgEx+oUdhGWa1c=" + }, "retry-as-promised": { "version": "2.3.2", "resolved": "https://registry.npmjs.org/retry-as-promised/-/retry-as-promised-2.3.2.tgz", @@ -2099,6 +2158,15 @@ "resolved": 
"https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" }, + "saslprep": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/saslprep/-/saslprep-1.0.3.tgz", + "integrity": "sha512-/MY/PEMbk2SuY5sScONwhUDsV2p77Znkb/q3nSVstq/yQzYJOH/Azh29p9oJLsl3LnQwSvZDKagDGBsBwSooag==", + "optional": true, + "requires": { + "sparse-bitfield": "^3.0.3" + } + }, "sax": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz", @@ -2353,6 +2421,15 @@ "hoek": "0.9.x" } }, + "sparse-bitfield": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/sparse-bitfield/-/sparse-bitfield-3.0.3.tgz", + "integrity": "sha1-/0rm5oZWBWuks+eSqzM004JzyhE=", + "optional": true, + "requires": { + "memory-pager": "^1.0.2" + } + }, "speedometer": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/speedometer/-/speedometer-0.1.4.tgz", diff --git a/package.json b/package.json index 09e87d9..ccfacee 100644 --- a/package.json +++ b/package.json @@ -12,11 +12,13 @@ "author": "TheBeastLT ", "license": "MIT", "dependencies": { - "cache-manager": "^2.9.0", "bottleneck": "^2.16.2", + "cache-manager": "^2.9.0", + "cache-manager-mongodb": "^0.2.1", "cheerio": "^0.22.0", "cloudscraper": "^3.0.0", "ddg-scraper": "^1.0.2", + "dotenv": "^8.2.0", "express": "^4.16.4", "google-search-scraper": "^0.1.0", "imdb": "^1.1.0", diff --git a/scrapers/api/thepiratebay.js b/scrapers/api/thepiratebay.js index 13ec64e..6e79eff 100644 --- a/scrapers/api/thepiratebay.js +++ b/scrapers/api/thepiratebay.js @@ -4,10 +4,9 @@ const moment = require('moment'); const defaultProxies = [ 'https://thepiratebay.org', - 'https://thepiratebay.vip', - 'https://proxybay.pro', - 'https://ukpiratebayproxy.com', - 'https://thepiratebayproxy.info']; + 'https://piratebays.life', + 'https://piratebays.icu/', + 'https://piratebays.cool']; const dumpUrl = '/static/dump/csv/'; 
const defaultTimeout = 30000; diff --git a/scrapers/piratebay_dump.js b/scrapers/piratebay_dump.js index fb6b77f..37ceeb3 100644 --- a/scrapers/piratebay_dump.js +++ b/scrapers/piratebay_dump.js @@ -20,6 +20,7 @@ const limiter = new Bottleneck({maxConcurrent: 40}); async function scrape() { const lastScraped = await repository.getProvider({ name: NAME }); const lastDump = { updatedAt: 2147000000 }; + const checkPoint = moment('2019-03-30 00:00:00', 'YYYY-MM-DD HH:mm:ss').toDate(); //const lastDump = await pirata.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]); if (!lastScraped.lastScraped || lastScraped.lastScraped < lastDump.updatedAt) { @@ -51,6 +52,11 @@ async function scrape() { size: parseInt(row[3], 10) }; + if (torrent.uploadDate > checkPoint) { + entriesProcessed++; + return; + } + if (lastScraped.lastScraped && lastScraped.lastScraped > torrent.uploadDate) { // torrent was already scraped previously, skipping return; @@ -177,7 +183,7 @@ async function findTorrentViaBing(record) { function downloadDump(dump) { console.log('downloading dump file...'); - return needle('get', dump.url, { open_timeout: 2000, output: '/home/paulius/Downloads/tpb_dump.gz' }) + return needle('get', dump.url, { open_timeout: 2000, output: '/tmp/tpb_dump.gz' }) .then((response) => response.body) .then((body) => { console.log('unzipping dump file...'); return ungzip(body); }) .then((unzipped) => { console.log('writing dump file...'); return fs.promises.writeFile(CSV_FILE_PATH, unzipped); })