mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
updates tpb dump scraper
This commit is contained in:
2
index.js
2
index.js
@@ -5,7 +5,7 @@ const { connect } = require('./lib/repository');
|
||||
const tpbDump = require('./scrapers/piratebay_dump');
|
||||
const horribleSubsScraper = require('./scrapers/horiblesubs_scraper');
|
||||
|
||||
const providers = [horribleSubsScraper];
|
||||
const providers = [tpbDump];
|
||||
|
||||
async function scrape() {
|
||||
providers.forEach((provider) => provider.scrape());
|
||||
|
||||
32
lib/cache.js
Normal file
32
lib/cache.js
Normal file
@@ -0,0 +1,32 @@
|
||||
const cacheManager = require('cache-manager');
|
||||
|
||||
const GLOBAL_KEY_PREFIX = 'stremio-torrentio';
|
||||
const IMDB_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|imdb_id`;
|
||||
const METADATA_PREFIX = `${GLOBAL_KEY_PREFIX}|metadata`;
|
||||
|
||||
const GLOBAL_TTL = process.env.METADATA_TTL || 7 * 24 * 60 * 60; // 7 days
|
||||
|
||||
|
||||
const cache = initiateCache();
|
||||
|
||||
/**
 * Builds the cache instance used by the wrappers in this module.
 *
 * @returns {Object} whatever `cacheManager.caching` returns for a `'memory'`
 *   store configured with `GLOBAL_TTL` as its `ttl`.
 */
function initiateCache() {
  const storeOptions = { store: 'memory', ttl: GLOBAL_TTL };
  return cacheManager.caching(storeOptions);
}
|
||||
|
||||
/**
 * Pass-through to `wrap` on the module-level cache instance.
 *
 * @param {string} key - full cache key (callers in this module prepend a prefix).
 * @param {Function} method - forwarded to `cache.wrap` unchanged.
 * @param {Object} options - forwarded to `cache.wrap` unchanged.
 * @returns {*} the result of `cache.wrap`.
 */
function cacheWrap(key, method, options) {
  const wrapped = cache.wrap(key, method, options);
  return wrapped;
}
|
||||
|
||||
/**
 * Wraps `method` in the cache under the imdb-id key namespace.
 *
 * @param {string} key - lookup key; stored as `${IMDB_ID_PREFIX}:${key}`.
 * @param {Function} method - forwarded to `cacheWrap`.
 * @returns {*} the result of `cacheWrap`, called with a `ttl` of `GLOBAL_TTL`.
 */
function cacheWrapImdbId(key, method) {
  const prefixedKey = `${IMDB_ID_PREFIX}:${key}`;
  const options = { ttl: GLOBAL_TTL };
  return cacheWrap(prefixedKey, method, options);
}
|
||||
|
||||
/**
 * Wraps `method` in the cache under the metadata key namespace.
 *
 * @param {string} id - identifier; stored as `${METADATA_PREFIX}:${id}`.
 * @param {Function} method - forwarded to `cacheWrap`.
 * @returns {*} the result of `cacheWrap`, called with a `ttl` of `GLOBAL_TTL`.
 */
function cacheWrapMetadata(id, method) {
  const prefixedKey = `${METADATA_PREFIX}:${id}`;
  const options = { ttl: GLOBAL_TTL };
  return cacheWrap(prefixedKey, method, options);
}
|
||||
|
||||
module.exports = { cacheWrapImdbId, cacheWrapMetadata };
|
||||
|
||||
111
lib/metadata.js
111
lib/metadata.js
@@ -1,32 +1,38 @@
|
||||
const _ = require('lodash');
|
||||
const needle = require('needle');
|
||||
const nameToImdb = require('name-to-imdb');
|
||||
const bing = require('nodejs-bing');
|
||||
const { cacheWrapImdbId, cacheWrapMetadata } = require('./cache');
|
||||
|
||||
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
|
||||
|
||||
function getMetadata(imdbId, type) {
|
||||
return needle('get', `${CINEMETA_URL}/meta/${type}/${imdbId}.json`, { open_timeout: 1000 })
|
||||
.then((response) => response.body)
|
||||
.then((body) => {
|
||||
if (body && body.meta && body.meta.name) {
|
||||
return {
|
||||
imdbId: imdbId,
|
||||
title: body.meta.name,
|
||||
year: body.meta.year,
|
||||
genres: body.meta.genres,
|
||||
episodeCount: body.meta.videos && _.chain(body.meta.videos)
|
||||
.countBy('season')
|
||||
.toPairs()
|
||||
.filter((pair) => pair[0] !== '0')
|
||||
.sortBy((pair) => parseInt(pair[0], 10))
|
||||
.map((pair) => pair[1])
|
||||
.value()
|
||||
};
|
||||
} else {
|
||||
console.log(`failed cinemeta query: Empty Body`);
|
||||
throw new Error('failed cinemeta query');
|
||||
}
|
||||
});
|
||||
return cacheWrapMetadata(imdbId,
|
||||
() => needle('get', `${CINEMETA_URL}/meta/${type}/${imdbId}.json`, { open_timeout: 60000 })
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
if (body && body.meta && body.meta.name) {
|
||||
return {
|
||||
imdbId: imdbId,
|
||||
title: body.meta.name,
|
||||
year: body.meta.year,
|
||||
genres: body.meta.genres,
|
||||
totalEpisodes: body.meta.videos && body.meta.videos
|
||||
.filter(video => video.season > 0).length,
|
||||
episodeCount: body.meta.videos && Object.values(body.meta.videos
|
||||
.filter((entry) => entry.season !== 0)
|
||||
.sort((a, b) => a.season - b.season)
|
||||
.reduce((map, next) => {
|
||||
map[next.season] = map[next.season] + 1 || 1;
|
||||
return map;
|
||||
}, {}))
|
||||
};
|
||||
} else {
|
||||
throw new Error('No search results');
|
||||
}
|
||||
})
|
||||
.catch((error) => {
|
||||
throw new Error(`failed cinemeta query ${imdbId} due: ${error.message}`);
|
||||
}));
|
||||
}
|
||||
|
||||
function escapeTitle(title, hyphenEscape = true) {
|
||||
@@ -39,48 +45,23 @@ function escapeTitle(title, hyphenEscape = true) {
|
||||
.trim();
|
||||
}
|
||||
|
||||
async function seriesMetadata(id) {
|
||||
const idInfo = id.split(':');
|
||||
const imdbId = idInfo[0];
|
||||
const season = parseInt(idInfo[1], 10);
|
||||
const episode = parseInt(idInfo[2], 10);
|
||||
|
||||
const metadata = await getMetadata(imdbId, 'series');
|
||||
const title = escapeTitle(metadata.title);
|
||||
|
||||
return {
|
||||
imdb: imdbId,
|
||||
title: hardcodedTitles[imdbId] || title,
|
||||
season: season,
|
||||
episode: episode,
|
||||
absoluteEpisode: hasEpisodeCount && metadata.episodeCount.slice(0, season - 1).reduce((a, b) => a + b, episode),
|
||||
genres: metadata.genres,
|
||||
isAnime: !metadata.genres.length || metadata.genres.includes('Animation')
|
||||
};
|
||||
}
|
||||
|
||||
async function movieMetadata(id) {
|
||||
const metadata = await getMetadata(id, 'movie');
|
||||
|
||||
return {
|
||||
imdb: id,
|
||||
title: escapeTitle(metadata.title),
|
||||
year: metadata.year,
|
||||
genres: metadata.genres,
|
||||
isAnime: !metadata.genres.length || metadata.genres.includes('Animation')
|
||||
};
|
||||
}
|
||||
|
||||
async function getImdbId(info) {
|
||||
return new Promise((resolve, reject) => {
|
||||
nameToImdb(info, function(err, res) {
|
||||
if (res) {
|
||||
resolve(res);
|
||||
} else {
|
||||
reject(err || new Error('failed imdbId search'));
|
||||
}
|
||||
});
|
||||
});
|
||||
const key = `${info.name}_${info.year}_${info.type}`;
|
||||
|
||||
return cacheWrapImdbId(key,
|
||||
() => new Promise((resolve, reject) => {
|
||||
nameToImdb(info, function(err, res) {
|
||||
if (res) {
|
||||
resolve(res);
|
||||
} else {
|
||||
reject(err || new Error('failed imdbId search'));
|
||||
}
|
||||
});
|
||||
}).catch(() => bing.web(`${info.name} ${info.year || ''} ${info.type} imdb`)
|
||||
.then((results) => results
|
||||
.map((result) => result.link)
|
||||
.find(result => result.includes('imdb.com/title/'))
|
||||
.match(/imdb\.com\/title\/(tt\d+)/)[1])));
|
||||
}
|
||||
|
||||
module.exports = { escapeTitle, getMetadata, movieMetadata, seriesMetadata, getImdbId };
|
||||
module.exports = { escapeTitle, getMetadata, getImdbId };
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
const { Sequelize }= require('sequelize');
|
||||
const Op = Sequelize.Op;
|
||||
|
||||
const POSTGRES_URI = process.env.POSTGRES_URI || 'postgres://torrentio:postgres@localhost:5432/torrentio';
|
||||
|
||||
@@ -13,20 +14,42 @@ const Torrent = database.define('torrent', {
|
||||
infoHash: { type: Sequelize.STRING(64), primaryKey: true },
|
||||
provider: { type: Sequelize.STRING(32), allowNull: false },
|
||||
title: { type: Sequelize.STRING(128), allowNull: false },
|
||||
size: { type: Sequelize.BIGINT },
|
||||
type: { type: Sequelize.STRING(16), allowNull: false },
|
||||
imdbId: { type: Sequelize.STRING(12) },
|
||||
kitsuId: { type: Sequelize.INTEGER },
|
||||
uploadDate: { type: Sequelize.DATE, allowNull: false },
|
||||
seeders: { type: Sequelize.SMALLINT },
|
||||
files: { type: Sequelize.JSONB }
|
||||
seeders: { type: Sequelize.SMALLINT }
|
||||
});
|
||||
|
||||
const File = database.define('file',
|
||||
{
|
||||
id: { type: Sequelize.BIGINT, autoIncrement: true, primaryKey: true },
|
||||
infoHash: { type: Sequelize.STRING(64), allowNull: false, references: { model: Torrent, key: 'infoHash' }, onDelete: 'CASCADE' },
|
||||
fileIndex: { type: Sequelize.INTEGER },
|
||||
title: { type: Sequelize.STRING(128), allowNull: false },
|
||||
size: { type: Sequelize.BIGINT },
|
||||
imdbId: { type: Sequelize.STRING(12) },
|
||||
imdbSeason: { type: Sequelize.INTEGER },
|
||||
imdbEpisode: { type: Sequelize.INTEGER },
|
||||
kitsuId: { type: Sequelize.INTEGER },
|
||||
kitsuEpisode: { type: Sequelize.INTEGER }
|
||||
},
|
||||
{
|
||||
indexes:[
|
||||
{ unique: true, fields:['infoHash'], where: { fileIndex: { [Op.eq]: null } } },
|
||||
{ unique: true, fields:['infoHash', 'fileIndex', 'imdbEpisode'] },
|
||||
{ unique: false, fields:['imdbId', 'imdbSeason', 'imdbEpisode'] },
|
||||
{ unique: false, fields:['kitsuId', 'kitsuEpisode'] }
|
||||
]
|
||||
}
|
||||
);
|
||||
|
||||
const SkipTorrent = database.define('skip_torrent', {
|
||||
infoHash: {type: Sequelize.STRING(64), primaryKey: true},
|
||||
});
|
||||
|
||||
const FailedImdbTorrent = database.define('failed_imdb_torrent', {
|
||||
infoHash: {type: Sequelize.STRING(64), primaryKey: true},
|
||||
title: { type: Sequelize.STRING(128), allowNull: false }
|
||||
});
|
||||
|
||||
function connect() {
|
||||
@@ -34,7 +57,7 @@ function connect() {
|
||||
}
|
||||
|
||||
function getProvider(provider) {
|
||||
return Provider.findOrCreate({ where: { name: provider.name }, defaults: provider });
|
||||
return Provider.findOrCreate({ where: { name: { [Op.eq]: provider.name }}, defaults: provider });
|
||||
}
|
||||
|
||||
function updateProvider(provider) {
|
||||
@@ -51,10 +74,14 @@ function getTorrent(torrent) {
|
||||
})
|
||||
}
|
||||
|
||||
function updateTorrent(torrent) {
|
||||
function createTorrent(torrent) {
|
||||
return Torrent.upsert(torrent);
|
||||
}
|
||||
|
||||
/**
 * Stores a file record by delegating to `File.upsert`.
 *
 * @param {Object} file - values for the `file` model.
 * @returns {*} whatever `File.upsert` returns.
 */
function createFile(file) {
  const pendingUpsert = File.upsert(file);
  return pendingUpsert;
}
|
||||
|
||||
function getSkipTorrent(torrent) {
|
||||
return SkipTorrent.findByPk(torrent.infoHash)
|
||||
.then((result) =>{
|
||||
@@ -80,7 +107,7 @@ function getFailedImdbTorrent(torrent) {
|
||||
}
|
||||
|
||||
function createFailedImdbTorrent(torrent) {
|
||||
return FailedImdbTorrent.upsert({ infoHash: torrent.infoHash });
|
||||
return FailedImdbTorrent.upsert(torrent);
|
||||
}
|
||||
|
||||
module.exports = { connect, getProvider, updateProvider, getTorrent, updateTorrent, getSkipTorrent, createSkipTorrent, createFailedImdbTorrent };
|
||||
module.exports = { connect, getProvider, updateProvider, getTorrent, createTorrent, createFile, getSkipTorrent, createSkipTorrent, createFailedImdbTorrent };
|
||||
@@ -46,7 +46,7 @@ function filesFromKat(infoHash) {
|
||||
if (!infoHash) {
|
||||
return Promise.reject(new Error("no infoHash"));
|
||||
}
|
||||
const url = `http://kat.rip/torrent/${infoHash}.html`;
|
||||
const url = `https://kat.rip/torrent/${infoHash}.html`;
|
||||
return needle('get', url, { open_timeout: 2000 })
|
||||
.then((response) => {
|
||||
if (!response.body || response.statusCode !== 200) {
|
||||
@@ -61,6 +61,7 @@ function filesFromKat(infoHash) {
|
||||
$('table[id=\'ul_top\'] tr').each((index, row) => {
|
||||
files.push({
|
||||
fileIndex: index,
|
||||
name: $(row).find('td[class=\'torFileName\']').text().replace(/.*\//, ''),
|
||||
path: $(row).find('td[class=\'torFileName\']').text(),
|
||||
size: convertToBytes($(row).find('td[class=\'torFileSize\']').text())
|
||||
});
|
||||
@@ -139,24 +140,22 @@ function convertToBytes(sizeString) {
|
||||
|
||||
function dynamicTimeout(torrent) {
|
||||
if (torrent.seeders < 5) {
|
||||
return 3000;
|
||||
} else if (torrent.seeders < 10) {
|
||||
return 4000;
|
||||
} else if (torrent.seeders < 20) {
|
||||
return 5000;
|
||||
} else if (torrent.seeders < 30) {
|
||||
} else if (torrent.seeders < 10) {
|
||||
return 7000;
|
||||
} else if (torrent.seeders < 50) {
|
||||
return 9000;
|
||||
} else if (torrent.seeders < 100) {
|
||||
return 12000;
|
||||
} else {
|
||||
} else if (torrent.seeders < 20) {
|
||||
return 10000;
|
||||
} else if (torrent.seeders < 30) {
|
||||
return 15000;
|
||||
} else if (torrent.seeders < 50) {
|
||||
return 20000;
|
||||
} else {
|
||||
return 30000;
|
||||
}
|
||||
}
|
||||
|
||||
function getTrackerList() {
|
||||
return needle('get', 'https://torrents.me/tracker-list/', { open_timeout: 2000 })
|
||||
return needle('get', 'https://torrents.me/tracker-list/', { open_timeout: 2000, follow_max: 2 })
|
||||
.then((response) => {
|
||||
if (!response.body || response.statusCode !== 200) {
|
||||
throw new Error('tracker list not found')
|
||||
|
||||
82
lib/torrentFiles.js
Normal file
82
lib/torrentFiles.js
Normal file
@@ -0,0 +1,82 @@
|
||||
const { torrentFiles } = require('../lib/torrent');
|
||||
const { getMetadata } = require('../lib/metadata');
|
||||
const { parse } = require('parse-torrent-title');
|
||||
const { Type } = require('./types');
|
||||
|
||||
const MIN_SIZE = 20 * 1024 * 1024; // 20 MB
|
||||
|
||||
/**
 * Turns a torrent into the list of file entries to store for it.
 *
 * - Movie torrents yield one entry describing the torrent itself.
 * - Torrents whose title parses to both a season and an episode yield one
 *   entry carrying that season/episode.
 * - Otherwise the torrent's file list is fetched, files of MIN_SIZE or less
 *   are dropped, each remaining file is parsed, and one entry is produced per
 *   (file, episode) pair.
 *
 * @param {Object} torrent - read for `type`, `infoHash`, `title` and `size`.
 * @param {string} imdbId - copied onto every produced entry.
 * @returns {Promise<Object[]>} the entries; an empty array if fetching or
 *   processing the file list fails (the failure is logged, not rethrown).
 */
async function parseTorrentFiles(torrent, imdbId) {
  if (torrent.type === Type.MOVIE) {
    const movieEntry = {
      infoHash: torrent.infoHash,
      title: torrent.title,
      size: torrent.size,
      imdbId: imdbId,
    };
    return [movieEntry];
  }

  const parsedTorrentName = parse(torrent.title);
  const isSingleEpisode = parsedTorrentName.season && parsedTorrentName.episode;
  if (isSingleEpisode) {
    const episodeEntry = {
      infoHash: torrent.infoHash,
      title: torrent.title,
      size: torrent.size,
      imdbId: imdbId,
      imdbSeason: parsedTorrentName.season,
      imdbEpisode: parsedTorrentName.episode
    };
    return [episodeEntry];
  }

  // Started outside the try block so that only asynchronous failures are
  // converted into an empty result.
  const pendingFiles = torrentFiles(torrent);
  try {
    const listedFiles = await pendingFiles;
    const parsedFiles = listedFiles
        .filter((file) => file.size > MIN_SIZE)
        .map((file) => parseFile(file, parsedTorrentName));
    const decomposedFiles = await decomposeAbsoluteEpisodes(parsedFiles, torrent, imdbId);

    const entries = [];
    decomposedFiles
        .filter((file) => file.season && file.episodes && file.episodes.length)
        .forEach((file) => {
          // one entry per episode contained in the file
          file.episodes.forEach((episode) => {
            entries.push({
              infoHash: torrent.infoHash,
              fileIndex: file.fileIndex,
              title: file.name,
              size: file.size,
              imdbId: imdbId,
              imdbSeason: file.season,
              imdbEpisode: episode
            });
          });
        });
    return entries;
  } catch (error) {
    console.log(`Failed getting files for ${torrent.title}`, error.message);
    return [];
  }
}
|
||||
|
||||
/**
 * Parses a file's name and, when the name itself carries no season, borrows
 * the season from the torrent title or from the file's parent folder name.
 *
 * @param {Object} file - read for `name` and `path`.
 * @param {Object} parsedTorrentName - parse result of the torrent title.
 * @returns {Object} a new object with the file's properties overlaid by the
 *   parsed info.
 */
function parseFile(file, parsedTorrentName) {
  const fileInfo = parse(file.name);

  if (!fileInfo.season) {
    if (parsedTorrentName.season) {
      fileInfo.season = parsedTorrentName.season;
    } else if (file.path.includes('/')) {
      // the episode may be in a folder containing season number
      const folders = file.path.split('/');
      const parentFolder = folders[folders.length - 2];
      fileInfo.season = parse(parentFolder).season;
    }
  }

  return Object.assign({}, file, fileInfo);
}
|
||||
|
||||
/**
 * Rewrites episode numbers that are really a concatenated season+episode
 * (e.g. 101 meaning S01E01) into plain episode numbers.
 *
 * A file is rewritten only when it has a season, the metadata says that
 * season has fewer than 100 episodes, and every one of its episode numbers
 * has the season as its hundreds part. Matching files are updated in place.
 *
 * @param {Object[]} files - parsed files; `episodes` may be missing on a file.
 * @param {Object} torrent - read for `type` (falls back to Type.MOVIE).
 * @param {string} imdbId - id used to look up metadata.
 * @returns {Promise<Object[]>} always the same `files` array, so callers can
 *   keep chaining on it.
 */
async function decomposeAbsoluteEpisodes(files, torrent, imdbId) {
  const episodesOf = (file) => file.episodes || [];

  if (files.every((file) => episodesOf(file).every((ep) => ep < 100))) {
    return files; // nothing to decompose
  }

  const metadata = await getMetadata(imdbId, torrent.type || Type.MOVIE);
  // episodeCount is a zero-based array ordered by season, so season N sits at
  // index N - 1 (assumes metadata seasons start at 1 without gaps). It can be
  // absent when the metadata has no videos.
  const episodeCount = metadata.episodeCount || [];

  // decompose if season is inside path, but individual files are concatenated ex. 101 (S01E01)
  files
      .filter((file) => file.season && episodesOf(file).length)
      .filter((file) => episodeCount[file.season - 1] < 100)
      .filter((file) => file.episodes.every((ep) => Math.floor(ep / 100) === file.season))
      .forEach((file) => {
        file.episodes = file.episodes.map((ep) => ep % 100);
      });

  // TODO: decompose if no season info is available, but individual files are
  // concatenated ex. 101 (S01E01), based on total episodes count per season

  return files;
}
|
||||
|
||||
module.exports = { parseTorrentFiles };
|
||||
5
lib/types.js
Normal file
5
lib/types.js
Normal file
@@ -0,0 +1,5 @@
|
||||
exports.Type = {
|
||||
MOVIE: 'movie',
|
||||
SERIES: 'series',
|
||||
ANIME: 'anime'
|
||||
};
|
||||
328
package-lock.json
generated
328
package-lock.json
generated
@@ -14,6 +14,30 @@
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-11.11.0.tgz",
|
||||
"integrity": "sha512-D5Rt+HXgEywr3RQJcGlZUCTCx1qVbCZpVk3/tOOA6spLNZdGm8BU+zRgdRYDoF1pO3RuXLxADzMrF903JlQXqg=="
|
||||
},
|
||||
"CSSselect": {
|
||||
"version": "0.4.1",
|
||||
"resolved": "https://registry.npmjs.org/CSSselect/-/CSSselect-0.4.1.tgz",
|
||||
"integrity": "sha1-+Kt+H4QYzmPNput713ioXX7EkrI=",
|
||||
"requires": {
|
||||
"CSSwhat": "0.4",
|
||||
"domutils": "1.4"
|
||||
},
|
||||
"dependencies": {
|
||||
"domutils": {
|
||||
"version": "1.4.3",
|
||||
"resolved": "https://registry.npmjs.org/domutils/-/domutils-1.4.3.tgz",
|
||||
"integrity": "sha1-CGVRN5bGswYDGFDhdVFrr4C3Km8=",
|
||||
"requires": {
|
||||
"domelementtype": "1"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"CSSwhat": {
|
||||
"version": "0.4.7",
|
||||
"resolved": "https://registry.npmjs.org/CSSwhat/-/CSSwhat-0.4.7.tgz",
|
||||
"integrity": "sha1-hn2g/zn3eGEyQsRM/qg/CqTr35s="
|
||||
},
|
||||
"accepts": {
|
||||
"version": "1.3.5",
|
||||
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.5.tgz",
|
||||
@@ -245,6 +269,15 @@
|
||||
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
|
||||
"integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24="
|
||||
},
|
||||
"boom": {
|
||||
"version": "0.4.2",
|
||||
"resolved": "https://registry.npmjs.org/boom/-/boom-0.4.2.tgz",
|
||||
"integrity": "sha1-emNune1O/O+xnO9JR6PGffrukRs=",
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"hoek": "0.9.x"
|
||||
}
|
||||
},
|
||||
"bottleneck": {
|
||||
"version": "2.17.1",
|
||||
"resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.17.1.tgz",
|
||||
@@ -308,6 +341,22 @@
|
||||
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz",
|
||||
"integrity": "sha1-0ygVQE1olpn4Wk6k+odV3ROpYEg="
|
||||
},
|
||||
"cache-manager": {
|
||||
"version": "2.10.1",
|
||||
"resolved": "https://registry.npmjs.org/cache-manager/-/cache-manager-2.10.1.tgz",
|
||||
"integrity": "sha512-bk17v9IkLqNcbCzggEh82LEJhjHp+COnL57L7a0ESbM/cOuXIIBatdVjD/ps7vOsofI48++zAC14Ye+8v50flg==",
|
||||
"requires": {
|
||||
"async": "1.5.2",
|
||||
"lru-cache": "4.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"async": {
|
||||
"version": "1.5.2",
|
||||
"resolved": "https://registry.npmjs.org/async/-/async-1.5.2.tgz",
|
||||
"integrity": "sha1-7GphrlZIDAw8skHJVhjiCJL5Zyo="
|
||||
}
|
||||
}
|
||||
},
|
||||
"caseless": {
|
||||
"version": "0.12.0",
|
||||
"resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz",
|
||||
@@ -400,6 +449,15 @@
|
||||
"resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz",
|
||||
"integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac="
|
||||
},
|
||||
"cryptiles": {
|
||||
"version": "0.2.2",
|
||||
"resolved": "https://registry.npmjs.org/cryptiles/-/cryptiles-0.2.2.tgz",
|
||||
"integrity": "sha1-7ZH/HxetE9N0gohZT4pIoNJvMlw=",
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"boom": "0.4.x"
|
||||
}
|
||||
},
|
||||
"css-select": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz",
|
||||
@@ -416,6 +474,12 @@
|
||||
"resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz",
|
||||
"integrity": "sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg=="
|
||||
},
|
||||
"ctype": {
|
||||
"version": "0.5.3",
|
||||
"resolved": "https://registry.npmjs.org/ctype/-/ctype-0.5.3.tgz",
|
||||
"integrity": "sha1-gsGMJGH3QRTvFsE1IkrQuRRMoS8=",
|
||||
"optional": true
|
||||
},
|
||||
"cyclist": {
|
||||
"version": "0.1.1",
|
||||
"resolved": "https://registry.npmjs.org/cyclist/-/cyclist-0.1.1.tgz",
|
||||
@@ -429,6 +493,15 @@
|
||||
"assert-plus": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"ddg-scraper": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/ddg-scraper/-/ddg-scraper-1.0.2.tgz",
|
||||
"integrity": "sha1-YJ+aj3VFvTylll6pBxIh/zn6cCA=",
|
||||
"requires": {
|
||||
"cheerio": "^0.22.0",
|
||||
"request": "^2.80.0"
|
||||
}
|
||||
},
|
||||
"debug": {
|
||||
"version": "3.2.6",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-3.2.6.tgz",
|
||||
@@ -738,6 +811,187 @@
|
||||
"path-is-absolute": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"google-search-scraper": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/google-search-scraper/-/google-search-scraper-0.1.0.tgz",
|
||||
"integrity": "sha1-KZKPKJtK0goAz4DBDDVOBPv718k=",
|
||||
"requires": {
|
||||
"cheerio": "~0.13.1",
|
||||
"request": "~2.33.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"asn1": {
|
||||
"version": "0.1.11",
|
||||
"resolved": "https://registry.npmjs.org/asn1/-/asn1-0.1.11.tgz",
|
||||
"integrity": "sha1-VZvhg3bQik7E2+gId9J4GGObLfc=",
|
||||
"optional": true
|
||||
},
|
||||
"assert-plus": {
|
||||
"version": "0.1.5",
|
||||
"resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-0.1.5.tgz",
|
||||
"integrity": "sha1-7nQAlBMALYTOxyGcasgRgS5yMWA=",
|
||||
"optional": true
|
||||
},
|
||||
"aws-sign2": {
|
||||
"version": "0.5.0",
|
||||
"resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.5.0.tgz",
|
||||
"integrity": "sha1-xXED96F/wDfwLXwuZLYC6iI/fWM=",
|
||||
"optional": true
|
||||
},
|
||||
"cheerio": {
|
||||
"version": "0.13.1",
|
||||
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-0.13.1.tgz",
|
||||
"integrity": "sha1-SK8RNFYbNSf4PZFWxPmo69grBuw=",
|
||||
"requires": {
|
||||
"CSSselect": "~0.4.0",
|
||||
"entities": "0.x",
|
||||
"htmlparser2": "~3.4.0",
|
||||
"underscore": "~1.5"
|
||||
}
|
||||
},
|
||||
"combined-stream": {
|
||||
"version": "0.0.7",
|
||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-0.0.7.tgz",
|
||||
"integrity": "sha1-ATfmV7qlp1QcV6w3rF/AfXO03B8=",
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"delayed-stream": "0.0.5"
|
||||
}
|
||||
},
|
||||
"delayed-stream": {
|
||||
"version": "0.0.5",
|
||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-0.0.5.tgz",
|
||||
"integrity": "sha1-1LH0OpPoKW3+AmlPRoC8N6MTxz8=",
|
||||
"optional": true
|
||||
},
|
||||
"domhandler": {
|
||||
"version": "2.2.1",
|
||||
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.2.1.tgz",
|
||||
"integrity": "sha1-Wd+dzSJ+gIs2Wuc+H2aErD2Ub8I=",
|
||||
"requires": {
|
||||
"domelementtype": "1"
|
||||
}
|
||||
},
|
||||
"domutils": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/domutils/-/domutils-1.3.0.tgz",
|
||||
"integrity": "sha1-mtTVm1r2ymhMYv5tdo7xcOcN8ZI=",
|
||||
"requires": {
|
||||
"domelementtype": "1"
|
||||
}
|
||||
},
|
||||
"entities": {
|
||||
"version": "0.5.0",
|
||||
"resolved": "https://registry.npmjs.org/entities/-/entities-0.5.0.tgz",
|
||||
"integrity": "sha1-9hHLWuIhBQ4AEsZpeVA/164ZzEk="
|
||||
},
|
||||
"forever-agent": {
|
||||
"version": "0.5.2",
|
||||
"resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.5.2.tgz",
|
||||
"integrity": "sha1-bQ4JxJIflKJ/Y9O0nF/v8epMUTA="
|
||||
},
|
||||
"form-data": {
|
||||
"version": "0.1.4",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-0.1.4.tgz",
|
||||
"integrity": "sha1-kavXiKupcCsaq/qLwBAxoqyeOxI=",
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"async": "~0.9.0",
|
||||
"combined-stream": "~0.0.4",
|
||||
"mime": "~1.2.11"
|
||||
}
|
||||
},
|
||||
"htmlparser2": {
|
||||
"version": "3.4.0",
|
||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.4.0.tgz",
|
||||
"integrity": "sha1-oc1l9YI60oXhnWOwha1yLQpR6uc=",
|
||||
"requires": {
|
||||
"domelementtype": "1",
|
||||
"domhandler": "2.2",
|
||||
"domutils": "1.3",
|
||||
"readable-stream": "1.1"
|
||||
}
|
||||
},
|
||||
"http-signature": {
|
||||
"version": "0.10.1",
|
||||
"resolved": "https://registry.npmjs.org/http-signature/-/http-signature-0.10.1.tgz",
|
||||
"integrity": "sha1-T72sEyVZqoMjEh5UB3nAoBKyfmY=",
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"asn1": "0.1.11",
|
||||
"assert-plus": "^0.1.5",
|
||||
"ctype": "0.5.3"
|
||||
}
|
||||
},
|
||||
"mime": {
|
||||
"version": "1.2.11",
|
||||
"resolved": "https://registry.npmjs.org/mime/-/mime-1.2.11.tgz",
|
||||
"integrity": "sha1-WCA+7Ybjpe8XrtK32evUfwpg3RA="
|
||||
},
|
||||
"node-uuid": {
|
||||
"version": "1.4.8",
|
||||
"resolved": "https://registry.npmjs.org/node-uuid/-/node-uuid-1.4.8.tgz",
|
||||
"integrity": "sha1-sEDrCSOWivq/jTL7HxfxFn/auQc="
|
||||
},
|
||||
"oauth-sign": {
|
||||
"version": "0.3.0",
|
||||
"resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.3.0.tgz",
|
||||
"integrity": "sha1-y1QPk7srIqfVlBaRoojWDo6pOG4=",
|
||||
"optional": true
|
||||
},
|
||||
"qs": {
|
||||
"version": "0.6.6",
|
||||
"resolved": "https://registry.npmjs.org/qs/-/qs-0.6.6.tgz",
|
||||
"integrity": "sha1-bgFQmP9RlouKPIGQAdXyyJvEsQc="
|
||||
},
|
||||
"readable-stream": {
|
||||
"version": "1.1.14",
|
||||
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz",
|
||||
"integrity": "sha1-fPTFTvZI44EwhMY23SB54WbAgdk=",
|
||||
"requires": {
|
||||
"core-util-is": "~1.0.0",
|
||||
"inherits": "~2.0.1",
|
||||
"isarray": "0.0.1",
|
||||
"string_decoder": "~0.10.x"
|
||||
}
|
||||
},
|
||||
"request": {
|
||||
"version": "2.33.0",
|
||||
"resolved": "https://registry.npmjs.org/request/-/request-2.33.0.tgz",
|
||||
"integrity": "sha1-UWeHgTFyYHDsYzdS6iMKI3ncZf8=",
|
||||
"requires": {
|
||||
"aws-sign2": "~0.5.0",
|
||||
"forever-agent": "~0.5.0",
|
||||
"form-data": "~0.1.0",
|
||||
"hawk": "~1.0.0",
|
||||
"http-signature": "~0.10.0",
|
||||
"json-stringify-safe": "~5.0.0",
|
||||
"mime": "~1.2.9",
|
||||
"node-uuid": "~1.4.0",
|
||||
"oauth-sign": "~0.3.0",
|
||||
"qs": "~0.6.0",
|
||||
"tough-cookie": ">=0.12.0",
|
||||
"tunnel-agent": "~0.3.0"
|
||||
}
|
||||
},
|
||||
"string_decoder": {
|
||||
"version": "0.10.31",
|
||||
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz",
|
||||
"integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ="
|
||||
},
|
||||
"tunnel-agent": {
|
||||
"version": "0.3.0",
|
||||
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.3.0.tgz",
|
||||
"integrity": "sha1-rWgbaPUyGtKCfEz7G31d8s/pQu4=",
|
||||
"optional": true
|
||||
},
|
||||
"underscore": {
|
||||
"version": "1.5.2",
|
||||
"resolved": "https://registry.npmjs.org/underscore/-/underscore-1.5.2.tgz",
|
||||
"integrity": "sha1-EzXF5PXm0zu7SwBrqMhqAPVW3gg="
|
||||
}
|
||||
}
|
||||
},
|
||||
"har-schema": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz",
|
||||
@@ -757,6 +1011,24 @@
|
||||
"resolved": "https://registry.npmjs.org/hat/-/hat-0.0.3.tgz",
|
||||
"integrity": "sha1-uwFKnmSzeIrtgAWRdBPU/z1QLYo="
|
||||
},
|
||||
"hawk": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/hawk/-/hawk-1.0.0.tgz",
|
||||
"integrity": "sha1-uQuxaYByhUEdp//LjdJZhQLTtS0=",
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"boom": "0.4.x",
|
||||
"cryptiles": "0.2.x",
|
||||
"hoek": "0.9.x",
|
||||
"sntp": "0.2.x"
|
||||
}
|
||||
},
|
||||
"hoek": {
|
||||
"version": "0.9.1",
|
||||
"resolved": "https://registry.npmjs.org/hoek/-/hoek-0.9.1.tgz",
|
||||
"integrity": "sha1-PTIkYrrfB3Fup+uFuviAec3c5QU=",
|
||||
"optional": true
|
||||
},
|
||||
"htmlparser2": {
|
||||
"version": "3.10.1",
|
||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.1.tgz",
|
||||
@@ -1132,6 +1404,15 @@
|
||||
"inherits": "^2.0.1"
|
||||
}
|
||||
},
|
||||
"lru-cache": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.0.0.tgz",
|
||||
"integrity": "sha1-tcvwFVbBaWb+vlTO7A+03JDfbCg=",
|
||||
"requires": {
|
||||
"pseudomap": "^1.0.1",
|
||||
"yallist": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"magnet-uri": {
|
||||
"version": "5.2.4",
|
||||
"resolved": "https://registry.npmjs.org/magnet-uri/-/magnet-uri-5.2.4.tgz",
|
||||
@@ -1294,6 +1575,17 @@
|
||||
"resolved": "https://registry.npmjs.org/node-gzip/-/node-gzip-1.1.2.tgz",
|
||||
"integrity": "sha512-ZB6zWpfZHGtxZnPMrJSKHVPrRjURoUzaDbLFj3VO70mpLTW5np96vXyHwft4Id0o+PYIzgDkBUjIzaNHhQ8srw=="
|
||||
},
|
||||
"nodejs-bing": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/nodejs-bing/-/nodejs-bing-0.1.0.tgz",
|
||||
"integrity": "sha1-3i/99AtBVJKFxbwMUX3VTw5fTYo=",
|
||||
"requires": {
|
||||
"cheerio": "^0.22.0",
|
||||
"request": "^2.79.0",
|
||||
"request-promise": "^4.1.1",
|
||||
"urlencode": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"nth-check": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.2.tgz",
|
||||
@@ -1377,7 +1669,7 @@
|
||||
}
|
||||
},
|
||||
"parse-torrent-title": {
|
||||
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#8cc4327ee24692a0b598842d5620b2af9c13289b",
|
||||
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#986c51f919c09e8f3a7937d22c8aea1fb9759090",
|
||||
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#master"
|
||||
},
|
||||
"parseurl": {
|
||||
@@ -1404,7 +1696,8 @@
|
||||
"bncode": "~0.2.3",
|
||||
"byline": "3.1.2",
|
||||
"compact2string": "~1.0.0",
|
||||
"hat": "0.0.3"
|
||||
"hat": "0.0.3",
|
||||
"needle": "github:Ivshti/needle"
|
||||
},
|
||||
"dependencies": {
|
||||
"bncode": {
|
||||
@@ -1431,8 +1724,8 @@
|
||||
"integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
|
||||
},
|
||||
"needle": {
|
||||
"version": "git+https://github.com/Ivshti/needle.git#16cceb74e24babd9c474071be1a15a09beac300d",
|
||||
"from": "git+https://github.com/Ivshti/needle.git",
|
||||
"version": "github:Ivshti/needle#16cceb74e24babd9c474071be1a15a09beac300d",
|
||||
"from": "github:Ivshti/needle",
|
||||
"requires": {
|
||||
"debug": "^2.1.2",
|
||||
"iconv-lite": "^0.4.4"
|
||||
@@ -1602,6 +1895,11 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"pseudomap": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz",
|
||||
"integrity": "sha1-8FKijacOYYkX7wqKw0wa5aaChrM="
|
||||
},
|
||||
"psl": {
|
||||
"version": "1.1.31",
|
||||
"resolved": "https://registry.npmjs.org/psl/-/psl-1.1.31.tgz",
|
||||
@@ -2046,6 +2344,15 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"sntp": {
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/sntp/-/sntp-0.2.4.tgz",
|
||||
"integrity": "sha1-+4hfGLDzqtGJ+CSGJTa87ux1CQA=",
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"hoek": "0.9.x"
|
||||
}
|
||||
},
|
||||
"speedometer": {
|
||||
"version": "0.1.4",
|
||||
"resolved": "https://registry.npmjs.org/speedometer/-/speedometer-0.1.4.tgz",
|
||||
@@ -2286,6 +2593,14 @@
|
||||
"punycode": "^2.1.0"
|
||||
}
|
||||
},
|
||||
"urlencode": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/urlencode/-/urlencode-1.1.0.tgz",
|
||||
"integrity": "sha1-HyuibwE8hfATP3o61v8nMK33y7c=",
|
||||
"requires": {
|
||||
"iconv-lite": "~0.4.11"
|
||||
}
|
||||
},
|
||||
"util-deprecate": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
|
||||
@@ -2367,6 +2682,11 @@
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz",
|
||||
"integrity": "sha1-pcbVMr5lbiPbgg77lDofBJmNY68="
|
||||
},
|
||||
"yallist": {
|
||||
"version": "2.1.2",
|
||||
"resolved": "https://registry.npmjs.org/yallist/-/yallist-2.1.2.tgz",
|
||||
"integrity": "sha1-HBH5IY8HYImkfdUS+TxmmaaoHVI="
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,19 +12,22 @@
|
||||
"author": "TheBeastLT <pauliox@beyond.lt>",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"cache-manager": "^2.9.0",
|
||||
"bottleneck": "^2.16.2",
|
||||
"cheerio": "^0.22.0",
|
||||
"cloudscraper": "^3.0.0",
|
||||
"ddg-scraper": "^1.0.2",
|
||||
"express": "^4.16.4",
|
||||
"google-search-scraper": "^0.1.0",
|
||||
"imdb": "^1.1.0",
|
||||
"is-video": "^1.0.1",
|
||||
"line-by-line": "^0.1.6",
|
||||
"lodash": "^4.17.11",
|
||||
"magnet-uri": "^5.1.7",
|
||||
"moment": "^2.24.0",
|
||||
"name-to-imdb": "^2.3.0",
|
||||
"needle": "^2.2.4",
|
||||
"node-gzip": "^1.1.2",
|
||||
"nodejs-bing": "^0.1.0",
|
||||
"parse-torrent": "^6.1.2",
|
||||
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#master",
|
||||
"peer-search": "^0.6.x",
|
||||
|
||||
@@ -2,9 +2,14 @@ const cheerio = require('cheerio');
|
||||
const needle = require('needle');
|
||||
const moment = require('moment');
|
||||
|
||||
const defaultProxies = ['https://pirateproxy.sh', 'https://thepiratebay.org'];
|
||||
const defaultProxies = [
|
||||
'https://thepiratebay.org',
|
||||
'https://thepiratebay.vip',
|
||||
'https://proxybay.pro',
|
||||
'https://ukpiratebayproxy.com',
|
||||
'https://thepiratebayproxy.info'];
|
||||
const dumpUrl = '/static/dump/csv/';
|
||||
const defaultTimeout = 5000;
|
||||
const defaultTimeout = 30000;
|
||||
|
||||
const errors = {
|
||||
REQUEST_ERROR: { code: 'REQUEST_ERROR' },
|
||||
@@ -76,6 +81,18 @@ Categories = {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Fetches and parses a single torrent detail page, requesting it from every
 * proxy at once and using the first successful response.
 *
 * @param {string|number} torrentId - id placed in the `/torrent/<id>` path.
 * @param {Object} [config] - optional settings; `proxyList` replaces the
 *   default proxies and the whole object is forwarded to `singleRequest`.
 * @param {number} [retries=2] - attempts left; every failed attempt (request
 *   or parsing) retries with one attempt less.
 * @returns {Promise<Object>} resolves with the value of `parseTorrentPage`;
 *   rejects when the id is missing or no attempts are left.
 */
function torrent(torrentId, config = {}, retries = 2) {
  // `!(retries > 0)` instead of `retries === 0`: a negative or NaN count must
  // end the recursion instead of retrying forever.
  if (!torrentId || !(retries > 0)) {
    return Promise.reject(new Error(`Failed ${torrentId} torrent lookup`));
  }

  const proxyList = config.proxyList || defaultProxies;
  const requests = proxyList
      .map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`, config));

  return raceFirstSuccessful(requests)
      .then((body) => parseTorrentPage(body))
      .catch(() => torrent(torrentId, config, retries - 1));
}
|
||||
|
||||
function search(keyword, config = {}, retries = 2) {
|
||||
if (!keyword || retries === 0) {
|
||||
return Promise.reject(new Error(`Failed ${keyword} search`));
|
||||
@@ -87,7 +104,7 @@ function search(keyword, config = {}, retries = 2) {
|
||||
return raceFirstSuccessful(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/99/${category}`, config)))
|
||||
.then((body) => parseBody(body))
|
||||
.catch(() => search(keyword, config, retries - 1));
|
||||
.catch((err) => search(keyword, config, retries - 1));
|
||||
}
|
||||
|
||||
function dumps(config = {}, retries = 2) {
|
||||
@@ -109,26 +126,23 @@ function dumps(config = {}, retries = 2) {
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
|
||||
return new Promise(((resolve, reject) => {
|
||||
needle.get(requestUrl,
|
||||
{ open_timeout: timeout, follow: 2 },
|
||||
(err, res, body) => {
|
||||
if (err || !body) {
|
||||
reject(err || errors.REQUEST_ERROR);
|
||||
} else if (body.includes('Access Denied') && !body.includes('<title>The Pirate Bay')) {
|
||||
console.log(`Access Denied: ${url}`);
|
||||
reject(new Error(`Access Denied: ${url}`));
|
||||
} else if (body.includes('502: Bad gateway') ||
|
||||
return needle('get', requestUrl, { open_timeout: timeout, follow: 2 })
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (body.includes('Access Denied') && !body.includes('<title>The Pirate Bay')) {
|
||||
console.log(`Access Denied: ${requestUrl}`);
|
||||
throw new Error(`Access Denied: ${requestUrl}`);
|
||||
} else if (body.includes('502: Bad gateway') ||
|
||||
body.includes('403 Forbidden') ||
|
||||
body.includes('Database maintenance') ||
|
||||
body.includes('Origin DNS error') ||
|
||||
!body.includes('<title>The Pirate Bay')) {
|
||||
reject(errors.REQUEST_ERROR);
|
||||
}
|
||||
|
||||
resolve(body);
|
||||
});
|
||||
}));
|
||||
!(body.includes('<title>The Pirate Bay') || body.includes('TPB</title>') || body.includes(dumpUrl))) {
|
||||
throw new Error(`Invalid body contents: ${requestUrl}`);
|
||||
}
|
||||
return body;
|
||||
});
|
||||
}
|
||||
|
||||
function parseBody(body) {
|
||||
@@ -143,7 +157,7 @@ function parseBody(body) {
|
||||
|
||||
$('table[id=\'searchResult\'] tr').each(function() {
|
||||
const name = $(this).find('.detLink').text();
|
||||
if (!name) {
|
||||
if (!name || name === 'Do NOT download any torrent before hiding your IP with a VPN.') {
|
||||
return;
|
||||
}
|
||||
torrents.push({
|
||||
@@ -152,13 +166,51 @@ function parseBody(body) {
|
||||
leechers: parseInt($(this).find('td[align=\'right\']').eq(1).text(), 10),
|
||||
magnetLink: $(this).find('a[title=\'Download this torrent using magnet\']').attr('href'),
|
||||
category: parseInt($(this).find('a[title=\'More from this category\']').eq(0).attr('href').match(/\d+$/)[0], 10),
|
||||
subcategory: parseInt($(this).find('a[title=\'More from this category\']').eq(1).attr('href').match(/\d+$/)[0], 10)
|
||||
subcategory: parseInt($(this).find('a[title=\'More from this category\']').eq(1).attr('href').match(/\d+$/)[0], 10),
|
||||
size: parseSize($(this).find('.detDesc').text().match(/(?:,\s?Size\s)(.+),/)[1])
|
||||
});
|
||||
});
|
||||
resolve(torrents);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Extracts torrent details from the HTML of a torrent detail page.
 *
 * @param {string} body - HTML of the detail page.
 * @returns {Promise<Object>} resolves with `{ name, seeders, leechers,
 *   magnetLink, category, subcategory, size }`. `category` is always
 *   `Categories.VIDEO.ALL`. Rejects when the document cannot be loaded or an
 *   expected element is missing (the resulting TypeError is thrown inside the
 *   promise executor and therefore surfaces as a rejection).
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      reject(new Error(errors.PARSER_ERROR));
      // Stop here: without the return the code below would call the falsy `$`.
      return;
    }

    const details = $('dl[class=\'col1\']');
    const stats = $('dl[class=\'col2\']').find('dd');
    const subcategoryHref = details.find('a[title=\'More from this category\']').eq(0).attr('href');
    const sizeInBytes = details.find('dd').eq(2).text().match(/(\d+)(?:.?Bytes)/)[1];

    resolve({
      name: $('div[id=\'title\']').text().trim(),
      seeders: parseInt(stats.eq(2).text(), 10),
      leechers: parseInt(stats.eq(3).text(), 10),
      magnetLink: $('div[id=\'details\']').find('a[title=\'Get this torrent\']').attr('href'),
      category: Categories.VIDEO.ALL,
      subcategory: parseInt(subcategoryHref.match(/\d+$/)[0], 10),
      size: parseSize(sizeInBytes)
    });
  });
}
|
||||
|
||||
/**
 * Converts a human readable size such as `"1.37 GiB"` into whole bytes.
 *
 * @param {string} sizeText - leading number optionally followed by a binary
 *   unit (`KiB`, `MiB`, `GiB`, `TiB`); text without a known unit is read as
 *   a plain byte count.
 * @returns {number|undefined} size in bytes rounded down, `undefined` for
 *   empty input, `NaN` when the text does not start with a number.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }

  // Multipliers are powers of two, so scaling the parsed float is exact.
  const units = [
    ['TiB', 1024 * 1024 * 1024 * 1024],
    ['GiB', 1024 * 1024 * 1024],
    ['MiB', 1024 * 1024],
    ['KiB', 1024]
  ];
  const value = parseFloat(sizeText.trim());
  const unit = units.find(([suffix]) => sizeText.includes(suffix));

  return Math.floor(unit ? value * unit[1] : value);
}
|
||||
|
||||
function raceFirstSuccessful(promises) {
|
||||
return Promise.all(promises.map((p) => {
|
||||
// If a request fails, count that as a resolution so it will keep
|
||||
@@ -176,4 +228,4 @@ function raceFirstSuccessful(promises) {
|
||||
);
|
||||
}
|
||||
|
||||
module.exports = { search, dumps, Categories };
|
||||
module.exports = { torrent, search, dumps, Categories };
|
||||
|
||||
@@ -4,6 +4,7 @@ const Bottleneck = require('bottleneck');
|
||||
const { parse } = require('parse-torrent-title');
|
||||
const decode = require('magnet-uri');
|
||||
const horriblesubs = require('./api/horriblesubs');
|
||||
const { Type } = require('../lib/types');
|
||||
const { torrentFiles, currentSeeders } = require('../lib/torrent');
|
||||
const repository = require('../lib/repository');
|
||||
const { getImdbId, getMetadata } = require('../lib/metadata');
|
||||
@@ -72,7 +73,7 @@ async function _constructSingleEntry(metadata, single, mirror) {
|
||||
infoHash: mirror.infoHash,
|
||||
provider: NAME,
|
||||
title: title,
|
||||
type: 'anime',
|
||||
type: Type.ANIME,
|
||||
imdbId: metadata.imdbId,
|
||||
uploadDate: single.uploadDate,
|
||||
seeders: seeders,
|
||||
|
||||
@@ -6,9 +6,11 @@ const LineByLineReader = require('line-by-line');
|
||||
const fs = require('fs');
|
||||
const { parse } = require('parse-torrent-title');
|
||||
const pirata = require('./api/thepiratebay');
|
||||
const { torrentFiles } = require('../lib/torrent');
|
||||
const bing = require('nodejs-bing');
|
||||
const { Type } = require('../lib/types');
|
||||
const repository = require('../lib/repository');
|
||||
const { getImdbId, escapeTitle } = require('../lib/metadata');
|
||||
const { parseTorrentFiles } = require('../lib/torrentFiles');
|
||||
|
||||
const NAME = 'ThePirateBay';
|
||||
const CSV_FILE_PATH = '/tmp/tpb_dump.csv';
|
||||
@@ -17,18 +19,27 @@ const limiter = new Bottleneck({maxConcurrent: 40});
|
||||
|
||||
async function scrape() {
|
||||
const lastScraped = await repository.getProvider({ name: NAME });
|
||||
const lastDump = await pirata.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
|
||||
const lastDump = { updatedAt: 2147000000 };
|
||||
//const lastDump = await pirata.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
|
||||
|
||||
if (!lastScraped.lastScraped || lastScraped.lastScraped < lastDump.updatedAt) {
|
||||
console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
|
||||
//await downloadDump(lastDump);
|
||||
|
||||
let entriesProcessed = 0;
|
||||
const lr = new LineByLineReader(CSV_FILE_PATH);
|
||||
lr.on('line', (line) => {
|
||||
if (line.includes("#ADDED")) {
|
||||
return;
|
||||
}
|
||||
if (entriesProcessed % 1000 === 0) {
|
||||
console.log(`Processed ${entriesProcessed} entries`);
|
||||
}
|
||||
const row = line.match(/(?<=^|;)(".*"|[^;]+)(?=;|$)/g);
|
||||
if (row.length !== 4) {
|
||||
console.log(`Invalid row: ${line}`);
|
||||
return;
|
||||
}
|
||||
const torrent = {
|
||||
uploadDate: moment(row[0], 'YYYY-MMM-DD HH:mm:ss').toDate(),
|
||||
infoHash: Buffer.from(row[1], 'base64').toString('hex'),
|
||||
@@ -50,9 +61,10 @@ async function scrape() {
|
||||
}
|
||||
|
||||
limiter.schedule(() => processTorrentRecord(torrent)
|
||||
.catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
|
||||
.then(() => limiter.empty())
|
||||
.then((empty) => empty && lr.resume());
|
||||
.catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
|
||||
.then(() => limiter.empty())
|
||||
.then((empty) => empty && lr.resume())
|
||||
.then(() => entriesProcessed++);
|
||||
});
|
||||
lr.on('error', (err) => {
|
||||
console.log(err);
|
||||
@@ -77,80 +89,90 @@ const seriesCategories = [
|
||||
pirata.Categories.VIDEO.TV_SHOWS_HD
|
||||
];
|
||||
async function processTorrentRecord(record) {
|
||||
const persisted = await repository.getSkipTorrent(record)
|
||||
.catch(() => repository.getTorrent(record)).catch(() => undefined);
|
||||
if (persisted) {
|
||||
const alreadyExists = await repository.getSkipTorrent(record)
|
||||
.catch(() => repository.getTorrent(record))
|
||||
.catch(() => undefined);
|
||||
if (alreadyExists) {
|
||||
return;
|
||||
}
|
||||
|
||||
let page = 0;
|
||||
let torrentFound;
|
||||
while (!torrentFound && page < 5) {
|
||||
const torrents = await pirata.search(record.title.replace(/[\W\s]+/, ' '), { page: page });
|
||||
torrentFound = torrents.
|
||||
filter(torrent => torrent.magnetLink.toLowerCase().includes(record.infoHash))[0];
|
||||
page = torrents.length === 0 ? 1000 : page + 1;
|
||||
}
|
||||
const torrentFound = await findTorrent(record);
|
||||
|
||||
if (!torrentFound) {
|
||||
console.log(`not found: ${JSON.stringify(record)}`);
|
||||
//console.log(`not found: ${JSON.stringify(record)}`);
|
||||
repository.createSkipTorrent(record);
|
||||
return;
|
||||
}
|
||||
if (!allowedCategories.includes(torrentFound.subcategory)) {
|
||||
console.log(`wrong category: ${torrentFound.name}`);
|
||||
//console.log(`wrong category: ${torrentFound.name}`);
|
||||
repository.createSkipTorrent(record);
|
||||
return;
|
||||
}
|
||||
|
||||
const type = seriesCategories.includes(torrentFound.subcategory) ? 'series' : 'movie';
|
||||
console.log(`imdbId search: ${torrentFound.name}`);
|
||||
const type = seriesCategories.includes(torrentFound.subcategory) ? Type.SERIES : Type.MOVIE;
|
||||
const titleInfo = parse(torrentFound.name);
|
||||
const imdbId = await getImdbId({
|
||||
name: escapeTitle(titleInfo.title).toLowerCase(),
|
||||
year: titleInfo.year,
|
||||
type: type
|
||||
}).catch(() => undefined);
|
||||
}).catch((error) => undefined);
|
||||
const torrent = {
|
||||
infoHash: record.infoHash,
|
||||
provider: NAME,
|
||||
title: torrentFound.name,
|
||||
size: record.size,
|
||||
type: type,
|
||||
uploadDate: record.uploadDate,
|
||||
seeders: torrentFound.seeders,
|
||||
};
|
||||
|
||||
if (!imdbId) {
|
||||
console.log(`imdbId not found: ${torrentFound.name}`);
|
||||
repository.updateTorrent({
|
||||
infoHash: record.infoHash,
|
||||
provider: NAME,
|
||||
title: torrentFound.name,
|
||||
uploadDate: record.uploadDate,
|
||||
seeders: torrentFound.seeders,
|
||||
});
|
||||
repository.createFailedImdbTorrent(torrent);
|
||||
return;
|
||||
}
|
||||
|
||||
if (type === 'movie' || titleInfo.episode) {
|
||||
repository.updateTorrent({
|
||||
infoHash: record.infoHash,
|
||||
provider: NAME,
|
||||
title: torrentFound.name,
|
||||
imdbId: imdbId,
|
||||
uploadDate: record.uploadDate,
|
||||
seeders: torrentFound.seeders,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
const files = await torrentFiles(record).catch(() => []);
|
||||
const files = await parseTorrentFiles(torrent, imdbId);
|
||||
if (!files || !files.length) {
|
||||
console.log(`no video files found: ${torrentFound.name}`);
|
||||
return;
|
||||
}
|
||||
|
||||
repository.updateTorrent({
|
||||
infoHash: record.infoHash,
|
||||
provider: NAME,
|
||||
title: torrentFound.name,
|
||||
imdbId: imdbId,
|
||||
uploadDate: record.uploadDate,
|
||||
seeders: torrentFound.seeders,
|
||||
files: files
|
||||
})
|
||||
repository.createTorrent(torrent)
|
||||
.then(() => files.forEach(file => repository.createFile(file)));
|
||||
console.log(`Created entry for ${torrentFound.name}`);
|
||||
}
|
||||
|
||||
/**
 * Locates the torrent for a dump record: first through the site search and,
 * if that lookup rejects, through the Bing based lookup.
 *
 * @param {Object} record - dump record handed to both lookups unchanged.
 * @returns {Promise<Object>} the torrent found by whichever lookup succeeds;
 *   rejects with the Bing lookup error when both fail.
 */
async function findTorrent(record) {
  try {
    return await findTorrentInSource(record);
  } catch (sourceError) {
    return findTorrentViaBing(record);
  }
}
|
||||
|
||||
/**
 * Searches the site by the record title and returns the search result whose
 * magnet link contains the record info hash.
 *
 * At most five result pages are requested; paging stops early when a page
 * comes back empty.
 *
 * @param {Object} record - dump record; reads `title` and `infoHash`
 *   (compared against the lower-cased magnet link).
 * @returns {Promise<Object>} the matching search result.
 * @throws {Error} (as a rejection) when no page contains the info hash.
 */
async function findTorrentInSource(record) {
  const maxPages = 5;
  // The `g` flag matters: without it only the first run of non-word
  // characters is replaced and the rest of the title keeps its punctuation.
  const keyword = record.title.replace(/[\W\s]+/g, ' ');

  for (let page = 0; page < maxPages; page++) {
    const torrents = await pirata.search(keyword, { page: page });
    // Rows without a magnet link are skipped instead of aborting the lookup.
    const match = torrents.find((candidate) => candidate.magnetLink &&
        candidate.magnetLink.toLowerCase().includes(record.infoHash));

    if (match) {
      return match;
    }
    if (torrents.length === 0) {
      break;
    }
  }

  throw new Error(`Failed to find torrent ${record.title}`);
}
|
||||
|
||||
/**
 * Looks the record up through a Bing web search for its info hash, takes the
 * torrent id from the first result that looks like a torrent detail page and
 * loads that page through `pirata.torrent`.
 *
 * @param {Object} record - dump record; reads `infoHash` (the search query)
 *   and `title` (used in the error message only).
 * @returns {Promise<Object>} the value resolved by `pirata.torrent`.
 * @throws {Error} (as a rejection) when no result matches or the matching
 *   result link carries no `/torrent/<id>/` segment.
 */
async function findTorrentViaBing(record) {
  const results = await bing.web(`${record.infoHash}`);
  // Results may lack a description; treat those as non-matching instead of
  // failing the whole lookup with a TypeError.
  const hit = (results || []).find((result) => {
    const description = result.description || '';
    return description.includes('Direct download via magnet link') ||
        description.includes('Get this torrent');
  });
  const idMatch = hit && hit.link ? hit.link.match(/torrent\/(\w+)\//) : null;

  if (!idMatch) {
    throw new Error(`Failed to find torrent ${record.title}`);
  }
  return pirata.torrent(idMatch[1]);
}
|
||||
|
||||
function downloadDump(dump) {
|
||||
|
||||
Reference in New Issue
Block a user