remove scraper module

This commit is contained in:
TheBeastLT
2022-06-16 17:44:38 +03:00
parent a253e8e0b0
commit 554c07d636
69 changed files with 0 additions and 61771 deletions

View File

@@ -1,37 +0,0 @@
# Torrentio Scraper
## Initial dumps
### The Pirate Bay
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
https://thepiratebay.org/static/dump/csv/
### Kickass
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
https://web.archive.org/web/20150416071329/http://kickass.to/api
### RARBG
Scrape the movie and TV catalogs using [www.webscraper.io](https://www.webscraper.io/) to collect the available `imdbIds`, then use those ids to search for torrents via the API.
Movies sitemap
```json
{"_id":"rarbg-movies","startUrl":["https://rarbgmirror.org/catalog/movies/[1-4235]"],"selectors":[{"id":"rarbg-movie-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width='110']","multiple":true,"regex":"tt[0-9]+","delay":0}]}
```
TV sitemap
```json
{"_id":"rarbg-tv","startUrl":["https://rarbgmirror.org/catalog/tv/[1-609]"],"selectors":[{"id":"rarbg-tv-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width='110']","multiple":true,"regex":"tt[0-9]+","delay":0}]}
```
### Migrating Database
When migrating the database to a new one, it is important to alter the `files_id_seq` sequence so that it restarts at the maximum file id value plus 1.
```sql
ALTER SEQUENCE files_id_seq RESTART WITH <last_file_id + 1>;
```

View File

@@ -1,14 +0,0 @@
const express = require("express");
const server = express();
const { connect } = require('./lib/repository');
const { startScraper } = require('./scheduler/scheduler')
// Root endpoint: every GET on '/' is answered with a bare 200 status.
server.get('/', (req, res) => {
  res.sendStatus(200);
});

// Once the server is listening, await the repository connect() and only then start the scraper.
const port = process.env.PORT || 7000;
server.listen(port, async function onListening() {
  await connect();
  console.log('Scraper started');
  startScraper();
});

View File

@@ -1,72 +0,0 @@
const cacheManager = require('cache-manager');
const mangodbStore = require('cache-manager-mongodb');
// Common prefix shared by every cache key built in this module.
const GLOBAL_KEY_PREFIX = 'stremio-torrentio';
// Per-category key prefixes derived from the global prefix.
const IMDB_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|imdb_id`;
const KITSU_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|kitsu_id`;
const METADATA_PREFIX = `${GLOBAL_KEY_PREFIX}|metadata`;
const TRACKERS_KEY_PREFIX = `${GLOBAL_KEY_PREFIX}|trackers`;
// TTL values handed to cache-manager as `ttl`.
// NOTE(review): GLOBAL_TTL and MEMORY_TTL both read METADATA_TTL, so setting that variable
// overrides both defaults with the same (string) value — confirm this is intended.
const GLOBAL_TTL = process.env.METADATA_TTL || 7 * 24 * 60 * 60; // 7 days
const MEMORY_TTL = process.env.METADATA_TTL || 2 * 60 * 60; // 2 hours
const TRACKERS_TTL = 2 * 24 * 60 * 60; // 2 days
// When set, the remote cache is backed by MongoDB; otherwise it falls back to memory.
const MONGO_URI = process.env.MONGODB_URI;
// Both caches are created eagerly at module load (the factory functions below are hoisted).
const memoryCache = initiateMemoryCache();
const remoteCache = initiateRemoteCache();
/**
 * Builds the long-lived cache: MongoDB-backed when MONGO_URI is set,
 * otherwise an in-memory cache with the same TTL.
 */
function initiateRemoteCache() {
  if (!MONGO_URI) {
    const memoryConfig = { store: 'memory', ttl: GLOBAL_TTL };
    return cacheManager.caching(memoryConfig);
  }

  const mongoConfig = {
    store: mangodbStore,
    uri: MONGO_URI,
    options: {
      collection: 'torrentio_scraper_collection',
      useUnifiedTopology: true
    },
    ttl: GLOBAL_TTL,
    ignoreCacheErrors: true
  };
  return cacheManager.caching(mongoConfig);
}
/**
 * Builds the short-lived in-memory cache that uses MEMORY_TTL.
 */
function initiateMemoryCache() {
  const memoryConfig = { store: 'memory', ttl: MEMORY_TTL };
  return cacheManager.caching(memoryConfig);
}
/**
 * Delegates to `cache.wrap`, forwarding the key, the producer function and the options untouched.
 *
 * @param {object} cache - cache instance exposing `wrap`
 * @param {string} key - cache key
 * @param {Function} method - producer invoked by the cache
 * @param {object} options - options forwarded to `wrap` (e.g. `ttl`)
 * @returns {*} whatever `cache.wrap` returns
 */
function cacheWrap(cache, key, method, options) {
  const wrapped = cache.wrap(key, method, options);
  return wrapped;
}
/**
 * Wraps `method` in the remote cache under an imdb-id key, using GLOBAL_TTL.
 */
function cacheWrapImdbId(key, method) {
  const cacheKey = `${IMDB_ID_PREFIX}:${key}`;
  const options = { ttl: GLOBAL_TTL };
  return cacheWrap(remoteCache, cacheKey, method, options);
}
/**
 * Wraps `method` in the remote cache under a kitsu-id key, using GLOBAL_TTL.
 */
function cacheWrapKitsuId(key, method) {
  const cacheKey = `${KITSU_ID_PREFIX}:${key}`;
  const options = { ttl: GLOBAL_TTL };
  return cacheWrap(remoteCache, cacheKey, method, options);
}
/**
 * Wraps `method` in the in-memory cache under a metadata key, using MEMORY_TTL.
 */
function cacheWrapMetadata(id, method) {
  const cacheKey = `${METADATA_PREFIX}:${id}`;
  const options = { ttl: MEMORY_TTL };
  return cacheWrap(memoryCache, cacheKey, method, options);
}
/**
 * Wraps `method` in the in-memory cache under the single trackers key, using TRACKERS_TTL.
 */
function cacheTrackers(method) {
  const cacheKey = `${TRACKERS_KEY_PREFIX}`;
  const options = { ttl: TRACKERS_TTL };
  return cacheWrap(memoryCache, cacheKey, method, options);
}
// Public API: only the key-specific wrappers are exported; the cache instances and the
// generic cacheWrap helper stay module-private.
module.exports = {
cacheWrapImdbId,
cacheWrapKitsuId,
cacheWrapMetadata,
cacheTrackers
};

View File

@@ -1,64 +0,0 @@
// File extensions (lowercase, without the dot) that isVideo() accepts.
const VIDEO_EXTENSIONS = [
"3g2",
"3gp",
"avi",
"flv",
"mkv",
"mk3d",
"mov",
"mp2",
"mp4",
"m4v",
"mpe",
"mpeg",
"mpg",
"mpv",
"webm",
"wmv",
"ogm",
"divx"
];
// File extensions (lowercase, without the dot) that isSubtitle() accepts.
const SUBTITLE_EXTENSIONS = [
"aqt",
"gsub",
"jss",
"sub",
"ttxt",
"pjs",
"psb",
"rt",
"smi",
"slt",
"ssf",
"srt",
"ssa",
"ass",
"usf",
"idx",
"vtt"
];
// File extensions (lowercase, without the dot) that isDisk() accepts.
const DISK_EXTENSIONS = [
"iso",
"m2ts",
"ts",
"vob"
]
/**
 * Tells whether the filename ends with one of VIDEO_EXTENSIONS.
 */
function isVideo(filename) {
  const matchesVideoExtension = isExtension(filename, VIDEO_EXTENSIONS);
  return matchesVideoExtension;
}
/**
 * Tells whether the filename ends with one of SUBTITLE_EXTENSIONS.
 */
function isSubtitle(filename) {
  const matchesSubtitleExtension = isExtension(filename, SUBTITLE_EXTENSIONS);
  return matchesSubtitleExtension;
}
/**
 * Tells whether the filename ends with one of DISK_EXTENSIONS.
 */
function isDisk(filename) {
  const matchesDiskExtension = isExtension(filename, DISK_EXTENSIONS);
  return matchesDiskExtension;
}
/**
 * Checks whether a filename ends with one of the given extensions (case-insensitive).
 *
 * Only a trailing extension of 2 to 4 word characters is recognised. Always returns a real
 * boolean: previously a filename without an extension produced `null`, and a non-string
 * filename threw.
 *
 * @param {string} filename - file name or path to inspect
 * @param {string[]} extensions - lowercase extensions without the leading dot
 * @returns {boolean} true when the trailing extension is in `extensions`
 */
function isExtension(filename, extensions) {
  if (typeof filename !== 'string') {
    return false;
  }
  const extensionMatch = filename.match(/\.(\w{2,4})$/);
  return extensionMatch !== null && extensions.includes(extensionMatch[1].toLowerCase());
}
// Public API: the three predicates; isExtension and the extension lists stay private.
module.exports = { isVideo, isSubtitle, isDisk }

View File

@@ -1,153 +0,0 @@
const axios = require('axios');
const nameToImdb = require('name-to-imdb');
const googleSr = require('google-sr');
const he = require('he');
const { cacheWrapImdbId, cacheWrapKitsuId, cacheWrapMetadata } = require('./cache');
const { Type } = require('./types');
// Base URLs of the two metadata services queried by getMetadata / getKitsuId.
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
const KITSU_URL = 'https://anime-kitsu.strem.fun';
// Timeout passed to axios for metadata requests.
const TIMEOUT = 20000;
// google-sr default selectors with the link selector overridden to anchors that contain an <h3>.
const selectors = {
...googleSr.defaultSelectors,
LinkSelector: 'a:has(h3)'
}
/**
 * Fetches (and memory-caches) metadata for an imdb or kitsu id.
 *
 * Purely numeric ids are treated as kitsu ids and prefixed with `kitsu:`. The lookup tries
 * the Kitsu addon first, then Cinemeta with the requested type, then Cinemeta with the
 * opposite type; if all fail, the last error is wrapped in a new Error.
 *
 * @param {string|number} id - imdb id or kitsu id
 * @param {string} [type] - Type.MOVIE or anything else (treated as Type.SERIES)
 * @returns {Promise<object>} metadata as produced by _requestMetadata
 */
function getMetadata(id, type = Type.SERIES) {
  if (!id) {
    return Promise.reject("no valid id provided");
  }

  const isKitsuId = Number.isInteger(id) || id.match(/^\d+$/);
  const key = isKitsuId ? `kitsu:${id}` : id;
  const isMovie = type === Type.MOVIE;
  const metaType = isMovie ? Type.MOVIE : Type.SERIES;
  // used as the last resort in case the requested type was a mismatch
  const otherType = isMovie ? Type.SERIES : Type.MOVIE;
  const metaUrl = (baseUrl, kind) => `${baseUrl}/meta/${kind}/${key}.json`;

  const fetchWithFallbacks = () => _requestMetadata(metaUrl(KITSU_URL, metaType))
      .catch(() => _requestMetadata(metaUrl(CINEMETA_URL, metaType)))
      .catch(() => _requestMetadata(metaUrl(CINEMETA_URL, otherType)))
      .catch((error) => {
        throw new Error(`failed metadata query ${key} due: ${error.message}`);
      });

  return cacheWrapMetadata(key, fetchWithFallbacks);
}
/**
 * Requests a meta document from `url` and maps it to the internal metadata shape.
 *
 * @param {string} url - full URL of the meta JSON document
 * @returns {Promise<object>} metadata with ids, descriptive fields, `videos`,
 *   `episodeCount` (episodes per season, specials excluded) and `totalCount`
 * @throws {Error} 'No search results' when the response has no meta with an imdb or kitsu id
 */
function _requestMetadata(url) {
  // entries with season 0 or episode 0 are left out of the counts
  const isCounted = (entry) => entry.season !== 0 && entry.episode !== 0;

  const toVideo = (video) => {
    const common = {
      name: video.name || video.title,
      season: video.season,
      episode: video.episode
    };
    if (Number.isInteger(video.imdbSeason)) {
      return { ...common, imdbSeason: video.imdbSeason, imdbEpisode: video.imdbEpisode };
    }
    return {
      ...common,
      kitsuId: video.kitsu_id,
      kitsuEpisode: video.kitsuEpisode,
      released: video.released
    };
  };

  const countPerSeason = (entries) => {
    const perSeason = entries
        .filter(isCounted)
        .sort((a, b) => a.season - b.season)
        .reduce((counts, entry) => {
          counts[entry.season] = counts[entry.season] + 1 || 1;
          return counts;
        }, {});
    return Object.values(perSeason);
  };

  return axios.get(url, { timeout: TIMEOUT })
      .then((response) => {
        const body = response.data;
        const meta = body && body.meta;
        if (!(meta && (meta.imdb_id || meta.kitsu_id))) {
          throw new Error('No search results');
        }
        const videos = meta.videos || [];
        return {
          kitsuId: meta.kitsu_id,
          imdbId: meta.imdb_id,
          type: meta.type,
          title: meta.name,
          year: meta.year,
          country: meta.country,
          genres: meta.genres,
          status: meta.status,
          videos: videos.map(toVideo),
          episodeCount: countPerSeason(videos),
          // stays falsy (not 0) when the meta has no videos list at all
          totalCount: meta.videos && meta.videos.filter(isCounted).length
        };
      });
}
/**
 * Normalises a title into a lowercase, punctuation-free form used for lookups and cache keys.
 *
 * Fix: whitespace runs are now collapsed everywhere (the regex previously lacked the `g`
 * flag, so only the first run was collapsed).
 *
 * @param {string} title - raw title
 * @returns {string} normalised title
 */
function escapeTitle(title) {
  return title.toLowerCase()
      .normalize('NFKD') // decompose accented characters into base letter + combining mark
      .replace(/[\u0300-\u036F]/g, '') // drop the combining diacritical marks
      .replace(/&/g, 'and')
      .replace(/[;, ~./]+/g, ' ') // replace runs of semicolons, commas, spaces, tildes, dots or slashes with a space
      .replace(/[^\w \-()×+#@!'\u0400-\u04ff]+/g, '') // remove everything except word chars, cyrillic and a few symbols
      .replace(/^\d{1,2}[.#\s]+(?=(?:\d+[.\s]*)?[\u0400-\u04ff])/i, '') // remove russian movie numbering
      .replace(/\s{2,}/g, ' ') // collapse every run of whitespace left behind by the removals above
      .trim();
}
/**
 * Decodes HTML entities in a title, then replaces any `&amp;` still present with `&`.
 */
function escapeHTML(title) {
  const decoded = he.decode(title);
  return decoded.replace(/&amp;/g, "&");
}
/**
 * Resolves an imdb id for a parsed title, caching the result in the remote cache.
 *
 * Lookup order: the name-to-imdb package first; if that fails, a Google search whose first
 * result link containing `imdb.com/title/` is used.
 *
 * @param {object} info - parsed title info (reads `title`, `year`, `date`)
 * @param {string} type - content type, used in the cache key and in both lookups
 * @returns {Promise<string|undefined|null>} normalised `tt…` id, or a falsy value when nothing was found
 */
async function getImdbId(info, type) {
const name = escapeTitle(info.title);
// year falls back to the first 4 characters of info.date
const year = info.year || info.date && info.date.slice(0, 4);
const key = `${name}_${year}_${type}`;
const query = `${name} ${year || ''} ${type} imdb`;
return cacheWrapImdbId(key,
() => new Promise((resolve, reject) => {
// NOTE(review): this passes info.year, not the `year` computed above (which also considers
// info.date) — confirm whether that is intended.
nameToImdb({ name: encodeURIComponent(name), year: info.year, type }, function (err, res) {
if (res) {
resolve(res);
} else {
reject(err || new Error('failed imdbId search'));
}
});
}).catch(() => googleSr.search(query, { selectors })
// fallback: take the first search result that links to an imdb title page
.then(response => response.length ? response : Promise.reject('No results'))
.then(results => results
.map(result => result.Link)
.find(result => result.includes('imdb.com/title/')))
.then(result => result && result.match(/imdb\.com\/title\/(tt\d+)/))
.then(match => match && match[1])))
// normalise: strip the leading zeros after `tt`, then left-pad the number to 7 digits
.then(imdbId => imdbId && 'tt' + imdbId.replace(/tt0*([1-9][0-9]*)$/, '$1').padStart(7, '0'));
}
/**
 * Resolves a kitsu id by searching the Kitsu addon catalog, caching the result in the remote cache.
 *
 * The search text is the escaped title (anything after ` | ` is dropped) plus the year and,
 * for seasons above 1, an ` S<n>` suffix.
 *
 * @param {object} info - parsed title info (reads `title`, `year`, `season`)
 * @returns {Promise<string>} id of the first search result without the `kitsu:` prefix
 * @throws {Error} 'No search results' when the catalog returns no metas
 */
async function getKitsuId(info) {
  const baseTitle = escapeTitle(info.title.replace(/\s\|\s.*/, ''));
  const yearSuffix = info.year ? ` ${info.year}` : '';
  const seasonSuffix = info.season > 1 ? ` S${info.season}` : '';
  const key = `${baseTitle}${yearSuffix}${seasonSuffix}`;
  const searchUrl = `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${encodeURIComponent(key)}.json`;

  const pickFirstId = (response) => {
    const body = response.data;
    if (!(body && body.metas && body.metas.length)) {
      throw new Error('No search results');
    }
    return body.metas[0].id.replace('kitsu:', '');
  };

  return cacheWrapKitsuId(key, () => axios.get(searchUrl, { timeout: 60000 }).then(pickFirstId));
}
/**
 * Tells whether an imdb id belongs to a single episode, by checking that the IMDb title page
 * contains the text `video.episode`. Any request failure yields false.
 *
 * @param {string} imdbId - imdb id to check
 * @returns {Promise<boolean>}
 */
async function isEpisodeImdbId(imdbId) {
  if (!imdbId) {
    return false;
  }
  const pageUrl = `https://www.imdb.com/title/${imdbId}/`;
  const mentionsEpisode = (response) => !!(response.data && response.data.includes('video.episode'));
  return axios.get(pageUrl, { timeout: 10000 })
      .then(mentionsEpisode)
      .catch(() => false);
}
// Public API of the metadata module; _requestMetadata stays private.
module.exports = { getMetadata, getImdbId, getKitsuId, isEpisodeImdbId, escapeHTML, escapeTitle };

View File

@@ -1,100 +0,0 @@
const { parse } = require('parse-torrent-title');
const { Type } = require('./types');
// Size in bytes above which isPackTorrent treats a torrent as holding multiple episodes.
const MULTIPLE_FILES_SIZE = 4 * 1024 * 1024 * 1024; // 4 GB
/**
 * Parses every video file of a series torrent and flags the ones that look like movies.
 *
 * @param {object} torrent - torrent record (reads `title` and `type`)
 * @param {object[]} videos - video files of the torrent
 * @returns {object[]} parsed videos, each with an added `isMovie` flag
 */
function parseSeriesVideos(torrent, videos) {
  const parsedTorrentName = parse(torrent.title);
  const titleMentionsMovies = !!torrent.title.match(/movies?(?:\W|$)/i);
  const hasMovies = parsedTorrentName.complete || titleMentionsMovies;
  const parsedVideos = videos.map((video) => parseSeriesVideo(video, parsedTorrentName));
  return parsedVideos.map((video) => {
    const isMovie = isMovieVideo(video, parsedVideos, torrent.type, hasMovies);
    return { ...video, isMovie };
  });
}
/**
 * Parses a single video file name and fills in season / episode / date information from
 * progressively weaker sources (parent folder, torrent title, raw numbers in the name).
 *
 * @param {object} video - video file (reads `name` and `path`)
 * @param {object} parsedTorrentName - result of parsing the torrent title
 * @returns {object} the video merged with the parsed info (parsed fields win on conflict)
 */
function parseSeriesVideo(video, parsedTorrentName) {
const videoInfo = parse(video.name);
// the episode may be in a folder containing season number
if (!Number.isInteger(videoInfo.season) && video.path.includes('/')) {
const folders = video.path.split('/');
// the second-to-last path segment is the folder directly containing the file
const pathInfo = parse(folders[folders.length - 2]);
videoInfo.season = pathInfo.season;
}
// fall back to the season parsed from the torrent title
if (!Number.isInteger(videoInfo.season) && parsedTorrentName.season) {
videoInfo.season = parsedTorrentName.season;
}
if (!Number.isInteger(videoInfo.season) && videoInfo.seasons && videoInfo.seasons.length > 1) {
// in case single file was interpreted as having multiple seasons
videoInfo.season = videoInfo.seasons[0];
}
if (!Number.isInteger(videoInfo.season) && video.path.includes('/') && parsedTorrentName.seasons
&& parsedTorrentName.seasons.length > 1) {
// russian seasons are usually named 'series name-2', i.e. Улицы разбитых фонарей-6/22. Одиночный выстрел.mkv
const folderPathSeasonMatch = video.path.match(/[\u0400-\u04ff]-(\d{1,2})(?=.*\/)/);
videoInfo.season = folderPathSeasonMatch && parseInt(folderPathSeasonMatch[1], 10) || undefined;
}
// sometimes video file does not have correct date format as in torrent title
if (!videoInfo.episodes && !videoInfo.date && parsedTorrentName.date) {
videoInfo.date = parsedTorrentName.date;
}
// limit number of episodes in case of incorrect parsing
if (videoInfo.episodes && videoInfo.episodes.length > 20) {
videoInfo.episodes = [videoInfo.episodes[0]];
videoInfo.episode = videoInfo.episodes[0];
}
// force episode to any found number if it was not parsed
if (!videoInfo.episodes && !videoInfo.date) {
// a 1-4 digit number delimited by non-word chars or underscores, not adjacent to season/disk/movie/film
// NOTE(review): assumes the parser always sets videoInfo.title; an undefined title would throw here
const epMatcher = videoInfo.title.match(
/(?<!season\W*|disk\W*|movie\W*|film\W*)(?:^|\W|_)(\d{1,4})(?:a|b|c|v\d)?(?:_|\W|$)(?!disk|movie|film)/i);
videoInfo.episodes = epMatcher && [parseInt(epMatcher[1], 10)];
videoInfo.episode = videoInfo.episodes && videoInfo.episodes[0];
}
// last resort: a number following '(<year>)' or 'part' in the raw file name
if (!videoInfo.episodes && !videoInfo.date) {
const epMatcher = video.name.match(new RegExp(`(?:\\(${videoInfo.year}\\)|part)[._ ]?(\\d{1,3})(?:\\b|_)`, "i"));
videoInfo.episodes = epMatcher && [parseInt(epMatcher[1], 10)];
videoInfo.episode = videoInfo.episodes && videoInfo.episodes[0];
}
return { ...video, ...videoInfo };
}
/**
 * Decides whether a parsed video from a series/anime torrent should be treated as a movie.
 *
 * Fix: the final rule compared the filtered array itself with `< 3` (an array of objects
 * coerces to NaN, so the check was always false for any match); it now compares the
 * number of matching videos.
 *
 * @param {object} video - parsed video (reads `season`, `episodes`, `name`, `year`, `title`)
 * @param {object[]} otherVideos - all parsed videos of the same torrent
 * @param {string} type - torrent type, compared against Type.ANIME
 * @param {boolean} hasMovies - whether the torrent title hints that it contains movies
 * @returns {boolean} true when the video looks like a movie
 */
function isMovieVideo(video, otherVideos, type, hasMovies) {
  if (Number.isInteger(video.season) && Array.isArray(video.episodes)) {
    // not movie if video has season
    return false;
  }
  if (video.name.match(/\b(?:\d+[ .]movie|movie[ .]\d+)\b/i)) {
    // movie if video explicitly has numbered movie keyword in the name, ie. 1 Movie or Movie 1
    return true;
  }
  if (!hasMovies && type !== Type.ANIME) {
    // not movie if torrent name does not contain movies keyword or is not a pack torrent and is not anime
    return false;
  }
  if (!video.episodes) {
    // without any episode info it could be a movie
    return true;
  }
  // movie if it has year info and fewer than 3 videos share the same title and year,
  // as some series titles might contain a year in them.
  if (!video.year || otherVideos.length <= 3) {
    return false;
  }
  const sameTitleAndYearCount = otherVideos
      .filter(other => other.title === video.title && other.year === video.year)
      .length;
  return sameTitleAndYearCount < 3;
}
/**
 * Tells whether a torrent is a pack (several movies or several episodes) rather than a single item.
 *
 * @param {object} torrent - torrent record (reads `pack`, `title`, `type`, `size`)
 * @returns {*} truthy for packs; the value is not always a strict boolean
 */
function isPackTorrent(torrent) {
  if (torrent.pack) {
    return true;
  }

  const { complete, year, seasons, episodes, episode, date } = parse(torrent.title);

  if (torrent.type === Type.MOVIE) {
    // the check expects `year` to be a string when the title covers more than one year
    return complete || typeof year === 'string' || /movies/i.test(torrent.title);
  }

  const hasMultipleEpisodes = complete ||
      torrent.size > MULTIPLE_FILES_SIZE ||
      (seasons && seasons.length > 1) ||
      (episodes && episodes.length > 1) ||
      (seasons && !episodes);
  const hasSingleEpisode = Number.isInteger(episode) || (!episodes && date);
  return hasMultipleEpisodes && !hasSingleEpisode;
}
// Public API: parseSeriesVideo and isMovieVideo are internal helpers.
module.exports = { parseSeriesVideos, isPackTorrent }

View File

@@ -1,57 +0,0 @@
/**
 * Runs promise-returning functions strictly one after another, in order.
 * Each result is concatenated onto the accumulated array, so array results are flattened one level.
 */
async function sequence(promises) {
  let collected = [];
  for (const task of promises) {
    const soFar = collected;
    collected = await task().then((value) => soFar.concat(value));
  }
  return collected;
}
/**
 * Resolves with the value of whichever promise fulfils first.
 * Rejects with the array of all rejection reasons when none of them fulfils.
 */
async function first(promises) {
  // Invert every promise: a success becomes a rejection so Promise.all bails out at once,
  // while a failure becomes a resolution so the others keep being awaited.
  const inverted = promises.map((candidate) => candidate.then(
      (value) => { throw value; },
      (error) => error
  ));

  let errors;
  try {
    // If '.all' resolves, every input failed and we hold the array of errors.
    errors = await Promise.all(inverted);
  } catch (winner) {
    // If '.all' rejects, the rejection reason is the first successful value.
    return winner;
  }
  throw errors;
}
/**
 * Resolves (with no value) after `duration` milliseconds.
 */
async function delay(duration) {
  await new Promise((wake) => {
    setTimeout(wake, duration);
  });
}
/**
 * Races `promise` against a timer and rejects with `message` if the timer wins.
 *
 * Fix: the timer is now cleared as soon as the race settles, so a promise that finishes
 * early no longer leaves a pending timer behind for the rest of `timeoutMs`.
 *
 * @param {number} timeoutMs - time limit in milliseconds
 * @param {Promise} promise - promise to guard
 * @param {*} [message='Timed out'] - rejection reason used on timeout (passed through as-is)
 * @returns {Promise<*>} the settled result of `promise` if it finishes in time
 */
async function timeout(timeoutMs, promise, message = 'Timed out') {
  let timer;
  const expiry = new Promise((resolve, reject) => {
    timer = setTimeout(() => reject(message), timeoutMs);
  });
  try {
    return await Promise.race([promise, expiry]);
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Returns the most common value of the given array.
 *
 * Fix: the input array is no longer sorted and popped in place, and counting is a single
 * pass instead of re-filtering the array inside the sort comparator.
 * On ties the value that occurs latest in the array wins, as before.
 * Values are counted with Map semantics (SameValueZero).
 *
 * @param {Array} array - values to inspect (left untouched)
 * @returns {*} the most frequent value, or undefined for an empty array
 */
function mostCommonValue(array) {
  const counts = new Map();
  for (const value of array) {
    counts.set(value, (counts.get(value) || 0) + 1);
  }

  let winner;
  let winnerCount = 0;
  for (const value of array) {
    const count = counts.get(value);
    // `>=` keeps the historical tie-break: the later element wins
    if (count >= winnerCount) {
      winner = value;
      winnerCount = count;
    }
  }
  return winner;
}
// Public API: all promise helpers defined in this module.
module.exports = { sequence, first, delay, timeout, mostCommonValue };

View File

@@ -1,350 +0,0 @@
const moment = require('moment');
const Promises = require('./promises')
const { Sequelize, Op, DataTypes, fn, col, literal } = require('sequelize');
// Connection string for the database, taken from the environment.
const DATABASE_URI = process.env.DATABASE_URI;
// Shared Sequelize instance used by every model below; query logging is turned off.
const database = new Sequelize(
DATABASE_URI,
{
logging: false
}
);
// Provider model: keyed by name, with the last scrape timestamp and last scraped id.
const Provider = database.define('provider', {
name: { type: DataTypes.STRING(32), primaryKey: true },
lastScraped: { type: DataTypes.DATE },
lastScrapedId: { type: DataTypes.STRING(128) }
});
// Torrent model: one row per torrent, keyed by infoHash.
const Torrent = database.define('torrent',
{
infoHash: { type: DataTypes.STRING(64), primaryKey: true },
provider: { type: DataTypes.STRING(32), allowNull: false },
torrentId: { type: DataTypes.STRING(512) },
title: { type: DataTypes.STRING(512), allowNull: false },
size: { type: DataTypes.BIGINT },
type: { type: DataTypes.STRING(16), allowNull: false },
uploadDate: { type: DataTypes.DATE, allowNull: false },
seeders: { type: DataTypes.SMALLINT },
// stored as a single string; elsewhere in this commit it is split on ',' when read
trackers: { type: DataTypes.STRING(4096) },
languages: { type: DataTypes.STRING(4096) },
resolution: { type: DataTypes.STRING(16) },
reviewed: { type: DataTypes.BOOLEAN, allowNull: false, defaultValue: false },
// set to true by createContents once the torrent's contents were stored
opened: { type: DataTypes.BOOLEAN, allowNull: false, defaultValue: false }
}
);
// File model: a file of a torrent mapped to an imdb and/or kitsu entry.
const File = database.define('file',
{
id: { type: DataTypes.BIGINT, autoIncrement: true, primaryKey: true },
infoHash: {
type: DataTypes.STRING(64),
allowNull: false,
references: { model: Torrent, key: 'infoHash' },
onDelete: 'CASCADE'
},
fileIndex: { type: DataTypes.INTEGER },
title: { type: DataTypes.STRING(512), allowNull: false },
size: { type: DataTypes.BIGINT },
imdbId: { type: DataTypes.STRING(32) },
imdbSeason: { type: DataTypes.INTEGER },
imdbEpisode: { type: DataTypes.INTEGER },
kitsuId: { type: DataTypes.INTEGER },
kitsuEpisode: { type: DataTypes.INTEGER }
},
{
indexes: [
{
// Unique index over the whole mapping; COALESCE substitutes a sentinel for NULL columns
// (presumably so that rows differing only by NULLs still collide — confirm).
unique: true,
name: 'files_unique_file_constraint',
fields: [
col('infoHash'),
fn('COALESCE', (col('fileIndex')), -1),
fn('COALESCE', (col('imdbId')), 'null'),
fn('COALESCE', (col('imdbSeason')), -1),
fn('COALESCE', (col('imdbEpisode')), -1),
fn('COALESCE', (col('kitsuId')), -1),
fn('COALESCE', (col('kitsuEpisode')), -1)
]
},
// non-unique lookup indexes by imdb and kitsu identifiers
{ unique: false, fields: ['imdbId', 'imdbSeason', 'imdbEpisode'] },
{ unique: false, fields: ['kitsuId', 'kitsuEpisode'] }
]
}
);
// Subtitle model: a subtitle file of a torrent, optionally linked to a File row through fileId.
const Subtitle = database.define('subtitle',
{
infoHash: {
type: DataTypes.STRING(64),
allowNull: false,
references: { model: Torrent, key: 'infoHash' },
onDelete: 'CASCADE'
},
fileIndex: {
type: DataTypes.INTEGER,
allowNull: false
},
// nullable: getUnassignedSubtitles() selects the rows where this is null
fileId: {
type: DataTypes.BIGINT,
allowNull: true,
references: { model: File, key: 'id' },
onDelete: 'SET NULL'
},
title: { type: DataTypes.STRING(512), allowNull: false },
},
{
timestamps: false,
indexes: [
{
// unique per torrent + file index + linked file (NULL fileId replaced by -1 via COALESCE)
unique: true,
name: 'subtitles_unique_subtitle_constraint',
fields: [
col('infoHash'),
col('fileIndex'),
fn('COALESCE', (col('fileId')), -1)
]
},
{ unique: false, fields: ['fileId'] }
]
}
);
// Content model: one row per file inside a torrent, with composite primary key (infoHash, fileIndex).
const Content = database.define('content',
{
infoHash: {
type: DataTypes.STRING(64),
primaryKey: true,
allowNull: false,
references: { model: Torrent, key: 'infoHash' },
onDelete: 'CASCADE'
},
fileIndex: {
type: DataTypes.INTEGER,
primaryKey: true,
allowNull: false
},
path: { type: DataTypes.STRING(512), allowNull: false },
size: { type: DataTypes.BIGINT },
},
{
timestamps: false,
}
);
// SkipTorrent model: holds only an infoHash; rows are written by createSkipTorrent and read by getSkipTorrent.
const SkipTorrent = database.define('skip_torrent', {
infoHash: { type: DataTypes.STRING(64), primaryKey: true },
});
// Model associations. `constraints: false` is set on all of them; the foreign keys are
// already declared through `references` in the model definitions.
Torrent.hasMany(File, { foreignKey: 'infoHash', constraints: false });
File.belongsTo(Torrent, { foreignKey: 'infoHash', constraints: false });
Torrent.hasMany(Content, { foreignKey: 'infoHash', constraints: false });
Content.belongsTo(Torrent, { foreignKey: 'infoHash', constraints: false });
File.hasMany(Subtitle, { foreignKey: 'fileId', constraints: false });
Subtitle.belongsTo(File, { foreignKey: 'fileId', constraints: false });
/**
 * Synchronises the database schema (with `alter: true`) when ENABLE_SYNC is set;
 * otherwise resolves immediately. Sync failures are logged and rethrown.
 *
 * @returns {Promise<*>}
 */
function connect() {
  if (!process.env.ENABLE_SYNC) {
    return Promise.resolve();
  }
  const logAndRethrow = (error) => {
    console.error('Failed syncing database: ', error);
    throw error;
  };
  return database.sync({ alter: true }).catch(logAndRethrow);
}
/**
 * Finds the provider row by name, creating it from `provider` when missing.
 * If the query fails, the passed-in `provider` object is returned instead.
 */
function getProvider(provider) {
  const query = {
    where: { name: { [Op.eq]: provider.name } },
    defaults: provider
  };
  return Provider.findOrCreate(query)
      .then((foundOrCreated) => foundOrCreated[0])
      .catch(() => provider);
}
/**
 * Finds a torrent by infoHash or, when it has none, by provider + torrentId.
 */
function getTorrent(torrent) {
  let where;
  if (torrent.infoHash) {
    where = { infoHash: torrent.infoHash };
  } else {
    where = { provider: torrent.provider, torrentId: torrent.torrentId };
  }
  return Torrent.findOne({ where });
}
/**
 * Finds torrents of the given type whose title matches the regular expression `titleQuery`.
 */
function getTorrentsBasedOnTitle(titleQuery, type) {
  const titlePattern = { [Op.regexp]: `${titleQuery}` };
  return getTorrentsBasedOnQuery({ title: titlePattern, type });
}
/**
 * Finds all torrents matching an arbitrary Sequelize `where` clause.
 */
function getTorrentsBasedOnQuery(where) {
  const query = { where };
  return Torrent.findAll(query);
}
/**
 * Finds all files matching an arbitrary Sequelize `where` clause.
 */
function getFilesBasedOnQuery(where) {
  const query = { where };
  return File.findAll(query);
}
/**
 * Finds torrents that have at least one file whose size is exactly 300000000,
 * ordered by seeders descending.
 * NOTE(review): 300000000 looks like a placeholder size used elsewhere — confirm.
 */
function getTorrentsWithoutSize() {
  const hasPlaceholderSizedFile = literal(
      'exists (select 1 from files where files."infoHash" = torrent."infoHash" and files.size = 300000000)');
  const bySeedersDesc = [['seeders', 'DESC']];
  return Torrent.findAll({ where: hasPlaceholderSizedFile, order: bySeedersDesc });
}
/**
 * Finds up to `limit` torrents last updated before the date 7 days ago,
 * most seeded first and, among equals, least recently updated first.
 */
function getUpdateSeedersTorrents(limit = 50) {
  const until = moment().subtract(7, 'days').format('YYYY-MM-DD');
  const staleFilter = literal(`torrent."updatedAt" < '${until}'`);
  const order = [
    ['seeders', 'DESC'],
    ['updatedAt', 'ASC']
  ];
  return Torrent.findAll({ where: staleFilter, limit, order });
}
/**
 * Finds up to `limit` recently created torrents (created after the date 4 days ago) that were
 * last updated before the date 12 hours ago, least seeded first.
 * Both bounds are formatted as dates without a time component.
 */
function getUpdateSeedersNewTorrents(limit = 50) {
  const lastUpdate = moment().subtract(12, 'hours').format('YYYY-MM-DD');
  const createdAfter = moment().subtract(4, 'days').format('YYYY-MM-DD');
  const recentAndStale = literal(
      `torrent."updatedAt" < '${lastUpdate}' AND torrent."createdAt" > '${createdAfter}'`);
  const order = [
    ['seeders', 'ASC'],
    ['updatedAt', 'ASC']
  ];
  return Torrent.findAll({ where: recentAndStale, limit, order });
}
/**
 * Picks up to 500 random torrents that are not opened yet and have at least one seeder.
 */
function getNoContentsTorrents() {
  const unopenedWithSeeders = { opened: false, seeders: { [Op.gte]: 1 } };
  const randomOrder = [[fn('RANDOM')]];
  return Torrent.findAll({ where: unopenedWithSeeders, limit: 500, order: randomOrder });
}
/**
 * Upserts the torrent row, then stores its contents, then its subtitles (in that order).
 */
function createTorrent(torrent) {
  const storeContents = () => createContents(torrent.infoHash, torrent.contents);
  const storeSubtitles = () => createSubtitles(torrent.infoHash, torrent.subtitles);
  return Torrent.upsert(torrent)
      .then(() => storeContents())
      .then(() => storeSubtitles());
}
/**
 * Updates the seeders count of the torrent identified by infoHash or, when it has none,
 * by provider + torrentId.
 */
function setTorrentSeeders(torrent, seeders) {
  let where;
  if (torrent.infoHash) {
    where = { infoHash: torrent.infoHash };
  } else {
    where = { provider: torrent.provider, torrentId: torrent.torrentId };
  }
  return Torrent.update({ seeders }, { where });
}
/**
 * Deletes the torrent row with the given torrent's infoHash.
 */
function deleteTorrent(torrent) {
  const byInfoHash = { infoHash: torrent.infoHash };
  return Torrent.destroy({ where: byInfoHash });
}
/**
 * Persists a file.
 *
 * A file that already has an id is saved (model instance) or upserted (plain object) and its
 * subtitles are upserted afterwards. A new file is created together with its subtitles,
 * which first get the file's infoHash and a title defaulting to their path.
 */
function createFile(file) {
  if (file.id) {
    const persisted = file.dataValues ? file.save() : File.upsert(file);
    return persisted.then(() => upsertSubtitles(file, file.subtitles));
  }

  const hasSubtitles = file.subtitles && file.subtitles.length;
  if (hasSubtitles) {
    file.subtitles = file.subtitles.map((subtitle) => ({
      infoHash: file.infoHash,
      title: subtitle.path,
      ...subtitle
    }));
  }
  return File.create(file, { include: [Subtitle], ignoreDuplicates: true });
}
/**
 * Finds all files belonging to the given torrent.
 */
function getFiles(torrent) {
  const byInfoHash = { infoHash: torrent.infoHash };
  return File.findAll({ where: byInfoHash });
}
/**
 * Finds all files whose title matches the regular expression `titleQuery`.
 */
function getFilesBasedOnTitle(titleQuery) {
  const titlePattern = { [Op.regexp]: `${titleQuery}` };
  return File.findAll({ where: { title: titlePattern } });
}
/**
 * Deletes the file row with the given file's id.
 */
function deleteFile(file) {
  const byId = { id: file.id };
  return File.destroy({ where: byId });
}
/**
 * Bulk-creates subtitle rows for a torrent; resolves immediately when there are none.
 * Each row gets the infoHash and a title defaulting to the subtitle's path.
 */
function createSubtitles(infoHash, subtitles) {
  if (!(subtitles && subtitles.length)) {
    return Promise.resolve();
  }
  const rows = subtitles.map((subtitle) => ({ infoHash, title: subtitle.path, ...subtitle }));
  return Subtitle.bulkCreate(rows);
}
/**
 * Links the subtitles to the file and persists them one at a time.
 * Model instances are saved, plain objects are created. Resolves immediately when the file
 * has no id or there are no subtitles.
 */
function upsertSubtitles(file, subtitles) {
  if (!(file.id && subtitles && subtitles.length)) {
    return Promise.resolve();
  }

  const persistTasks = subtitles.map((subtitle) => {
    // the subtitle objects are updated in place before anything is persisted
    subtitle.fileId = file.id;
    subtitle.infoHash = subtitle.infoHash || file.infoHash;
    subtitle.title = subtitle.title || subtitle.path;
    return () => (subtitle.dataValues ? subtitle.save() : Subtitle.create(subtitle));
  });
  return Promises.sequence(persistTasks);
}
/**
 * Finds all subtitles belonging to the given torrent.
 */
function getSubtitles(torrent) {
  const byInfoHash = { infoHash: torrent.infoHash };
  return Subtitle.findAll({ where: byInfoHash });
}
/**
 * Finds all subtitles that are not linked to any file (fileId is null).
 */
function getUnassignedSubtitles() {
  const withoutFile = { fileId: null };
  return Subtitle.findAll({ where: withoutFile });
}
/**
 * Bulk-creates the content rows of a torrent (duplicates ignored) and then marks the torrent
 * as opened without touching its timestamps. Resolves immediately when there are no contents.
 */
function createContents(infoHash, contents) {
  if (!(contents && contents.length)) {
    return Promise.resolve();
  }
  const rows = contents.map((content) => ({ infoHash, ...content }));
  const markOpened = () => Torrent.update({ opened: true }, { where: { infoHash: infoHash }, silent: true });
  return Content.bulkCreate(rows, { ignoreDuplicates: true }).then(() => markOpened());
}
/**
 * Finds all content rows belonging to the given torrent.
 */
function getContents(torrent) {
  const byInfoHash = { infoHash: torrent.infoHash };
  return Content.findAll({ where: byInfoHash });
}
/**
 * Looks up the skip entry for a torrent.
 *
 * @returns {Promise<object>} the entry's plain data values
 * @throws {Error} (as a rejection) when no skip entry exists for the infoHash
 */
function getSkipTorrent(torrent) {
  const unwrapOrFail = (entry) => {
    if (entry) {
      return entry.dataValues;
    }
    throw new Error(`torrent not found: ${torrent.infoHash}`);
  };
  return SkipTorrent.findByPk(torrent.infoHash).then(unwrapOrFail);
}
/**
 * Upserts a skip entry for the torrent's infoHash.
 */
function createSkipTorrent(torrent) {
  const entry = { infoHash: torrent.infoHash };
  return SkipTorrent.upsert(entry);
}
// Public API of the repository: only query/persistence helpers are exported, the models stay private.
module.exports = {
connect,
getProvider,
createTorrent,
setTorrentSeeders,
getTorrent,
getTorrentsBasedOnTitle,
getTorrentsBasedOnQuery,
getFilesBasedOnQuery,
deleteTorrent,
getUpdateSeedersTorrents,
getUpdateSeedersNewTorrents,
getNoContentsTorrents,
createFile,
getFiles,
getFilesBasedOnTitle,
deleteFile,
createSubtitles,
upsertSubtitles,
getSubtitles,
getUnassignedSubtitles,
createContents,
getContents,
getSkipTorrent,
createSkipTorrent,
getTorrentsWithoutSize
};

View File

@@ -1,24 +0,0 @@
const UserAgent = require('user-agents');
const userAgent = new UserAgent();
/**
 * Returns a random user-agent string produced by the shared generator.
 */
function getRandomUserAgent() {
  const randomAgent = userAgent.random();
  return randomAgent.toString();
}
/**
 * Builds default request options with a random user-agent header.
 * When both PROXY_HOST and PROXY_TYPE are set, the proxy host/port (extracted from PROXY_HOST)
 * and a `proxy-type` header are added as well.
 * NOTE(review): a PROXY_HOST without `//host:port` makes the regex lookups throw — confirm the expected format.
 */
function defaultOptionsWithProxy() {
  const proxyHost = process.env.PROXY_HOST;
  const proxyType = process.env.PROXY_TYPE;

  if (!(proxyHost && proxyType)) {
    return { headers: { 'user-agent': getRandomUserAgent() } };
  }

  const proxy = {
    host: process.env.PROXY_HOST.match(/\/\/(.*):/)[1],
    port: process.env.PROXY_HOST.match(/:(\d+)/)[1]
  };
  const headers = {
    'user-agent': getRandomUserAgent(),
    'proxy-type': process.env.PROXY_TYPE
  };
  return { proxy, headers };
}
// Public API of the request helper module.
module.exports = { getRandomUserAgent, defaultOptionsWithProxy };

View File

@@ -1,208 +0,0 @@
const torrentStream = require('torrent-stream');
const axios = require('axios');
const parseTorrent = require('parse-torrent');
const BTClient = require('bittorrent-tracker')
const async = require('async');
const decode = require('magnet-uri');
const { Type } = require('./types');
const { delay } = require('./promises')
const { isVideo, isSubtitle } = require('./extension');
const { cacheTrackers } = require('./cache');
// Text file with the default tracker list downloaded by getDefaultTrackers.
const TRACKERS_URL = 'https://ngosang.github.io/trackerslist/trackers_all.txt';
// Peer connection limit passed to torrent-stream.
// NOTE(review): a value coming from the environment is a string, the fallback is a number.
const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20;
// Used as the overall deadline for tracker scrapes and as the timeout for the tracker list download.
const SEEDS_CHECK_TIMEOUT = 15 * 1000; // 15 secs
// Trackers always appended to the downloaded default list.
const ADDITIONAL_TRACKERS = [
'http://tracker.trackerfix.com:80/announce',
'udp://9.rarbg.me:2780',
'udp://9.rarbg.to:2870'
];
// Trackers appended additionally for torrents of type anime.
const ANIME_TRACKERS = [
"http://nyaa.tracker.wf:7777/announce",
"udp://anidex.moe:6969/announce",
"udp://tracker-udp.anirena.com:80/announce",
"udp://tracker.uw0.xyz:6969/announce"
];
/**
 * Scrapes every known tracker for the given torrent(s) and updates `seeders` on each torrent
 * object to the highest seeder count reported by any tracker (0 when none answered).
 *
 * Fixes compared with the previous version:
 * - no async Promise executor: a failure while collecting trackers now rejects the returned
 *   promise instead of leaving it pending forever;
 * - the deadline timer is cleared once all trackers answered;
 * - the completion handler runs (and logs) only once.
 *
 * @param {object|object[]} torrentsInput - a torrent or an array of torrents (reads `infoHash`,
 *   `title`, `seeders`, plus whatever getTorrentTrackers reads)
 * @returns {Promise<object|object[]>} the same `torrentsInput`, with `seeders` updated in place
 */
async function updateCurrentSeeders(torrentsInput) {
  const isBatch = Array.isArray(torrentsInput);
  const torrents = isBatch ? torrentsInput : [torrentsInput];
  // infoHash -> { tracker -> [complete, incomplete, errorMessage?] }
  const perTorrentResults = Object.fromEntries(torrents.map(torrent => [torrent.infoHash, {}]));

  const allTorrentTrackers = await Promise.all(torrents.map(torrent => getTorrentTrackers(torrent)
      .then(torrentTrackers => ({ infoHash: torrent.infoHash, trackers: torrentTrackers }))));

  // tracker -> list of infoHashes to scrape from it, so each tracker is queried once
  const perTrackerInfoHashes = {};
  for (const { infoHash, trackers } of allTorrentTrackers) {
    for (const tracker of trackers) {
      perTrackerInfoHashes[tracker] = (perTrackerInfoHashes[tracker] || []).concat(infoHash);
    }
  }

  await new Promise((resolve) => {
    let successCounter = 0;
    let finished = false;
    let timer;
    const finish = () => {
      if (finished) {
        return;
      }
      finished = true;
      clearTimeout(timer);
      console.log(`Total successful tracker responses: ${successCounter}`)
      resolve();
    };
    // stop waiting after the deadline; late tracker answers are simply ignored by the caller
    timer = setTimeout(finish, SEEDS_CHECK_TIMEOUT);

    async.each(Object.keys(perTrackerInfoHashes), (tracker, ready) => {
      BTClient.scrape({ infoHash: perTrackerInfoHashes[tracker], announce: tracker }, (error, response) => {
        if (response) {
          // batch input: response is keyed by infoHash; single input: response is the result itself
          const results = isBatch ? Object.entries(response) : [[response.infoHash, response]];
          results
              .filter(([infoHash]) => perTorrentResults[infoHash])
              .forEach(([infoHash, seeders]) => {
                perTorrentResults[infoHash][tracker] = [seeders.complete, seeders.incomplete];
              });
          successCounter++;
        } else if (error) {
          perTrackerInfoHashes[tracker]
              .filter(infoHash => perTorrentResults[infoHash])
              .forEach((infoHash) => {
                perTorrentResults[infoHash][tracker] = [0, 0, error.message];
              });
        }
        ready();
      })
    }, finish);
  });

  torrents.forEach((torrent) => {
    const results = perTorrentResults[torrent.infoHash];
    const newSeeders = Math.max(...Object.values(results).map(values => values[0]).concat(0));
    if (torrent.seeders !== newSeeders) {
      console.log(`Updating seeders for [${torrent.infoHash}] ${torrent.title} - ${torrent.seeders} -> ${newSeeders}`)
      torrent.seeders = newSeeders;
    }
  });
  return torrentsInput;
}
/**
 * Fetches size and file list through torrent-stream (limited by SEEDS_CHECK_TIMEOUT)
 * and stores them on the torrent object.
 *
 * @returns {Promise<object>} the same torrent, with `size` and `files` set
 */
async function updateTorrentSize(torrent) {
  const streamInfo = await filesAndSizeFromTorrentStream(torrent, SEEDS_CHECK_TIMEOUT);
  torrent.size = streamInfo.size;
  torrent.files = streamInfo.files;
  return torrent;
}
/**
 * Fetches `{ files, size }` of a torrent through torrent-stream with a 30000 ms timeout.
 */
async function sizeAndFiles(torrent) {
  const streamTimeoutMs = 30000;
  return filesAndSizeFromTorrentStream(torrent, streamTimeoutMs);
}
/**
 * Resolves the file list of a torrent, trying in order: the files already on the object,
 * the .torrent file, and finally torrent-stream.
 *
 * @returns {Promise<{contents: object[], videos: object[], subtitles: object[]}>}
 */
async function torrentFiles(torrent, timeout) {
  const files = await getFilesFromObject(torrent)
      .catch(() => filesFromTorrentFile(torrent))
      .catch(() => filesFromTorrentStream(torrent, timeout));
  return {
    contents: files,
    videos: filterVideos(files),
    subtitles: filterSubtitles(files)
  };
}
/**
 * Reads the file list directly from the torrent object.
 * `files` may be an array (returned as a resolved promise) or a function (called as a method
 * of the torrent and its result returned); anything else is rejected.
 */
function getFilesFromObject(torrent) {
  const filesKind = Array.isArray(torrent.files) ? 'list' : typeof torrent.files;
  if (filesKind === 'list') {
    return Promise.resolve(torrent.files);
  }
  if (filesKind === 'function') {
    // called on the torrent so that `this` stays bound to it
    return torrent.files();
  }
  return Promise.reject("No files in the object");
}
/**
 * Downloads the .torrent file referenced by `torrentLink` and lists its files.
 *
 * @returns {Promise<object[]>} files as `{ fileIndex, name, path, size }`, with the leading
 *   path segment stripped from `path`
 * @throws {Error} (as a rejection) when there is no torrentLink or the download fails
 */
async function filesFromTorrentFile(torrent) {
  if (!torrent.torrentLink) {
    throw new Error("no torrentLink");
  }

  const response = await axios.get(torrent.torrentLink, { timeout: 10000, responseType: 'arraybuffer' });
  if (!response.data || response.status !== 200) {
    throw new Error('torrent not found')
  }

  const info = await parseTorrent(response.data);
  return info.files.map((file, fileId) => ({
    fileIndex: fileId,
    name: file.name,
    path: file.path.replace(/^[^\/]+\//, ''),
    size: file.length
  }));
}
/**
 * Lists the files of a torrent through torrent-stream, dropping the size information.
 */
async function filesFromTorrentStream(torrent, timeout) {
  const streamInfo = await filesAndSizeFromTorrentStream(torrent, timeout);
  return streamInfo.files;
}
/**
 * Connects to the swarm with torrent-stream to read the torrent's file list and total size.
 *
 * Fix: the timeout timer is now cleared once the engine is ready; previously it stayed
 * pending after a successful lookup and fired later, destroying the engine a second time.
 *
 * @param {object} torrent - needs `magnetLink`, or `infoHash` (optionally with `trackers`)
 * @param {number} [timeout=30000] - milliseconds to wait for the engine to become ready
 * @returns {Promise<{files: object[], size: number}>} files as `{ fileIndex, name, path, size }`
 * @throws {Error} (as a rejection) when no identifier is given or the timeout elapses
 */
function filesAndSizeFromTorrentStream(torrent, timeout = 30000) {
  if (!torrent.infoHash && !torrent.magnetLink) {
    return Promise.reject(new Error("no infoHash or magnetLink"));
  }
  const magnet = torrent.magnetLink || decode.encode({ infoHash: torrent.infoHash, announce: torrent.trackers });
  return new Promise((resolve, rejected) => {
    const engine = new torrentStream(magnet, { connections: MAX_PEER_CONNECTIONS });

    // armed before `ready` is registered so the handler can always cancel it
    const timer = setTimeout(() => {
      engine.destroy();
      rejected(new Error('No available connections for torrent!'));
    }, timeout);

    engine.ready(() => {
      clearTimeout(timer);
      const files = engine.files
          .map((file, fileId) => ({
            fileIndex: fileId,
            name: file.name,
            // strip the leading path segment
            path: file.path.replace(/^[^\/]+\//, ''),
            size: file.length
          }));
      const size = engine.torrent.length;
      engine.destroy();
      resolve({ files, size });
    });
  });
}
/**
 * Keeps the video files that look like real content, dropping samples, extras,
 * anime openings/endings, redundant tiny files and watermark clips.
 * A single file without an integer fileIndex is returned untouched.
 *
 * @param {object[]} files - files with `path`, `size` and optionally `fileIndex`
 * @returns {object[]} the filtered video files
 */
function filterVideos(files) {
  if (files.length === 1 && !Number.isInteger(files[0].fileIndex)) {
    return files;
  }

  const videos = files.filter(file => isVideo(file.path));
  const maxSize = Math.max(...videos.map(video => video.size));
  const isSmallTorrent = videos.length <= 3;
  const minSampleRatio = isSmallTorrent ? 3 : 10;
  const minAnimeExtraRatio = 5;
  const minRedundantRatio = isSmallTorrent ? 30 : Number.MAX_VALUE;
  // how many times smaller than the biggest video this one is
  const sizeRatio = video => maxSize / parseInt(video.size);

  const rejectionRules = [
    // sample / bonus / promo files that are much smaller than the main video
    video => video.path.match(/sample|bonus|promo/i) && sizeRatio(video) > minSampleRatio,
    // anything inside an extras folder
    video => video.path.match(/extras?\//i),
    // anime openings, endings and previews that are much smaller than the main video
    video => video.path.match(/(?:\b|_)(?:NC)?(?:ED|OP|PV)(?:v?\d\d?)?(?:\b|_)/i)
        && sizeRatio(video) > minAnimeExtraRatio,
    // files far smaller than the biggest video (only applied when there are at most 3 videos)
    video => sizeRatio(video) > minRedundantRatio,
    // small top-level files named only with capital letters and dashes
    video => video.path.match(/^[A-Z-]+(?:\.[A-Z]+)?\.\w{3,4}$/)
        && sizeRatio(video) > minAnimeExtraRatio
  ];

  return videos.filter(video => !rejectionRules.some(rule => rule(video)));
}
/**
 * Keeps only the files whose path has a subtitle extension.
 */
function filterSubtitles(files) {
  const hasSubtitlePath = (file) => isSubtitle(file.path);
  return files.filter(hasSubtitlePath);
}
/**
 * Collects the de-duplicated trackers of a torrent: the ones from its magnet link, the ones
 * stored on the torrent (comma separated) and the default trackers, in that order.
 */
async function getTorrentTrackers(torrent) {
  const fromMagnet = torrent.magnetLink && decode(torrent.magnetLink).tr || [];
  const fromTorrent = torrent.trackers && torrent.trackers.split(',') || [];
  const defaults = await getDefaultTrackers(torrent);
  const combined = [].concat(fromMagnet, fromTorrent, defaults);
  return [...new Set(combined)];
}
/**
 * Resolves the default tracker list: the list downloaded from `TRACKERS_URL` (through
 * `cacheTrackers`), followed by `ADDITIONAL_TRACKERS` and, for anime torrents,
 * `ANIME_TRACKERS`.
 *
 * When the download fails the call is repeated after a delay for as long as `retry` is
 * positive; once retries are exhausted only the static lists are returned.
 *
 * @param {Object} torrent only its `type` is read
 * @param {number} [retry=3] remaining retry attempts
 * @returns {Promise<Array<string>>} tracker urls
 */
async function getDefaultTrackers(torrent, retry = 3) {
  const downloadTrackers = () => axios.get(TRACKERS_URL, { timeout: SEEDS_CHECK_TIMEOUT })
      .then(({ data }) => data && data.trim())
      .then(body => (body && body.split('\n\n')) || []);

  const retryOrGiveUp = () => {
    if (retry > 0) {
      return delay(5000).then(() => getDefaultTrackers(torrent, retry - 1));
    }
    return [];
  };

  const withStaticTrackers = trackers => {
    const extended = trackers.concat(ADDITIONAL_TRACKERS);
    return torrent.type === Type.ANIME ? extended.concat(ANIME_TRACKERS) : extended;
  };

  return cacheTrackers(downloadTrackers)
      .catch(retryOrGiveUp)
      .then(withStaticTrackers);
}
// Public API of this module; the filtering and tracker helpers above are not exported.
module.exports = { updateCurrentSeeders, updateTorrentSize, sizeAndFiles, torrentFiles }

View File

@@ -1,182 +0,0 @@
const { parse } = require('parse-torrent-title');
const { Type } = require('./types');
const Promises = require('./promises');
const repository = require('./repository');
const { getImdbId, getKitsuId } = require('./metadata');
const { parseTorrentFiles } = require('./torrentFiles');
const { assignSubtitles } = require('./torrentSubtitles');
const { isPackTorrent } = require('./parseHelper')
/**
 * Resolves the identifiers and files of a scraped torrent and stores them.
 *
 * The passed `torrent` is updated in place: its `type` may be switched from movie to
 * series, and `imdbId` / `kitsuId` are looked up and normalised when possible.
 * Nothing is stored when no identifier can be resolved (unless it is a pack torrent)
 * or when no video file is found.
 *
 * @param {Object} torrent scraped torrent record
 * @param {boolean} [overwrite=false] reuse the ids of already stored files
 * @returns {Promise<undefined>}
 */
async function createTorrentEntry(torrent, overwrite = false) {
  const titleInfo = parse(torrent.title);
  const looksLikeSeries = titleInfo.seasons || torrent.title.match(/\[\d+-\d+/);
  if (looksLikeSeries && torrent.type === Type.MOVIE) {
    // series torrents occasionally end up in the movies category
    torrent.type = Type.SERIES;
  }

  if (!torrent.imdbId && torrent.type !== Type.ANIME) {
    torrent.imdbId = await getImdbId(titleInfo, torrent.type)
        .catch(() => undefined);
  }
  if (torrent.imdbId && torrent.imdbId.length < 9) {
    // left-pad the numeric part with zeros up to 7 digits
    const digits = torrent.imdbId.replace('tt', '');
    torrent.imdbId = `tt${digits.padStart(7, '0')}`;
  }
  if (torrent.imdbId && torrent.imdbId.length > 9 && torrent.imdbId.startsWith('tt0')) {
    // strip redundant leading zeros
    torrent.imdbId = torrent.imdbId.replace(/tt0+([0-9]{7,})$/, 'tt$1');
  }
  if (!torrent.kitsuId && torrent.type === Type.ANIME) {
    torrent.kitsuId = await getKitsuId(titleInfo)
        .catch(() => undefined);
  }

  const hasIdentifier = torrent.imdbId || torrent.kitsuId;
  if (!hasIdentifier && !isPackTorrent(torrent)) {
    console.log(`imdbId or kitsuId not found: ${torrent.provider} ${torrent.title}`);
    return;
  }

  let parsedFiles;
  try {
    let torrentContents = await parseTorrentFiles(torrent);
    if (overwrite) {
      torrentContents = await overwriteExistingFiles(torrent, torrentContents);
    }
    parsedFiles = await assignSubtitles(torrentContents);
  } catch (error) {
    console.log(`Failed getting files for ${torrent.title}`, error.message);
    parsedFiles = {};
  }

  const { contents, videos, subtitles } = parsedFiles;
  if (!videos || !videos.length) {
    console.log(`no video files found for ${torrent.provider} [${torrent.infoHash}] ${torrent.title}`);
    return;
  }

  await repository.createTorrent({ ...torrent, contents, subtitles });
  // files are stored one after another, not in parallel
  await Promises.sequence(videos.map(video => () => repository.createFile(video)));
  console.log(`Created ${torrent.provider} entry for [${torrent.infoHash}] ${torrent.title}`);
}
/**
 * Re-uses the database ids of already stored files for freshly parsed videos, so that
 * saving them overwrites the stored rows instead of creating duplicates.
 *
 * Stored files are grouped by `fileIndex` (a missing index is grouped under `null`).
 * Each parsed video takes the next unused stored file of its group; videos without a
 * remaining stored counterpart are returned unchanged. When both sides hold exactly one
 * entry they are paired regardless of their index.
 *
 * @param {Object} torrent torrent being processed, only its `title` is read
 * @param {Object} torrentContents parsed contents exposing `videos`
 * @returns {Promise<Object>} contents whose videos carry the stored `id` where available
 * @throws {Error} (as a rejection) when the contents hold no video files
 */
async function overwriteExistingFiles(torrent, torrentContents) {
  const videos = torrentContents && torrentContents.videos;
  if (!videos || !videos.length) {
    // reject with a real Error so callers logging `error.message` get a useful text
    throw new Error(`No video files found for: ${torrent.title}`);
  }

  const indexKey = file => file.fileIndex !== undefined ? file.fileIndex : null;
  // best effort: a failed lookup simply means nothing gets overwritten
  const existingFiles = await repository.getFiles({ infoHash: videos[0].infoHash })
      .then(existing => existing
          .reduce((map, next) => {
            const key = indexKey(next);
            map[key] = (map[key] || []).concat(next);
            return map;
          }, {}))
      .catch(() => undefined);
  if (!existingFiles || !Object.keys(existingFiles).length) {
    return torrentContents;
  }

  const isSinglePair = videos.length === 1 && Object.keys(existingFiles).length === 1;
  const overwrittenVideos = videos.map(file => {
    const mapping = isSinglePair
        ? Object.values(existingFiles)[0]
        : existingFiles[indexKey(file)];
    // the group may already be exhausted when more videos than stored files share an index
    const originalFile = mapping && mapping.shift();
    return originalFile ? { id: originalFile.id, ...file } : file;
  });
  return { ...torrentContents, videos: overwrittenVideos };
}
/**
 * Stores the torrent through `repository.createSkipTorrent`.
 *
 * @param {Object} torrent torrent record
 * @returns {Promise<*>} whatever the repository call resolves to
 */
async function createSkipTorrentEntry(torrent) {
  const skipEntry = await repository.createSkipTorrent(torrent);
  return skipEntry;
}
/**
 * Looks the torrent up among the skip torrents first and, when that lookup fails,
 * among the regular torrents.
 *
 * @param {Object} torrent torrent to look up
 * @returns {Promise<*>} the stored entry, or `undefined` when both lookups fail
 */
async function getStoredTorrentEntry(torrent) {
  const lookupRegularTorrent = () => repository.getTorrent(torrent);
  const notStored = () => undefined;
  return repository.getSkipTorrent(torrent)
      .catch(lookupRegularTorrent)
      .catch(notStored);
}
/**
 * Refreshes an already stored torrent with newly scraped information.
 *
 * The stored record is the passed object itself when it exposes `dataValues`, otherwise
 * it is fetched from the repository. Its provider and languages are updated where
 * applicable, then its contents are (re)created and its seeders updated.
 *
 * @param {Object} torrent scraped torrent or stored record
 * @returns {Promise<*>} `false` when the torrent is not stored, otherwise the result of
 *   `updateTorrentSeeders`
 */
async function checkAndUpdateTorrent(torrent) {
  const storedTorrent = torrent.dataValues
      ? torrent
      : await repository.getTorrent(torrent).catch(() => undefined);
  if (!storedTorrent) {
    return false;
  }
  if (storedTorrent.provider === 'KickassTorrents' && torrent.provider) {
    storedTorrent.provider = torrent.provider;
    storedTorrent.torrentId = torrent.torrentId;
  }
  if (!storedTorrent.languages && torrent.languages && storedTorrent.provider !== 'RARBG') {
    storedTorrent.languages = torrent.languages;
    try {
      // awaited so the success message is only logged once the save went through and a
      // failing save cannot surface as an unhandled rejection
      await storedTorrent.save();
      console.log(`Updated [${storedTorrent.infoHash}] ${storedTorrent.title} language to ${torrent.languages}`);
    } catch (error) {
      // best effort: a failed language update must not prevent the remaining updates
      console.warn(`Failed updating [${storedTorrent.infoHash}] ${storedTorrent.title} language:`, error);
    }
  }
  return createTorrentContents({ ...storedTorrent.get(), torrentLink: torrent.torrentLink })
      .then(() => updateTorrentSeeders(torrent));
}
/**
 * Parses and stores the contents of an already stored torrent that was not opened yet.
 *
 * Nothing happens when the torrent is flagged `opened`, when it has no stored files or
 * when no contents could be parsed. A single stored file without an integer `fileIndex`
 * is treated as a placeholder: it is either updated with the parsed file data (one
 * parsed video) or deleted in favour of the newly parsed videos.
 *
 * @param {Object} torrent stored torrent values
 * @returns {Promise<undefined>} save failures are logged, not propagated
 */
async function createTorrentContents(torrent) {
  if (torrent.opened) {
    return;
  }
  const storedVideos = await repository.getFiles(torrent).catch(() => []);
  if (!storedVideos || !storedVideos.length) {
    return;
  }

  const notOpenedVideo = storedVideos.length === 1 && !Number.isInteger(storedVideos[0].fileIndex);
  const imdbId = Promises.mostCommonValue(storedVideos.map(stored => stored.imdbId));
  const kitsuId = Promises.mostCommonValue(storedVideos.map(stored => stored.kitsuId));

  let parsedContents;
  try {
    const torrentContents = await parseTorrentFiles({ ...torrent, imdbId, kitsuId });
    // already opened torrents keep their stored videos, only contents/subtitles are new
    const merged = notOpenedVideo ? torrentContents : { ...torrentContents, videos: storedVideos };
    parsedContents = await assignSubtitles(merged);
  } catch (error) {
    console.log(`Failed getting contents for [${torrent.infoHash}] ${torrent.title}`, error.message);
    parsedContents = {};
  }

  const { contents, videos, subtitles } = parsedContents;
  if (!contents || !contents.length) {
    return;
  }

  if (notOpenedVideo && videos.length === 1) {
    // single placeholder and single parsed video: carry the real file data over to the
    // stored record and keep using that record
    const placeholder = storedVideos[0];
    const parsedVideo = videos[0];
    placeholder.fileIndex = parsedVideo.fileIndex;
    placeholder.title = parsedVideo.title;
    placeholder.size = parsedVideo.size;
    placeholder.subtitles = parsedVideo.subtitles;
    videos[0] = placeholder;
  }
  // the placeholder is obsolete when none of the videos to save is a stored record
  const shouldDeleteOld = notOpenedVideo && videos.every(video => !video.id);

  const deleteObsoletePlaceholder = () => {
    if (!shouldDeleteOld) {
      return Promise.resolve();
    }
    console.error(`Deleting old video for [${torrent.infoHash}] ${torrent.title}`)
    return storedVideos[0].destroy();
  };
  const saveVideos = () => Promises.sequence(videos.map(video => () => repository.createFile(video)));
  const logCreated = () => console.log(`Created contents for ${torrent.provider} [${torrent.infoHash}] ${torrent.title}`);
  const logFailure = error => console.error(`Failed saving contents for [${torrent.infoHash}] ${torrent.title}`, error);

  return repository.createTorrent({ ...torrent, contents, subtitles })
      .then(deleteObsoletePlaceholder)
      .then(saveVideos)
      .then(logCreated)
      .catch(logFailure);
}
/**
 * Stores the seeders count of a torrent.
 *
 * The torrent must be identifiable (by `infoHash`, or by `provider` plus `torrentId`)
 * and carry an integer `seeders` value; otherwise it is returned untouched.
 *
 * @param {Object} torrent torrent carrying the new seeders count
 * @returns {Promise<*>} the torrent itself when nothing is updated, the repository result
 *   on success, `undefined` when the update fails
 */
async function updateTorrentSeeders(torrent) {
  const isIdentifiable = torrent.infoHash || (torrent.provider && torrent.torrentId);
  if (!isIdentifiable || !Number.isInteger(torrent.seeders)) {
    return torrent;
  }
  const warnAndContinue = error => {
    console.warn('Failed updating seeders:', error);
    return undefined;
  };
  return repository.setTorrentSeeders(torrent, torrent.seeders).catch(warnAndContinue);
}
// Public API of this module; overwriteExistingFiles stays internal.
module.exports = {
createTorrentEntry,
createTorrentContents,
createSkipTorrentEntry,
getStoredTorrentEntry,
updateTorrentSeeders,
checkAndUpdateTorrent
};

View File

@@ -1,509 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const distance = require('jaro-winkler');
const { parse } = require('parse-torrent-title');
const Promises = require('../lib/promises');
const { torrentFiles } = require('../lib/torrent');
const { getMetadata, getImdbId, getKitsuId } = require('../lib/metadata');
const { parseSeriesVideos, isPackTorrent } = require('../lib/parseHelper');
const { Type } = require('./types');
const { isDisk } = require('./extension');
// Size threshold below which video files are ignored when parsing torrent files.
const MIN_SIZE = 5 * 1024 * 1024; // 5 MB
// Serialises imdb id lookups: one at a time, at least one second apart.
const imdb_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 1000 });
/**
 * Parses the files of a torrent into `{ contents, videos, subtitles }`, handling it
 * either as a movie or as a series.
 *
 * For non anime torrents the type reported by the metadata replaces `torrent.type`
 * (the passed object is updated in place) when the two differ.
 *
 * @param {Object} torrent torrent record
 * @returns {Promise<Object>} result of `parseMovieFiles` or `parseSeriesFiles`
 */
async function parseTorrentFiles(torrent) {
  const parsedTorrentName = parse(torrent.title);
  const metadataId = torrent.kitsuId || torrent.imdbId;
  // a failed lookup is not fatal, parsing continues without metadata
  const metadata = await getMetadata(metadataId, torrent.type || Type.MOVIE)
      .then(meta => ({ ...meta }))
      .catch(() => undefined);

  const metadataTypeDiffers = metadata && metadata.type && metadata.type !== torrent.type;
  if (torrent.type !== Type.ANIME && metadataTypeDiffers) {
    // trust the metadata about it being a movie or a series
    torrent.type = metadata.type;
  }

  if (torrent.type !== Type.MOVIE) {
    return parseSeriesFiles(torrent, parsedTorrentName, metadata)
  }
  const hasNoSeasons = !parsedTorrentName.seasons;
  // NOTE(review): presumably guards against movie titles mis-parsed as S05E01/S05E05 - confirm
  const isSeasonFiveSpecialCase = parsedTorrentName.season === 5 && [1, 5].includes(parsedTorrentName.episode);
  if (hasNoSeasons || isSeasonFiveSpecialCase) {
    return parseMovieFiles(torrent, parsedTorrentName, metadata);
  }
  return parseSeriesFiles(torrent, parsedTorrentName, metadata)
}
/**
 * Maps the video files of a movie torrent to file entries.
 *
 * Videos not larger than `MIN_SIZE` and featurettes are dropped. When the remaining
 * videos form a single movie they all get the torrent's (or metadata's) ids; otherwise
 * the torrent is handled as a collection and an imdb id is looked up per video name.
 *
 * @param {Object} torrent torrent record
 * @param {Object} parsedName parsed torrent title (not used here)
 * @param {Object} [metadata] metadata of the torrent, when available
 * @returns {Promise<{contents: *, videos: Array<Object>, subtitles: *}>}
 */
async function parseMovieFiles(torrent, parsedName, metadata) {
  const { contents, videos, subtitles } = await getMoviesTorrentContent(torrent);
  const movieVideos = videos.filter(video => video.size > MIN_SIZE && !isFeaturette(video));

  if (isSingleMovie(movieVideos)) {
    const imdbId = torrent.imdbId || metadata && metadata.imdbId;
    const kitsuId = torrent.kitsuId || metadata && metadata.kitsuId;
    const singleMovieVideos = movieVideos.map(video => ({
      infoHash: torrent.infoHash,
      fileIndex: video.fileIndex,
      title: video.path || torrent.title,
      size: video.size || torrent.size,
      imdbId,
      kitsuId
    }));
    return { contents, videos: singleMovieVideos, subtitles };
  }

  // collection of movies: resolve the imdb id of every video, one lookup after another
  const lookups = movieVideos.map(video => () => {
    if (isFeaturette(video)) {
      return Promise.resolve(video);
    }
    return findMovieImdbId(video.name).then(imdbId => ({ ...video, imdbId }));
  });
  const resolvedVideos = await Promises.sequence(lookups);
  const collectionVideos = resolvedVideos.map(video => ({
    infoHash: torrent.infoHash,
    fileIndex: video.fileIndex,
    title: video.path || video.name,
    size: video.size,
    imdbId: video.imdbId,
  }));
  return { contents, videos: collectionVideos, subtitles };
}
/**
 * Maps the video files of a series torrent to file entries, one entry per episode.
 *
 * Small videos are dropped (unless the torrent holds a single video), season/episode
 * information is parsed and decomposed, then every video is mapped either as a movie
 * or as episodes. Featurettes keep their entry but lose all id/episode fields.
 *
 * @param {Object} torrent torrent record
 * @param {Object} parsedName parsed torrent title (not used here)
 * @param {Object} [metadata] metadata of the torrent, when available
 * @returns {Promise<{contents: *, videos: Array<Object>, subtitles: *}>}
 */
async function parseSeriesFiles(torrent, parsedName, metadata) {
  const { contents, videos, subtitles } = await getSeriesTorrentContent(torrent);

  const keepAll = videos.length === 1;
  const sizableVideos = videos.filter(video => keepAll || video.size > MIN_SIZE);
  const seriesVideos = await parseSeriesVideos(torrent, sizableVideos);
  const decomposedVideos = await decomposeEpisodes(torrent, seriesVideos, metadata);
  const assignedVideos = await assignKitsuOrImdbEpisodes(torrent, decomposedVideos, metadata);

  const mappedVideos = await Promise.all(assignedVideos.map(video => video.isMovie
      ? mapSeriesMovie(video, torrent)
      : mapSeriesEpisode(video, torrent, assignedVideos)));

  // every mapping yields either a single entry or a list of entries: flatten one level
  let flatVideos = [];
  for (const mapped of mappedVideos) {
    flatVideos = flatVideos.concat(mapped);
  }
  const parsedVideos = flatVideos.map(video => isFeaturette(video) ? clearInfoFields(video) : video);
  return { contents, videos: parsedVideos, subtitles };
}
/**
 * Loads the files of a movie torrent.
 *
 * When the files cannot be loaded and the torrent is not a pack, or when the torrent
 * holds disk contents without any video, a single video entry made of the torrent's
 * own title and size is used instead. A failure for a pack torrent is propagated.
 *
 * @param {Object} torrent torrent record
 * @returns {Promise<Object>} object exposing at least `videos`
 */
async function getMoviesTorrentContent(torrent) {
  const torrentAsVideo = () => ({ name: torrent.title, path: torrent.title, size: torrent.size });
  const files = await torrentFiles(torrent)
      .catch(error => isPackTorrent(torrent)
          ? Promise.reject(error)
          : { videos: [torrentAsVideo()] });
  const hasContents = files.contents && files.contents.length;
  if (hasContents && !files.videos.length && isDiskTorrent(files.contents)) {
    files.videos = [torrentAsVideo()];
  }
  return files;
}
/**
 * Loads the files of a series torrent.
 *
 * When the files cannot be loaded and the torrent is not a pack, a single video entry
 * made of the torrent's own title and size is returned instead; for a pack torrent the
 * failure is propagated.
 *
 * @param {Object} torrent torrent record
 * @returns {Promise<Object>} object exposing at least `videos`
 */
async function getSeriesTorrentContent(torrent) {
  const fallbackToTorrentItself = error => {
    if (isPackTorrent(torrent)) {
      return Promise.reject(error);
    }
    const { title, size } = torrent;
    return { videos: [{ name: title, path: title, size: size }] }
  };
  return torrentFiles(torrent).catch(fallbackToTorrentItself);
}
/**
 * Maps a parsed series video to file entries.
 *
 * A video with episode information yields one entry per episode. A video without any
 * yields a single entry with no episode fields when it is the only file, when some
 * sibling file has episodes or when the torrent title names seasons; otherwise it
 * yields nothing.
 *
 * @param {Object} file parsed video
 * @param {Object} torrent torrent record
 * @param {Array<Object>} files all parsed videos of the torrent
 * @returns {Promise<Object|Array<Object>>} a single entry or a list of entries
 */
async function mapSeriesEpisode(file, torrent, files) {
  const episodeList = file.episodes || file.kitsuEpisodes;
  if (file.episodes || file.kitsuEpisodes) {
    return Array.from(episodeList.keys(), index => ({
      infoHash: torrent.infoHash,
      fileIndex: file.fileIndex,
      title: file.path || file.name,
      size: file.size,
      imdbId: file.imdbId || torrent.imdbId,
      imdbSeason: file.season,
      imdbEpisode: file.episodes && file.episodes[index],
      kitsuId: file.kitsuId || torrent.kitsuId,
      kitsuEpisode: file.kitsuEpisodes && file.kitsuEpisodes[index]
    }));
  }

  const belongsToSeries = files.length === 1
      || files.some(f => f.episodes || f.kitsuEpisodes)
      || parse(torrent.title).seasons;
  if (!belongsToSeries) {
    return [];
  }
  return {
    infoHash: torrent.infoHash,
    fileIndex: file.fileIndex,
    title: file.path || file.name,
    size: file.size,
    imdbId: torrent.imdbId || file.imdbId,
  };
}
/**
 * Maps a video detected as a movie inside a series torrent to a file entry.
 *
 * For anime torrents a kitsu id is looked up first; the imdb id is only looked up when
 * no kitsu id was found. Episode fields are filled from the movie's metadata when it
 * lists videos and the file names an episode (or the metadata lists exactly one video).
 *
 * @param {Object} file parsed video
 * @param {Object} torrent torrent record
 * @returns {Promise<Array<Object>>} list holding the single mapped entry
 */
async function mapSeriesMovie(file, torrent) {
  let kitsuId;
  let imdbId;
  if (torrent.type === Type.ANIME) {
    kitsuId = await findMovieKitsuId(file);
  }
  if (!kitsuId) {
    imdbId = await findMovieImdbId(file);
  }
  const metadata = await getMetadata(kitsuId || imdbId, Type.MOVIE).catch(() => ({}));

  const { videos: metaVideos } = metadata;
  const hasEpisode = metaVideos && metaVideos.length && (file.episode || metaVideos.length === 1);
  const episodeVideo = hasEpisode && metaVideos[(file.episode || 1) - 1];
  const imdbEpisodeVideo = episodeVideo && metadata.imdbId ? episodeVideo : undefined;
  const kitsuEpisodeVideo = episodeVideo && metadata.kitsuId ? episodeVideo : undefined;

  return [{
    infoHash: torrent.infoHash,
    fileIndex: file.fileIndex,
    title: file.path || file.name,
    size: file.size,
    imdbId: metadata.imdbId || imdbId,
    kitsuId: metadata.kitsuId || kitsuId,
    imdbSeason: imdbEpisodeVideo ? imdbEpisodeVideo.imdbSeason : undefined,
    imdbEpisode: imdbEpisodeVideo ? imdbEpisodeVideo.imdbEpisode || imdbEpisodeVideo.episode : undefined,
    kitsuEpisode: kitsuEpisodeVideo ? kitsuEpisodeVideo.kitsuEpisode || kitsuEpisodeVideo.episode : undefined
  }];
}
/**
 * Normalises the season/episode numbering of parsed series files (in place) so that it
 * can be matched against the metadata: concatenated (101 = S01E01), date based and
 * absolute numbering schemes are converted to season + episode.
 *
 * @param {Object} torrent torrent record
 * @param {Array<Object>} files parsed videos, updated in place
 * @param {Object} [metadata] metadata of the series; may be refreshed in place for anime
 * @returns {Promise<Array<Object>>} the same `files` array
 */
async function decomposeEpisodes(torrent, files, metadata = { episodeCount: [] }) {
  const hasEpisodeInfo = files.some(file => file.episodes || file.date);
  if (!hasEpisodeInfo) {
    return files;
  }

  preprocessEpisodes(files);

  if (torrent.type === Type.ANIME && torrent.kitsuId) {
    if (!needsCinemetaMetadataForAnime(files, metadata)) {
      // anime episodes are otherwise always absolute and belong to a single season
      for (const file of files) {
        if (file.episodes && file.season !== 0) {
          file.season = 1;
        }
      }
      return files;
    }
    // The anime could have been resolved to a wrong kitsuId because of imdb season
    // naming, absolute per series naming or multiple seasons. In these cases the
    // cinemeta based metadata is fetched and used for decomposing.
    await updateToCinemetaMetadata(metadata);
    if (files.some(file => Number.isInteger(file.season))) {
      // multi season anime torrents sometimes leave out the season 1 naming
      for (const file of files) {
        if (!Number.isInteger(file.season) && file.episodes) {
          file.season = 1;
        }
      }
    }
  }

  let allEpisodes = [];
  for (const file of files) {
    allEpisodes = allEpisodes.concat(!file.isMovie && file.episodes || []);
  }
  const sortedEpisodes = allEpisodes.sort((a, b) => a - b);

  if (isConcatSeasonAndEpisodeFiles(files, sortedEpisodes, metadata)) {
    decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata);
  } else if (isDateEpisodeFiles(files, metadata)) {
    decomposeDateEpisodeFiles(torrent, files, metadata);
  } else if (isAbsoluteEpisodeFiles(torrent, files, metadata)) {
    decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
  }
  return files;
}
/**
 * Rewrites specials named with episode 0 (ie. S02E00), in place, into season 0 with the
 * original season number as their episode.
 *
 * @param {Array<Object>} files parsed videos
 */
function preprocessEpisodes(files) {
  const zeroEpisodeSpecials = files.filter(file => Number.isInteger(file.season) && file.episode === 0);
  for (const special of zeroEpisodeSpecials) {
    const originalSeason = special.season;
    special.episode = originalSeason;
    special.episodes = [originalSeason];
    special.season = 0;
  }
}
/**
 * Tells whether the files use concatenated season + episode numbers (ex. 101 = S01E01).
 *
 * This is assumed when either:
 * 1. enough episode numbers above 100 fit the episode count of their concatenated
 *    season, and enough files have no season or a season matching that number
 *    (ex. path Season 5/511 - Prize Fighters.avi);
 * 2. enough files only hold numbers above 100 that exceed the total episode count.
 *
 * @param {Array<Object>} files parsed videos
 * @param {Array<number>} sortedEpisodes episode numbers of all files
 * @param {Object} metadata exposes `episodeCount`, `totalCount` and optionally `kitsuId`
 * @returns {boolean}
 */
function isConcatSeasonAndEpisodeFiles(files, sortedEpisodes, metadata) {
  if (metadata.kitsuId !== undefined) {
    // anime does not use this naming scheme in 99% of cases
    return false;
  }
  const fileCount = files.length;
  const thresholdAbove = Math.max(Math.ceil(fileCount * 0.05), 5);
  const thresholdSorted = Math.max(Math.ceil(fileCount * 0.8), 8);
  const threshold = Math.max(Math.ceil(fileCount * 0.8), 5);

  // episode count of the season encoded in the hundreds of the number
  const concatSeasonCount = ep => metadata.episodeCount[div100(ep) - 1];
  const sortedConcatEpisodes = sortedEpisodes.filter(ep => ep > 100
      && concatSeasonCount(ep) < ep
      && concatSeasonCount(ep) >= mod100(ep));

  const episodeFiles = files.filter(file => !file.isMovie && file.episodes);
  const concatFileEpisodes = episodeFiles
      .filter(file => !file.season || file.episodes.every(ep => div100(ep) === file.season));
  const concatAboveTotalEpisodeCount = episodeFiles
      .filter(file => file.episodes.every(ep => ep > 100)
          && file.episodes.every(ep => ep > metadata.totalCount));

  return sortedConcatEpisodes.length >= thresholdSorted && concatFileEpisodes.length >= threshold
      || concatAboveTotalEpisodeCount.length >= thresholdAbove;
}
/**
 * Tells whether every file is identified by a date instead of a season known to the
 * metadata.
 *
 * @param {Array<Object>} files parsed videos
 * @param {Object} metadata exposes `episodeCount`
 * @returns {boolean}
 */
function isDateEpisodeFiles(files, metadata) {
  const hasKnownSeason = file => file.season && metadata.episodeCount[file.season - 1];
  return files.every(file => !hasKnownSeason(file) && file.date);
}
/**
 * Tells whether the files use absolute episode numbering, which is assumed when:
 * - no episode file names a season; or
 * - for anime, every episode file names a season beyond the known seasons; or
 * - at least a fifth of the files only hold episodes above their season's episode count.
 *
 * @param {Object} torrent torrent record
 * @param {Array<Object>} files parsed videos
 * @param {Object} metadata exposes `episodeCount`
 * @returns {boolean}
 */
function isAbsoluteEpisodeFiles(torrent, files, metadata) {
  const threshold = Math.ceil(files.length / 5);
  const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
  const nonMovieEpisodes = files.filter(file => !file.isMovie && file.episodes);
  const absoluteEpisodes = files.filter(file => file.season && file.episodes
      && file.episodes.every(ep => metadata.episodeCount[file.season - 1] < ep));

  if (nonMovieEpisodes.every(file => !file.season)) {
    return true;
  }
  if (isAnime && nonMovieEpisodes.every(file => file.season > metadata.episodeCount.length)) {
    return true;
  }
  return absoluteEpisodes.length >= threshold;
}
/**
 * Tells whether the file is likely a new episode that the metadata does not list yet:
 * the series is still running, and the episode number is larger than the last known
 * episode of its season or the season is at least the last known one.
 * Only applies to non anime torrents and to seasons other than 1.
 *
 * @param {Object} torrent torrent record
 * @param {Object} file parsed video
 * @param {Object} metadata exposes `status` and `episodeCount`
 * @returns {*} truthy when the file looks like a not yet indexed episode
 */
function isNewEpisodeNotInMetadata(torrent, file, metadata) {
  const isAnime = torrent.type === Type.ANIME && torrent.kitsuId;
  if (isAnime) {
    return false;
  }
  if (file.isMovie) {
    return false;
  }
  if (!file.episodes) {
    return file.episodes;
  }
  if (file.season === 1) {
    return false;
  }
  if (!/continuing|current/i.test(metadata.status)) {
    return false;
  }
  if (!(file.season >= metadata.episodeCount.length)) {
    return false;
  }
  const knownEpisodes = metadata.episodeCount[file.season - 1] || 0;
  return file.episodes.every(ep => ep > knownEpisodes);
}
/**
 * Splits concatenated season + episode numbers (ex. 101) into season and episode, in
 * place. Only files are touched whose episodes are all above 100, whose season (named
 * or encoded) has fewer than 100 episodes, and whose named season, if any, matches the
 * encoded one.
 *
 * @param {Object} torrent torrent record (not used here)
 * @param {Array<Object>} files parsed videos, updated in place
 * @param {Object} metadata exposes `episodeCount`
 */
function decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata) {
  const concatenatedFiles = files.filter(file => {
    if (!file.episodes || file.season === 0 || !file.episodes.every(ep => ep > 100)) {
      return false;
    }
    const season = file.season || div100(file.episodes[0]);
    if (!(metadata.episodeCount[season - 1] < 100)) {
      return false;
    }
    return !file.season || file.episodes.every(ep => div100(ep) === file.season);
  });
  for (const file of concatenatedFiles) {
    file.season = div100(file.episodes[0]);
    file.episodes = file.episodes.map(ep => mod100(ep))
  }
}
/**
 * Converts absolute episode numbers into season + episode, in place, using the per
 * season episode counts of the metadata.
 *
 * Without any known season, episode files lacking a season are simply put in season 1.
 * Otherwise the season is the first one whose cumulative episode count reaches the
 * file's first episode (the last season when none does), and the episodes of all
 * previous seasons are subtracted from the episode numbers.
 *
 * @param {Object} torrent torrent record
 * @param {Array<Object>} files parsed videos, updated in place
 * @param {Object} metadata exposes `episodeCount`
 */
function decomposeAbsoluteEpisodeFiles(torrent, files, metadata) {
  const seasonCounts = metadata.episodeCount;
  if (seasonCounts.length === 0) {
    const seasonless = files.filter(file => !Number.isInteger(file.season) && file.episodes && !file.isMovie);
    for (const file of seasonless) {
      file.season = 1;
    }
    return;
  }

  const sum = counts => counts.reduce((a, b) => a + b);
  const absoluteFiles = files
      .filter(file => file.episodes && !file.isMovie && file.season !== 0)
      .filter(file => !isNewEpisodeNotInMetadata(torrent, file, metadata))
      .filter(file => !file.season || (seasonCounts[file.season - 1] || 0) < file.episodes[0]);

  for (const file of absoluteFiles) {
    const firstEpisode = file.episodes[0];
    const reachedIdx = seasonCounts.findIndex((_, i) => sum(seasonCounts.slice(0, i + 1)) >= firstEpisode);
    // episodes beyond the known total are put into the last known season
    const seasonIdx = reachedIdx === -1 ? seasonCounts.length - 1 : reachedIdx;
    const previousEpisodes = seasonCounts.slice(0, seasonIdx).reduce((a, b) => a + b, 0);
    file.season = seasonIdx + 1;
    file.episodes = file.episodes.map(ep => ep - previousEpisodes)
  }
}
/**
 * Assigns season and episode, in place, to files identified by a date, by matching the
 * date against the release dates of the metadata videos. Release dates are formatted as
 * YYYY-MM-DD after applying the time zone offset chosen for the metadata's country.
 *
 * @param {Object} torrent torrent record (not used here)
 * @param {Array<Object>} files parsed videos, updated in place
 * @param {Object} [metadata] exposes `videos` and `country`
 */
function decomposeDateEpisodeFiles(torrent, files, metadata) {
  const hasVideos = metadata && metadata.videos && metadata.videos.length;
  if (!hasVideos) {
    return;
  }

  const timeZoneOffset = getTimeZoneOffset(metadata.country);
  // when several videos share a release date the last one wins
  const videosByReleaseDate = {};
  for (const video of metadata.videos) {
    const releaseDate = moment(video.released).utcOffset(timeZoneOffset).format('YYYY-MM-DD');
    videosByReleaseDate[releaseDate] = video;
  }

  for (const file of files) {
    const video = file.date && videosByReleaseDate[file.date];
    if (video) {
      file.season = video.season;
      file.episodes = [video.episode];
    }
  }
}
/**
 * Assigns season and episode, in place, to files by comparing the episode title found
 * in the file name with the names of the metadata videos; the video with the highest
 * similarity score wins.
 *
 * The episode title is whatever follows the last `E<number>` or `- ` marker in the
 * file name, without the extension; all underscores are treated as spaces.
 *
 * @param {Object} torrent torrent record (not used here)
 * @param {Array<Object>} files parsed videos, updated in place
 * @param {Object} [metadata] exposes `videos` carrying `name`, `season` and `episode`
 */
function decomposeEpisodeTitleFiles(torrent, files, metadata) {
  if (!metadata || !metadata.videos || !metadata.videos.length) {
    // nothing to compare the file names against
    return;
  }
  files.forEach(file => {
    // a string pattern only replaces the first match, hence the global regex
    const episodeTitle = file.name.replace(/_/g, ' ')
        .replace(/^.*(?:E\d+[abc]?|- )\s?(.+)\.\w{1,4}$/, '$1')
        .trim();
    const foundEpisode = metadata.videos
        .map(video => ({ ...video, distance: distance(episodeTitle, video.name) }))
        .sort((a, b) => b.distance - a.distance)[0];
    if (foundEpisode) {
      file.isMovie = false;
      file.season = foundEpisode.season;
      file.episodes = [foundEpisode.episode];
    }
  });
}
/**
 * Returns the UTC offset, as a signed `±HH:mm` string, in which the release dates of a
 * country's episodes are interpreted.
 *
 * The sign is part of the format: to my understanding moment's `utcOffset` only parses
 * strings starting with `+` or `-` (or `Z`) and leaves the moment untouched otherwise,
 * so an unsigned value would silently fall back to the server's local time zone.
 *
 * @param {string} [country] country name as found in the metadata
 * @returns {string} `'-08:00'` for the United States, UTC (`'+00:00'`) otherwise
 */
function getTimeZoneOffset(country) {
  switch (country) {
    case 'United States':
    case 'USA':
      return '-08:00';
    default:
      return '+00:00';
  }
}
/**
 * Cross-maps the season/episode numbering of parsed files (in place) between imdb and
 * kitsu, using the per-video mapping of the metadata.
 *
 * - Without metadata videos only anime files are touched: their episodes become kitsu
 *   episodes, and when the metadata describes a movie its imdb id is assigned to files
 *   that have none.
 * - When the metadata videos carry `imdbSeason` (or there is no imdb id) the parsed
 *   numbering is kitsu based and the imdb season/episodes are derived from it.
 * - When the metadata videos carry `kitsuEpisode` the parsed numbering is imdb based and
 *   the kitsu id/episodes are derived from it.
 *
 * @param {Object} torrent torrent record, only its `type` is read
 * @param {Array<Object>} files parsed videos, updated in place
 * @param {Object} [metadata] metadata of the series; undefined when the lookup failed
 * @returns {Array<Object>} the same `files` array
 */
function assignKitsuOrImdbEpisodes(torrent, files, metadata) {
  if (!metadata || !metadata.videos || !metadata.videos.length) {
    if (torrent.type === Type.ANIME) {
      // assign episodes as kitsu episodes for anime when no metadata available for imdb mapping
      files
          .filter(file => file.season && file.episodes)
          .forEach(file => {
            file.kitsuEpisodes = file.episodes;
            file.season = undefined;
            file.episodes = undefined;
          })
      // metadata itself may be missing in this branch, so it has to be checked first
      if (metadata && metadata.type === Type.MOVIE && files.every(file => !file.imdbId)) {
        // sometimes a movie has episode naming, thus not recognized as a movie and imdbId not assigned
        files.forEach(file => file.imdbId = metadata.imdbId);
      }
    }
    return files;
  }

  // season -> episode -> metadata video
  const seriesMapping = metadata.videos
      .reduce((map, video) => {
        const episodeMap = map[video.season] || {};
        episodeMap[video.episode] = video;
        map[video.season] = episodeMap;
        return map;
      }, {});

  if (metadata.videos.some(video => Number.isInteger(video.imdbSeason)) || !metadata.imdbId) {
    // kitsu episode info is the base
    files
        .filter(file => Number.isInteger(file.season) && file.episodes)
        .forEach(file => {
          const seasonMapping = seriesMapping[file.season];
          const episodeMapping = seasonMapping && seasonMapping[file.episodes[0]];
          file.kitsuEpisodes = file.episodes;
          if (episodeMapping && Number.isInteger(episodeMapping.imdbSeason)) {
            file.imdbId = metadata.imdbId;
            file.season = episodeMapping.imdbSeason;
            file.episodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].imdbEpisode);
          } else {
            // no imdb mapping available for episode
            file.season = undefined;
            file.episodes = undefined;
          }
        });
  } else if (metadata.videos.some(video => video.kitsuEpisode)) {
    // imdb episode info is base
    files
        .filter(file => Number.isInteger(file.season) && file.episodes)
        .forEach(file => {
          if (seriesMapping[file.season]) {
            const seasonMapping = seriesMapping[file.season];
            file.imdbId = metadata.imdbId;
            file.kitsuId = seasonMapping[file.episodes[0]] && seasonMapping[file.episodes[0]].kitsuId;
            file.kitsuEpisodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].kitsuEpisode);
          } else if (seriesMapping[file.season - 1]) {
            // sometimes a second season might be a continuation of the previous season
            const seasonMapping = seriesMapping[file.season - 1];
            const episodes = Object.values(seasonMapping);
            const firstKitsuId = episodes.length && episodes[0].kitsuId;
            const differentTitlesCount = new Set(episodes.map(ep => ep.kitsuId)).size
            // number of episodes belonging to the first kitsu title of the previous season
            const skippedCount = episodes.filter(ep => ep.kitsuId === firstKitsuId).length;
            const seasonEpisodes = files
                .filter(otherFile => otherFile.season === file.season)
                .reduce((a, b) => a.concat(b.episodes), []);
            const isAbsoluteOrder = seasonEpisodes.every(ep => ep > skippedCount && ep <= episodes.length)
            const isNormalOrder = seasonEpisodes.every(ep => ep + skippedCount <= episodes.length)
            if (differentTitlesCount >= 1 && (isAbsoluteOrder || isNormalOrder)) {
              file.imdbId = metadata.imdbId;
              file.season = file.season - 1;
              file.episodes = file.episodes.map(ep => isAbsoluteOrder ? ep : ep + skippedCount);
              file.kitsuId = seasonMapping[file.episodes[0]].kitsuId;
              file.kitsuEpisodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].kitsuEpisode);
            }
          } else if (Object.values(seriesMapping).length === 1 && seriesMapping[1]) {
            // sometimes series might be named with sequel season but it's not a season on imdb and a new title
            const seasonMapping = seriesMapping[1];
            file.imdbId = metadata.imdbId;
            file.season = 1;
            file.kitsuId = seasonMapping[file.episodes[0]].kitsuId;
            file.kitsuEpisodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].kitsuEpisode);
          }
        });
  }
  return files;
}
/**
 * Tells whether the metadata of an anime should be replaced with its imdb based
 * counterpart before decomposing episodes: either it maps to several imdb seasons, or
 * some episode file names a season outside the mapped imdb seasons or only holds
 * episodes above the total episode count.
 *
 * @param {Array<Object>} files parsed videos
 * @param {Object} [metadata] exposes `imdbId`, `videos` and `totalCount`
 * @returns {boolean}
 */
function needsCinemetaMetadataForAnime(files, metadata) {
  const hasImdbVideos = metadata && metadata.imdbId && metadata.videos && metadata.videos.length;
  if (!hasImdbVideos) {
    return false;
  }

  const imdbSeasons = metadata.videos.map(video => video.imdbSeason);
  const minSeason = Math.min(...imdbSeasons) || Number.MAX_VALUE;
  const maxSeason = Math.max(...imdbSeasons) || Number.MAX_VALUE;
  const differentSeasons = new Set(imdbSeasons.filter(season => Number.isInteger(season))).size;
  const total = metadata.totalCount || Number.MAX_VALUE;
  if (differentSeasons > 1) {
    return true;
  }

  const isOutsideMetadata = file => file.season < minSeason
      || file.season > maxSeason
      || file.episodes.every(ep => ep > total);
  return files
      .filter(file => !file.isMovie && file.episodes)
      .some(isOutsideMetadata);
}
/**
 * Replaces, in place, the videos and episode counts of the metadata with those fetched
 * for its imdb id. The metadata is left as it is when the fetched one lists no videos.
 *
 * @param {Object} metadata exposes `imdbId` and `type`; updated in place
 * @returns {Promise<Object|undefined>} the metadata, or `undefined` when fetching failed
 *   (the failure is only logged)
 */
async function updateToCinemetaMetadata(metadata) {
  const applyFetched = fetched => {
    const source = fetched.videos && fetched.videos.length ? fetched : metadata;
    metadata.videos = source.videos;
    metadata.episodeCount = source.episodeCount;
    metadata.totalCount = source.totalCount;
    return metadata;
  };
  const warnFailure = error => console.warn(`Failed ${metadata.imdbId} metadata cinemeta update due: ${error.message}`);
  return getMetadata(metadata.imdbId, metadata.type)
      .then(applyFetched)
      .catch(warnFailure);
}
/**
 * Looks up the imdb id of a movie through the rate limited `imdb_limiter`.
 *
 * @param {string|Object} title raw title (parsed first) or already parsed title info
 * @returns {Promise<*>} the imdb id, or `undefined` when the lookup fails
 */
function findMovieImdbId(title) {
  let parsedTitle = title;
  if (typeof title === 'string') {
    parsedTitle = parse(title);
  }
  const lookup = () => getImdbId(parsedTitle, Type.MOVIE).catch(() => undefined);
  return imdb_limiter.schedule(lookup);
}
/**
 * Looks up the kitsu id of a movie.
 *
 * @param {string|Object} title raw title (parsed first) or already parsed title info
 * @returns {Promise<*>} the kitsu id, or `undefined` when the lookup fails
 */
function findMovieKitsuId(title) {
  let parsedTitle = title;
  if (typeof title === 'string') {
    parsedTitle = parse(title);
  }
  const lookupFailed = () => undefined;
  return getKitsuId(parsedTitle, Type.MOVIE).catch(lookupFailed);
}
/**
 * Tells whether any entry of the torrent contents has a path accepted by `isDisk`.
 *
 * @param {Array<Object>} contents entries exposing a `path`
 * @returns {boolean}
 */
function isDiskTorrent(contents) {
  return contents.some(({ path }) => isDisk(path));
}
/**
 * Tells whether the videos make up a single movie: either one video, or two videos that
 * are named as part/disc/cd 1 and 2 of it.
 *
 * @param {Array<Object>} videos videos exposing a `path`
 * @returns {*} truthy when the videos form a single movie
 */
function isSingleMovie(videos) {
  if (videos.length === 1) {
    return true;
  }
  if (videos.length !== 2) {
    return false;
  }
  const firstPart = videos.find(v => /\b(?:part|disc|cd)[ ._-]?0?1\b|^0?1\.\w{2,4}$/i.test(v.path));
  return firstPart && videos.find(v => /\b(?:part|disc|cd)[ ._-]?0?2\b|^0?2\.\w{2,4}$/i.test(v.path));
}
/**
 * Tells whether the video path points into a featurette(s) folder or contains
 * `extras-grym` (case insensitive).
 *
 * @param {Object} video video exposing a `path`
 * @returns {boolean}
 */
function isFeaturette(video) {
  const featurettePattern = /featurettes?\/|extras-grym/i;
  return featurettePattern.test(video.path);
}
/**
 * Resets all imdb/kitsu id and episode fields of a video entry to `undefined`, in place.
 *
 * @param {Object} video video entry
 * @returns {Object} the same video entry
 */
function clearInfoFields(video) {
  const infoFields = ['imdbId', 'imdbSeason', 'imdbEpisode', 'kitsuId', 'kitsuEpisode'];
  for (const field of infoFields) {
    video[field] = undefined;
  }
  return video;
}
/**
 * Integer division by 100 (ex. 511 -> 5); the fractional part is dropped by converting
 * the quotient to a 32 bit integer.
 *
 * @param {number} episode concatenated season + episode number
 * @returns {number}
 */
function div100(episode) {
  const quotient = episode / 100;
  return quotient | 0;
}
/**
 * Remainder of the division by 100 (ex. 511 -> 11).
 *
 * @param {number} episode concatenated season + episode number
 * @returns {number}
 */
function mod100(episode) {
  const remainder = episode % 100;
  return remainder;
}
// Only the entry point is public; every other function of this module is a helper of it.
module.exports = { parseTorrentFiles };

View File

@@ -1,91 +0,0 @@
const { parse } = require('parse-torrent-title');
/**
 * Attaches subtitle files to the videos they most probably belong to.
 *
 * With a single video all subtitles go to it. With several videos every subtitle is
 * appended to the `subtitles` of its matched videos; subtitles without a match are
 * returned as unassigned. Videos are updated in place.
 *
 * @param {{contents: *, videos: Array<Object>, subtitles: Array<Object>}} torrentContents
 * @returns {{contents: *, videos: Array<Object>, subtitles: Array<Object>}} same contents
 *   and videos, with `subtitles` reduced to the unassigned ones
 */
function assignSubtitles({ contents, videos, subtitles }) {
  const canAssign = videos && videos.length && subtitles && subtitles.length;
  if (!canAssign) {
    return { contents, videos, subtitles };
  }
  if (videos.length === 1) {
    videos[0].subtitles = subtitles;
    return { contents, videos, subtitles: [] };
  }

  const parsedVideos = videos.map(video => _parseVideo(video));
  const unassignedSubs = [];
  for (const subtitle of subtitles) {
    const matchedVideos = _mostProbableSubtitleVideos(subtitle, parsedVideos);
    if (!matchedVideos) {
      unassignedSubs.push(subtitle);
      continue;
    }
    for (const video of matchedVideos) {
      video.subtitles = (video.subtitles || []).concat(subtitle);
    }
  }
  return { contents, videos, subtitles: unassignedSubs };
}
/**
 * Prepares a video for subtitle matching: keeps a reference to the video, its file name
 * without extension, its folder path, plus everything parsed from its title.
 *
 * @param {Object} video video exposing a `title` (its path inside the torrent)
 * @returns {Object} `{ videoFile, fileName, folderName, ...parsed title info }`
 */
function _parseVideo(video) {
  const pathSegments = video.title.split('/');
  const fileName = pathSegments.pop().replace(/\.(\w{2,4})$/, '');
  const folderName = video.title.replace(/\/?[^/]+$/, '');
  return Object.assign({ videoFile: video, fileName, folderName }, parseFilename(video.title));
}
/**
 * Finds the videos a subtitle most probably belongs to.
 *
 * The subtitle is matched by file name first (exactly one video whose file name is
 * contained in the subtitle's). Then, from the strictest to the loosest criteria -
 * title + seasons + episodes, seasons + episodes, title, episodes - the first criteria
 * whose matches all point to one single file wins.
 *
 * @param {Object} subtitle subtitle exposing `title` or `path`
 * @param {Array<Object>} parsedVideos videos prepared by `_parseVideo`
 * @returns {Array<Object>|undefined} the matched video files, `undefined` without match
 */
function _mostProbableSubtitleVideos(subtitle, parsedVideos) {
  const subtitlePath = subtitle.title || subtitle.path;
  const subTitle = subtitlePath.split('/').pop().replace(/\.(\w{2,4})$/, '');
  const parsedSub = parsePath(subtitlePath);
  const toVideoFiles = matches => matches.map(match => match.videoFile);

  const byFileName = parsedVideos.filter(video => subTitle.includes(video.fileName));
  if (byFileName.length === 1) {
    return toVideoFiles(byFileName);
  }

  const sameTitle = video => video.title === parsedSub.title;
  const sameSeasons = video => arrayEquals(video.seasons, parsedSub.seasons);
  const sameEpisodes = video => arrayEquals(video.episodes, parsedSub.episodes);
  const criteria = [
    video => sameTitle(video) && sameSeasons(video) && sameEpisodes(video),
    video => sameSeasons(video) && sameEpisodes(video),
    video => video.title && sameTitle(video),
    video => sameEpisodes(video)
  ];
  for (const matches of criteria) {
    const candidates = parsedVideos.filter(video => matches(video));
    if (singleVideoFile(candidates)) {
      return toVideoFiles(candidates);
    }
  }
  return undefined;
}
/**
 * Tells whether all the given parsed videos point to one and the same file index.
 *
 * @param {Array<Object>} videos parsed videos exposing `videoFile.fileIndex`
 * @returns {boolean} `false` for an empty list
 */
function singleVideoFile(videos) {
  const fileIndexes = new Set();
  for (const video of videos) {
    fileIndexes.add(video.videoFile.fileIndex);
  }
  return fileIndexes.size === 1;
}
/**
 * Parses every segment of a path and returns the first segment info that has both a
 * season and episodes; the info of the last segment is returned when none has.
 *
 * @param {string} path `/` separated path
 * @returns {Object} parsed info of the chosen segment
 */
function parsePath(path) {
  const parsedSegments = path.split('/').map(part => parseFilename(part));
  for (const parsed of parsedSegments) {
    if (parsed.season && parsed.episodes) {
      return parsed;
    }
  }
  return parsedSegments[parsedSegments.length - 1];
}
/**
 * Parses a file name and, when no episodes were recognised, takes a number ending the
 * parsed title as the episode.
 *
 * @param {string} filename file name or path segment
 * @returns {Object} parsed info, with `episodes` filled in where possible
 */
function parseFilename(filename) {
  const parsedInfo = parse(filename)
  const trailingNumber = parsedInfo.title.match(/(\d+)$/);
  const needsEpisodeFallback = !parsedInfo.episodes && trailingNumber;
  if (needsEpisodeFallback) {
    const [, digits] = trailingNumber;
    parsedInfo.episodes = [parseInt(digits, 10)];
  }
  return parsedInfo;
}
/**
 * Compares two arrays element by element with strict equality. When either one is
 * missing, they are equal only if they are the very same (missing) value.
 *
 * @param {Array} [array1]
 * @param {Array} [array2]
 * @returns {boolean}
 */
function arrayEquals(array1, array2) {
  if (array1 && array2) {
    if (array1.length !== array2.length) {
      return false;
    }
    return array1.every((value, index) => value === array2[index]);
  }
  return array1 === array2;
}
// Only the entry point is public; the parsing and matching helpers stay internal.
module.exports = { assignSubtitles }

View File

@@ -1,5 +0,0 @@
// Content types a torrent can be categorised as.
exports.Type = {
MOVIE: 'movie',
SERIES: 'series',
ANIME: 'anime'
};

View File

@@ -1,145 +0,0 @@
const Bottleneck = require('bottleneck');
const { parse } = require('parse-torrent-title');
const Promises = require('../lib/promises');
const { mostCommonValue } = require('../lib/promises');
const repository = require('../lib/repository');
const { getImdbId, getKitsuId } = require('../lib/metadata');
const { parseTorrentFiles } = require('../lib/torrentFiles');
const { createTorrentContents } = require('../lib/torrentEntries');
const { assignSubtitles } = require('../lib/torrentSubtitles');
const { Type } = require('../lib/types');
// Caps the number of torrents re-processed concurrently by the bulk operations below.
const limiter = new Bottleneck({ maxConcurrent: 40 });
/**
 * Re-parses movie collection torrents that are stored as a single, not opened file
 * (no `fileIndex`) and replaces that file with one entry per parsed video.
 *
 * Candidates are the files whose title matches 'logy' and parses as a complete
 * collection. The original file is only deleted once the new files were created;
 * a failing collection is logged and does not affect the others.
 *
 * @returns {Promise<undefined>} resolves once every collection has been processed
 */
async function updateMovieCollections() {
  const collectionFiles = await repository.getFilesBasedOnTitle('logy')
      .then(files => files.filter(file => file.fileIndex === null))
      .then(files => files.filter(file => parse(file.title).complete));

  await Promise.all(collectionFiles.map(original => repository.getTorrent({ infoHash: original.infoHash })
      .then(torrent => parseTorrentFiles({ ...torrent.get(), imdbId: original.imdbId }))
      // parseTorrentFiles resolves to { contents, videos, subtitles }, the files are its videos
      .then(torrentContents => torrentContents.videos || [])
      .then(files => Promise.all(files.map(file => {
        console.log(file);
        return repository.createFile(file)
      })))
      .then(createdFiled => {
        if (createdFiled && createdFiled.length) {
          console.log(`Updated movie collection ${original.title}`);
          return repository.deleteFile(original)
        }
        console.log(`Failed updating movie collection ${original.title}`);
        return undefined;
      })
      .catch(error => console.log(`Failed updating movie collection ${original.title}`, error.message))));
}
/**
 * Fixes series seasons that were stored as movies: for every movie torrent whose title
 * parses with seasons, the episode decomposing is re-applied (without source files) and
 * the torrent type is switched to series.
 *
 * @returns {Promise<undefined>} resolves once all torrents have been processed
 */
async function reapplySeriesSeasonsSavedAsMovies() {
  const movieTorrents = await repository
      .getTorrentsBasedOnTitle('(?:[^a-zA-Z0-9]|^)[Ss][012]?[0-9](?:[^0-9]|$)', Type.MOVIE);
  const seasonTorrents = movieTorrents.filter(torrent => parse(torrent.title).seasons);

  const convertToSeries = torrent => reapplyEpisodeDecomposing(torrent.infoHash, false)
      .then(() => {
        torrent.type = Type.SERIES;
        return torrent.save();
      });
  await Promise.all(seasonTorrents.map(torrent => limiter.schedule(() => convertToSeries(torrent))));
  console.log('Finished updating multiple torrents');
}
/**
 * Re-applies the episode decomposing (with source files) to every anime torrent whose
 * title matches the given expression.
 *
 * @param {string} regex title expression passed on to the repository
 * @returns {Promise<undefined>} resolves once all torrents have been processed
 */
async function reapplyDecomposingToTorrentsOnRegex(regex) {
  const animeTorrents = await repository.getTorrentsBasedOnTitle(regex, Type.ANIME);
  const updates = animeTorrents
      .map(torrent => limiter.schedule(() => reapplyEpisodeDecomposing(torrent.infoHash, true)));
  await Promise.all(updates);
  console.log('Finished updating multiple torrents');
}
/**
 * Re-parses the files of a stored torrent and updates its stored file entries with the
 * freshly resolved imdb/kitsu episode information.
 *
 * Parsed files are paired with stored files sharing their `fileIndex`, in order; paired
 * stored files are updated and saved, unpaired parsed files are created. Nothing is
 * done when no imdb id can be resolved for the torrent.
 *
 * @param {string} infoHash info hash of the stored torrent
 * @param {boolean} [includeSourceFiles=true] pass the stored files on to the parsing
 *   (one per file index) instead of `false`
 * @returns {Promise<undefined>}
 */
async function reapplyEpisodeDecomposing(infoHash, includeSourceFiles = true) {
  const torrent = await repository.getTorrent({ infoHash });
  const storedFiles = await repository.getFiles({ infoHash });

  // stored files grouped by file index; a missing index is grouped under `null`
  const indexKey = file => file.fileIndex !== undefined ? file.fileIndex : null;
  const fileIndexMap = {};
  for (const stored of storedFiles) {
    const key = indexKey(stored);
    fileIndexMap[key] = (fileIndexMap[key] || []).concat(stored);
  }

  const files = includeSourceFiles && Object.values(fileIndexMap)
      .map(([firstOfIndex]) => ({
        fileIndex: firstOfIndex.fileIndex,
        name: firstOfIndex.title.replace(/.*\//, ''),
        path: firstOfIndex.title,
        size: firstOfIndex.size
      }));

  // the kitsu id is deliberately left unset, so the imdb id is always resolved
  const kitsuId = undefined;
  const imdbId = mostCommonValue(storedFiles.map(file => file.imdbId))
      || await getImdbId(parse(torrent.title)).catch(() => undefined);
  if (!imdbId && !kitsuId) {
    console.log(`imdbId or kitsuId not found: ${torrent.provider} ${torrent.title}`);
    return;
  }

  const { videos: newFiles } = await parseTorrentFiles({ ...torrent.get(), imdbId, kitsuId, files });
  const updatedFiles = newFiles.map(file => {
    const sameIndexFiles = fileIndexMap[indexKey(file)];
    const originalFile = sameIndexFiles && sameIndexFiles.shift();
    if (!originalFile) {
      return file;
    }
    if (!originalFile.imdbId) {
      originalFile.imdbId = file.imdbId
    }
    originalFile.imdbSeason = file.imdbSeason;
    originalFile.imdbEpisode = file.imdbEpisode;
    originalFile.kitsuId = file.kitsuId;
    originalFile.kitsuEpisode = file.kitsuEpisode;
    return originalFile;
  });

  await Promise.all(updatedFiles
      .map(file => file.id ? file.save() : repository.createFile(file)));
  console.log(`Updated files for [${torrent.infoHash}] ${torrent.title}`);
}
/**
 * Attaches still-unassigned subtitles to the video files of their torrent.
 *
 * @returns {Promise<Array<Array>>} per info hash, the results of the subtitle upserts
 */
async function assignSubs() {
  const pendingSubs = await repository.getUnassignedSubtitles();

  // Bucket subtitles by the torrent they belong to
  const subsByInfoHash = {};
  pendingSubs.forEach(sub => {
    const bucket = subsByInfoHash[sub.infoHash] || [];
    subsByInfoHash[sub.infoHash] = bucket.concat(sub);
  });

  const assignForTorrent = async infoHash => {
    const videos = await repository.getFiles({ infoHash });
    const assigned = assignSubtitles({ videos, subtitles: subsByInfoHash[infoHash] });
    const upserts = assigned.videos
        .filter(video => video.subtitles)
        .map(video => repository.upsertSubtitles(video, video.subtitles));
    return Promise.all(upserts);
  };

  return Promise.all(Object.keys(subsByInfoHash).map(assignForTorrent));
}
/**
 * Creates the stored contents for torrents that have none yet, one batch at a
 * time, recursing while the repository keeps returning full batches.
 *
 * @returns {Promise<void>} resolves with no value once a non-full batch was processed
 */
async function openTorrentContents() {
  const limiter = new Bottleneck({ maxConcurrent: 15 });
  const unopenedTorrents = await repository.getNoContentsTorrents();
  await Promise.all(unopenedTorrents
      .map(torrent => limiter.schedule(() => createTorrentContents(torrent))));

  // 500 is presumably the repository's batch size - TODO confirm against getNoContentsTorrents
  if (unopenedTorrents.length === 500) {
    return openTorrentContents();
  }
  // Previously this resolved with the `Promise.resolve` function itself (it was never called)
  return undefined;
}
// const infoHashes = [
// ]
// Promises.sequence(infoHashes.map(infoHash => () => reapplyEpisodeDecomposing(infoHash)))
// .then(() => console.log('Finished'));
//findAllFiles().then(() => console.log('Finished'));
//updateMovieCollections().then(() => console.log('Finished'));
reapplyEpisodeDecomposing('96cc18f564f058384c18b4966a183d81808ce3fb', true).then(() => console.log('Finished'));
//reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
//reapplyDecomposingToTorrentsOnRegex('.*Title.*').then(() => console.log('Finished'));
//reapplyManualHashes().then(() => console.log('Finished'));
// assignSubs().then(() => console.log('Finished'));
// openTorrentContents().then(() => console.log('Finished'));

View File

@@ -1,14 +0,0 @@
const { scheduleScraping, scrapeAll } = require('./scraper')
const { scheduleUpdateSeeders, scheduleUpdateSeedersForNewTorrents } = require('./seeders')
/**
 * Entry point of the scraper: with ENABLE_SCHEDULING set, registers the cron
 * based scraping and both seeder update loops; otherwise runs all scrapers once.
 */
function startScraper() {
  const schedulingEnabled = process.env.ENABLE_SCHEDULING;
  if (!schedulingEnabled) {
    scrapeAll();
    return;
  }
  scheduleScraping();
  scheduleUpdateSeeders();
  scheduleUpdateSeedersForNewTorrents();
}
module.exports = { startScraper }

View File

@@ -1,28 +0,0 @@
const schedule = require('node-schedule');
const scrapers = require('./scrapers');
const { sequence } = require('../lib/promises')
/**
 * Registers one scheduled job per distinct cron expression; each job scrapes,
 * in sequence, all providers sharing that expression.
 */
function scheduleScraping() {
  const providersByCron = {};
  for (const provider of scrapers) {
    const sameCron = providersByCron[provider.cron] || [];
    providersByCron[provider.cron] = sameCron.concat(provider);
  }

  for (const [cron, providers] of Object.entries(providersByCron)) {
    schedule.scheduleJob(cron, () => _scrapeProviders(providers));
  }
}
/**
 * Runs every registered scraper once, regardless of its cron expression.
 *
 * @returns {Promise} result of the sequential provider run
 */
function scrapeAll() {
  const everyProvider = scrapers;
  return _scrapeProviders(everyProvider);
}
/**
 * Scrapes the given providers one after another.
 *
 * @param {Array<object>} providers provider descriptors ({ scraper, name, cron })
 * @returns {Promise} whatever the `sequence` helper resolves with
 */
async function _scrapeProviders(providers) {
  const tasks = providers.map(provider => () => _singleScrape(provider));
  return sequence(tasks);
}
/**
 * Runs a single provider's scraper; a failed scrape is logged and swallowed
 * so that it does not abort the remaining providers.
 *
 * @param {{scraper: {scrape: function(): Promise}, name: string}} provider
 * @returns {Promise} the scrape result, or undefined when the scrape rejected
 */
async function _singleScrape(provider) {
  const logFailure = error => {
    console.warn(`Failed ${provider.name} scraping due: `, error);
  };
  return provider.scraper.scrape().catch(logFailure);
}
module.exports = { scheduleScraping, scrapeAll }

View File

@@ -1,47 +0,0 @@
const thepiratebayScraper = require('../scrapers/thepiratebay/thepiratebay_scraper');
const thepiratebayFakeRemoval = require('../scrapers/thepiratebay/thepiratebay_fakes_removal');
const ytsScraper = require('../scrapers/yts/yts_scraper');
const ytsFullScraper = require('../scrapers/yts/yts_full_scraper');
const eztvScraper = require('../scrapers/eztv/eztv_scraper');
const leetxScraper = require('../scrapers/1337x/1337x_scraper');
const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper');
const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper');
const erairawsScraper = require('../scrapers/erairaws/erairaws_scraper');
const torrentGalaxyScraper = require('../scrapers/torrentgalaxy/torrentgalaxy_scraper');
const rutorScraper = require('../scrapers/rutor/rutor_scraper');
const Comando = require('../scrapers/comando/comando_scraper')
const ComoEuBaixo = require('../scrapers/comoeubaixo/comoeubaixo_scraper')
const Lapumia = require('../scrapers/lapumia/lapumia_scraper')
const OndeBaixa = require('../scrapers/ondebaixa/ondebaixa_scraper');
const AnimesTorrent = require('../scrapers/animestorrent/animestorrent_scraper')
const DarkMahou = require('../scrapers/darkmahou/darkmahou_scraper')
const torrent9Scraper = require('../scrapers/torrent9/torrent9_scraper');
// Registry of scraping providers. Each entry pairs a scraper module with the
// name it exposes and the cron expression it should run on.
// NOTE(review): the 6-field expressions presumably follow node-schedule's
// "second minute hour day-of-month month day-of-week" layout, e.g.
// '0 0 */4 ? * *' = every 4 hours - confirm against the scheduler in use.
// Commented-out entries are disabled or one-off scrapers kept for reference.
module.exports = [
{ scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: ytsFullScraper, name: ytsFullScraper.NAME, cron: '0 0 0 * * 0' },
{ scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: nyaaSiScraper, name: nyaaSiScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: nyaaPantsuScraper, name: nyaaPantsuScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */1 ? * *' },
{ scraper: rutorScraper, name: rutorScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */2 ? * *' },
{ scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 0 */12 ? * *' },
{ scraper: torrentGalaxyScraper, name: torrentGalaxyScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: torrent9Scraper, name: torrent9Scraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: Comando, name: Comando.NAME, cron: '0 0 */4 ? * *' },
{ scraper: ComoEuBaixo, name: ComoEuBaixo.NAME, cron: '0 0 */4 ? * *' },
{ scraper: Lapumia, name: Lapumia.NAME, cron: '0 0 */4 ? * *' },
{ scraper: OndeBaixa, name: OndeBaixa.NAME, cron: '0 0 */4 ? * *' },
// { scraper: AnimesTorrent, name: AnimesTorrent.NAME, cron: '0 0 */4 ? * *' },
// { scraper: DarkMahou, name: DarkMahou.NAME, cron: '0 0 */4 ? * *' },
{ scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */4 ? * *' },
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
// { scraper: require('../scrapers/1337x/1337x_search_scraper') }
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
// { scraper: require('../scrapers/thepiratebay/thepiratebay_dump_scraper') }
// { scraper: require('../scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper') }
// { scraper: require('../scrapers/thepiratebay/thepiratebay_update_size_scraper') }
];

View File

@@ -1,64 +0,0 @@
const Bottleneck = require('bottleneck');
const repository = require('../lib/repository')
const { delay } = require('../lib/promises')
const { updateCurrentSeeders } = require('../lib/torrent')
const { updateTorrentSeeders } = require('../lib/torrentEntries')
const DELAY_MS = 0; // 0 seconds
const DELAY_NEW_MS = 30_000; // 30 seconds
const DELAY_NO_NEW_MS = 300_000; // 300 seconds
const DELAY_FAILED_TORRENTS_MS = 5_000; // 5 seconds
const updateLimiter = new Bottleneck({ maxConcurrent: 5 });
const statistics = {};
const statisticsNew = {};
/**
 * Starts an endless seeders-update loop: fetch a batch of torrents, refresh and
 * persist their seeders, record statistics, wait DELAY_MS and start over.
 * Failures of a round are logged and do not stop the loop.
 */
function scheduleUpdateSeeders() {
  console.log('Starting seeders update...');

  const runRound = async () => {
    try {
      const torrents = await getTorrents();
      const updatedTorrents = await updateCurrentSeeders(torrents);
      const persisted = await Promise.all(
          updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated))));
      await updateStatistics(persisted, statistics);
      console.log('Finished seeders update:', statistics);
    } catch (error) {
      console.warn('Failed seeders update:', error);
    }
    await delay(DELAY_MS);
    scheduleUpdateSeeders();
  };

  // Fire-and-forget on purpose: the loop re-arms itself after every round
  runRound();
}
/**
 * Same endless loop as the regular seeders update, but restricted to new
 * torrents and paced by DELAY_NEW_MS between rounds.
 */
function scheduleUpdateSeedersForNewTorrents() {
  console.log('Starting seeders update for new torrents...');

  const runRound = async () => {
    try {
      const torrents = await getNewTorrents();
      const updatedTorrents = await updateCurrentSeeders(torrents);
      const persisted = await Promise.all(
          updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated))));
      await updateStatistics(persisted, statisticsNew);
      console.log('Finished seeders update for new torrents:', statisticsNew);
    } catch (error) {
      console.warn('Failed seeders update for new torrents:', error);
    }
    await delay(DELAY_NEW_MS);
    scheduleUpdateSeedersForNewTorrents();
  };

  // Fire-and-forget on purpose: the loop re-arms itself after every round
  runRound();
}
/**
 * Loads the next batch of torrents whose seeders should be refreshed,
 * retrying indefinitely (after DELAY_FAILED_TORRENTS_MS) while the query fails.
 *
 * @returns {Promise<Array>}
 */
async function getTorrents() {
  const retryAfterPause = () => delay(DELAY_FAILED_TORRENTS_MS).then(() => getTorrents());
  return repository.getUpdateSeedersTorrents().catch(retryAfterPause);
}
/**
 * Loads new torrents whose seeders should be refreshed. Retries after
 * DELAY_FAILED_TORRENTS_MS when the query fails and polls again after
 * DELAY_NO_NEW_MS when there is nothing to update, so it only resolves with a
 * non-empty list.
 *
 * @returns {Promise<Array>}
 */
async function getNewTorrents() {
  const retryAfterPause = () => delay(DELAY_FAILED_TORRENTS_MS).then(() => getNewTorrents());
  const torrents = await repository.getUpdateSeedersNewTorrents().catch(retryAfterPause);

  if (torrents.length) {
    return torrents;
  }
  console.log('No new torrents to update seeders');
  await delay(DELAY_NO_NEW_MS);
  return getNewTorrents();
}
/**
 * Adds the number of torrents updated in this round to today's counter.
 *
 * @param {Array<Array>} updatedTorrents one array of updated entries per torrent
 * @param {Object<string, number>} statisticsObject counters keyed by ISO date (YYYY-MM-DD), mutated in place
 */
function updateStatistics(updatedTorrents, statisticsObject) {
  const totalTorrents = updatedTorrents.reduce((sum, nested) => sum + nested.length, 0);
  const today = new Date().toISOString().replace(/T.*/, '');
  const previousCount = statisticsObject[today] || 0;
  statisticsObject[today] = previousCount + totalTorrents;
}
module.exports = { scheduleUpdateSeeders, scheduleUpdateSeedersForNewTorrents }

View File

@@ -1,168 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const Sugar = require('sugar-date');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
const defaultProxies = [
'https://1337x.to'
];
const defaultTimeout = 10000;
const maxSearchPage = 50;
const Categories = {
MOVIE: 'Movies',
TV: 'TV',
ANIME: 'Anime',
DOCUMENTARIES: 'Documentaries',
APPS: 'Apps',
GAMES: 'Games',
MUSIC: 'Music',
PORN: 'XXX',
OTHER: 'Other',
};
/**
 * Fetches and parses a single 1337x torrent page, trying every configured
 * proxy and retrying the whole lookup until `retries` is exhausted.
 *
 * @param {string} torrentId torrent slug, with or without the '/torrent/' prefix
 * @param {object} [config] optional `proxyList` and `timeout`
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<object>} parsed torrent details including `torrentId`
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const slug = torrentId.startsWith('/torrent/') ? torrentId.replace('/torrent/', '') : torrentId;
  const pageRequests = proxyList.map(proxyUrl => singleRequest(`${proxyUrl}/torrent/${slug}`, config));

  return Promises.first(pageRequests)
      .then(parseTorrentPage)
      .then(details => ({ torrentId: slug, ...details }))
      .catch(() => torrent(slug, config, retries - 1));
}
/**
 * Searches 1337x for a keyword (optionally within a category) and, while a
 * page comes back full (40 rows), keeps appending pages up to
 * `config.extendToPage` (capped by maxSearchPage).
 *
 * @param {string} keyword search term
 * @param {object} [config] optional `proxyList`, `page`, `category`, `extendToPage`, `timeout`
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<Array<object>>} parsed result rows
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const page = config.page || 1;
  const category = config.category;
  const extendToPage = Math.min(maxSearchPage, (config.extendToPage || 1));

  const buildUrl = proxyUrl => {
    if (category) {
      return `${proxyUrl}/category-search/${keyword}/${category}/${page}/`;
    }
    return `${proxyUrl}/search/${keyword}/${page}/`;
  };
  const appendNextPage = torrents => {
    if (torrents.length === 40 && page < extendToPage) {
      // A failing follow-up page only truncates the result instead of failing it
      return search(keyword, { ...config, page: page + 1 })
          .catch(() => [])
          .then(nextTorrents => torrents.concat(nextTorrents));
    }
    return torrents;
  };

  return Promises.first(proxyList.map(proxyUrl => singleRequest(buildUrl(proxyUrl), config)))
      .then(parseTableBody)
      .then(appendNextPage)
      .catch(() => search(keyword, config, retries - 1));
}
/**
 * Lists one page of a 1337x category, optionally sorted (descending).
 *
 * @param {object} [config] `category`, optional `page`, `sort`, `proxyList`, `timeout`
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<Array<object>>} parsed result rows
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const page = config.page || 1;
  const { category, sort } = config;

  const buildUrl = proxyUrl => {
    if (sort) {
      return `${proxyUrl}/sort-cat/${category}/${sort}/desc/${page}/`;
    }
    return `${proxyUrl}/cat/${category}/${page}/`;
  };
  const pageRequests = proxyList.map(proxyUrl => singleRequest(buildUrl(proxyUrl), config));

  return Promises.first(pageRequests)
      .then(parseTableBody)
      .catch(() => browse(config, retries - 1));
}
/**
 * Downloads a 1337x page and rejects when the body is empty, is a known error
 * page, or does not look like a 1337x page at all.
 *
 * @param {string} requestUrl absolute page url
 * @param {object} [config] optional `timeout` in milliseconds
 * @returns {Promise<string>} the page body
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };

  const validateBody = response => {
    const body = response.data;
    if (!body) {
      throw new Error(`No body: ${requestUrl}`);
    }
    const isErrorPage = body.includes('502: Bad gateway') || body.includes('403 Forbidden');
    if (isErrorPage || !body.includes('1337x</title>')) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  };

  return axios.get(requestUrl, options).then(validateBody);
}
/**
 * Extracts the torrent rows from a 1337x listing (search or browse) page.
 *
 * @param {string} body page html
 * @returns {Promise<Array<{name: string, torrentId: string, seeders: number, leechers: number, size: *}>>}
 *   rejects when a row has no torrent link
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
      return;
    }

    const torrents = [];
    $('.table > tbody > tr').each((i, element) => {
      const row = $(element);
      // The second anchor of a row is the one used for both the name and the torrent href
      const torrentLink = row.find('a').eq(1);
      torrents.push({
        name: torrentLink.text(),
        torrentId: torrentLink.attr('href').replace('/torrent/', ''),
        // Explicit radix keeps the counts decimal, matching parseTorrentPage
        seeders: parseInt(row.children('td.coll-2').text(), 10),
        leechers: parseInt(row.children('td.coll-3').text(), 10),
        size: parseSize(row.children('td.coll-4').text())
      });
    });

    resolve(torrents);
  });
}
/**
 * Extracts the torrent details from a 1337x torrent page.
 *
 * A page without a description block no longer fails the whole parse: the
 * imdb id is optional and simply stays empty in that case.
 *
 * @param {string} body page html
 * @returns {Promise<object>} name, infoHash, magnetLink, seeders, leechers,
 *   category, languages, size, uploadDate, imdbId and the file list; rejects
 *   when the magnet link or a file entry cannot be parsed
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
      return;
    }

    const details = $('.torrent-detail-page');
    // Text of the element following the <strong> label, e.g. "Seeders" -> "123"
    const labelValue = label => details.find(`strong:contains('${label}')`).next().text();

    const magnetLink = details.find('a:contains(\'Magnet Download\')').attr('href');
    const decodedMagnet = decode(magnetLink);
    // .html() is null when the description block is missing
    const descriptionHtml = details.find('div[id=\'description\']').html() || '';
    const imdbIdMatch = descriptionHtml.match(/imdb\.com\/title\/(tt\d+)/i);

    const files = details.find('div[id=\'files\']').first().find('li')
        .map((i, elem) => $(elem).text())
        .map((i, text) => {
          // File entries look like "<path> (<size>)"
          const [, path, size] = text.match(/^(.+)\s\((.+)\)$/);
          return {
            fileIndex: i,
            name: path.replace(/^.+\//g, ''),
            path: path,
            size: parseSize(size)
          };
        }).get();

    resolve({
      name: escapeHTML(decodedMagnet.name.replace(/\+/g, ' ')),
      infoHash: decodedMagnet.infoHash,
      magnetLink: magnetLink,
      seeders: parseInt(labelValue('Seeders'), 10),
      leechers: parseInt(labelValue('Leechers'), 10),
      category: labelValue('Category'),
      languages: labelValue('Language'),
      size: parseSize(labelValue('Total size')),
      uploadDate: parseDate(labelValue('Date uploaded')),
      imdbId: imdbIdMatch && imdbIdMatch[1],
      files: files
    });
  });
}
/**
 * Parses the relative upload date shown by 1337x. "decade ago" phrasing is
 * rewritten to '10 years ago' before being handed to Sugar.
 *
 * @param {string} dateString date text from the page
 * @returns {*} result of Sugar.Date.create
 */
function parseDate(dateString) {
  const mentionsDecade = /decade.*ago/i.test(dateString);
  const parsable = mentionsDecade ? '10 years ago' : dateString;
  return Sugar.Date.create(parsable);
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,111 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const leetx = require('./1337x_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = '1337x';
const UNTIL_PAGE = 10;
const TYPE_MAPPING = typeMapping();
const limiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Runs a full 1337x scrape and stores the scrape start time on the provider record.
 *
 * @returns {Promise<void>}
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();

  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Re-fetches a torrent's 1337x page (rate limited) to obtain fresh details.
 *
 * @param {{torrentId: string}} torrent stored torrent
 * @returns {Promise<object>} freshly parsed torrent details
 */
async function updateSeeders(torrent) {
  const fetchDetails = () => leetx.torrent(torrent.torrentId);
  return limiter.schedule(fetchDetails);
}
/**
 * Scrapes the latest torrents of every supported 1337x category, one category
 * at a time, and concatenates the per-category results.
 *
 * @returns {Promise<Array>}
 */
async function scrapeLatestTorrents() {
  const { MOVIE, TV, ANIME, DOCUMENTARIES } = leetx.Categories;
  const allowedCategories = [MOVIE, TV, ANIME, DOCUMENTARIES];
  const categoryTasks = allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category));

  const entries = await Promises.sequence(categoryTasks);
  return entries.reduce((merged, categoryEntries) => merged.concat(categoryEntries), []);
}
/**
 * Processes one listing page of a category and recurses into the next page
 * until a page is empty or the category's page limit is reached. A failed
 * listing request is logged and treated as an empty page.
 *
 * @param {string} category 1337x category
 * @param {number} [page=1] page to start from
 * @returns {Promise<undefined>}
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);

  const torrents = await leetx.browse(({ category, page }))
      .catch(error => {
        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
        return [];
      });
  const resolved = await Promise.all(
      torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))));

  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
/**
 * Stores a listing row as a torrent entry unless it is already known.
 * Unknown rows are resolved to the full torrent page first.
 *
 * @param {object} record row from the listing page
 * @returns {Promise<*>} the record/torrent when it already existed, the string
 *   'Invalid torrent record' for unusable pages, undefined for an invalid
 *   upload date, otherwise the result of createTorrentEntry
 */
async function processTorrentRecord(record) {
  const knownFromListing = await checkAndUpdateTorrent({ provider: NAME, ...record });
  if (knownFromListing) {
    return record;
  }

  const torrentFound = await leetx.torrent(record.torrentId).catch(() => undefined);
  const mappedType = torrentFound && TYPE_MAPPING[torrentFound.category];
  if (!torrentFound || !mappedType) {
    return Promise.resolve('Invalid torrent record');
  }
  if (isNaN(torrentFound.uploadDate)) {
    console.warn(`Incorrect upload date for [${torrentFound.infoHash}] ${torrentFound.name}`);
    return;
  }
  // Second check, now by the details of the torrent page itself
  const knownFromPage = await checkAndUpdateTorrent(torrentFound);
  if (knownFromPage) {
    return torrentFound;
  }

  const { infoHash, torrentId, name, category, size, seeders, uploadDate, imdbId, languages } = torrentFound;
  return createTorrentEntry({
    infoHash,
    provider: NAME,
    torrentId,
    title: name.replace(/\t|\s+/g, ' ').trim(),
    type: TYPE_MAPPING[category],
    size,
    seeders,
    uploadDate,
    imdbId,
    languages: languages || undefined
  });
}
/**
 * Builds the lookup from 1337x category names to internal torrent types.
 * Categories missing from the mapping are not scraped as torrents.
 *
 * @returns {Object<string, string>}
 */
function typeMapping() {
  const { MOVIE, DOCUMENTARIES, TV, ANIME } = leetx.Categories;
  return {
    [MOVIE]: Type.MOVIE,
    [DOCUMENTARIES]: Type.MOVIE,
    [TV]: Type.SERIES,
    [ANIME]: Type.ANIME
  };
}
/**
 * Returns how many listing pages to scrape for a category.
 *
 * @param {string} category 1337x category
 * @returns {number} 5 for anime, 1 for documentaries, UNTIL_PAGE otherwise
 */
function untilPage(category) {
  switch (category) {
    case leetx.Categories.ANIME:
      return 5;
    case leetx.Categories.DOCUMENTARIES:
      return 1;
    default:
      return UNTIL_PAGE;
  }
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,128 +0,0 @@
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require("magnet-uri");
const { getRandomUserAgent } = require("../../lib/requestHelper");
const defaultTimeout = 10000;
const maxSearchPage = 50;
const baseUrl = 'https://animestorrent.com';
const Categories = {
MOVIE: 'filme',
ANIME: 'tv',
OVA: 'ova'
};
/**
 * Fetches an AnimesTorrent entry page and returns one record per magnet link
 * found on it.
 *
 * @param {string} torrentId absolute entry url; its 4th "/"-separated segment is used as slug
 * @param {object} [config] optional `timeout`
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<Array<object>>} parsed records, each tagged with `torrentId` (the slug)
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const slug = torrentId.split("/")[3];
  return singleRequest(`${baseUrl}/${slug}`, config)
      .then((body) => parseTorrentPage(body))
      .then((records) => records.map((el) => ({ torrentId: slug, ...el })))
      // Retry with the original url: the bare slug has no "/" segments, so
      // re-splitting it would yield `undefined` and request "<baseUrl>/undefined"
      .catch(() => torrent(torrentId, config, retries - 1));
}
/**
 * Searches AnimesTorrent for a keyword and, while a page comes back with 40
 * entries, keeps appending pages up to `config.extendToPage` (capped by maxSearchPage).
 *
 * @param {string} keyword search term
 * @param {object} [config] optional `page`, `extendToPage`, `timeout`
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<Array<object>>} parsed listing entries
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const extendToPage = Math.min(maxSearchPage, config.extendToPage || 1);

  const appendNextPage = (torrents) => {
    if (torrents.length === 40 && page < extendToPage) {
      // A failing follow-up page only truncates the result instead of failing it
      return search(keyword, { ...config, page: page + 1 })
          .catch(() => [])
          .then((nextTorrents) => torrents.concat(nextTorrents));
    }
    return torrents;
  };

  return singleRequest(`${baseUrl}/page/${page}/?s=${keyword}`, config)
      .then(parseTableBody)
      .then(appendNextPage)
      .catch(() => search(keyword, config, retries - 1));
}
/**
 * Lists one page of AnimesTorrent entries, optionally restricted to a category.
 *
 * @param {object} [config] optional `category`, `page`, `timeout`
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<Array<object>>} parsed listing entries
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const { category } = config;
  let requestUrl = `${baseUrl}/page/${page}/`;
  if (category) {
    requestUrl = `${baseUrl}/tipo/${category}/page/${page}/`;
  }

  return singleRequest(requestUrl, config)
      .then(parseTableBody)
      .catch(() => browse(config, retries - 1));
}
/**
 * Downloads a page and rejects when the body is empty or is a known error page.
 *
 * @param {string} requestUrl absolute page url
 * @param {object} [config] optional `timeout` in milliseconds
 * @returns {Promise<string>} the page body
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };

  const validateBody = (response) => {
    const body = response.data;
    if (!body) {
      throw new Error(`No body: ${requestUrl}`);
    }
    const errorMarkers = ["502: Bad gateway", "403 Forbidden"];
    if (errorMarkers.some((marker) => body.includes(marker))) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  };

  return axios.get(requestUrl, options).then(validateBody);
}
/**
 * Extracts the entries (title and entry url) from an AnimesTorrent listing page.
 *
 * @param {string} body page html
 * @returns {Promise<Array<{name: string, torrentId: string}>>}
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error("Failed loading body"));
    }

    const entries = [];
    $("article.bs").each((index, article) => {
      const card = $(article);
      const name = card.find("span.ntitle").text();
      const torrentId = card.find("div > a").attr("href");
      entries.push({ name, torrentId });
    });
    resolve(entries);
  });
}
/**
 * Extracts one record per magnet link from an AnimesTorrent entry page.
 *
 * The executor is deliberately synchronous: with an `async` executor an
 * exception thrown while parsing would never reject the returned promise,
 * leaving callers waiting forever.
 *
 * @param {string} body page html
 * @returns {Promise<Array<object>>} records with title, originalName, year,
 *   infoHash, magnetLink, category and uploadDate; rejects when the page
 *   cannot be parsed (e.g. missing "Tipo:" link)
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error("Failed loading body"));
      return;
    }

    const magnets = [];
    $(`a[href^="magnet"]`).each((i, section) => {
      magnets.push($(section).attr("href"));
    });

    const details = $('div.infox');
    const torrents = magnets.map((magnetLink) => {
      const decodedMagnet = decode(magnetLink);
      // The release year is the text node right after the "Lançamento:" label
      const releaseLabel = details.find('b:contains(\'Lançamento:\')')[0];
      return {
        title: decodedMagnet.name,
        originalName: details.find('h1.entry-title').text(),
        year: releaseLabel ? releaseLabel.nextSibling.nodeValue.trim() : '',
        infoHash: decodedMagnet.infoHash,
        magnetLink: magnetLink,
        category: details.find('b:contains(\'Tipo:\')').next().attr('href').split('/')[4],
        uploadDate: new Date($("time[itemprop=dateModified]").attr("datetime")),
      };
    });
    resolve(torrents);
  });
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,108 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const animetorrrent = require("./animestorrent_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { getKitsuId } = require("../../lib/metadata");
const NAME = "AnimesTorrent";
const UNTIL_PAGE = 5;
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Runs a full AnimesTorrent scrape and stores the scrape start time on the provider record.
 *
 * @returns {Promise<void>}
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();

  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Re-fetches a torrent's entry page (rate limited) to obtain fresh records.
 *
 * @param {{torrentId: string}} torrent stored torrent
 * @returns {Promise<Array<object>>} freshly parsed records
 */
async function updateSeeders(torrent) {
  const fetchRecords = () => animetorrrent.torrent(torrent.torrentId);
  return limiter.schedule(fetchRecords);
}
/**
 * Scrapes the latest entries of every supported AnimesTorrent category, one
 * category at a time, and concatenates the per-category results.
 *
 * @returns {Promise<Array>}
 */
async function scrapeLatestTorrents() {
  const { MOVIE, ANIME, OVA } = animetorrrent.Categories;
  const allowedCategories = [MOVIE, ANIME, OVA];
  const categoryTasks = allowedCategories.map((category) => () => scrapeLatestTorrentsForCategory(category));

  const entries = await Promises.sequence(categoryTasks);
  return entries.reduce((merged, categoryEntries) => merged.concat(categoryEntries), []);
}
/**
 * Processes one listing page of a category and recurses into the next page
 * until a page is empty or the category's page limit is reached. A failed
 * listing request is logged and treated as an empty page.
 *
 * @param {string} category AnimesTorrent category
 * @param {number} [page=1] page to start from
 * @returns {Promise<undefined>}
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);

  const torrents = await animetorrrent
      .browse({ category, page })
      .catch((error) => {
        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
        return [];
      });
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processEntry(torrent))));

  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
/**
 * Resolves a listing entry to its torrent records and stores them one by one.
 * Any failure along the way is swallowed and yields undefined.
 *
 * @param {{torrentId: string}} entry listing entry
 * @returns {Promise<Array|undefined>}
 */
async function processEntry(entry) {
  const storeSequentially = records =>
      Promises.sequence(records.map(record => () => processTorrentRecord(record)));
  const ignoreFailure = () => undefined;

  return animetorrrent.torrent(entry.torrentId)
      .then(storeSequentially)
      .catch(ignoreFailure);
}
/**
 * Stores a scraped record as an anime torrent entry unless it is already
 * known, completing missing size, seeders and kitsu id beforehand.
 *
 * @param {object} foundTorrent record parsed from an entry page (mutated while completing it)
 * @returns {Promise<*>} the record when it already existed, otherwise the result of createTorrentEntry
 */
async function processTorrentRecord(foundTorrent) {
  const alreadyKnown = await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  if (alreadyKnown) {
    return foundTorrent;
  }

  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }
  const lacksIds = !foundTorrent.imdbId && !foundTorrent.kitsuId;
  if (lacksIds) {
    const info = { title: foundTorrent.originalName, year: foundTorrent.year };
    foundTorrent.kitsuId = await getKitsuId(info).catch(() => undefined);
  }

  const {
    infoHash, torrentId, title, imdbId, kitsuId, uploadDate, seeders, size, files, languages
  } = foundTorrent;
  return createTorrentEntry({
    infoHash,
    provider: NAME,
    torrentId,
    title,
    type: Type.ANIME,
    imdbId,
    kitsuId,
    uploadDate,
    seeders,
    size,
    files,
    languages
  });
}
/**
 * Returns how many listing pages to scrape for a category.
 *
 * @param {string} category AnimesTorrent category
 * @returns {number} 5 for anime, 3 for OVA, UNTIL_PAGE otherwise
 */
function untilPage(category) {
  switch (category) {
    case animetorrrent.Categories.ANIME:
      return 5;
    case animetorrrent.Categories.OVA:
      return 3;
    default:
      return UNTIL_PAGE;
  }
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,163 +0,0 @@
const axios = require('axios');
const moment = require("moment")
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtLanguages, sanitizePtOriginalName } = require('../scraperHelper')
const defaultTimeout = 30000;
const maxSearchPage = 50
const baseUrl = 'https://comando.to';
const Categories = {
MOVIE: 'filmes',
TV: 'series',
ANIME: 'animes',
DOCUMENTARIES: 'documentario'
};
/**
 * Fetches a Comando post page and returns one record per matching magnet link.
 * Failed attempts are logged and retried with the original url.
 *
 * @param {string} torrentId absolute post url; its 4th "/"-separated segment is used as slug
 * @param {object} [config] optional `timeout`
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<Array<object>>} parsed records, each tagged with `torrentId` (the slug)
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const slug = torrentId.split("/")[3];
  const logAndRetry = (err) => {
    console.warn(`Failed Comando ${slug} request: `, err);
    return torrent(torrentId, config, retries - 1);
  };

  return singleRequest(`${baseUrl}/${slug}`, config)
      .then(parseTorrentPage)
      .then((records) => records.map((record) => ({ torrentId: slug, ...record })))
      .catch(logAndRetry);
}
/**
 * Searches Comando for a keyword and, while a page comes back with 40 entries,
 * keeps appending pages up to `config.extendToPage` (capped by maxSearchPage).
 *
 * @param {string} keyword search term
 * @param {object} [config] optional `page`, `extendToPage`, `timeout`
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<Array<object>>} parsed listing entries
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const extendToPage = Math.min(maxSearchPage, (config.extendToPage || 1));

  const appendNextPage = torrents => {
    if (torrents.length === 40 && page < extendToPage) {
      // A failing follow-up page only truncates the result instead of failing it
      return search(keyword, { ...config, page: page + 1 })
          .catch(() => [])
          .then(nextTorrents => torrents.concat(nextTorrents));
    }
    return torrents;
  };

  return singleRequest(`${baseUrl}/page/${page}/?s=${keyword}`, config)
      .then(parseTableBody)
      .then(appendNextPage)
      .catch(() => search(keyword, config, retries - 1));
}
/**
 * Lists one page of a Comando category.
 *
 * @param {object} [config] `category`, optional `page`, `timeout`
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<Array<object>>} parsed listing entries
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const { category } = config;
  const page = config.page || 1;
  const requestUrl = `${baseUrl}/category/${category}/page/${page}/`;

  return singleRequest(requestUrl, config)
      .then(parseTableBody)
      .catch(() => browse(config, retries - 1));
}
/**
 * Downloads a page and rejects when the body is empty or is a known error
 * page. Rejections carry the error message (a string) when one is available.
 *
 * @param {string} requestUrl absolute page url
 * @param {object} [config] optional `timeout` in milliseconds
 * @returns {Promise<string>} the page body
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };

  const validateBody = response => {
    const body = response.data;
    if (!body) {
      throw new Error(`No body: ${requestUrl}`);
    }
    const errorMarkers = ['502: Bad gateway', '403 Forbidden'];
    if (errorMarkers.some(marker => body.includes(marker))) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  };
  const toMessage = error => Promise.reject(error.message || error);

  return axios.get(requestUrl, options)
      .then(validateBody)
      .catch(toMessage);
}
/**
 * Extracts the posts (title and post url) from a Comando listing page.
 *
 * @param {string} body page html
 * @returns {Promise<Array<{name: string, torrentId: string}>>}
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }

    const posts = [];
    $('article').each((index, article) => {
      const post = $(article);
      const name = post.find("h2 > a").text();
      const torrentId = post.find("h2 > a").attr("href");
      posts.push({ name, torrentId });
    });
    resolve(posts);
  });
}
/**
 * Extracts one record per magnet link from a Comando post page.
 *
 * Only magnets listed under headings accepted by `isPtDubbed` are considered.
 * The promise rejects if parsing throws, e.g. when no language label is found
 * on the page (`languagesElem[0]` is dereferenced unconditionally).
 *
 * @param {string} body page html
 * @returns {Promise<Array<object>>} records with title, originalName, year,
 *   infoHash, magnetLink, category, uploadDate, imdbId and languages
 */
function parseTorrentPage(body) {
return new Promise((resolve, reject) => {
const $ = cheerio.load(body);
if (!$) {
reject(new Error('Failed loading body'));
}
// For every <h2><strong> heading accepted by isPtDubbed, collect the magnet
// hrefs found in the siblings up to the next <h2> or <hr>.
const magnets = $('h2 > strong')
.filter((i, elem) => isPtDubbed($(elem).text())).parent()
.map((i, elem) => $(elem).nextUntil('h2, hr'))
.map((i, elem) => $(elem).find('a[href^="magnet"]'))
.map((i, section) => $(section).attr("href")).get();
// Parent element(s) of the bold labels mentioning "Servidor" or "Orig."/"Original".
const details = $('strong, b').filter((i, elem) => $(elem).text().match(/Servidor|Orig(?:\.|inal)/)).parent();
// Despite its name this is the href of the imdb link (or undefined), not a regex match.
const imdbIdMatch = details.find('a[href*="imdb.com"]').attr('href')
const torrents = magnets.map(magnetLink => {
const decodedMagnet = decode(magnetLink);
const originalNameElem = details.find('strong, b')
.filter((i, elem) => $(elem).text().match(/Baixar|Orig(?:\.|inal)|^Título:/));
const languagesElem = details.find('strong, b')
.filter((i, elem) => $(elem).text().match(/^\s*([IÍ]dioma|[AÁ]udio)/));
const originalName = parseOriginalName(originalNameElem);
const title = decodedMagnet.name && escapeHTML(decodedMagnet.name.replace(/\+/g, ' '));
return {
// Fall back to the page's original name when the magnet carries no name
title: title ? sanitizePtName(title) : originalName,
originalName: sanitizePtOriginalName(originalName),
year: details.find('a[href*="comando.to/category/"]').text(),
infoHash: decodedMagnet.infoHash,
magnetLink: magnetLink,
category: parseCategory($('div.entry-categories').html()),
uploadDate: new Date(moment($('a.updated').text(), 'LL', 'pt-br').format()),
// 5th "/"-separated segment of the imdb url, presumably the tt id - TODO confirm
imdbId: imdbIdMatch ? imdbIdMatch.split('/')[4] : null,
// The language list is the text node right after the language label
languages: sanitizePtLanguages(languagesElem[0].nextSibling.nodeValue)
}
});
resolve(torrents.filter((x) => x));
});
}
/**
 * Reads the original title next to its label element. The text is taken from
 * the following element, else from the following text node, else from the
 * label itself; a leading "<label>:" prefix is stripped.
 *
 * @param {object} originalNameElem cheerio selection of the label element(s)
 * @returns {string} trimmed original name, '' when the selection is empty
 */
function parseOriginalName(originalNameElem) {
  const labelNode = originalNameElem[0];
  if (!labelNode) {
    return '';
  }

  let rawName = originalNameElem.next().text();
  if (!rawName) {
    rawName = labelNode.nextSibling.nodeValue;
  }
  if (!rawName) {
    rawName = originalNameElem.text();
  }
  return rawName.replace(/[^:]*: ?/, '').trim();
}
/**
 * Maps the category links of a post to a Comando category, checking anime
 * first, then movies, then series.
 *
 * @param {string} categorys html of the post's category container
 * @returns {string|undefined} matching category, undefined when none matches
 */
function parseCategory(categorys) {
  const $ = cheerio.load(categorys);
  const hasCategoryLink = label => Boolean($(`a:contains('${label}')`).text());

  // Order matters: the first matching label wins
  const labelToCategory = [
    ['animes', Categories.ANIME],
    ['Filmes', Categories.MOVIE],
    ['Series', Categories.TV]
  ];
  for (const [label, category] of labelToCategory) {
    if (hasCategoryLink(label)) {
      return category;
    }
  }
  return undefined;
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,112 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const comando = require("./comando_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { getImdbId } = require("../../lib/metadata");
const NAME = "Comando";
const UNTIL_PAGE = 5;
const TYPE_MAPPING = typeMapping();
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Runs one full scrape for this provider and stores the start time of the
 * run as the provider's `lastScraped` value once scraping has finished.
 *
 * @returns {Promise<undefined>}
 */
async function scrape() {
  const startedAt = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  provider.lastScraped = startedAt;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Re-fetches the provider page of a stored torrent through the shared limiter.
 *
 * @param {{torrentId: string}} torrent - stored torrent entry
 * @returns {Promise<*>} whatever `comando.torrent` resolves with
 */
async function updateSeeders(torrent) {
  const fetchTorrentPage = () => comando.torrent(torrent.torrentId);
  return limiter.schedule(fetchTorrentPage);
}
/**
 * Scrapes the movie and TV categories one after another and flattens the
 * per-category results into a single array.
 *
 * @returns {Promise<Array>}
 */
async function scrapeLatestTorrents() {
  const { MOVIE, TV } = comando.Categories;
  const jobs = [MOVIE, TV].map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(jobs);
  return perCategory.reduce((all, entries) => all.concat(entries), []);
}
/**
 * Processes one listing page of a category and recurses to the next page
 * while the page produced entries and the page limit is not reached.
 * A failed listing request is logged and treated as an empty page.
 *
 * @param {string} category - provider category slug
 * @param {number} [page=1] - listing page to process
 * @returns {Promise<undefined>}
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  const listing = comando.browse({ category, page });
  let torrents;
  try {
    torrents = await listing;
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    torrents = [];
  }
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processEntry(torrent))));
  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
/**
 * Loads the detail page of a listing entry and processes each torrent record
 * found there sequentially. Any failure resolves to undefined (best effort).
 *
 * @param {{torrentId: string}} entry - listing entry
 * @returns {Promise<Array|undefined>}
 */
async function processEntry(entry) {
  const pendingRecords = comando.torrent(entry.torrentId);
  try {
    const records = await pendingRecords;
    return await Promises.sequence(records.map((record) => () => processTorrentRecord(record)));
  } catch (ignored) {
    return undefined;
  }
}
/**
 * Stores a scraped torrent record unless `checkAndUpdateTorrent` reports it
 * as already handled. Missing size, seeders and imdb id are filled in first.
 *
 * @param {object} foundTorrent - record produced by the provider page parser
 * @returns {Promise<*>} the record itself when already known, otherwise the
 *   result of `createTorrentEntry`
 */
async function processTorrentRecord(foundTorrent) {
  const alreadyHandled = await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  if (alreadyHandled) {
    return foundTorrent;
  }

  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }
  if (!foundTorrent.imdbId && foundTorrent.originalName) {
    const lookup = { title: foundTorrent.originalName, year: foundTorrent.year };
    foundTorrent.imdbId = await getImdbId(lookup, TYPE_MAPPING[foundTorrent.category])
        .catch(() => undefined);
  }

  // Fields are read only after the helpers above had a chance to fill them in.
  const { infoHash, torrentId, title, imdbId, uploadDate, seeders, size, files, languages } = foundTorrent;
  return createTorrentEntry({
    infoHash,
    provider: NAME,
    torrentId,
    title,
    type: TYPE_MAPPING[foundTorrent.category],
    imdbId,
    uploadDate,
    seeders,
    size,
    files,
    languages
  });
}
/**
 * Builds the lookup from provider category to internal `Type`.
 *
 * @returns {Object<string, string>}
 */
function typeMapping() {
  const { MOVIE, DOCUMENTARIES, TV, ANIME } = comando.Categories;
  return {
    [MOVIE]: Type.MOVIE,
    [DOCUMENTARIES]: Type.SERIES,
    [TV]: Type.SERIES,
    [ANIME]: Type.ANIME
  };
}
/**
 * Returns how many listing pages to scrape for a category.
 *
 * @param {string} category - provider category slug
 * @returns {number} 5 for TV, otherwise `UNTIL_PAGE`
 */
function untilPage(category) {
  return category === comando.Categories.TV ? 5 : UNTIL_PAGE;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,135 +0,0 @@
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper')
const defaultTimeout = 30000;
const maxSearchPage = 50
const baseUrl = 'https://comoeubaixo.com';
const Categories = {
MOVIE: 'filmes',
TV: 'series',
ANIME: 'anime',
DESENHOS: 'desenhos'
};
/**
 * Fetches and parses one torrent page, retrying on failure.
 *
 * @param {string} torrentId - page URL; its 4th "/"-separated segment is the slug
 * @param {object} [config] - request options forwarded to `singleRequest`
 * @param {number} [retries=2] - remaining attempts
 * @returns {Promise<Array<object>>} parsed records, each tagged with the encoded slug
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const slug = encodeURIComponent(torrentId.split("/")[3]);
  const tagWithSlug = (records) => records.map((record) => ({ torrentId: slug, ...record }));
  const retry = (err) => {
    console.warn(`Failed ComoEuBaixo ${torrentId} request: `, err);
    return torrent(torrentId, config, retries - 1);
  };
  return singleRequest(`${baseUrl}/${slug}/`, config)
      .then((body) => parseTorrentPage(body))
      .then(tagWithSlug)
      .catch(retry);
}
/**
 * Searches listings by keyword, optionally following result pages up to
 * `config.extendToPage` (capped by `maxSearchPage`). A page of exactly 40
 * results is treated as "there may be more".
 *
 * @param {string} keyword - search term, used as a URL path segment as-is
 * @param {object} [config] - `page`, `extendToPage` and request options
 * @param {number} [retries=2] - remaining attempts
 * @returns {Promise<Array<object>>}
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const extendToPage = Math.min(maxSearchPage, (config.extendToPage || 1));

  const appendFollowingPages = (torrents) => {
    const mayHaveMore = torrents.length === 40 && page < extendToPage;
    if (!mayHaveMore) {
      return torrents;
    }
    return search(keyword, { ...config, page: page + 1 })
        .catch(() => [])
        .then((nextTorrents) => torrents.concat(nextTorrents));
  };

  return singleRequest(`${baseUrl}/${keyword}/${page}/`, config)
      .then((body) => parseTableBody(body))
      .then(appendFollowingPages)
      .catch((err) => search(keyword, config, retries - 1));
}
/**
 * Fetches one listing page, optionally restricted to a category, retrying
 * on failure.
 *
 * @param {object} [config] - `page`, `category` and request options
 * @param {number} [retries=2] - remaining attempts
 * @returns {Promise<Array<object>>} listing entries
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const { category } = config;
  const page = config.page || 1;
  let requestUrl = `${baseUrl}/${page}/`;
  if (category) {
    requestUrl = `${baseUrl}/${category}/${page}/`;
  }
  const retry = (err) => browse(config, retries - 1);
  return singleRequest(requestUrl, config)
      .then((body) => parseTableBody(body))
      .catch(retry);
}
/**
 * Performs a GET request with a random User-Agent and validates the body.
 *
 * @param {string} requestUrl - absolute URL to fetch
 * @param {object} [config] - `timeout` in ms (defaults to `defaultTimeout`)
 * @returns {Promise<string|Buffer>} the response body; rejects with the error
 *   message (or the raw error when it has no message) on failure
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };

  return axios.get(requestUrl, options)
      .then((response) => {
        const body = response.data;
        // Buffers expose `length`, not `size`; checking `size` rejected every Buffer body.
        if (!body || (Buffer.isBuffer(body) && !body.length)) {
          throw new Error(`No body: ${requestUrl}`);
        } else if (body.includes('502: Bad gateway') ||
            body.includes('403 Forbidden')) {
          throw new Error(`Invalid body contents: ${requestUrl}`);
        }
        return body;
      })
      .catch(error => Promise.reject(error.message || error));
}
/**
 * Parses a listing page into entries.
 *
 * @param {string} body - listing page HTML
 * @returns {Promise<Array<{name: string, torrentId: string, isTorrent: boolean}>>}
 */
function parseTableBody(body) {
  const parseRows = () => {
    const $ = cheerio.load(body);
    if (!$) {
      throw new Error('Failed loading body');
    }
    const torrents = [];
    $('div.capa_larga.align-middle').each((i, element) => {
      const row = $(element);
      const name = row.find("a").text();
      const torrentId = row.find("a").attr("href");
      const isTorrent = !!row.find("p:contains(\'Torrent\')")[0];
      torrents.push({ name, torrentId, isTorrent });
    });
    return torrents;
  };
  return new Promise((resolve) => resolve(parseRows()));
}
/**
 * Parses a torrent detail page into one record per PT-dubbed magnet link.
 *
 * @param {string} body - detail page HTML
 * @returns {Promise<Array<object>>} records with title, infoHash, magnetLink,
 *   category, uploadDate, imdbId (null when the page has no IMDb link) and
 *   languages; rejects when an expected detail element is missing
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      reject(new Error('Failed loading body'));
      return;
    }
    const magnets = $(`a[href^="magnet"]`)
        .filter((i, elem) => isPtDubbed($(elem).attr('title')))
        .map((i, elem) => $(elem).attr("href")).get();
    const details = $('div#informacoes');
    const category = details.find('strong:contains(\'Gêneros: \')').next().attr('href').split('/')[0];
    // Not every page links to IMDb; a missing link must not discard the whole page.
    const imdbUrl = details.find('a[href*="imdb.com"]').attr('href');
    const torrents = magnets.map(magnetLink => {
      const decodedMagnet = decode(magnetLink);
      const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '));
      const originalTitle = details.find('strong:contains(\'Baixar\')')[0].nextSibling.nodeValue.split('-')[0];
      const year = details.find('strong:contains(\'Data de Lançamento: \')').next().text().trim();
      // Used when the magnet's display name is too short to be a meaningful title.
      const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`;
      return {
        title: name.length > 5 ? name : fallBackTitle,
        infoHash: decodedMagnet.infoHash,
        magnetLink: magnetLink,
        category: category,
        uploadDate: new Date($('time').attr('datetime')),
        imdbId: imdbUrl ? imdbUrl.split('/')[4] : null,
        languages: sanitizePtLanguages(details.find('strong:contains(\'Idioma\')')[0].nextSibling.nodeValue)
      };
    });
    resolve(torrents.filter((x) => x));
  });
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,115 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const comoeubaixo = require("./comoeubaixo_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const NAME = "ComoEuBaixo";
const UNTIL_PAGE = 5;
const TYPE_MAPPING = typeMapping();
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Runs one full scrape for this provider and stores the start time of the
 * run as the provider's `lastScraped` value once scraping has finished.
 *
 * @returns {Promise<undefined>}
 */
async function scrape() {
  const startedAt = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  provider.lastScraped = startedAt;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Re-fetches the provider page of a stored torrent through the shared limiter.
 *
 * @param {{torrentId: string}} torrent - stored torrent entry
 * @returns {Promise<*>} whatever `comoeubaixo.torrent` resolves with
 */
async function updateSeeders(torrent) {
  const fetchTorrentPage = () => comoeubaixo.torrent(torrent.torrentId);
  return limiter.schedule(fetchTorrentPage);
}
/**
 * Scrapes the movie, TV and "desenhos" categories one after another and
 * flattens the per-category results into a single array.
 *
 * @returns {Promise<Array>}
 */
async function scrapeLatestTorrents() {
  const { MOVIE, TV, DESENHOS } = comoeubaixo.Categories;
  const jobs = [MOVIE, TV, DESENHOS]
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(jobs);
  return perCategory.reduce((all, entries) => all.concat(entries), []);
}
/**
 * Processes one listing page of a category and recurses to the next page
 * while the page produced entries and the page limit is not reached.
 * A failed listing request is logged and treated as an empty page.
 *
 * @param {string} category - provider category slug
 * @param {number} [page=1] - listing page to process
 * @returns {Promise<undefined>}
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  const listing = comoeubaixo.browse({ category, page });
  let torrents;
  try {
    torrents = await listing;
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    torrents = [];
  }
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processEntry(torrent))));
  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
/**
 * Processes a listing entry. Entries not flagged as torrents are returned
 * unchanged; otherwise the detail page is loaded and each record processed
 * sequentially. Any failure resolves to undefined (best effort).
 *
 * @param {{torrentId: string, isTorrent: boolean}} entry - listing entry
 * @returns {Promise<*>}
 */
async function processEntry(entry) {
  if (!entry.isTorrent) {
    return entry;
  }
  const pendingRecords = comoeubaixo.torrent(entry.torrentId);
  try {
    const records = await pendingRecords;
    return await Promises.sequence(records.map((record) => () => processTorrentRecord(record)));
  } catch (ignored) {
    return undefined;
  }
}
/**
 * Stores a scraped torrent record unless `checkAndUpdateTorrent` reports it
 * as already handled. Missing size and seeders are filled in first.
 *
 * @param {object} foundTorrent - record produced by the provider page parser
 * @returns {Promise<*>} the record itself when already known, otherwise the
 *   result of `createTorrentEntry`
 */
async function processTorrentRecord(foundTorrent) {
  const alreadyHandled = await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  if (alreadyHandled) {
    return foundTorrent;
  }

  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }

  // Fields are read only after the helpers above had a chance to fill them in.
  const { infoHash, torrentId, title, imdbId, uploadDate, seeders, size, files, languages } = foundTorrent;
  return createTorrentEntry({
    infoHash,
    provider: NAME,
    torrentId,
    title,
    type: TYPE_MAPPING[foundTorrent.category],
    imdbId,
    uploadDate,
    seeders,
    size,
    files,
    languages
  });
}
/**
 * Builds the lookup from provider category to internal `Type`.
 *
 * @returns {Object<string, string>}
 */
function typeMapping() {
  const { MOVIE, TV, ANIME, DESENHOS } = comoeubaixo.Categories;
  return {
    [MOVIE]: Type.MOVIE,
    [TV]: Type.SERIES,
    [ANIME]: Type.ANIME,
    [DESENHOS]: Type.SERIES
  };
}
/**
 * Returns how many listing pages to scrape for a category.
 *
 * Every category currently uses the same limit; the parameter is kept so a
 * per-category limit can be introduced without touching callers.
 *
 * @param {string} category - provider category slug (currently unused)
 * @returns {number} `UNTIL_PAGE`
 */
function untilPage(category) {
  return UNTIL_PAGE;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,127 +0,0 @@
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require("magnet-uri");
const { getRandomUserAgent } = require("../../lib/requestHelper");
const defaultTimeout = 10000;
const maxSearchPage = 50;
const baseUrl = 'https://darkmahou.com';
const Categories = {
MOVIE: 'movie',
ANIME: 'tv',
OVA: 'ova'
};
/**
 * Fetches and parses one torrent page, retrying on failure.
 *
 * @param {string} torrentId - page URL; its 4th "/"-separated segment is the slug
 * @param {object} [config] - request options forwarded to `singleRequest`
 * @param {number} [retries=2] - remaining attempts
 * @returns {Promise<Array<object>>} parsed records, each tagged with the slug
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const slug = torrentId.split("/")[3];
  return singleRequest(`${baseUrl}/${slug}`, config)
      .then((body) => parseTorrentPage(body))
      .then((records) => records.map((el) => ({ torrentId: slug, ...el })))
      // Retry with the original id: the slug alone has no 4th path segment,
      // so retrying with it would request `${baseUrl}/undefined`.
      .catch((err) => torrent(torrentId, config, retries - 1));
}
/**
 * Searches listings by keyword, optionally following result pages up to
 * `config.extendToPage` (capped by `maxSearchPage`). A page of exactly 40
 * results is treated as "there may be more".
 *
 * @param {string} keyword - raw (unencoded) search term
 * @param {object} [config] - `page`, `extendToPage` and request options
 * @param {number} [retries=2] - remaining attempts
 * @returns {Promise<Array<object>>}
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const extendToPage = Math.min(maxSearchPage, config.extendToPage || 1);
  // The keyword goes into the query string, so spaces, "&", "#" etc. must be escaped.
  const query = encodeURIComponent(keyword);

  return singleRequest(`${baseUrl}/page/${page}/?s=${query}`, config)
      .then((body) => parseTableBody(body))
      .then((torrents) =>
          torrents.length === 40 && page < extendToPage
              ? search(keyword, { ...config, page: page + 1 })
                  .catch(() => [])
                  .then((nextTorrents) => torrents.concat(nextTorrents))
              : torrents
      )
      .catch((err) => search(keyword, config, retries - 1));
}
/**
 * Fetches one listing page, optionally restricted to a category, retrying
 * on failure.
 *
 * @param {object} [config] - `page`, `category` and request options
 * @param {number} [retries=2] - remaining attempts
 * @returns {Promise<Array<object>>} listing entries
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const { category } = config;
  const page = config.page || 1;
  let requestUrl = `${baseUrl}/page/${page}/`;
  if (category) {
    requestUrl = `${baseUrl}/category/${category}/page/${page}/`;
  }
  const retry = (err) => browse(config, retries - 1);
  return singleRequest(requestUrl, config)
      .then((body) => parseTableBody(body))
      .catch(retry);
}
/**
 * Performs a GET request with a random User-Agent and validates the body.
 *
 * @param {string} requestUrl - absolute URL to fetch
 * @param {object} [config] - `timeout` in ms (defaults to `defaultTimeout`)
 * @returns {Promise<*>} the response body; rejects when it is empty or
 *   contains a known gateway/forbidden marker
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout };
  const errorMarkers = ["502: Bad gateway", "403 Forbidden"];

  return axios.get(requestUrl, options).then(({ data: body }) => {
    if (!body) {
      throw new Error(`No body: ${requestUrl}`);
    }
    if (errorMarkers.some((marker) => body.includes(marker))) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  });
}
/**
 * Parses a listing page into entries.
 *
 * @param {string} body - listing page HTML
 * @returns {Promise<Array<{name: string, torrentId: string}>>}
 */
function parseTableBody(body) {
  const parseRows = () => {
    const $ = cheerio.load(body);
    if (!$) {
      throw new Error("Failed loading body");
    }
    const torrents = [];
    $("article.bs").each((i, element) => {
      const row = $(element);
      const name = row.find("span.ntitle").text();
      const torrentId = row.find("div > a").attr("href");
      torrents.push({ name, torrentId });
    });
    return torrents;
  };
  return new Promise((resolve) => resolve(parseRows()));
}
/**
 * Parses an anime detail page into one record per magnet link.
 *
 * The executor is deliberately synchronous: with an `async` executor a
 * missing detail element threw into an unhandled rejection and left the
 * returned promise pending forever. Now such failures reject the promise.
 *
 * @param {string} body - detail page HTML
 * @returns {Promise<Array<object>>} records with title, originalName, year,
 *   infoHash, magnetLink, category and uploadDate
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      reject(new Error("Failed loading body"));
      return;
    }
    const magnets = $(`a[href^="magnet"]`)
        .map((i, section) => $(section).attr("href"))
        .get();
    const details = $('div.infox');
    const torrents = magnets.map((magnetLink) => {
      // Decode once per magnet instead of once per field.
      const decodedMagnet = decode(magnetLink);
      return {
        title: decodedMagnet.name,
        originalName: details.find('h1.entry-title').text(),
        year: details.find('b:contains(\'Lançado:\')')[0].nextSibling.nodeValue || '',
        infoHash: decodedMagnet.infoHash,
        magnetLink: magnetLink,
        category: details.find('b:contains(\'Tipo:\')').next().attr('href').split('/')[4],
        uploadDate: new Date($("time[itemprop=dateModified]").attr("datetime")),
      };
    });
    resolve(torrents.filter((x) => x));
  });
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,108 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const darkmahou = require("./darkmahou_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { getKitsuId } = require("../../lib/metadata");
const NAME = "DarkMahou";
const UNTIL_PAGE = 5;
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Runs one full scrape for this provider and stores the start time of the
 * run as the provider's `lastScraped` value once scraping has finished.
 *
 * @returns {Promise<undefined>}
 */
async function scrape() {
  const startedAt = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  provider.lastScraped = startedAt;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Re-fetches the provider page of a stored torrent through the shared limiter.
 *
 * @param {{torrentId: string}} torrent - stored torrent entry
 * @returns {Promise<*>} whatever `darkmahou.torrent` resolves with
 */
async function updateSeeders(torrent) {
  const fetchTorrentPage = () => darkmahou.torrent(torrent.torrentId);
  return limiter.schedule(fetchTorrentPage);
}
/**
 * Scrapes the movie, anime and OVA categories one after another and
 * flattens the per-category results into a single array.
 *
 * @returns {Promise<Array>}
 */
async function scrapeLatestTorrents() {
  const { MOVIE, ANIME, OVA } = darkmahou.Categories;
  const jobs = [MOVIE, ANIME, OVA]
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(jobs);
  return perCategory.reduce((all, entries) => all.concat(entries), []);
}
/**
 * Processes one listing page of a category and recurses to the next page
 * while the page produced entries and the page limit is not reached.
 * A failed listing request is logged and treated as an empty page.
 *
 * @param {string} category - provider category slug
 * @param {number} [page=1] - listing page to process
 * @returns {Promise<undefined>}
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  const listing = darkmahou.browse({ category, page });
  let torrents;
  try {
    torrents = await listing;
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    torrents = [];
  }
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processEntry(torrent))));
  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
/**
 * Loads the detail page of a listing entry and processes each torrent record
 * found there sequentially. Any failure resolves to undefined (best effort).
 *
 * @param {{torrentId: string}} entry - listing entry
 * @returns {Promise<Array|undefined>}
 */
async function processEntry(entry) {
  const pendingRecords = darkmahou.torrent(entry.torrentId);
  try {
    const records = await pendingRecords;
    return await Promises.sequence(records.map((record) => () => processTorrentRecord(record)));
  } catch (ignored) {
    return undefined;
  }
}
/**
 * Stores a scraped anime torrent record unless `checkAndUpdateTorrent`
 * reports it as already handled. Missing size, seeders and (when neither an
 * imdb nor a kitsu id is present) the kitsu id are filled in first.
 *
 * @param {object} foundTorrent - record produced by the provider page parser
 * @returns {Promise<*>} the record itself when already known, otherwise the
 *   result of `createTorrentEntry`
 */
async function processTorrentRecord(foundTorrent) {
  const alreadyHandled = await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  if (alreadyHandled) {
    return foundTorrent;
  }

  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }
  const hasAnyId = foundTorrent.imdbId || foundTorrent.kitsuId;
  if (!hasAnyId) {
    const lookup = { title: foundTorrent.originalName, year: foundTorrent.year };
    foundTorrent.kitsuId = await getKitsuId(lookup).catch(() => undefined);
  }

  // Fields are read only after the helpers above had a chance to fill them in.
  const {
    infoHash, torrentId, title, imdbId, kitsuId, uploadDate, seeders, size, files, languages
  } = foundTorrent;
  return createTorrentEntry({
    infoHash,
    provider: NAME,
    torrentId,
    title,
    type: Type.ANIME,
    imdbId,
    kitsuId,
    uploadDate,
    seeders,
    size,
    files,
    languages
  });
}
/**
 * Returns how many listing pages to scrape for a category.
 *
 * @param {string} category - provider category slug
 * @returns {number} 5 for anime, 4 for OVA, otherwise `UNTIL_PAGE`
 */
function untilPage(category) {
  const { ANIME, OVA } = darkmahou.Categories;
  switch (category) {
    case ANIME:
      return 5;
    case OVA:
      return 4;
    default:
      return UNTIL_PAGE;
  }
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,93 +0,0 @@
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require("magnet-uri");
const Promises = require("../../lib/promises");
const { getRandomUserAgent } = require("../../lib/requestHelper");
const defaultTimeout = 10000;
const baseUrl = 'https://www.erai-raws.info';
const Categories = {
ANIMES: 'anime',
EPISODES: 'episodes'
};
/**
 * Fetches one category listing page, then every anime page linked from it
 * (sequentially), and returns all torrents parsed from those pages.
 * The whole pipeline is retried on any failure.
 *
 * @param {object} [config] - `page`, `category` and request options
 * @param {number} [retries=2] - remaining attempts
 * @returns {Promise<Array<object>>}
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const category = config.category;

  const fetchAnimePages = (animes) =>
      Promises.sequence(animes.map((anime) => () => singleRequest(anime.animeLink)));
  const parseAnimePages = (animeBodies) =>
      Promise.all(animeBodies.map((animeBody) => parseTorrentPage(animeBody)));
  const flatten = (animeInfos) => animeInfos.reduce((a, b) => a.concat(b), []);

  return singleRequest(`${baseUrl}/${category}/page/${page}/`, config)
      .then((body) => parseTableBody(body))
      .then(fetchAnimePages)
      .then(parseAnimePages)
      .then(flatten)
      .catch((err) => browse(config, retries - 1));
}
/**
 * Performs a GET request with a random User-Agent and validates the body.
 *
 * @param {string} requestUrl - absolute URL to fetch
 * @param {object} [config] - `timeout` in ms (defaults to `defaultTimeout`)
 * @returns {Promise<string|Buffer>} the response body; rejects when it is
 *   empty or contains a known gateway/forbidden marker
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout, };

  return axios.get(requestUrl, options).then((response) => {
    const body = response.data;
    // Buffers expose `length`, not `size`; checking `size` rejected every Buffer body.
    if (!body || (Buffer.isBuffer(body) && !body.length)) {
      throw new Error(`No body: ${requestUrl}`);
    } else if (
        body.includes("502: Bad gateway") ||
        body.includes("403 Forbidden")
    ) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  });
}
/**
 * Extracts the anime links from a listing page.
 *
 * @param {string} body - listing page HTML
 * @returns {Promise<Array<{name: string, animeLink: string}>>}
 */
function parseTableBody(body) {
  const extractLinks = () => {
    const $ = cheerio.load(body);
    if (!$) {
      throw new Error("Failed loading body");
    }
    const toLink = (i, element) => {
      const anchor = $(element);
      return { name: anchor.text(), animeLink: anchor.attr("href") };
    };
    return $('[itemprop=\'headline\'] a, .content-area a.aa_ss_ops_new').map(toLink).get();
  };
  return new Promise((resolve) => resolve(extractLinks()));
}
/**
 * Parses an anime page into one record per magnet link. Each table on the
 * page contributes its magnets together with the languages listed in it.
 *
 * The executor is deliberately synchronous: with an `async` executor any
 * throw (e.g. from decoding a malformed magnet) became an unhandled
 * rejection and left the returned promise pending forever.
 *
 * @param {string} body - anime page HTML
 * @returns {Promise<Array<{title: string, infoHash: string, trackers: *, languages: string}>>}
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      reject(new Error("Failed loading body"));
      return;
    }
    const entries = $('.tab-content table, .content-area table')
        .map((i, entry) => {
          const languages = $(entry).find('.tooltip3').map((_, l) => $(l).attr('data-title')).get().join('/');
          const magnets = $(entry).find('a[href^="magnet"]').map((_, m) => $(m).attr('href')).get();
          return { languages, magnets };
        }).get();
    const torrents = entries
        .map(entry => entry.magnets
            .map(magnet => decode(magnet))
            .map(decodedMagnet => ({
              title: decodedMagnet.name,
              infoHash: decodedMagnet.infoHash,
              trackers: decodedMagnet.tr,
              languages: entry.languages
            })))
        .reduce((a, b) => a.concat(b), []);
    resolve(torrents);
  });
}
module.exports = { browse, Categories };

View File

@@ -1,50 +0,0 @@
const Parser = require('rss-parser');
const decode = require("magnet-uri");
const parser = new Parser({
customFields: {
item: [['erai:subtitles', 'subtitles']]
}
});
const baseUrl = 'https://www.erai-raws.info';
const rssKey = process.env.ERAI_RSS_KEY;
const Categories = {
ANIMES: 'anime',
EPISODES: 'episodes'
};
/**
 * Reads the magnet RSS feed and converts every item into a torrent record.
 *
 * @returns {Promise<Array<{title: string, infoHash: string, trackers: *, languages: string}>>}
 */
function browse() {
  const toRecord = (item) => {
    const { name, infoHash, tr } = decode(item.link);
    const languages = parseLanguages(item.subtitles);
    return { title: name, infoHash, trackers: tr, languages };
  };
  return parser.parseURL(`${baseUrl}/feed/?type=magnet&${rssKey}`)
      .then((result) => result.items.map(toRecord));
}
// Lookup from the short codes found in a feed item's subtitles field to the
// language names stored on torrent records. Codes without an entry here are
// passed through unchanged by parseLanguages.
const languageMapping = {
'us': 'English',
'br': 'Portuguese(Brazil)',
'mx': 'Spanish(Latin_America)',
'es': 'Spanish',
'sa': 'Arabic',
'fr': 'French',
'de': 'German',
'it': 'Italian',
'ru': 'Russian'
}
/**
 * Converts a bracketed code list such as "[us][br]" into a "/"-joined list
 * of language names. Codes missing from `languageMapping` are kept as-is.
 *
 * @param {string} [languages] - raw subtitles field of a feed item
 * @returns {string} joined language names; '' when the field is missing or empty
 */
function parseLanguages(languages) {
  // Feed items without a subtitles field must not break the whole feed parse.
  if (!languages) {
    return '';
  }
  return languages.split('][')
      .map(lang => lang.replace(/[\[\]]/g, ''))
      .map(lang => languageMapping[lang] || lang)
      .join('/');
}
module.exports = { browse, Categories };

View File

@@ -1,47 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const erairaws = require('./erairaws_rss_api');
const { checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'EraiRaws';
const limiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Runs one scrape of the latest torrents, logging start and finish.
 *
 * @returns {Promise<undefined>}
 */
async function scrape() {
  const startedAt = moment();
  console.log(`[${startedAt}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Scrapes the episodes category and flattens the result by one level.
 *
 * @returns {Promise<Array>}
 */
async function scrapeLatestTorrents() {
  const entries = await scrapeLatestTorrentsForCategory(erairaws.Categories.EPISODES);
  return entries.reduce((all, entry) => all.concat(entry), []);
}
/**
 * Processes one batch of torrents returned by the provider and recurses to
 * the next page while the batch was non-empty and the page limit is not
 * reached. A failed request is logged and treated as an empty batch.
 *
 * @param {string} category - provider category
 * @param {number} [page=1] - page counter
 * @returns {Promise<Array>} resolves with an empty array once paging stops
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  const listing = erairaws.browse({ category, page });
  let torrents;
  try {
    torrents = await listing;
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    torrents = [];
  }
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processRecord(torrent))));
  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return [];
}
/**
 * Passes the record, tagged with this provider's name, to
 * `checkAndUpdateTorrent` and resolves with the untouched record.
 *
 * @param {object} foundTorrent - record produced by the feed parser
 * @returns {Promise<object>} the same `foundTorrent`
 */
async function processRecord(foundTorrent) {
  await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  return foundTorrent;
}
/**
 * Returns how many pages to scrape for a category.
 *
 * @param {string} category - provider category
 * @returns {number} 45 for the animes category, otherwise 1
 */
function untilPage(category) {
  return category === erairaws.Categories.ANIMES ? 45 : 1;
}
module.exports = { scrape, NAME };

View File

@@ -1,117 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const { decode } = require("magnet-uri");
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require('./../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
const defaultProxies = [
'https://eztv.re'
];
const defaultTimeout = 120000;
const minDelay = 3000;
const jitterDelay = minDelay;
const limit = 100;
const maxPage = 5;
/**
 * Fetches and parses a single episode page, trying every configured proxy
 * and retrying after a jittered delay on failure.
 *
 * @param {string} torrentId - episode path segment placed after `/ep/`
 * @param {object} [config] - request options forwarded to `singleRequest`
 * @param {number} [retries=1] - retries left after the current attempt
 * @returns {Promise<object>} parsed torrent including `torrentId`
 */
function torrent(torrentId, config = {}, retries = 1) {
  if (!torrentId) {
    return Promise.reject(new Error(`Failed ${torrentId} search`));
  }
  const requests = defaultProxies
      .map((proxyUrl) => singleRequest(`${proxyUrl}/ep/${torrentId}`, config));
  const retryOrFail = (error) => (retries
      ? jitter().then(() => torrent(torrentId, config, retries - 1))
      : Promise.reject(error));

  return Promises.first(requests)
      .then((body) => parseTorrentPage(body))
      .then((parsed) => ({ torrentId, ...parsed }))
      .catch(retryOrFail);
}
/**
 * Lists torrents for an imdb id through the JSON API, following result
 * pages while a page is full (`limit` items) and `maxPage` is not reached.
 * Retries after a jittered delay on failure.
 *
 * @param {string} imdbId - imdb id; its first "tt" is stripped for the API
 * @param {object} [config] - `page` and request options
 * @param {number} [retries=1] - retries left after the current attempt
 * @returns {Promise<Array<object>>}
 */
function search(imdbId, config = {}, retries = 1) {
  if (!imdbId) {
    return Promise.reject(new Error(`Failed ${imdbId} search`));
  }
  const id = imdbId.replace('tt', '');
  const page = config.page || 1;
  const requests = defaultProxies.map((proxyUrl) =>
      singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}&imdb_id=${id}`, config));

  const appendFollowingPages = (torrents) => {
    const mayHaveMore = torrents.length === limit && page < maxPage;
    if (!mayHaveMore) {
      return torrents;
    }
    return search(imdbId, { ...config, page: page + 1 })
        .catch(() => [])
        .then((nextTorrents) => torrents.concat(nextTorrents));
  };
  const retryOrFail = (error) => (retries
      ? jitter().then(() => search(imdbId, config, retries - 1))
      : Promise.reject(error));

  return Promises.first(requests)
      .then((results) => parseResults(results))
      .then(appendFollowingPages)
      .catch(retryOrFail);
}
/**
 * Lists one page of the latest torrents through the JSON API, retrying
 * after a jittered delay on failure.
 *
 * @param {object} [config] - `page` and request options
 * @param {number} [retries=1] - retries left after the current attempt
 * @returns {Promise<Array<object>>}
 */
function browse(config = {}, retries = 1) {
  const page = config.page || 1;
  const requests = defaultProxies.map((proxyUrl) =>
      singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}`, config));
  const retryOrFail = (error) => (retries
      ? jitter().then(() => browse(config, retries - 1))
      : Promise.reject(error));

  return Promises.first(requests)
      .then((results) => parseResults(results))
      .catch(retryOrFail);
}
/**
 * Performs a GET request with a random User-Agent.
 *
 * @param {string} requestUrl - absolute URL to fetch
 * @param {object} [config] - `timeout` in ms (defaults to `defaultTimeout`)
 * @returns {Promise<*>} the response data; rejects with a string message
 *   when the response has no data
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout };

  return axios.get(requestUrl, options).then((response) => {
    const { data } = response;
    return data ? Promise.resolve(data) : Promise.reject(`No body: ${requestUrl}`);
  });
}
/**
 * Converts an API response into parsed torrents.
 *
 * @param {{torrents: Array<object>}} results - API response body
 * @returns {Array<object>|Promise<never>} parsed torrents, or a promise
 *   rejected with a string when the response has no `torrents` array
 */
function parseResults(results) {
  const rawTorrents = results && results.torrents;
  if (!Array.isArray(rawTorrents)) {
    return Promise.reject(`Incorrect results ${results}`);
  }
  return rawTorrents.map((torrent) => parseTorrent(torrent));
}
/**
 * Converts one raw API torrent object into the scraper's record shape.
 *
 * @param {object} torrent - raw API item (title, episode_url, hash,
 *   magnet_url, torrent_url, seeds, size_bytes, date_released_unix, imdb_id)
 * @returns {object} record; `imdbId` is undefined when the API provides no
 *   usable id (missing, empty or "0")
 */
function parseTorrent(torrent) {
  // Guard against missing/empty ids so they never turn into "ttundefined" or "tt".
  const rawImdbId = torrent.imdb_id;
  const hasImdbId = Boolean(rawImdbId) && String(rawImdbId) !== '0';
  return {
    name: torrent.title.replace(/EZTV$/, ''),
    torrentId: torrent.episode_url.replace(/.*\/ep\//, ''),
    infoHash: torrent.hash.trim().toLowerCase(),
    magnetLink: torrent.magnet_url,
    torrentLink: torrent.torrent_url,
    seeders: torrent.seeds,
    size: torrent.size_bytes,
    // The API reports seconds since the epoch; Date expects milliseconds.
    uploadDate: new Date(torrent.date_released_unix * 1000),
    imdbId: hasImdbId ? `tt${rawImdbId}` : undefined
  };
}
/**
 * Parses a single episode page into a torrent record.
 *
 * @param {string} body - episode page HTML
 * @returns {Promise<object>} record with name, infoHash, magnetLink,
 *   torrentLink, seeders, size, uploadDate and showUrl
 */
function parseTorrentPage(body) {
  const parsePage = () => {
    const $ = cheerio.load(body);
    if (!$) {
      throw new Error('Failed loading body');
    }
    const content = $('table[class="forum_header_border_normal"]');
    const magnetLink = content.find('a[title="Magnet Link"]').attr('href');
    // Reads the text node that directly follows a bold label.
    const valueAfter = (labelSelector) => content.find(labelSelector)[0].nextSibling.data;

    return {
      name: content.find('h1 > span').text().replace(/EZTV$/, ''),
      infoHash: decode(magnetLink).infoHash,
      magnetLink,
      torrentLink: content.find('a[title="Download Torrent"]').attr('href'),
      seeders: parseInt(content.find('span[class="stat_red"]').first().text(), 10) || 0,
      size: parseSize(valueAfter('b:contains(\'Filesize:\')')),
      uploadDate: moment(valueAfter('b:contains(\'Released:\')'), 'Do MMM YYYY').toDate(),
      showUrl: content.find('.episode_left_column a').attr('href')
    };
  };
  return new Promise((resolve) => resolve(parsePage()));
}
/**
 * Waits for `minDelay` plus a random extra of up to `jitterDelay` ms.
 *
 * @returns {Promise<*>} whatever `Promises.delay` resolves with
 */
function jitter() {
  const randomExtra = Math.round(Math.random() * jitterDelay);
  return Promises.delay(minDelay + randomExtra);
}
module.exports = { torrent, search, browse };

View File

@@ -1,85 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const { parse } = require('parse-torrent-title');
const eztv = require('./eztv_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const { isEpisodeImdbId } = require('../../lib/metadata');
const NAME = 'EZTV';
const UNTIL_PAGE = 10;
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Runs one full scrape for this provider and stores the start time of the
 * run as the provider's `lastScraped` value once scraping has finished.
 *
 * @returns {Promise<undefined>}
 */
async function scrape() {
  const startedAt = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  provider.lastScraped = startedAt;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Seeder refresh hook for this provider. Currently disabled: it ignores both
 * arguments and always resolves with an empty array. The previous
 * implementation is kept below as commented-out code.
 *
 * @param {object} torrent - stored torrent entry (unused)
 * @param {Function} getImdbIdsMethod - imdb id supplier (unused)
 * @returns {Promise<Array>} always an empty array
 */
async function updateSeeders(torrent, getImdbIdsMethod) {
// return getImdbIdsMethod()
// .then(imdbIds => Promise.all(imdbIds.map(imdbId => limiter.schedule(() => eztv.search(imdbId)))))
// .then(results => results.reduce((a, b) => a.concat(b), []))
// .catch(() => limiter.schedule(() => eztv.torrent(torrent.torrentId)));
return Promise.resolve([]);
}
/**
 * Scrapes the latest torrents starting from the first page.
 *
 * @returns {Promise<undefined>}
 */
async function scrapeLatestTorrents() {
  const firstPageRun = scrapeLatestTorrentsForCategory();
  return firstPageRun;
}
/**
 * Processes one page of the latest torrents and recurses to the next page
 * while the page produced entries and `UNTIL_PAGE` is not reached.
 * A failed listing request is logged and treated as an empty page.
 *
 * @param {number} [page=1] - listing page to process
 * @returns {Promise<undefined>}
 */
async function scrapeLatestTorrentsForCategory(page = 1) {
  console.log(`Scrapping ${NAME} page ${page}`);
  const listing = eztv.browse(({ page }));
  let torrents;
  try {
    torrents = await listing;
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] due: `, error);
    // return Promises.delay(30000).then(() => scrapeLatestTorrentsForCategory(page))
    torrents = [];
  }
  const resolved = await Promise.all(
      torrents.map((t) => entryLimiter.schedule(() => processTorrentRecord(t))));
  if (resolved.length > 0 && page < UNTIL_PAGE) {
    return scrapeLatestTorrentsForCategory(page + 1);
  }
  return undefined;
}
/**
 * Stores a torrent returned by the API unless `checkAndUpdateTorrent`
 * reports it as already handled.
 *
 * @param {object} record - parsed API torrent (name, infoHash, torrentId,
 *   size, seeders, uploadDate, imdbId)
 * @returns {Promise<object|string>} the record when already known, the
 *   created torrent entry, or the string 'Invalid torrent record' when the
 *   record is missing or has no size
 */
async function processTorrentRecord(record) {
  // Reject missing records before anything dereferences them; previously
  // this guard ran only after the record had been handed to the update check.
  if (!record) {
    return 'Invalid torrent record';
  }
  // NOTE(review): the other scrapers pass `{ provider: NAME, ...record }`
  // here; confirm whether the provider is intentionally omitted.
  if (await checkAndUpdateTorrent(record)) {
    return record;
  }
  if (!record.size) {
    return 'Invalid torrent record';
  }

  // imdb id for talk shows is usually incorrect on eztv
  const parsedTitle = parse(record.name);
  const dateEpisode = !parsedTitle.season && parsedTitle.date;
  const dropImdbId = Boolean(dateEpisode) && await isEpisodeImdbId(record.imdbId);

  const torrent = {
    infoHash: record.infoHash,
    provider: NAME,
    torrentId: record.torrentId,
    title: record.name.replace(/\t|\s+/g, ' ').trim(),
    type: Type.SERIES,
    size: record.size,
    seeders: record.seeders,
    uploadDate: record.uploadDate,
    // The unreliable id is left out of the entry without mutating the caller's record.
    imdbId: dropImdbId ? undefined : record.imdbId,
  };
  return createTorrentEntry(torrent).then(() => torrent);
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,137 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const Promises = require('../../lib/promises');
const defaultUrl = 'https://horriblesubs.info';
const defaultTimeout = 10000;
/**
 * Loads the '/shows/' page and lists every show linked from it.
 *
 * @param {object} [config] - options forwarded to `_getContent`; `proxyUrl`
 *   also replaces `defaultUrl` as the prefix of the returned urls
 * @returns {Promise<Array<{title: string, url: string}>>}
 */
function allShows(config = {}) {
return _getContent('/shows/', config)
.then(($) => $('div[class="ind-show"]')
// First pass swaps each show container for its child anchor(s).
.map((index, element) => $(element).children('a'))
// Second pass reads title/href; `element` is used as a selection here (`.attr`).
.map((index, element) => ({
title: element.attr('title'),
url: `${config.proxyUrl || defaultUrl}${element.attr('href')}`
})).get());
}
/**
 * Collects the batch and single-episode entries of one show.
 *
 * @param {{url: string, title: string}|string} showInfo - show descriptor,
 *   or just its url (in which case `title` is undefined)
 * @param {object} [config] - options forwarded to the entry requests
 * @returns {Promise<{title: *, url: string, showId: string, singleEpisodes: Array, packEpisodes: Array}>}
 */
async function showData(showInfo, config = {}) {
  const url = showInfo.url || showInfo;
  const showEndpoint = url.match(/\/show.+/)[0];
  const { title } = showInfo;
  const showId = await _getShowId(showEndpoint);
  // Batches are requested before single episodes, one request chain at a time.
  const packEpisodes = await _getShowEntries(showId, title, 'batch', config);
  const singleEpisodes = await _getShowEntries(showId, title, 'show', config);

  return { title, url, showId, singleEpisodes, packEpisodes };
}
/**
 * Lists the latest releases and resolves each one to its full entry,
 * one at a time. Releases that cannot be resolved are dropped.
 *
 * @param {object} [config] - options forwarded to the requests
 * @returns {Promise<Array<object>>}
 */
async function getLatestEntries(config = {}) {
  const latest = await _getAllLatestEntries(config);
  const lookups = latest.map((entry) => () => _findLatestEntry(entry, config));
  const resolved = await Promises.sequence(lookups);
  return resolved.filter((entry) => entry);
}
/**
 * Fetches a page and loads it into cheerio.
 *
 * @param {string} endpoint - path, or absolute URL whose origin is replaced
 *   by the effective base url
 * @param {object} [config] - `proxyUrl` (base url override) and `timeout` in ms
 * @returns {Promise<Function>} cheerio root of the loaded page
 */
function _getContent(endpoint, config = {}) {
  const baseUrl = config.proxyUrl || defaultUrl;
  const timeout = config.timeout || defaultTimeout;
  let url = `${baseUrl}${endpoint}`;
  if (endpoint.startsWith('http')) {
    url = endpoint.replace(/https?:\/\/[^/]+/, baseUrl);
  }

  return axios.get(url, { timeout })
      .then(({ data }) => data)
      .then((body) => cheerio.load(body));
}
/**
 * Reads the numeric show id from the inline script of a show page.
 *
 * @param {string} showEndpoint - show path; a trailing "#<n>" fragment and
 *   slash are normalised to a single trailing slash
 * @returns {Promise<string>} the digits of `hs_showid`
 */
function _getShowId(showEndpoint) {
  const normalizedEndpoint = showEndpoint.replace(/(?:#\d+)?\/?$/, '/');
  return _getContent(normalizedEndpoint).then(($) => {
    const script = $('div.entry-content').find('script').html();
    return script.match(/var hs_showid = (\d+)/)[1];
  });
}
/**
 * Loads all entries of a show and keeps those whose title equals the
 * show title exactly.
 *
 * @param {string} animeId - show id
 * @param {string} animeTitle - title to match
 * @param {string} type - entry type passed to the API ('batch' or 'show')
 * @param {object} config - options forwarded to the requests
 * @returns {Promise<Array<object>>}
 */
function _getShowEntries(animeId, animeTitle, type, config) {
  const hasShowTitle = (entry) => entry.title === animeTitle;
  return _getAllEntries(animeId, type, config)
      .then((entries) => entries.filter(hasShowTitle));
}
/**
 * Loads the entries of a show starting at `page`. With `autoExtend` the
 * following pages are appended until an empty page is returned.
 *
 * @param {string} animeId - show id
 * @param {string} type - entry type passed to the API
 * @param {object} config - options forwarded to the requests
 * @param {number} [page=0] - `nextid` value to start from
 * @param {boolean} [autoExtend=true] - whether to follow subsequent pages
 * @returns {Promise<Array<object>>}
 */
function _getAllEntries(animeId, type, config, page = 0, autoExtend = true) {
  const entriesEndpoint = `/api.php?method=getshows&type=${type}&showid=${animeId}&nextid=${page}`;
  return _getEntries(entriesEndpoint, config).then((entries) => {
    if (!autoExtend || !entries.length) {
      return entries;
    }
    return _getAllEntries(animeId, type, config, page + 1)
        .then((nextEntries) => entries.concat(nextEntries));
  });
}
/**
 * Parses the release containers of an entries page.
 *
 * @param {string} endpoint - entries endpoint to fetch.
 * @param {object} config - request overrides forwarded to the fetch.
 * @returns {Promise<object[]>} entries shaped `{ title, episode, uploadDate, mirrors }`, where each
 *     mirror is `{ resolution, magnetLink, torrentLink }`.
 */
function _getEntries(endpoint, config) {
  return _getContent(endpoint, config).then(($) => {
    const toMirror = (i, linkElement) => {
      const link = $(linkElement);
      return {
        // the element id ends with the resolution, e.g. "...720p"
        resolution: link.attr('id').match(/\d+p$/)[0],
        magnetLink: link.find('a[title="Magnet Link"]').attr('href'),
        torrentLink: link.find('a[title="Torrent Link"]').attr('href')
      };
    };
    const toEntry = (i, container) => {
      const release = $(container);
      const label = release.find('a[class="rls-label"]');
      return {
        // the title is the label's first bare text node (nodeType 3)
        title: label.contents().filter((j, node) => node.nodeType === 3).first().text().trim(),
        episode: label.find('strong').text(),
        uploadDate: _parseDate(label.find('span[class="rls-date"]').text()),
        mirrors: release.find('div[class="rls-links-container"]').children().map(toMirror).get()
      };
    };
    return $('div[class="rls-info-container"]').map(toEntry).get();
  });
}
/**
 * Collects the "latest" release links, following pages for as long as a page
 * yields 12 or more links.
 *
 * @param {object} config - request overrides forwarded to the fetch.
 * @param {number} [page=0] - page index; page 0 is requested without a `nextid` parameter.
 * @returns {Promise<Array<{urlEndpoint: string, episode: string}>>}
 */
function _getAllLatestEntries(config, page = 0) {
  const pageParam = page === 0 ? '' : `&nextid=${page}`;
  const entriesEndpoint = `/api.php?method=getlatest${pageParam}`;
  return _getContent(entriesEndpoint, config).then(($) => {
    const entries = $('li a')
        .map((i, anchor) => ({
          urlEndpoint: $(anchor).attr('href'),
          episode: $(anchor).find('strong').text()
        }))
        .get();
    if (entries.length < 12) {
      return entries;
    }
    return _getAllLatestEntries(config, page + 1)
        .then((nextEntries) => entries.concat(nextEntries));
  });
}
/**
 * Finds the full entry matching a "latest" release by paging through the
 * show's single-episode entries until the episode is found or a page is empty.
 *
 * @param {{urlEndpoint: string, episode: string}} entry - release link taken from the latest list.
 * @param {object} config - request overrides forwarded to the entry requests.
 * @returns {Promise<object|undefined>} show data holding the single matching episode, or undefined.
 */
async function _findLatestEntry(entry, config) {
  const showId = await _getShowId(entry.urlEndpoint);
  let foundEntry;
  for (let page = 0; !foundEntry; page++) {
    const pageEntries = await _getAllEntries(showId, 'show', config, page, false);
    foundEntry = pageEntries.find((candidate) => candidate.episode === entry.episode);
    if (pageEntries.length === 0) {
      break;
    }
  }
  if (!foundEntry) {
    return undefined;
  }
  return {
    title: foundEntry.title,
    url: entry.urlEndpoint,
    showId,
    singleEpisodes: [foundEntry]
  };
}
/**
 * Converts a release date label into a Date.
 * Labels containing "today" / "yesterday" (any case) are resolved relative to
 * the current time; anything else is parsed as `MM/DD/YYYY`.
 */
function _parseDate(date) {
  const isToday = date.match(/today/i);
  if (isToday) {
    return moment().toDate();
  }
  const isYesterday = date.match(/yesterday/i);
  if (isYesterday) {
    return moment().subtract(1, 'day').toDate();
  }
  return moment(date, 'MM/DD/YYYY').toDate();
}
module.exports = { allShows, showData, getLatestEntries, _getShowId };

File diff suppressed because it is too large Load Diff

View File

@@ -1,186 +0,0 @@
const fs = require('fs');
const moment = require('moment');
const Bottleneck = require('bottleneck');
const decode = require('magnet-uri');
const horriblesubs = require('./horriblesubs_api.js');
const repository = require('../../lib/repository');
const { Type } = require('../../lib/types');
const { updateCurrentSeeders, updateTorrentSize } = require('../../lib/torrent');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const { getMetadata, getKitsuId } = require('../../lib/metadata');
const showMappings = require('./horriblesubs_mapping.json');
// Provider name: used for the repository provider lookup, in log messages and as the `provider`
// field of the torrents this scraper builds.
const NAME = 'HorribleSubs';
// Age (in seconds) after which the last full scrape is considered stale and a new one is run.
const NEXT_FULL_SCRAPE_OFFSET = 5 * 24 * 60 * 60; // 5 days;
// Caps how many per-show jobs run concurrently.
const limiter = new Bottleneck({ maxConcurrent: 5 });
// Caps how many torrent records are processed concurrently.
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Scraper entry point. Runs a full scrape of all shows when none was recorded
 * or the last one is older than NEXT_FULL_SCRAPE_OFFSET; otherwise only the
 * latest entries are scraped. Only a full scrape updates `lastScraped`.
 */
async function scrape() {
  const scrapeStart = moment();
  const provider = await repository.getProvider({ name: NAME });
  const previousScrape = provider.lastScraped && moment(provider.lastScraped);
  const fullScrapeDue = !previousScrape
      || previousScrape.add(NEXT_FULL_SCRAPE_OFFSET, 'seconds') < scrapeStart;

  if (!fullScrapeDue) {
    console.log(`[${scrapeStart}] scrapping latest ${NAME} entries...`);
    await _scrapeLatestEntries();
    console.log(`[${moment()}] finished scrapping latest ${NAME} entries`);
    return;
  }

  console.log(`[${scrapeStart}] scrapping all ${NAME} shows...`);
  await _scrapeAllShows();
  provider.lastScraped = scrapeStart;
  await provider.save();
  console.log(`[${moment()}] finished scrapping all ${NAME} shows`);
}
/**
 * Seeder refresh hook for this provider; it always resolves to an empty list.
 *
 * @param {object} torrent - ignored.
 * @returns {Promise<Array>} always `[]`.
 */
async function updateSeeders(torrent) {
  return [];
}
/**
 * Processes the latest entries through the shared limiter.
 * A failure on one entry is logged and does not abort the others.
 */
async function _scrapeLatestEntries() {
  const latestEntries = await horriblesubs.getLatestEntries();
  const parseSafely = (entryData) => _parseShowData(entryData)
      .catch((err) => console.log(err));
  const jobs = latestEntries.map((entryData) => limiter.schedule(() => parseSafely(entryData)));
  return Promise.all(jobs);
}
/**
 * Fetches and processes every listed show through the shared limiter, without
 * refreshing seeders of already stored torrents. A failure on one show is
 * logged and does not abort the others.
 */
async function _scrapeAllShows() {
  const shows = await horriblesubs.allShows();
  const scrapeShow = (show) => horriblesubs.showData(show)
      .then((showData) => _parseShowData(showData, false))
      .catch((err) => console.log(err));
  return Promise.all(shows.map((show) => limiter.schedule(() => scrapeShow(show))));
}
/**
 * Diagnostic helper: re-resolves kitsu ids through search and logs the shows
 * whose stored mapping disagrees, together with the mismatch ratio.
 * Note that only the first listed show is checked (`slice(0, 1)`).
 */
async function compareSearchKitsuIds() {
  console.log(`${NAME}: initiating kitsu compare...`);
  const listedShows = await horriblesubs.allShows();
  const sample = listedShows.slice(0, 1);
  const shows = await Promise.all(sample.map((show) => limiter.schedule(() => enrichShow(show))));
  const incorrect = shows.filter((show) => {
    const mapping = showMappings[show.title];
    return mapping && mapping.kitsu_id !== show.kitsu_id;
  });
  const incorrectRatio = incorrect.length / shows.length;
  console.log(incorrect);
  console.log(`Ratio: ${incorrectRatio}`);
}
/**
 * Resolves kitsu metadata for every show that has no mapping yet, adds the
 * results to the in-memory `showMappings` object and writes the whole mapping
 * back to the JSON file. The write is not awaited; its outcome is only logged.
 */
async function initMapping() {
  console.log(`${NAME}: initiating kitsu mapping...`);
  const listedShows = await horriblesubs.allShows();
  const unmapped = listedShows.filter((show) => !showMappings[show.title]);
  const enriched = await Promise.all(unmapped.map((show) => limiter.schedule(() => enrichShow(show))));
  for (const show of enriched) {
    showMappings[show.title] = show;
  }

  const onWritten = (err) => {
    if (!err) {
      console.log(`${NAME}: finished kitsu mapping`);
      return;
    }
    console.log("An error occurred while writing JSON Object to File.", err);
  };
  fs.writeFile(
      "./scraper/scrapers/horriblesubs/horriblesubs_mapping.json",
      JSON.stringify(showMappings),
      'utf8',
      onWritten
  );
}
/**
 * Adds the show id and kitsu/imdb metadata to a show descriptor.
 * If the show id lookup fails the show title is used in its place; if the
 * metadata lookup fails the metadata fields are left undefined.
 *
 * @param {{title: string, url: string}} show
 * @returns {Promise<object>} `{ showId, kitsu_id, ...show, kitsuTitle, imdb_id }`.
 */
async function enrichShow(show) {
  console.log(`${NAME}: getting show info for ${show.title}...`);
  const fallbackToTitle = () => show.title;
  const showId = await horriblesubs._getShowId(show.url).catch(fallbackToTitle);

  const onMetadataFailure = (error) => {
    console.log(`Failed getting kitsu meta: ${error.message}`);
    return {};
  };
  const metadata = await getKitsuId({ title: show.title })
      .then((kitsuId) => getMetadata(kitsuId))
      .catch(onMetadataFailure);

  return {
    showId,
    kitsu_id: metadata.kitsuId,
    ...show,
    kitsuTitle: metadata.title,
    imdb_id: metadata.imdbId
  };
}
/**
 * Converts the scraped releases of one show into torrent records and stores them.
 *
 * @param {object} showData - `{ title, singleEpisodes?, packEpisodes? }` as produced by the api module.
 * @param {boolean} [updateSeeders=true] - forwarded to processTorrentRecord for already stored torrents.
 * @returns {Promise<void>} resolves once every record has been processed.
 * @throws {Error} when the show has no kitsu mapping, or the mapping has no kitsu id.
 */
async function _parseShowData(showData, updateSeeders = true) {
  console.log(`${NAME}: scrapping ${showData.title} data...`);
  const showMapping = showMappings[showData.title];
  if (!showMapping) {
    throw new Error(`No kitsu mapping found for ${showData.title}`);
  }
  const kitsuId = showMapping.kitsu_id;
  if (!kitsuId) {
    throw new Error(`No kitsuId found for ${showData.title}`);
  }

  // sometimes horriblesubs entry contains multiple season in it, so need to split it per kitsu season entry
  const kitsuIdsMapping = Array.isArray(kitsuId)
      ? await _buildEpisodeMapping(kitsuId)
      : {};

  const formatTitle = (episodeInfo, mirror) => {
    const mapping = kitsuIdsMapping[episodeInfo.episode.replace(/^0+/, '')];
    if (mapping) {
      return `${mapping.title} - ${mapping.episode} [${mirror.resolution}]`;
    }
    return `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`;
  };

  // Named differently from the imported `getKitsuId` helper so it no longer shadows it.
  const resolveKitsuId = (inputEpisode) => {
    // for ranges such as "01-12" the first episode decides the season
    const episodeString = inputEpisode.includes('-') && inputEpisode.split('-')[0] || inputEpisode;
    const episode = parseInt(episodeString, 10);
    if (kitsuIdsMapping[episode]) {
      return kitsuIdsMapping[episode].kitsuId;
    } else if (Array.isArray(kitsuId)) {
      console.warn(`Unmapped episode number for ${showData.title} - ${inputEpisode}`);
      return undefined;
    }
    return kitsuId;
  };

  const toTorrent = (episodeInfo, mirror) => {
    // decode the magnet once instead of once per derived field
    const magnet = decode(mirror.magnetLink);
    return {
      provider: NAME,
      ...mirror,
      infoHash: magnet.infoHash,
      // `tr` is undefined without trackers and a plain string with exactly one,
      // so normalise to an array before joining
      trackers: [].concat(magnet.tr || []).join(','),
      title: formatTitle(episodeInfo, mirror),
      type: Type.ANIME,
      kitsuId: resolveKitsuId(episodeInfo.episode),
      uploadDate: episodeInfo.uploadDate,
    };
  };

  const episodes = [].concat(showData.singleEpisodes || []).concat(showData.packEpisodes || []);
  const torrents = episodes
      .map((episodeInfo) => episodeInfo.mirrors
          .filter((mirror) => mirror.magnetLink && mirror.magnetLink.length)
          .map((mirror) => toTorrent(episodeInfo, mirror)))
      .reduce((a, b) => a.concat(b), [])
      .filter((torrent) => torrent.kitsuId);

  await Promise.all(torrents
      .map((torrent) => entryLimiter.schedule(() => processTorrentRecord(torrent, updateSeeders))));
  console.log(`${NAME}: finished scrapping ${showData.title} data`);
}

/**
 * Builds a map from the show's continuous episode number to
 * `{ kitsuId, episode, title }` of the kitsu season that episode belongs to.
 * Seasons are laid out back to back in the order of `kitsuIds`.
 */
async function _buildEpisodeMapping(kitsuIds) {
  const metas = await Promise.all(kitsuIds.map((id) => getMetadata(id)));
  return metas.reduce((map, meta) => {
    const epOffset = Object.keys(map).length;
    const episodeCount = meta.totalCount || 1;
    for (let ep = 1; ep <= episodeCount; ep++) {
      map[ep + epOffset] = { kitsuId: meta.kitsuId, episode: ep, title: meta.title };
    }
    return map;
  }, {});
}
/**
 * Stores a torrent record, or refreshes it when this provider already stored it.
 *
 * @param {object} torrent - torrent record built from a release mirror.
 * @param {boolean} [updateSeeders=true] - whether to refresh seeders of an already stored torrent.
 * @returns {Promise<*>} result of the update/create call, or the torrent itself when nothing was done.
 */
async function processTorrentRecord(torrent, updateSeeders = true) {
  const stored = await repository.getTorrent(torrent).catch(() => undefined);
  const storedByThisProvider = stored && stored.provider === NAME;

  if (!storedByThisProvider) {
    return updateTorrentSize(torrent)
        .then((updated) => updateCurrentSeeders(updated))
        .then((updated) => createTorrentEntry(updated, true))
        .catch((error) => console.warn(`Failed creating entry for ${torrent.title}:`, error));
  }
  if (!updateSeeders) {
    return torrent;
  }
  return updateCurrentSeeders(torrent)
      .then((updatedSeeders) => checkAndUpdateTorrent(updatedSeeders));
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,161 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { parseSize } = require("../scraperHelper");
// Base urls queried when the caller's config carries no `proxyList`.
const defaultProxies = [
  'https://katcr.co'
];
// Value handed to axios as `timeout` when the caller's config carries no `timeout`.
const defaultTimeout = 10000;
// Category slugs as they are placed into request urls (`/category/<slug>/page/<n>`)
// and read back from the category links of parsed pages.
const Categories = {
  MOVIE: 'movies',
  TV: 'tv',
  ANIME: 'anime',
  APPS: 'applications',
  GAMES: 'games',
  MUSIC: 'music',
  BOOKS: 'books',
  PORN: 'xxx',
  OTHER: 'other',
};
/**
 * Fetches and parses a torrent details page, retrying on any failure.
 *
 * @param {string} torrentId - id placed after `/torrent/` in the url.
 * @param {object} [config] - optional `proxyList` / `timeout` overrides.
 * @param {number} [retries=2] - remaining attempts.
 * @returns {Promise<object>} parsed torrent with `torrentId` added; rejects once attempts run out.
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} search`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const requests = proxyList.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`, config));
  return Promises.first(requests)
      .then((body) => parseTorrentPage(body))
      .then((parsed) => ({ torrentId, ...parsed }))
      .catch(() => torrent(torrentId, config, retries - 1));
}
/**
 * Searches torrents by keyword, retrying on any failure.
 *
 * @param {string} keyword - search term placed into the url path as is.
 * @param {object} [config] - optional `proxyList`, `page` (default 1), `category` and `timeout`.
 * @param {number} [retries=2] - remaining attempts.
 * @returns {Promise<object[]>} parsed result rows; rejects once attempts run out.
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const { category } = config;
  const page = config.page || 1;
  const proxyList = config.proxyList || defaultProxies;
  const requests = proxyList
      .map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/99/${category}`, config));
  return Promises.first(requests)
      .then((body) => parseTableBody(body))
      .catch(() => search(keyword, config, retries - 1));
}
/**
 * Lists one page of a category, retrying on any failure.
 *
 * @param {object} [config] - `category`, plus optional `page` (default 1), `proxyList` and `timeout`.
 * @param {number} [retries=2] - remaining attempts.
 * @returns {Promise<object[]>} parsed rows; rejects once attempts run out.
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const { category } = config;
  const page = config.page || 1;
  const proxyList = config.proxyList || defaultProxies;
  const requests = proxyList
      .map((proxyUrl) => singleRequest(`${proxyUrl}/category/${category}/page/${page}`, config));
  return Promises.first(requests)
      .then((body) => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
/**
 * Performs one GET request and rejects when the body is missing or does not
 * look like a regular site page.
 *
 * @param {string} requestUrl - absolute url to fetch.
 * @param {object} [config] - optional `timeout` override.
 * @returns {Promise<string>} the response body.
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  return axios.get(requestUrl, { timeout }).then((response) => {
    const body = response.data;
    if (!body) {
      throw new Error(`No body: ${requestUrl}`);
    }
    if (body.includes('Access Denied')) {
      console.log(`Access Denied: ${requestUrl}`);
      throw new Error(`Access Denied: ${requestUrl}`);
    }
    const errorMarkers = ['502: Bad gateway', '403 Forbidden', 'Origin DNS error'];
    const hasErrorMarker = errorMarkers.some((marker) => body.includes(marker));
    // every valid page is expected to carry the site title
    if (hasErrorMarker || !body.includes('Kickass Torrents</title>')) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  });
}
/**
 * Parses the torrent table of a search/browse page.
 *
 * @param {string} body - page html.
 * @returns {Promise<object[]>} one object per table row:
 *     `{ name, infoHash, magnetLink, torrentId, category, seeders, leechers, size, uploadDate }`.
 *     Rejects when any row cannot be parsed.
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
      // stop here instead of running the parsing below on a failed load
      return;
    }

    const torrents = [];

    $('.table > tbody > tr').each((i, element) => {
      const row = $(element);
      const titleLink = row.find('a[class="torrents_table__torrent_title"]').first();
      const magnetLink = row.find('a[title="Torrent magnet link"]').attr('href');
      torrents.push({
        name: titleLink.children('b').text(),
        infoHash: decode(magnetLink).infoHash,
        magnetLink: magnetLink,
        torrentId: titleLink.attr('href').replace('/torrent/', ''),
        category: row.find('span[class="torrents_table__upload_info"]').first().children('a').first().attr('href')
            .match(/category\/([^\/]+)/)[1],
        // explicit radix so the counts are always read as decimal
        seeders: parseInt(row.find('td[data-title="Seed"]').first().text(), 10),
        leechers: parseInt(row.find('td[data-title="Leech"]').first().text(), 10),
        size: parseSize(row.find('td[data-title="Size"]').first().text()),
        uploadDate: moment(row.find('td[data-title="Age"]').first().attr('title')).toDate()
      });
    });

    resolve(torrents);
  });
}
/**
 * Parses a torrent details page.
 *
 * @param {string} body - page html.
 * @returns {Promise<object>} `{ name, infoHash, magnetLink, seeders, leechers, category, languages,
 *     size, uploadDate, imdbId, files }`; `files` is removed when 50 or more files were parsed.
 *     Any exception thrown while reading the page rejects the promise.
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    const content = $('div[class="col"]').first();
    // the stats block's parent holds the title, magnet link, category and upload time
    const info = content.find('div[class="torrent_stats"]').parent();
    const description = content.find('div[id="main"]');
    const magnetLink = info.find('a[title="Download verified Magnet"]').attr('href');
    // the imdb id is taken from the first imdb.com title link found in the description html
    const imdbIdMatch = description.html().match(/imdb\.com\/title\/(tt\d+)/i);

    const torrent = {
      name: info.find('h1').first().text(),
      infoHash: decode(magnetLink).infoHash,
      magnetLink: magnetLink,
      seeders: parseInt(info.find('span[class="torrent_stats__seed_count mr-2"]').first().text().match(/\d+/)[0], 10),
      leechers: parseInt(info.find('span[class="torrent_stats__leech_count mr-2"]').first().text().match(/\d+/)[0], 10),
      category: info.find('small').first().children('a').first().attr('href').match(/\/category\/([^\/]+)/)[1],
      languages: description.find('span:contains(\'Audio\')').next().children().eq(0).text(),
      // total size is read from the "(Size: ...)" text of the first file list item
      size: parseSize(description.find('ul[class="file_list"]').first().find('li').first().contents().eq(2).text()
          .match(/\(Size: (.+)\)/)[1]),
      uploadDate: moment(info.find('time').first().text()).toDate(),
      imdbId: imdbIdMatch && imdbIdMatch[1],
      files: content.find('ul[class="file_list"]').first().find('li > ul > li[class="file_list__file"]')
          .map((i, elem) => $(elem))
          .map((i, ele) => ({
            fileIndex: i,
            // name is the path with everything up to the last '/' removed
            name: ele.find('span > ul > li').contents().eq(1).text().trim().replace(/^.+\//g, ''),
            path: ele.find('span > ul > li').contents().eq(1).text().trim(),
            size: parseSize(ele.contents().eq(2).text())
          })).get()
    };
    if (torrent.files.length >= 50) {
      // a max of 50 files are displayed on the page
      delete torrent.files;
    }
    resolve(torrent);
  });
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,98 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const LineByLineReader = require('line-by-line');
const fs = require('fs');
const { Type } = require('../../lib/types');
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
// Provider name written into the `provider` field of created torrents.
const NAME = 'KickassTorrents';
// Location of the dump file that scrape() reads line by line and removes when done.
const CSV_FILE_PATH = '/tmp/kickass.csv';
// Caps how many dump records are processed concurrently.
const limiter = new Bottleneck({ maxConcurrent: 40 });
/**
 * Imports the KAT dump from CSV_FILE_PATH, one `|`-separated line per torrent.
 *
 * Reading is paused while the limiter has queued jobs and resumed once the
 * queue drains. The file is removed after the last line has been read.
 * The returned promise resolves as soon as the reader is set up; it does not
 * wait for the import to finish.
 */
async function scrape() {
  // the dump is identified by its path; `lastDump` was never defined in this module
  console.log(`starting to scrape KAT dump: ${CSV_FILE_PATH}`);

  let entriesProcessed = 0;
  const lr = new LineByLineReader(CSV_FILE_PATH);
  lr.on('line', (line) => {
    if (entriesProcessed % 1000 === 0) {
      console.log(`Processed ${entriesProcessed} entries`);
    }
    // fields are separated by '|'; a quoted field may itself contain '|'
    const row = line.match(/(?<=^|\|)(".*"|[^|]+)(?=\||$)/g);
    // match() yields null for lines without any field (e.g. empty lines)
    if (!row || row.length !== 11) {
      console.log(`Invalid row: ${line}`);
      return;
    }
    const torrent = {
      infoHash: row[0].toLowerCase(),
      title: row[1]
          .replace(/^"|"$/g, '')
          .replace(/&amp;/g, '&')
          .replace(/&\w{2,6};/g, ' ')
          .replace(/\s+/g, ' ')
          .trim(),
      category: row[2],
      size: parseInt(row[5], 10),
      seeders: parseInt(row[8], 10),
      uploadDate: moment.unix(parseInt(row[10], 10)).toDate(),
    };

    if (!limiter.empty()) {
      lr.pause()
    }

    limiter.schedule(() => processTorrentRecord(torrent)
        .catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
        .then(() => limiter.empty())
        .then((empty) => empty && lr.resume())
        .then(() => entriesProcessed++);
  });
  lr.on('error', (err) => {
    console.log(err);
  });
  lr.on('end', () => {
    // fs.unlink requires a callback; report a failed removal instead of throwing
    fs.unlink(CSV_FILE_PATH, (err) => {
      if (err) {
        console.log(`Failed removing ${CSV_FILE_PATH}:`, err);
      }
    });
    console.log(`finished to scrape KAT dump: ${CSV_FILE_PATH}!`);
  });
}
// Maps the dump's category column to the internal torrent type.
// Records whose category is not listed here are stored as skip entries by processTorrentRecord.
const categoryMapping = {
  "Movies": Type.MOVIE,
  "TV": Type.SERIES,
  "Anime": Type.ANIME
};
/**
 * Stores one dump record.
 * Records with an unmapped category, zero seeders or no matching torrent are
 * stored as skip entries; records already stored are left untouched.
 *
 * @param {object} record - `{ infoHash, title, category, size, seeders, uploadDate }` parsed from a dump line.
 */
async function processTorrentRecord(record) {
  const type = categoryMapping[record.category];
  if (!type || record.seeders === 0) {
    return createSkipTorrentEntry(record);
  }

  const alreadyStored = await getStoredTorrentEntry(record);
  if (alreadyStored) {
    return;
  }

  const torrentFound = await findTorrent(record).catch(() => undefined);
  if (!torrentFound) {
    return createSkipTorrentEntry(record);
  }

  return createTorrentEntry({
    infoHash: record.infoHash,
    provider: NAME,
    title: torrentFound.name,
    size: record.size,
    type,
    imdbId: torrentFound.imdbId,
    uploadDate: record.uploadDate,
    seeders: torrentFound.seeders,
  });
}
/**
 * Lookup stub: always rejects with the string "not found".
 *
 * @param {object} record - ignored.
 */
async function findTorrent(record) {
  throw "not found";
}
module.exports = { scrape, NAME };

View File

@@ -1,91 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const kickass = require('./kickass_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
// Provider name: used for the repository provider lookup, in log messages and as the `provider`
// field of created torrents.
const NAME = 'KickassTorrents';
// Last category page to scrape; paging stops once this page has been processed.
const UNTIL_PAGE = 10;
// Site category slug -> internal torrent type (built by typeMapping() below).
const TYPE_MAPPING = typeMapping();
// Caps how many torrent jobs run concurrently.
const limiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Scraper entry point: scrapes the latest torrents and then records the time
 * the scrape started as the provider's `lastScraped`.
 */
async function scrape() {
  const scrapeStart = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  provider.lastScraped = scrapeStart;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Re-fetches a torrent's details page through the limiter.
 *
 * @param {{torrentId: string}} torrent
 * @returns {Promise<object>} the freshly parsed torrent.
 */
async function updateSeeders(torrent) {
  const refetch = () => kickass.torrent(torrent.torrentId);
  return limiter.schedule(refetch);
}
/**
 * Scrapes the movie, tv and anime categories one after another and
 * concatenates the per-category results.
 */
async function scrapeLatestTorrents() {
  const { MOVIE, TV, ANIME } = kickass.Categories;
  const tasks = [MOVIE, TV, ANIME]
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(tasks);
  return perCategory.reduce((all, entries) => all.concat(entries), []);
}
/**
 * Scrapes one category page and continues with the next page until a page
 * yields no torrents or UNTIL_PAGE is reached. A failed page request is logged
 * and treated as an empty page.
 *
 * @param {string} category - site category slug.
 * @param {number} [page=1] - page to start from.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  const torrents = await kickass.browse({ category, page }).catch((error) => {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    return [];
  });
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processTorrentRecord(torrent))));
  if (resolved.length > 0 && page < UNTIL_PAGE) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
/**
 * Stores one table row as a torrent entry unless it is already stored.
 *
 * @param {object} record - row parsed from a category page.
 * @returns {Promise<object|string>} the record when already stored, the created torrent, or the
 *     string 'Invalid torrent record' when the details page is unavailable or its category is unmapped.
 */
async function processTorrentRecord(record) {
  if (await checkAndUpdateTorrent(record)) {
    return record;
  }

  const torrentFound = await kickass.torrent(record.torrentId).catch(() => undefined);
  const type = torrentFound && TYPE_MAPPING[torrentFound.category];
  if (!type) {
    return 'Invalid torrent record';
  }

  const torrent = {
    infoHash: torrentFound.infoHash,
    provider: NAME,
    torrentId: torrentFound.torrentId,
    title: torrentFound.name.replace(/\t|\s+/g, ' '),
    type,
    size: torrentFound.size,
    seeders: torrentFound.seeders,
    uploadDate: torrentFound.uploadDate,
    imdbId: torrentFound.imdbId,
    languages: torrentFound.languages || undefined
  };
  await createTorrentEntry(torrent);
  return torrent;
}
/**
 * Builds the lookup from site category slug to internal torrent type.
 */
function typeMapping() {
  const { MOVIE, TV, ANIME } = kickass.Categories;
  return {
    [MOVIE]: Type.MOVIE,
    [TV]: Type.SERIES,
    [ANIME]: Type.ANIME
  };
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,145 +0,0 @@
const axios = require('axios');
const moment = require("moment")
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages } = require('../scraperHelper')
// Value handed to axios as `timeout` when the caller's config carries no `timeout`.
const defaultTimeout = 10000;
// Upper bound for `config.extendToPage` when search() follows result pages.
const maxSearchPage = 50
const baseUrl = 'https://lapumia.org';
// Category path segments used by browse(); MOVIE is null, in which case browse() requests the
// un-prefixed `/page/<n>/` listing.
const Categories = {
  MOVIE: null,
  TV: 'series',
  ANIME: 'animes',
};
/**
 * Fetches and parses a torrent page, retrying on any failure.
 *
 * @param {string} torrentId - page slug appended to the base url.
 * @param {object} [config] - optional `timeout` override.
 * @param {number} [retries=2] - remaining attempts.
 * @returns {Promise<object[]>} parsed torrents, each with `torrentId` added; rejects once attempts run out.
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const attachId = (entries) => entries.map((el) => ({ torrentId, ...el }));
  const retry = (err) => {
    console.warn(`Failed Lapumia ${torrentId} request: `, err);
    return torrent(torrentId, config, retries - 1)
  };
  return singleRequest(`${baseUrl}/${torrentId}`, config)
      .then((body) => parseTorrentPage(body))
      .then(attachId)
      .catch(retry);
}
/**
 * Searches posts by keyword, optionally following result pages.
 *
 * @param {string} keyword - search term placed into the `s` query parameter as is.
 * @param {object} [config] - optional `page` (default 1), `extendToPage` (capped at maxSearchPage)
 *     and `timeout`.
 * @param {number} [retries=2] - remaining attempts.
 * @returns {Promise<object[]>} parsed result entries; rejects once attempts run out.
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const extendToPage = Math.min(maxSearchPage, (config.extendToPage || 1))

  return singleRequest(`${baseUrl}/page/${page}/?s=${keyword}`, config)
      .then((body) => parseTableBody(body))
      .then((torrents) => {
        // a page of exactly 10 results means there may be a following page
        if (torrents.length === 10 && page < extendToPage) {
          return search(keyword, { ...config, page: page + 1 })
              .catch(() => [])
              .then((nextTorrents) => torrents.concat(nextTorrents));
        }
        return torrents;
      })
      .catch(() => search(keyword, config, retries - 1));
}
/**
 * Lists one page of posts, retrying on any failure.
 *
 * @param {object} [config] - optional `category` path segment, `page` (default 1) and `timeout`.
 * @param {number} [retries=2] - remaining attempts.
 * @returns {Promise<object[]>} parsed entries; rejects once attempts run out.
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const { category } = config;
  const page = config.page || 1;
  let requestUrl;
  if (category) {
    requestUrl = `${baseUrl}/${category}/page/${page}/`;
  } else {
    requestUrl = `${baseUrl}/page/${page}/`;
  }
  return singleRequest(requestUrl, config)
      .then((body) => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
/**
 * Performs one GET request with a random User-Agent.
 *
 * @param {string} requestUrl - absolute url to fetch.
 * @param {object} [config] - optional `timeout` override.
 * @returns {Promise<string>} the response body; rejects with the error message (or the raw
 *     error when it has none) on request failure or when the body looks like an error page.
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  // axios takes the user agent as a request header; the former `userAgent` and `follow`
  // keys are not axios options and were silently ignored
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };

  return axios.get(requestUrl, options)
      .then((response) => {
        const body = response.data;
        if (!body) {
          throw new Error(`No body: ${requestUrl}`);
        } else if (body.includes('502: Bad gateway') ||
            body.includes('403 Forbidden')) {
          throw new Error(`Invalid body contents: ${requestUrl}`);
        }
        return body;
      })
      .catch(error => Promise.reject(error.message || error));
}
/**
 * Parses the posts of a listing/search page.
 *
 * @param {string} body - page html.
 * @returns {Promise<Array<{name: string, torrentId: string}>>} one entry per parsable post;
 *     posts that fail to parse are logged and skipped.
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
      // stop here instead of running the parsing below on a failed load
      return;
    }

    const torrents = [];

    $('div.post').each((i, element) => {
      const row = $(element);
      try {
        torrents.push({
          name: row.find("div > a").text(),
          // the id is the 4th '/'-separated part of the href (index 3)
          torrentId: row.find("div > a").attr("href").split('/')[3]
        });
      } catch (e) {
        // keep the reason so a broken page layout can be diagnosed from the logs
        console.log("Failed parsing Lapumia table entry:", e.message || e);
      }
    });
    resolve(torrents);
  });
}
/**
 * Parses a post page into one torrent per magnet link.
 *
 * @param {string} body - page html.
 * @returns {Promise<object[]>} torrents shaped `{ title, originalName, year, infoHash, magnetLink,
 *     category, uploadDate, imdbId, languages }`. Any exception thrown while reading the page
 *     rejects the promise.
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    // Take the headings whose span text passes isPtDubbed, then the magnet links found in the
    // siblings that follow each heading up to the next `h2` or `hr`.
    const magnets = $('h2 > span')
        .filter((i, elem) => isPtDubbed($(elem).text())).parent()
        .map((i, elem) => $(elem).nextUntil('h2, hr'))
        .map((i, elem) => $(elem).find('a[href^="magnet"]'))
        .map((i, section) => $(section).attr("href")).get();
    const category = parseCategory($('div.category').html());
    const details = $('div.content')
    // Only magnets carrying a name are kept; the name doubles as the torrent title.
    const torrents = magnets.filter(magnetLink => decode(magnetLink).name).map(magnetLink => ({
      title: sanitizePtName(escapeHTML(decode(magnetLink).name.replace(/\+/g, ' '))),
      // the detail values are the text nodes that directly follow their bold labels
      originalName: sanitizePtOriginalName(details.find('b:contains(\'Titulo Original:\')')[0].nextSibling.nodeValue),
      year: details.find('b:contains(\'Ano de Lançamento:\')')[0].nextSibling.nodeValue.trim(),
      infoHash: decode(magnetLink).infoHash,
      magnetLink: magnetLink,
      category: category,
      // the date is the text before the first '•' in div.infos, parsed with moment's 'LL' format
      // and the 'pt-br' locale
      uploadDate: new Date(moment($('div.infos').text().split('•')[0].trim(), 'LL', 'pt-br').format()),
      imdbId: $('.imdbRatingPlugin').attr('data-title') || null,
      languages: sanitizePtLanguages(details.find('b:contains(\'Idioma\')')[0].nextSibling.nodeValue)
    }))
    resolve(torrents.filter((x) => x));
  });
}
/**
 * Derives the category from the category links of a post.
 * An "Animes" link wins over a "Series" link; without either the post is a movie.
 *
 * @param {string} categorys - html of the post's category block.
 * @returns {string|null} one of the `Categories` values.
 */
function parseCategory(categorys) {
  const $ = cheerio.load(categorys)
  const animeLabel = $('a:contains(\'Animes\')').text();
  if (animeLabel) {
    return Categories.ANIME
  }
  const seriesLabel = $('a:contains(\'Series\')').text();
  return seriesLabel ? Categories.TV : Categories.MOVIE
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,112 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const lapumia = require("./lapumia_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { getImdbId } = require("../../lib/metadata");
// Provider name: used for the repository provider lookup, in log messages and as the `provider`
// field of created torrents.
const NAME = "Lapumia";
// Default last page to scrape per category (see untilPage() for the per-category limit).
const UNTIL_PAGE = 5;
// Site category -> internal torrent type (built by typeMapping() below).
const TYPE_MAPPING = typeMapping();
// Caps how many entry jobs run concurrently.
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Scraper entry point: scrapes the latest torrents and then records the time
 * the scrape started as the provider's `lastScraped`.
 */
async function scrape() {
  const scrapeStart = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  provider.lastScraped = scrapeStart;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Re-fetches a torrent's page through the limiter.
 *
 * @param {{torrentId: string}} torrent
 * @returns {Promise<object[]>} the torrents parsed from that page.
 */
async function updateSeeders(torrent) {
  const refetch = () => lapumia.torrent(torrent.torrentId);
  return limiter.schedule(refetch);
}
/**
 * Scrapes the movie and tv categories one after another and concatenates the
 * per-category results.
 */
async function scrapeLatestTorrents() {
  const { MOVIE, TV } = lapumia.Categories;
  const tasks = [MOVIE, TV]
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(tasks);
  return perCategory.reduce((all, entries) => all.concat(entries), []);
}
/**
 * Scrapes one category page and continues with the next page until a page
 * yields no entries or the category's page limit is reached. A failed page
 * request is logged and treated as an empty page.
 *
 * @param {string|null} category - site category.
 * @param {number} [page=1] - page to start from.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  const torrents = await lapumia.browse({ category, page }).catch((error) => {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    return [];
  });
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processEntry(torrent))));
  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
/**
 * Fetches the torrents of one listed post and stores them one after another.
 *
 * @param {{torrentId: string}} entry - entry parsed from a listing page.
 * @returns {Promise<Array|undefined>} the per-torrent results, or undefined when fetching or
 *     storing failed (the failure is logged, never rethrown).
 */
async function processEntry(entry) {
  return lapumia.torrent(entry.torrentId)
      .then(records => Promises.sequence(records.map(record => () => processTorrentRecord(record))))
      .catch((error) => {
        // best effort: one broken post must not stop the page, but it should leave a trace
        console.warn(`Failed processing ${NAME} entry [${entry.torrentId}] due: `, error);
        return undefined;
      });
}
/**
 * Stores one parsed torrent unless it is already stored, filling in missing
 * size, seeders and (for non-anime) imdb id first.
 *
 * @param {object} foundTorrent - torrent parsed from a post page; updated in place.
 * @returns {Promise<*>} the torrent itself when already stored, else the result of the create call.
 */
async function processTorrentRecord(foundTorrent) {
  const alreadyStored = await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  if (alreadyStored) {
    return foundTorrent;
  }

  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }

  const type = TYPE_MAPPING[foundTorrent.category];
  if (!foundTorrent.imdbId && type !== Type.ANIME) {
    const info = { title: foundTorrent.originalName, year: foundTorrent.year };
    foundTorrent.imdbId = await getImdbId(info, type).catch(() => undefined);
  }

  const { infoHash, torrentId, title, imdbId, uploadDate, seeders, size, files, languages } = foundTorrent;
  return createTorrentEntry({
    infoHash,
    provider: NAME,
    torrentId,
    title,
    type,
    imdbId,
    uploadDate,
    seeders,
    size,
    files,
    languages
  });
}
/**
 * Builds the lookup from site category to internal torrent type.
 */
function typeMapping() {
  const { MOVIE, TV, ANIME } = lapumia.Categories;
  return {
    [MOVIE]: Type.MOVIE,
    [TV]: Type.SERIES,
    [ANIME]: Type.ANIME
  };
}
/**
 * Returns the last page to scrape for a category: 2 for anime, UNTIL_PAGE otherwise.
 */
function untilPage(category) {
  return lapumia.Categories.ANIME === category ? 2 : UNTIL_PAGE;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,73 +0,0 @@
const { pantsu } = require('nyaapi')
// Category codes passed to `pantsu.list`. They have the same `<category>_<sub_category>` shape
// that parseTorrent builds for the `category` field of parsed torrents.
// NOTE(review): the `ALL` values end in `_` with no sub category - presumably "any sub category"; confirm.
const Categories = {
  ANIME: {
    ALL: '3_',
    ENGLISH: '3_5',
    RAW: '3_6',
    MUSIC_VIDEO: '3_12',
    NON_ENGLISH: '3_13',
  },
  LIVE_ACTION: {
    ALL: '5_',
    ENGLISH: '5_9',
    RAW: '5_11',
    PROMOTIONAL_VIDEO: '5_10',
    NON_ENGLISH: '5_18',
  }
}
/**
 * Fetches a single torrent by id.
 *
 * @param {string|number} torrentId
 * @returns {Promise<object>} the parsed torrent; rejects when the id is missing or the request fails.
 */
function torrent(torrentId) {
  if (torrentId) {
    return pantsu.infoRequest(torrentId)
        .then((result) => parseTorrent(result))
        .catch((error) => handleError(error, torrentId));
  }
  return Promise.reject(new Error(`Failed ${torrentId} search`));
}
/**
 * Searches torrents and parses every result.
 *
 * @param {*} query - passed to `pantsu.search` unchanged.
 * @returns {Promise<object[]>}
 */
function search(query) {
  const parseAll = (results) => results.map((found) => parseTorrent(found));
  return pantsu.search(query)
      .then(parseAll)
      .catch((error) => handleError(error, query));
}
/**
 * Lists one page of a category and parses every result.
 *
 * @param {object} [config] - optional `page` (default 1) and `category` (default anime/english).
 * @returns {Promise<object[]>}
 */
function browse(config = {}) {
  const category = config.category || Categories.ANIME.ENGLISH;
  const page = config.page || 1;
  const parseAll = (results) => results.map((found) => parseTorrent(found));
  return pantsu.list(category, page)
      .then(parseAll)
      .catch((error) => handleError(error, category));
}
/**
 * Turns a request failure into a rejection.
 * Failures carrying a status code of 400 or above are replaced by a
 * descriptive Error; anything else is rejected unchanged.
 *
 * @param {object} error - the failure to translate.
 * @param {*} identifier - id/query/category included in the message.
 * @returns {Promise<never>} always a rejected promise.
 */
function handleError(error, identifier) {
  const isHttpFailure = error.statusCode && error.statusCode >= 400;
  const reason = isHttpFailure
      ? new Error(`${error.statusCode}: [${identifier}] failed retrieval on NyaaPantsu`)
      : error;
  return Promise.reject(reason);
}
/**
 * Maps a raw API torrent to the scraper's torrent shape.
 *
 * @param {object} torrent - raw result; reads `name`, `id`, `hash`, `magnet`, `torrent`, `seeders`,
 *     `filesize`, `date`, `category`, `sub_category`, `languages` and `file_list`.
 * @returns {object} `{ title, torrentId, infoHash, magnetLink, torrentLink, seeders, size,
 *     uploadDate, category, languages, files }`; `languages` and `files` are undefined when the
 *     raw torrent has none.
 */
function parseTorrent(torrent) {
  const fileList = torrent.file_list;
  return {
    title: torrent.name.replace(/\t|\s+/g, ' ').trim(),
    torrentId: torrent.id,
    infoHash: torrent.hash.trim().toLowerCase(),
    magnetLink: torrent.magnet,
    torrentLink: torrent.torrent,
    seeders: torrent.seeders,
    size: torrent.filesize,
    uploadDate: new Date(torrent.date),
    category: `${torrent.category}_${torrent.sub_category}`,
    languages: torrent.languages ? torrent.languages.join(',') : undefined,
    files: fileList && fileList.length ? fileList.map((file, fileId) => ({
      fileIndex: fileId,
      // strip the directory part so `name` is the bare file name; the previous
      // replace(/([^\/]+$)/, '$1') substituted the match with itself and kept the full path
      name: file.path.replace(/^.+\//, ''),
      path: file.path,
      size: file.filesize
    })) : undefined
  }
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,97 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const pantsu = require('./nyaa_pantsu_api');
const { Type } = require('../../lib/types');
const Promises = require('../../lib/promises');
const repository = require('../../lib/repository');
const { updateCurrentSeeders, updateTorrentSize } = require('../../lib/torrent');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
// Provider name: used for the repository provider lookup, in log messages and as the `provider`
// field of created torrents.
const NAME = 'NyaaPantsu';
// Last category page to scrape; paging stops once this page has been processed.
const UNTIL_PAGE = 5
// Caps how many torrent jobs run concurrently.
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Scraper entry point: scrapes the latest torrents and then records the time
 * the scrape started as the provider's `lastScraped`.
 */
async function scrape() {
  const scrapeStart = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  provider.lastScraped = scrapeStart;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Re-fetches a torrent through the limiter.
 *
 * @param {{torrentId: string|number}} torrent
 * @returns {Promise<object[]>} the fetched torrent in a one-element list when it carries an
 *     integer seeder count, otherwise an empty list.
 */
async function updateSeeders(torrent) {
  const foundTorrent = await limiter.schedule(() => pantsu.torrent(torrent.torrentId));
  return Number.isInteger(foundTorrent.seeders) ? [foundTorrent] : [];
}
/**
 * Scrapes every allowed category (currently only anime/english) one after
 * another and concatenates the per-category results.
 */
async function scrapeLatestTorrents() {
  const allowedCategories = [pantsu.Categories.ANIME.ENGLISH];
  const tasks = allowedCategories
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(tasks);
  return perCategory.reduce((all, entries) => all.concat(entries), []);
}
/**
 * Processes one listing page and recurses to the next page until a page yields
 * no torrents or UNTIL_PAGE is reached.
 *
 * A failed listing request is logged and treated as an empty page; a failure
 * while processing a single torrent is logged and does not stop the others.
 *
 * @param {string} category category label.
 * @param {number} [page=1] page to request.
 * @returns {Promise<undefined>} resolves when the last page has been processed.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);

  // NOTE(review): only the page is handed to browse; category is used for logging only.
  const torrents = await pantsu.browse({ page }).catch((error) => {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    return [];
  });

  const processSafely = (torrent) => processTorrentRecord(torrent).catch((error) => {
    console.warn(`Failed processing [${torrent.infoHash}] ${torrent.title} due: `, error);
  });
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processSafely(torrent))));

  const hasMorePages = resolved.length > 0 && page < UNTIL_PAGE;
  return hasMorePages ? scrapeLatestTorrentsForCategory(category, page + 1) : undefined;
}
/**
 * Stores a scraped record as a new torrent entry unless it is already handled.
 *
 * A missing record, or one for which `checkAndUpdateTorrent` resolves truthy,
 * is returned untouched. Otherwise a missing size and missing seeders are
 * filled in via the torrent helpers before the entry is created.
 *
 * @param {object} [record] record produced by the API module.
 * @returns {Promise<object|undefined>} the input record when nothing was
 *   created, otherwise the torrent object passed to `createTorrentEntry`.
 */
async function processTorrentRecord(record) {
  const alreadyHandled = !record || await checkAndUpdateTorrent(record);
  if (alreadyHandled) {
    return record;
  }

  if (!record.size) {
    await updateTorrentSize(record);
  }
  const seedersMissing = record.seeders === null || record.seeders === undefined;
  if (seedersMissing) {
    await updateCurrentSeeders(record);
  }

  // Read the fields only now: the helpers above are awaited before the entry is built.
  const { infoHash, torrentLink, torrentId, title, size, seeders, uploadDate, languages, files } = record;
  const torrent = {
    infoHash,
    torrentLink,
    provider: NAME,
    torrentId,
    title,
    type: Type.ANIME,
    size,
    seeders,
    uploadDate,
    languages,
    files: files || undefined,
  };
  await createTorrentEntry(torrent);
  return torrent;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,66 +0,0 @@
const { si } = require('nyaapi')
const { parseSize } = require("../scraperHelper");
// Category identifiers grouped by section; the values are the strings handed to
// the nyaapi `si` client (see search() and browse() in this module).
const Categories = {
ANIME: {
ALL: '1_0',
MUSIC_VIDEO: '1_1',
ENGLISH: '1_2',
NON_ENGLISH: '1_3',
RAW: '1_4'
},
LIVE_ACTION: {
ALL: '4_0',
ENGLISH: '4_1',
PROMOTIONAL_VIDEO: '4_2',
NON_ENGLISH: '4_3',
RAW: '4_4'
}
}
/**
 * Fetches a single torrent by id and returns it in this module's parsed shape.
 *
 * @param {string|number} torrentId id to request; a falsy id is rejected.
 * @returns {Promise<object>} the parsed torrent with `torrentId` set to the
 *   requested id. Rejects with a "404: ... not found" Error when the request
 *   error carries `statusCode` 404, otherwise with the original error.
 */
function torrent(torrentId) {
  if (!torrentId) {
    return Promise.reject(new Error(`Failed ${torrentId} search`));
  }

  return si.infoRequest(torrentId)
      .then((details) => ({ ...parseTorrent(details), torrentId }))
      .catch((error) => {
        throw error.statusCode === 404
          ? new Error(`404: [${torrentId}] not found on NyaaSi`)
          : error;
      });
}
/**
 * Searches the English anime category for the given query.
 *
 * @param {string} query search term passed to the nyaapi client.
 * @returns {Promise<object[]>} the matches in this module's parsed shape.
 */
function search(query) {
  const options = { category: Categories.ANIME.ENGLISH};
  return si.search(query, null, options)
      .then((matches) => matches.map((match) => parseTorrent(match)));
}
/**
 * Lists one page of a category.
 *
 * @param {object} [config]
 * @param {number} [config.page] page number, 1 when falsy.
 * @param {string} [config.category] category id, English anime when falsy.
 * @param {string} [config.sort] sort key, 'id' when falsy.
 * @returns {Promise<object[]>} the parsed torrents; an empty array when the
 *   response carries no `results`.
 */
function browse(config = {}) {
  const { page, category, sort } = config;
  return si.list(category || Categories.ANIME.ENGLISH, page || 1, { sort: sort || 'id' })
      .then((response) => (response.results || []).map((entry) => parseTorrent(entry)));
}
/**
 * Converts a raw nyaapi torrent into the record shape used by the scrapers.
 *
 * @param {object} torrent raw entry; reads `name`, `id`, `hash`, `magnet`,
 *   `torrent`, `seeders`, `filesize`, `date` and `sub_category`.
 * @returns {object} record with a whitespace-normalised `title`, a lower-cased
 *   `infoHash`, numeric `seeders`, `size` as returned by `parseSize` and
 *   `uploadDate` as a Date.
 */
function parseTorrent(torrent) {
  return {
    // Tabs and whitespace runs collapse to a single space.
    title: torrent.name.replace(/\t|\s+/g, ' ').trim(),
    torrentId: torrent.id,
    infoHash: torrent.hash.trim().toLowerCase(),
    magnetLink: torrent.magnet,
    torrentLink: torrent.torrent,
    // Explicit radix: seeder counts are decimal, never hex ("0x..") literals.
    seeders: parseInt(torrent.seeders, 10),
    size: parseSize(torrent.filesize),
    uploadDate: new Date(torrent.date),
    category: torrent.sub_category,
  };
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,87 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const nyaasi = require('./nyaa_si_api');
const { Type } = require('../../lib/types');
const Promises = require('../../lib/promises');
const repository = require('../../lib/repository');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
// Provider name: used for the repository provider lookup, stored on created torrents and printed in logs.
const NAME = 'NyaaSi';
// Last listing page scrapeLatestTorrentsForCategory requests before it stops paging.
const UNTIL_PAGE = 10
// Shared Bottleneck limiter: at most 10 scheduled jobs run concurrently.
const limiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Runs one scrape of the latest torrents and records when it started.
 *
 * The provider row is loaded up front; its `lastScraped` field is set to the
 * scrape start time and saved only after scraping finished without rejecting.
 * For debugging, individual torrents can instead be fetched with
 * `nyaasi.torrent(id)` or `nyaasi.search(query)` and handed to
 * `processTorrentRecord`.
 *
 * @returns {Promise<void>} resolves after the provider row has been saved.
 */
async function scrape() {
  const startedAt = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  provider.lastScraped = startedAt;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Seeder refresh is switched off for this provider.
 *
 * An earlier variant looked the torrent up with
 * `nyaasi.torrent(torrent.torrentId)` through the limiter; it is disabled, so
 * the argument is ignored.
 *
 * @param {object} torrent unused.
 * @returns {Promise<Array>} always an empty array.
 */
async function updateSeeders(torrent) {
  return [];
}
/**
 * Scrapes every allowed category one after another and merges the results.
 *
 * Only the English anime category is currently allowed.
 *
 * @returns {Promise<Array>} the per-category results concatenated into one array.
 */
async function scrapeLatestTorrents() {
  const categoryJobs = [nyaasi.Categories.ANIME.ENGLISH]
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(categoryJobs);
  return perCategory.reduce((merged, entries) => merged.concat(entries), []);
}
/**
 * Processes one listing page of a category and recurses to the next page until
 * a page yields no torrents or UNTIL_PAGE is reached.
 *
 * The category is forwarded to `nyaasi.browse`, so the listing matches the
 * category that is being logged and scraped. A failed listing request is
 * logged and treated as an empty page; a failure while processing a single
 * torrent is logged and does not stop the others.
 *
 * @param {string} category category id passed to the API.
 * @param {number} [page=1] page to request.
 * @returns {Promise<undefined>} resolves when the last page has been processed.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  return nyaasi.browse({ page, category, sort: 'id' })
      .catch(error => {
        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
        return Promise.resolve([]);
      })
      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)
          .catch(error => {
            console.warn(`Failed processing [${torrent.infoHash}] ${torrent.title} due: `, error);
            return Promise.resolve();
          })))))
      .then(resolved => resolved.length > 0 && page < UNTIL_PAGE
          ? scrapeLatestTorrentsForCategory(category, page + 1)
          : Promise.resolve());
}
/**
 * Stores a scraped record as a new torrent entry unless it is already handled.
 *
 * @param {object} [record] record produced by the API module.
 * @returns {Promise<object|undefined>} the input record when it is missing or
 *   `checkAndUpdateTorrent` resolved truthy, otherwise the torrent object that
 *   was passed to `createTorrentEntry`.
 */
async function processTorrentRecord(record) {
  const alreadyHandled = !record || await checkAndUpdateTorrent(record);
  if (alreadyHandled) {
    return record;
  }

  const { infoHash, torrentLink, torrentId, title, size, seeders, uploadDate } = record;
  const torrent = {
    infoHash,
    torrentLink,
    provider: NAME,
    torrentId,
    title,
    type: Type.ANIME,
    size,
    seeders,
    uploadDate,
  };
  await createTorrentEntry(torrent);
  return torrent;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,151 +0,0 @@
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages } = require('../scraperHelper')
// Request timeout in milliseconds, used when the caller's config has no `timeout`.
const defaultTimeout = 30000;
// Upper bound for `config.extendToPage` in search().
const maxSearchPage = 50
// Site root that every request URL in this module is built from.
const baseUrl = 'https://ondebaixa.com';
// Category path segments used in browse URLs; parseCategory() also returns these values.
const Categories = {
MOVIE: 'filmes',
TV: 'series',
ANIME: 'anime',
DESENHOS: 'desenhos'
};
/**
 * Extracts the page slug from a torrent reference.
 *
 * Absolute page URLs ("https://host/slug/") keep the original behaviour of
 * using the fourth "/"-separated segment. Anything shorter - a bare slug or a
 * relative "/slug/" path - falls back to its last non-empty segment.
 */
function extractOndeBaixaSlug(torrentId) {
  const segments = torrentId.split('/');
  return segments.length > 3 ? segments[3] : segments.filter((segment) => segment).pop();
}

/**
 * Loads a torrent page and returns the torrents found on it.
 *
 * Accepts the absolute page URL taken from a listing as well as the bare slug
 * that this function itself returns as `torrentId`, so stored ids can be
 * passed back in.
 *
 * @param {string} torrentId page URL or slug.
 * @param {object} [config] request config forwarded to `singleRequest`.
 * @param {number} [retries=2] attempts left.
 * @returns {Promise<object[]>} parsed torrents, each tagged with
 *   `torrentId` = slug unless the parsed entry carries its own. Rejects with a
 *   generic "Failed ... query" Error for a falsy id or when all attempts failed.
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const slug = encodeURIComponent(extractOndeBaixaSlug(torrentId));
  return singleRequest(`${baseUrl}/${slug}/`, config)
      .then((body) => parseTorrentPage(body))
      .then((torrents) => torrents.map((el) => ({ torrentId: slug, ...el })))
      .catch((err) => {
        console.warn(`Failed OndeBaixo ${slug} request: `, err);
        return torrent(torrentId, config, retries - 1);
      });
}
/**
 * Requests the listing at `/<keyword>/<page>/` and optionally follows pages.
 *
 * When a page returns exactly 40 entries and `config.extendToPage` (capped at
 * maxSearchPage) has not been reached, the next page is fetched and appended;
 * a failure of that follow-up request contributes no entries. Any failure of
 * the current page is retried with one attempt less.
 *
 * @param {string} keyword path segment to request.
 * @param {object} [config] `page`, `extendToPage` plus request config.
 * @param {number} [retries=2] attempts left.
 * @returns {Promise<object[]>} listing entries; rejects with a generic
 *   "Failed ... search" Error for a falsy keyword or when attempts ran out.
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const extendToPage = Math.min(maxSearchPage, (config.extendToPage || 1));

  const appendNextPage = (torrents) => search(keyword, { ...config, page: page + 1 })
      .catch(() => [])
      .then((nextTorrents) => torrents.concat(nextTorrents));
  const retry = () => search(keyword, config, retries - 1);

  return singleRequest(`${baseUrl}/${keyword}/${page}/`, config)
      .then((body) => parseTableBody(body))
      .then((torrents) => {
        const pageIsFull = torrents.length === 40;
        return pageIsFull && page < extendToPage ? appendNextPage(torrents) : torrents;
      })
      .catch(retry);
}
/**
 * Requests one listing page, optionally scoped to a category.
 *
 * @param {object} [config] `page` (1 when falsy), `category` plus request config.
 * @param {number} [retries=2] attempts left; every failure consumes one.
 * @returns {Promise<object[]>} listing entries; rejects with
 *   "Failed browse request" when attempts ran out.
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const { category } = config;
  const page = config.page || 1;
  const pagePath = category ? `${category}/${page}` : `${page}`;

  return singleRequest(`${baseUrl}/${pagePath}/`, config)
      .then((body) => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
/**
 * Performs one GET request with a random user agent and validates the body.
 *
 * @param {string} requestUrl URL to fetch.
 * @param {object} [config] `timeout` in ms; defaultTimeout when falsy.
 * @returns {Promise<string>} the response body. Rejects with the error's
 *   message (or the raw error when it has none) when the request fails, the
 *   body is empty, or the body contains a gateway/forbidden marker.
 */
function singleRequest(requestUrl, config = {}) {
  const options = {
    headers: { 'User-Agent': getRandomUserAgent() },
    timeout: config.timeout || defaultTimeout,
  };
  const blockedMarkers = ['502: Bad gateway', '403 Forbidden'];

  return axios.get(requestUrl, options)
      .then((response) => {
        const body = response.data;
        if (!body) {
          throw new Error(`No body: ${requestUrl}`);
        }
        if (blockedMarkers.some((marker) => body.includes(marker))) {
          throw new Error(`Invalid body contents: ${requestUrl}`);
        }
        return body;
      })
      .catch((error) => {
        throw error.message || error;
      });
}
/**
 * Extracts the listing entries from a listing page.
 *
 * @param {string} body HTML of the page.
 * @returns {Promise<Array<{name: string, torrentId: string}>>} one entry per
 *   `div.capa_larga.align-middle` element: the link text and the link href.
 *   Rejects when the document cannot be loaded or parsing throws.
 */
function parseTableBody(body) {
  try {
    const $ = cheerio.load(body);
    if (!$) {
      return Promise.reject(new Error('Failed loading body'));
    }

    const torrents = [];
    $('div.capa_larga.align-middle').each((index, element) => {
      const link = $(element).find("a");
      torrents.push({ name: link.text(), torrentId: link.attr("href") });
    });
    return Promise.resolve(torrents);
  } catch (error) {
    return Promise.reject(error);
  }
}
/**
 * Extracts one torrent per magnet link from a torrent page.
 *
 * Only magnet links whose `title` attribute passes `isPtDubbed` are used. The
 * title comes from the magnet's display name; when that is 5 characters or
 * shorter, "<original title> <year> <name>" is used instead.
 *
 * @param {string} body HTML of the torrent page.
 * @returns {Promise<object[]>} parsed torrents; rejects when the document
 *   cannot be loaded or parsing throws.
 */
function parseTorrentPage(body) {
  try {
    const $ = cheerio.load(body);
    if (!$) {
      return Promise.reject(new Error('Failed loading body'));
    }

    const magnets = $(`a[href^="magnet"]`)
        .filter((i, elem) => isPtDubbed($(elem).attr('title')))
        .map((i, elem) => $(elem).attr("href"))
        .get();
    const details = $('div#informacoes');
    const category = details.find('span:contains(\'Gêneros: \')').next().html();
    // Text of the element following the info label span.
    const labelledText = (label) => details.find(`span:contains('${label}')`).next().text();

    const torrents = magnets.map((magnetLink) => {
      const decodedMagnet = decode(magnetLink);
      const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '));
      const originalTitle = labelledText('Título Original: ').trim();
      const year = labelledText('Ano de Lançamento: ').trim();
      const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`;
      return {
        title: name.length > 5 ? name : fallBackTitle,
        originalName: sanitizePtOriginalName(originalTitle),
        year,
        infoHash: decodedMagnet.infoHash,
        magnetLink,
        category: parseCategory(category),
        uploadDate: new Date($('time').attr('datetime')),
        languages: sanitizePtLanguages(labelledText('Idioma')),
      };
    });
    return Promise.resolve(torrents.filter((entry) => entry));
  } catch (error) {
    return Promise.reject(error);
  }
}
/**
 * Derives the category from the genre links of a torrent page.
 *
 * The first link kind with non-empty text wins, checked in the order anime,
 * series, filmes, desenhos; "desenhos" links are reported as the TV category.
 *
 * @param {string} body HTML fragment holding the genre links.
 * @returns {string|undefined} a Categories value, or undefined without a match.
 */
function parseCategory(body) {
  const $ = cheerio.load(body);
  const rules = [
    ['anime', Categories.ANIME],
    ['series', Categories.TV],
    ['filmes', Categories.MOVIE],
    ['desenhos', Categories.TV],
  ];
  const hit = rules.find(([slug]) => $(`a[href*='${slug}']`).text());
  return hit ? hit[1] : undefined;
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,113 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const ondebaixa = require("./ondebaixa_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { getImdbId } = require("../../lib/metadata");
// Provider name: used for the repository provider lookup, stored on created torrents and printed in logs.
const NAME = "OndeBaixa";
// Page limit returned by untilPage() for every category.
const UNTIL_PAGE = 5;
// Site category -> internal Type lookup; typeMapping is a function declaration defined further down.
const TYPE_MAPPING = typeMapping();
// Shared Bottleneck limiter: at most 5 scheduled jobs run concurrently.
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Runs one scrape of the latest torrents and records when it started.
 *
 * The provider row is loaded up front; its `lastScraped` field is set to the
 * scrape start time and saved only after scraping finished without rejecting.
 *
 * @returns {Promise<void>} resolves after the provider row has been saved.
 */
async function scrape() {
  const startedAt = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  provider.lastScraped = startedAt;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Re-fetches the torrent page for a stored torrent through the rate limiter.
 *
 * @param {{torrentId: string}} torrent stored torrent to look up.
 * @returns {Promise<object[]>} whatever `ondebaixa.torrent` resolves with.
 */
async function updateSeeders(torrent) {
  const fetchPage = () => ondebaixa.torrent(torrent.torrentId);
  return limiter.schedule(fetchPage);
}
/**
 * Scrapes the movie, TV and "desenhos" categories one after another.
 *
 * @returns {Promise<Array>} the per-category results concatenated into one array.
 */
async function scrapeLatestTorrents() {
  const { MOVIE, TV, DESENHOS } = ondebaixa.Categories;
  const categoryJobs = [MOVIE, TV, DESENHOS]
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(categoryJobs);
  return perCategory.reduce((merged, entries) => merged.concat(entries), []);
}
/**
 * Processes one listing page of a category and recurses to the next page until
 * a page yields no entries or the page limit for the category is reached.
 *
 * A failed listing request is logged and treated as an empty page.
 *
 * @param {string} category category path segment.
 * @param {number} [page=1] page to request.
 * @returns {Promise<undefined>} resolves when the last page has been processed.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);

  const entries = await ondebaixa.browse({ category, page }).catch((error) => {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    return [];
  });
  const resolved = await Promise.all(
      entries.map((entry) => limiter.schedule(() => processEntry(entry))));

  const hasMorePages = resolved.length > 0 && page < untilPage(category);
  return hasMorePages ? scrapeLatestTorrentsForCategory(category, page + 1) : undefined;
}
/**
 * Loads the torrent page behind a listing entry and processes its torrents
 * one after another.
 *
 * @param {{torrentId: string}} entry listing entry.
 * @returns {Promise<Array|undefined>} the results of `processTorrentRecord`,
 *   or undefined when loading or processing rejected (best effort).
 */
async function processEntry(entry) {
  // Started outside the try block so that only rejections are turned into undefined.
  const pendingRecords = ondebaixa.torrent(entry.torrentId);
  try {
    const records = await pendingRecords;
    return await Promises.sequence(records.map((record) => () => processTorrentRecord(record)));
  } catch (error) {
    return undefined;
  }
}
/**
 * Stores a torrent parsed from a torrent page unless it is already handled.
 *
 * When `checkAndUpdateTorrent` resolves truthy the input is returned as is.
 * Otherwise a missing size, non-integer seeders and (for non-anime types) a
 * missing imdbId are filled in before the entry is created; a failed imdbId
 * lookup leaves the id undefined.
 *
 * @param {object} foundTorrent torrent parsed by the API module.
 * @returns {Promise<*>} the input torrent, or the result of `createTorrentEntry`.
 */
async function processTorrentRecord(foundTorrent) {
  const alreadyHandled = await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  if (alreadyHandled) {
    return foundTorrent;
  }

  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }

  const type = TYPE_MAPPING[foundTorrent.category];
  if (!foundTorrent.imdbId && type !== Type.ANIME) {
    const { originalName: title, year } = foundTorrent;
    foundTorrent.imdbId = await getImdbId({ title, year }, type).catch(() => undefined);
  }

  const { infoHash, torrentId, title, imdbId, uploadDate, seeders, size, files, languages } = foundTorrent;
  return createTorrentEntry({
    infoHash,
    provider: NAME,
    torrentId,
    title,
    type,
    imdbId,
    uploadDate,
    seeders,
    size,
    files,
    languages,
  });
}
/**
 * Builds the lookup from site category to internal torrent type.
 *
 * Kept as a function declaration because TYPE_MAPPING calls it before this
 * point in the file.
 *
 * @returns {Object<string, string>} category value -> Type value.
 */
function typeMapping() {
  const { MOVIE, TV, DESENHOS, ANIME } = ondebaixa.Categories;
  return {
    [MOVIE]: Type.MOVIE,
    [TV]: Type.SERIES,
    [DESENHOS]: Type.SERIES,
    [ANIME]: Type.ANIME,
  };
}
// Page limit for a category; the category argument is ignored and every
// category gets UNTIL_PAGE.
function untilPage(category) {
return UNTIL_PAGE;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,161 +0,0 @@
const axios = require('axios');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require("../../lib/requestHelper");
// torrentapi.org endpoint that every request in this module targets.
const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
// Application id sent as `app_id` with every request, including the token request.
const appId = 'torrentio-addon';
// Request timeout in milliseconds, used when the caller's config has no `timeout`.
const defaultTimeout = 30000;
// Delay in milliseconds before a throttled request is retried.
const retryDelay = 3000;
// Cached API token: fetched lazily by getToken() and cleared by singleRequest() on error_code 4.
let token;
// Option values accepted by search() and browse().
const Options = {
// Category ids; each entry is an array, and search()/browse() join the ids with ';'.
category: {
MOVIES_XVID: [14],
MOVIES_XVID_720P: [48],
MOVIES_X264: [17],
MOVIES_X264_1080P: [44],
MOVIES_X264_720P: [45],
MOVIES_X264_3D: [47],
MOVIES_X264_4K: [50],
MOVIES_X265_1080P: [54],
MOVIES_X265_4K: [51],
MOVIES_X265_4K_HDR: [52],
MOVIES_FULL_BD: [42],
MOVIES_BD_REMUX: [46],
// Combined group: the 3D, 4K (x264/x265/HDR) and BD remux ids listed above.
MOVIES_HIGH_RES: [47, 50, 51, 52, 46],
TV_EPISODES: [18],
TV_UHD_EPISODES: [49],
TV_HD_EPISODES: [41],
MUSIC_MP3: [23],
MUSIC_FLAC: [25],
GAMES_PC_ISO: [27],
GAMES_PC_RIP: [28],
GAMES_PS3: [40],
GAMES_XBOX_360: [32],
SOFTWARE_PC_ISO: [33],
EBOOKS: [35],
XXX: [4],
},
// Values for the `sort` request parameter.
sort: {
LAST: 'last',
SEEDERS: 'seeders',
LEECHERS: 'leechers'
},
// Values for the `format` request parameter.
format: {
JSON: 'json',
JSON_EXTENDED: 'json_extended'
},
// Values for the `ranked` request parameter.
ranked: {
TRUE: 1,
FALSE: 0
}
}
/**
 * Searches torrents by IMDb id.
 *
 * @param {string} imdbId IMDb id to search for; required.
 * @param {object} [params] optional `category` (array of ids), `limit`,
 *   `sort`, `min_seeders`, `min_leechers`, `format` and `ranked`; falsy values
 *   fall back to 100 results sorted by seeders in extended JSON, unranked.
 * @returns {Promise<object[]>} parsed torrents; rejects when imdbId is missing.
 */
function search(imdbId, params = {}) {
  if (!imdbId) {
    return Promise.reject(new Error(`Must define imdbId`));
  }

  const { category, limit, sort, min_seeders, min_leechers, format, ranked } = params;
  const parameters = {
    mode: 'search',
    search_imdb: imdbId,
    category: category && category.join(';') || null,
    limit: limit || 100,
    sort: sort || Options.sort.SEEDERS,
    min_seeders: min_seeders || undefined,
    min_leechers: min_leechers || undefined,
    format: format || Options.format.JSON_EXTENDED,
    ranked: ranked || Options.ranked.FALSE,
  };
  return singleRequest(parameters).then((results) => parseResults(results));
}
/**
 * Lists torrents, newest first unless another sort is given.
 *
 * @param {object} [params] optional `category` (array of ids), `limit`,
 *   `sort`, `min_seeders`, `min_leechers`, `format` and `ranked`; falsy values
 *   fall back to 100 results sorted by 'last' in extended JSON, unranked.
 * @returns {Promise<object[]>} parsed torrents.
 */
function browse(params = {}) {
  const { category, limit, sort, min_seeders, min_leechers, format, ranked } = params;
  const parameters = {
    mode: 'list',
    category: category && category.join(';') || null,
    limit: limit || 100,
    sort: sort || Options.sort.LAST,
    min_seeders: min_seeders || undefined,
    min_leechers: min_leechers || undefined,
    format: format || Options.format.JSON_EXTENDED,
    ranked: ranked || Options.ranked.FALSE,
  };
  return singleRequest(parameters).then((results) => parseResults(results));
}
/**
 * Sends one API request, retrying on token expiry and throttling.
 *
 * Changes compared to the previous implementation:
 *  - the caller's `params` object is no longer mutated; the query is built on a copy;
 *  - a token-expired response (error_code 4) consumes one retry instead of
 *    restarting with a fresh retry budget, so a persistently failing token
 *    cannot recurse without bound;
 *  - a response that carries a `torrent_results` array is returned right away.
 *    Such an object has no `length`, so the old emptiness check treated every
 *    successful object response as empty and retried it until the budget ran out.
 *
 * @param {object} [params] query parameters; null/undefined values are dropped.
 * @param {object} [config] `timeout` in ms; defaultTimeout when falsy.
 * @param {number} [retries=15] retries left.
 * @returns {Promise<object>} the response data. Rejects with the response data
 *   when it carries an `error`, or with the error message (or raw error) of a
 *   failed request.
 */
async function singleRequest(params = {}, config = {}, retries = 15) {
  const timeout = config.timeout || defaultTimeout;
  const headers = {
    'user-agent': getRandomUserAgent(),
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4'
  };

  // Build the query on a copy so retries and callers always see the original params.
  const query = { ...params, token: await getToken(), app_id: appId };
  Object.keys(query)
      .filter(key => query[key] === undefined || query[key] === null)
      .forEach(key => delete query[key]);
  const options = { headers, timeout, params: query };

  return axios.get(baseUrl, options)
      .then(response => {
        const data = response.data;
        if (data && data.error_code === 4) {
          // token expired: always drop the cached token, retry only while budget is left
          token = undefined;
          if (retries > 0) {
            return singleRequest(params, config, retries - 1);
          }
        }
        const hasResults = Boolean(data) && Array.isArray(data.torrent_results);
        const isEmpty = !data || (!hasResults && !data.length);
        if ((isEmpty || [5, 20].includes(data.error_code)) && retries > 0) {
          // too many requests
          return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
        }
        if (response.status !== 200 || (data && data.error)) {
          // something went wrong
          return Promise.reject(data || `Failed RARGB request with status=${response.status}`);
        }
        return data;
      })
      .catch(error => {
        if (error.response && [429].includes(error.response.status) && retries > 0) {
          return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
        }
        return Promise.reject(error.message || error);
      });
}
/**
 * Converts an API response into parsed torrents.
 *
 * @param {object} results response data expected to hold a `torrent_results` array.
 * @returns {object[]|Promise<never>} the parsed torrents, or a promise rejected
 *   with an "Incorrect results ..." string when the array is missing.
 */
function parseResults(results) {
  const entries = results && results.torrent_results;
  if (!Array.isArray(entries)) {
    return Promise.reject(`Incorrect results ${JSON.stringify(results)}`);
  }
  return entries.map((entry) => parseResult(entry));
}
/**
 * Converts one API result into the record shape used by the scrapers.
 *
 * @param {object} result raw result; `download` holds the magnet link.
 * @returns {object} record whose `infoHash` is decoded from the magnet link,
 *   `uploadDate` is a Date built from `pubdate`, and `imdbId` is taken from
 *   `episode_info.imdb` when `episode_info` is present.
 */
function parseResult(result) {
  const { title, download, seeders, leechers, category, size, pubdate, episode_info } = result;
  return {
    title,
    infoHash: decode(download).infoHash,
    magnetLink: download,
    seeders,
    leechers,
    category,
    size,
    uploadDate: new Date(pubdate),
    imdbId: episode_info && episode_info.imdb,
  };
}
/**
 * Returns the API token, requesting and caching one when none is cached.
 *
 * @returns {Promise<string>} the cached or freshly requested token.
 */
async function getToken() {
  if (token) {
    return token;
  }
  const response = await axios.get(baseUrl, {
    timeout: defaultTimeout,
    params: { get_token: 'get_token', app_id: appId },
  });
  token = response.data.token;
  return token;
}
module.exports = { search, browse, Options };

View File

@@ -1,80 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const rarbg = require('./rarbg_api');
const { Type } = require('../../lib/types');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
// Provider name stored on created torrents and printed in logs.
const NAME = 'RARBG';
// API request limiter: one request at a time, at least 3000 ms between request starts.
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 3000 });
// Limiter for storing parsed torrents: at most 20 concurrently.
const entryLimiter = new Bottleneck({ maxConcurrent: 20 });
// Flat list of the movie and TV category ids requested for every imdb id;
// the per-category id arrays are concatenated into a single array.
const allowedCategories = [
rarbg.Options.category.MOVIES_XVID,
rarbg.Options.category.MOVIES_XVID_720P,
rarbg.Options.category.MOVIES_X265_1080P,
rarbg.Options.category.MOVIES_X264,
rarbg.Options.category.MOVIES_X264_720P,
rarbg.Options.category.MOVIES_X264_1080P,
rarbg.Options.category.MOVIES_HIGH_RES,
rarbg.Options.category.TV_EPISODES,
rarbg.Options.category.TV_UHD_EPISODES,
rarbg.Options.category.TV_HD_EPISODES
].reduce((a, b) => a.concat(b), [])
async function scrape() {
console.log(`[${moment()}] starting ${NAME} dump scrape...`);
// const movieImdbIds = require('./rargb_movie_imdb_ids_2021-02-27.json');
const seriesImdbIds = require('./rargb_series_imdb_ids_2021-02-27.json');
//const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
return Promise.all(
seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
.then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
}
/**
 * Searches the allowed categories for torrents of one imdb id.
 *
 * @param {string} imdbId id to search for.
 * @returns {Promise<object[]>} the torrents found; a failed request is logged
 *   and yields an empty array.
 */
async function getTorrentsForImdbId(imdbId) {
  // Started outside the try block so that only rejections are turned into [].
  const pendingSearch = rarbg.search(imdbId, { category: allowedCategories });
  try {
    const torrents = await pendingSearch;
    console.log(`Completed ${imdbId} request`);
    return torrents;
  } catch (error) {
    console.warn(`Failed ${NAME} request for ${imdbId}: `, error);
    return [];
  }
}
/**
 * Stores a parsed API result as a new torrent entry unless it is already handled.
 *
 * @param {object} record parsed result from the API module.
 * @returns {Promise<*>} the input record when `checkAndUpdateTorrent` resolved
 *   truthy, otherwise the result of `createTorrentEntry`.
 */
async function processTorrentRecord(record) {
  const alreadyHandled = await checkAndUpdateTorrent(record);
  if (alreadyHandled) {
    return record;
  }

  const { infoHash, title, category, seeders, size, uploadDate, imdbId } = record;
  return createTorrentEntry({
    provider: NAME,
    infoHash,
    title,
    type: getType(category),
    seeders,
    size,
    uploadDate,
    imdbId,
  });
}
// Category names, as reported in API results, that denote series content.
const seriesCategories = [
  'TV Episodes',
  'Movies/TV-UHD-episodes',
  'TV HD Episodes',
];

/**
 * Maps an API category name to the internal torrent type.
 *
 * @param {string} category category name of a result.
 * @returns {string} Type.SERIES for a series category, Type.MOVIE for anything else.
 */
function getType(category) {
  const isSeries = seriesCategories.includes(category);
  return isSeries ? Type.SERIES : Type.MOVIE;
}
module.exports = { scrape, NAME };

View File

@@ -1,95 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const rarbg = require('./rarbg_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
// Provider name: used for the repository provider lookup, stored on created torrents and printed in logs.
const NAME = 'RARBG';
// API request limiter: one request at a time, at least 2500 ms between request starts.
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
// Limiter for storing parsed torrents: at most 10 concurrently.
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Runs one scrape of the latest torrents and records when it started.
 *
 * The provider row is loaded up front; its `lastScraped` field is set to the
 * scrape start time and saved only after scraping finished without rejecting.
 *
 * @returns {Promise<void>} resolves after the provider row has been saved.
 */
async function scrape() {
  const startedAt = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  provider.lastScraped = startedAt;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Seeder refresh is switched off for this provider.
 *
 * An earlier variant resolved imdb ids through `getImdbIdsMethod` and searched
 * each of them through the limiter; it is disabled, so both arguments are
 * ignored.
 *
 * @param {object} torrent unused.
 * @param {Function} getImdbIdsMethod unused.
 * @returns {Promise<Array>} always an empty array.
 */
async function updateSeeders(torrent, getImdbIdsMethod) {
  return [];
}
/**
 * Scrapes the latest torrents of every allowed movie and TV category, one
 * category after another, each request going through the rate limiter.
 *
 * @returns {Promise<Array>} the per-category results concatenated into one array.
 */
async function scrapeLatestTorrents() {
  const categoryKeys = [
    'MOVIES_XVID',
    'MOVIES_XVID_720P',
    'MOVIES_X265_1080P',
    'MOVIES_X264',
    'MOVIES_X264_720P',
    'MOVIES_X264_1080P',
    'MOVIES_HIGH_RES',
    'TV_EPISODES',
    'TV_UHD_EPISODES',
    'TV_HD_EPISODES',
  ];
  const categoryJobs = categoryKeys
      .map((key) => rarbg.Options.category[key])
      .map((category) => () => limiter.schedule(() => scrapeLatestTorrentsForCategory(category)));

  const perCategory = await Promises.sequence(categoryJobs);
  return perCategory.reduce((merged, entries) => merged.concat(entries), []);
}
/**
 * Lists the latest torrents of one category and stores each of them.
 *
 * @param {number[]} category category ids passed to the API.
 * @returns {Promise<Array>} the results of `processTorrentRecord`; any failure
 *   while listing or storing is logged and yields an empty array.
 */
async function scrapeLatestTorrentsForCategory(category) {
  console.log(`Scrapping ${NAME} ${category} category`);
  // Started outside the try block so that only rejections are turned into [].
  const pendingListing = rarbg.browse({ category: category });
  try {
    const torrents = await pendingListing;
    return await Promise.all(
        torrents.map((found) => entryLimiter.schedule(() => processTorrentRecord(found))));
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for ${category} due: `, error);
    return [];
  }
}
/**
 * Stores a parsed API result as a new torrent entry unless it is already handled.
 *
 * @param {object} record parsed result from the API module.
 * @returns {Promise<*>} the input record when `checkAndUpdateTorrent` resolved
 *   truthy, otherwise the result of `createTorrentEntry`.
 */
async function processTorrentRecord(record) {
  const alreadyHandled = await checkAndUpdateTorrent(record);
  if (alreadyHandled) {
    return record;
  }

  const { infoHash, title, category, seeders, size, uploadDate, imdbId } = record;
  return createTorrentEntry({
    provider: NAME,
    infoHash,
    title,
    type: getType(category),
    seeders,
    size,
    uploadDate,
    imdbId,
  });
}
// Category names, as reported in API results, that denote series content.
const seriesCategories = [
  'TV Episodes',
  'Movies/TV-UHD-episodes',
  'TV HD Episodes',
];

/**
 * Maps an API category name to the internal torrent type.
 *
 * @param {string} category category name of a result.
 * @returns {string} Type.SERIES for a series category, Type.MOVIE for anything else.
 */
function getType(category) {
  const isSeries = seriesCategories.includes(category);
  return isSeries ? Type.SERIES : Type.MOVIE;
}
module.exports = { scrape, updateSeeders, NAME };

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,207 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const decode = require('magnet-uri');
const { defaultOptionsWithProxy } = require('../../lib/requestHelper');
// Site root that every request URL in this module is built from.
const baseUrl = 'http://www.rutor.info';
// Request timeout in milliseconds applied to every request.
const defaultTimeout = 10000;
// Category ids used as the category segment of browse URLs.
const Categories = {
ALL: '0',
FOREIGN_FILMS: '1',
RUSSIAN_FILMS: '5',
SCIENCE_FILMS: '12',
FOREIGN_SERIES: '4',
RUSSIAN_SERIES: '16',
RUSSIAN_TV: '6',
RUSSIAN_ANIMATION: '7',
ANIME: '10',
FOREIGN_RELEASES: '17'
};
/**
 * Loads and parses a torrent page, retrying on failure.
 *
 * @param {string} torrentId id of the torrent page.
 * @param {object} [config] unused by the request itself; passed along on retry.
 * @param {number} [retries=2] attempts left.
 * @param {*} [error=null] error of the previous attempt.
 * @returns {Promise<object>} the parsed torrent. Rejects with the last error
 *   (or a generic "Failed ... search" Error) for a falsy id or when attempts
 *   ran out.
 */
function torrent(torrentId, config = {}, retries = 2, error = null) {
  const exhausted = !torrentId || retries === 0;
  if (exhausted) {
    return Promise.reject(error || new Error(`Failed ${torrentId} search`));
  }

  const retry = (err) => torrent(torrentId, config, retries - 1, err);
  return singleRequest(`${baseUrl}/torrent/${torrentId}`)
      .then((body) => parseTorrentPage(body, torrentId))
      .catch(retry);
}
/**
 * Runs a site search and parses the result table, retrying on failure.
 *
 * The fallback error now names the search request; it previously reported a
 * failed "browse" request.
 *
 * @param {string} query search text; URL-encoded before it is sent.
 * @param {number} [retries=2] attempts left.
 * @param {*} [error=null] error of the previous attempt.
 * @returns {Promise<object[]>} parsed torrents. Rejects with the last error,
 *   or with "Failed search request" when attempts ran out without one.
 */
function search(query, retries = 2, error = null) {
  if (retries === 0) {
    return Promise.reject(error || new Error(`Failed search request`));
  }
  return singleRequest(`${baseUrl}/search/0/0/0/0/${encodeURIComponent(query)}`)
      .then((body) => parseTableBody(body))
      .catch((err) => search(query, retries - 1, err));
}
/**
 * Requests one page of a category listing, retrying on failure.
 *
 * A missing category now falls back to Categories.ALL; it previously put the
 * literal text "undefined" into the request URL.
 *
 * @param {object} [config]
 * @param {number} [config.page] 1-based page, 1 when falsy; the URL uses page - 1.
 * @param {string} [config.category] category id, Categories.ALL when null/undefined.
 * @param {number} [retries=2] attempts left.
 * @param {*} [error=null] error of the previous attempt.
 * @returns {Promise<object[]>} parsed torrents. Rejects with the last error,
 *   or with "Failed browse request" when attempts ran out without one.
 */
function browse(config = {}, retries = 2, error = null) {
  if (retries === 0) {
    return Promise.reject(error || new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const hasCategory = config.category !== undefined && config.category !== null;
  const category = hasCategory ? config.category : Categories.ALL;
  return singleRequest(`${baseUrl}/browse/${page - 1}/${category}/0/0`)
      .then((body) => parseTableBody(body))
      .catch((err) => browse(config, retries - 1, err));
}
/**
 * Loads and parses the file listing of a torrent.
 *
 * @param {string} torrentId id of the torrent.
 * @returns {Promise<object[]>} the files as returned by `parseFiles`.
 */
function files(torrentId) {
  const listingUrl = `${baseUrl}/descriptions/${torrentId}.files`;
  return singleRequest(listingUrl).then((listing) => parseFiles(listing));
}
/**
 * Performs one GET request through the proxy options and validates the body.
 *
 * @param {string} requestUrl URL to fetch.
 * @returns {Promise<string>} the response body. Rejects when the request
 *   fails, the body is empty, or the body contains an access-denied, gateway,
 *   forbidden or DNS error marker.
 */
function singleRequest(requestUrl) {
  const options = { ...defaultOptionsWithProxy(), timeout: defaultTimeout };
  const invalidMarkers = ['502: Bad gateway', '403 Forbidden', 'Origin DNS error'];

  return axios.get(requestUrl, options).then((response) => {
    const body = response.data;
    if (!body) {
      throw new Error(`No body: ${requestUrl} with status ${response.status}`);
    }
    if (body.includes('Access Denied')) {
      console.log(`Access Denied: ${requestUrl}`);
      throw new Error(`Access Denied: ${requestUrl}`);
    }
    if (invalidMarkers.some((marker) => body.includes(marker))) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  });
}
/**
 * Extracts the torrents from a listing or search result table.
 *
 * Every row of `#index` except `.backgr` rows is read: the first cell holds
 * the date, the second the torrent/magnet/title links, the second-to-last the
 * size and the last the peer counts.
 *
 * @param {string} body HTML of the page.
 * @returns {Promise<object[]>} parsed torrents; rejects when the document
 *   cannot be loaded or a row cannot be parsed.
 */
function parseTableBody(body) {
  try {
    const $ = cheerio.load(body);
    if (!$) {
      return Promise.reject(new Error('Failed loading body'));
    }

    const parseRow = (i, elem) => {
      const cells = $(elem).find('td');
      const links = $(cells[1]).find('a');
      const peers = $(cells[cells.length - 1]);
      const titleLink = $(links[2]);
      const magnetLink = $(links[1]).attr('href');
      return {
        title: titleLink.text(),
        infoHash: decode(magnetLink).infoHash,
        magnetLink,
        torrentLink: $(links[0]).attr('href'),
        torrentId: titleLink.attr('href').match(/torrent\/(\d+)/)[1],
        seeders: parseInt(peers.find('.green').text()),
        leechers: parseInt(peers.find('.red').text()),
        uploadDate: parseRussianDate($(cells[0]).text()),
        size: $(cells[cells.length - 2]).html().replace('&#xA0;', ' '),
      };
    };
    return Promise.resolve($('#index').find('tr:not(.backgr)').map(parseRow).get());
  } catch (error) {
    return Promise.reject(error);
  }
}
/**
 * Extracts the torrent details from a torrent page.
 *
 * Seeders, upload date and size are read from the second cell of the 8th, 5th
 * and 4th row counted from the end of `#details`; the imdb id and languages
 * come from the second cell of the first row.
 *
 * @param {string} body HTML of the torrent page.
 * @param {string} torrentId id stored on the result.
 * @returns {Promise<object>} the parsed torrent; rejects when the document
 *   cannot be loaded or an expected element is missing.
 */
function parseTorrentPage(body, torrentId) {
  try {
    const $ = cheerio.load(body);
    if (!$) {
      return Promise.reject(new Error('Failed loading body'));
    }

    const rows = $('#details > tr');
    // Second cell of the row `fromEnd` positions before the end of the details table.
    const valueCell = (fromEnd) => $(rows[rows.length - fromEnd]).find('td:nth-of-type(2)');
    const details = $(rows[0]).find('td:nth-of-type(2)');
    const magnetLink = $('#download a:nth-of-type(1)').attr('href');
    const imdbIdMatch = details.html().match(/imdb\.com\/title\/(tt\d+)/i);
    const decodedMagnet = decode(magnetLink);

    return Promise.resolve({
      title: $('#all h1').first().text(),
      torrentId,
      infoHash: decodedMagnet.infoHash,
      trackers: Array.from(new Set(decodedMagnet.tr)).join(','),
      magnetLink,
      torrentLink: $('#download a:nth-of-type(2)').attr('href'),
      seeders: parseInt(valueCell(8).first().text(), 10),
      category: $('tr:contains(\'Категория\') a').first().attr('href').match(/\/([\w-]+)$/)[1],
      languages: parseLanguages(details.text()),
      size: parseSize(valueCell(4).text()),
      uploadDate: parseDate(valueCell(5).first().text()),
      imdbId: imdbIdMatch && imdbIdMatch[1],
    });
  } catch (error) {
    return Promise.reject(error);
  }
}
/**
 * Parses the file listing of a torrent into file entries.
 *
 * Fixes compared to the previous implementation:
 *  - cell texts are taken from the capture group; `match` with the `g` flag
 *    returned whole `<td>...</td>` strings, so name and path came out as "td>";
 *  - lines without at least three cells (table wrappers, blank lines) are
 *    skipped instead of crashing on `null.slice`;
 *  - `path` keeps the full path while `name` is the last path segment.
 *
 * NOTE(review): assumes one file row per line with the path in the second cell
 * and the size in the third - confirm against a real listing response.
 *
 * @param {string} body HTML of the listing.
 * @returns {Array<{fileIndex: number, name: string, path: string, size: number}>}
 *   one entry per file row, indexed in row order.
 * @throws {Error} when the body is empty.
 */
function parseFiles(body) {
  if (!body) {
    throw new Error("No files in the body");
  }

  const readCells = (line) => {
    // Fresh regex per line: a shared /g regex would carry lastIndex between lines.
    const cellPattern = /<td>([^<]+)<\/td>/g;
    const cells = [];
    let match = cellPattern.exec(line);
    while (match !== null) {
      cells.push(match[1]);
      match = cellPattern.exec(line);
    }
    return cells;
  };

  return body.split('\n')
      .map((line) => readCells(line).slice(1))
      .filter((cells) => cells.length >= 2)
      .map((cells, index) => ({
        fileIndex: index,
        name: cells[0].replace(/^.+\//, ''),
        path: cells[0],
        size: parseSize(cells[1])
      }));
}
/**
 * Parses a "DD-MM-YYYY HH:mm:ss" timestamp, ignoring a trailing
 * whitespace-plus-parenthesised suffix.
 *
 * @param {string} dateString timestamp text from the torrent page.
 * @returns {Date} the parsed date.
 */
function parseDate(dateString) {
  const withoutSuffix = dateString.replace(/\s\(.*\)/, '');
  const parsed = moment(withoutSuffix, 'DD-MM-YYYY HH:mm:ss');
  return parsed.toDate();
}
// Russian month abbreviations, as they appear in listing dates, mapped to the
// English abbreviations used for parsing.
const russianMonths = {
  'Янв': 'Jan',
  'Фев': 'Feb',
  'Мар': 'Mar',
  'Апр': 'Apr',
  'Май': 'May',
  'Июн': 'Jun',
  'Июл': 'Jul',
  'Авг': 'Aug',
  'Сен': 'Sep',
  'Окт': 'Oct',
  'Ноя': 'Nov',
  'Дек': 'Dec'
};

/**
 * Parses a listing date such as "05 Янв 21" (day, Russian month, 2-digit year).
 *
 * The first known month abbreviation is swapped for its English counterpart
 * and non-breaking spaces become regular spaces before parsing.
 *
 * @param {string} dateString date text from a listing row.
 * @returns {Date} the parsed date.
 */
function parseRussianDate(dateString) {
  const rusMonth = Object.keys(russianMonths).find((month) => dateString.includes(month));
  const trimmed = dateString.trim();
  const translated = rusMonth ? trimmed.replace(rusMonth, russianMonths[rusMonth]) : trimmed;
  const preparedDate = translated.replace(/\u00a0/g, ' ');
  return moment(preparedDate, 'DD MMM YY').toDate();
}
/**
 * Reads the exact byte count from a size text such as "1.37 GB (1474934784 Bytes)".
 *
 * @param {string} sizeString size text containing "(<digits> Bytes)".
 * @returns {number} the byte count.
 * @throws {TypeError} when the text has no "(<digits> Bytes)" part.
 */
function parseSize(sizeString) {
  const [, byteCount] = /\((\d+) Bytes\)/.exec(sizeString);
  return parseInt(byteCount, 10);
}
// Language name -> detection pattern. Each pattern requires an audio/language
// label (Russian or English) followed later on the same line ('.' does not
// cross line breaks) by the language's name or abbreviation; all patterns are
// case-insensitive.
const languageMatchers = {
'russian': /(?:Язык|Звук|Аудио|audio|language).*(russian|\brus?\b|[Рр]усский)/i,
'english': /(?:Язык|Звук|Аудио|audio|language).*(english|\beng?\b|[Аа]нглийский)/i,
'ukrainian': /(?:Язык|Звук|Аудио|audio|language).*(ukrainian|\bukr\b|украинский)/i,
'french': /(?:Язык|Звук|Аудио|audio|language).*(french|\bfr\b|французский)/i,
'spanish': /(?:Язык|Звук|Аудио|audio|language).*(spanish|\bspa\b|испанский)/i,
'italian': /(?:Язык|Звук|Аудио|audio|language).*(italian|\bita\b|итальянский)/i,
'german': /(?:Язык|Звук|Аудио|audio|language).*(german|\bger\b|Немецкий)/i,
'korean': /(?:Язык|Звук|Аудио|audio|language).*(korean|Корейский)/i,
'arabic': /(?:Язык|Звук|Аудио|audio|language).*(arabic|Арабский)/i,
'portuguese': /(?:Язык|Звук|Аудио|audio|language).*(portuguese|Португальский)/i,
'japanese': /(?:Язык|Звук|Аудио|audio|language).*(japanese|\bjap\b|\bjp\b|[Яя]понский)/i,
}
/**
 * Detects the audio languages mentioned in a torrent description.
 *
 * Only the text before the first standalone "Text"/"Текст" line (optionally
 * numbered) is inspected. Russian is always part of the result.
 *
 * @param {string} details description text of the torrent.
 * @returns {string} 'multi-audio' when more than four languages were found,
 *   otherwise the comma-joined language names starting with 'russian'.
 */
function parseLanguages(details) {
  const textSection = details.match(/\r?\n(Text|Текст)(?:\s?#?\d{1,2})?\r?\n/i);
  const audioPart = textSection ? details.substring(0, textSection.index) : details;

  const detected = Object.keys(languageMatchers)
      .filter((language) => languageMatchers[language].test(audioPart));
  const languages = [...new Set(['russian', ...detected])];

  if (languages.length > 4) {
    return 'multi-audio';
  }
  return languages.join(',');
}
module.exports = { torrent, browse, search, Categories };

View File

@@ -1,133 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const { parse } = require('parse-torrent-title');
const rutor = require('./rutor_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
// Provider name: used for the repository provider lookup and printed in logs.
const NAME = 'Rutor';
// Category slug parsed from a torrent page -> internal torrent type.
const TYPE_MAPPING = {
'kino': Type.MOVIE,
'nashe_kino': Type.MOVIE,
'nauchno_popularnoe': Type.MOVIE,
'inostrannoe': Type.MOVIE,
'seriali': Type.SERIES,
'nashi_seriali': Type.SERIES,
'tv': Type.SERIES,
'multiki': Type.MOVIE,
'anime': Type.ANIME
};
// Limiter for listing requests: one at a time, at least 5000 ms between starts.
const api_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 5000 });
// Limiter for single torrent page requests: one at a time, at least 2500 ms between starts.
const api_entry_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
// Limiter for processing listing rows: at most 10 concurrently.
const limiter = new Bottleneck({ maxConcurrent: 10 });
// Categories scraped by scrapeLatestTorrents, in this order.
const allowedCategories = [
rutor.Categories.FOREIGN_FILMS,
rutor.Categories.FOREIGN_RELEASES,
rutor.Categories.RUSSIAN_FILMS,
rutor.Categories.FOREIGN_SERIES,
rutor.Categories.RUSSIAN_SERIES,
rutor.Categories.SCIENCE_FILMS,
rutor.Categories.RUSSIAN_ANIMATION,
rutor.Categories.ANIME
];
/**
 * Runs one scrape of the latest torrents and records when it started.
 *
 * The provider row is loaded up front; its `lastScraped` field is set to the
 * scrape start time and saved only after scraping finished without rejecting.
 * For debugging a single torrent, fetch it with `rutor.torrent(id)` through
 * `api_entry_limiter` and hand the result to `processTorrentRecord`.
 *
 * @returns {Promise<void>} resolves after the provider row has been saved.
 */
async function scrape() {
  const startedAt = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  provider.lastScraped = startedAt;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Seeder refresh hook; this provider does not refresh anything.
 *
 * @param {object} torrent - stored torrent entry (unused)
 * @returns {Promise<Array>} always resolves to an empty array
 */
async function updateSeeders(torrent) {
  return [];
}
/**
 * Scrapes every allowed category via Promises.sequence and flattens the
 * per-category results one level.
 *
 * @returns {Promise<Array>} concatenated per-category results
 */
async function scrapeLatestTorrents() {
  const categoryTasks = allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(categoryTasks);
  return [].concat(...perCategory);
}
/**
 * Scrapes one listing page of a category and recurses into the next page
 * while the page produced records and the category's page limit is not reached.
 * A failed listing request is logged and treated as an empty page.
 *
 * @param {*} category - one of rutor.Categories
 * @param {number} [page=1] - listing page to scrape
 * @returns {Promise<undefined>}
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);

  const torrents = await api_limiter.schedule(() => rutor.browse({ category, page }))
    .catch(error => {
      console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
      return [];
    });
  const processed = await Promise.all(
    torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))));

  const hasNextPage = processed.length > 0 && page < getMaxPage(category);
  if (hasNextPage) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
/**
 * Processes one listing record: updates it when already stored, skips old
 * unseeded torrents, otherwise fetches the torrent details and creates an entry.
 *
 * @param {object} record - listing record (torrentId, seeders, uploadDate, ...)
 * @returns {Promise<object|string>} the record, the created torrent, or a
 *   message string when the torrent is invalid or lacks a required imdbId
 */
async function processTorrentRecord(record) {
  const alreadyStored = await checkAndUpdateTorrent(record);
  if (alreadyStored) {
    return record;
  }

  // Torrents older than 18 months without seeders are not worth fetching.
  const ageLimit = moment().subtract(18, 'month');
  if (record.seeders === 0 && moment(record.uploadDate).isBefore(ageLimit)) {
    console.log(`Skipping old unseeded torrent [${record.infoHash}] ${record.title}`);
    return record;
  }

  const details = await api_entry_limiter.schedule(() => rutor.torrent(record.torrentId).catch(() => undefined));
  if (!details || !TYPE_MAPPING[details.category]) {
    return `${NAME}: Invalid torrent record: ${record.torrentId}`;
  }
  if (!details.imdbId && disallowWithoutImdbId(details)) {
    return `${NAME}: No imdbId defined: ${record.torrentId}`;
  }

  const torrent = {
    provider: NAME,
    infoHash: details.infoHash,
    torrentId: details.torrentId,
    torrentLink: details.torrentLink,
    trackers: details.trackers,
    title: details.title,
    type: TYPE_MAPPING[details.category],
    size: details.size,
    seeders: details.seeders,
    uploadDate: details.uploadDate,
    imdbId: details.imdbId,
    languages: details.languages || undefined,
  };
  await createTorrentEntry(torrent);
  return torrent;
}
/**
 * Returns how many listing pages are scraped for a category:
 * 2 for foreign films and foreign series, 1 for everything else.
 *
 * @param {*} category - one of rutor.Categories
 * @returns {number} last page number to scrape
 */
function getMaxPage(category) {
  const isTwoPageCategory = category === rutor.Categories.FOREIGN_FILMS
    || category === rutor.Categories.FOREIGN_SERIES;
  return isTwoPageCategory ? 2 : 1;
}
/**
 * Decides whether a torrent without an imdbId must be rejected.
 *
 * @param {object} torrent - fetched torrent with `category` and `title`
 * @returns {boolean} true when the torrent may not be stored without an imdbId
 */
function disallowWithoutImdbId(torrent) {
  // allow to search foreign movie and anime ids via search
  if (torrent.category === 'kino' || torrent.category === 'anime') {
    return false;
  }
  if (torrent.category !== 'seriali') {
    return true;
  }
  // allow to search id for non russian series titles via search:
  // only titles containing Cyrillic characters stay disallowed
  const cyrillicMatch = parse(torrent.title).title.match(/[\u0400-\u04ff]/i);
  return cyrillicMatch !== null;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,41 +0,0 @@
/**
 * Checks a release name for dubbing markers (dublado, dual, nacional, multi),
 * ignoring case.
 *
 * @param {string} name - release name
 * @returns {RegExpMatchArray|null} the match when a marker is present, else null
 */
function isPtDubbed(name) {
  const lowerCased = name.toLowerCase();
  return lowerCased.match(/dublado|dual|nacional|multi/);
}
/**
 * Cleans up a release name: moves an upper-case resolution token to the end,
 * strips site watermarks and metadata prefixes from the start, and trims.
 *
 * @param {string} name - raw release name
 * @returns {string} sanitized name
 */
function sanitizePtName(name) {
  const resolutionToken = /(.*)\b(\d{3,4}P)\b(?!.*\d{3,4}[Pp])(.*)/;
  const siteWatermark = /^[\[{]?(?:ACESSE.*|WWW\.)?[A-Z]+\.(COM|NET|ORG|TO|TV|ME)\b\s*[-\]}]+[\s.]*/i;
  const metadataPrefix = /^(\d*(?:\.\d{1,2})?(?:[4A-Z-]{3,}|P)[-.]+)+/;
  const danglingSiteEnding = /^(COM|NET|ORG|TO|TV|ME)\b\s*-+[\s.]*/;

  // add resolution to the end if missing
  let cleaned = name.replace(resolutionToken, '$1$3 $2');
  // the watermark is stripped both before and after the metadata prefixes,
  // since removing a prefix can expose another watermark
  cleaned = cleaned.replace(siteWatermark, '');
  cleaned = cleaned.replace(metadataPrefix, '');
  cleaned = cleaned.replace(siteWatermark, '');
  cleaned = cleaned.replace(danglingSiteEnding, '');
  return cleaned.trim();
}
/**
 * Trims the original title and removes the first season marker: either a
 * trailing "S<digits>" or "<digit><any char> Temporada".
 *
 * @param {string} name - original title
 * @returns {string} title without the season marker
 */
function sanitizePtOriginalName(name) {
  const seasonMarker = /S\d+$|\d.\s?[Tt]emporada/;
  const trimmed = name.trim();
  return trimmed.replace(seasonMarker, '');
}
/**
 * Normalizes a languages string: applies two replacements and trims the result.
 *
 * @param {string} languages - raw languages text
 * @returns {string} normalized languages text
 */
function sanitizePtLanguages(languages) {
return languages
// NOTE(review): this pattern looks like a mis-encoded character sequence that is
// mapped back to 'ê' - confirm the intended bytes against the original file.
.replace(/<2F><>/g, 'ê')
// NOTE(review): the matched whitespace character may be a special (e.g. non-breaking)
// space that was lost in extraction - confirm.
.replace(/ /g, '')
.trim();
}
/**
 * Converts a human readable size ("1.4 GB", "700 Mo", "1,024 KB") into bytes.
 * Both English (B) and French (o) unit suffixes are recognised. Quote and comma
 * characters are dropped before the number is parsed.
 *
 * @param {string} sizeText - size text including its unit
 * @returns {number|undefined} size in bytes rounded down, NaN when no number can
 *   be parsed, or undefined when no text is given
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  let scale = 1;
  if (/Gi?B|Go/.test(sizeText)) {
    scale = 1024 ** 3;
  } else if (/Mi?B|Mo/.test(sizeText)) {
    scale = 1024 ** 2;
  } else if (/[Kk]i?B|Ko/.test(sizeText)) {
    scale = 1024;
  } else if (/Ti?B|To/.test(sizeText)) {
    // Checked last so every text that already matched a smaller unit keeps its scale.
    scale = 1024 ** 4;
  }
  return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale);
}
module.exports = { parseSize, isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages }

View File

@@ -1,129 +0,0 @@
const axios = require('axios');
const { escapeHTML } = require('../../lib/metadata');
// Base URL every API endpoint is appended to.
const baseUrl = 'https://apibay.org';
// Timeout passed to axios for each request.
const timeout = 5000;
// Numeric category ids grouped by top-level section; within each group
// ALL is the section id itself and the other keys are its subcategories.
const Categories = {
AUDIO: {
ALL: 100,
MUSIC: 101,
AUDIO_BOOKS: 102,
SOUND_CLIPS: 103,
FLAC: 104,
OTHER: 199
},
VIDEO: {
ALL: 200,
MOVIES: 201,
MOVIES_DVDR: 202,
MUSIC_VIDEOS: 203,
MOVIE_CLIPS: 204,
TV_SHOWS: 205,
HANDHELD: 206,
MOVIES_HD: 207,
TV_SHOWS_HD: 208,
MOVIES_3D: 209,
OTHER: 299
},
APPS: {
ALL: 300,
WINDOWS: 301,
MAC: 302,
UNIX: 303,
HANDHELD: 304,
IOS: 305,
ANDROID: 306,
OTHER_OS: 399
},
GAMES: {
ALL: 400,
PC: 401,
MAC: 402,
PSx: 403,
XBOX360: 404,
Wii: 405,
HANDHELD: 406,
IOS: 407,
ANDROID: 408,
OTHER: 499
},
PORN: {
ALL: 500,
MOVIES: 501,
MOVIES_DVDR: 502,
PICTURES: 503,
GAMES: 504,
MOVIES_HD: 505,
MOVIE_CLIPS: 506,
OTHER: 599
},
OTHER: {
ALL: 600,
E_BOOKS: 601,
COMICS: 602,
PICTURES: 603,
COVERS: 604,
PHYSIBLES: 605,
OTHER: 699
}
};
/**
 * Fetches a single torrent by id, retrying failed requests.
 *
 * @param {string|number} torrentId - id of the torrent
 * @param {number} [retries=2] - remaining retry attempts
 * @returns {Promise<object>} the mapped torrent; rejects when the id is missing
 *   or when the last attempt fails
 */
async function torrent(torrentId, retries = 2) {
  if (!torrentId) {
    throw new Error('No valid torrentId provided');
  }
  try {
    const result = await _request(`t.php?id=${torrentId}`);
    return toTorrent(result);
  } catch (error) {
    if (retries) {
      return torrent(torrentId, retries - 1);
    }
    throw error;
  }
}
/**
 * Searches torrents by keyword, retrying failed requests.
 *
 * @param {string} keyword - search phrase
 * @param {object} [config] - options; `category` defaults to Categories.VIDEO.ALL
 * @param {number} [retries=2] - remaining retry attempts
 * @returns {Promise<object[]>} mapped torrents; rejects when the keyword is
 *   missing or when the last attempt fails
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword) {
    return Promise.reject(new Error('No valid keyword provided'));
  }
  // Encode the keyword so characters such as '&', '#' or spaces cannot break the query string.
  const q = encodeURIComponent(keyword);
  const cat = config.category || Categories.VIDEO.ALL;
  return _request(`q.php?q=${q}&cat=${cat}`)
    .then(results => results.map((result) => toTorrent(result)))
    .catch(error => retries ? search(keyword, config, retries - 1) : Promise.reject(error));
}
/**
 * Lists the torrents of a category page, retrying failed requests.
 *
 * @param {object} [config] - `category` (defaults to 0) and 1-based `page`
 * @param {number} [retries=2] - remaining retry attempts
 * @returns {Promise<object[]>} mapped torrents; rejects when the last attempt fails
 */
async function browse(config = {}, retries = 2) {
  const category = config.category || 0;
  // The endpoint takes a 0-based page; a missing page yields NaN and falls back to 0.
  const page = config.page - 1 || 0;
  try {
    const results = await _request(`q.php?q=category:${category}:${page}`);
    return results.map((result) => toTorrent(result));
  } catch (error) {
    if (retries) {
      return browse(config, retries - 1);
    }
    throw error;
  }
}
/**
 * Performs a GET request against the API and returns the parsed body.
 *
 * @param {string} endpoint - path and query appended to the base URL
 * @returns {Promise<object|Array>} the response body
 * @throws {Error} when the body is not a non-null object (e.g. an HTML error page)
 */
async function _request(endpoint) {
  const url = `${baseUrl}/${endpoint}`;
  const response = await axios.get(url, { timeout });
  const body = response.data;
  // `typeof null` is 'object' as well, so null has to be excluded explicitly.
  if (body === null || typeof body !== 'object') {
    throw new Error('Unexpected response body');
  }
  return body;
}
/**
 * Maps a raw API result onto the torrent shape used by the scrapers.
 *
 * @param {object} result - raw API record (id, name, info_hash, size, ...)
 * @returns {object} torrent with numeric fields parsed and `added` (seconds)
 *   converted to a Date
 */
function toTorrent(result) {
  const toInt = (value) => parseInt(value);
  const { id, name, info_hash: infoHash, added, imdb, num_files: numFiles } = result;
  return {
    torrentId: id,
    name: escapeHTML(name),
    infoHash: infoHash.toLowerCase(),
    size: toInt(result.size),
    seeders: toInt(result.seeders),
    leechers: toInt(result.leechers),
    subcategory: toInt(result.category),
    uploadDate: new Date(added * 1000),
    imdbId: imdb || undefined,
    filesCount: numFiles && toInt(numFiles) || undefined
  };
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,175 +0,0 @@
const axios = require('axios');
const moment = require('moment');
const Bottleneck = require('bottleneck');
const { ungzip } = require('node-gzip');
const LineByLineReader = require('line-by-line');
const fs = require('fs');
const thepiratebay = require('./thepiratebay_api.js');
const bing = require('nodejs-bing');
const { Type } = require('../../lib/types');
const { escapeHTML } = require('../../lib/metadata');
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
// Provider name stored on created torrent entries.
const NAME = 'ThePirateBay';
// Local path the dump is written to by downloadDump and read from by scrape.
const CSV_FILE_PATH = '/tmp/tpb_dump.csv';
// Caps how many dump rows are processed concurrently.
const limiter = new Bottleneck({ maxConcurrent: 40 });
/**
 * Imports torrents from the ThePirateBay CSV dump file.
 * The file is read line by line; each valid row is turned into a record and
 * processed through the limiter. Reading is paused while the limiter still has
 * queued jobs and resumed once it is empty.
 *
 * @returns {Promise<void>} resolves once reading has been started (not finished)
 */
async function scrape() {
  // NOTE(review): hard-coded placeholder without a `url`; downloadDump can only
  // succeed when the dump file already exists locally - confirm.
  const lastDump = { updatedAt: 2147000000 };
  // Rows up to and including this count are skipped without being processed.
  const checkPoint = 0;

  if (lastDump) {
    console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
    await downloadDump(lastDump);

    let entriesProcessed = 0;
    const lr = new LineByLineReader(CSV_FILE_PATH);
    lr.on('line', (line) => {
      // header line of the dump
      if (line.includes("#ADDED")) {
        return;
      }
      if (entriesProcessed % 1000 === 0) {
        console.log(`Processed ${entriesProcessed} entries`);
      }
      if (entriesProcessed <= checkPoint) {
        entriesProcessed++;
        return;
      }

      // match() yields null for lines without any field (e.g. an empty line);
      // treat those as invalid instead of throwing inside the event handler.
      const row = line.match(/(?<=^|;)(".*"|[^;]+)(?=;|$)/g);
      if (!row || row.length !== 4) {
        console.log(`Invalid row: ${line}`);
        return;
      }
      const torrent = {
        uploadDate: moment(row[0], 'YYYY-MMM-DD HH:mm:ss').toDate(),
        infoHash: Buffer.from(row[1], 'base64').toString('hex'),
        title: escapeHTML(row[2])
          .replace(/^"|"$/g, '')
          .replace(/&#?\w{2,6};/g, ' ')
          .replace(/\s+/g, ' ')
          .trim(),
        size: parseInt(row[3], 10)
      };

      // Back-pressure: stop reading while the limiter still has queued jobs.
      if (!limiter.empty()) {
        lr.pause();
      }

      limiter.schedule(() => processTorrentRecord(torrent)
        .catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
        .then(() => limiter.empty())
        .then((empty) => empty && lr.resume())
        .then(() => entriesProcessed++);
    });
    lr.on('error', (err) => {
      console.log(err);
    });
    lr.on('end', () => {
      console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`);
    });
  }
}
// Subcategories that are stored; torrents in any other subcategory get a skip entry.
const allowedCategories = [
thepiratebay.Categories.VIDEO.MOVIES,
thepiratebay.Categories.VIDEO.MOVIES_HD,
thepiratebay.Categories.VIDEO.MOVIES_DVDR,
thepiratebay.Categories.VIDEO.MOVIES_3D,
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
// Subcategories stored as series; every other allowed subcategory is stored as a movie.
const seriesCategories = [
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
/**
 * Processes one dump record: does nothing when it is already stored, creates a
 * skip entry when the torrent cannot be found or is in a disallowed
 * subcategory, otherwise creates a torrent entry.
 *
 * @param {object} record - dump record (title, infoHash, uploadDate, size)
 * @returns {Promise<*>} undefined when already stored, else the result of the
 *   skip/create call
 */
async function processTorrentRecord(record) {
  const alreadyStored = await getStoredTorrentEntry(record);
  if (alreadyStored) {
    return;
  }

  const torrentFound = await findTorrent(record);
  const isAllowed = Boolean(torrentFound) && allowedCategories.includes(torrentFound.subcategory);
  if (!isAllowed) {
    return createSkipTorrentEntry(record);
  }

  const isSeries = seriesCategories.includes(torrentFound.subcategory);
  return createTorrentEntry({
    infoHash: torrentFound.infoHash,
    provider: NAME,
    torrentId: torrentFound.torrentId,
    title: torrentFound.name,
    size: torrentFound.size,
    type: isSeries ? Type.SERIES : Type.MOVIE,
    // the imdbId is only kept for series
    imdbId: isSeries && torrentFound.imdbId || undefined,
    uploadDate: torrentFound.uploadDate || record.uploadDate,
    seeders: torrentFound.seeders,
  });
}
/**
 * Looks a dump record up in the source API and falls back to a Bing lookup
 * when that fails.
 *
 * @param {object} record - dump record
 * @returns {Promise<object|undefined>} the torrent found, if any
 */
async function findTorrent(record) {
  try {
    return await findTorrentInSource(record);
  } catch (error) {
    return findTorrentViaBing(record);
  }
}
/**
 * Searches the source API by the record title (up to 5 attempts) for a result
 * with the record's info hash, then fetches the full torrent for that result.
 *
 * @param {object} record - dump record with `title` and `infoHash`
 * @returns {Promise<object>} the full torrent
 * @throws {Error} when no search result matches the record's info hash
 */
async function findTorrentInSource(record) {
  // Replace every run of non-word characters, not only the first one.
  const keyword = record.title.replace(/[\W\s]+/g, ' ');
  // Search results expose `infoHash`; a `magnetLink` is also accepted when present,
  // instead of dereferencing a property the result may not have.
  const matchesRecord = (candidate) =>
    (candidate.infoHash || '').toLowerCase() === record.infoHash
    || (candidate.magnetLink || '').toLowerCase().includes(record.infoHash);

  let page = 0;
  let torrentFound;
  while (!torrentFound && page < 5) {
    const torrents = await thepiratebay.search(keyword, { page });
    torrentFound = torrents.find(matchesRecord);
    // an empty result ends the search immediately
    page = torrents.length === 0 ? 1000 : page + 1;
  }
  if (!torrentFound) {
    throw new Error(`Failed to find torrent ${record.title}`);
  }
  // one additional attempt when fetching the full torrent fails
  return thepiratebay.torrent(torrentFound.torrentId)
    .catch(() => thepiratebay.torrent(torrentFound.torrentId));
}
/**
 * Searches Bing for the record's info hash and fetches the torrent whose id
 * is extracted from the link of the first result that looks like a torrent page.
 *
 * @param {object} record - dump record with `infoHash` and `title`
 * @returns {Promise<object|undefined>} the torrent, or undefined when no
 *   suitable search result exists
 */
async function findTorrentViaBing(record) {
  const results = await bing.web(`${record.infoHash}`);
  const torrentPage = results.find(result =>
    result.description.includes('Direct download via magnet link')
    || result.description.includes('Get this torrent'));

  if (!torrentPage) {
    console.warn(`Failed to find torrent ${record.title}`);
    return undefined;
  }

  const torrentId = torrentPage.link.match(/torrent\/(\w+)\//)[1];
  return torrentId && thepiratebay.torrent(torrentId);
}
/**
 * Downloads, unzips and stores the dump file unless it already exists locally.
 *
 * @param {object} dump - dump descriptor; `dump.url` is the download location
 * @returns {Promise<void>|undefined} undefined when the file already exists,
 *   otherwise a promise for the written file
 */
function downloadDump(dump) {
  let alreadyDownloaded = false;
  try {
    alreadyDownloaded = fs.existsSync(CSV_FILE_PATH);
  } catch (err) {
    // a failed existence check is logged and the download is attempted anyway
    console.error(err)
  }
  if (alreadyDownloaded) {
    console.log('dump file already exist...');
    return;
  }

  console.log('downloading dump file...');
  const requestOptions = { timeout: 2000, responseType: 'stream' };
  return axios.get(dump.url, requestOptions)
    .then((response) => response.data)
    .then((zipped) => {
      console.log('unzipping dump file...');
      return ungzip(zipped);
    })
    .then((contents) => {
      console.log('writing dump file...');
      return fs.promises.writeFile(CSV_FILE_PATH, contents);
    });
}
module.exports = { scrape, NAME };

View File

@@ -1,43 +0,0 @@
const moment = require('moment');
const { Sequelize } = require('sequelize');
const Bottleneck = require('bottleneck');
const thepiratebay = require('./thepiratebay_api.js');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
// Provider whose stored torrents are checked.
const NAME = 'ThePirateBay';
// All-zero info hash; a torrent reported with this hash is treated as fake by removeIfFake.
const EMPTY_HASH = '0000000000000000000000000000000000000000';
// Sequelize operators, used for the createdAt range query.
const Op = Sequelize.Op;
// Caps how many torrents are checked concurrently.
const limiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Checks the movie torrents of this provider created during the last 14 days
 * and removes the ones detected as fake.
 *
 * @returns {Promise<void>}
 */
async function scrape() {
  console.log(`Starting ${NAME} fake removal...`);
  const startCreatedAt = moment().subtract(14, 'day');
  const endCreatedAt = moment();

  const torrents = await repository.getTorrentsBasedOnQuery({
    provider: NAME,
    type: Type.MOVIE,
    createdAt: { [Op.between]: [startCreatedAt, endCreatedAt] }
  });
  console.log(`Checking for ${NAME} fake entries in ${torrents.length} torrents`);

  const results = await Promise.all(torrents.map(torrent => limiter.schedule(() => removeIfFake(torrent))));
  // removeIfFake yields a falsy value for torrents that were kept
  const removed = results.filter(Boolean);
  console.log(`Finished ${NAME} fake removal with ${removed.length} removals in ${results.length} torrents`);
}
/**
 * Deletes the stored torrent when the API reports it with the all-zero info hash.
 * Lookup and deletion failures both resolve to null.
 *
 * @param {object} torrent - stored torrent entry
 * @returns {Promise<*>} the deletion result, or null when nothing was removed
 */
async function removeIfFake(torrent) {
  const tpbTorrentInfo = await thepiratebay.torrent(torrent.torrentId).catch(() => null);
  const isFake = Boolean(tpbTorrentInfo) && tpbTorrentInfo.infoHash === EMPTY_HASH;
  if (!isFake) {
    return null;
  }
  console.log(`Removing ${NAME} fake torrent [${torrent.torrentId}][${torrent.infoHash}] ${torrent.title}`);
  return repository.deleteTorrent(torrent).catch(() => null);
}
module.exports = { scrape, NAME };

View File

@@ -1,98 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const thepiratebay = require('./thepiratebay_api.js');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
// Provider name stored on created torrent entries and used in log messages.
const NAME = 'ThePirateBay';
// Caps how many listing records are processed concurrently.
const limiter = new Bottleneck({ maxConcurrent: 10 });
// Categories that are browsed; records in any other subcategory are rejected.
const allowedCategories = [
thepiratebay.Categories.VIDEO.MOVIES,
thepiratebay.Categories.VIDEO.MOVIES_HD,
thepiratebay.Categories.VIDEO.MOVIES_3D,
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
// Subcategories stored as series; every other allowed subcategory is stored as a movie.
const seriesCategories = [
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
/**
 * Runs one full ThePirateBay scrape and records its start time on the provider entry.
 *
 * @returns {Promise<void>} resolves once the provider entry has been saved
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();

  // The start time (not the end time) is stored as the last scrape moment.
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Seeder refresh hook; currently disabled, so nothing is refreshed.
 *
 * @param {object} torrent - stored torrent entry (unused)
 * @returns {Promise<Array>} always resolves to an empty array
 */
async function updateSeeders(torrent) {
  return [];
}
/**
 * Scrapes every allowed category via Promises.sequence and flattens the
 * per-category results one level.
 *
 * @returns {Promise<Array>} concatenated per-category results
 */
async function scrapeLatestTorrents() {
  const categoryTasks = allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(categoryTasks);
  return [].concat(...perCategory);
}
/**
 * Scrapes one listing page of a category and recurses into the next page
 * while the page produced records and the category's page limit is not reached.
 * A failed listing request is logged and treated as an empty page.
 *
 * @param {number} category - one of thepiratebay.Categories.VIDEO
 * @param {number} [page=1] - listing page to scrape
 * @returns {Promise<undefined>}
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);

  const torrents = await thepiratebay.browse({ category, page })
    .catch(error => {
      console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
      return [];
    });
  const processed = await Promise.all(
    torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))));

  const hasNextPage = processed.length > 0 && page < getUntilPage(category);
  if (hasNextPage) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
/**
 * Processes one listing record: updates it when already stored, rejects records
 * in disallowed subcategories, otherwise creates a torrent entry.
 *
 * @param {object} record - listing record as returned by thepiratebay.browse
 * @returns {Promise<*>} the record when already stored, 'Invalid torrent record'
 *   for missing/disallowed records, else the result of createTorrentEntry
 */
async function processTorrentRecord(record) {
  // Guard against a missing record before it is passed on and dereferenced.
  if (!record) {
    return 'Invalid torrent record';
  }
  if (await checkAndUpdateTorrent(record)) {
    return record;
  }
  if (!allowedCategories.includes(record.subcategory)) {
    return 'Invalid torrent record';
  }

  const isSeries = seriesCategories.includes(record.subcategory);
  const torrent = {
    infoHash: record.infoHash,
    provider: NAME,
    torrentId: record.torrentId,
    title: record.name.replace(/\t|\s+/g, ' '),
    type: isSeries ? Type.SERIES : Type.MOVIE,
    size: record.size,
    seeders: record.seeders,
    uploadDate: record.uploadDate,
    // the imdbId is only kept for series
    imdbId: isSeries && record.imdbId || undefined,
    languages: record.languages && record.languages.trim() || undefined
  };
  return createTorrentEntry(torrent);
}
/**
 * Returns how many listing pages are scraped for a category:
 * 1 for 3D movies, 10 for TV shows (SD and HD), 5 for everything else.
 *
 * @param {number} category - one of thepiratebay.Categories.VIDEO
 * @returns {number} last page number to scrape
 */
function getUntilPage(category) {
  const video = thepiratebay.Categories.VIDEO;
  if (category === video.MOVIES_3D) {
    return 1;
  }
  if (category === video.TV_SHOWS || category === video.TV_SHOWS_HD) {
    return 10;
  }
  return 5;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,112 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const LineByLineReader = require('line-by-line');
const decode = require('magnet-uri');
const thepiratebay = require('./thepiratebay_api.js');
const { Type } = require('../../lib/types');
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
// Provider name stored on created torrent entries.
const NAME = 'ThePirateBay';
// Local path of the CSV dump read by scrape.
const CSV_FILE_PATH = '/tmp/tpb.csv';
// Caps how many dump rows are processed concurrently.
const limiter = new Bottleneck({ maxConcurrent: 40 });
/**
 * Imports torrents from the unofficial ThePirateBay CSV dump.
 * The file is read line by line; each valid row is turned into a record and
 * processed through the limiter. Reading is paused while the limiter still has
 * queued jobs and resumed once it is empty.
 *
 * @returns {Promise<void>} resolves once reading has been started (not finished)
 */
async function scrape() {
  console.log(`starting to scrape tpb dump...`);
  // Rows up to and including this count are skipped without being processed.
  const checkPoint = 4115000;
  let entriesProcessed = 0;

  // Builds a torrent record out of the 10 columns of a dump row.
  const toRecord = (row) => ({
    torrentId: row[0],
    title: row[1]
      .replace(/^"|"$/g, '')
      .replace(/&amp;/g, '&')
      .replace(/&\w{2,6};/g, ' ')
      .replace(/\s+/g, ' ')
      .trim(),
    size: parseInt(row[2], 10),
    category: row[4],
    subcategory: row[5],
    // fall back to the hash encoded in the magnet link when the column is empty
    infoHash: row[7].toLowerCase() || decode(row[9]).infoHash,
    magnetLink: row[9],
    uploadDate: moment(row[8]).toDate(),
  });

  const lr = new LineByLineReader(CSV_FILE_PATH);
  const onLine = (line) => {
    if (entriesProcessed % 1000 === 0) {
      console.log(`Processed ${entriesProcessed} entries`);
    }
    if (entriesProcessed <= checkPoint) {
      entriesProcessed++;
      return;
    }

    const row = line.match(/(?<=^|,)(".*"|[^,]*)(?=,|$)/g);
    if (row.length !== 10) {
      console.log(`Invalid row: ${line}`);
      return;
    }
    const torrent = toRecord(row);

    // Back-pressure: stop reading while the limiter still has queued jobs.
    if (!limiter.empty()) {
      lr.pause()
    }

    limiter.schedule(() => processTorrentRecord(torrent)
      .catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
      .then(() => limiter.empty())
      .then((empty) => empty && lr.resume())
      .then(() => entriesProcessed++);
  };

  lr.on('line', onLine);
  lr.on('error', (err) => {
    console.log(err);
  });
  lr.on('end', () => {
    console.log(`finished to scrape tpb dump!`);
  });
}
// Subcategories that are stored; torrents in any other subcategory get a skip entry.
const allowedCategories = [
thepiratebay.Categories.VIDEO.MOVIES,
thepiratebay.Categories.VIDEO.MOVIES_HD,
thepiratebay.Categories.VIDEO.MOVIES_DVDR,
thepiratebay.Categories.VIDEO.MOVIES_3D,
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
// Subcategories stored as series; every other allowed subcategory is stored as a movie.
const seriesCategories = [
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
/**
 * Processes one dump record: non-video records and torrents that cannot be
 * found or are in a disallowed subcategory get a skip entry, already stored
 * records are ignored, everything else is created as a torrent entry.
 *
 * @param {object} record - dump record (torrentId, category, ...)
 * @returns {Promise<*>} undefined when already stored, else the result of the
 *   skip/create call
 */
async function processTorrentRecord(record) {
  if (record.category !== 'Video') {
    return createSkipTorrentEntry(record);
  }
  const alreadyStored = await getStoredTorrentEntry(record);
  if (alreadyStored) {
    return;
  }

  const torrentFound = await thepiratebay.torrent(record.torrentId);
  const isAllowed = Boolean(torrentFound) && allowedCategories.includes(torrentFound.subcategory);
  if (!isAllowed) {
    return createSkipTorrentEntry(record);
  }

  const { infoHash, torrentId, name, size, subcategory, imdbId, uploadDate, seeders } = torrentFound;
  return createTorrentEntry({
    infoHash,
    provider: NAME,
    torrentId,
    title: name,
    size,
    type: seriesCategories.includes(subcategory) ? Type.SERIES : Type.MOVIE,
    imdbId,
    uploadDate,
    seeders,
  });
}
module.exports = { scrape, NAME };

View File

@@ -1,119 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const decode = require('magnet-uri');
const { parse } = require('parse-torrent-title');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
// Site root every request URL is built from.
const baseUrl = 'https://www.torrent9.re'
// Timeout passed to axios for each request.
const defaultTimeout = 10000;
// Page size used by browse to turn a page number into a listing offset.
const pageSize = 50;
// Listing categories; the values are the path segments used in browse URLs.
const Categories = {
MOVIE: 'films',
TV: 'series',
};
/**
 * Fetches and parses a torrent page, retrying failed attempts.
 *
 * @param {string} torrentId - id (path part) of the torrent page
 * @param {object} [config] - unused, passed along on retries
 * @param {number} [retries=2] - remaining attempts
 * @returns {Promise<object>} parsed torrent including its torrentId; rejects
 *   when the id is missing or no attempts are left
 */
async function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    throw new Error(`Failed ${torrentId} search`);
  }
  try {
    const body = await singleRequest(`${baseUrl}/torrent/${torrentId}`);
    const parsed = await parseTorrentPage(body);
    return { torrentId, ...parsed };
  } catch (err) {
    console.warn(`Failed Torrent9 ${torrentId} request: `, err);
    return torrent(torrentId, config, retries - 1);
  }
}
/**
 * Fetches and parses one listing page of a category, retrying failed attempts.
 *
 * @param {object} [config] - `category` and 1-based `page` (defaults to 1)
 * @param {number} [retries=2] - remaining attempts
 * @param {*} [error=null] - failure of the previous attempt, used as rejection
 *   reason once no attempts are left
 * @returns {Promise<object[]>} parsed listing rows
 */
async function browse(config = {}, retries = 2, error = null) {
  if (retries === 0) {
    throw error || new Error(`Failed browse request`);
  }
  const page = config.page || 1;
  const category = config.category;
  // listing URLs address pages by the 1-based offset of their first entry
  const offset = (page - 1) * pageSize + 1;
  try {
    const body = await singleRequest(`${baseUrl}/torrents/${category}/${offset}`);
    return await parseTableBody(body);
  } catch (err) {
    return browse(config, retries - 1, err);
  }
}
/**
 * Performs a GET request with browser-like headers and returns the body.
 *
 * @param {string} requestUrl - URL to fetch
 * @returns {Promise<string>} the non-empty response body; rejects with the
 *   error message (or the raw error when it has none) on failure or empty body
 */
async function singleRequest(requestUrl) {
  const options = {
    headers: {
      'user-agent': getRandomUserAgent(),
      'accept-encoding': 'gzip, deflate',
      'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4'
    },
    timeout: defaultTimeout
  };
  try {
    const response = await axios.get(requestUrl, options);
    const body = response.data;
    if (!body || !body.length) {
      throw new Error(`No body: ${requestUrl} with status ${response.status}`);
    }
    return body;
  } catch (error) {
    // callers receive the plain message rather than the Error object
    throw error.message || error;
  }
}
/**
 * Parses a listing page into torrent rows. Rows that cannot be parsed are
 * logged and skipped so one broken row does not discard the whole page.
 *
 * @param {string} body - HTML of the listing page
 * @returns {Promise<object[]>} rows with `name`, `torrentId` and `seeders`
 */
async function parseTableBody(body) {
  const $ = cheerio.load(body);
  if (!$) {
    throw new Error('Failed loading body');
  }

  const torrents = [];
  $('tbody tr').each((i, element) => {
    const row = $(element);
    const titleElement = row.find('td a');
    try {
      torrents.push({
        name: titleElement.text().trim(),
        torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
        seeders: parseInt(row.find('span.seed_ok').first().text()),
      });
    } catch (e) {
      console.error('Failed parsing Torrent9 row: ', e);
    }
  });
  return torrents;
}
/**
 * Parses a torrent page into a torrent object.
 *
 * @param {string} body - HTML of the torrent page
 * @returns {Promise<object>} torrent (title, infoHash, links, seeders, category,
 *   size, uploadDate, languages); rejects when the page cannot be parsed
 */
async function parseTorrentPage(body) {
  const $ = cheerio.load(body);
  if (!$) {
    throw new Error('Failed loading body');
  }

  const details = $('.movie-detail');
  const cellText = (selector) => details.find(selector).text();
  const magnetLink = details.find('a[href^="magnet"]').first().attr('href');
  const torrentLink = details.find('div.download-btn:nth-of-type(1) a').first().attr('href');
  const name = details.find('p strong').contents().filter((_, e) => e.type === 'text').text() || $('h5, h1').text();
  const languages = parse(name).languages;

  return {
    title: name.trim(),
    infoHash: magnetLink ? decode(magnetLink).infoHash : undefined,
    magnetLink: magnetLink,
    torrentLink: torrentLink ? `${baseUrl}${torrentLink}` : undefined,
    seeders: parseInt(cellText('.movie-information ul:nth-of-type(1) li:nth-of-type(3)'), 10),
    category: details.find('ul:nth-of-type(4) a').attr('href').match(/\/(\w+)$/)[1],
    size: parseSize(cellText('ul:nth-of-type(2) li:nth-of-type(3)')),
    uploadDate: moment(cellText('ul:nth-of-type(3) li:nth-of-type(3)'), 'DD/MM/YYYY').toDate(),
    // 'french' is only set explicitly when the title itself does not yield it
    languages: languages && languages.includes('french') ? undefined : 'french',
  };
}
module.exports = { torrent, browse, Categories };

View File

@@ -1,104 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const torrent9 = require('./torrent9_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
// Provider name stored on created torrent entries and used in log messages.
const NAME = 'Torrent9';
// Category -> content type lookup, built by typeMapping().
const TYPE_MAPPING = typeMapping();
// Throttles site requests (browse and torrent): one at a time, minTime 5000.
const api_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 5000 });
// Caps how many listing records are processed concurrently.
const limiter = new Bottleneck({ maxConcurrent: 10 });
// Categories that are scraped.
const allowedCategories = [
torrent9.Categories.MOVIE,
torrent9.Categories.TV,
];
// API clients every category is scraped with.
const clients = [
torrent9
];
/**
 * Runs one full Torrent9 scrape and records its start time on the provider entry.
 *
 * @returns {Promise<void>} resolves once the provider entry has been saved
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();

  // The start time (not the end time) is stored as the last scrape moment.
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Scrapes every allowed category with every client via Promises.sequence and
 * flattens the results one level.
 *
 * @returns {Promise<Array>} concatenated results of all scrape tasks
 */
async function scrapeLatestTorrents() {
  const scrapeTasks = [];
  for (const category of allowedCategories) {
    for (const client of clients) {
      scrapeTasks.push(() => scrapeLatestTorrentsForCategory(client, category));
    }
  }
  const perTask = await Promises.sequence(scrapeTasks);
  return [].concat(...perTask);
}
/**
 * Scrapes one listing page of a category and recurses into the next page
 * while the page produced records and the category's page limit is not reached.
 * A failed listing request is logged and treated as an empty page.
 *
 * @param {object} client - API client providing `browse` and `torrent`
 * @param {string} category - one of torrent9.Categories
 * @param {number} [page=1] - listing page to scrape
 * @returns {Promise<Array>} an empty array once the last page is done
 */
async function scrapeLatestTorrentsForCategory(client, category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);

  const results = await api_limiter.schedule(() => client.browse({ category, page }))
    .catch(error => {
      console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
      return [];
    });
  const processed = await Promise.all(
    results.map(r => limiter.schedule(() => processTorrentRecord(client, r))));

  const hasNextPage = processed.length > 0 && page < getUntilPage(category);
  if (hasNextPage) {
    return scrapeLatestTorrentsForCategory(client, category, page + 1);
  }
  return [];
}
/**
 * Processes one listing record: updates it when already stored (by torrentId
 * or, after fetching the details, by the full torrent), otherwise creates an entry.
 *
 * @param {object} client - API client providing `torrent`
 * @param {object} record - listing record with `torrentId`
 * @returns {Promise<object>} the record when it was stored already or could
 *   not be fetched, otherwise the torrent built from the fetched details
 */
async function processTorrentRecord(client, record) {
  const knownById = await checkAndUpdateTorrent({ provider: NAME, torrentId: record.torrentId });
  if (knownById) {
    return record;
  }

  const details = await api_limiter.schedule(() => client.torrent(record.torrentId)).catch(() => undefined);
  if (!details || !details.infoHash) {
    console.warn(`Failed retrieving torrent ${record.torrentId}`);
    return record;
  }

  const torrent = {
    provider: NAME,
    infoHash: details.infoHash,
    magnetLink: details.magnetLink,
    torrentLink: details.torrentLink,
    torrentId: details.torrentId,
    title: details.title,
    type: TYPE_MAPPING[details.category],
    size: details.size,
    seeders: details.seeders,
    uploadDate: details.uploadDate,
    imdbId: details.imdbId,
    languages: details.languages
  };

  // second check with the full torrent, now that the info hash is known
  const knownByTorrent = await checkAndUpdateTorrent(torrent);
  if (knownByTorrent) {
    console.info(`Skipping torrent ${torrent.torrentId} - [${torrent.infoHash}] ${torrent.title}`);
    return torrent;
  }
  await createTorrentEntry(torrent);
  return torrent;
}
/**
 * Builds the lookup from Torrent9 categories to internal content types.
 *
 * @returns {object} mapping keyed by category value
 */
function typeMapping() {
  return {
    [torrent9.Categories.MOVIE]: Type.MOVIE,
    [torrent9.Categories.TV]: Type.SERIES,
  };
}
/**
 * Returns how many listing pages are scraped for a category:
 * 2 for TV, 1 for everything else.
 *
 * @param {string} category - one of torrent9.Categories
 * @returns {number} last page number to scrape
 */
function getUntilPage(category) {
  return category === torrent9.Categories.TV ? 2 : 1;
}
module.exports = { scrape, NAME };

View File

@@ -1,124 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const decode = require('magnet-uri');
const { parse } = require('parse-torrent-title');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
// Site root every request URL is built from.
const baseUrl = 'https://www.torrent9.gg'
// Timeout passed to axios for each request.
const defaultTimeout = 10000;
// Listing categories; the values are the name parts used in browse URLs.
const Categories = {
MOVIE: 'films',
TV: 'series',
};
/**
 * Fetches and parses a torrent page, retrying failed attempts.
 *
 * @param {string} torrentId - id (path part) of the torrent page
 * @param {object} [config] - unused, passed along on retries
 * @param {number} [retries=2] - remaining attempts
 * @returns {Promise<object>} parsed torrent including its torrentId; rejects
 *   when the id is missing or no attempts are left
 */
async function torrent(torrentId, config = {}, retries = 2) {
  const attemptsLeft = retries !== 0;
  if (!torrentId || !attemptsLeft) {
    throw new Error(`Failed ${torrentId} search`);
  }
  try {
    const pageBody = await singleRequest(`${baseUrl}/torrent/${torrentId}`);
    const pageTorrent = await parseTorrentPage(pageBody);
    return { torrentId, ...pageTorrent };
  } catch (err) {
    console.warn(`Failed Torrent9 ${torrentId} request: `, err);
    return torrent(torrentId, config, retries - 1);
  }
}
/**
 * Fetches and parses one listing page of a category, retrying failed attempts.
 *
 * @param {object} [config] - `category` and 1-based `page` (defaults to 1)
 * @param {number} [retries=2] - remaining attempts
 * @param {*} [error=null] - failure of the previous attempt, used as rejection
 *   reason once no attempts are left
 * @returns {Promise<object[]>} parsed listing rows
 */
async function browse(config = {}, retries = 2, error = null) {
  if (retries === 0) {
    throw error || new Error(`Failed browse request`);
  }
  const page = config.page || 1;
  const category = config.category;
  try {
    const body = await singleRequest(`${baseUrl}/torrents_${category}.html,page-${page}`);
    return await parseTableBody(body);
  } catch (err) {
    return browse(config, retries - 1, err);
  }
}
/**
 * Performs a GET request with browser-like headers and returns the body.
 *
 * @param {string} requestUrl - URL to fetch
 * @returns {Promise<string>} the non-empty response body; rejects with the
 *   error message (or the raw error when it has none) on failure or empty body
 */
async function singleRequest(requestUrl) {
  const requestHeaders = {
    'user-agent': getRandomUserAgent(),
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4'
  };
  try {
    const response = await axios.get(requestUrl, { headers: requestHeaders, timeout: defaultTimeout });
    const { data } = response;
    const hasContent = Boolean(data) && Boolean(data.length);
    if (!hasContent) {
      throw new Error(`No body: ${requestUrl} with status ${response.status}`);
    }
    return data;
  } catch (error) {
    // callers receive the plain message rather than the Error object
    throw error.message || error;
  }
}
/**
 * Parses a listing page into torrent rows. Rows without a link are ignored;
 * rows that cannot be parsed (e.g. a link without title or torrent href) are
 * logged and skipped so one broken row does not discard the whole page.
 *
 * @param {string} body - HTML of the listing page
 * @returns {Promise<object[]>} rows with `title`, `torrentId` and `seeders`
 */
async function parseTableBody(body) {
  const $ = cheerio.load(body);
  if (!$) {
    throw new Error('Failed loading body');
  }

  const torrents = [];
  $('tr').each((i, element) => {
    const row = $(element);
    const titleElement = row.find('td a');
    if (!titleElement.length) {
      return;
    }
    try {
      torrents.push({
        title: titleElement.attr('title').trim(),
        torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
        seeders: parseInt(row.find('span.seed_ok').first().text()),
      });
    } catch (e) {
      console.error('Failed parsing Torrent9 row: ', e);
    }
  });
  return torrents;
}
/**
 * Parses a torrent page into a torrent object.
 *
 * @param {string} body - HTML of the torrent page
 * @returns {Promise<object>} torrent (title, infoHash, magnetLink, seeders,
 *   category, size, uploadDate, languages); rejects when the page cannot be parsed
 */
async function parseTorrentPage(body) {
  const $ = cheerio.load(body);
  if (!$) {
    throw new Error('Failed loading body');
  }

  const details = $('.movie-detail');
  const cellText = (selector) => details.find(selector).text();
  const magnetLink = details.find('a[href^="magnet"]').first().attr('href');
  const name = getName(details) || $('h1').text();
  const languages = parse(name).languages;

  return {
    title: name.trim(),
    infoHash: magnetLink ? decode(magnetLink).infoHash : undefined,
    magnetLink: magnetLink,
    seeders: parseInt(cellText('.movie-information ul:nth-of-type(1) li:nth-of-type(3)'), 10),
    category: details.find('ul:nth-of-type(4) a').attr('href').match(/_(\w+)\.html$/)[1],
    size: parseSize(cellText('ul:nth-of-type(2) li:nth-of-type(3)')),
    uploadDate: moment(cellText('ul:nth-of-type(3) li:nth-of-type(3)'), 'DD/MM/YYYY').toDate(),
    // 'french' is only set explicitly when the title itself does not yield it
    languages: languages && languages.includes('french') ? undefined : 'french',
  };
}
/**
 * Extracts the release name from the details section.
 * With exactly one `p strong` element its direct text nodes are used; otherwise
 * the name is searched for in the text of the elements' parent.
 *
 * @param {object} details - cheerio selection of the details section
 * @returns {string|null} the name, or null when the description holds no match
 */
function getName(details) {
  const strongElements = details.find('p strong');
  if (strongElements.length !== 1) {
    const releaseName = strongElements.parent().text().match(
      /(?:[A-Z]+[^A-Z0-9]*|[A-Z0-9-]+(?:[a-z]+\d+)?)\.([\w-]+\.){3,}\w+(?:-\w+)?(?=[A-Z])/);
    return releaseName && releaseName[0];
  }
  return strongElements.contents().filter((_, node) => node.type === 'text').text();
}
module.exports = { torrent, browse, Categories };

View File

@@ -1,171 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
// Site roots that are queried when the caller does not pass config.proxyList.
const defaultProxies = [
// 'https://torrentgalaxy.to',
// 'https://torrentgalaxy.mx',
'https://torrentgalaxy.su'
];
// Timeout passed to axios for each request.
const defaultTimeout = 10000;
// Category ids as used in the `cat` query parameter.
const Categories = {
ANIME: '28',
MOVIE_4K: '3',
MOVIE_PACKS: '4',
MOVIE_SD: '1',
MOVIE_HD: '42',
MOVIE_CAM: '45',
MOVIE_BOLLYWOOD: '46',
TV_SD: '5',
TV_HD: '41',
TV_PACKS: '6',
TV_SPORT: '7',
DOCUMENTARIES: '9'
};
/**
 * Fetches and parses a torrent page via the configured proxies, retrying
 * failed attempts.
 *
 * @param {string} torrentId - id of the torrent page
 * @param {object} [config] - `proxyList` overrides the default proxies
 * @param {number} [retries=2] - remaining attempts
 * @returns {Promise<object>} parsed torrent including its torrentId; rejects
 *   when the id is missing or no attempts are left
 */
async function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    throw new Error(`Failed ${torrentId} search`);
  }
  const proxyList = config.proxyList || defaultProxies;
  try {
    const pendingRequests = proxyList.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`));
    const body = await Promises.first(pendingRequests);
    const parsed = await parseTorrentPage(body);
    return { torrentId, ...parsed };
  } catch (err) {
    return torrent(torrentId, config, retries - 1);
  }
}
/**
 * Searches torrents by keyword via the configured proxies, retrying failed attempts.
 *
 * @param {string} keyword - search phrase
 * @param {object} [config] - `proxyList`, `category` and 1-based `page` (defaults to 1)
 * @param {number} [retries=2] - remaining attempts
 * @returns {Promise<object[]>} parsed result rows; rejects when the keyword is
 *   missing or no attempts are left
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const page = config.page || 1;
  const category = config.category;
  // Encode the keyword so characters such as '&', '#' or spaces cannot break the query string.
  const encodedKeyword = encodeURIComponent(keyword);

  return Promises.first(proxyList
    .map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}&search=${encodedKeyword}`)))
    .then((body) => parseTableBody(body))
    .catch(() => search(keyword, config, retries - 1));
}
/**
 * Loads one page of the torrent listing for a category.
 *
 * @param {object} [config] optional settings: `proxyList`, `page` (1-based,
 *     defaults to 1) and `category`
 * @param {number} [retries=2] remaining attempts
 * @param {Error|null} [error=null] failure of the previous attempt, used as the
 *     rejection reason once the retries are exhausted
 * @returns {Promise<Array<object>>} rows parsed by `parseTableBody`
 */
function browse(config = {}, retries = 2, error = null) {
  if (retries === 0) {
    return Promise.reject(error || new Error(`Failed browse request`));
  }
  const proxies = config.proxyList || defaultProxies;
  const pageNumber = config.page || 1;
  const categoryId = config.category;
  // The site's page parameter is zero-based, hence `pageNumber - 1`.
  const attempts = proxies.map(
      (proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${categoryId}&page=${pageNumber - 1}`));
  const retryWith = (failure) => browse(config, retries - 1, failure);
  return Promises.first(attempts)
      .then((body) => parseTableBody(body))
      .catch(retryWith);
}
/**
 * Performs one GET request with a random User-Agent and validates the body.
 *
 * @param {string} requestUrl absolute URL to fetch
 * @returns {Promise<string>} the response body
 * @throws {Error} (as a rejection) when the body is empty or contains one of
 *     the known block / gateway-error markers
 */
function singleRequest(requestUrl) {
  // Marker texts of error pages that may be served in place of the real content.
  const errorPageMarkers = ['502: Bad gateway', '403 Forbidden', 'Origin DNS error'];
  const requestOptions = {
    headers: { 'User-Agent': getRandomUserAgent() },
    timeout: defaultTimeout,
  };

  const validateBody = (response) => {
    const body = response.data;
    if (!body) {
      throw new Error(`No body: ${requestUrl} with status ${response.status}`);
    }
    if (body.includes('Access Denied')) {
      console.log(`Access Denied: ${requestUrl}`);
      throw new Error(`Access Denied: ${requestUrl}`);
    }
    if (errorPageMarkers.some((marker) => body.includes(marker))) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  };

  return axios.get(requestUrl, requestOptions).then(validateBody);
}
/**
 * Parses a torrent listing page into an array of torrent records.
 *
 * Rows that cannot be parsed (missing magnet link, unexpected markup, ...) are
 * logged and skipped so one bad row does not discard the whole page.
 *
 * @param {string} body HTML of the listing page
 * @returns {Promise<Array<object>>} one record per successfully parsed row
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
      // Stop here: there is nothing to query once loading failed.
      return;
    }

    const torrents = [];
    $('.tgxtable > div').each((i, element) => {
      // The first child is skipped (presumably the table header row).
      if (i === 0) return;
      const row = $(element);
      try {
        const magnetLink = row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(2)').attr('href');
        const imdbIdMatch = row.html().match(/search=(tt\d+)/i);
        torrents.push({
          name: row.find('.tgxtablecell div a[title]').first().text(),
          infoHash: decode(magnetLink).infoHash,
          magnetLink: magnetLink,
          torrentLink: row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(1)').first().attr('href'),
          torrentId: row.find('.tgxtablecell div a[title]').first().attr('href').match(/torrent\/(\d+)/)[1],
          verified: !!row.find('i.fa-check').length,
          category: row.find('div:nth-of-type(n+2) .shrink a').first().attr('href').match(/cat=(\d+)$/)[1],
          // Explicit radix so the counts are always read as decimal.
          seeders: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'green\'] b').first().text(), 10),
          leechers: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'#ff0000\'] b').first().text(), 10),
          languages: row.find('.tgxtablecell img[title]').first().attr('title'),
          size: parseSize(row.find('.collapsehide span.badge-secondary').first().text()),
          uploadDate: parseDate(row.find('div.collapsehide:nth-of-type(12)').first().text()),
          imdbId: imdbIdMatch && imdbIdMatch[1],
        });
      } catch (e) {
        console.error('Failed parsing TorrentGalaxy row: ', e);
      }
    });

    resolve(torrents);
  });
}
/**
 * Parses a torrent details page into a torrent record.
 *
 * Any exception raised while reading the markup (for example a missing
 * category link) surfaces as a rejection of the returned promise.
 *
 * @param {string} body HTML of the details page
 * @returns {Promise<object>} the parsed torrent record
 */
async function parseTorrentPage(body) {
  const $ = cheerio.load(body);
  if (!$) {
    throw new Error('Failed loading body');
  }

  const content = $('div[class="torrentpagetable limitwidth"]').first();
  const magnetLink = $('a[class="btn btn-danger"]').attr('href');
  const imdbHref = $('a[title="IMDB link"]').attr('href');
  const imdbMatch = imdbHref && imdbHref.match(/imdb\.com\/title\/(tt\d+)/i);

  // Fields are read in the same order as they appear in the resulting record.
  const name = content.find('.linebreakup a').first().text();
  const infoHash = decode(magnetLink).infoHash;
  // A warning triangle on the page marks the torrent as not verified.
  const verified = !content.find('i.fa-exclamation-triangle').length;
  const torrentLink = $('a[class="btn btn-success"]').attr('href');
  const seeders = parseInt(content.find('font[color=\'green\']').first().text(), 10);
  const category = content.find('div:nth-of-type(4) a:nth-of-type(2)').first().attr('href').match(/cat=(\d+)$/)[1];
  const languages = content.find('div:nth-of-type(5) div:nth-of-type(2)').first().text().trim();
  const size = parseSize(content.find('div:nth-of-type(6) div:nth-of-type(2)').first().text());
  const uploadDate = parseDate(content.find('div:nth-of-type(9) div:nth-of-type(2)').first().text());
  const imdbId = imdbMatch && imdbMatch[1];

  return {
    name,
    infoHash,
    magnetLink,
    verified,
    torrentLink,
    seeders,
    category,
    languages,
    size,
    uploadDate,
    imdbId,
  };
}
/**
 * Converts an upload date as shown on the site into a `Date`.
 *
 * Two formats are handled: relative ("<n> ... ago", counted in minutes when
 * the text contains "Min" and in hours otherwise) and absolute
 * (`DD/MM/YY HH:mm`, expanded to a four-digit year in the 2000s).
 *
 * @param {string} dateString raw date text
 * @returns {Date} the parsed date
 */
function parseDate(dateString) {
  if (!dateString.includes('ago')) {
    const normalized = dateString
        .replace(/\//g, '-')
        // Two-digit year followed by whitespace -> prefix with the century.
        .replace(/-(\d{2})\s/, '-20$1 ');
    return moment(normalized, 'DD-MM-YYYY HH:mm').toDate();
  }
  const amount = parseInt(dateString, 10);
  const unit = dateString.includes('Min') ? 'minutes' : 'hours';
  return moment().subtract(amount, unit).toDate();
}
// Public interface: single-torrent lookup, keyword search, category browsing and the category id table.
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,132 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const torrentGalaxy = require('./torrentgalaxy_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
// Provider name: used to look up the provider record and stored on every torrent entry.
const NAME = 'TorrentGalaxy';
// Category id -> content type lookup, built once at module load by typeMapping().
const TYPE_MAPPING = typeMapping();
// Limiter for listing-page requests: one at a time with minTime 5000 between jobs (Bottleneck's minTime is in milliseconds).
const api_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 5000 });
// Limiter for per-torrent work: at most 10 jobs at once.
const limiter = new Bottleneck({ maxConcurrent: 10 });
// Categories visited by scrapeLatestTorrents(), in this order.
// NOTE(review): TV_SPORT is absent here although typeMapping() maps it - confirm this is intentional.
const allowedCategories = [
torrentGalaxy.Categories.ANIME,
torrentGalaxy.Categories.MOVIE_4K,
torrentGalaxy.Categories.MOVIE_PACKS,
torrentGalaxy.Categories.MOVIE_SD,
torrentGalaxy.Categories.MOVIE_HD,
torrentGalaxy.Categories.MOVIE_CAM,
torrentGalaxy.Categories.MOVIE_BOLLYWOOD,
torrentGalaxy.Categories.TV_SD,
torrentGalaxy.Categories.TV_HD,
torrentGalaxy.Categories.TV_PACKS,
torrentGalaxy.Categories.DOCUMENTARIES,
];
// Categories whose torrents get `pack: true` in processTorrentRecord().
const packCategories = [
torrentGalaxy.Categories.MOVIE_PACKS,
torrentGalaxy.Categories.TV_PACKS
];
/**
 * Runs one full TorrentGalaxy scrape and records when it started.
 *
 * The provider record's `lastScraped` is only updated (and saved) after the
 * scrape itself has completed without rejecting.
 *
 * @returns {Promise<void>}
 */
async function scrape() {
  const startedAt = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  // Debug aid for re-processing specific torrent ids:
  // const ids = ['14212584'];
  // return Promise.all(ids.map(id => limiter.schedule(() => torrentGalaxy.torrent(id)
  //     .then(torrent => processTorrentRecord(torrent)))))
  //     .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));

  await scrapeLatestTorrents();
  provider.lastScraped = startedAt;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Re-fetches a torrent's details page through the shared limiter.
 *
 * @param {object} torrent stored torrent; only `torrentId` is read
 * @returns {Promise<object>} whatever `torrentGalaxy.torrent` resolves with
 */
async function updateSeeders(torrent) {
  const fetchDetails = () => torrentGalaxy.torrent(torrent.torrentId);
  return limiter.schedule(fetchDetails);
}
/**
 * Scrapes every allowed category, one after another, and merges the results.
 *
 * @returns {Promise<Array>} per-category results concatenated into one array
 */
async function scrapeLatestTorrents() {
  const categoryJobs = allowedCategories.map(
      (category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(categoryJobs);
  // Flatten one level: array results are spread, anything else is appended as-is.
  return [].concat(...perCategory);
}
/**
 * Scrapes one listing page of a category, processes every torrent on it and
 * continues with the next page while the page was non-empty and the category's
 * page limit has not been reached.
 *
 * A failed page request is logged and treated as an empty page, which also
 * ends the pagination for this category.
 *
 * @param {string} category category id to scrape
 * @param {number} [page=1] 1-based page number to start from
 * @returns {Promise<undefined>}
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);

  const torrents = await api_limiter
      .schedule(() => torrentGalaxy.browse({ category, page }))
      .catch((error) => {
        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
        return [];
      });

  const processed = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processTorrentRecord(torrent))));

  const hasMorePages = processed.length > 0 && page < getMaxPage(category);
  if (hasMorePages) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
/**
 * Turns a scraped listing row into a torrent entry and stores it if needed.
 *
 * Rows without a mapped category or without the verified flag are rejected
 * up front. Torrents that `checkAndUpdateTorrent` reports as handled are
 * returned untouched, and old unseeded non-pack torrents are skipped.
 *
 * @param {object} record row produced by the TorrentGalaxy API module
 * @returns {Promise<object|string>} the torrent entry, or the string
 *     'Invalid torrent record' for rejected rows
 */
async function processTorrentRecord(record) {
  const type = record && TYPE_MAPPING[record.category];
  if (!type || !record.verified) {
    return 'Invalid torrent record';
  }

  const languageText = record.languages || '';
  const torrent = {
    provider: NAME,
    infoHash: record.infoHash,
    torrentId: record.torrentId,
    torrentLink: record.torrentLink,
    title: record.name.replace(/\t|\s+/g, ' '),
    type,
    size: record.size,
    seeders: record.seeders,
    uploadDate: record.uploadDate,
    imdbId: record.imdbId,
    pack: packCategories.includes(record.category),
    // A language value containing 'Other' is dropped.
    languages: languageText.includes('Other') ? undefined : record.languages,
  };

  const alreadyHandled = await checkAndUpdateTorrent(torrent);
  if (alreadyHandled) {
    return torrent;
  }

  const uploadedAt = moment(torrent.uploadDate);
  const isOld = uploadedAt.isBefore(moment().subtract(18, 'month'));
  if (torrent.seeders === 0 && isOld && !torrent.pack) {
    console.log(`Skipping old unseeded torrent [${torrent.infoHash}] ${torrent.title}`);
    return torrent;
  }

  await createTorrentEntry(torrent);
  return torrent;
}
/**
 * Builds the lookup from TorrentGalaxy category id to content type.
 *
 * @returns {object} plain object keyed by category id
 */
function typeMapping() {
  const { Categories } = torrentGalaxy;
  return {
    // Movies (documentaries are stored as movies too)
    [Categories.MOVIE_SD]: Type.MOVIE,
    [Categories.MOVIE_HD]: Type.MOVIE,
    [Categories.MOVIE_4K]: Type.MOVIE,
    [Categories.MOVIE_CAM]: Type.MOVIE,
    [Categories.MOVIE_PACKS]: Type.MOVIE,
    [Categories.MOVIE_BOLLYWOOD]: Type.MOVIE,
    [Categories.DOCUMENTARIES]: Type.MOVIE,
    // Series
    [Categories.TV_SD]: Type.SERIES,
    [Categories.TV_HD]: Type.SERIES,
    [Categories.TV_PACKS]: Type.SERIES,
    [Categories.TV_SPORT]: Type.SERIES,
    // Anime
    [Categories.ANIME]: Type.ANIME,
  };
}
/**
 * Returns how many listing pages are scraped for a category: five for the
 * SD/HD movie and TV categories, one for everything else.
 *
 * @param {string} category category id
 * @returns {number} page limit for the category
 */
function getMaxPage(category) {
  const { TV_SD, TV_HD, MOVIE_SD, MOVIE_HD } = torrentGalaxy.Categories;
  const deepCategories = [TV_SD, TV_HD, MOVIE_SD, MOVIE_HD];
  return deepCategories.includes(category) ? 5 : 1;
}
// Public interface: the scrape entry point, the seeders refresh hook and the provider name.
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,98 +0,0 @@
const axios = require('axios');
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require('./../../lib/requestHelper');
// Base URLs every YTS API request is sent to.
const defaultProxies = [
'https://yts.mx'
];
// Fallback for the axios `timeout` option when config.timeout is not set (axios expects milliseconds).
const defaultTimeout = 30000;
// Page size sent as the `limit` query parameter; maxPage() also divides the movie count by it.
const limit = 50;
/**
 * Looks up one movie through the `movie_details` endpoint and returns its
 * torrents.
 *
 * @param {string|number} torrentId value sent as `movie_id`
 * @param {object} [config] forwarded to `singleRequest` (e.g. `timeout`)
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<Array<object>>} torrents parsed by `parseResults`
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} search`));
  }
  const attempts = defaultProxies.map(
      (proxyUrl) => singleRequest(`${proxyUrl}/api/v2/movie_details.json?movie_id=${torrentId}`, config));
  const retry = () => torrent(torrentId, config, retries - 1);
  return Promises.first(attempts)
      .then((body) => parseResults(body))
      .catch(retry);
}
/**
 * Searches movies through the `list_movies` endpoint.
 *
 * @param {string} query free-text search term; URL-encoded before it is put
 *     into `query_term` so spaces, `&`, `#` and similar cannot corrupt the request
 * @param {object} [config] forwarded to `singleRequest` (e.g. `timeout`)
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<Array<object>>} torrents parsed by `parseResults`
 */
function search(query, config = {}, retries = 2) {
  if (!query || retries === 0) {
    return Promise.reject(new Error(`Failed ${query} search`));
  }
  const queryTerm = encodeURIComponent(query);
  return Promises.first(defaultProxies
      .map((proxyUrl) => singleRequest(
          `${proxyUrl}/api/v2/list_movies.json?limit=${limit}&query_term=${queryTerm}`, config)))
      .then((results) => parseResults(results))
      .catch(() => search(query, config, retries - 1));
}
/**
 * Loads one page of the movie listing.
 *
 * @param {object} [config] `page` (defaults to 1) selects the page; the whole
 *     object is also forwarded to `singleRequest` (e.g. `timeout`)
 * @param {number} [retries=2] remaining attempts
 * @returns {Promise<Array<object>>} torrents parsed by `parseResults`
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const pageNumber = config.page || 1;
  const attempts = defaultProxies.map(
      (proxyUrl) => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&page=${pageNumber}`, config));
  const retry = () => browse(config, retries - 1);
  return Promises.first(attempts)
      .then((results) => parseResults(results))
      .catch(retry);
}
/**
 * Computes how many listing pages exist, based on the total movie count the
 * API reports and the page size `limit`.
 *
 * The count is rounded up: a partially filled last page still has to be
 * visited, otherwise its movies would never be scraped.
 *
 * @returns {Promise<number>} number of pages (0 when the count is missing)
 */
function maxPage() {
  return Promises.first(defaultProxies
      .map((proxyUrl) => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}`)))
      .then((results) => Math.ceil((results?.data?.movie_count || 0) / limit));
}
/**
 * Performs one GET request with a random User-Agent.
 *
 * @param {string} requestUrl absolute URL to fetch
 * @param {object} [config] `timeout` overrides the default request timeout
 * @returns {Promise<*>} the response payload (`response.data`)
 * @throws {Error} (as a rejection) when the response carries no payload
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout };
  return axios.get(requestUrl, options)
      .then((response) => {
        if (!response.data) {
          // Reject with a real Error so callers get a stack trace and `.message`.
          throw new Error(`No body: ${requestUrl}`);
        }
        return response.data;
      });
}
/**
 * Converts a YTS API response into a flat list of torrents.
 *
 * Accepts both response shapes: `data.movies` (listing) and `data.movie`
 * (single movie details). Movies without a `torrents` array are ignored.
 *
 * Note the mixed return type: a plain array on success, a rejected promise on
 * malformed input. Both work when this is used inside a `.then()` callback.
 *
 * @param {object} results API response payload
 * @returns {Array<object>|Promise<never>} the torrents, or a promise rejected
 *     with an `Error` when the payload has neither shape
 */
function parseResults(results) {
  if (!results || !results.data || (!results.data.movie && !Array.isArray(results.data.movies))) {
    console.log('Incorrect results: ', results);
    // Reject with a real Error rather than a bare string.
    return Promise.reject(new Error('Incorrect results'));
  }
  return (results.data.movies || [results.data.movie])
      .filter((movie) => Array.isArray(movie.torrents))
      .flatMap((movie) => parseMovie(movie));
}
/**
 * Expands one movie from the YTS API into one record per torrent.
 *
 * @param {object} movie API movie object with a `torrents` array
 * @returns {Array<object>} torrent records; the info hash is trimmed and
 *     lower-cased, and the torrent id is `<movie id>-<info hash>`
 */
function parseMovie(movie) {
  const toRecord = (release) => {
    const infoHash = release.hash.trim().toLowerCase();
    // The API reports the upload time in seconds; Date expects milliseconds.
    const uploadDate = new Date(release.date_uploaded_unix * 1000);
    return {
      name: `${movie.title} ${movie.year} ${release.quality} ${formatType(release.type)} `,
      torrentId: `${movie.id}-${infoHash}`,
      infoHash,
      torrentLink: release.url,
      seeders: release.seeds,
      size: release.size_bytes,
      uploadDate,
      imdbId: movie.imdb_code,
    };
  };
  return movie.torrents.map((release) => toRecord(release));
}
/**
 * Maps the API's release type to the label used in torrent names.
 *
 * @param {string} type release type as reported by the API
 * @returns {string} 'WEBRip' for 'web', 'BluRay' for 'bluray', otherwise the
 *     type in upper case
 */
function formatType(type) {
  switch (type) {
    case 'web':
      return 'WEBRip';
    case 'bluray':
      return 'BluRay';
    default:
      return type.toUpperCase();
  }
}
// Public interface: movie lookup, search, listing pages and the page-count helper.
module.exports = { torrent, search, browse, maxPage };

View File

@@ -1,15 +0,0 @@
const moment = require("moment");
const yts = require('./yts_api');
const scraper = require('./yts_scraper')
/**
 * Runs a full YTS scrape: asks the API how many listing pages exist and lets
 * the regular scraper walk all of them.
 *
 * @returns {Promise<void>}
 */
async function scrape() {
  const startedAt = moment();
  console.log(`[${startedAt}] starting ${scraper.NAME} full scrape...`);
  const lastPage = await yts.maxPage();
  await scraper.scrape(lastPage);
  console.log(`[${moment()}] finished ${scraper.NAME} full scrape`);
}
// Exposes the full-scrape entry point under the same provider name as the regular YTS scraper.
module.exports = { scrape, NAME: scraper.NAME };

View File

@@ -1,67 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const yts = require('./yts_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
// Provider name: used to look up the provider record and stored on every torrent entry.
const NAME = 'YTS';
// Default number of listing pages to walk when scrape() is called without a page count.
const UNTIL_PAGE = 10;
// Limiter for per-torrent work: at most 10 jobs at once.
const limiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Scrapes the YTS listing and records when the scrape started.
 *
 * The provider record's `lastScraped` is only updated (and saved) after the
 * scrape itself has completed without rejecting.
 *
 * @param {number} [maxPage] last page to visit; when omitted the callee's
 *     default (`UNTIL_PAGE`) applies
 * @returns {Promise<void>}
 */
async function scrape(maxPage) {
  const startedAt = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  await scrapeLatestTorrentsForCategory(maxPage);
  provider.lastScraped = startedAt;
  await provider.save();

  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/**
 * Re-fetches a torrent's data from the YTS API through the shared limiter.
 *
 * @param {object} torrent stored torrent; only `torrentId` is read
 * @returns {Promise<*>} whatever `yts.torrent` resolves with
 */
async function updateSeeders(torrent) {
  const refresh = () => yts.torrent(torrent.torrentId);
  return limiter.schedule(refresh);
}
/**
 * Scrapes one listing page, processes every torrent on it and continues with
 * the next page while the page was non-empty and `maxPage` has not been reached.
 *
 * A failed page request is logged and treated as an empty page, which also
 * ends the pagination.
 *
 * @param {number} [maxPage=UNTIL_PAGE] last page to visit
 * @param {number} [page=1] page to start from
 * @returns {Promise<undefined>}
 */
async function scrapeLatestTorrentsForCategory(maxPage = UNTIL_PAGE, page = 1) {
  console.log(`Scrapping ${NAME} page ${page}`);

  const torrents = await yts.browse(({ page }))
      .catch((error) => {
        console.warn(`Failed ${NAME} scrapping for [${page}] due: `, error);
        return [];
      });

  const processed = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processTorrentRecord(torrent))));

  const hasMorePages = processed.length > 0 && page < maxPage;
  if (hasMorePages) {
    return scrapeLatestTorrentsForCategory(maxPage, page + 1);
  }
  return undefined;
}
/**
 * Stores a torrent scraped from YTS unless it is already known.
 *
 * A missing record is rejected before anything else touches it. Records that
 * `checkAndUpdateTorrent` reports as handled are returned as-is, and records
 * without a size are rejected rather than stored.
 *
 * @param {object} record torrent record produced by the YTS API module
 * @returns {Promise<object|string>} the known record, the newly created
 *     torrent entry, or the string 'Invalid torrent record'
 */
async function processTorrentRecord(record) {
  // Guard first: the record must exist before it is handed to checkAndUpdateTorrent.
  if (!record) {
    return 'Invalid torrent record';
  }
  if (await checkAndUpdateTorrent(record)) {
    return record;
  }
  // The size check stays after the update so known torrents are still refreshed.
  if (!record.size) {
    return 'Invalid torrent record';
  }

  const torrent = {
    infoHash: record.infoHash,
    provider: NAME,
    torrentId: record.torrentId,
    title: record.name.replace(/\t|\s+/g, ' ').trim(),
    type: Type.MOVIE,
    size: record.size,
    seeders: record.seeders,
    uploadDate: record.uploadDate,
    imdbId: record.imdbId,
  };
  return createTorrentEntry(torrent).then(() => torrent);
}
// Public interface: the scrape entry point, the seeders refresh hook and the provider name.
module.exports = { scrape, updateSeeders, NAME };