Merge pull request #26 from iPromKnight/master

Big rewrite of scraping solution
This commit is contained in:
Gabisonfire
2024-02-02 11:00:34 -05:00
committed by GitHub
246 changed files with 22850 additions and 69803 deletions

1319
.editorconfig Normal file

File diff suppressed because it is too large Load Diff

406
.gitignore vendored
View File

@@ -1,5 +1,405 @@
/.idea
**/node_modules
**.env
.now
/scraper/manual/examples.js
.DS_Store
.idea
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# ASP.NET Scaffolding
ScaffoldingReadMe.txt
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.tlog
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Coverlet is a free, cross platform Code Coverage Tool
coverage*.json
coverage*.xml
coverage*.info
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio 6 auto-generated project file (contains which files were open etc.)
*.vbp
# Visual Studio 6 workspace and project file (working project files containing files to include in project)
*.dsw
*.dsp
# Visual Studio 6 technical files
*.ncb
*.aps
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# Visual Studio History (VSHistory) files
.vshistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# Local History for Visual Studio Code
.history/
# Windows Installer files from build outputs
*.cab
*.msi
*.msix
*.msm
*.msp
# JetBrains Rider
*.sln.iml
dist/

View File

@@ -6,4 +6,10 @@
```
docker-compose up -d
```
Then open your browser to `127.0.0.1:7000`
Then open your browser to `127.0.0.1:7000`
If you'd like to enable crawling of RealDebridManager's shared hashlists which will massively boost your database cached entries,
enter a readonly github personal access token in 'env/producer.env' as the 'GithubSettings__PAT=<token_here>' value.
You can scale the number of consumers, by changing the consumer deploy replica count in the compose file on line 87. This is currently set to 3.
If you'd like to adjust the number of concurrent processed ingestions per consumer, that's the job concurrency setting within 'env/consumer.env'.

View File

@@ -1,12 +0,0 @@
# Addon container image: Node 21 on Alpine.
FROM node:21-alpine
# Keep base packages current and add git (required for git-based npm dependencies).
RUN apk update && apk upgrade && \
apk add --no-cache git
WORKDIR /home/node/app
# Copy manifests first so the dependency layer is cached across source changes.
COPY package*.json ./
# NOTE(review): `--only-production` is deprecated in newer npm; `--omit=dev` is the modern flag.
RUN npm ci --only-production
COPY . .
# --insecure-http-parser tolerates malformed HTTP from some upstreams — TODO confirm it is still required.
CMD [ "node", "--insecure-http-parser", "index.js" ]

View File

@@ -1,82 +0,0 @@
import { DebridOptions } from '../moch/options.js';
import { QualityFilter, Providers, SizeFilter } from './filter.js';
import { LanguageOptions } from './languages.js';
// Named, ready-made addon configurations addressable directly by name in the
// configuration URL segment (e.g. /lite/manifest.json). Each entry carries the
// parsed config, its serialized form, and manifest overrides applied when active.
export const PreConfigurations = {
  lite: {
    config: liteConfig(),
    serialized: configValue(liteConfig()),
    manifest: {
      id: 'com.stremio.torrentio.lite.addon',
      name: 'Torrentio Lite',
      description: 'Preconfigured Lite version of Torrentio addon.'
          + ' To configure advanced options visit https://torrentio.strem.fun/lite'
    }
  },
  brazuca: {
    config: brazucaConfig(),
    serialized: configValue(brazucaConfig()),
    manifest: {
      id: 'com.stremio.torrentio.brazuca.addon',
      name: 'Torrentio Brazuca',
      description: 'Preconfigured version of Torrentio addon for Brazilian content.'
          + ' To configure advanced options visit https://torrentio.strem.fun/brazuca',
      logo: 'https://i.ibb.co/8mgRZPp/GwxAcDV.png'
    }
  }
}
// Config keys whose serialized values are comma-separated lists that must be split.
const keysToSplit = [Providers.key, LanguageOptions.key, QualityFilter.key, SizeFilter.key, DebridOptions.key];
// List-valued keys whose entries are normalised to upper case instead of lower case.
const keysToUppercase = [SizeFilter.key];

/**
 * Parses a serialized configuration segment ("key=value|key=value2,value3") into a config object.
 * Known preconfiguration names resolve directly to their predefined config.
 * @param {string} configuration serialized configuration (may be empty)
 * @returns {Object|undefined} parsed configuration map, or undefined for empty input
 */
export function parseConfiguration(configuration) {
  if (!configuration) {
    return undefined;
  }
  const preConfigured = PreConfigurations[configuration];
  if (preConfigured) {
    return preConfigured.config;
  }
  const configValues = {};
  for (const pair of configuration.split('|')) {
    const parameterParts = pair.split('=');
    if (parameterParts.length === 2) {
      configValues[parameterParts[0].toLowerCase()] = parameterParts[1];
    }
  }
  for (const key of keysToSplit) {
    if (configValues[key]) {
      const normalise = keysToUppercase.includes(key)
          ? value => value.toUpperCase()
          : value => value.toLowerCase();
      configValues[key] = configValues[key].split(',').map(normalise);
    }
  }
  return configValues;
}
/** Builds the Torrentio Lite config: non-foreign providers, cam/screener filtered, one result per quality. */
function liteConfig() {
  const providerKeys = Providers.options
      .filter(provider => !provider.foreign)
      .map(provider => provider.key);
  return {
    [Providers.key]: providerKeys,
    [QualityFilter.key]: ['scr', 'cam'],
    limit: 1
  };
}
/** Builds the Brazuca config: non-foreign plus Portuguese providers, portuguese language filter. */
function brazucaConfig() {
  const providerKeys = Providers.options
      .filter(provider => !provider.foreign || provider.foreign === '🇵🇹')
      .map(provider => provider.key);
  return {
    [Providers.key]: providerKeys,
    [LanguageOptions.key]: ['portuguese']
  };
}
/** Serializes a config object into "key=value|key=value" form; array values join with commas. */
function configValue(config) {
  const pairs = [];
  for (const [key, value] of Object.entries(config)) {
    const serialized = Array.isArray(value) ? value.join(',') : value;
    pairs.push(`${key}=${serialized}`);
  }
  return pairs.join('|');
}
/** Returns the manifest overrides of the preconfiguration matching the given config, or an empty object. */
export function getManifestOverride(config) {
  const match = Object.values(PreConfigurations)
      .find(preConfiguration => preConfiguration.config === config);
  return match ? match.manifest : {};
}

View File

@@ -1,11 +0,0 @@
import namedQueue from "named-queue";
/**
 * Creates a named-queue that deduplicates in-flight tasks by id, augmented with a
 * promise-based `wrap(id, method)` helper.
 * @param {number} concurrency unused legacy parameter (the queue is fixed at 200 workers) —
 *   kept so existing callers keep working; TODO confirm whether it should drive the queue size.
 * @returns {namedQueue} queue instance with an added `wrap` method.
 */
export function createNamedQueue(concurrency) {
  const queue = new namedQueue((task, callback) => task.method()
      .then(result => callback(false, result))
      .catch(error => callback(error)), 200);
  // Wraps a queued task in a Promise. Branch on the error argument (error-first callback)
  // rather than on the result, so tasks that legitimately resolve to falsy values
  // (undefined, 0, '') are not spuriously rejected as the original code did.
  queue.wrap = (id, method) => new Promise((resolve, reject) => {
    queue.push({ id, method }, (error, result) => error ? reject(error) : resolve(result));
  });
  return queue;
}

View File

@@ -1,128 +0,0 @@
import { QualityFilter } from './filter.js';
import { containsLanguage, LanguageOptions } from './languages.js';
import { Type } from './types.js';
import { hasMochConfigured } from '../moch/moch.js';
import { extractSeeders, extractSize } from './titleHelper.js';
// Quality option descriptors looked up once from the shared QualityFilter definitions.
const OTHER_QUALITIES = QualityFilter.options.find(option => option.key === 'other');
const CAM_QUALITIES = QualityFilter.options.find(option => option.key === 'cam');
// Seeder thresholds used by sortBySeeders to decide which streams are worth returning.
const HEALTHY_SEEDERS = 5; // minimum seeders for a stream to count as healthy
const SEEDED_SEEDERS = 1; // minimum seeders for a stream to count as seeded at all
const MIN_HEALTHY_COUNT = 50; // prefer healthy-only results when at least this many exist
const MAX_UNHEALTHY_COUNT = 5; // fallback cap when almost nothing is seeded

// User-facing sort options; `key` is the value accepted in the configuration's `sort` field.
export const SortOptions = {
  key: 'sort',
  options: {
    qualitySeeders: {
      key: 'quality',
      description: 'By quality then seeders'
    },
    qualitySize: {
      key: 'qualitysize',
      description: 'By quality then size'
    },
    seeders: {
      key: 'seeders',
      description: 'By seeders'
    },
    size: {
      key: 'size',
      description: 'By size'
    },
  }
}
/**
 * Sorts streams per the user config, placing streams matching the configured languages
 * before the rest. English is excluded from this prioritisation since it's hard to
 * predict which entries are english.
 */
export default function sortStreams(streams, config, type) {
  const languages = config[LanguageOptions.key];
  const prioritiseLanguages = languages?.length && languages[0] !== 'english';
  if (!prioritiseLanguages) {
    return _sortStreams(streams, config, type);
  }
  const matching = streams.filter(stream => containsLanguage(stream, languages));
  const remaining = streams.filter(stream => !matching.includes(stream));
  return [..._sortStreams(matching, config, type), ..._sortStreams(remaining, config, type)];
}
// Applies the configured sort option and result limit to a pre-filtered stream list.
function _sortStreams(streams, config, type) {
  const sort = config?.sort?.toLowerCase() || undefined;
  const limit = /^[1-9][0-9]*$/.test(config.limit) && parseInt(config.limit) || undefined;
  const seederSorted = sortBySeeders(streams, config, type);
  switch (sort) {
    case SortOptions.options.seeders.key:
      return seederSorted.slice(0, limit);
    case SortOptions.options.size.key:
      return sortBySize(seederSorted, limit);
    case SortOptions.options.qualitySize.key:
      // Quality buckets internally ordered by size.
      return sortByVideoQuality(seederSorted, sortBySize, limit);
    default:
      // Default ('quality' or unset): quality buckets in their incoming (seeder) order.
      return sortByVideoQuality(seederSorted, noopSort, limit);
  }
}
// Identity ordering — returns the given streams unchanged.
const noopSort = streams => streams;
// Filters streams down by seeder health; streams are already presorted by seeders and
// upload date, so filtering preserves that order. Series with a debrid (moch) service
// configured skip the filtering entirely, since cached availability matters more there.
function sortBySeeders(streams, config, type) {
  if (type === Type.SERIES && hasMochConfigured(config)) {
    return streams;
  }
  const seedersOf = stream => extractSeeders(stream.title);
  const healthy = streams.filter(stream => seedersOf(stream) >= HEALTHY_SEEDERS);
  if (healthy.length >= MIN_HEALTHY_COUNT) {
    return healthy;
  }
  const seeded = streams.filter(stream => seedersOf(stream) >= SEEDED_SEEDERS);
  if (seeded.length >= MAX_UNHEALTHY_COUNT) {
    return seeded.slice(0, MIN_HEALTHY_COUNT);
  }
  return streams.slice(0, MAX_UNHEALTHY_COUNT);
}
// Orders streams by extracted size, largest first, keeping at most `limit` entries.
// NOTE: sorts the given array in place, as the original implementation did.
function sortBySize(streams, limit) {
  streams.sort((first, second) => extractSize(second.title) - extractSize(first.title));
  return streams.slice(0, limit);
}
// Groups streams into buckets by extracted video quality, orders the buckets (numeric
// resolutions first, highest resolution first, then alphabetically), applies the nested
// sort and per-bucket limit, and flattens back into a single list.
function sortByVideoQuality(streams, nestedSort, limit) {
  const qualityMap = streams
      .reduce((map, stream) => {
        const quality = extractQuality(stream.name);
        map[quality] = (map[quality] || []).concat(stream);
        return map;
      }, {});
  const sortedQualities = Object.keys(qualityMap)
      .sort((a, b) => {
        // parseInt on e.g. "1080p" yields the numeric resolution; non-resolution keys
        // (cam/other labels) fail the /\d+p/ match and stay falsy.
        const aResolution = a?.match(/\d+p/) && parseInt(a, 10);
        const bResolution = b?.match(/\d+p/) && parseInt(b, 10);
        if (aResolution && bResolution) {
          return bResolution - aResolution; // higher resolution first;
        } else if (aResolution) {
          return -1; // remain higher if resolution is there
        } else if (bResolution) {
          return 1; // move downward if other stream has resolution
        }
        return a < b ? -1 : b < a ? 1 : 0; // otherwise sort by alphabetic order
      });
  return sortedQualities
      .map(quality => nestedSort(qualityMap[quality]).slice(0, limit))
      .reduce((a, b) => a.concat(b), []);
}
/**
 * Derives a quality bucket from a stream name, whose second line carries the quality
 * description. Returns a resolution string ("1080p"), a cam/other label, or the raw
 * description as fallback.
 */
function extractQuality(title) {
  const qualityDesc = title.split('\n')[1];
  const resolutionMatch = qualityDesc?.match(/\d+p/);
  if (resolutionMatch) {
    return resolutionMatch[0];
  } else if (/8k/i.test(qualityDesc)) {
    return '4320p'
  } else if (/4k|uhd/i.test(qualityDesc)) {
    // 4K/UHD is 2160p — fixed from the previous incorrect '2060p' label.
    return '2160p'
  } else if (CAM_QUALITIES.test(qualityDesc)) {
    return CAM_QUALITIES.label;
  } else if (OTHER_QUALITIES.test(qualityDesc)) {
    return OTHER_QUALITIES.label;
  }
  return qualityDesc;
}

2579
addon/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,12 +0,0 @@
# Catalogs container image: Node 16 on Alpine.
FROM node:16-alpine
# Keep base packages current and add git (required for git-based npm dependencies).
RUN apk update && apk upgrade && \
apk add --no-cache git
WORKDIR /home/node/app
# The catalogs service imports shared code from ../addon, so both trees are copied in.
COPY ./catalogs .
COPY ./addon ../addon
# NOTE(review): `--only-production` is deprecated in newer npm; `--omit=dev` is the modern equivalent.
RUN npm ci --only-production
CMD [ "node", "index.js" ]

View File

@@ -1,99 +0,0 @@
import Bottleneck from 'bottleneck';
import moment from 'moment';
import { addonBuilder } from 'stremio-addon-sdk';
import { Providers } from '../addon/lib/filter.js';
import { createManifest, genres } from './lib/manifest.js';
import { getMetas } from './lib/metadata.js';
import { cacheWrapCatalog, cacheWrapIds } from './lib/cache.js';
import * as repository from './lib/repository.js';
// Cache-control ages (in seconds) advertised to Stremio clients.
const CACHE_MAX_AGE = parseInt(process.env.CACHE_MAX_AGE) || 4 * 60 * 60; // 4 hours in seconds
const STALE_REVALIDATE_AGE = 4 * 60 * 60; // 4 hours
const STALE_ERROR_AGE = 7 * 24 * 60 * 60; // 7 days

const manifest = createManifest();
const builder = new addonBuilder(manifest);
// Sheds load when the queue overflows (OVERFLOW strategy drops jobs past highWater).
// NOTE(review): env values are strings here, not numbers — presumably Bottleneck coerces them; verify.
const limiter = new Bottleneck({
  maxConcurrent: process.env.LIMIT_MAX_CONCURRENT || 20,
  highWater: process.env.LIMIT_QUEUE_SIZE || 50,
  strategy: Bottleneck.strategy.OVERFLOW
});
// Labels of all non-foreign providers — the provider set used for every catalog query.
const defaultProviders = Providers.options
    .filter(provider => !provider.foreign)
    .map(provider => provider.label)
    .sort();
// Catalog requests: resolve the requested catalog, then serve a cached page of metas
// (keyed per day/genre/offset) through the rate limiter, with cache hints attached.
builder.defineCatalogHandler((args) => {
  const offset = parseInt(args.extra.skip || '0', 10);
  const genre = args.extra.genre || 'default';
  const catalog = manifest.catalogs.find(c => c.id === args.id);
  const providers = defaultProviders;
  console.log(`Incoming catalog ${args.id} request with genre=${genre} and skip=${offset}`)
  if (!catalog) {
    // NOTE(review): rejects with a plain string (with a "for with" typo) rather than an
    // Error — the SDK appears to tolerate it; consider `new Error(...)`.
    return Promise.reject(`No catalog found for with id: ${args.id}`)
  }
  const cacheKey = createCacheKey(catalog.id, providers, genre, offset);
  return limiter.schedule(() => cacheWrapCatalog(cacheKey, () => getCatalog(catalog, providers, genre, offset)))
      .then(metas => ({
        metas: metas,
        cacheMaxAge: CACHE_MAX_AGE,
        staleRevalidate: STALE_REVALIDATE_AGE,
        staleError: STALE_ERROR_AGE
      }))
      .catch(error => Promise.reject(`Failed retrieving catalog ${args.id}: ${error.message}`));
})
// Resolves the pagination cursor for the requested offset: the id of the last meta on
// the previous (cached) page, stripped of its 'kitsu:' prefix. Rejects when the previous
// page is not cached, so deep links without prior pages fail fast.
async function getCursor(catalog, providers, genre, offset) {
  if (offset === 0) {
    return undefined;
  }
  const previousCacheKey = createCacheKey(catalog.id, providers, genre, offset - catalog.pageSize);
  const previousMetas = await cacheWrapCatalog(previousCacheKey, () => Promise.reject("cursor not found"));
  const lastMeta = previousMetas[previousMetas.length - 1];
  return lastMeta.id.replace('kitsu:', '');
}
// Builds one catalog page: fetches the full seeded id list (cached per day), skips past
// the pagination cursor, resolves metadata and trims to the catalog page size.
async function getCatalog(catalog, providers, genre, offset) {
  const cursor = await getCursor(catalog, providers, genre, offset);
  const startDate = getStartDate(genre)?.toISOString();
  const endDate = getEndDate(genre)?.toISOString();
  const cacheKey = createCacheKey(catalog.id, providers, genre);
  const allIds = await cacheWrapIds(cacheKey, () => repository.getIds(providers, catalog.type, startDate, endDate));
  const pageIds = allIds.slice(allIds.indexOf(cursor) + 1);
  const metas = await getMetas(pageIds, catalog.type);
  return metas.slice(0, catalog.pageSize);
}
// Maps a genre (time-window label) to the window's start timestamp; 'All Time' has no lower bound.
function getStartDate(genre) {
  const now = () => moment().utc();
  if (genre === genres[0]) return now().subtract(1, 'day').startOf('day');
  if (genre === genres[1]) return now().startOf('isoWeek');
  if (genre === genres[2]) return now().subtract(7, 'day').startOf('isoWeek');
  if (genre === genres[3]) return now().startOf('month');
  if (genre === genres[4]) return now().subtract(30, 'day').startOf('month');
  if (genre === genres[5]) return undefined;
  return now().subtract(30, 'day').startOf('day');
}
// Maps a genre (time-window label) to the window's end timestamp; 'All Time' has no upper bound.
function getEndDate(genre) {
  const now = () => moment().utc();
  if (genre === genres[0]) return now().subtract(1, 'day').endOf('day');
  if (genre === genres[1]) return now().endOf('isoWeek');
  if (genre === genres[2]) return now().subtract(7, 'day').endOf('isoWeek');
  if (genre === genres[3]) return now().endOf('month');
  if (genre === genres[4]) return now().subtract(30, 'day').endOf('month');
  if (genre === genres[5]) return undefined;
  return now().subtract(1, 'day').endOf('day');
}
// Builds a cache key scoped to the current date, so cached listings roll over daily.
// Undefined segments (e.g. a missing offset) are dropped from the key.
function createCacheKey(catalogId, providers, genre, offset) {
  const dateKey = moment().format('YYYY-MM-DD');
  const segments = [catalogId, providers.join(','), genre, dateKey, offset];
  return segments.filter(segment => segment !== undefined).join('|');
}

export default builder.getInterface();

View File

@@ -1,9 +0,0 @@
import express from 'express';
import serverless from './serverless.js';
const app = express();
// Route every incoming request through the serverless-style handler.
app.use((req, res, next) => serverless(req, res, next));
// PORT env overrides the default 7000.
app.listen(process.env.PORT || 7000, () => {
  console.log(`Started addon at: http://localhost:${process.env.PORT || 7000}`);
});

View File

@@ -1,46 +0,0 @@
import cacheManager from 'cache-manager';
import mangodbStore from 'cache-manager-mongodb';
// Cache entry lifetime in seconds.
// NOTE(review): reads STREAM_TTL (not CATALOG_TTL) and, when the env var is set, keeps it
// as a string rather than a number — confirm this is intended.
const CATALOG_TTL = process.env.STREAM_TTL || 24 * 60 * 60; // 24 hours
const MONGO_URI = process.env.MONGODB_URI;

const remoteCache = initiateRemoteCache();

// Chooses the cache backend: MongoDB when MONGODB_URI is configured, in-memory otherwise.
function initiateRemoteCache() {
  if (MONGO_URI) {
    return cacheManager.caching({
      store: mangodbStore,
      uri: MONGO_URI,
      options: {
        collection: 'torrentio_catalog_collection',
        socketTimeoutMS: 120000,
        useNewUrlParser: true,
        useUnifiedTopology: false,
        ttl: CATALOG_TTL
      },
      ttl: CATALOG_TTL,
      // Cache failures fall through to the wrapped method instead of failing the request.
      ignoreCacheErrors: true
    });
  } else {
    return cacheManager.caching({
      store: 'memory',
      ttl: CATALOG_TTL
    });
  }
}
// Wraps `method` with the given cache under `key`; falls back to a direct call when no
// cache is configured.
function cacheWrap(cache, key, method, options) {
  return cache ? cache.wrap(key, method, options) : method();
}
// Caches a catalog page under the given key.
export function cacheWrapCatalog(key, method) {
  return cacheWrap(remoteCache, key, method, { ttl: CATALOG_TTL });
}

// Caches a full id listing; the 'ids|' prefix namespaces these away from page entries.
export function cacheWrapIds(key, method) {
  return cacheWrap(remoteCache, `ids|${key}`, method, { ttl: CATALOG_TTL });
}

View File

@@ -1,274 +0,0 @@
const STYLESHEET = `
* {
box-sizing: border-box;
}
body,
html {
margin: 0;
padding: 0;
width: 100%;
height: 100%
}
html {
background-size: auto 100%;
background-size: cover;
background-position: center center;
background-repeat: repeat-y;
}
body {
display: flex;
background-color: transparent;
font-family: 'Open Sans', Arial, sans-serif;
color: white;
}
h1 {
font-size: 4.5vh;
font-weight: 700;
}
h2 {
font-size: 2.2vh;
font-weight: normal;
font-style: italic;
opacity: 0.8;
}
h3 {
font-size: 2.2vh;
}
h1,
h2,
h3,
p,
label {
margin: 0;
text-shadow: 0 0 1vh rgba(0, 0, 0, 0.15);
}
p {
font-size: 1.75vh;
}
ul {
font-size: 1.75vh;
margin: 0;
margin-top: 1vh;
padding-left: 3vh;
}
a {
color: green
}
a.install-link {
text-decoration: none
}
button {
border: 0;
outline: 0;
color: white;
background: #8A5AAB;
padding: 1.2vh 3.5vh;
margin: auto;
text-align: center;
font-family: 'Open Sans', Arial, sans-serif;
font-size: 2.2vh;
font-weight: 600;
cursor: pointer;
display: block;
box-shadow: 0 0.5vh 1vh rgba(0, 0, 0, 0.2);
transition: box-shadow 0.1s ease-in-out;
}
button:hover {
box-shadow: none;
}
button:active {
box-shadow: 0 0 0 0.5vh white inset;
}
#addon {
width: 90vh;
margin: auto;
padding-left: 10%;
padding-right: 10%;
background: rgba(0, 0, 0, 0.60);
}
.logo {
height: 14vh;
width: 14vh;
margin: auto;
margin-bottom: 3vh;
}
.logo img {
width: 100%;
}
.name, .version {
display: inline-block;
vertical-align: top;
}
.name {
line-height: 5vh;
}
.version {
position: absolute;
line-height: 5vh;
margin-left: 1vh;
opacity: 0.8;
}
.contact {
position: absolute;
left: 0;
bottom: 4vh;
width: 100%;
text-align: center;
}
.contact a {
font-size: 1.4vh;
font-style: italic;
}
.separator {
margin-bottom: 4vh;
}
.label {
font-size: 2.2vh;
font-weight: 600;
padding: 0;
line-height: inherit;
}
.btn-group, .multiselect-container {
width: 100%;
}
.btn {
text-align: left;
}
.multiselect-container {
border: 0;
border-radius: 0;
}
.input, .btn {
height: 3.8vh;
width: 100%;
margin: auto;
margin-bottom: 10px;
padding: 6px 12px;
border: 0;
border-radius: 0;
outline: 0;
color: #333;
background-color: rgb(255, 255, 255);
box-shadow: 0 0.5vh 1vh rgba(0, 0, 0, 0.2);
}
`;
import { Providers } from '../../addon/lib/filter.js';
/**
 * Renders the addon landing/configure page as a full HTML document.
 * @param {Object} manifest addon manifest supplying name/logo/background/description/types.
 * @param {Object} config current configuration; `config.providers` preselects provider options.
 * @returns {string} complete HTML page with an install link that encodes the chosen providers.
 */
export default function landingTemplate(manifest, config = {}) {
  const providers = config.providers || [];
  const background = manifest.background || 'https://dl.strem.io/addon-background.jpg';
  const logo = manifest.logo || 'https://dl.strem.io/addon-logo.png';
  // Contact block only shown when the manifest exposes a contact email.
  const contactHTML = manifest.contactEmail ?
      `<div class="contact">
<p>Contact ${manifest.name} creator:</p>
<a href="mailto:${manifest.contactEmail}">${manifest.contactEmail}</a>
</div>` : '<div class="separator"></div>';
  // One <option> per provider; foreign providers carry their flag emoji prefix.
  const providersHTML = Providers.options
      .map(provider => `<option value="${provider.key}">${provider.foreign || ''}${provider.label}</option>`)
      .join('\n');
  // Capitalise and pluralise the supported types for display ('movie' -> 'Movies').
  const stylizedTypes = manifest.types
      .map(t => t[0].toUpperCase() + t.slice(1) + (t !== 'series' ? 's' : ''));
  return `
<!DOCTYPE html>
<html style="background-image: url(${background});">
<head>
<meta charset="utf-8">
<title>${manifest.name} - Stremio Addon</title>
<link rel="shortcut icon" href="${logo}" type="image/x-icon">
<link href="https://fonts.googleapis.com/css?family=Open+Sans:400,600,700&display=swap" rel="stylesheet">
<script src="https://code.jquery.com/jquery-3.5.1.slim.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.1/dist/umd/popper.min.js"></script>
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/js/bootstrap.min.js"></script>
<link href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css" rel="stylesheet" >
<script src="https://cdnjs.cloudflare.com/ajax/libs/bootstrap-multiselect/0.9.15/js/bootstrap-multiselect.min.js"></script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/bootstrap-multiselect/0.9.15/css/bootstrap-multiselect.css" rel="stylesheet"/>
<style>${STYLESHEET}</style>
</head>
<body>
<div id="addon">
<div class="logo">
<img src="${logo}">
</div>
<h1 class="name">${manifest.name}</h1>
<h2 class="version">${manifest.version || '0.0.0'}</h2>
<h2 class="description">${manifest.description || ''}</h2>
<div class="separator"></div>
<h3 class="gives">This addon has more :</h3>
<ul>
${stylizedTypes.map(t => `<li>${t}</li>`).join('')}
</ul>
<div class="separator"></div>
<label class="label" for="iProviders">Providers:</label>
<select id="iProviders" class="input" name="providers[]" multiple="multiple">
${providersHTML}
</select>
<div class="separator"></div>
<a id="installLink" class="install-link" href="#">
<button name="Install">INSTALL</button>
</a>
${contactHTML}
</div>
<script type="text/javascript">
$(document).ready(function() {
$('#iProviders').multiselect({
nonSelectedText: 'All providers',
onChange: () => generateInstallLink()
});
$('#iProviders').multiselect('select', [${providers.map(provider => '"' + provider + '"')}]);
generateInstallLink();
});
function generateInstallLink() {
const providersValue = $('#iProviders').val().join(',') || '';
const providers = providersValue.length && providersValue;
const configurationValue = [
['${Providers.key}', providers],
]
.filter(([_, value]) => value.length)
.map(([key, value]) => key + '=' + value).join('|');
const configuration = configurationValue && configurationValue.length ? '/' + configurationValue : '';
installLink.href = 'stremio://' + window.location.host + configuration + '/manifest.json';
}
</script>
</body>
</html>`
}

View File

@@ -1,54 +0,0 @@
import { Type } from '../../addon/lib/types.js';
// Catalog "genre" labels — each represents the seeding time window used to rank torrents.
export const genres = [
  'Yesterday',
  'This Week',
  'Last Week',
  'This Month',
  'Last Month',
  'All Time'
]
/**
 * Builds the catalog addon manifest: three "Top seeded" catalogs (movies, series, anime),
 * each paginated via the 'skip' extra and time-windowed via the genre options above.
 */
export function createManifest() {
  return {
    id: 'com.stremio.torrentio.catalog.addon',
    version: '1.0.2',
    name: 'Torrent Catalogs',
    description: 'Provides catalogs for movies/series/anime based on top seeded torrents. Requires Kitsu addon for anime.',
    logo: `https://i.ibb.co/w4BnkC9/GwxAcDV.png`,
    background: `https://i.ibb.co/VtSfFP9/t8wVwcg.jpg`,
    types: [Type.MOVIE, Type.SERIES, Type.ANIME],
    resources: ['catalog'],
    catalogs: [
      {
        id: 'top-movies',
        type: Type.MOVIE,
        name: "Top seeded",
        pageSize: 20,
        extra: [{ name: 'genre', options: genres }, { name: 'skip' }],
        genres: genres
      },
      {
        id: 'top-series',
        type: Type.SERIES,
        name: "Top seeded",
        pageSize: 20,
        extra: [{ name: 'genre', options: genres }, { name: 'skip' }],
        genres: genres
      },
      {
        id: 'top-anime',
        type: Type.ANIME,
        name: "Top seeded",
        pageSize: 20,
        extra: [{ name: 'genre', options: genres }, { name: 'skip' }],
        genres: genres
      }
    ],
    behaviorHints: {
      // @TODO might enable configuration to configure providers
      configurable: false,
      configurationRequired: false
    }
  };
}

View File

@@ -1,41 +0,0 @@
import axios from 'axios';
import { Type } from '../../addon/lib/types.js';
const CINEMETA_URL = 'https://v3-cinemeta.strem.io'; // Cinemeta addon: movie/series metadata
const KITSU_URL = 'https://anime-kitsu.strem.fun'; // Kitsu addon: anime metadata
const TIMEOUT = 30000; // upstream request timeout in ms
const MAX_SIZE = 40; // maximum number of ids per metadata request
/**
 * Resolves metadata for the given ids; empty input or missing type yields an empty list.
 * @throws {Error} wrapping the upstream failure message.
 */
export async function getMetas(ids, type) {
  if (!ids.length || !type) {
    return [];
  }
  try {
    return await _requestMetadata(ids, type);
  } catch (error) {
    throw new Error(`failed metadata ${type} query due: ${error.message}`);
  }
}
// Fetches metadata from the appropriate upstream addon and sanitises each entry.
async function _requestMetadata(ids, type) {
  const response = await axios.get(_getUrl(ids, type), { timeout: TIMEOUT });
  const metas = response?.data?.metas || response?.data?.metasDetailed || [];
  return metas.filter(meta => meta).map(meta => _sanitizeMeta(meta));
}
// Builds the upstream catalog URL; anime ids resolve via Kitsu, everything else via
// Cinemeta. At most MAX_SIZE ids are requested.
function _getUrl(ids, type) {
  const joinedIds = ids.slice(0, MAX_SIZE).join(',');
  return type === Type.ANIME
      ? `${KITSU_URL}/catalog/${type}/kitsu-anime-list/lastVideosIds=${joinedIds}.json`
      : `${CINEMETA_URL}/catalog/${type}/last-videos/lastVideosIds=${joinedIds}.json`;
}
// Strips bulky fields the catalog response does not need. Mutates and returns the same object.
function _sanitizeMeta(meta) {
  for (const field of ['videos', 'credits_cast', 'credits_crew']) {
    delete meta[field];
  }
  return meta;
}

View File

@@ -1,34 +0,0 @@
import { Sequelize, QueryTypes } from 'sequelize';
import { Type } from '../../addon/lib/types.js';
const DATABASE_URI = process.env.DATABASE_URI;
const database = new Sequelize(DATABASE_URI, { logging: false });

/**
 * Returns the ids (imdb for movies/series, kitsu for anime) of the best-seeded torrents
 * matching the filters, ordered by aggregate seeders, capped at 5000.
 * @param {string[]} providers provider names to include (empty/falsy means all)
 * @param {string} type content type ('movie' | 'series' | 'anime')
 * @param {string} [startDate] inclusive ISO lower bound on uploadDate
 * @param {string} [endDate] inclusive ISO upper bound on uploadDate
 * @returns {Promise<string[]>} ids as strings
 */
export async function getIds(providers, type, startDate, endDate) {
  // Anime entries are keyed by kitsuId; everything else by imdbId.
  const idName = type === Type.ANIME ? 'kitsuId' : 'imdbId';
  // Series must have resolved season/episode numbers to be addressable.
  const episodeCondition = type === Type.SERIES
      ? 'AND files."imdbSeason" IS NOT NULL AND files."imdbEpisode" IS NOT NULL'
      : '';
  const dateCondition = startDate && endDate
      ? `AND "uploadDate" BETWEEN '${startDate}' AND '${endDate}'`
      : '';
  const providersCondition = providers && providers.length
      ? `AND provider in (${providers.map(it => `'${it}'`).join(',')})`
      : '';
  const titleCondition = type === Type.MOVIE
      ? 'AND torrents.title NOT LIKE \'%[Erotic]%\''
      : '';
  // Movies aggregate seeders across all releases; other types rank by their best release.
  const sortCondition = type === Type.MOVIE ? 'sum(torrents.seeders)' : 'max(torrents.seeders)';
  // NOTE(review): the SQL is assembled by string interpolation. The inputs appear to be
  // internally generated (provider keys, ISO dates from moment), but sequelize
  // replacements would be safer — confirm these values can never be user-controlled.
  const query = `SELECT files."${idName}"
FROM (SELECT torrents."infoHash", torrents.seeders FROM torrents
WHERE seeders > 0 AND type = '${type}' ${providersCondition} ${dateCondition} ${titleCondition}
) as torrents
JOIN files ON torrents."infoHash" = files."infoHash"
WHERE files."${idName}" IS NOT NULL ${episodeCondition}
GROUP BY files."${idName}"
ORDER BY ${sortCondition} DESC
LIMIT 5000`
  const results = await database.query(query, { type: QueryTypes.SELECT });
  return results.map(result => `${result.imdbId || result.kitsuId}`);
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,26 +0,0 @@
{
"name": "stremio-torrentio-catalogs",
"version": "1.0.3",
"exports": "./index.js",
"type": "module",
"scripts": {
"start": "node index.js"
},
"engines": {
"node": "16.x"
},
"author": "TheBeastLT <pauliox@beyond.lt>",
"license": "MIT",
"dependencies": {
"axios": "^1.6.1",
"bottleneck": "^2.19.5",
"cache-manager": "^3.4.4",
"cache-manager-mongodb": "^0.3.0",
"moment": "^2.29.4",
"pg": "^8.8.0",
"pg-hstore": "^2.3.4",
"request-ip": "^3.3.0",
"sequelize": "^6.29.0",
"stremio-addon-sdk": "^1.6.10"
}
}

View File

@@ -1,68 +0,0 @@
import getRouter from 'stremio-addon-sdk/src/getRouter.js';
import addonInterface from './addon.js';
import qs from 'querystring';
import { parseConfiguration } from '../addon/lib/configuration.js';
import { createManifest } from './lib/manifest.js';
const router = getRouter(addonInterface);
// router.get('/', (_, res) => {
// res.redirect('/configure')
// res.end();
// });
//
// router.get('/:configuration?/configure', (req, res) => {
// const configValues = parseConfiguration(req.params.configuration || '');
// const landingHTML = landingTemplate(createManifest(configValues), configValues);
// res.setHeader('content-type', 'text/html');
// res.end(landingHTML);
// });
router.get('/:configuration?/manifest.json', (req, res) => {
const configValues = parseConfiguration(req.params.configuration || '');
const manifestBuf = JSON.stringify(createManifest(configValues));
res.setHeader('Content-Type', 'application/json; charset=utf-8');
res.end(manifestBuf)
});
// Main addon resource route: /:configuration/:resource/:type/:id/:extra?.json
// Decodes the configuration segment, merges in any trailing "extra" segment
// (querystring-encoded, minus the ".json" suffix) and forwards to the addon SDK.
router.get('/:configuration/:resource/:type/:id/:extra?.json', (req, res, next) => {
const { configuration, resource, type, id } = req.params;
// extra is the last path component with the trailing ".json" (5 chars) removed.
const extra = req.params.extra ? qs.parse(req.url.split('/').pop().slice(0, -5)) : {}
const configValues = { ...extra, ...parseConfiguration(configuration) };
addonInterface.get(resource, type, id, configValues)
.then(resp => {
// Map the SDK's cache hints onto Cache-Control directives; only
// integer-valued hints are emitted.
const cacheHeaders = {
cacheMaxAge: 'max-age',
staleRevalidate: 'stale-while-revalidate',
staleError: 'stale-if-error'
};
const cacheControl = Object.keys(cacheHeaders)
.map(prop => Number.isInteger(resp[prop]) && cacheHeaders[prop] + '=' + resp[prop])
.filter(val => !!val).join(', ');
res.setHeader('Cache-Control', `${cacheControl}, public`);
res.setHeader('Content-Type', 'application/json; charset=utf-8');
res.end(JSON.stringify(resp));
})
.catch(err => {
// noHandler: the SDK has no handler for this resource/type — fall through
// to the next route when possible, otherwise answer 404 directly.
if (err.noHandler) {
if (next) {
next()
} else {
res.writeHead(404);
res.end(JSON.stringify({ err: 'not found' }));
}
} else {
// Unexpected handler failure: log and answer 500.
console.error(err);
res.writeHead(500);
res.end(JSON.stringify({ err: 'handler error' }));
}
});
});
// HTTP entrypoint: delegate to the addon router; anything unmatched is a 404.
export default function (req, res) {
  router(req, res, () => {
    res.statusCode = 404;
    res.end();
  });
};

103
docker-compose.yaml Normal file
View File

@@ -0,0 +1,103 @@
version: '3.8'
name: torrentio-selfhostio
x-restart: &restart-policy
"unless-stopped"
x-basehealth: &base-health
interval: 10s
timeout: 10s
retries: 3
start_period: 10s
x-rabbithealth: &rabbitmq-health
test: rabbitmq-diagnostics -q ping
<<: *base-health
x-mongohealth: &mongodb-health
test: ["CMD","mongosh", "--eval", "db.adminCommand('ping')"]
<<: *base-health
x-postgreshealth: &postgresdb-health
test: pg_isready
<<: *base-health
x-apps: &selfhostio-app
depends_on:
mongodb:
condition: service_healthy
postgres:
condition: service_healthy
rabbitmq:
condition: service_healthy
restart: *restart-policy
services:
postgres:
image: postgres:latest
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: selfhostio
PGUSER: postgres # needed for healthcheck.
ports:
- "5432:5432"
volumes:
- postgres:/var/lib/postgresql/data
healthcheck: *postgresdb-health
restart: *restart-policy
mongodb:
image: mongo:latest
environment:
MONGO_INITDB_ROOT_USERNAME: mongo
MONGO_INITDB_ROOT_PASSWORD: mongo
ports:
- "27017:27017"
volumes:
- mongo:/data/db
restart: *restart-policy
healthcheck: *mongodb-health
rabbitmq:
image: rabbitmq:3-management
ports:
- "5672:5672"
- "15672:15672"
volumes:
- rabbitmq:/var/lib/rabbitmq
restart: *restart-policy
healthcheck: *rabbitmq-health
producer:
build:
context: src/producer
dockerfile: Dockerfile
env_file:
- env/producer.env
<<: *selfhostio-app
consumer:
build:
context: src/node/consumer
dockerfile: Dockerfile
env_file:
- env/consumer.env
deploy:
replicas: 3
<<: *selfhostio-app
addon:
build:
context: src/node/addon
dockerfile: Dockerfile
ports:
- "7000:7000"
env_file:
- env/addon.env
<<: *selfhostio-app
volumes:
postgres:
mongo:
rabbitmq:

View File

@@ -1,61 +0,0 @@
---
version: '3.9'
name: torrentio-self-host
services:
mongodb:
restart: unless-stopped
image: docker.io/bitnami/mongodb:7.0
ports:
- "27017:27017"
volumes:
- mongo-data:/bitnami/mongodb
scraper:
build: ./scraper
restart: unless-stopped
depends_on:
- mongodb
- postgres
environment:
- PORT=7001
- MONGODB_URI=mongodb://mongodb:27017/torrentio
- DATABASE_URI=postgres://postgres@postgres:5432/torrentio
- ENABLE_SYNC=true
torrentio:
build: ./addon
restart: unless-stopped
depends_on:
- mongodb
- postgres
ports:
- "7000:7000"
environment:
- MONGODB_URI=mongodb://mongodb:27017/torrentio
- DATABASE_URI=postgres://postgres@postgres:5432/torrentio
- ENABLE_SYNC=true
postgres:
image: postgres:14-alpine
restart: unless-stopped
ports:
- 5432:5432
volumes:
- postgres-data:/var/lib/postgresql/data
environment:
- POSTGRES_HOST_AUTH_METHOD=trust
- POSTGRES_USER=postgres
- POSTGRES_DB=torrentio
flaresolverr:
image: ghcr.io/flaresolverr/flaresolverr:latest
restart: unless-stopped
ports:
- 8191:8191
volumes:
mongo-data:
postgres-data:

4
env/addon.env vendored Normal file
View File

@@ -0,0 +1,4 @@
TZ=Europe/London
DATABASE_URI=postgres://postgres:postgres@postgres/selfhostio
MONGODB_URI=mongodb://mongo:mongo@mongodb/selfhostio?tls=false&authSource=admin
DEBUG_MODE=false

11
env/consumer.env vendored Normal file
View File

@@ -0,0 +1,11 @@
TZ=Europe/London
MONGODB_URI=mongodb://mongo:mongo@mongodb/selfhostio?tls=false&authSource=admin
DATABASE_URI=postgres://postgres:postgres@postgres/selfhostio
RABBIT_URI=amqp://guest:guest@rabbitmq:5672/?heartbeat=30
QUEUE_NAME=ingested
JOB_CONCURRENCY=5
JOBS_ENABLED=true
ENABLE_SYNC=true
MAX_SINGLE_TORRENT_CONNECTIONS=10
TORRENT_TIMEOUT=30000
UDP_TRACKERS_ENABLED=true

10
env/producer.env vendored Normal file
View File

@@ -0,0 +1,10 @@
ScrapeConfiguration__StorageConnectionString=host=postgres;username=postgres;password=postgres;database=selfhostio;
RabbitMqConfiguration__Host=rabbitmq
RabbitMqConfiguration__QueueName=ingested
RabbitMqConfiguration__Username=guest
RabbitMqConfiguration__Password=guest
RabbitMqConfiguration__Durable=true
RabbitMqConfiguration__MaxQueueSize=0
RabbitMqConfiguration__MaxPublishBatchSize=500
RabbitMqConfiguration__PublishIntervalInSeconds=10
GithubSettings__PAT=

View File

@@ -1,12 +0,0 @@
FROM node:16-alpine

# git is needed for npm dependencies fetched from git URLs.
RUN apk update && apk upgrade && \
    apk add --no-cache git

WORKDIR /home/node/app

# Copy manifests first so dependency install is cached across code changes.
COPY package*.json ./
# Fix: `--only-production` is not a valid npm option (the legacy spelling was
# `--only=production`), so dev dependencies were being installed too.
# `--omit=dev` is the supported form on npm 8 (bundled with node 16).
RUN npm ci --omit=dev

COPY . .

CMD [ "node", "--insecure-http-parser", "index.js" ]

View File

@@ -1,37 +0,0 @@
# Torrentio Scraper
## Initial dumps
### The Pirate Bay
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
https://thepiratebay.org/static/dump/csv/
### Kickass
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
https://web.archive.org/web/20150416071329/http://kickass.to/api
### RARBG
Scrape movie and tv catalog using [www.webscraper.io](https://www.webscraper.io/) for available `imdbIds` and use those via the api to search for torrents.
Movies sitemap
```json
{"_id":"rarbg-movies","startUrl":["https://rarbgmirror.org/catalog/movies/[1-4235]"],"selectors":[{"id":"rarbg-movie-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width='110']","multiple":true,"regex":"tt[0-9]+","delay":0}]}
```
TV sitemap
```json
{"_id":"rarbg-tv","startUrl":["https://rarbgmirror.org/catalog/tv/[1-609]"],"selectors":[{"id":"rarbg-tv-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width='110']","multiple":true,"regex":"tt[0-9]+","delay":0}]}
```
### Migrating Database
When migrating database to a new one it is important to alter the `files_id_seq` sequence to the maximum file id value plus 1.
```sql
ALTER SEQUENCE files_id_seq RESTART WITH <last_file_id + 1>;
```

View File

@@ -1,14 +0,0 @@
// Minimal HTTP wrapper around the scraping process: exposes a liveness endpoint
// and starts the scraper scheduler once the database connection is established.
const express = require("express");
const server = express();
const { connect } = require('./lib/repository');
const { startScraper } = require('./scheduler/scheduler')
// Health/liveness probe — always 200 while the process is up.
server.get('/', function (req, res) {
res.sendStatus(200);
});
server.listen(process.env.PORT || 7000, async () => {
await connect();
console.log('Scraper started');
startScraper();
});

View File

@@ -1,72 +0,0 @@
const cacheManager = require('cache-manager');
const mangodbStore = require('cache-manager-mongodb');
const GLOBAL_KEY_PREFIX = 'stremio-torrentio';
const IMDB_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|imdb_id`;
const KITSU_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|kitsu_id`;
const METADATA_PREFIX = `${GLOBAL_KEY_PREFIX}|metadata`;
const TRACKERS_KEY_PREFIX = `${GLOBAL_KEY_PREFIX}|trackers`;
const GLOBAL_TTL = process.env.METADATA_TTL || 7 * 24 * 60 * 60; // 7 days
const MEMORY_TTL = process.env.METADATA_TTL || 2 * 60 * 60; // 2 hours
const TRACKERS_TTL = 2 * 24 * 60 * 60; // 2 days
const MONGO_URI = process.env.MONGODB_URI;
const memoryCache = initiateMemoryCache();
const remoteCache = initiateRemoteCache();
// Build the long-lived cache for id-mapping lookups. With MONGODB_URI set the
// cache persists in a MongoDB collection (cache errors are ignored so an outage
// cannot break scraping); without it, fall back to a process-local memory cache.
function initiateRemoteCache() {
if (MONGO_URI) {
return cacheManager.caching({
store: mangodbStore,
uri: MONGO_URI,
options: {
collection: 'torrentio_scraper_collection',
useUnifiedTopology: true
},
ttl: GLOBAL_TTL,
ignoreCacheErrors: true
});
} else {
return cacheManager.caching({
store: 'memory',
ttl: GLOBAL_TTL
});
}
}
// Build the short-TTL in-process cache used for metadata lookups.
function initiateMemoryCache() {
  return cacheManager.caching({ store: 'memory', ttl: MEMORY_TTL });
}
// Generic wrapper: return the cached value under `key`, or run `method`,
// store its result under `key` and return it.
function cacheWrap(cache, key, method, options) {
return cache.wrap(key, method, options);
}
// Long-TTL remote-cache wrapper for title -> IMDb id lookups.
function cacheWrapImdbId(key, method) {
return cacheWrap(remoteCache, `${IMDB_ID_PREFIX}:${key}`, method, { ttl: GLOBAL_TTL });
}
// Long-TTL remote-cache wrapper for title -> Kitsu id lookups.
function cacheWrapKitsuId(key, method) {
return cacheWrap(remoteCache, `${KITSU_ID_PREFIX}:${key}`, method, { ttl: GLOBAL_TTL });
}
// Short-TTL in-memory wrapper for full metadata objects.
function cacheWrapMetadata(id, method) {
return cacheWrap(memoryCache, `${METADATA_PREFIX}:${id}`, method, { ttl: MEMORY_TTL });
}
// Cache the downloaded public tracker list under one shared key for 2 days.
function cacheTrackers(method) {
return cacheWrap(memoryCache, `${TRACKERS_KEY_PREFIX}`, method, { ttl: TRACKERS_TTL });
}
module.exports = {
cacheWrapImdbId,
cacheWrapKitsuId,
cacheWrapMetadata,
cacheTrackers
};

View File

@@ -1,24 +0,0 @@
const UserAgent = require('user-agents');
const userAgent = new UserAgent();
// Produce a fresh randomised User-Agent string for outgoing requests.
function getRandomUserAgent() {
  return `${userAgent.random()}`;
}
// Build default request options: a random User-Agent, plus proxy settings when
// both PROXY_HOST and PROXY_TYPE are configured.
// NOTE(review): assumes PROXY_HOST looks like "scheme://host:port" — both
// `.match(...)` calls throw if their pattern does not match; confirm env format.
function defaultOptionsWithProxy() {
if (process.env.PROXY_HOST && process.env.PROXY_TYPE) {
return {
proxy: {
host: process.env.PROXY_HOST.match(/\/\/(.*):/)[1],
port: process.env.PROXY_HOST.match(/:(\d+)/)[1]
},
headers: {
'user-agent': getRandomUserAgent(),
'proxy-type': process.env.PROXY_TYPE
}
}
}
return { headers: { 'user-agent': getRandomUserAgent() } };
}
module.exports = { getRandomUserAgent, defaultOptionsWithProxy };

View File

@@ -1,208 +0,0 @@
const torrentStream = require('torrent-stream');
const axios = require('axios');
const parseTorrent = require('parse-torrent');
const BTClient = require('bittorrent-tracker')
const async = require('async');
const decode = require('magnet-uri');
const { Type } = require('./types');
const { delay } = require('./promises')
const { isVideo, isSubtitle } = require('./extension');
const { cacheTrackers } = require('./cache');
const TRACKERS_URL = 'https://ngosang.github.io/trackerslist/trackers_all.txt';
const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20;
const SEEDS_CHECK_TIMEOUT = 15 * 1000; // 15 secs
const ADDITIONAL_TRACKERS = [
'http://tracker.trackerfix.com:80/announce',
'udp://9.rarbg.me:2780',
'udp://9.rarbg.to:2870'
];
const ANIME_TRACKERS = [
"http://nyaa.tracker.wf:7777/announce",
"udp://anidex.moe:6969/announce",
"udp://tracker-udp.anirena.com:80/announce",
"udp://tracker.uw0.xyz:6969/announce"
];
// Refresh the `seeders` count of one torrent or a batch by scraping every
// tracker the torrents announce to. Resolves after SEEDS_CHECK_TIMEOUT (or when
// every tracker has answered); the max "complete" count seen across trackers
// becomes each torrent's new seeders value (mutated in place). Returns the input.
async function updateCurrentSeeders(torrentsInput) {
return new Promise(async (resolve) => {
const torrents = Array.isArray(torrentsInput) ? torrentsInput : [torrentsInput];
// infoHash -> { tracker -> [complete, incomplete] }
const perTorrentResults = Object.fromEntries(new Map(torrents.map(torrent => [torrent.infoHash, {}])));
// Invert to tracker -> [infoHashes] so each tracker is scraped exactly once.
const perTrackerInfoHashes = await Promise.all(torrents.map(torrent => getTorrentTrackers(torrent)
.then(torrentTrackers => ({ infoHash: torrent.infoHash, trackers: torrentTrackers }))))
.then(allTorrentTrackers => allTorrentTrackers
.reduce((allTrackersMap, torrentTrackers) => {
torrentTrackers.trackers.forEach(tracker =>
allTrackersMap[tracker] = (allTrackersMap[tracker] || []).concat(torrentTrackers.infoHash));
return allTrackersMap;
}, {}));
let successCounter = 0;
// Resolve either on timeout or when async.each finishes; callback may run
// twice — the second resolve is a harmless no-op.
const callback = () => {
console.log(`Total successful tracker responses: ${successCounter}`)
resolve(perTorrentResults);
}
setTimeout(callback, SEEDS_CHECK_TIMEOUT);
async.each(Object.keys(perTrackerInfoHashes), function (tracker, ready) {
BTClient.scrape({ infoHash: perTrackerInfoHashes[tracker], announce: tracker }, (error, response) => {
if (response) {
// Batch scrapes answer keyed by infoHash; single scrapes answer flat.
const results = Array.isArray(torrentsInput) ? Object.entries(response) : [[response.infoHash, response]];
results
.filter(([infoHash]) => perTorrentResults[infoHash])
.forEach(([infoHash, seeders]) =>
perTorrentResults[infoHash][tracker] = [seeders.complete, seeders.incomplete])
successCounter++;
} else if (error) {
// Record the failure with zero counts plus the error message.
perTrackerInfoHashes[tracker]
.filter(infoHash => perTorrentResults[infoHash])
.forEach(infoHash => perTorrentResults[infoHash][tracker] = [0, 0, error.message])
}
ready();
})
}, callback);
}).then(perTorrentResults => {
const torrents = Array.isArray(torrentsInput) ? torrentsInput : [torrentsInput];
// Apply the best (max) complete count seen across trackers; 0 if none answered.
torrents.forEach(torrent => {
const results = perTorrentResults[torrent.infoHash];
const newSeeders = Math.max(...Object.values(results).map(values => values[0]).concat(0));
if (torrent.seeders !== newSeeders) {
console.log(`Updating seeders for [${torrent.infoHash}] ${torrent.title} - ${torrent.seeders} -> ${newSeeders}`)
torrent.seeders = newSeeders;
}
})
return torrentsInput;
});
}
// Populate `torrent.size` and `torrent.files` by probing the swarm; returns the
// same (mutated) torrent object.
async function updateTorrentSize(torrent) {
  const { size, files } = await filesAndSizeFromTorrentStream(torrent, SEEDS_CHECK_TIMEOUT);
  torrent.size = size;
  torrent.files = files;
  return torrent;
}
// Probe a torrent's swarm for its total size and file list, allowing up to 30s.
async function sizeAndFiles(torrent) {
  const timeoutMs = 30000;
  return filesAndSizeFromTorrentStream(torrent, timeoutMs);
}
// Resolve a torrent's file listing — from the object itself, then the .torrent
// file, then the live swarm — and bucket entries into videos and subtitles.
async function torrentFiles(torrent, timeout) {
  const files = await getFilesFromObject(torrent)
      .catch(() => filesFromTorrentFile(torrent))
      .catch(() => filesFromTorrentStream(torrent, timeout));
  return {
    contents: files,
    videos: filterVideos(files),
    subtitles: filterSubtitles(files)
  };
}
// Extract a file list directly off the torrent object: either a ready array or
// a lazy `files()` provider. Rejects when neither shape is present.
// Fix: reject with an Error instance (was a bare string), so callers get a
// stack trace and a consistent `err.message`.
function getFilesFromObject(torrent) {
  if (Array.isArray(torrent.files)) {
    return Promise.resolve(torrent.files);
  }
  if (typeof torrent.files === 'function') {
    return torrent.files();
  }
  return Promise.reject(new Error("No files in the object"));
}
// Download the .torrent file from torrent.torrentLink and parse its file
// listing. Rejects when no torrentLink is set or the download/parse fails.
async function filesFromTorrentFile(torrent) {
if (!torrent.torrentLink) {
return Promise.reject(new Error("no torrentLink"));
}
return axios.get(torrent.torrentLink, { timeout: 10000, responseType: 'arraybuffer' })
.then((response) => {
if (!response.data || response.status !== 200) {
throw new Error('torrent not found')
}
return response.data
})
.then((body) => parseTorrent(body))
.then((info) => info.files.map((file, fileId) => ({
fileIndex: fileId,
name: file.name,
// Strip the torrent's root folder from the path.
path: file.path.replace(/^[^\/]+\//, ''),
size: file.length
})));
}
// Convenience: open the swarm just for the file list, discarding the total size.
async function filesFromTorrentStream(torrent, timeout) {
  const result = await filesAndSizeFromTorrentStream(torrent, timeout);
  return result.files;
}
// Connect to the swarm via torrent-stream and read its metadata: per-file
// entries (index, name, path with the root folder stripped, size) and the total
// torrent size. Rejects when neither infoHash nor magnetLink is available, or
// when no peer connection is made before `timeout`.
// Fix: the failure timer was never cancelled, so after a successful resolve it
// still fired and called destroy() on the already-destroyed engine; the timer
// is now cleared as soon as the engine is ready.
function filesAndSizeFromTorrentStream(torrent, timeout = 30000) {
  if (!torrent.infoHash && !torrent.magnetLink) {
    return Promise.reject(new Error("no infoHash or magnetLink"));
  }
  const magnet = torrent.magnetLink || decode.encode({ infoHash: torrent.infoHash, announce: torrent.trackers });
  return new Promise((resolve, reject) => {
    const engine = new torrentStream(magnet, { connections: MAX_PEER_CONNECTIONS });
    const timer = setTimeout(() => {
      engine.destroy();
      reject(new Error('No available connections for torrent!'));
    }, timeout);
    engine.ready(() => {
      clearTimeout(timer);
      const files = engine.files
          .map((file, fileId) => ({
            fileIndex: fileId,
            name: file.name,
            path: file.path.replace(/^[^\/]+\//, ''),
            size: file.length
          }));
      const size = engine.torrent.length;
      engine.destroy();
      resolve({ files, size });
    });
  });
}
// Keep only the "real" video files of a torrent, dropping samples, extras,
// anime openings/endings/previews, watermark clips and redundant tiny copies —
// all judged by size ratio against the largest video in the torrent.
function filterVideos(files) {
// A single entry without a fileIndex represents the whole torrent — keep as is.
if (files.length === 1 && !Number.isInteger(files[0].fileIndex)) {
return files;
}
const videos = files.filter(file => isVideo(file.path));
const maxSize = Math.max(...videos.map(video => video.size));
// Ratio thresholds are laxer for small torrents (<= 3 videos).
const minSampleRatio = videos.length <= 3 ? 3 : 10;
const minAnimeExtraRatio = 5;
const minRedundantRatio = videos.length <= 3 ? 30 : Number.MAX_VALUE;
const isSample = video => video.path.match(/sample|bonus|promo/i) && maxSize / parseInt(video.size) > minSampleRatio;
const isRedundant = video => maxSize / parseInt(video.size) > minRedundantRatio;
const isExtra = video => video.path.match(/extras?\//i);
// NC/ED/OP/PV markers denote anime creditless/opening/ending/preview clips.
const isAnimeExtra = video => video.path.match(/(?:\b|_)(?:NC)?(?:ED|OP|PV)(?:v?\d\d?)?(?:\b|_)/i)
&& maxSize / parseInt(video.size) > minAnimeExtraRatio;
// All-caps short-named small files are typically release-group watermarks.
const isWatermark = video => video.path.match(/^[A-Z-]+(?:\.[A-Z]+)?\.\w{3,4}$/)
&& maxSize / parseInt(video.size) > minAnimeExtraRatio
return videos
.filter(video => !isSample(video))
.filter(video => !isExtra(video))
.filter(video => !isAnimeExtra(video))
.filter(video => !isRedundant(video))
.filter(video => !isWatermark(video));
}
// Keep only the entries whose path looks like a subtitle file.
function filterSubtitles(files) {
  return files.filter(({ path }) => isSubtitle(path));
}
// Collect a de-duplicated tracker list from the magnet link, the torrent's own
// comma-separated tracker field, and the shared default tracker set.
async function getTorrentTrackers(torrent) {
  const fromMagnet = (torrent.magnetLink && decode(torrent.magnetLink).tr) || [];
  const fromField = (torrent.trackers && torrent.trackers.split(',')) || [];
  const fromDefaults = await getDefaultTrackers(torrent);
  const combined = [].concat(fromMagnet, fromField, fromDefaults);
  return [...new Set(combined)];
}
// Fetch (and cache) the public tracker list from ngosang/trackerslist, retrying
// up to `retry` times with a 5s pause on failure. Always appends the additional
// trackers and, for anime torrents, the anime-specific trackers.
async function getDefaultTrackers(torrent, retry = 3) {
return cacheTrackers(() => axios.get(TRACKERS_URL, { timeout: SEEDS_CHECK_TIMEOUT })
.then(response => response.data && response.data.trim())
.then(body => body && body.split('\n\n') || []))
.catch(() => retry > 0 ? delay(5000).then(() => getDefaultTrackers(torrent, retry - 1)) : [])
.then(trackers => trackers.concat(ADDITIONAL_TRACKERS))
.then(trackers => torrent.type === Type.ANIME ? trackers.concat(ANIME_TRACKERS) : trackers);
}
module.exports = { updateCurrentSeeders, updateTorrentSize, sizeAndFiles, torrentFiles }

View File

@@ -1,5 +0,0 @@
// Content categories a torrent can be classified as.
const Type = {
  MOVIE: 'movie',
  SERIES: 'series',
  ANIME: 'anime'
};
exports.Type = Type;

View File

@@ -1,145 +0,0 @@
const Bottleneck = require('bottleneck');
const { parse } = require('parse-torrent-title');
const Promises = require('../lib/promises');
const { mostCommonValue } = require('../lib/promises');
const repository = require('../lib/repository');
const { getImdbId, getKitsuId } = require('../lib/metadata');
const { parseTorrentFiles } = require('../lib/torrentFiles');
const { createTorrentContents } = require('../lib/torrentEntries');
const { assignSubtitles } = require('../lib/torrentSubtitles');
const { Type } = require('../lib/types');
const limiter = new Bottleneck({ maxConcurrent: 40 });
// One-off maintenance task: re-expand movie "collection" torrents (complete
// packs stored as a single null-fileIndex file row) into per-movie file rows,
// deleting the original row on success.
// Fix: the per-torrent promises built with `.map` were discarded, so the
// function resolved before any work finished and failures were unobservable;
// they are now awaited. Also fixes the `createdFiled` variable typo.
async function updateMovieCollections() {
  const collectionFiles = await repository.getFilesBasedOnTitle('logy')
      .then(files => files.filter(file => file.fileIndex === null))
      .then(files => files.filter(file => parse(file.title).complete));
  return Promise.all(collectionFiles.map(original => repository.getTorrent({ infoHash: original.infoHash })
      .then(torrent => parseTorrentFiles({ ...torrent.get(), imdbId: original.imdbId }))
      .then(files => Promise.all(files.map(file => {
        console.log(file);
        return repository.createFile(file)
      })))
      .then(createdFiles => {
        if (createdFiles && createdFiles.length) {
          console.log(`Updated movie collection ${original.title}`);
          repository.deleteFile(original)
        } else {
          console.log(`Failed updating movie collection ${original.title}`);
        }
      })));
}
// Find torrents stored as MOVIE whose titles look like series seasons
// (e.g. "S01"), re-decompose them into episode files and retag them as SERIES.
async function reapplySeriesSeasonsSavedAsMovies() {
return repository.getTorrentsBasedOnTitle('(?:[^a-zA-Z0-9]|^)[Ss][012]?[0-9](?:[^0-9]|$)', Type.MOVIE)
.then(torrents => Promise.all(torrents
.filter(torrent => parse(torrent.title).seasons)
.map(torrent => limiter.schedule(() => reapplyEpisodeDecomposing(torrent.infoHash, false)
.then(() => {
torrent.type = Type.SERIES;
return torrent.save();
})))))
.then(() => console.log('Finished updating multiple torrents'));
}
// Re-run episode decomposition (including stored source files) for every ANIME
// torrent whose title matches `regex`, rate-limited by the shared limiter.
async function reapplyDecomposingToTorrentsOnRegex(regex) {
  const torrents = await repository.getTorrentsBasedOnTitle(regex, Type.ANIME);
  await Promise.all(torrents.map(torrent =>
      limiter.schedule(() => reapplyEpisodeDecomposing(torrent.infoHash, true))));
  console.log('Finished updating multiple torrents');
}
// Rebuild the per-episode file rows of a stored torrent by re-running title
// parsing and file decomposition, preserving existing DB row identities where
// a row with the same fileIndex already exists.
// @param infoHash torrent to reprocess
// @param includeSourceFiles when true, feed the stored file entries back into
//        the parser as the torrent's file listing
async function reapplyEpisodeDecomposing(infoHash, includeSourceFiles = true) {
const torrent = await repository.getTorrent({ infoHash });
const storedFiles = await repository.getFiles({ infoHash });
// Index existing rows by fileIndex (null when absent) so the re-parsed
// results can be matched back onto them.
const fileIndexMap = storedFiles
.reduce((map, next) => {
const fileIndex = next.fileIndex !== undefined ? next.fileIndex : null;
map[fileIndex] = (map[fileIndex] || []).concat(next);
return map;
}, {});
// Synthesise a file listing from the stored rows (one per distinct fileIndex).
const files = includeSourceFiles && Object.values(fileIndexMap)
.map(sameIndexFiles => sameIndexFiles[0])
.map(file => ({
fileIndex: file.fileIndex,
name: file.title.replace(/.*\//, ''),
path: file.title,
size: file.size
}));
// Kitsu lookup is disabled in this manual script; fall back to the most
// common stored imdbId, then to a fresh title-based lookup.
const kitsuId = undefined;
const imdbId = kitsuId
? undefined
: mostCommonValue(storedFiles.map(file => file.imdbId))
|| await getImdbId(parse(torrent.title)).catch(() => undefined);
if (!imdbId && !kitsuId) {
console.log(`imdbId or kitsuId not found: ${torrent.provider} ${torrent.title}`);
return Promise.resolve();
}
return parseTorrentFiles({ ...torrent.get(), imdbId, kitsuId, files })
.then(torrentContents => torrentContents.videos)
.then(newFiles => newFiles.map(file => {
const fileIndex = file.fileIndex !== undefined ? file.fileIndex : null;
const mapping = fileIndexMap[fileIndex];
if (mapping) {
// Reuse an existing row: carry the new season/episode mapping onto it.
const originalFile = mapping.shift();
if (originalFile) {
if (!originalFile.imdbId) {
originalFile.imdbId = file.imdbId
}
originalFile.imdbSeason = file.imdbSeason;
originalFile.imdbEpisode = file.imdbEpisode;
originalFile.kitsuId = file.kitsuId;
originalFile.kitsuEpisode = file.kitsuEpisode;
return originalFile;
}
}
return file;
}))
// Existing rows are saved in place; brand new ones are inserted.
.then(updatedFiles => Promise.all(updatedFiles
.map(file => file.id ? file.save() : repository.createFile(file))))
.then(() => console.log(`Updated files for [${torrent.infoHash}] ${torrent.title}`));
}
// Attach orphaned subtitle rows to their video files: group unassigned subs by
// infoHash, run the subtitle-assignment heuristic against that torrent's video
// files, and persist every successful match.
async function assignSubs() {
const unassignedSubs = await repository.getUnassignedSubtitles()
.then(subs => subs.reduce((map, sub) => {
map[sub.infoHash] = (map[sub.infoHash] || []).concat(sub);
return map;
}, {}));
const infoHashes = Object.keys(unassignedSubs);
return Promise.all(infoHashes.map(async infoHash => {
const videos = await repository.getFiles({ infoHash });
const subtitles = unassignedSubs[infoHash];
const assignedContents = assignSubtitles({ videos, subtitles });
return Promise.all(assignedContents.videos
.filter(video => video.subtitles)
.map(video => repository.upsertSubtitles(video, video.subtitles)));
}));
}
// Open torrents that have no content rows yet, 15 at a time; the repository
// returns at most 500 rows per call, so a full batch means more may remain and
// the function recurses for the next batch.
// Fix: the else-branch returned the `Promise.resolve` function itself (missing
// call parentheses) instead of a resolved promise.
async function openTorrentContents() {
  const limiter = new Bottleneck({ maxConcurrent: 15 });
  const unopenedTorrents = await repository.getNoContentsTorrents();
  await Promise.all(unopenedTorrents.map(torrent => limiter.schedule(() => createTorrentContents(torrent))));
  if (unopenedTorrents.length === 500) {
    return openTorrentContents();
  }
}
// const infoHashes = [
// ]
// Promises.sequence(infoHashes.map(infoHash => () => reapplyEpisodeDecomposing(infoHash)))
// .then(() => console.log('Finished'));
//findAllFiles().then(() => console.log('Finished'));
//updateMovieCollections().then(() => console.log('Finished'));
reapplyEpisodeDecomposing('96cc18f564f058384c18b4966a183d81808ce3fb', true).then(() => console.log('Finished'));
//reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
//reapplyDecomposingToTorrentsOnRegex('.*Title.*').then(() => console.log('Finished'));
//reapplyManualHashes().then(() => console.log('Finished'));
// assignSubs().then(() => console.log('Finished'));
// openTorrentContents().then(() => console.log('Finished'));

View File

@@ -1,14 +0,0 @@
const { scheduleScraping, scrapeAll } = require('./scraper')
const { scheduleUpdateSeeders, scheduleUpdateSeedersForNewTorrents } = require('./seeders')
// Entry point for the scraping process: with ENABLE_SCHEDULING set, register
// the cron jobs and the two continuous seeder-update loops; otherwise run every
// scraper once immediately.
function startScraper() {
  if (!process.env.ENABLE_SCHEDULING) {
    scrapeAll();
    return;
  }
  scheduleScraping();
  scheduleUpdateSeeders();
  scheduleUpdateSeedersForNewTorrents();
}
module.exports = { startScraper }

View File

@@ -1,28 +0,0 @@
const schedule = require('node-schedule');
const scrapers = require('./scrapers');
const { sequence } = require('../lib/promises')
// Group the configured scrapers by cron expression and register one scheduled
// job per distinct cron, each scraping its providers sequentially.
function scheduleScraping() {
  const byCron = {};
  for (const provider of scrapers) {
    (byCron[provider.cron] = byCron[provider.cron] || []).push(provider);
  }
  for (const [cron, providers] of Object.entries(byCron)) {
    schedule.scheduleJob(cron, () => _scrapeProviders(providers));
  }
}
// Run every configured scraper once, one after another, in listed order.
function scrapeAll() {
  const allProviders = scrapers;
  return _scrapeProviders(allProviders);
}
// Scrape the given providers strictly one at a time, in order.
async function _scrapeProviders(providers) {
  const tasks = providers.map(provider => () => _singleScrape(provider));
  return sequence(tasks);
}
// Run one provider's scrape; failures are logged and swallowed so the
// remaining providers still get their turn.
async function _singleScrape(provider) {
  try {
    return await provider.scraper.scrape();
  } catch (error) {
    console.warn(`Failed ${provider.name} scraping due: `, error);
  }
}
module.exports = { scheduleScraping, scrapeAll }

View File

@@ -1,47 +0,0 @@
// const thepiratebayScraper = require('../scrapers/thepiratebay/thepiratebay_scraper');
// const thepiratebayFakeRemoval = require('../scrapers/thepiratebay/thepiratebay_fakes_removal');
const ytsScraper = require('../scrapers/yts/yts_scraper');
const ytsFullScraper = require('../scrapers/yts/yts_full_scraper');
const eztvScraper = require('../scrapers/eztv/eztv_scraper');
const leetxScraper = require('../scrapers/1337x/1337x_scraper');
// const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper');
const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper');
// const erairawsScraper = require('../scrapers/erairaws/erairaws_scraper');
// const torrentGalaxyScraper = require('../scrapers/torrentgalaxy/torrentgalaxy_scraper');
// const rutorScraper = require('../scrapers/rutor/rutor_scraper');
// const Comando = require('../scrapers/comando/comando_scraper')
// const ComoEuBaixo = require('../scrapers/comoeubaixo/comoeubaixo_scraper')
// const Lapumia = require('../scrapers/lapumia/lapumia_scraper')
// const OndeBaixa = require('../scrapers/ondebaixa/ondebaixa_scraper');
// const AnimesTorrent = require('../scrapers/animestorrent/animestorrent_scraper')
// const DarkMahou = require('../scrapers/darkmahou/darkmahou_scraper')
// const torrent9Scraper = require('../scrapers/torrent9/torrent9_scraper');
module.exports = [
{ scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: ytsFullScraper, name: ytsFullScraper.NAME, cron: '0 0 0 * * 0' },
{ scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: nyaaSiScraper, name: nyaaSiScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: nyaaPantsuScraper, name: nyaaPantsuScraper.NAME, cron: '0 0 */4 ? * *' },
// { scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */1 ? * *' },
// { scraper: rutorScraper, name: rutorScraper.NAME, cron: '0 0 */4 ? * *' },
// { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */2 ? * *' },
// { scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 0 */12 ? * *' },
// { scraper: torrentGalaxyScraper, name: torrentGalaxyScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' }
// { scraper: torrent9Scraper, name: torrent9Scraper.NAME, cron: '0 0 */4 ? * *' },
// { scraper: Comando, name: Comando.NAME, cron: '0 0 */4 ? * *' },
// { scraper: ComoEuBaixo, name: ComoEuBaixo.NAME, cron: '0 0 */4 ? * *' },
// { scraper: Lapumia, name: Lapumia.NAME, cron: '0 0 */4 ? * *' },
// { scraper: OndeBaixa, name: OndeBaixa.NAME, cron: '0 0 */4 ? * *' },
// { scraper: AnimesTorrent, name: AnimesTorrent.NAME, cron: '0 0 */4 ? * *' },
// { scraper: DarkMahou, name: DarkMahou.NAME, cron: '0 0 */4 ? * *' },
// { scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */4 ? * *' },
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
// { scraper: require('../scrapers/1337x/1337x_search_scraper') }
// { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
// { scraper: require('../scrapers/thepiratebay/thepiratebay_dump_scraper') }
// { scraper: require('../scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper') }
// { scraper: require('../scrapers/thepiratebay/thepiratebay_update_size_scraper') }
];

View File

@@ -1,64 +0,0 @@
const Bottleneck = require('bottleneck');
const repository = require('../lib/repository')
const { delay } = require('../lib/promises')
const { updateCurrentSeeders } = require('../lib/torrent')
const { updateTorrentSeeders } = require('../lib/torrentEntries')
const DELAY_MS = 0; // 0 seconds
const DELAY_NEW_MS = 30_000; // 30 seconds
const DELAY_NO_NEW_MS = 300_000; // 300 seconds
const DELAY_FAILED_TORRENTS_MS = 5_000; // 5 seconds
const updateLimiter = new Bottleneck({ maxConcurrent: 5 });
const statistics = {};
const statisticsNew = {};
// Endless self-rescheduling loop: take the next batch of torrents due a seeders
// refresh, scrape current counts from their trackers, persist the updates (at
// most 5 concurrent via updateLimiter), tally statistics, then start over
// immediately (DELAY_MS is 0). Failures are logged, never fatal.
function scheduleUpdateSeeders() {
console.log('Starting seeders update...')
getTorrents()
.then(torrents => updateCurrentSeeders(torrents))
.then(updatedTorrents => Promise.all(
updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated)))))
.then(torrents => updateStatistics(torrents, statistics))
.then(() => console.log('Finished seeders update:', statistics))
.catch(error => console.warn('Failed seeders update:', error))
.then(() => delay(DELAY_MS))
.then(() => scheduleUpdateSeeders());
}
// Same endless loop as scheduleUpdateSeeders, but for newly ingested torrents
// that have not had a first seeders check; waits 30s between iterations and
// keeps a separate statistics tally.
function scheduleUpdateSeedersForNewTorrents() {
console.log('Starting seeders update for new torrents...')
getNewTorrents()
.then(torrents => updateCurrentSeeders(torrents))
.then(updatedTorrents => Promise.all(
updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated)))))
.then(torrents => updateStatistics(torrents, statisticsNew))
.then(() => console.log('Finished seeders update for new torrents:', statisticsNew))
.catch(error => console.warn('Failed seeders update for new torrents:', error))
.then(() => delay(DELAY_NEW_MS))
.then(() => scheduleUpdateSeedersForNewTorrents());
}
// Fetch the next batch of torrents due a seeders refresh, retrying forever
// with a short delay after each repository failure.
async function getTorrents() {
  return repository.getUpdateSeedersTorrents()
      .catch(() => delay(DELAY_FAILED_TORRENTS_MS).then(() => getTorrents()));
}
// Fetch newly ingested torrents needing a first seeders check. Retries on
// repository failure and, when the queue is empty, waits before polling again.
async function getNewTorrents() {
  const torrents = await repository.getUpdateSeedersNewTorrents()
      .catch(() => delay(DELAY_FAILED_TORRENTS_MS).then(() => getNewTorrents()));
  if (!torrents.length) {
    console.log('No new torrents to update seeders');
    return delay(DELAY_NO_NEW_MS).then(() => getNewTorrents());
  }
  return torrents;
}
// Accumulate, under today's date (YYYY-MM-DD), how many torrents were touched
// across all the nested result batches.
function updateStatistics(updatedTorrents, statisticsObject) {
  const total = updatedTorrents.reduce((sum, batch) => sum + batch.length, 0);
  const today = new Date().toISOString().replace(/T.*/, '');
  statisticsObject[today] = (statisticsObject[today] || 0) + total;
}
module.exports = { scheduleUpdateSeeders, scheduleUpdateSeedersForNewTorrents }

View File

@@ -1,206 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const Sugar = require('sugar-date');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
const defaultProxies = [
'https://1337x.to'
];
const defaultTimeout = 50000;
const maxSearchPage = 50;
let FlaresolverrUserAgent = '';
let FlaresolverrCookies = '';
const Categories = {
MOVIE: 'Movies',
TV: 'TV',
ANIME: 'Anime',
DOCUMENTARIES: 'Documentaries',
APPS: 'Apps',
GAMES: 'Games',
MUSIC: 'Music',
PORN: 'XXX',
OTHER: 'Other',
};
// Fetch and parse a single 1337x torrent detail page by id/slug, racing all
// proxy mirrors and retrying the whole request up to `retries` times.
function torrent(torrentId, config = {}, retries = 2) {
if (!torrentId || retries === 0) {
return Promise.reject(new Error(`Failed ${torrentId} query`));
}
const proxyList = config.proxyList || defaultProxies;
// Accept either a bare slug or a full "/torrent/<slug>" path.
const slug = torrentId.startsWith('/torrent/') ? torrentId.replace('/torrent/', '') : torrentId;
return Promises.first(proxyList
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${slug}`, config)))
.then((body) => parseTorrentPage(body))
.then((torrent) => ({ torrentId: slug, ...torrent }))
.catch((err) => torrent(slug, config, retries - 1));
}
// Search 1337x for `keyword` (optionally within a category), following result
// pages up to config.extendToPage (capped at 50) for as long as pages come back
// full (40 rows). Retries the whole search up to `retries` times.
function search(keyword, config = {}, retries = 2) {
if (!keyword || retries === 0) {
return Promise.reject(new Error(`Failed ${keyword} search`));
}
const proxyList = config.proxyList || defaultProxies;
const page = config.page || 1;
const category = config.category;
const extendToPage = Math.min(maxSearchPage, (config.extendToPage || 1))
const requestUrl = proxyUrl => category
? `${proxyUrl}/category-search/${keyword}/${category}/${page}/`
: `${proxyUrl}/search/${keyword}/${page}/`;
return Promises.first(proxyList
.map(proxyUrl => singleRequest(requestUrl(proxyUrl), config)))
.then(body => parseTableBody(body))
// A full page (40 rows) suggests more results — recurse into the next page.
.then(torrents => torrents.length === 40 && page < extendToPage
? search(keyword, { ...config, page: page + 1 }).catch(() => [])
.then(nextTorrents => torrents.concat(nextTorrents))
: torrents)
.catch((err) => search(keyword, config, retries - 1));
}
/**
 * Browses a 1337x category listing page.
 * When `config.sort` is given the sorted (descending) listing endpoint is
 * used. Retries up to `retries` times before rejecting.
 * @param {object} config - { proxyList, page, category, sort, timeout }
 * @param {number} retries - attempts left
 * @returns {Promise<Array>} parsed listing rows
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const page = config.page || 1;
  const category = config.category;
  const sort = config.sort;
  const requestUrl = proxyUrl => sort
      ? `${proxyUrl}/sort-cat/${category}/${sort}/desc/${page}/`
      : `${proxyUrl}/cat/${category}/${page}/`;
  return Promises.first(proxyList
      .map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config)))
      .then((body) => parseTableBody(body))
      .catch((err) => {
        console.error(err);
        // Bug fix: the retry was not returned, so on failure the promise
        // resolved to `undefined` instead of the retry's result/rejection.
        return browse(config, retries - 1);
      });
}
// Fetches `requestUrl` and resolves with its HTML body.
// While no FlareSolverr session is cached, the request is routed through a
// local FlareSolverr instance (to pass the Cloudflare challenge) and the
// solved user agent + cookies are stored at module level; subsequent calls
// hit the site directly with those credentials. Rejects when the body is
// missing or does not look like a real 1337x page.
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
let options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
if (FlaresolverrUserAgent === '' || FlaresolverrCookies === '') {
console.log("using flaresolverr");
// NOTE(review): assumes a FlareSolverr container is reachable at this
// host name — confirm the deployment provides it.
return axios.post('http://flaresolverr:8191/v1', {
cmd: 'request.get',
url: requestUrl,
}, options)
.then((response) => {
if (response.data.status !== 'ok') {
throw new Error(`FlareSolverr did not return status 'ok': ${response.data.message}`)
}
const body = response.data.solution.response;
if (!body) {
throw new Error(`No body: ${requestUrl}`);
} else if (body.includes('502: Bad gateway') ||
body.includes('403 Forbidden') ||
!(body.includes('1337x</title>'))) {
throw new Error(`Invalid body contents: ${requestUrl}`);
}
// Cache the solved session so later calls can skip FlareSolverr.
FlaresolverrUserAgent = response.data.solution.userAgent;
response.data.solution.cookies.forEach(cookie => {
FlaresolverrCookies = FlaresolverrCookies + `${cookie.name}=${cookie.value}; `;
});
return body;
});
}
else {
console.log("using direct request");
// Replace the random user agent with the solved one and attach the
// captured clearance cookies.
options.headers['User-Agent'] = FlaresolverrUserAgent;
options.headers['Cookie'] = FlaresolverrCookies;
return axios.get(requestUrl, options)
.then((response) => {
const body = response.data;
if (!body) {
throw new Error(`No body: ${requestUrl}`);
} else if (body.includes('502: Bad gateway') ||
body.includes('403 Forbidden') ||
!(body.includes('1337x</title>'))) {
throw new Error(`Invalid body contents: ${requestUrl}`);
}
return body;
})
}
}
/**
 * Parses a 1337x listing page into row summaries.
 * @param {string} body - listing page HTML
 * @returns {Promise<Array<{name, torrentId, seeders, leechers, size}>>}
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      // Bug fix: without `return` execution continued past the reject.
      return reject(new Error('Failed loading body'));
    }
    const torrents = [];
    $('.table > tbody > tr').each((i, element) => {
      const row = $(element);
      torrents.push({
        // The second anchor in a row is the detail-page link with the title.
        name: row.find('a').eq(1).text(),
        torrentId: row.find('a').eq(1).attr('href').replace('/torrent/', ''),
        seeders: parseInt(row.children('td.coll-2').text(), 10),
        leechers: parseInt(row.children('td.coll-3').text(), 10),
        size: parseSize(row.children('td.coll-4').text())
      });
    });
    resolve(torrents);
  });
}
// Parses a 1337x torrent detail page into a torrent record.
// Name/infoHash come from the magnet link; stats, category, languages, size
// and upload date are scraped from the detail table; the IMDb id (if any) is
// extracted from a link inside the description block.
function parseTorrentPage(body) {
return new Promise((resolve, reject) => {
const $ = cheerio.load(body);
if (!$) {
reject(new Error('Failed loading body'));
}
const details = $('.torrent-detail-page');
const magnetLink = details.find('a:contains(\'Magnet Download\')').attr('href');
const imdbIdMatch = details.find('div[id=\'description\']').html().match(/imdb\.com\/title\/(tt\d+)/i);
const torrent = {
// Magnet names are URL-encoded; '+' stands for a space.
name: escapeHTML(decode(magnetLink).name.replace(/\+/g, ' ')),
infoHash: decode(magnetLink).infoHash,
magnetLink: magnetLink,
seeders: parseInt(details.find('strong:contains(\'Seeders\')').next().text(), 10),
leechers: parseInt(details.find('strong:contains(\'Leechers\')').next().text(), 10),
category: details.find('strong:contains(\'Category\')').next().text(),
languages: details.find('strong:contains(\'Language\')').next().text(),
size: parseSize(details.find('strong:contains(\'Total size\')').next().text()),
uploadDate: parseDate(details.find('strong:contains(\'Date uploaded\')').next().text()),
imdbId: imdbIdMatch && imdbIdMatch[1],
// File entries render as "path/name (size)"; the regexes split the two
// parts. NOTE(review): a list item not matching that shape would throw —
// confirm the site always formats file rows this way.
files: details.find('div[id=\'files\']').first().find('li')
.map((i, elem) => $(elem).text())
.map((i, text) => ({
fileIndex: i,
name: text.match(/^(.+)\s\(.+\)$/)[1].replace(/^.+\//g, ''),
path: text.match(/^(.+)\s\(.+\)$/)[1],
size: parseSize(text.match(/^.+\s\((.+)\)$/)[1])
})).get()
};
resolve(torrent);
});
}
/**
 * Converts a human-readable 1337x date string into a Date.
 * "decade(s) ago" is approximated as ten years back, since Sugar cannot
 * parse it directly.
 */
function parseDate(dateString) {
  const mentionsDecades = /decade.*ago/i.test(dateString);
  return mentionsDecades
      ? Sugar.Date.create('10 years ago')
      : Sugar.Date.create(dateString);
}
module.exports = { torrent, search, browse, Categories, FlaresolverrCookies, FlaresolverrUserAgent };

View File

@@ -1,115 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const leetx = require('./1337x_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = '1337x';
// Default number of listing pages scraped per category (see untilPage).
const UNTIL_PAGE = 10;
// Built at load time; legal before typeMapping's definition because function
// declarations hoist.
const TYPE_MAPPING = typeMapping();
// Caps concurrent detail-page fetches against the site.
const limiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Runs a full 1337x scrape pass, stamps the provider row with the pass start
 * time, and clears the cached FlareSolverr session when done.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
  // Reset the api module's cached session so the next pass re-solves.
  leetx.FlaresolverrCookies = '';
  leetx.FlaresolverrUserAgent = '';
}
/** Refreshes a stored torrent's stats by re-fetching its detail page (rate-limited). */
async function updateSeeders(torrent) {
  const { torrentId } = torrent;
  return limiter.schedule(() => leetx.torrent(torrentId));
}
/** Scrapes each allowed category in sequence and flattens the results. */
async function scrapeLatestTorrents() {
  const allowedCategories = [
    leetx.Categories.MOVIE,
    leetx.Categories.TV,
    leetx.Categories.ANIME,
    leetx.Categories.DOCUMENTARIES
  ];
  const perCategory = await Promises.sequence(
      allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category)));
  return perCategory.reduce((acc, entries) => acc.concat(entries), []);
}
/**
 * Scrapes one listing page of a category, processing each row through the
 * rate limiter, then recurses into the next page while results keep coming
 * and the per-category page cap (untilPage) is not reached.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  let torrents;
  try {
    torrents = await leetx.browse(({ category, page }));
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    torrents = [];
  }
  const resolved = await Promise.all(
      torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))));
  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
}
// Turns one listing row into a stored torrent entry.
// Fast path: if the row already exists it is updated and returned without a
// detail fetch. Otherwise the detail page is fetched, validated (known
// category, parseable upload date), re-checked against the store, and finally
// persisted as a new entry.
async function processTorrentRecord(record) {
if (await checkAndUpdateTorrent({ provider: NAME, ...record })) {
return record;
}
const torrentFound = await leetx.torrent(record.torrentId).catch(() => undefined);
if (!torrentFound || !TYPE_MAPPING[torrentFound.category]) {
return Promise.resolve('Invalid torrent record');
}
// parseDate yields an invalid Date for unparseable strings; isNaN on a Date
// coerces via valueOf, so this skips entries with bad upload dates.
if (isNaN(torrentFound.uploadDate)) {
console.warn(`Incorrect upload date for [${torrentFound.infoHash}] ${torrentFound.name}`);
return;
}
if (await checkAndUpdateTorrent(torrentFound)) {
return torrentFound;
}
const torrent = {
infoHash: torrentFound.infoHash,
provider: NAME,
torrentId: torrentFound.torrentId,
// Collapse tabs/runs of whitespace in the scraped title.
title: torrentFound.name.replace(/\t|\s+/g, ' ').trim(),
type: TYPE_MAPPING[torrentFound.category],
size: torrentFound.size,
seeders: torrentFound.seeders,
uploadDate: torrentFound.uploadDate,
imdbId: torrentFound.imdbId,
languages: torrentFound.languages || undefined
};
return createTorrentEntry(torrent);
}
/** Maps 1337x category names to internal content types. */
function typeMapping() {
  return {
    [leetx.Categories.MOVIE]: Type.MOVIE,
    [leetx.Categories.DOCUMENTARIES]: Type.MOVIE,
    [leetx.Categories.TV]: Type.SERIES,
    [leetx.Categories.ANIME]: Type.ANIME,
  };
}
/** Page depth scraped per category: anime 5, documentaries 1, others the default. */
function untilPage(category) {
  switch (category) {
    case leetx.Categories.ANIME:
      return 5;
    case leetx.Categories.DOCUMENTARIES:
      return 1;
    default:
      return UNTIL_PAGE;
  }
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,128 +0,0 @@
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require("magnet-uri");
const { getRandomUserAgent } = require("../../lib/requestHelper");
// Per-request timeout (ms) and the pagination cap for searches.
const defaultTimeout = 10000;
const maxSearchPage = 50;
const baseUrl = 'https://animestorrent.com';
// Category slugs as used in site URLs.
const Categories = {
MOVIE: 'filme',
ANIME: 'tv',
OVA: 'ova'
};
/**
 * Fetches an AnimesTorrent detail page (one page can carry several magnets)
 * and resolves with the parsed records, each tagged with the page slug.
 * Retries until `retries` is exhausted.
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const slug = torrentId.split("/")[3];
  return singleRequest(`${baseUrl}/${slug}`, config)
      .then((body) => parseTorrentPage(body))
      .then((records) => records.map((record) => ({ torrentId: slug, ...record })))
      .catch(() => torrent(slug, config, retries - 1));
}
/**
 * Keyword search with pagination: follows the next page while a page comes
 * back full (40 rows) and the index is below `config.extendToPage`.
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const lastPage = Math.min(maxSearchPage, config.extendToPage || 1);
  return singleRequest(`${baseUrl}/page/${page}/?s=${keyword}`, config)
      .then((body) => parseTableBody(body))
      .then((torrents) => {
        if (torrents.length !== 40 || page >= lastPage) {
          return torrents;
        }
        return search(keyword, { ...config, page: page + 1 })
            .catch(() => [])
            .then((nextTorrents) => torrents.concat(nextTorrents));
      })
      .catch(() => search(keyword, config, retries - 1));
}
/** Browses a category listing (or the front page when no category is given). */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const category = config.category;
  const requestUrl = category
      ? `${baseUrl}/tipo/${category}/page/${page}/`
      : `${baseUrl}/page/${page}/`;
  return singleRequest(requestUrl, config)
      .then((body) => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
/**
 * Performs one GET with a random user agent and resolves with the HTML body.
 * Rejects on an empty body or an error page (502/403).
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
  return axios.get(requestUrl, options).then((response) => {
    const body = response.data;
    if (!body) {
      throw new Error(`No body: ${requestUrl}`);
    }
    const looksLikeErrorPage =
        body.includes("502: Bad gateway") || body.includes("403 Forbidden");
    if (looksLikeErrorPage) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  });
}
/**
 * Parses a listing page into { name, torrentId } row summaries.
 * @param {string} body - listing page HTML
 * @returns {Promise<Array<{name: string, torrentId: string}>>}
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      // Bug fix: without `return` execution continued past the reject.
      return reject(new Error("Failed loading body"));
    }
    const torrents = [];
    $("article.bs").each((i, element) => {
      const row = $(element);
      torrents.push({
        name: row.find("span.ntitle").text(),
        torrentId: row.find("div > a").attr("href"),
      });
    });
    resolve(torrents);
  });
}
/**
 * Parses a detail page into one record per magnet link on the page.
 * @param {string} body - detail page HTML
 * @returns {Promise<Array>} records with title/infoHash/category/uploadDate…
 */
function parseTorrentPage(body) {
  // Fixes the async-executor anti-pattern (`new Promise(async …)`): the body
  // is fully synchronous, and an async executor would turn a throw into an
  // unhandled rejection instead of rejecting this promise.
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      // Bug fix: without `return` execution continued past the reject.
      return reject(new Error("Failed loading body"));
    }
    const magnets = [];
    $(`a[href^="magnet"]`).each((i, section) => {
      magnets.push($(section).attr("href"));
    });
    const details = $('div.infox');
    const torrents = magnets.map((magnetLink) => {
      const decoded = decode(magnetLink);
      const releaseLabel = details.find('b:contains(\'Lançamento:\')')[0];
      return {
        title: decoded.name,
        originalName: details.find('h1.entry-title').text(),
        // The year is the text node right after the "Lançamento:" label.
        year: releaseLabel ? releaseLabel.nextSibling.nodeValue.trim() : '',
        infoHash: decoded.infoHash,
        magnetLink: magnetLink,
        category: details.find('b:contains(\'Tipo:\')').next().attr('href').split('/')[4],
        uploadDate: new Date($("time[itemprop=dateModified]").attr("datetime")),
      };
    });
    resolve(torrents);
  });
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,108 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const animetorrrent = require("./animestorrent_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { getKitsuId } = require("../../lib/metadata");
const NAME = "AnimesTorrent";
const UNTIL_PAGE = 5;
const limiter = new Bottleneck({ maxConcurrent: 5 });
async function scrape() {
const scrapeStart = moment();
const lastScrape = await repository.getProvider({ name: NAME });
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
return scrapeLatestTorrents()
.then(() => {
lastScrape.lastScraped = scrapeStart;
return lastScrape.save();
})
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
async function updateSeeders(torrent) {
return limiter.schedule(() => animetorrrent.torrent(torrent.torrentId));
}
async function scrapeLatestTorrents() {
const allowedCategories = [
animetorrrent.Categories.MOVIE,
animetorrrent.Categories.ANIME,
animetorrrent.Categories.OVA
];
return Promises.sequence(allowedCategories
.map((category) => () => scrapeLatestTorrentsForCategory(category)))
.then((entries) => entries.reduce((a, b) => a.concat(b), []));
}
async function scrapeLatestTorrentsForCategory(category, page = 1) {
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
return animetorrrent
.browse({ category, page })
.catch((error) => {
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
return Promise.resolve([]);
})
.then((torrents) => Promise.all(torrents.map((torrent) => limiter.schedule(() => processEntry(torrent)))))
.then((resolved) => resolved.length > 0 && page < untilPage(category)
? scrapeLatestTorrentsForCategory(category, page + 1)
: Promise.resolve());
}
async function processEntry(entry) {
return animetorrrent.torrent(entry.torrentId)
.then(records => Promises.sequence(records.map(record => () => processTorrentRecord(record))))
.catch(() => undefined);
}
async function processTorrentRecord(foundTorrent) {
if (await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent })) {
return foundTorrent;
}
if (!foundTorrent.size) {
await updateTorrentSize(foundTorrent);
}
if (!Number.isInteger(foundTorrent.seeders)) {
await updateCurrentSeeders(foundTorrent);
}
if (!foundTorrent.imdbId && !foundTorrent.kitsuId) {
const info = { title: foundTorrent.originalName, year: foundTorrent.year };
foundTorrent.kitsuId = await getKitsuId(info).catch(() => undefined);
}
const torrent = {
infoHash: foundTorrent.infoHash,
provider: NAME,
torrentId: foundTorrent.torrentId,
title: foundTorrent.title,
type: Type.ANIME,
imdbId: foundTorrent.imdbId,
kitsuId: foundTorrent.kitsuId,
uploadDate: foundTorrent.uploadDate,
seeders: foundTorrent.seeders,
size: foundTorrent.size,
files: foundTorrent.files,
languages: foundTorrent.languages
};
return createTorrentEntry(torrent);
}
function untilPage(category) {
if (animetorrrent.Categories.ANIME === category) {
return 5;
}
if (animetorrrent.Categories.OVA === category) {
return 3;
}
return UNTIL_PAGE;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,163 +0,0 @@
const axios = require('axios');
const moment = require("moment")
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtLanguages, sanitizePtOriginalName } = require('../scraperHelper')
// Per-request timeout (ms) and the pagination cap for searches.
const defaultTimeout = 30000;
const maxSearchPage = 50
const baseUrl = 'https://comando.to';
// Category slugs as used in site URLs.
const Categories = {
MOVIE: 'filmes',
TV: 'series',
ANIME: 'animes',
DOCUMENTARIES: 'documentario'
};
/**
 * Fetches a Comando post (one post can carry several magnets) and resolves
 * with the parsed records, each tagged with the post slug. Logs and retries
 * on failure until `retries` is exhausted.
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const slug = torrentId.split("/")[3];
  return singleRequest(`${baseUrl}/${slug}`, config)
      .then((body) => parseTorrentPage(body))
      .then((records) => records.map(record => ({ torrentId: slug, ...record })))
      .catch((err) => {
        console.warn(`Failed Comando ${slug} request: `, err);
        return torrent(torrentId, config, retries - 1)
      });
}
/**
 * Keyword search with pagination: follows the next page while a page comes
 * back full (40 rows) and the index is below `config.extendToPage`.
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const lastPage = Math.min(maxSearchPage, (config.extendToPage || 1))
  return singleRequest(`${baseUrl}/page/${page}/?s=${keyword}`, config)
      .then(body => parseTableBody(body))
      .then(torrents => {
        if (torrents.length !== 40 || page >= lastPage) {
          return torrents;
        }
        return search(keyword, { ...config, page: page + 1 })
            .catch(() => [])
            .then(nextTorrents => torrents.concat(nextTorrents));
      })
      .catch(() => search(keyword, config, retries - 1));
}
/** Browses one page of a Comando category listing; retries on failure. */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const category = config.category;
  const listingUrl = `${baseUrl}/category/${category}/page/${page}/`;
  return singleRequest(listingUrl, config)
      .then((body) => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
/**
 * Performs one GET with a random user agent and resolves with the HTML body.
 * Rejects (with the error message rather than the Error object, as callers
 * here expect) on an empty body or an error page.
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
  return axios.get(requestUrl, options)
      .then((response) => {
        const body = response.data;
        if (!body) {
          throw new Error(`No body: ${requestUrl}`);
        }
        const looksLikeErrorPage =
            body.includes('502: Bad gateway') || body.includes('403 Forbidden');
        if (looksLikeErrorPage) {
          throw new Error(`Invalid body contents: ${requestUrl}`);
        }
        return body;
      })
      .catch(error => Promise.reject(error.message || error));
}
/**
 * Parses a listing page into { name, torrentId } row summaries.
 * @param {string} body - listing page HTML
 * @returns {Promise<Array<{name: string, torrentId: string}>>}
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      // Bug fix: without `return` execution continued past the reject.
      return reject(new Error('Failed loading body'));
    }
    const torrents = [];
    $('article').each((i, element) => {
      const row = $(element);
      torrents.push({
        name: row.find("h2 > a").text(),
        torrentId: row.find("h2 > a").attr("href")
      });
    });
    resolve(torrents);
  });
}
// Parses a Comando post into one record per Portuguese-dubbed magnet link.
// Release sections are headed by <h2><strong> blocks; only sections whose
// heading passes isPtDubbed are scanned for magnets. Shared metadata
// (original name, languages, IMDb link, category, date) is scraped from the
// post body.
function parseTorrentPage(body) {
return new Promise((resolve, reject) => {
const $ = cheerio.load(body);
if (!$) {
reject(new Error('Failed loading body'));
}
// Walk from each dubbed heading to the next heading/rule and collect the
// magnet anchors inside that slice.
const magnets = $('h2 > strong')
.filter((i, elem) => isPtDubbed($(elem).text())).parent()
.map((i, elem) => $(elem).nextUntil('h2, hr'))
.map((i, elem) => $(elem).find('a[href^="magnet"]'))
.map((i, section) => $(section).attr("href")).get();
const details = $('strong, b').filter((i, elem) => $(elem).text().match(/Servidor|Orig(?:\.|inal)/)).parent();
const imdbIdMatch = details.find('a[href*="imdb.com"]').attr('href')
const torrents = magnets.map(magnetLink => {
const decodedMagnet = decode(magnetLink);
const originalNameElem = details.find('strong, b')
.filter((i, elem) => $(elem).text().match(/Baixar|Orig(?:\.|inal)|^Título:/));
const languagesElem = details.find('strong, b')
.filter((i, elem) => $(elem).text().match(/^\s*([IÍ]dioma|[AÁ]udio)/));
const originalName = parseOriginalName(originalNameElem);
// Prefer the magnet's own name ('+' decodes to space); fall back to the
// post's original-name field when the magnet has none.
const title = decodedMagnet.name && escapeHTML(decodedMagnet.name.replace(/\+/g, ' '));
return {
title: title ? sanitizePtName(title) : originalName,
originalName: sanitizePtOriginalName(originalName),
year: details.find('a[href*="comando.to/category/"]').text(),
infoHash: decodedMagnet.infoHash,
magnetLink: magnetLink,
category: parseCategory($('div.entry-categories').html()),
// Post dates are rendered in pt-br long form (e.g. "2 de março de 2021").
uploadDate: new Date(moment($('a.updated').text(), 'LL', 'pt-br').format()),
imdbId: imdbIdMatch ? imdbIdMatch.split('/')[4] : null,
// The language list is the text node right after the label element.
languages: sanitizePtLanguages(languagesElem[0].nextSibling.nodeValue)
}
});
resolve(torrents.filter((x) => x));
});
}
/**
 * Extracts the original title from its label element: tries the next
 * element's text, then the text node following the label, then the label's
 * own text, and strips the leading "Label: " prefix.
 */
function parseOriginalName(originalNameElem) {
  const labelNode = originalNameElem[0];
  if (!labelNode) {
    return '';
  }
  const rawName = originalNameElem.next().text()
      || labelNode.nextSibling.nodeValue
      || originalNameElem.text();
  return rawName.replace(/[^:]*: ?/, '').trim();
}
/**
 * Maps the post's category-links HTML to a Categories value.
 * Returns undefined when none of the known category links are present.
 */
function parseCategory(categorys) {
  const $ = cheerio.load(categorys)
  if ($('a:contains(\'animes\')').text()) {
    return Categories.ANIME
  }
  if ($('a:contains(\'Filmes\')').text()) {
    return Categories.MOVIE
  }
  if ($('a:contains(\'Series\')').text()) {
    return Categories.TV
  }
  return undefined;
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,112 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const comando = require("./comando_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { getImdbId } = require("../../lib/metadata");
const NAME = "Comando";
// Default number of listing pages scraped per category (see untilPage).
const UNTIL_PAGE = 5;
// Built at load time; legal before typeMapping's definition because function
// declarations hoist.
const TYPE_MAPPING = typeMapping();
// Caps concurrent detail-page fetches against the site.
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Runs a full Comando scrape pass and stamps the provider row with the pass
 * start time once it completes.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/** Refreshes a stored torrent's stats by re-fetching its post (rate-limited). */
async function updateSeeders(torrent) {
  const { torrentId } = torrent;
  return limiter.schedule(() => comando.torrent(torrentId));
}
/** Scrapes each allowed category in sequence and flattens the results. */
async function scrapeLatestTorrents() {
  const allowedCategories = [
    comando.Categories.MOVIE,
    comando.Categories.TV
  ];
  const perCategory = await Promises.sequence(
      allowedCategories.map((category) => () => scrapeLatestTorrentsForCategory(category)));
  return perCategory.reduce((acc, entries) => acc.concat(entries), []);
}
/**
 * Scrapes one listing page of a category, processing each row through the
 * rate limiter, then recurses into the next page while results keep coming
 * and the per-category page cap (untilPage) is not reached.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  let torrents;
  try {
    torrents = await comando.browse({ category, page });
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    torrents = [];
  }
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processEntry(torrent))));
  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
}
/**
 * Fetches a listing entry's post and processes each magnet record found in
 * it, sequentially. Any failure resolves to undefined (best-effort).
 */
async function processEntry(entry) {
  try {
    const records = await comando.torrent(entry.torrentId);
    return await Promises.sequence(records.map(record => () => processTorrentRecord(record)));
  } catch (error) {
    return undefined;
  }
}
// Persists one parsed magnet record as a torrent entry.
// Fast path: existing entries are updated and returned. Otherwise missing
// size/seeders are backfilled, an IMDb id is looked up from the original
// name when absent, and a new entry is created.
async function processTorrentRecord(foundTorrent) {
if (await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent })) {
return foundTorrent;
}
if (!foundTorrent.size) {
await updateTorrentSize(foundTorrent);
}
if (!Number.isInteger(foundTorrent.seeders)) {
await updateCurrentSeeders(foundTorrent);
}
if (!foundTorrent.imdbId && foundTorrent.originalName) {
const info = { title: foundTorrent.originalName, year: foundTorrent.year };
// Lookup failures are tolerated; the entry is stored without an IMDb id.
foundTorrent.imdbId = await getImdbId(info, TYPE_MAPPING[foundTorrent.category]).catch(() => undefined);
}
const torrent = {
infoHash: foundTorrent.infoHash,
provider: NAME,
torrentId: foundTorrent.torrentId,
title: foundTorrent.title,
type: TYPE_MAPPING[foundTorrent.category],
imdbId: foundTorrent.imdbId,
uploadDate: foundTorrent.uploadDate,
seeders: foundTorrent.seeders,
size: foundTorrent.size,
files: foundTorrent.files,
languages: foundTorrent.languages
};
return createTorrentEntry(torrent);
}
/** Maps Comando category slugs to internal content types. */
function typeMapping() {
  return {
    [comando.Categories.MOVIE]: Type.MOVIE,
    [comando.Categories.DOCUMENTARIES]: Type.SERIES,
    [comando.Categories.TV]: Type.SERIES,
    [comando.Categories.ANIME]: Type.ANIME,
  };
}
/** Page depth scraped per category: TV 5, everything else the default. */
function untilPage(category) {
  return category === comando.Categories.TV ? 5 : UNTIL_PAGE;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,135 +0,0 @@
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper')
// Per-request timeout (ms) and the pagination cap for searches.
const defaultTimeout = 30000;
const maxSearchPage = 50
const baseUrl = 'https://comoeubaixo.com';
// Category slugs as used in site URLs.
const Categories = {
MOVIE: 'filmes',
TV: 'series',
ANIME: 'anime',
DESENHOS: 'desenhos'
};
/**
 * Fetches a ComoEuBaixo post (one post can carry several magnets) and
 * resolves with the parsed records, each tagged with the URL-encoded slug.
 * Logs and retries on failure until `retries` is exhausted.
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const slug = encodeURIComponent(torrentId.split("/")[3]);
  return singleRequest(`${baseUrl}/${slug}/`, config)
      .then((body) => parseTorrentPage(body))
      .then((records) => records.map(record => ({ torrentId: slug, ...record })))
      .catch((err) => {
        console.warn(`Failed ComoEuBaixo ${torrentId} request: `, err);
        return torrent(torrentId, config, retries - 1)
      });
}
/**
 * Keyword search with pagination: follows the next page while a page comes
 * back full (40 rows) and the index is below `config.extendToPage`.
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const lastPage = Math.min(maxSearchPage, (config.extendToPage || 1))
  return singleRequest(`${baseUrl}/${keyword}/${page}/`, config)
      .then(body => parseTableBody(body))
      .then(torrents => {
        if (torrents.length !== 40 || page >= lastPage) {
          return torrents;
        }
        return search(keyword, { ...config, page: page + 1 })
            .catch(() => [])
            .then(nextTorrents => torrents.concat(nextTorrents));
      })
      .catch(() => search(keyword, config, retries - 1));
}
/** Browses a category listing (or the front page when no category is given). */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const category = config.category;
  const requestUrl = category
      ? `${baseUrl}/${category}/${page}/`
      : `${baseUrl}/${page}/`;
  return singleRequest(requestUrl, config)
      .then((body) => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
/**
 * Performs one GET with a random user agent and resolves with the body.
 * Rejects (with the error message rather than the Error object, as callers
 * here expect) on an empty body or an error page.
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
  return axios.get(requestUrl, options)
      .then((response) => {
        const body = response.data;
        // Bug fix: Buffers expose `length`, not `size`; the original
        // `!body.size` was true for every Buffer, so any binary body was
        // rejected as "No body" even when it had content.
        if (!body || (Buffer.isBuffer(body) && body.length === 0)) {
          throw new Error(`No body: ${requestUrl}`);
        } else if (body.includes('502: Bad gateway') ||
            body.includes('403 Forbidden')) {
          throw new Error(`Invalid body contents: ${requestUrl}`);
        }
        return body;
      })
      .catch(error => Promise.reject(error.message || error));
}
/**
 * Parses a listing page into row summaries; `isTorrent` flags rows whose
 * card advertises a torrent download.
 * @param {string} body - listing page HTML
 * @returns {Promise<Array<{name, torrentId, isTorrent}>>}
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      // Bug fix: without `return` execution continued past the reject.
      return reject(new Error('Failed loading body'));
    }
    const torrents = [];
    $('div.capa_larga.align-middle').each((i, element) => {
      const row = $(element);
      torrents.push({
        name: row.find("a").text(),
        torrentId: row.find("a").attr("href"),
        isTorrent: !!row.find("p:contains(\'Torrent\')")[0]
      });
    });
    resolve(torrents);
  });
}
// Parses a ComoEuBaixo post into one record per Portuguese-dubbed magnet
// link. Shared metadata (category, date, IMDb id, languages) comes from the
// post's "informacoes" block; when the magnet name is too short a fallback
// title is assembled from the original title plus year.
function parseTorrentPage(body) {
return new Promise((resolve, reject) => {
const $ = cheerio.load(body);
if (!$) {
reject(new Error('Failed loading body'));
}
// Only magnets whose anchor title passes isPtDubbed are kept.
const magnets = $(`a[href^="magnet"]`)
.filter((i, elem) => isPtDubbed($(elem).attr('title')))
.map((i, elem) => $(elem).attr("href")).get();
const details = $('div#informacoes')
const category = details.find('strong:contains(\'Gêneros: \')').next().attr('href').split('/')[0]
const torrents = magnets.map(magnetLink => {
const decodedMagnet = decode(magnetLink);
// Magnet names are URL-encoded; '+' stands for a space.
const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '));
// Original title is the text node right after the "Baixar" label.
const originalTitle = details.find('strong:contains(\'Baixar\')')[0].nextSibling.nodeValue.split('-')[0];
const year = details.find('strong:contains(\'Data de Lançamento: \')').next().text().trim();
const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`;
return {
// Very short magnet names are assumed uninformative; use the fallback.
title: name.length > 5 ? name : fallBackTitle,
infoHash: decodedMagnet.infoHash,
magnetLink: magnetLink,
category: category,
uploadDate: new Date($('time').attr('datetime')),
imdbId: details.find('a[href*="imdb.com"]').attr('href').split('/')[4],
languages: sanitizePtLanguages(details.find('strong:contains(\'Idioma\')')[0].nextSibling.nodeValue)
};
})
resolve(torrents.filter((x) => x));
});
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,115 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const comoeubaixo = require("./comoeubaixo_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const NAME = "ComoEuBaixo";
// Default number of listing pages scraped per category (see untilPage).
const UNTIL_PAGE = 5;
// Built at load time; legal before typeMapping's definition because function
// declarations hoist.
const TYPE_MAPPING = typeMapping();
// Caps concurrent detail-page fetches against the site.
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Runs a full ComoEuBaixo scrape pass and stamps the provider row with the
 * pass start time once it completes.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
/** Refreshes a stored torrent's stats by re-fetching its post (rate-limited). */
async function updateSeeders(torrent) {
  const { torrentId } = torrent;
  return limiter.schedule(() => comoeubaixo.torrent(torrentId));
}
/** Scrapes each allowed category in sequence and flattens the results. */
async function scrapeLatestTorrents() {
  const allowedCategories = [
    comoeubaixo.Categories.MOVIE,
    comoeubaixo.Categories.TV,
    comoeubaixo.Categories.DESENHOS
  ];
  const perCategory = await Promises.sequence(
      allowedCategories.map((category) => () => scrapeLatestTorrentsForCategory(category)));
  return perCategory.reduce((acc, entries) => acc.concat(entries), []);
}
/**
 * Scrapes one listing page of a category, processing each row through the
 * rate limiter, then recurses into the next page while results keep coming
 * and the per-category page cap (untilPage) is not reached.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  let torrents;
  try {
    torrents = await comoeubaixo.browse({ category, page });
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    torrents = [];
  }
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processEntry(torrent))));
  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
}
/**
 * Processes one listing entry. Non-torrent cards are returned untouched;
 * torrent cards get their post fetched and each magnet record processed
 * sequentially. Any failure resolves to undefined (best-effort).
 */
async function processEntry(entry) {
  if (!entry.isTorrent) {
    return entry;
  }
  try {
    const records = await comoeubaixo.torrent(entry.torrentId);
    return await Promises.sequence(records.map(record => () => processTorrentRecord(record)));
  } catch (error) {
    return undefined;
  }
}
// Persists one parsed magnet record as a torrent entry.
// Fast path: existing entries are updated and returned. Otherwise missing
// size/seeders are backfilled before a new entry is created.
async function processTorrentRecord(foundTorrent) {
if (await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent })) {
return foundTorrent;
}
if (!foundTorrent.size) {
await updateTorrentSize(foundTorrent);
}
if (!Number.isInteger(foundTorrent.seeders)) {
await updateCurrentSeeders(foundTorrent);
}
const torrent = {
infoHash: foundTorrent.infoHash,
provider: NAME,
torrentId: foundTorrent.torrentId,
title: foundTorrent.title,
type: TYPE_MAPPING[foundTorrent.category],
imdbId: foundTorrent.imdbId,
uploadDate: foundTorrent.uploadDate,
seeders: foundTorrent.seeders,
size: foundTorrent.size,
files: foundTorrent.files,
languages: foundTorrent.languages
};
return createTorrentEntry(torrent);
}
/** Maps ComoEuBaixo category slugs to internal content types. */
function typeMapping() {
  return {
    [comoeubaixo.Categories.MOVIE]: Type.MOVIE,
    [comoeubaixo.Categories.TV]: Type.SERIES,
    [comoeubaixo.Categories.ANIME]: Type.ANIME,
    [comoeubaixo.Categories.DESENHOS]: Type.SERIES,
  };
}
/**
 * Page depth scraped per category. Every category currently uses the same
 * depth: the original if-chain returned UNTIL_PAGE from all three branches,
 * so the dead conditionals were removed. Behavior is unchanged.
 */
function untilPage(category) {
  return UNTIL_PAGE;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,127 +0,0 @@
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require("magnet-uri");
const { getRandomUserAgent } = require("../../lib/requestHelper");
// Per-request timeout (ms) and the pagination cap for searches.
const defaultTimeout = 10000;
const maxSearchPage = 50;
const baseUrl = 'https://darkmahou.com';
// Category slugs as used in site URLs.
const Categories = {
MOVIE: 'movie',
ANIME: 'tv',
OVA: 'ova'
};
/**
 * Fetches a DarkMahou post (one post can carry several magnets) and resolves
 * with the parsed records, each tagged with the post slug. Retries until
 * `retries` is exhausted.
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const slug = torrentId.split("/")[3];
  return singleRequest(`${baseUrl}/${slug}`, config)
      .then((body) => parseTorrentPage(body))
      .then((records) => records.map((record) => ({ torrentId: slug, ...record })))
      .catch(() => torrent(slug, config, retries - 1));
}
function search(keyword, config = {}, retries = 2) {
  // Keyword search, following pagination while full pages (40 rows) come back,
  // up to config.extendToPage (capped at maxSearchPage).
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const lastPage = Math.min(maxSearchPage, config.extendToPage || 1);
  return singleRequest(`${baseUrl}/page/${page}/?s=${keyword}`, config)
      .then((body) => parseTableBody(body))
      .then((torrents) => {
        const hasMore = torrents.length === 40 && page < lastPage;
        if (!hasMore) {
          return torrents;
        }
        return search(keyword, { ...config, page: page + 1 })
            .catch(() => [])
            .then((nextTorrents) => torrents.concat(nextTorrents));
      })
      .catch(() => search(keyword, config, retries - 1));
}
function browse(config = {}, retries = 2) {
  // List a category page, or the front page when no category is configured.
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const { category } = config;
  const requestUrl = category
      ? `${baseUrl}/category/${category}/page/${page}/`
      : `${baseUrl}/page/${page}/`;
  return singleRequest(requestUrl, config)
      .then((body) => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
async function singleRequest(requestUrl, config = {}) {
  // GET a page with a randomized User-Agent; throws on empty or error bodies.
  const options = {
    headers: { 'User-Agent': getRandomUserAgent() },
    timeout: config.timeout || defaultTimeout
  };
  const response = await axios.get(requestUrl, options);
  const body = response.data;
  if (!body) {
    throw new Error(`No body: ${requestUrl}`);
  }
  if (body.includes("502: Bad gateway") || body.includes("403 Forbidden")) {
    throw new Error(`Invalid body contents: ${requestUrl}`);
  }
  return body;
}
function parseTableBody(body) {
  // Parse a listing page into { name, torrentId } rows; torrentId is the
  // article's link href.
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      // Bail out immediately; without the `return` the loop below still ran.
      return reject(new Error("Failed loading body"));
    }
    const torrents = $("article.bs").map((i, element) => {
      const row = $(element);
      return {
        name: row.find("span.ntitle").text(),
        torrentId: row.find("div > a").attr("href")
      };
    }).get();
    resolve(torrents);
  });
}
function parseTorrentPage(body) {
  // Parse a detail page into one record per magnet link found on it.
  // The executor is intentionally NOT async: with an async executor a
  // synchronous parse error (e.g. a missing 'Tipo:' anchor) rejected an
  // unobserved inner promise and the returned promise never settled; with a
  // plain executor the throw rejects this promise, letting callers retry.
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      return reject(new Error("Failed loading body"));
    }
    const magnets = $(`a[href^="magnet"]`)
        .map((i, section) => $(section).attr("href"))
        .get();
    const details = $('div.infox');
    const uploadDate = new Date($("time[itemprop=dateModified]").attr("datetime"));
    const torrents = magnets.map((magnetLink) => {
      const decoded = decode(magnetLink); // decode once per link
      return {
        title: decoded.name,
        originalName: details.find('h1.entry-title').text(),
        year: details.find('b:contains(\'Lançado:\')')[0].nextSibling.nodeValue || '',
        infoHash: decoded.infoHash,
        magnetLink: magnetLink,
        category: details.find('b:contains(\'Tipo:\')').next().attr('href').split('/')[4],
        uploadDate: uploadDate,
      };
    });
    resolve(torrents.filter((x) => x));
  });
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,108 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const darkmahou = require("./darkmahou_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { getKitsuId } = require("../../lib/metadata");
const NAME = "DarkMahou";
const UNTIL_PAGE = 5;
const limiter = new Bottleneck({ maxConcurrent: 5 });
async function scrape() {
  // Run a full scrape pass and persist the scrape start time on success.
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
async function updateSeeders(torrent) {
  // Re-fetch the torrent page through the rate limiter to refresh seeders.
  const refresh = () => darkmahou.torrent(torrent.torrentId);
  return limiter.schedule(refresh);
}
async function scrapeLatestTorrents() {
  // Scrape each allowed category sequentially and flatten the results.
  const allowedCategories = [
    darkmahou.Categories.MOVIE,
    darkmahou.Categories.ANIME,
    darkmahou.Categories.OVA
  ];
  const tasks = allowedCategories
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const entries = await Promises.sequence(tasks);
  return entries.reduce((acc, list) => acc.concat(list), []);
}
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  // Walk category pages until a page comes back empty or the per-category
  // depth limit is reached. Browse failures are logged and treated as empty.
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  const torrents = await darkmahou.browse({ category, page }).catch((error) => {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    return [];
  });
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processEntry(torrent))));
  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
}
async function processEntry(entry) {
  // Expand a listing row into full records and store them one at a time.
  // Any failure is swallowed so one bad entry cannot stop the page.
  try {
    const records = await darkmahou.torrent(entry.torrentId);
    return await Promises.sequence(records.map((record) => () => processTorrentRecord(record)));
  } catch (error) {
    return undefined;
  }
}
async function processTorrentRecord(foundTorrent) {
  // Persist a parsed torrent record, enriching it with size, seeders and a
  // kitsu id when those are missing. Returns the input record unchanged when
  // the torrent is already known (checkAndUpdateTorrent refreshes it in place).
  if (await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent })) {
    return foundTorrent;
  }
  // Both helpers mutate foundTorrent in place; order matters for the fields
  // read when building the entry below.
  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }
  // Anime metadata lookup is best effort: a failed lookup leaves kitsuId undefined.
  if (!foundTorrent.imdbId && !foundTorrent.kitsuId) {
    const info = { title: foundTorrent.originalName, year: foundTorrent.year };
    foundTorrent.kitsuId = await getKitsuId(info).catch(() => undefined);
  }
  const torrent = {
    infoHash: foundTorrent.infoHash,
    provider: NAME,
    torrentId: foundTorrent.torrentId,
    title: foundTorrent.title,
    type: Type.ANIME,  // everything on this site is treated as anime
    imdbId: foundTorrent.imdbId,
    kitsuId: foundTorrent.kitsuId,
    uploadDate: foundTorrent.uploadDate,
    seeders: foundTorrent.seeders,
    size: foundTorrent.size,
    files: foundTorrent.files,
    languages: foundTorrent.languages
  };
  return createTorrentEntry(torrent);
}
function untilPage(category) {
  // Per-category page depth: anime goes 5 pages deep, OVAs 4,
  // everything else uses the provider default.
  switch (category) {
    case darkmahou.Categories.ANIME:
      return 5;
    case darkmahou.Categories.OVA:
      return 4;
    default:
      return UNTIL_PAGE;
  }
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,93 +0,0 @@
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require("magnet-uri");
const Promises = require("../../lib/promises");
const { getRandomUserAgent } = require("../../lib/requestHelper");
const defaultTimeout = 10000;
const baseUrl = 'https://www.erai-raws.info';
const Categories = {
ANIMES: 'anime',
EPISODES: 'episodes'
};
function browse(config = {}, retries = 2) {
  // List a category page, then expand every show link into its parsed entries
  // (shows are fetched sequentially, parsed in parallel, results flattened).
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const category = config.category;
  return singleRequest(`${baseUrl}/${category}/page/${page}/`, config)
      .then((body) => parseTableBody(body))
      .then((animes) => Promises.sequence(animes.map((anime) => () => singleRequest(anime.animeLink))))
      .then((animeBodies) => Promise.all(animeBodies.map((animeBody) => parseTorrentPage(animeBody))))
      .then((animeInfos) => animeInfos.reduce((a, b) => a.concat(b), []))
      .catch(() => browse(config, retries - 1));
}
function singleRequest(requestUrl, config = {}) {
  // GET a page with a randomized User-Agent; rejects on empty or error bodies.
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout, };
  return axios.get(requestUrl, options).then((response) => {
    const body = response.data;
    // Buffers expose `length`, not `size` — the previous `!body.size` check was
    // always true for a Buffer and rejected every Buffer response as empty.
    if (!body || (Buffer.isBuffer(body) && !body.length)) {
      throw new Error(`No body: ${requestUrl}`);
    } else if (
      body.includes("502: Bad gateway") ||
      body.includes("403 Forbidden")
    ) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  });
}
function parseTableBody(body) {
  // Collect show links from a listing page as { name, animeLink }.
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      // Bail out immediately; without the `return` the code below still ran.
      return reject(new Error("Failed loading body"));
    }
    const links = $('[itemprop=\'headline\'] a, .content-area a.aa_ss_ops_new')
        .map((i, element) => ({
          name: $(element).text(),
          animeLink: $(element).attr("href"),
        })).get();
    resolve(links);
  });
}
function parseTorrentPage(body) {
  // Parse a show page: each release table yields one record per magnet link,
  // tagged with the languages listed in the table's tooltips.
  // The executor is intentionally NOT async: with an async executor a
  // synchronous throw (e.g. from decode on a malformed magnet) rejected an
  // unobserved inner promise and the returned promise never settled.
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      return reject(new Error("Failed loading body"));
    }
    const entries = $('.tab-content table, .content-area table')
        .map((i, entry) => {
          const languages = $(entry).find('.tooltip3').map((_, l) => $(l).attr('data-title')).get().join('/');
          const magnets = $(entry).find('a[href^="magnet"]').map((_, m) => $(m).attr('href')).get();
          return { languages, magnets };
        }).get();
    const torrents = entries
        .map(entry => entry.magnets
            .map(magnet => decode(magnet))
            .map(decodedMagnet => ({
              title: decodedMagnet.name,
              infoHash: decodedMagnet.infoHash,
              trackers: decodedMagnet.tr,
              languages: entry.languages
            })))
        .reduce((a, b) => a.concat(b), []);
    resolve(torrents);
  });
}
module.exports = { browse, Categories };

View File

@@ -1,50 +0,0 @@
const Parser = require('rss-parser');
const decode = require("magnet-uri");
const parser = new Parser({
customFields: {
item: [['erai:subtitles', 'subtitles']]
}
});
const baseUrl = 'https://www.erai-raws.info';
const rssKey = process.env.ERAI_RSS_KEY;
const Categories = {
ANIMES: 'anime',
EPISODES: 'episodes'
};
function browse() {
  // Pull the keyed RSS feed and normalise each item into a torrent record.
  const feedUrl = `${baseUrl}/feed/?type=magnet&${rssKey}`;
  return parser.parseURL(feedUrl).then((result) =>
      result.items.map((item) => {
        const decodedMagnet = decode(item.link);
        return {
          title: decodedMagnet.name,
          infoHash: decodedMagnet.infoHash,
          trackers: decodedMagnet.tr,
          languages: parseLanguages(item.subtitles)
        };
      }));
}
// Country-flag codes used by the feed's subtitle tags mapped to language names.
const languageMapping = {
  'us': 'English',
  'br': 'Portuguese(Brazil)',
  'mx': 'Spanish(Latin_America)',
  'es': 'Spanish',
  'sa': 'Arabic',
  'fr': 'French',
  'de': 'German',
  'it': 'Italian',
  'ru': 'Russian'
}
function parseLanguages(languages) {
  // Turn "[us][br]"-style subtitle tags into "English/Portuguese(Brazil)".
  // Unknown codes pass through unchanged.
  const codes = languages.split('][').map((tag) => tag.replace(/[\[\]]/g, ''));
  const names = codes.map((code) => languageMapping[code] || code);
  return names.join('/');
}
module.exports = { browse, Categories };

View File

@@ -1,47 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const erairaws = require('./erairaws_rss_api');
const { checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'EraiRaws';
const limiter = new Bottleneck({ maxConcurrent: 10 });
async function scrape() {
  // Log-wrapped scrape pass; this provider keeps no lastScraped bookkeeping.
  const scrapeStart = moment();
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
async function scrapeLatestTorrents() {
  // Only the episodes feed is scraped; flatten whatever the category pass returns.
  const entries = await scrapeLatestTorrentsForCategory(erairaws.Categories.EPISODES);
  return entries.reduce((acc, list) => acc.concat(list), []);
}
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  // Page through the feed until a page is empty or the depth limit is hit.
  // Browse failures are logged and treated as an empty page.
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  const torrents = await erairaws.browse({ category, page }).catch((error) => {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    return [];
  });
  const resolved = await Promise.all(
      torrents.map((torrent) => limiter.schedule(() => processRecord(torrent))));
  return resolved.length > 0 && page < untilPage(category)
      ? scrapeLatestTorrentsForCategory(category, page + 1)
      : [];
}
async function processRecord(foundTorrent) {
  // Upsert the record, then hand back the original torrent.
  await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  return foundTorrent;
}
function untilPage(category) {
  // The full anime catalogue is paged ~45 deep; the episodes feed needs one page.
  return category === erairaws.Categories.ANIMES ? 45 : 1;
}
module.exports = { scrape, NAME };

View File

@@ -1,117 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const { decode } = require("magnet-uri");
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require('./../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
const defaultProxies = [
'https://eztv.re'
];
const defaultTimeout = 120000;
const minDelay = 3000;
const jitterDelay = minDelay;
const limit = 100;
const maxPage = 5;
function torrent(torrentId, config = {}, retries = 1) {
  // Fetch one episode page; on failure wait a jittered delay and retry once.
  if (!torrentId) {
    return Promise.reject(new Error(`Failed ${torrentId} search`));
  }
  const requests = defaultProxies
      .map(proxyUrl => singleRequest(`${proxyUrl}/ep/${torrentId}`, config));
  return Promises.first(requests)
      .then(body => parseTorrentPage(body))
      .then(parsed => ({ torrentId, ...parsed }))
      .catch(error => {
        if (!retries) {
          return Promise.reject(error);
        }
        return jitter().then(() => torrent(torrentId, config, retries - 1));
      });
}
function search(imdbId, config = {}, retries = 1) {
  // Query the JSON API by imdb id, paging while full pages come back
  // (up to maxPage); failed continuation pages are treated as empty.
  if (!imdbId) {
    return Promise.reject(new Error(`Failed ${imdbId} search`));
  }
  const id = imdbId.replace('tt', '');
  const page = config.page || 1;
  const requests = defaultProxies
      .map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}&imdb_id=${id}`, config));
  return Promises.first(requests)
      .then(results => parseResults(results))
      .then(torrents => {
        if (torrents.length !== limit || page >= maxPage) {
          return torrents;
        }
        return search(imdbId, { ...config, page: page + 1 })
            .catch(() => [])
            .then(nextTorrents => torrents.concat(nextTorrents));
      })
      .catch(error => retries
          ? jitter().then(() => search(imdbId, config, retries - 1))
          : Promise.reject(error));
}
function browse(config = {}, retries = 1) {
  // Fetch a single listing page from the JSON API, retrying once with jitter.
  const page = config.page || 1;
  const requests = defaultProxies
      .map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}`, config));
  return Promises.first(requests)
      .then(results => parseResults(results))
      .catch(error => retries
          ? jitter().then(() => browse(config, retries - 1))
          : Promise.reject(error));
}
function singleRequest(requestUrl, config = {}) {
  // GET with a random User-Agent; resolves with the response body.
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
  return axios.get(requestUrl, options)
      .then(response => {
        if (!response.data) {
          // Reject with a real Error (not a bare string) so callers get a
          // message property and a stack trace.
          return Promise.reject(new Error(`No body: ${requestUrl}`));
        }
        return Promise.resolve(response.data);
      });
}
function parseResults(results) {
  // Validate an /api/get-torrents payload and map each row to our record shape.
  if (!results || !Array.isArray(results.torrents)) {
    // Reject with a real Error instead of a bare string so downstream catch
    // handlers can rely on error.message and a stack trace.
    return Promise.reject(new Error(`Incorrect results ${results}`));
  }
  return results.torrents.map(torrent => parseTorrent(torrent));
}
function parseTorrent(torrent) {
  // Normalise one API row; imdb_id === '0' means eztv has no id for the entry.
  return {
    name: torrent.title.replace(/EZTV$/, ''),
    torrentId: torrent.episode_url.replace(/.*\/ep\//, ''),
    infoHash: torrent.hash.trim().toLowerCase(),
    magnetLink: torrent.magnet_url,
    torrentLink: torrent.torrent_url,
    seeders: torrent.seeds,
    size: torrent.size_bytes,
    uploadDate: new Date(torrent.date_released_unix * 1000),
    imdbId: torrent.imdb_id !== '0' && 'tt' + torrent.imdb_id || undefined
  }
}
function parseTorrentPage(body) {
  // Scrape a single episode page into a torrent record.
  // NOTE(review): if the page layout changes, decode(magnetLink) receives
  // undefined and throws inside the executor, which rejects the promise —
  // callers treat that as a retryable failure.
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    // Episode details live in the "forum_header_border_normal" table.
    const content = $('table[class="forum_header_border_normal"]');
    const magnetLink = content.find('a[title="Magnet Link"]').attr('href');
    const torrent = {
      // Page titles end with an "EZTV" suffix that is stripped off.
      name: content.find('h1 > span').text().replace(/EZTV$/, ''),
      infoHash: decode(magnetLink).infoHash,
      magnetLink: magnetLink,
      torrentLink: content.find('a[title="Download Torrent"]').attr('href'),
      // First red stat is the seeder count; default to 0 when unparsable.
      seeders: parseInt(content.find('span[class="stat_red"]').first().text(), 10) || 0,
      // Size and release date are bare text nodes following their bold labels.
      size: parseSize(content.find('b:contains(\'Filesize:\')')[0].nextSibling.data),
      uploadDate: moment(content.find('b:contains(\'Released:\')')[0].nextSibling.data, 'Do MMM YYYY').toDate(),
      showUrl: content.find('.episode_left_column a').attr('href')
    };
    resolve(torrent);
  });
}
function jitter() {
  // Random delay in [minDelay, minDelay + jitterDelay] to spread out retries.
  const randomExtra = Math.round(Math.random() * jitterDelay);
  return Promises.delay(minDelay + randomExtra);
}
module.exports = { torrent, search, browse };

View File

@@ -1,85 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const { parse } = require('parse-torrent-title');
const eztv = require('./eztv_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const { isEpisodeImdbId } = require('../../lib/metadata');
const NAME = 'EZTV';
const UNTIL_PAGE = 10;
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
async function scrape() {
  // Run a listing scrape and record the start time as lastScraped on success.
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
async function updateSeeders(torrent, getImdbIdsMethod) {
  // Seeder refresh is intentionally disabled for EZTV — a batched imdb-based
  // lookup existed previously (see git history) — so no updates are reported.
  return [];
}
async function scrapeLatestTorrents() {
  // EZTV exposes one flat listing, so scraping simply starts at page 1.
  return scrapeLatestTorrentsForCategory();
}
async function scrapeLatestTorrentsForCategory(page = 1) {
  // Walk the flat listing until an empty page or UNTIL_PAGE is reached.
  // Browse failures are logged and treated as an empty page.
  console.log(`Scrapping ${NAME} page ${page}`);
  const torrents = await eztv.browse(({ page })).catch(error => {
    console.warn(`Failed ${NAME} scrapping for [${page}] due: `, error);
    return [];
  });
  const resolved = await Promise.all(
      torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t))));
  if (resolved.length > 0 && page < UNTIL_PAGE) {
    return scrapeLatestTorrentsForCategory(page + 1);
  }
}
async function processTorrentRecord(record) {
  // Persist one API row as a series torrent. Returns early when the torrent
  // is already known (checkAndUpdateTorrent refreshes it in place).
  if (await checkAndUpdateTorrent(record)) {
    return record;
  }
  // Rows without a size are unusable; resolve (not reject) so the caller's
  // Promise.all keeps processing the rest of the page.
  if (!record || !record.size) {
    return Promise.resolve('Invalid torrent record');
  }
  // imdb id for talk shows is usually incorrect on eztv
  const parsedTitle = parse(record.name);
  // Date-based episodes (no season number) carrying an episode-level imdb id
  // are the suspect case — drop the id rather than store a wrong one.
  const dateEpisode = !parsedTitle.season && parsedTitle.date;
  if (dateEpisode && await isEpisodeImdbId(record.imdbId)) {
    delete record.imdbId;
  }
  const torrent = {
    infoHash: record.infoHash,
    provider: NAME,
    torrentId: record.torrentId,
    // Collapse tabs/runs of whitespace into single spaces.
    title: record.name.replace(/\t|\s+/g, ' ').trim(),
    type: Type.SERIES,
    size: record.size,
    seeders: record.seeders,
    uploadDate: record.uploadDate,
    imdbId: record.imdbId,
  };
  return createTorrentEntry(torrent).then(() => torrent);
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,137 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const Promises = require('../../lib/promises');
const defaultUrl = 'https://horriblesubs.info';
const defaultTimeout = 10000;
function allShows(config = {}) {
  // List every show from /shows/ as { title, url }, rebasing urls on the proxy.
  const base = config.proxyUrl || defaultUrl;
  return _getContent('/shows/', config)
      .then(($) => $('div[class="ind-show"]')
          .map((index, element) => $(element).children('a'))
          .map((index, element) => ({
            title: element.attr('title'),
            url: `${base}${element.attr('href')}`
          })).get());
}
async function showData(showInfo, config = {}) {
  // Resolve full show details: numeric id plus batch packs and single episodes.
  // Accepts either a { title, url } object or a bare url string.
  const showUrl = showInfo.url || showInfo;
  const showEndpoint = showUrl.match(/\/show.+/)[0];
  const title = showInfo.title;
  const showId = await _getShowId(showEndpoint);
  const packEntries = await _getShowEntries(showId, title, 'batch', config);
  const singleEntries = await _getShowEntries(showId, title, 'show', config);
  return {
    title: title,
    url: showUrl,
    showId: showId,
    singleEpisodes: singleEntries,
    packEpisodes: packEntries
  };
}
async function getLatestEntries(config = {}) {
  // Resolve each latest-feed entry to its full show data, dropping misses.
  const entries = await _getAllLatestEntries(config);
  const resolved = await Promises.sequence(
      entries.map((entry) => () => _findLatestEntry(entry, config)));
  return resolved.filter((entry) => entry);
}
function _getContent(endpoint, config = {}) {
  // Fetch an endpoint (absolute urls are rebased onto the configured proxy)
  // and hand back a loaded cheerio document.
  const baseUrl = config.proxyUrl || defaultUrl;
  const timeout = config.timeout || defaultTimeout;
  const url = endpoint.startsWith('http')
      ? endpoint.replace(/https?:\/\/[^/]+/, baseUrl)
      : `${baseUrl}${endpoint}`;
  return axios.get(url, { timeout })
      .then((response) => cheerio.load(response.data));
}
function _getShowId(showEndpoint) {
  // The numeric show id is embedded in an inline script as `var hs_showid = N`.
  // Strip any trailing "#123" fragment and normalise the trailing slash first.
  const normalized = showEndpoint.replace(/(?:#\d+)?\/?$/, '/');
  return _getContent(normalized)
      .then($ => $('div.entry-content').find('script').html().match(/var hs_showid = (\d+)/)[1]);
}
function _getShowEntries(animeId, animeTitle, type, config) {
  // Fetch all entries of the given type, keeping only the requested show's rows.
  return _getAllEntries(animeId, type, config)
      .then((rows) => rows.filter((row) => row.title === animeTitle));
}
function _getAllEntries(animeId, type, config, page = 0, autoExtend = true) {
  // Page through getshows results; an empty page (or autoExtend=false)
  // terminates the recursion.
  const entriesEndpoint = `/api.php?method=getshows&type=${type}&showid=${animeId}&nextid=${page}`;
  return _getEntries(entriesEndpoint, config)
      .then((entries) => {
        if (!autoExtend || !entries.length) {
          return entries;
        }
        return _getAllEntries(animeId, type, config, page + 1)
            .then((nextEntries) => entries.concat(nextEntries));
      });
}
function _getEntries(endpoint, config) {
  // Parse one api.php response into release entries:
  // { title, episode, uploadDate, mirrors: [{ resolution, magnetLink, torrentLink }] }.
  return _getContent(endpoint, config)
      .then(($) => $('div[class="rls-info-container"]')
          .map((index, element) => ({
            // The show title is the bare text node of the label link
            // (nodeType 3 filters out the <strong>/<span> children).
            title: $(element).find('a[class="rls-label"]').contents()
                .filter((i, el) => el.nodeType === 3).first().text().trim(),
            episode: $(element).find('a[class="rls-label"]').find('strong').text(),
            uploadDate: _parseDate($(element).find('a[class="rls-label"]').find('span[class="rls-date"]').text()),
            // Each child of the links container is one resolution's mirror;
            // its element id ends in the resolution, e.g. "...-720p".
            mirrors: $(element).find('div[class="rls-links-container"]').children()
                .map((indexLink, elementLink) => ({
                  resolution: $(elementLink).attr('id').match(/\d+p$/)[0],
                  magnetLink: $(elementLink).find('a[title="Magnet Link"]').attr('href'),
                  torrentLink: $(elementLink).find('a[title="Torrent Link"]').attr('href')
                })).get()
          })).get());
}
function _getAllLatestEntries(config, page = 0) {
  // The latest feed serves 12 entries per page; a short page means the end.
  const pageParam = page === 0 ? '' : `&nextid=${page}`;
  return _getContent(`/api.php?method=getlatest${pageParam}`, config)
      .then(($) => $('li a')
          .map((index, element) => ({
            urlEndpoint: $(element).attr('href'),
            episode: $(element).find('strong').text()
          })).get())
      .then((entries) => {
        if (entries.length < 12) {
          return entries;
        }
        return _getAllLatestEntries(config, page + 1)
            .then((nextEntries) => entries.concat(nextEntries));
      });
}
async function _findLatestEntry(entry, config) {
  // Locate a latest-feed entry's full release row by scanning the show's
  // entry pages until the matching episode is found. Returns undefined when
  // the episode cannot be located.
  const showId = await _getShowId(entry.urlEndpoint);
  let foundEntry;
  let page = 0;
  let reachedEnd = false;
  // Scan page by page (autoExtend=false) instead of fetching everything,
  // stopping on the first match or on an empty page.
  while (!foundEntry && !reachedEnd) {
    const allEntries = await _getAllEntries(showId, 'show', config, page, false);
    foundEntry = allEntries.filter((e) => e.episode === entry.episode)[0];
    page = page + 1;
    reachedEnd = allEntries.length === 0;
  }
  if (!foundEntry) {
    return;
  }
  // Shape mirrors showData()'s result, but with only the one episode.
  return {
    title: foundEntry.title,
    url: entry.urlEndpoint,
    showId: showId,
    singleEpisodes: [foundEntry]
  };
}
function _parseDate(date) {
  // Site dates are either relative ("Today"/"Yesterday") or MM/DD/YYYY.
  if (/today/i.test(date)) {
    return moment().toDate();
  }
  if (/yesterday/i.test(date)) {
    return moment().subtract(1, 'day').toDate();
  }
  return moment(date, 'MM/DD/YYYY').toDate();
}
module.exports = { allShows, showData, getLatestEntries, _getShowId };

File diff suppressed because it is too large Load Diff

View File

@@ -1,186 +0,0 @@
const fs = require('fs');
const moment = require('moment');
const Bottleneck = require('bottleneck');
const decode = require('magnet-uri');
const horriblesubs = require('./horriblesubs_api.js');
const repository = require('../../lib/repository');
const { Type } = require('../../lib/types');
const { updateCurrentSeeders, updateTorrentSize } = require('../../lib/torrent');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const { getMetadata, getKitsuId } = require('../../lib/metadata');
const showMappings = require('./horriblesubs_mapping.json');
const NAME = 'HorribleSubs';
const NEXT_FULL_SCRAPE_OFFSET = 5 * 24 * 60 * 60; // 5 days;
const limiter = new Bottleneck({ maxConcurrent: 5 });
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
async function scrape() {
  // Full catalogue rescrape at most once per NEXT_FULL_SCRAPE_OFFSET;
  // otherwise only the latest entries are scraped.
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  const lastScraped = lastScrape.lastScraped && moment(lastScrape.lastScraped);
  const fullScrapeDue = !lastScraped
      || lastScraped.add(NEXT_FULL_SCRAPE_OFFSET, 'seconds') < scrapeStart;
  if (fullScrapeDue) {
    console.log(`[${scrapeStart}] scrapping all ${NAME} shows...`);
    await _scrapeAllShows();
    lastScrape.lastScraped = scrapeStart;
    await lastScrape.save();
    console.log(`[${moment()}] finished scrapping all ${NAME} shows`);
  } else {
    console.log(`[${scrapeStart}] scrapping latest ${NAME} entries...`);
    await _scrapeLatestEntries();
    console.log(`[${moment()}] finished scrapping latest ${NAME} entries`);
  }
}
async function updateSeeders(torrent) {
  // HorribleSubs exposes no seeder endpoint; always report no updates.
  return [];
}
async function _scrapeLatestEntries() {
  // Scrape only the latest feed, rate limited; per-entry errors are logged.
  const latestEntries = await horriblesubs.getLatestEntries();
  return Promise.all(latestEntries.map((entryData) =>
      limiter.schedule(() => _parseShowData(entryData).catch((err) => console.log(err)))));
}
async function _scrapeAllShows() {
  // Scrape the whole catalogue (seeder updates disabled for bulk runs);
  // per-show errors are logged and do not abort the pass.
  const shows = await horriblesubs.allShows();
  return Promise.all(shows.map((show) =>
      limiter.schedule(() => horriblesubs.showData(show)
          .then((showData) => _parseShowData(showData, false))
          .catch((err) => console.log(err)))));
}
async function compareSearchKitsuIds() {
  // Diagnostic: re-resolve kitsu ids via search and compare against the
  // checked-in mapping file, logging mismatches and the mismatch ratio.
  console.log(`${NAME}: initiating kitsu compare...`);
  // NOTE(review): only the first show is sampled (`slice(0, 1)`) — presumably a
  // debug limit left in; the ratio is computed over the sampled set only.
  const shows = await horriblesubs.allShows()
      .then((shows) => Promise.all(shows.slice(0, 1).map((show) => limiter.schedule(() => enrichShow(show)))));
  const incorrect = shows.filter(
      (show) => showMappings[show.title] && showMappings[show.title].kitsu_id !== show.kitsu_id);
  const incorrectRatio = incorrect.length / shows.length;
  console.log(incorrect);
  console.log(`Ratio: ${incorrectRatio}`);
}
async function initMapping() {
  // Build kitsu metadata for every show missing from the mapping file and
  // write the merged mapping back to disk (side effect: overwrites
  // horriblesubs_mapping.json in the source tree).
  console.log(`${NAME}: initiating kitsu mapping...`);
  const shows = await horriblesubs.allShows()
      .then((shows) => shows.filter((show) => !showMappings[show.title]))
      .then((shows) => Promise.all(shows.map((show) => limiter.schedule(() => enrichShow(show)))))
      // Merge new entries into the existing mapping object, keyed by title.
      .then((shows) => shows.reduce((map, show) => (map[show.title] = show, map), showMappings));
  fs.writeFile(
      "./scraper/scrapers/horriblesubs/horriblesubs_mapping.json",
      JSON.stringify(shows), 'utf8',
      (err) => {
        if (err) {
          console.log("An error occurred while writing JSON Object to File.", err);
        } else {
          console.log(`${NAME}: finished kitsu mapping`);
        }
      }
  );
}
async function enrichShow(show) {
  // Attach kitsu/imdb metadata to a show entry; both lookups are best effort
  // (the show id falls back to the title, metadata falls back to {}).
  console.log(`${NAME}: getting show info for ${show.title}...`);
  const showId = await horriblesubs._getShowId(show.url).catch(() => show.title);
  let metadata;
  try {
    const kitsuId = await getKitsuId({ title: show.title });
    metadata = await getMetadata(kitsuId);
  } catch (error) {
    console.log(`Failed getting kitsu meta: ${error.message}`);
    metadata = {};
  }
  return {
    showId: showId,
    kitsu_id: metadata.kitsuId,
    ...show,
    kitsuTitle: metadata.title,
    imdb_id: metadata.imdbId
  };
}
async function _parseShowData(showData, updateSeeders = true) {
  // Convert a scraped show into torrent entries and persist them.
  // Requires a kitsu mapping for the show title; throws otherwise.
  console.log(`${NAME}: scrapping ${showData.title} data...`);
  const showMapping = showMappings[showData.title];
  // kitsu_id may be a single id or an array of ids (one per season).
  const kitsuId = showMapping && showMapping.kitsu_id;
  if (!showMapping) {
    throw new Error(`No kitsu mapping found for ${showData.title}`);
  }
  if (!kitsuId) {
    throw new Error(`No kitsuId found for ${showData.title}`);
  }
  // sometimes horriblesubs entry contains multiple season in it, so need to split it per kitsu season entry
  // Builds { absoluteEpisode -> { kitsuId, episode, title } }, offsetting each
  // season's episodes by the count accumulated so far. Empty object when
  // kitsuId is a single id.
  const kitsuIdsMapping = Array.isArray(kitsuId) && await Promise.all(kitsuId.map(kitsuId => getMetadata(kitsuId)))
      .then((metas) => metas.reduce((map, meta) => {
        const epOffset = Object.keys(map).length;
        [...Array(meta.totalCount || 1).keys()]
            .map(ep => ep + 1)
            .forEach(ep => map[ep + epOffset] = { kitsuId: meta.kitsuId, episode: ep, title: meta.title });
        return map;
      }, {})) || {};
  // Title uses the per-season name/episode when a mapping exists, else the raw
  // scraped title; leading zeros are stripped before the lookup.
  const formatTitle = (episodeInfo, mirror) => {
    const mapping = kitsuIdsMapping[episodeInfo.episode.replace(/^0+/, '')];
    if (mapping) {
      return `${mapping.title} - ${mapping.episode} [${mirror.resolution}]`;
    }
    return `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`;
  };
  // Resolve the kitsu id for an episode string; ranges like "01-12" use the
  // first number. Multi-season shows with an unmapped episode yield undefined
  // (and are filtered out below).
  const getKitsuId = inputEpisode => {
    const episodeString = inputEpisode.includes('-') && inputEpisode.split('-')[0] || inputEpisode;
    const episode = parseInt(episodeString, 10);
    if (kitsuIdsMapping[episode]) {
      return kitsuIdsMapping[episode].kitsuId;
    } else if (Array.isArray(kitsuId)) {
      console.warn(`Unmapped episode number for ${showData.title} - ${inputEpisode}`);
      return undefined;
    }
    return kitsuId;
  };
  // Flatten singles + packs into one record per mirror with a magnet link,
  // drop records without a kitsu id, and persist through the rate limiter.
  return Promise.all([].concat(showData.singleEpisodes || []).concat(showData.packEpisodes || [])
      .map(episodeInfo => episodeInfo.mirrors
          .filter(mirror => mirror.magnetLink && mirror.magnetLink.length)
          .map(mirror => ({
            provider: NAME,
            ...mirror,
            infoHash: decode(mirror.magnetLink).infoHash,
            trackers: decode(mirror.magnetLink).tr.join(','),
            title: formatTitle(episodeInfo, mirror),
            type: Type.ANIME,
            kitsuId: getKitsuId(episodeInfo.episode),
            uploadDate: episodeInfo.uploadDate,
          })))
      .reduce((a, b) => a.concat(b), [])
      .filter(torrent => torrent.kitsuId)
      .map(torrent => entryLimiter.schedule(() => processTorrentRecord(torrent, updateSeeders))))
      .then(() => console.log(`${NAME}: finished scrapping ${showData.title} data`));
}
async function processTorrentRecord(torrent, updateSeeders = true) {
  // Create or refresh a torrent entry. For records this provider already owns,
  // optionally refresh seeders only; otherwise enrich (size, seeders) and create.
  const existingTorrent = await repository.getTorrent(torrent).catch(() => undefined);
  if (existingTorrent && existingTorrent.provider === NAME) {
    if (updateSeeders) {
      return updateCurrentSeeders(torrent).then(updatedSeeders => checkAndUpdateTorrent(updatedSeeders))
    }
    return Promise.resolve(torrent)
  }
  // New (or foreign-provider) torrent: fill size and seeders, then create.
  // Creation failures are logged, not rethrown, so batch processing continues.
  return updateTorrentSize(torrent)
      .then(updated => updateCurrentSeeders(updated))
      .then(updated => createTorrentEntry(updated, true))
      .catch(error => console.warn(`Failed creating entry for ${torrent.title}:`, error));
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,161 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { parseSize } = require("../scraperHelper");
const defaultProxies = [
'https://katcr.co'
];
const defaultTimeout = 10000;
const Categories = {
MOVIE: 'movies',
TV: 'tv',
ANIME: 'anime',
APPS: 'applications',
GAMES: 'games',
MUSIC: 'music',
BOOKS: 'books',
PORN: 'xxx',
OTHER: 'other',
};
function torrent(torrentId, config = {}, retries = 2) {
  // Load a torrent detail page, racing all configured proxies.
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} search`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const requests = proxyList
      .map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`, config));
  return Promises.first(requests)
      .then((body) => parseTorrentPage(body))
      .then((parsed) => ({ torrentId, ...parsed }))
      .catch(() => torrent(torrentId, config, retries - 1));
}
function search(keyword, config = {}, retries = 2) {
  // Keyword search on one results page, racing all configured proxies.
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const page = config.page || 1;
  const category = config.category;
  const requests = proxyList
      .map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/99/${category}`, config));
  return Promises.first(requests)
      .then((body) => parseTableBody(body))
      .catch(() => search(keyword, config, retries - 1));
}
function browse(config = {}, retries = 2) {
  // List one category page, racing all configured proxies.
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const page = config.page || 1;
  const category = config.category;
  const requests = proxyList
      .map((proxyUrl) => singleRequest(`${proxyUrl}/category/${category}/page/${page}`, config));
  return Promises.first(requests)
      .then((body) => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
function singleRequest(requestUrl, config = {}) {
  // GET a page and reject when the response looks like a block page, an
  // upstream error, or anything that is not a Kickass Torrents page.
  const options = { timeout: config.timeout || defaultTimeout };
  return axios.get(requestUrl, options)
      .then((response) => {
        const body = response.data;
        if (!body) {
          throw new Error(`No body: ${requestUrl}`);
        }
        if (body.includes('Access Denied')) {
          console.log(`Access Denied: ${requestUrl}`);
          throw new Error(`Access Denied: ${requestUrl}`);
        }
        const looksInvalid = body.includes('502: Bad gateway')
            || body.includes('403 Forbidden')
            || body.includes('Origin DNS error')
            || !body.includes('Kickass Torrents</title>');
        if (looksInvalid) {
          throw new Error(`Invalid body contents: ${requestUrl}`);
        }
        return body;
      });
}
/**
 * Parses a browse/search results table into torrent summaries.
 * Fix: parseInt is now called with an explicit radix (the originals relied
 * on the default, which is discouraged and can misparse prefixed strings).
 * @param {string} body raw listing-page HTML
 * @returns {Promise<Array>} one summary object per table row
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    const torrents = [];
    $('.table > tbody > tr').each((i, element) => {
      const row = $(element);
      const magnetLink = row.find('a[title="Torrent magnet link"]').attr('href');
      torrents.push({
        name: row.find('a[class="torrents_table__torrent_title"]').first().children('b').text(),
        infoHash: decode(magnetLink).infoHash,
        magnetLink: magnetLink,
        // Detail-page id is the title link href minus the "/torrent/" prefix.
        torrentId: row.find('a[class="torrents_table__torrent_title"]').first().attr('href').replace('/torrent/', ''),
        category: row.find('span[class="torrents_table__upload_info"]').first().children('a').first().attr('href')
          .match(/category\/([^\/]+)/)[1],
        seeders: parseInt(row.find('td[data-title="Seed"]').first().text(), 10),
        leechers: parseInt(row.find('td[data-title="Leech"]').first().text(), 10),
        size: parseSize(row.find('td[data-title="Size"]').first().text()),
        uploadDate: moment(row.find('td[data-title="Age"]').first().attr('title')).toDate()
      });
    });
    resolve(torrents);
  });
}
/**
 * Parses a KickassTorrents detail page into a full torrent record:
 * { name, infoHash, magnetLink, seeders, leechers, category, languages,
 *   size, uploadDate, imdbId, files? }.
 * NOTE(review): the chained `.match(...)[0]`/`[1]` accesses assume the page
 * always contains these fragments; a layout change would throw here and
 * surface as a retry in the caller — confirm that is the intended failure mode.
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    // Main content column; the stats block's parent holds title and metadata.
    const content = $('div[class="col"]').first();
    const info = content.find('div[class="torrent_stats"]').parent();
    const description = content.find('div[id="main"]');
    const magnetLink = info.find('a[title="Download verified Magnet"]').attr('href');
    // IMDB link only appears for some uploads; imdbId stays null-ish otherwise.
    const imdbIdMatch = description.html().match(/imdb\.com\/title\/(tt\d+)/i);
    const torrent = {
      name: info.find('h1').first().text(),
      infoHash: decode(magnetLink).infoHash,
      magnetLink: magnetLink,
      seeders: parseInt(info.find('span[class="torrent_stats__seed_count mr-2"]').first().text().match(/\d+/)[0], 10),
      leechers: parseInt(info.find('span[class="torrent_stats__leech_count mr-2"]').first().text().match(/\d+/)[0], 10),
      category: info.find('small').first().children('a').first().attr('href').match(/\/category\/([^\/]+)/)[1],
      languages: description.find('span:contains(\'Audio\')').next().children().eq(0).text(),
      // Total size is embedded in the first file-list row as "(Size: …)".
      size: parseSize(description.find('ul[class="file_list"]').first().find('li').first().contents().eq(2).text()
        .match(/\(Size: (.+)\)/)[1]),
      uploadDate: moment(info.find('time').first().text()).toDate(),
      imdbId: imdbIdMatch && imdbIdMatch[1],
      files: content.find('ul[class="file_list"]').first().find('li > ul > li[class="file_list__file"]')
        .map((i, elem) => $(elem))
        .map((i, ele) => ({
          fileIndex: i,
          // name = path with all leading directories stripped.
          name: ele.find('span > ul > li').contents().eq(1).text().trim().replace(/^.+\//g, ''),
          path: ele.find('span > ul > li').contents().eq(1).text().trim(),
          size: parseSize(ele.contents().eq(2).text())
        })).get()
    };
    if (torrent.files.length >= 50) {
      // a max of 50 files are displayed on the page, so a full page likely
      // means the list is truncated — drop it rather than store partial data
      delete torrent.files;
    }
    resolve(torrent);
  });
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,98 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const LineByLineReader = require('line-by-line');
const fs = require('fs');
const { Type } = require('../../lib/types');
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
const NAME = 'KickassTorrents';
const CSV_FILE_PATH = '/tmp/kickass.csv';
const limiter = new Bottleneck({ maxConcurrent: 40 });
/**
 * Streams the KAT CSV dump line by line, scheduling every parsed row through
 * the rate limiter and pausing the reader while the limiter is saturated.
 * Fixes: the original logged an undefined `lastDump` variable (ReferenceError
 * at runtime), called fs.unlink without a callback (throws on modern Node),
 * and dereferenced `row.length` when the line regex matched nothing.
 */
async function scrape() {
  console.log(`starting to scrape KAT dump: ${CSV_FILE_PATH}`);
  let entriesProcessed = 0;
  const lr = new LineByLineReader(CSV_FILE_PATH);
  lr.on('line', (line) => {
    if (entriesProcessed % 1000 === 0) {
      console.log(`Processed ${entriesProcessed} entries`);
    }
    // Split on '|' while keeping quoted fields intact.
    const row = line.match(/(?<=^|\|)(".*"|[^|]+)(?=\||$)/g);
    if (!row || row.length !== 11) {
      console.log(`Invalid row: ${line}`);
      return;
    }
    const torrent = {
      infoHash: row[0].toLowerCase(),
      title: row[1]
          .replace(/^"|"$/g, '')
          .replace(/&amp;/g, '&')
          .replace(/&\w{2,6};/g, ' ')
          .replace(/\s+/g, ' ')
          .trim(),
      category: row[2],
      size: parseInt(row[5], 10),
      seeders: parseInt(row[8], 10),
      uploadDate: moment.unix(parseInt(row[10], 10)).toDate(),
    };
    // Pause reading while the limiter queue is non-empty to bound memory use.
    if (!limiter.empty()) {
      lr.pause()
    }
    limiter.schedule(() => processTorrentRecord(torrent)
        .catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
        .then(() => limiter.empty())
        .then((empty) => empty && lr.resume())
        .then(() => entriesProcessed++);
  });
  lr.on('error', (err) => {
    console.log(err);
  });
  lr.on('end', () => {
    // Best-effort cleanup of the temporary dump file.
    fs.unlink(CSV_FILE_PATH, (err) => err && console.log(err));
    console.log(`finished to scrape KAT dump: ${CSV_FILE_PATH}!`);
  });
}
// KAT dump category label → internal content type; rows whose category is
// not listed here are stored as skip entries.
const categoryMapping = {
  "Movies": Type.MOVIE,
  "TV": Type.SERIES,
  "Anime": Type.ANIME
};
/**
 * Decides what to do with one CSV dump record: skip unsupported categories
 * and dead torrents, ignore already-stored entries, otherwise enrich the
 * record and persist it.
 */
async function processTorrentRecord(record) {
  // Unsupported category or zero seeders → remember as skipped so the dump
  // row is not re-examined on the next run.
  if (!categoryMapping[record.category] || record.seeders === 0) {
    return createSkipTorrentEntry(record);
  }
  if (await getStoredTorrentEntry(record)) {
    return;
  }
  // findTorrent currently always rejects, so every new record ends up
  // skipped — presumably a stub awaiting a real lookup; confirm intent.
  const torrentFound = await findTorrent(record).catch(() => undefined);
  if (!torrentFound) {
    return createSkipTorrentEntry(record);
  }
  const torrent = {
    infoHash: record.infoHash,
    provider: NAME,
    title: torrentFound.name,
    size: record.size,
    type: categoryMapping[record.category],
    imdbId: torrentFound.imdbId,
    uploadDate: record.uploadDate,
    seeders: torrentFound.seeders,
  };
  return createTorrentEntry(torrent);
}
/**
 * Looks up extra metadata for a dump record. Not implemented: always rejects,
 * so callers fall through to createSkipTorrentEntry.
 * Fix: reject with an Error instance instead of a bare string.
 * @returns {Promise<never>}
 */
async function findTorrent(record) {
  return Promise.reject(new Error('not found'));
}
module.exports = { scrape, NAME };

View File

@@ -1,91 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const kickass = require('./kickass_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'KickassTorrents';
const UNTIL_PAGE = 10;
const TYPE_MAPPING = typeMapping();
const limiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Runs a full KickassTorrents scrape pass, then records the start timestamp
 * as the provider's last successful scrape.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
// Re-fetches the torrent's detail page through the rate limiter so callers
// can refresh seeder counts.
async function updateSeeders(torrent) {
  return limiter.schedule(() => kickass.torrent(torrent.torrentId));
}
// Scrapes each allowed category in sequence and flattens the per-category
// result arrays into one list.
async function scrapeLatestTorrents() {
  const allowedCategories = [
    kickass.Categories.MOVIE,
    kickass.Categories.TV,
    kickass.Categories.ANIME,
  ];
  const tasks = allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category));
  return Promises.sequence(tasks)
      .then(entries => entries.reduce((acc, batch) => acc.concat(batch), []));
}
/**
 * Crawls one category page by page until a page yields no processed entries
 * or the UNTIL_PAGE cap is reached.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  return kickass.browse(({ category, page }))
    .catch(error => {
      // A failed browse is logged and treated as an empty page, which also
      // stops the recursion below.
      console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
      return Promise.resolve([]);
    })
    .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
    .then(resolved => resolved.length > 0 && page < UNTIL_PAGE
      ? scrapeLatestTorrentsForCategory(category, page + 1)
      : Promise.resolve());
}
/**
 * Persists one browsed torrent: already-stored entries are refreshed and
 * returned as-is; otherwise the detail page is fetched and a new entry is
 * created when the category maps to a supported type.
 */
async function processTorrentRecord(record) {
  if (await checkAndUpdateTorrent(record)) {
    return record;
  }
  const torrentFound = await kickass.torrent(record.torrentId).catch(() => undefined);
  if (!torrentFound || !TYPE_MAPPING[torrentFound.category]) {
    return Promise.resolve('Invalid torrent record');
  }
  const torrent = {
    infoHash: torrentFound.infoHash,
    provider: NAME,
    torrentId: torrentFound.torrentId,
    title: torrentFound.name.replace(/\t|\s+/g, ' '), // collapse whitespace runs
    type: TYPE_MAPPING[torrentFound.category],
    size: torrentFound.size,
    seeders: torrentFound.seeders,
    uploadDate: torrentFound.uploadDate,
    imdbId: torrentFound.imdbId,
    languages: torrentFound.languages || undefined
  };
  return createTorrentEntry(torrent).then(() => torrent);
}
// Builds the KickassTorrents category → internal Type lookup table.
function typeMapping() {
  return {
    [kickass.Categories.MOVIE]: Type.MOVIE,
    [kickass.Categories.TV]: Type.SERIES,
    [kickass.Categories.ANIME]: Type.ANIME
  };
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,145 +0,0 @@
const axios = require('axios');
const moment = require("moment")
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages } = require('../scraperHelper')
const defaultTimeout = 10000;  // ms per HTTP request
const maxSearchPage = 50  // hard cap on paginated search depth
const baseUrl = 'https://lapumia.org';
// Site category slugs used in listing URLs; MOVIE is null because movie
// listings live at the site root.
const Categories = {
  MOVIE: null,
  TV: 'series',
  ANIME: 'animes',
};
/**
 * Fetches and parses a single Lapumia torrent page.
 * @param {string} torrentId path slug of the torrent page
 * @param {object} config optional request settings
 * @param {number} retries remaining attempts
 * @returns {Promise<Array>} one entry per magnet link found on the page
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  return singleRequest(`${baseUrl}/${torrentId}`, config)
      .then(parseTorrentPage)
      .then((entries) => entries.map((entry) => ({ torrentId, ...entry })))
      .catch((err) => {
        console.warn(`Failed Lapumia ${torrentId} request: `, err);
        return torrent(torrentId, config, retries - 1)
      });
}
/**
 * Searches Lapumia, following result pages up to config.extendToPage
 * (capped at maxSearchPage).
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const lastPage = Math.min(maxSearchPage, (config.extendToPage || 1))
  return singleRequest(`${baseUrl}/page/${page}/?s=${keyword}`, config)
      .then(parseTableBody)
      .then(torrents => {
        // A full page (10 rows) suggests there may be more on the next page.
        if (torrents.length === 10 && page < lastPage) {
          return search(keyword, { ...config, page: page + 1 })
              .catch(() => [])
              .then(nextTorrents => torrents.concat(nextTorrents));
        }
        return torrents;
      })
      .catch(() => search(keyword, config, retries - 1));
}
/**
 * Browses a Lapumia listing page, optionally restricted to a category.
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const category = config.category;
  const requestUrl = category ? `${baseUrl}/${category}/page/${page}/` : `${baseUrl}/page/${page}/`
  return singleRequest(requestUrl, config)
      .then(parseTableBody)
      .catch(() => browse(config, retries - 1));
}
/**
 * Performs one GET request with a randomized User-Agent and basic error-page
 * detection. Rejects with the error message string on failure.
 * Fix: the original passed `userAgent` and `follow` keys, which axios
 * silently ignores — the random User-Agent was never sent. axios expects
 * `headers` and `maxRedirects` (cf. the OndeBaixa singleRequest).
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = {
    headers: { 'User-Agent': getRandomUserAgent() },
    timeout: timeout,
    maxRedirects: 2
  };
  return axios.get(requestUrl, options)
    .then((response) => {
      const body = response.data;
      if (!body) {
        throw new Error(`No body: ${requestUrl}`);
      } else if (body.includes('502: Bad gateway') ||
          body.includes('403 Forbidden')) {
        throw new Error(`Invalid body contents: ${requestUrl}`);
      }
      return body;
    })
    .catch(error => Promise.reject(error.message || error));
}
/**
 * Extracts { name, torrentId } pairs from a Lapumia listing page.
 * Entries whose link cannot be parsed are logged and skipped.
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    const torrents = [];
    $('div.post').each((i, element) => {
      const anchor = $(element).find("div > a");
      try {
        torrents.push({
          name: anchor.text(),
          // The id is the third path segment of the post URL.
          torrentId: anchor.attr("href").split('/')[3]
        });
      } catch (e) {
        console.log("Failed parsing Lupumia table entry")
      }
    });
    resolve(torrents);
  });
}
/**
 * Parses a Lapumia torrent page into one record per PT-dubbed magnet link.
 * NOTE(review): the `[0].nextSibling.nodeValue` accesses assume the labels
 * ("Titulo Original:", "Ano de Lançamento:", "Idioma") are always present;
 * a missing label throws and surfaces as a retry in the caller.
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    // Collect magnet links under each <h2><span> heading that passes the
    // PT-dubbed filter, scanning sibling nodes up to the next heading/divider.
    const magnets = $('h2 > span')
      .filter((i, elem) => isPtDubbed($(elem).text())).parent()
      .map((i, elem) => $(elem).nextUntil('h2, hr'))
      .map((i, elem) => $(elem).find('a[href^="magnet"]'))
      .map((i, section) => $(section).attr("href")).get();
    const category = parseCategory($('div.category').html());
    const details = $('div.content')
    // Only magnets whose metadata includes a display name are kept.
    const torrents = magnets.filter(magnetLink => decode(magnetLink).name).map(magnetLink => ({
      title: sanitizePtName(escapeHTML(decode(magnetLink).name.replace(/\+/g, ' '))),
      originalName: sanitizePtOriginalName(details.find('b:contains(\'Titulo Original:\')')[0].nextSibling.nodeValue),
      year: details.find('b:contains(\'Ano de Lançamento:\')')[0].nextSibling.nodeValue.trim(),
      infoHash: decode(magnetLink).infoHash,
      magnetLink: magnetLink,
      category: category,
      // Upload date is the text before the first '•' separator, in pt-br 'LL'.
      uploadDate: new Date(moment($('div.infos').text().split('•')[0].trim(), 'LL', 'pt-br').format()),
      imdbId: $('.imdbRatingPlugin').attr('data-title') || null,
      languages: sanitizePtLanguages(details.find('b:contains(\'Idioma\')')[0].nextSibling.nodeValue)
    }))
    resolve(torrents.filter((x) => x));
  });
}
/**
 * Maps a torrent page's category links block to a Categories value.
 * Defaults to MOVIE when neither an anime nor a series link is present.
 */
function parseCategory(categorys) {
  const $ = cheerio.load(categorys)
  const isAnime = $('a:contains(\'Animes\')').text();
  if (isAnime) {
    return Categories.ANIME
  }
  const isSeries = $('a:contains(\'Series\')').text();
  if (isSeries) {
    return Categories.TV
  }
  return Categories.MOVIE
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,112 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const lapumia = require("./lapumia_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { getImdbId } = require("../../lib/metadata");
const NAME = "Lapumia";
const UNTIL_PAGE = 5;
const TYPE_MAPPING = typeMapping();
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Runs a full Lapumia scrape pass, then records the start timestamp as the
 * provider's last successful scrape.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
// Re-fetches the torrent's detail page through the rate limiter so callers
// can refresh seeder counts.
async function updateSeeders(torrent) {
  return limiter.schedule(() => lapumia.torrent(torrent.torrentId));
}
// Scrapes each allowed category in sequence and flattens the per-category
// result arrays into one list.
async function scrapeLatestTorrents() {
  const allowedCategories = [
    lapumia.Categories.MOVIE,
    lapumia.Categories.TV
  ];
  const tasks = allowedCategories.map((category) => () => scrapeLatestTorrentsForCategory(category));
  return Promises.sequence(tasks)
      .then((entries) => entries.reduce((acc, batch) => acc.concat(batch), []));
}
/**
 * Crawls one category page by page until a page yields no processed entries
 * or the per-category page cap (untilPage) is reached.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  return lapumia
    .browse({ category, page })
    .catch((error) => {
      // A failed browse is logged and treated as an empty page, which also
      // stops the recursion below.
      console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
      return Promise.resolve([]);
    })
    .then((torrents) => Promise.all(torrents.map((torrent) => limiter.schedule(() => processEntry(torrent)))))
    .then((resolved) => resolved.length > 0 && page < untilPage(category)
      ? scrapeLatestTorrentsForCategory(category, page + 1)
      : Promise.resolve());
}
// Expands a listing entry into full page records and processes them in
// order; any failure resolves to undefined so the crawl continues.
async function processEntry(entry) {
  try {
    const records = await lapumia.torrent(entry.torrentId);
    return await Promises.sequence(records.map(record => () => processTorrentRecord(record)));
  } catch (e) {
    return undefined;
  }
}
/**
 * Persists one parsed torrent record: refreshes entries already stored,
 * fills in missing size/seeders, resolves an IMDB id for non-anime content,
 * then creates the DB entry.
 */
async function processTorrentRecord(foundTorrent) {
  if (await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent })) {
    return foundTorrent;
  }
  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }
  // IMDB lookup is skipped for anime; presumably those rarely resolve —
  // confirm against getImdbId behavior.
  if (!foundTorrent.imdbId && TYPE_MAPPING[foundTorrent.category] !== Type.ANIME) {
    const info = { title: foundTorrent.originalName, year: foundTorrent.year };
    foundTorrent.imdbId = await getImdbId(info, TYPE_MAPPING[foundTorrent.category]).catch(() => undefined);
  }
  const torrent = {
    infoHash: foundTorrent.infoHash,
    provider: NAME,
    torrentId: foundTorrent.torrentId,
    title: foundTorrent.title,
    type: TYPE_MAPPING[foundTorrent.category],
    imdbId: foundTorrent.imdbId,
    uploadDate: foundTorrent.uploadDate,
    seeders: foundTorrent.seeders,
    size: foundTorrent.size,
    files: foundTorrent.files,
    languages: foundTorrent.languages
  };
  return createTorrentEntry(torrent);
}
// Builds the Lapumia category → internal Type lookup table.
function typeMapping() {
  return {
    [lapumia.Categories.MOVIE]: Type.MOVIE,
    [lapumia.Categories.TV]: Type.SERIES,
    [lapumia.Categories.ANIME]: Type.ANIME
  };
}
// Anime listings are shallow, so crawl fewer pages for that category.
function untilPage(category) {
  return lapumia.Categories.ANIME === category ? 2 : UNTIL_PAGE;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,73 +0,0 @@
const { pantsu } = require('nyaapi')
// NyaaPantsu category ids in "<main>_<sub>" form; the bare "<main>_"
// variants (ALL) match every sub-category of that main category.
const Categories = {
  ANIME: {
    ALL: '3_',
    ENGLISH: '3_5',
    RAW: '3_6',
    MUSIC_VIDEO: '3_12',
    NON_ENGLISH: '3_13',
  },
  LIVE_ACTION: {
    ALL: '5_',
    ENGLISH: '5_9',
    RAW: '5_11',
    PROMOTIONAL_VIDEO: '5_10',
    NON_ENGLISH: '5_18',
  }
}
/**
 * Fetches one torrent's details from NyaaPantsu by id.
 * @param {string|number} torrentId
 * @returns {Promise<object>} normalized torrent record
 */
function torrent(torrentId) {
  if (!torrentId) {
    return Promise.reject(new Error(`Failed ${torrentId} search`));
  }
  const request = pantsu.infoRequest(torrentId);
  return request
      .then(parseTorrent)
      .catch(error => handleError(error, torrentId));
}
// Runs a free-text search on NyaaPantsu and normalizes every result.
async function search(query) {
  try {
    const results = await pantsu.search(query);
    return results.map(result => parseTorrent(result));
  } catch (error) {
    return handleError(error, query);
  }
}
// Lists one page of a category (defaults to English-translated anime).
async function browse(config = {}) {
  const page = config.page || 1;
  const category = config.category || Categories.ANIME.ENGLISH;
  try {
    const results = await pantsu.list(category, page);
    return results.map(result => parseTorrent(result));
  } catch (error) {
    return handleError(error, category);
  }
}
/**
 * Normalizes NyaaPantsu failures: HTTP errors (status >= 400) become a
 * descriptive Error; anything else is re-rejected untouched.
 */
function handleError(error, identifier) {
  const status = error.statusCode;
  const isHttpError = Boolean(status) && status >= 400;
  return isHttpError
      ? Promise.reject(new Error(`${status}: [${identifier}] failed retrieval on NyaaPantsu`))
      : Promise.reject(error);
}
/**
 * Maps a raw NyaaPantsu API torrent object to the scraper's common shape.
 * Fix: the file-name extraction used `replace(/([^\/]+$)/, '$1')`, which
 * replaces the basename with itself (a no-op), so `name` always carried the
 * full path. It now strips leading directories, matching the other parsers.
 */
function parseTorrent(torrent) {
  return {
    title: torrent.name.replace(/\t|\s+/g, ' ').trim(),
    torrentId: torrent.id,
    infoHash: torrent.hash.trim().toLowerCase(),
    magnetLink: torrent.magnet,
    torrentLink: torrent.torrent,
    seeders: torrent.seeders,
    size: torrent.filesize,
    uploadDate: new Date(torrent.date),
    category: `${torrent.category}_${torrent.sub_category}`,
    languages: torrent.languages ? torrent.languages.join(',') : undefined,
    files: torrent.file_list && torrent.file_list.length ? torrent.file_list.map((file, fileId) => ({
      fileIndex: fileId,
      // basename only — drop everything up to the last '/'
      name: file.path.replace(/^.+\//, ''),
      path: file.path,
      size: file.filesize
    })) : undefined
  }
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,97 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const pantsu = require('./nyaa_pantsu_api');
const { Type } = require('../../lib/types');
const Promises = require('../../lib/promises');
const repository = require('../../lib/repository');
const { updateCurrentSeeders, updateTorrentSize } = require('../../lib/torrent');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'NyaaPantsu';
const UNTIL_PAGE = 5
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Runs a full NyaaPantsu scrape pass, then records the start timestamp as
 * the provider's last successful scrape.
 * (Removed stale commented-out debug code that scraped hard-coded ids.)
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  return scrapeLatestTorrents()
      .then(() => {
        lastScrape.lastScraped = scrapeStart;
        return lastScrape.save();
      })
      .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
/**
 * Refreshes seeder info for one torrent; resolves to an empty list when the
 * API response carries no integer seeder count.
 */
async function updateSeeders(torrent) {
  const foundTorrent = await limiter.schedule(() => pantsu.torrent(torrent.torrentId));
  return Number.isInteger(foundTorrent.seeders) ? [foundTorrent] : [];
}
// Scrapes each allowed category in sequence and flattens the per-category
// result arrays into one list.
async function scrapeLatestTorrents() {
  const allowedCategories = [
    pantsu.Categories.ANIME.ENGLISH
  ];
  const tasks = allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category));
  return Promises.sequence(tasks)
      .then(entries => entries.reduce((acc, batch) => acc.concat(batch), []));
}
/**
 * Crawls one category page by page until a page yields no entries or the
 * UNTIL_PAGE cap is reached. Per-record failures are logged and swallowed so
 * one bad torrent cannot stop the crawl.
 * NOTE(review): `category` is only used for logging — it is NOT forwarded to
 * pantsu.browse, which falls back to its default category. This works today
 * because the only allowed category equals that default; confirm intent.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  return pantsu.browse(({ page }))
    .catch(error => {
      console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
      return Promise.resolve([]);
    })
    .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)
      .catch(error => {
        console.warn(`Failed processing [${torrent.infoHash}] ${torrent.title} due: `, error);
        return Promise.resolve();
      })))))
    .then(resolved => resolved.length > 0 && page < UNTIL_PAGE
      ? scrapeLatestTorrentsForCategory(category, page + 1)
      : Promise.resolve());
}
/**
 * Persists one parsed record as an ANIME entry: refreshes entries already
 * stored, fills in missing size/seeders, then creates the DB entry.
 */
async function processTorrentRecord(record) {
  if (!record || await checkAndUpdateTorrent(record)) {
    return record;
  }
  if (!record.size) {
    await updateTorrentSize(record)
  }
  // Explicit null/undefined check: a legitimate 0 seeders must not trigger
  // a refresh.
  if (record.seeders === null || record.seeders === undefined) {
    await updateCurrentSeeders(record);
  }
  const torrent = {
    infoHash: record.infoHash,
    torrentLink: record.torrentLink,
    provider: NAME,
    torrentId: record.torrentId,
    title: record.title,
    type: Type.ANIME,
    size: record.size,
    seeders: record.seeders,
    uploadDate: record.uploadDate,
    languages: record.languages,
    files: record.files || undefined
  };
  return createTorrentEntry(torrent).then(() => torrent);
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,66 +0,0 @@
const { si } = require('nyaapi')
const { parseSize } = require("../scraperHelper");
// Nyaa.si category ids in "<main>_<sub>" form; the "<main>_0" variants (ALL)
// match every sub-category of that main category.
const Categories = {
  ANIME: {
    ALL: '1_0',
    MUSIC_VIDEO: '1_1',
    ENGLISH: '1_2',
    NON_ENGLISH: '1_3',
    RAW: '1_4'
  },
  LIVE_ACTION: {
    ALL: '4_0',
    ENGLISH: '4_1',
    PROMOTIONAL_VIDEO: '4_2',
    NON_ENGLISH: '4_3',
    RAW: '4_4'
  }
}
/**
 * Fetches one torrent's details from Nyaa.si by id and tags the result with
 * the id it was looked up under.
 */
function torrent(torrentId) {
  if (!torrentId) {
    return Promise.reject(new Error(`Failed ${torrentId} search`));
  }
  return si.infoRequest(torrentId)
      .then(parseTorrent)
      .then(parsed => ({ ...parsed, torrentId }))
      .catch(error => {
        const notFound = error.statusCode && error.statusCode === 404;
        return notFound
            ? Promise.reject(new Error(`404: [${torrentId}] not found on NyaaSi`))
            : Promise.reject(error);
      });
}
// Free-text search limited to English-translated anime.
async function search(query) {
  const results = await si.search(query, null, { category: Categories.ANIME.ENGLISH});
  return results.map(result => parseTorrent(result));
}
/**
 * Lists one page of a category (defaults to English-translated anime,
 * sorted by id — i.e. newest uploads first).
 */
function browse(config = {}) {
  const options = {
    page: config.page || 1,
    category: config.category || Categories.ANIME.ENGLISH,
    sort: config.sort || 'id'
  };
  return si.list(options.category, options.page, { sort: options.sort })
      .then(response => (response.results || []).map(result => parseTorrent(result)));
}
/**
 * Maps a raw Nyaa.si API torrent object to the scraper's common shape.
 * Fix: parseInt is now called with an explicit radix (the original omitted
 * it, which is discouraged and can misparse prefixed strings).
 */
function parseTorrent(torrent) {
  return {
    title: torrent.name.replace(/\t|\s+/g, ' ').trim(),
    torrentId: torrent.id,
    infoHash: torrent.hash.trim().toLowerCase(),
    magnetLink: torrent.magnet,
    torrentLink: torrent.torrent,
    // seeders may arrive as a string — parse it as base-10.
    seeders: parseInt(torrent.seeders, 10),
    size: parseSize(torrent.filesize),
    uploadDate: new Date(torrent.date),
    category: torrent.sub_category,
  }
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,87 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const nyaasi = require('./nyaa_si_api');
const { Type } = require('../../lib/types');
const Promises = require('../../lib/promises');
const repository = require('../../lib/repository');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'NyaaSi';
const UNTIL_PAGE = 10
const limiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Runs a full Nyaa.si scrape pass, then records the start timestamp as the
 * provider's last successful scrape.
 * (Removed stale commented-out debug code that scraped hard-coded ids and
 * queries.)
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  return scrapeLatestTorrents()
      .then(() => {
        lastScrape.lastScraped = scrapeStart;
        return lastScrape.save();
      })
      .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
// Seeder refresh is intentionally disabled for NyaaSi: this always resolves
// to an empty list so callers treat it as "no update available". The
// commented code below is the previous per-torrent implementation.
async function updateSeeders(torrent) {
  // return limiter.schedule(() => nyaasi.torrent(torrent.torrentId))
  //     .then(foundTorrent => Number.isInteger(foundTorrent.seeders) ? [foundTorrent] : []);
  return Promise.resolve([]);
}
// Scrapes each allowed category in sequence and flattens the per-category
// result arrays into one list.
async function scrapeLatestTorrents() {
  const allowedCategories = [
    nyaasi.Categories.ANIME.ENGLISH
  ];
  const tasks = allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category));
  return Promises.sequence(tasks)
      .then(entries => entries.reduce((acc, batch) => acc.concat(batch), []));
}
/**
 * Crawls one category page by page until a page yields no entries or the
 * UNTIL_PAGE cap is reached. Per-record failures are logged and swallowed so
 * one bad torrent cannot stop the crawl.
 * NOTE(review): `category` is only used for logging — it is NOT forwarded to
 * nyaasi.browse, which falls back to its default category. This works today
 * because the only allowed category equals that default; confirm intent.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  return nyaasi.browse({ page, sort: 'id' })
    .catch(error => {
      console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
      return Promise.resolve([]);
    })
    .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)
      .catch(error => {
        console.warn(`Failed processing [${torrent.infoHash}] ${torrent.title} due: `, error);
        return Promise.resolve();
      })))))
    .then(resolved => resolved.length > 0 && page < UNTIL_PAGE
      ? scrapeLatestTorrentsForCategory(category, page + 1)
      : Promise.resolve());
}
/**
 * Persists one parsed record as an ANIME entry; records already stored are
 * refreshed via checkAndUpdateTorrent and returned unchanged.
 */
async function processTorrentRecord(record) {
  if (!record || await checkAndUpdateTorrent(record)) {
    return record;
  }
  const torrent = {
    infoHash: record.infoHash,
    torrentLink: record.torrentLink,
    provider: NAME,
    torrentId: record.torrentId,
    title: record.title,
    type: Type.ANIME,
    size: record.size,
    seeders: record.seeders,
    uploadDate: record.uploadDate,
  };
  return createTorrentEntry(torrent).then(() => torrent);
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,151 +0,0 @@
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages } = require('../scraperHelper')
const defaultTimeout = 30000;  // ms per HTTP request
const maxSearchPage = 50  // hard cap on paginated search depth
const baseUrl = 'https://ondebaixa.com';
// Site category slugs used in listing URLs; DESENHOS (cartoons) is mapped
// to series by the scraper's type mapping.
const Categories = {
  MOVIE: 'filmes',
  TV: 'series',
  ANIME: 'anime',
  DESENHOS: 'desenhos'
};
/**
 * Fetches and parses one OndeBaixa torrent page.
 * The torrentId is a full URL path whose fourth segment is the page slug.
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const slug = encodeURIComponent(torrentId.split("/")[3]);
  return singleRequest(`${baseUrl}/${slug}/`, config)
      .then(parseTorrentPage)
      .then((entries) => entries.map((entry) => ({ torrentId: slug, ...entry })))
      .catch((err) => {
        console.warn(`Failed OndeBaixo ${slug} request: `, err);
        return torrent(torrentId, config, retries - 1)
      });
}
/**
 * Searches OndeBaixa, following result pages up to config.extendToPage
 * (capped at maxSearchPage).
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const lastPage = Math.min(maxSearchPage, (config.extendToPage || 1))
  return singleRequest(`${baseUrl}/${keyword}/${page}/`, config)
      .then(parseTableBody)
      .then(torrents => {
        // A full page (40 rows) suggests there may be more on the next page.
        if (torrents.length === 40 && page < lastPage) {
          return search(keyword, { ...config, page: page + 1 })
              .catch(() => [])
              .then(nextTorrents => torrents.concat(nextTorrents));
        }
        return torrents;
      })
      .catch(() => search(keyword, config, retries - 1));
}
/**
 * Browses an OndeBaixa listing page, optionally restricted to a category.
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const category = config.category;
  const requestUrl = category ? `${baseUrl}/${category}/${page}/` : `${baseUrl}/${page}/`;
  return singleRequest(requestUrl, config)
      .then(parseTableBody)
      .catch(() => browse(config, retries - 1));
}
/**
 * Performs one GET request with a randomized User-Agent and basic error-page
 * detection. Rejects with the error message string on failure.
 */
function singleRequest(requestUrl, config = {}) {
  const options = {
    headers: { 'User-Agent': getRandomUserAgent() },
    timeout: config.timeout || defaultTimeout
  };
  return axios.get(requestUrl, options)
      .then((response) => {
        const body = response.data;
        if (!body) {
          throw new Error(`No body: ${requestUrl}`);
        }
        const gatewayError = body.includes('502: Bad gateway') || body.includes('403 Forbidden');
        if (gatewayError) {
          throw new Error(`Invalid body contents: ${requestUrl}`);
        }
        return body;
      })
      .catch(error => Promise.reject(error.message || error));
}
/**
 * Extracts { name, torrentId } pairs from an OndeBaixa listing page.
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    const torrents = $('div.capa_larga.align-middle')
        .map((i, element) => {
          const row = $(element);
          return {
            name: row.find("a").text(),
            torrentId: row.find("a").attr("href")
          };
        }).get();
    resolve(torrents);
  });
}
/**
 * Parses an OndeBaixa torrent page into one record per PT-dubbed magnet
 * link, using page metadata to back-fill weak magnet display names.
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    // Only magnets whose anchor title passes the PT-dubbed filter are kept.
    const magnets = $(`a[href^="magnet"]`)
      .filter((i, elem) => isPtDubbed($(elem).attr('title')))
      .map((i, elem) => $(elem).attr("href")).get();
    const details = $('div#informacoes')
    const category = details.find('span:contains(\'Gêneros: \')').next().html()
    const torrents = magnets.map(magnetLink => {
      const decodedMagnet = decode(magnetLink);
      const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '));
      const originalTitle = details.find('span:contains(\'Título Original: \')').next().text().trim();
      const year = details.find('span:contains(\'Ano de Lançamento: \')').next().text().trim();
      // Some magnets carry a uselessly short display name; fall back to a
      // title assembled from the page metadata instead.
      const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`;
      return {
        title: name.length > 5 ? name : fallBackTitle,
        originalName: sanitizePtOriginalName(originalTitle),
        year: year,
        infoHash: decodedMagnet.infoHash,
        magnetLink: magnetLink,
        category: parseCategory(category),
        uploadDate: new Date($('time').attr('datetime')),
        languages: sanitizePtLanguages(details.find('span:contains(\'Idioma\')').next().text())
      }
    });
    resolve(torrents.filter((x) => x));
  });
}
/**
 * Maps the genres-links HTML fragment to a Categories value. "Desenhos"
 * (cartoons) is treated as TV. Falls through to undefined when no known
 * category link is present — callers then drop the record via the type
 * mapping lookup.
 */
function parseCategory(body) {
  const $ = cheerio.load(body)
  if ($("a[href*='anime']").text()) {
    return Categories.ANIME
  }
  if ($("a[href*='series']").text()) {
    return Categories.TV
  }
  if ($("a[href*='filmes']").text()) {
    return Categories.MOVIE
  }
  if ($("a[href*='desenhos']").text()) {
    return Categories.TV
  }
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,113 +0,0 @@
const moment = require("moment");
const Bottleneck = require("bottleneck");
const ondebaixa = require("./ondebaixa_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { getImdbId } = require("../../lib/metadata");
const NAME = "OndeBaixa";
const UNTIL_PAGE = 5;
const TYPE_MAPPING = typeMapping();
const limiter = new Bottleneck({ maxConcurrent: 5 });
/**
 * Runs a full OndeBaixa scrape pass, then records the start timestamp as
 * the provider's last successful scrape.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
// Re-fetches the torrent's detail page through the rate limiter so callers
// can refresh seeder counts.
async function updateSeeders(torrent) {
  return limiter.schedule(() => ondebaixa.torrent(torrent.torrentId));
}
// Scrapes each allowed category in sequence and flattens the per-category
// result arrays into one list.
async function scrapeLatestTorrents() {
  const allowedCategories = [
    ondebaixa.Categories.MOVIE,
    ondebaixa.Categories.TV,
    ondebaixa.Categories.DESENHOS
  ];
  const tasks = allowedCategories.map((category) => () => scrapeLatestTorrentsForCategory(category));
  return Promises.sequence(tasks)
      .then((entries) => entries.reduce((acc, batch) => acc.concat(batch), []));
}
// Scrapes one category listing page, processes each listed torrent, and
// recurses to the next page until an empty page or the page limit is hit.
// Fix: corrected "Scrapping"/"scrapping" typos in log messages.
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scraping ${NAME} ${category} category page ${page}`);
  return ondebaixa
      .browse({ category, page })
      .catch((error) => {
        // A failed page is logged and treated as empty so remaining
        // categories still get scraped.
        console.warn(`Failed ${NAME} scraping for [${page}] ${category} due: `, error);
        return Promise.resolve([]);
      })
      .then((torrents) => Promise.all(torrents.map((torrent) => limiter.schedule(() => processEntry(torrent)))))
      .then((resolved) => resolved.length > 0 && page < untilPage(category)
          ? scrapeLatestTorrentsForCategory(category, page + 1)
          : Promise.resolve());
}
// Resolves a listing entry into its detail records and stores each one
// sequentially; any failure resolves to undefined (best-effort).
async function processEntry(entry) {
  try {
    const records = await ondebaixa.torrent(entry.torrentId);
    return await Promises.sequence(records.map((record) => () => processTorrentRecord(record)));
  } catch (error) {
    return undefined;
  }
}
// Enriches a scraped torrent (size, seeders, imdbId) and persists it.
// Returns the input when the torrent is already stored and up to date.
async function processTorrentRecord(foundTorrent) {
if (await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent })) {
return foundTorrent;
}
// Fill in missing metadata before creating the entry; order matters:
// size/seeders lookups mutate foundTorrent in place.
if (!foundTorrent.size) {
await updateTorrentSize(foundTorrent);
}
if (!Number.isInteger(foundTorrent.seeders)) {
await updateCurrentSeeders(foundTorrent);
}
// Anime ids are resolved elsewhere; for other types try an IMDb lookup
// by original title/year, best-effort.
if (!foundTorrent.imdbId && TYPE_MAPPING[foundTorrent.category] !== Type.ANIME) {
const info = { title: foundTorrent.originalName, year: foundTorrent.year };
foundTorrent.imdbId = await getImdbId(info, TYPE_MAPPING[foundTorrent.category]).catch(() => undefined);
}
const torrent = {
infoHash: foundTorrent.infoHash,
provider: NAME,
torrentId: foundTorrent.torrentId,
title: foundTorrent.title,
type: TYPE_MAPPING[foundTorrent.category],
imdbId: foundTorrent.imdbId,
uploadDate: foundTorrent.uploadDate,
seeders: foundTorrent.seeders,
size: foundTorrent.size,
files: foundTorrent.files,
languages: foundTorrent.languages
};
return createTorrentEntry(torrent);
}
// Builds the OndeBaixa category -> internal Type lookup table.
function typeMapping() {
  return {
    [ondebaixa.Categories.MOVIE]: Type.MOVIE,
    [ondebaixa.Categories.TV]: Type.SERIES,
    [ondebaixa.Categories.DESENHOS]: Type.SERIES,
    [ondebaixa.Categories.ANIME]: Type.ANIME
  };
}
// Maximum number of listing pages crawled per category; currently a flat
// limit (UNTIL_PAGE) regardless of the category argument.
function untilPage(category) {
return UNTIL_PAGE;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,161 +0,0 @@
const axios = require('axios');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require("../../lib/requestHelper");
const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
const appId = 'torrentio-addon';
// Request timeout and the delay between rate-limit retries (ms).
const defaultTimeout = 30000;
const retryDelay = 3000;
// Lazily fetched API token, cached module-wide; reset when the API
// reports it expired (error_code 4).
let token;
// torrentapi.org numeric category ids and query option constants.
const Options = {
category: {
MOVIES_XVID: [14],
MOVIES_XVID_720P: [48],
MOVIES_X264: [17],
MOVIES_X264_1080P: [44],
MOVIES_X264_720P: [45],
MOVIES_X264_3D: [47],
MOVIES_X264_4K: [50],
MOVIES_X265_1080P: [54],
MOVIES_X265_4K: [51],
MOVIES_X265_4K_HDR: [52],
MOVIES_FULL_BD: [42],
MOVIES_BD_REMUX: [46],
MOVIES_HIGH_RES: [47, 50, 51, 52, 46],
TV_EPISODES: [18],
TV_UHD_EPISODES: [49],
TV_HD_EPISODES: [41],
MUSIC_MP3: [23],
MUSIC_FLAC: [25],
GAMES_PC_ISO: [27],
GAMES_PC_RIP: [28],
GAMES_PS3: [40],
GAMES_XBOX_360: [32],
SOFTWARE_PC_ISO: [33],
EBOOKS: [35],
XXX: [4],
},
sort: {
LAST: 'last',
SEEDERS: 'seeders',
LEECHERS: 'leechers'
},
format: {
JSON: 'json',
JSON_EXTENDED: 'json_extended'
},
ranked: {
TRUE: 1,
FALSE: 0
}
}
// Searches RARBG by IMDb id; rejects when no id is supplied.
// params: category (array of ids), limit, sort, min_seeders, min_leechers,
// format, ranked — all optional, with sensible defaults.
function search(imdbId, params = {}) {
  if (!imdbId) {
    return Promise.reject(new Error(`Must define imdbId`));
  }
  const categoryParam = (params.category && params.category.join(';')) || null;
  const parameters = {
    mode: 'search',
    search_imdb: imdbId,
    category: categoryParam,
    limit: params.limit || 100,
    sort: params.sort || Options.sort.SEEDERS,
    min_seeders: params.min_seeders || undefined,
    min_leechers: params.min_leechers || undefined,
    format: params.format || Options.format.JSON_EXTENDED,
    ranked: params.ranked || Options.ranked.FALSE
  };
  return singleRequest(parameters).then(parseResults);
}
// Lists the latest RARBG torrents, optionally filtered by category.
// Accepts the same optional params as search().
function browse(params = {}) {
  const categoryParam = (params.category && params.category.join(';')) || null;
  const parameters = {
    mode: 'list',
    category: categoryParam,
    limit: params.limit || 100,
    sort: params.sort || Options.sort.LAST,
    min_seeders: params.min_seeders || undefined,
    min_leechers: params.min_leechers || undefined,
    format: params.format || Options.format.JSON_EXTENDED,
    ranked: params.ranked || Options.ranked.FALSE
  };
  return singleRequest(parameters).then(parseResults);
}
// Issues one authenticated request against the RARBG API, handling token
// expiry and rate limiting with bounded retries.
async function singleRequest(params = {}, config = {}, retries = 15) {
const timeout = config.timeout || defaultTimeout;
const headers = {
'user-agent': getRandomUserAgent(),
'accept-encoding': 'gzip, deflate',
'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4'
};
// Note: params is mutated in place (token/app_id added, null keys removed).
params.token = await getToken();
params.app_id = appId;
Object.keys(params)
.filter(key => params[key] === undefined || params[key] === null)
.forEach(key => delete params[key]);
const options = { headers, timeout, params };
return axios.get(baseUrl, options)
.then(response => {
if (response.data && response.data.error_code === 4) {
// token expired — drop the cache and retry with a fresh one.
// NOTE(review): this recursive call resets the retry budget to 15;
// presumably expiry is rare enough that this cannot loop — confirm.
token = undefined;
return singleRequest(params, config);
}
if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) {
// too many requests — back off and retry with a decremented budget.
return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
}
if (response.status !== 200 || (response.data && response.data.error)) {
// something went wrong ("RARGB" typo is preserved in the message).
return Promise.reject(response.data || `Failed RARGB request with status=${response.status}`);
}
return response.data;
})
.catch(error => {
// HTTP 429 is also treated as rate limiting.
if (error.response && [429].includes(error.response.status) && retries > 0) {
return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
}
return Promise.reject(error.message || error);
});
}
// Validates an API response envelope and maps its torrent_results list.
// Rejects (with a descriptive string, matching existing callers) when the
// envelope is missing or malformed.
function parseResults(results) {
  const entries = results && results.torrent_results;
  if (!Array.isArray(entries)) {
    return Promise.reject(`Incorrect results ${JSON.stringify(results)}`)
  }
  return entries.map((entry) => parseResult(entry));
}
// Maps a raw RARBG API entry to the internal torrent shape.
function parseResult(result) {
  const { download, episode_info: episodeInfo } = result;
  return {
    title: result.title,
    infoHash: decode(download).infoHash,
    magnetLink: download,
    seeders: result.seeders,
    leechers: result.leechers,
    category: result.category,
    size: result.size,
    uploadDate: new Date(result.pubdate),
    imdbId: episodeInfo && episodeInfo.imdb
  };
}
// Returns the cached API token, fetching a new one on first use or after
// singleRequest() cleared it on expiry.
// NOTE(review): concurrent callers may each trigger a fetch before the
// cache is set — presumably harmless for this API, but confirm.
async function getToken() {
if (!token) {
const params = { get_token: 'get_token', app_id: appId };
const options = { timeout: defaultTimeout, params };
token = await axios.get(baseUrl, options)
.then(response => response.data.token);
}
return token;
}
module.exports = { search, browse, Options };

View File

@@ -1,80 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const rarbg = require('./rarbg_api');
const { Type } = require('../../lib/types');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'RARBG';
// API-level limiter: one request at a time, spaced 3s to respect rate limits.
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 3000 });
// Local processing limiter for db writes.
const entryLimiter = new Bottleneck({ maxConcurrent: 20 });
// Flattened list of category ids queried per IMDb id (each Options entry
// is itself an array of ids).
const allowedCategories = [
rarbg.Options.category.MOVIES_XVID,
rarbg.Options.category.MOVIES_XVID_720P,
rarbg.Options.category.MOVIES_X265_1080P,
rarbg.Options.category.MOVIES_X264,
rarbg.Options.category.MOVIES_X264_720P,
rarbg.Options.category.MOVIES_X264_1080P,
rarbg.Options.category.MOVIES_HIGH_RES,
rarbg.Options.category.TV_EPISODES,
rarbg.Options.category.TV_UHD_EPISODES,
rarbg.Options.category.TV_HD_EPISODES
].reduce((a, b) => a.concat(b), [])
// One-off dump scrape: iterates a locally bundled list of IMDb ids and
// ingests all RARBG torrents found for each.
// NOTE(review): requires rargb_series_imdb_ids_2021-02-27.json to exist
// next to this file; the movie id list is intentionally commented out.
async function scrape() {
console.log(`[${moment()}] starting ${NAME} dump scrape...`);
// const movieImdbIds = require('./rargb_movie_imdb_ids_2021-02-27.json');
const seriesImdbIds = require('./rargb_series_imdb_ids_2021-02-27.json');
//const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
return Promise.all(
seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
.then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
}
// Fetches all allowed-category torrents for one IMDb id; failures are
// logged and resolve to an empty list so the dump run keeps going.
async function getTorrentsForImdbId(imdbId) {
  try {
    const torrents = await rarbg.search(imdbId, { category: allowedCategories });
    console.log(`Completed ${imdbId} request`);
    return torrents;
  } catch (error) {
    console.warn(`Failed ${NAME} request for ${imdbId}: `, error);
    return [];
  }
}
// Persists a parsed RARBG record, unless it is already stored (in which
// case the existing entry is refreshed and the record returned as-is).
async function processTorrentRecord(record) {
  const alreadyStored = await checkAndUpdateTorrent(record);
  if (alreadyStored) {
    return record;
  }
  return createTorrentEntry({
    provider: NAME,
    infoHash: record.infoHash,
    title: record.title,
    type: getType(record.category),
    seeders: record.seeders,
    size: record.size,
    uploadDate: record.uploadDate,
    imdbId: record.imdbId
  });
}
// RARBG category labels that denote episodic content; everything else is
// treated as a movie.
const seriesCategories = new Set([
  'TV Episodes',
  'Movies/TV-UHD-episodes',
  'TV HD Episodes',
]);
function getType(category) {
  return seriesCategories.has(category) ? Type.SERIES : Type.MOVIE;
}
module.exports = { scrape, NAME };

View File

@@ -1,95 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const rarbg = require('./rarbg_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'RARBG';
// API-level limiter: one request at a time, spaced 2.5s to respect rate limits.
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
// Local processing limiter for db writes.
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
// Runs one full RARBG scrape pass, then records the pass start time as
// the provider's lastScraped timestamp.
async function scrape() {
  const scrapeStart = moment();
  const provider = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  // Persist the start time so uploads during the pass are not skipped.
  provider.lastScraped = scrapeStart;
  await provider.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
// Seeder refresh is intentionally disabled for RARBG; resolves to an
// empty list. The commented code sketches the originally planned
// imdb-id-batch implementation.
async function updateSeeders(torrent, getImdbIdsMethod) {
// return getImdbIdsMethod()
//     .then(imdbIds => Promise.all(imdbIds.map(imdbId => limiter.schedule(() => search(imdbId)))))
//     .then(results => results.reduce((a, b) => a.concat(b), []));
return Promise.resolve([]);
}
// Browses every allowed category sequentially (rate limited) and merges
// the per-category results into one flat list.
async function scrapeLatestTorrents() {
  const allowedCategories = [
    rarbg.Options.category.MOVIES_XVID,
    rarbg.Options.category.MOVIES_XVID_720P,
    rarbg.Options.category.MOVIES_X265_1080P,
    rarbg.Options.category.MOVIES_X264,
    rarbg.Options.category.MOVIES_X264_720P,
    rarbg.Options.category.MOVIES_X264_1080P,
    rarbg.Options.category.MOVIES_HIGH_RES,
    rarbg.Options.category.TV_EPISODES,
    rarbg.Options.category.TV_UHD_EPISODES,
    rarbg.Options.category.TV_HD_EPISODES
  ];
  const tasks = allowedCategories
      .map((category) => () => limiter.schedule(() => scrapeLatestTorrentsForCategory(category)));
  const entries = await Promises.sequence(tasks);
  return entries.reduce((merged, batch) => merged.concat(batch), []);
}
// Scrapes one RARBG category listing and stores each returned torrent.
// Fix: corrected "Scrapping"/"scrapping" typos in the log messages.
async function scrapeLatestTorrentsForCategory(category) {
  console.log(`Scraping ${NAME} ${category} category`);
  return rarbg.browse({ category: category })
      .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))
      .catch(error => {
        // Catches both browse and processing failures; a failed category
        // resolves to [] so the remaining categories still run.
        console.warn(`Failed ${NAME} scraping for ${category} due: `, error);
        return Promise.resolve([]);
      });
}
// Persists a parsed RARBG record, unless it is already stored (in which
// case the existing entry is refreshed and the record returned as-is).
async function processTorrentRecord(record) {
  const alreadyStored = await checkAndUpdateTorrent(record);
  if (alreadyStored) {
    return record;
  }
  return createTorrentEntry({
    provider: NAME,
    infoHash: record.infoHash,
    title: record.title,
    type: getType(record.category),
    seeders: record.seeders,
    size: record.size,
    uploadDate: record.uploadDate,
    imdbId: record.imdbId
  });
}
// RARBG category labels that denote episodic content; everything else is
// treated as a movie.
const seriesCategories = new Set([
  'TV Episodes',
  'Movies/TV-UHD-episodes',
  'TV HD Episodes',
]);
function getType(category) {
  return seriesCategories.has(category) ? Type.SERIES : Type.MOVIE;
}
module.exports = { scrape, updateSeeders, NAME };

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,207 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const decode = require('magnet-uri');
const { defaultOptionsWithProxy } = require('../../lib/requestHelper');
const baseUrl = 'http://www.rutor.info';
const defaultTimeout = 10000;
// rutor.info numeric category ids as used in browse URLs.
const Categories = {
ALL: '0',
FOREIGN_FILMS: '1',
RUSSIAN_FILMS: '5',
SCIENCE_FILMS: '12',
FOREIGN_SERIES: '4',
RUSSIAN_SERIES: '16',
RUSSIAN_TV: '6',
RUSSIAN_ANIMATION: '7',
ANIME: '10',
FOREIGN_RELEASES: '17'
};
// Fetches and parses a rutor torrent detail page, retrying on failure.
// Rejects with the last error once the retry budget is exhausted.
function torrent(torrentId, config = {}, retries = 2, error = null) {
  if (!torrentId || retries === 0) {
    return Promise.reject(error || new Error(`Failed ${torrentId} search`));
  }
  const pageUrl = `${baseUrl}/torrent/${torrentId}`;
  return singleRequest(pageUrl)
      .then((body) => parseTorrentPage(body, torrentId))
      .catch((err) => torrent(torrentId, config, retries - 1, err));
}
// Searches rutor for a free-text query, retrying on failure.
function search(query, retries = 2, error = null) {
  if (retries === 0) {
    return Promise.reject(error || new Error(`Failed browse request`));
  }
  const searchUrl = `${baseUrl}/search/0/0/0/0/${encodeURIComponent(query)}`;
  return singleRequest(searchUrl)
      .then(parseTableBody)
      .catch((err) => search(query, retries - 1, err));
}
// Browses a rutor category listing page (1-based page in config),
// retrying on failure.
function browse(config = {}, retries = 2, error = null) {
  if (retries === 0) {
    return Promise.reject(error || new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  // rutor pages are 0-based in the URL.
  const browseUrl = `${baseUrl}/browse/${page - 1}/${config.category}/0/0`;
  return singleRequest(browseUrl)
      .then(parseTableBody)
      .catch((err) => browse(config, retries - 1, err));
}
// Fetches and parses the file listing of a torrent.
function files(torrentId) {
  return singleRequest(`${baseUrl}/descriptions/${torrentId}.files`).then(parseFiles);
}
// Fetches a rutor page through the configured proxy, rejecting on empty
// or known error/block bodies so callers can retry.
function singleRequest(requestUrl) {
  const options = { ...defaultOptionsWithProxy(), timeout: defaultTimeout };
  return axios.get(requestUrl, options).then((response) => {
    const body = response.data;
    if (!body) {
      throw new Error(`No body: ${requestUrl} with status ${response.status}`);
    }
    if (body.includes('Access Denied')) {
      console.log(`Access Denied: ${requestUrl}`);
      throw new Error(`Access Denied: ${requestUrl}`);
    }
    const errorMarkers = ['502: Bad gateway', '403 Forbidden', 'Origin DNS error'];
    if (errorMarkers.some((marker) => body.includes(marker))) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  });
}
// Parses a rutor listing/search results table into torrent summaries.
// Fix: return after reject() — the original fell through and kept
// executing the parsing code after signalling failure.
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      return reject(new Error('Failed loading body'));
    }
    // Rows with class .backgr are header rows and are skipped.
    const torrents = $('#index').find('tr:not(.backgr)').map((i, elem) => {
      const row = $(elem).find('td');
      const links = $(row[1]).find('a');
      const peers = $(row[row.length - 1]);
      const magnetLink = $(links[1]).attr('href');
      return {
        title: $(links[2]).text(),
        infoHash: decode(magnetLink).infoHash,
        magnetLink: magnetLink,
        torrentLink: $(links[0]).attr('href'),
        torrentId: $(links[2]).attr('href').match(/torrent\/(\d+)/)[1],
        seeders: parseInt(peers.find('.green').text()),
        leechers: parseInt(peers.find('.red').text()),
        uploadDate: parseRussianDate($(row[0]).text()),
        // Replace the non-breaking-space entity rutor uses in sizes.
        size: $(row[row.length - 2]).html().replace('&#xA0;', ' '),
      }
    }).get();
    resolve(torrents);
  });
}
// Parses a rutor torrent detail page into the internal torrent shape.
// Fix: return after reject() — the original fell through and kept
// executing the parsing code after signalling failure.
function parseTorrentPage(body, torrentId) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      return reject(new Error('Failed loading body'));
    }
    const rows = $('#details > tr')
    const details = $(rows[0]).find('td:nth-of-type(2)');
    const magnetLink = $('#download a:nth-of-type(1)').attr('href');
    const imdbIdMatch = details.html().match(/imdb\.com\/title\/(tt\d+)/i);
    // Detail rows are addressed from the END of the table because the
    // number of leading description rows varies per torrent.
    const parsedTorrent = {
      title: $('#all h1').first().text(),
      torrentId: torrentId,
      infoHash: decode(magnetLink).infoHash,
      trackers: Array.from(new Set(decode(magnetLink).tr)).join(','),
      magnetLink: magnetLink,
      torrentLink: $('#download a:nth-of-type(2)').attr('href'),
      seeders: parseInt($(rows[rows.length - 8]).find('td:nth-of-type(2)').first().text(), 10),
      category: $('tr:contains(\'Категория\') a').first().attr('href').match(/\/([\w-]+)$/)[1],
      languages: parseLanguages(details.text()),
      size: parseSize($(rows[rows.length - 4]).find('td:nth-of-type(2)').text()),
      uploadDate: parseDate($(rows[rows.length - 5]).find('td:nth-of-type(2)').first().text()),
      imdbId: imdbIdMatch && imdbIdMatch[1]
    };
    resolve(parsedTorrent);
  });
}
// Parses a rutor ".files" description payload into file entries.
// Fix: lines that do not contain at least two <td> cells (headers, blank
// lines) previously crashed with a TypeError on null .match() results;
// they are now skipped.
function parseFiles(body) {
  if (!body) {
    throw new Error("No files in the body");
  }
  return body.split('\n')
      .map((line) => line.match(/<td>([^<]+)<\/td>/g))
      .filter((cells) => cells && cells.length > 1)
      .map((cells) => cells.slice(1))
      .map((item, index) => ({
        fileIndex: index,
        // NOTE(review): item[0] is the raw "<td>…</td>" match, so the
        // stripped name may retain a trailing tag — confirm downstream use.
        name: item[0].replace(/^.+\//g, ''),
        path: item[0].replace(/^.+\//, ''),
        size: parseSize(item[1])
      }));
}
// Parses detail-page dates like "01-02-2024 11:00:34 (…)"; the trailing
// parenthesised relative-time suffix is stripped first.
function parseDate(dateString) {
  const withoutSuffix = dateString.replace(/\s\(.*\)/, '');
  return moment(withoutSuffix, 'DD-MM-YYYY HH:mm:ss').toDate();
}
// Russian month abbreviations mapped to moment-parsable English ones.
const russianMonths = {
  'Янв': 'Jan',
  'Фев': 'Feb',
  'Мар': 'Mar',
  'Апр': 'Apr',
  'Май': 'May',
  'Июн': 'Jun',
  'Июл': 'Jul',
  'Авг': 'Aug',
  'Сен': 'Sep',
  'Окт': 'Oct',
  'Ноя': 'Nov',
  'Дек': 'Dec'
};
// Parses listing dates such as "12 Окт 23" by swapping in the English
// month and normalizing non-breaking spaces.
function parseRussianDate(dateString) {
  const rusMonth = Object.keys(russianMonths).find((month) => dateString.includes(month));
  const normalized = dateString.trim()
      .replace(rusMonth, russianMonths[rusMonth])
      .replace(/\u00a0/g, ' ');
  return moment(normalized, 'DD MMM YY').toDate();
}
// Extracts the exact byte count from strings like "1.37 GB (1468006400 Bytes)".
// Fix: returns undefined when the input is missing or lacks the
// "(N Bytes)" marker, instead of throwing a TypeError on a null match.
function parseSize(sizeString) {
  const match = sizeString && sizeString.match(/\((\d+) Bytes\)/);
  return match ? parseInt(match[1], 10) : undefined;
}
// Audio-language detectors: each regex requires a language/audio label
// before the language token to avoid matching the plot description.
const languageMatchers = {
  'russian': /(?:Язык|Звук|Аудио|audio|language).*(russian|\brus?\b|[Рр]усский)/i,
  'english': /(?:Язык|Звук|Аудио|audio|language).*(english|\beng?\b|[Аа]нглийский)/i,
  'ukrainian': /(?:Язык|Звук|Аудио|audio|language).*(ukrainian|\bukr\b|украинский)/i,
  'french': /(?:Язык|Звук|Аудио|audio|language).*(french|\bfr\b|французский)/i,
  'spanish': /(?:Язык|Звук|Аудио|audio|language).*(spanish|\bspa\b|испанский)/i,
  'italian': /(?:Язык|Звук|Аудио|audio|language).*(italian|\bita\b|итальянский)/i,
  'german': /(?:Язык|Звук|Аудио|audio|language).*(german|\bger\b|Немецкий)/i,
  'korean': /(?:Язык|Звук|Аудио|audio|language).*(korean|Корейский)/i,
  'arabic': /(?:Язык|Звук|Аудио|audio|language).*(arabic|Арабский)/i,
  'portuguese': /(?:Язык|Звук|Аудио|audio|language).*(portuguese|Португальский)/i,
  'japanese': /(?:Язык|Звук|Аудио|audio|language).*(japanese|\bjap\b|\bjp\b|[Яя]понский)/i,
}
// Detects audio languages in the detail text. Russian is always implied
// (rutor is a Russian tracker); more than 4 tracks collapse to 'multi-audio'.
function parseLanguages(details) {
  // Ignore the subtitles ("Text"/"Текст") section so subtitle languages
  // are not counted as audio tracks.
  const subsInfoMatch = details.match(/\r?\n(Text|Текст)(?:\s?#?\d{1,2})?\r?\n/i);
  const audioPart = subsInfoMatch ? details.substring(0, subsInfoMatch.index) : details;
  const detected = Object.entries(languageMatchers)
      .filter(([, matcher]) => matcher.test(audioPart))
      .map(([language]) => language);
  const languages = [...new Set(['russian', ...detected])];
  return languages.length > 4 ? 'multi-audio' : languages.join(',');
}
// Public rutor client surface.
module.exports = { torrent, browse, search, Categories };

View File

@@ -1,133 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const { parse } = require('parse-torrent-title');
const rutor = require('./rutor_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'Rutor';
// rutor category url slug -> internal Type.
const TYPE_MAPPING = {
'kino': Type.MOVIE,
'nashe_kino': Type.MOVIE,
'nauchno_popularnoe': Type.MOVIE,
'inostrannoe': Type.MOVIE,
'seriali': Type.SERIES,
'nashi_seriali': Type.SERIES,
'tv': Type.SERIES,
'multiki': Type.MOVIE,
'anime': Type.ANIME
};
// One listing request per 5s and one detail request per 2.5s to avoid bans.
const api_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 5000 });
const api_entry_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
// Local record-processing concurrency.
const limiter = new Bottleneck({ maxConcurrent: 10 });
// Categories scraped on each pass.
const allowedCategories = [
rutor.Categories.FOREIGN_FILMS,
rutor.Categories.FOREIGN_RELEASES,
rutor.Categories.RUSSIAN_FILMS,
rutor.Categories.FOREIGN_SERIES,
rutor.Categories.RUSSIAN_SERIES,
rutor.Categories.SCIENCE_FILMS,
rutor.Categories.RUSSIAN_ANIMATION,
rutor.Categories.ANIME
];
// Runs one full rutor scrape pass, then records the pass start time as
// the provider's lastScraped timestamp. The commented block is a manual
// debugging harness for re-processing specific torrent ids.
async function scrape() {
const scrapeStart = moment();
const lastScrape = await repository.getProvider({ name: NAME });
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
// const ids = [
//   '637799'
// ];
// return Promise.all(ids.map(id => api_entry_limiter.schedule(() => rutor.torrent(id))
//     .then(torrent => processTorrentRecord(torrent))))
//     .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
return scrapeLatestTorrents()
.then(() => {
// Persist the start (not end) time so mid-scrape uploads are not skipped.
lastScrape.lastScraped = scrapeStart;
return lastScrape.save();
})
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
// Seeder refresh is not supported for rutor; always resolves to an
// empty list.
async function updateSeeders(torrent) {
  return [];
}
// Scrapes every allowed category sequentially and flattens the results.
async function scrapeLatestTorrents() {
  const tasks = allowedCategories.map((category) => () => scrapeLatestTorrentsForCategory(category));
  const entries = await Promises.sequence(tasks);
  return entries.reduce((merged, batch) => merged.concat(batch), []);
}
// Scrapes one category listing page (rate limited), processes the rows,
// and recurses until an empty page or the category's page cap is hit.
// Fix: corrected "Scrapping"/"scrapping" typos in log messages.
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scraping ${NAME} ${category} category page ${page}`);
  return api_limiter.schedule(() => rutor.browse({ category, page }))
      .catch(error => {
        // A failed page is logged and treated as empty so the remaining
        // categories still get scraped.
        console.warn(`Failed ${NAME} scraping for [${page}] ${category} due: `, error);
        return Promise.resolve([]);
      })
      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
      .then(resolved => resolved.length > 0 && page < getMaxPage(category)
          ? scrapeLatestTorrentsForCategory(category, page + 1)
          : Promise.resolve());
}
// Enriches a listing row with detail-page data and persists it.
// Skips records already stored, old unseeded torrents, records with an
// unmapped category, and (for some categories) records without an imdbId.
async function processTorrentRecord(record) {
if (await checkAndUpdateTorrent(record)) {
return record;
}
// Old and dead torrents are not worth a detail-page request.
const isOld = moment(record.uploadDate).isBefore(moment().subtract(18, 'month'));
if (record.seeders === 0 && isOld) {
console.log(`Skipping old unseeded torrent [${record.infoHash}] ${record.title}`)
return record;
}
// Detail fetch is rate limited separately; failures yield undefined.
const foundTorrent = await api_entry_limiter.schedule(() => rutor.torrent(record.torrentId).catch(() => undefined));
if (!foundTorrent || !TYPE_MAPPING[foundTorrent.category]) {
return Promise.resolve(`${NAME}: Invalid torrent record: ${record.torrentId}`);
}
if (!foundTorrent.imdbId && disallowWithoutImdbId(foundTorrent)) {
return Promise.resolve(`${NAME}: No imdbId defined: ${record.torrentId}`);
}
const torrent = {
provider: NAME,
infoHash: foundTorrent.infoHash,
torrentId: foundTorrent.torrentId,
torrentLink: foundTorrent.torrentLink,
trackers: foundTorrent.trackers,
title: foundTorrent.title,
type: TYPE_MAPPING[foundTorrent.category],
size: foundTorrent.size,
seeders: foundTorrent.seeders,
uploadDate: foundTorrent.uploadDate,
imdbId: foundTorrent.imdbId,
languages: foundTorrent.languages || undefined,
};
return createTorrentEntry(torrent).then(() => torrent);
}
// Foreign films/series listings are crawled two pages deep; all other
// categories get a single page.
function getMaxPage(category) {
  const deepCategories = [rutor.Categories.FOREIGN_FILMS, rutor.Categories.FOREIGN_SERIES];
  return deepCategories.includes(category) ? 2 : 1;
}
// Decides whether a torrent lacking an imdbId must be dropped.
function disallowWithoutImdbId(torrent) {
  if (['kino', 'anime'].includes(torrent.category)) {
    return false; // allow to search foreign movie and anime ids via search
  }
  // Non-Russian series titles can also be resolved via title search;
  // everything else without an id is rejected.
  const isNonRussianSeries = torrent.category === 'seriali'
      && !parse(torrent.title).title.match(/[\u0400-\u04ff]/i);
  return !isNonRussianSeries;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,41 +0,0 @@
// Matches Brazilian-release markers (dubbed / dual-audio / national / multi).
// Returns the regex match result (array or null) so callers can use it as
// a truthy flag, matching the original contract.
function isPtDubbed(name) {
  return /dublado|dual|nacional|multi/.exec(name.toLowerCase());
}
// Cleans scraped PT release names: moves a lone resolution tag to the
// end, then strips site watermarks, metadata prefixes and dangling
// site-domain leftovers, applying the rules in order.
function sanitizePtName(name) {
  const cleanupSteps = [
    [/(.*)\b(\d{3,4}P)\b(?!.*\d{3,4}[Pp])(.*)/, '$1$3 $2'], // add resolution to the end if missing
    [/^[\[{]?(?:ACESSE.*|WWW\.)?[A-Z]+\.(COM|NET|ORG|TO|TV|ME)\b\s*[-\]}]+[\s.]*/i, ''], // site watermark
    [/^(\d*(?:\.\d{1,2})?(?:[4A-Z-]{3,}|P)[-.]+)+/, ''], // metadata prefixes
    [/^[\[{]?(?:ACESSE.*|WWW\.)?[A-Z]+\.(COM|NET|ORG|TO|TV|ME)\b\s*[-\]}]+[\s.]*/i, ''], // watermark again, post prefix strip
    [/^(COM|NET|ORG|TO|TV|ME)\b\s*-+[\s.]*/, ''] // dangling site endings
  ];
  const cleaned = cleanupSteps.reduce(
      (result, [pattern, replacement]) => result.replace(pattern, replacement),
      name);
  return cleaned.trim();
}
// Strips season suffixes ("S01", "3ª Temporada") from the original title.
// Fix: trims again AFTER stripping, so removing a trailing "S01" no
// longer leaves dangling whitespace at the end of the name.
function sanitizePtOriginalName(name) {
  return name.trim().replace(/S\d+$|\d.\s?[Tt]emporada/, '').trim();
}
// Normalizes the scraped language string.
// NOTE(review): the first replace pattern is mojibake-damaged in this
// dump ("<2F><>" is an encoding artifact of the original bytes) and
// presumably targeted a broken "ê" sequence; the second pattern's space
// may likewise have been a non-breaking space originally. Confirm both
// against the pristine source before editing.
function sanitizePtLanguages(languages) {
return languages
.replace(/<2F><>/g, 'ê')
.replace(/ /g, '')
.trim();
}
// Converts a human-readable size ("1.5 GB", "700 MB", "1.2 Go") to bytes.
// Returns undefined for missing input; unrecognized units are treated as
// plain bytes. Thousands separators (',' and ''') are stripped first.
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  const unitScales = [
    [/Gi?B|Go/, 1024 * 1024 * 1024],
    [/Mi?B|Mo/, 1024 * 1024],
    [/[Kk]i?B|Ko/, 1024]
  ];
  const matched = unitScales.find(([unitPattern]) => unitPattern.test(sizeText));
  const scale = matched ? matched[1] : 1;
  return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale);
}
// Shared helpers for the Portuguese-language scrapers.
module.exports = { parseSize, isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages }

View File

@@ -1,129 +0,0 @@
const axios = require('axios');
const { escapeHTML } = require('../../lib/metadata');
const baseUrl = 'https://apibay.org';
const timeout = 5000;
// apibay numeric category ids; the hundreds digit encodes the top-level
// group (1xx audio, 2xx video, 3xx apps, 4xx games, 5xx porn, 6xx other).
const Categories = {
AUDIO: {
ALL: 100,
MUSIC: 101,
AUDIO_BOOKS: 102,
SOUND_CLIPS: 103,
FLAC: 104,
OTHER: 199
},
VIDEO: {
ALL: 200,
MOVIES: 201,
MOVIES_DVDR: 202,
MUSIC_VIDEOS: 203,
MOVIE_CLIPS: 204,
TV_SHOWS: 205,
HANDHELD: 206,
MOVIES_HD: 207,
TV_SHOWS_HD: 208,
MOVIES_3D: 209,
OTHER: 299
},
APPS: {
ALL: 300,
WINDOWS: 301,
MAC: 302,
UNIX: 303,
HANDHELD: 304,
IOS: 305,
ANDROID: 306,
OTHER_OS: 399
},
GAMES: {
ALL: 400,
PC: 401,
MAC: 402,
PSx: 403,
XBOX360: 404,
Wii: 405,
HANDHELD: 406,
IOS: 407,
ANDROID: 408,
OTHER: 499
},
PORN: {
ALL: 500,
MOVIES: 501,
MOVIES_DVDR: 502,
PICTURES: 503,
GAMES: 504,
MOVIES_HD: 505,
MOVIE_CLIPS: 506,
OTHER: 599
},
OTHER: {
ALL: 600,
E_BOOKS: 601,
COMICS: 602,
PICTURES: 603,
COVERS: 604,
PHYSIBLES: 605,
OTHER: 699
}
};
// Fetches a single torrent by id from apibay, retrying transient
// failures up to `retries` extra times.
function torrent(torrentId, retries = 2) {
  if (!torrentId) {
    return Promise.reject(new Error('No valid torrentId provided'));
  }
  const attempt = () => _request(`t.php?id=${torrentId}`).then(toTorrent);
  return attempt().catch((error) => retries ? torrent(torrentId, retries - 1) : Promise.reject(error));
}
// Searches apibay for a keyword within a category.
// Fix: the keyword is now URI-encoded — unescaped spaces or reserved
// characters (&, #, +) in the raw template string previously corrupted
// the query and can make Node's http client reject the request outright.
function search(keyword, config = {}, retries = 2) {
  if (!keyword) {
    return Promise.reject(new Error('No valid keyword provided'));
  }
  const q = encodeURIComponent(keyword);
  const cat = config.category || Categories.VIDEO.ALL;
  return _request(`q.php?q=${q}&cat=${cat}`)
      .then(results => results.map((result) => toTorrent(result)))
      .catch(error => retries ? search(keyword, config, retries - 1) : Promise.reject(error));
}
// Browses an apibay category listing; pages are 1-based in config and
// 0-based in the API.
function browse(config = {}, retries = 2) {
  const category = config.category || 0;
  const page = config.page - 1 || 0;
  return _request(`q.php?q=category:${category}:${page}`)
      .then(results => results.map((result) => toTorrent(result)))
      .catch(error => retries ? browse(config, retries - 1) : Promise.reject(error));
}
// Fetches an apibay endpoint and resolves with the parsed JSON payload.
// Fix: rejects with an Error instead of a bare string, so callers get a
// stack trace and consistent error objects.
async function _request(endpoint) {
  const url = `${baseUrl}/${endpoint}`;
  return axios.get(url, { timeout: timeout })
      .then(response => {
        // axios parses JSON automatically; a non-object body means the API
        // returned an HTML error page or similar.
        if (typeof response.data === 'object') {
          return response.data;
        }
        return Promise.reject(new Error('Unexpected response body'));
      });
}
// Maps a raw apibay record to the internal torrent shape.
// Fix: parseInt is given an explicit base-10 radix throughout.
function toTorrent(result) {
  return {
    torrentId: result.id,
    name: escapeHTML(result.name),
    infoHash: result.info_hash.toLowerCase(),
    size: parseInt(result.size, 10),
    seeders: parseInt(result.seeders, 10),
    leechers: parseInt(result.leechers, 10),
    subcategory: parseInt(result.category, 10),
    // `added` is a unix timestamp in seconds.
    uploadDate: new Date(result.added * 1000),
    imdbId: result.imdb || undefined,
    filesCount: result.num_files && parseInt(result.num_files, 10) || undefined
  };
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,175 +0,0 @@
const axios = require('axios');
const moment = require('moment');
const Bottleneck = require('bottleneck');
const { ungzip } = require('node-gzip');
const LineByLineReader = require('line-by-line');
const fs = require('fs');
const thepiratebay = require('./thepiratebay_api.js');
const bing = require('nodejs-bing');
const { Type } = require('../../lib/types');
const { escapeHTML } = require('../../lib/metadata');
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
const NAME = 'ThePirateBay';
// Local path where the gunzipped CSV dump is written before processing.
const CSV_FILE_PATH = '/tmp/tpb_dump.csv';
// Concurrency cap for per-row processing.
const limiter = new Bottleneck({ maxConcurrent: 40 });
// Streams a TPB CSV dump line by line, converting each row into a
// torrent record and processing it with bounded concurrency. The reader
// is paused while the limiter queue is full (backpressure) and resumed
// once it drains.
async function scrape() {
// Hard-coded dump descriptor; the commented lines show the original
// dynamic dump discovery and checkpoint logic.
const lastDump = { updatedAt: 2147000000 };
//const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
//const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
const checkPoint = 0;
if (lastDump) {
console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
await downloadDump(lastDump);
let entriesProcessed = 0;
const lr = new LineByLineReader(CSV_FILE_PATH);
lr.on('line', (line) => {
// Skip the CSV header row.
if (line.includes("#ADDED")) {
return;
}
if (entriesProcessed % 1000 === 0) {
console.log(`Processed ${entriesProcessed} entries`);
}
// Fast-forward past already-processed entries when resuming.
if (entriesProcessed <= checkPoint) {
entriesProcessed++;
return;
}
// Columns: ADDED;HASH(base64);NAME(quoted);SIZE — quoted fields may
// contain semicolons, hence the regex split.
const row = line.match(/(?<=^|;)(".*"|[^;]+)(?=;|$)/g);
if (row.length !== 4) {
console.log(`Invalid row: ${line}`);
return;
}
const torrent = {
uploadDate: moment(row[0], 'YYYY-MMM-DD HH:mm:ss').toDate(),
infoHash: Buffer.from(row[1], 'base64').toString('hex'),
title: escapeHTML(row[2])
.replace(/^"|"$/g, '')
.replace(/&#?\w{2,6};/g, ' ')
.replace(/\s+/g, ' ')
.trim(),
size: parseInt(row[3], 10)
};
// Backpressure: pause reading while the limiter has queued work.
if (!limiter.empty()) {
lr.pause()
}
limiter.schedule(() => processTorrentRecord(torrent)
.catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
.then(() => limiter.empty())
.then((empty) => empty && lr.resume())
.then(() => entriesProcessed++);
});
lr.on('error', (err) => {
console.log(err);
});
lr.on('end', () => {
console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`);
});
}
}
// Only these video subcategories are ingested; other matches are skipped.
const allowedCategories = [
thepiratebay.Categories.VIDEO.MOVIES,
thepiratebay.Categories.VIDEO.MOVIES_HD,
thepiratebay.Categories.VIDEO.MOVIES_DVDR,
thepiratebay.Categories.VIDEO.MOVIES_3D,
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
// Subcategories treated as series; remaining allowed ones map to movies.
const seriesCategories = [
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
// Resolves a dump row against live TPB data and stores it; rows that are
// already stored are skipped, and rows that cannot be matched (or fall
// outside the allowed categories) are recorded as skip entries.
async function processTorrentRecord(record) {
  if (await getStoredTorrentEntry(record)) {
    return;
  }
  const torrentFound = await findTorrent(record);
  if (!torrentFound || !allowedCategories.includes(torrentFound.subcategory)) {
    return createSkipTorrentEntry(record);
  }
  const isSeries = seriesCategories.includes(torrentFound.subcategory);
  const torrent = {
    infoHash: torrentFound.infoHash,
    provider: NAME,
    torrentId: torrentFound.torrentId,
    title: torrentFound.name,
    size: torrentFound.size,
    type: isSeries ? Type.SERIES : Type.MOVIE,
    imdbId: isSeries && torrentFound.imdbId || undefined,
    uploadDate: torrentFound.uploadDate || record.uploadDate,
    seeders: torrentFound.seeders,
  };
  return createTorrentEntry(torrent);
}
// Tries the TPB site search first, falling back to a Bing lookup when
// the source search fails.
async function findTorrent(record) {
  return findTorrentInSource(record).catch(() => findTorrentViaBing(record));
}
// Searches TPB by title (up to 5 result pages) for a result whose magnet
// link contains the dump row's info hash, then fetches its full details.
async function findTorrentInSource(record) {
let page = 0;
let torrentFound;
while (!torrentFound && page < 5) {
const torrents = await thepiratebay.search(record.title.replace(/[\W\s]+/, ' '), { page: page });
torrentFound = torrents.filter(torrent => torrent.magnetLink.toLowerCase().includes(record.infoHash))[0];
// page = 1000 is a sentinel that exits the loop early when a results
// page comes back empty (no point in paging further).
page = torrents.length === 0 ? 1000 : page + 1;
}
if (!torrentFound) {
return Promise.reject(new Error(`Failed to find torrent ${record.title}`));
}
// The detail fetch is retried once on failure.
return Promise.resolve(torrentFound)
.then((torrent) => thepiratebay.torrent(torrent.torrentId)
.catch(() => thepiratebay.torrent(torrent.torrentId)));
}
// Falls back to a Bing web search for the info hash, extracting the TPB
// torrent id from the first plausible result link.
// Fix: a result link that does not contain "torrent/<id>/" previously
// threw a TypeError on the null match; it now logs and resolves undefined.
async function findTorrentViaBing(record) {
  return bing.web(`${record.infoHash}`)
      .then((results) => results
          .find(result => result.description.includes('Direct download via magnet link') ||
              result.description.includes('Get this torrent')))
      .then((result) => {
        if (!result) {
          console.warn(`Failed to find torrent ${record.title}`);
          return Promise.resolve(undefined);
        }
        const idMatch = result.link.match(/torrent\/(\w+)\//);
        if (!idMatch) {
          console.warn(`Unexpected result link for ${record.title}: ${result.link}`);
          return Promise.resolve(undefined);
        }
        return idMatch[1];
      })
      .then((torrentId) => torrentId && thepiratebay.torrent(torrentId))
}
// Downloads and unzips the gzipped CSV dump to CSV_FILE_PATH; a no-op
// (returns undefined) when the file already exists.
// NOTE(review): the 2000ms request timeout looks short for a full dump
// download — confirm it only bounds connection setup in this axios
// version, not the whole streamed body.
function downloadDump(dump) {
try {
if (fs.existsSync(CSV_FILE_PATH)) {
console.log('dump file already exist...');
return;
}
} catch (err) {
console.error(err)
}
console.log('downloading dump file...');
return axios.get(dump.url, { timeout: 2000, responseType: 'stream' })
.then((response) => response.data)
.then((body) => {
console.log('unzipping dump file...');
return ungzip(body);
})
.then((unzipped) => {
console.log('writing dump file...');
return fs.promises.writeFile(CSV_FILE_PATH, unzipped);
})
}
module.exports = { scrape, NAME };

View File

@@ -1,43 +0,0 @@
const moment = require('moment');
const { Sequelize } = require('sequelize');
const Bottleneck = require('bottleneck');
const thepiratebay = require('./thepiratebay_api.js');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const NAME = 'ThePirateBay';
const EMPTY_HASH = '0000000000000000000000000000000000000000';
const Op = Sequelize.Op;
const limiter = new Bottleneck({ maxConcurrent: 10 });
/**
 * Re-checks movie torrents created within the last two weeks and removes
 * entries that TPB now reports as fake.
 */
async function scrape() {
  console.log(`Starting ${NAME} fake removal...`);
  const startCreatedAt = moment().subtract(14, 'day');
  const endCreatedAt = moment();
  const whereQuery = {
    provider: NAME,
    type: Type.MOVIE,
    createdAt: { [Op.between]: [startCreatedAt, endCreatedAt] }
  };
  const torrents = await repository.getTorrentsBasedOnQuery(whereQuery);
  console.log(`Checking for ${NAME} fake entries in ${torrents.length} torrents`);
  // Check each candidate with bounded concurrency via the shared limiter.
  const results = await Promise.all(
      torrents.map(candidate => limiter.schedule(() => removeIfFake(candidate))));
  const removed = results.filter(result => result);
  console.log(`Finished ${NAME} fake removal with ${removed.length} removals in ${results.length} torrents`);
}
/**
 * Deletes the stored torrent when its TPB details page reports the all-zero
 * info hash (TPB's marker for fake/removed uploads). Resolves to null when
 * nothing was removed.
 */
async function removeIfFake(torrent) {
  const tpbTorrentInfo = await thepiratebay.torrent(torrent.torrentId).catch(() => null);
  if (!tpbTorrentInfo || tpbTorrentInfo.infoHash !== EMPTY_HASH) {
    return null;
  }
  console.log(`Removing ${NAME} fake torrent [${torrent.torrentId}][${torrent.infoHash}] ${torrent.title}`);
  return repository.deleteTorrent(torrent).catch(() => null);
}
module.exports = { scrape, NAME };

View File

@@ -1,98 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const thepiratebay = require('./thepiratebay_api.js');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'ThePirateBay';
const limiter = new Bottleneck({ maxConcurrent: 10 });
const allowedCategories = [
thepiratebay.Categories.VIDEO.MOVIES,
thepiratebay.Categories.VIDEO.MOVIES_HD,
thepiratebay.Categories.VIDEO.MOVIES_3D,
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
const seriesCategories = [
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
/**
 * Runs a latest-torrents pass over all allowed TPB categories and persists
 * the scrape timestamp on the provider record once it completes.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
// Seeder refresh is intentionally disabled for TPB; the real lookup is kept
// commented out below for reference. Returning an empty array signals
// "no seeder update available" to the caller.
async function updateSeeders(torrent) {
  // return limiter.schedule(() => thepiratebay.torrent(torrent.torrentId));
  return Promise.resolve([]);
}
/**
 * Scrapes every allowed category one after another (to avoid hammering the
 * site) and flattens the per-category results into a single array.
 */
async function scrapeLatestTorrents() {
  const scrapers = allowedCategories
      .map(category => () => scrapeLatestTorrentsForCategory(category));
  const entries = await Promises.sequence(scrapers);
  return entries.reduce((a, b) => a.concat(b), []);
}
/**
 * Fetches one browse page for a category, processes every torrent on it,
 * then recurses to the next page until results run out or the per-category
 * page limit is reached.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  let torrents;
  try {
    torrents = await thepiratebay.browse({ category, page });
  } catch (error) {
    // Browse failures are non-fatal: log and stop paging this category.
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    torrents = [];
  }
  const resolved = await Promise.all(
      torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))));
  if (resolved.length > 0 && page < getUntilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
/**
 * Validates a browsed TPB record and ingests it as a torrent entry.
 * Already-stored records are refreshed and returned unchanged.
 */
async function processTorrentRecord(record) {
  // Fix: validate BEFORE the repository lookup — the original passed a
  // possibly-null record to checkAndUpdateTorrent and only null-checked after.
  if (!record || !allowedCategories.includes(record.subcategory)) {
    return 'Invalid torrent record';
  }
  if (await checkAndUpdateTorrent(record)) {
    return record;
  }
  const isSeries = seriesCategories.includes(record.subcategory);
  const torrent = {
    infoHash: record.infoHash,
    provider: NAME,
    torrentId: record.torrentId,
    // Normalise whitespace runs in the scraped title.
    title: record.name.replace(/\t|\s+/g, ' '),
    type: isSeries ? Type.SERIES : Type.MOVIE,
    size: record.size,
    seeders: record.seeders,
    uploadDate: record.uploadDate,
    // Only series pages carry a trustworthy imdbId here; movies resolve it later.
    imdbId: isSeries && record.imdbId || undefined,
    languages: record.languages && record.languages.trim() || undefined
  };
  return createTorrentEntry(torrent);
}
// Page depth to crawl per category: the niche 3D feed gets one page, TV
// categories are crawled deep, everything else gets a medium depth.
function getUntilPage(category) {
  if (category === thepiratebay.Categories.VIDEO.MOVIES_3D) {
    return 1;
  }
  if (seriesCategories.includes(category)) {
    return 10;
  }
  return 5;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,112 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const LineByLineReader = require('line-by-line');
const decode = require('magnet-uri');
const thepiratebay = require('./thepiratebay_api.js');
const { Type } = require('../../lib/types');
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
const NAME = 'ThePirateBay';
const CSV_FILE_PATH = '/tmp/tpb.csv';
const limiter = new Bottleneck({ maxConcurrent: 40 });
// Streams the TPB CSV dump line by line and ingests each torrent row,
// pausing the reader whenever the rate limiter's queue is saturated.
async function scrape() {
  // await processTorrentRecord({ torrentId: 26877339, category: 'Video' });
  console.log(`starting to scrape tpb dump...`);
  //const checkPoint = moment('2013-06-16 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
  // Resume point: rows up to this line number were handled by a previous run.
  const checkPoint = 4115000;
  let entriesProcessed = 0;
  const lr = new LineByLineReader(CSV_FILE_PATH);
  lr.on('line', (line) => {
    if (entriesProcessed % 1000 === 0) {
      console.log(`Processed ${entriesProcessed} entries`);
    }
    if (entriesProcessed <= checkPoint) {
      entriesProcessed++;
      return;
    }
    // Split the CSV row while keeping quoted fields (which may contain commas) intact.
    const row = line.match(/(?<=^|,)(".*"|[^,]*)(?=,|$)/g);
    if (row.length !== 10) {
      console.log(`Invalid row: ${line}`);
      return;
    }
    const torrent = {
      torrentId: row[0],
      // Strip surrounding quotes and decode common HTML entities from the title.
      title: row[1]
          .replace(/^"|"$/g, '')
          .replace(/&amp;/g, '&')
          .replace(/&\w{2,6};/g, ' ')
          .replace(/\s+/g, ' ')
          .trim(),
      size: parseInt(row[2], 10),
      category: row[4],
      subcategory: row[5],
      // Fall back to decoding the magnet link when the hash column is empty.
      infoHash: row[7].toLowerCase() || decode(row[9]).infoHash,
      magnetLink: row[9],
      uploadDate: moment(row[8]).toDate(),
    };
    // Backpressure: stop reading while the limiter still has queued jobs.
    if (!limiter.empty()) {
      lr.pause()
    }
    limiter.schedule(() => processTorrentRecord(torrent)
        .catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
        .then(() => limiter.empty())
        .then((empty) => empty && lr.resume())
        .then(() => entriesProcessed++);
  });
  lr.on('error', (err) => {
    console.log(err);
  });
  lr.on('end', () => {
    console.log(`finished to scrape tpb dump!`);
  });
}
const allowedCategories = [
thepiratebay.Categories.VIDEO.MOVIES,
thepiratebay.Categories.VIDEO.MOVIES_HD,
thepiratebay.Categories.VIDEO.MOVIES_DVDR,
thepiratebay.Categories.VIDEO.MOVIES_3D,
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
const seriesCategories = [
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
/**
 * Classifies one dump row: non-video rows and disallowed subcategories are
 * recorded as skips, known rows are left alone, and the rest are ingested
 * after fetching full details from TPB.
 */
async function processTorrentRecord(record) {
  if (record.category !== 'Video') {
    return createSkipTorrentEntry(record);
  }
  if (await getStoredTorrentEntry(record)) {
    return;
  }
  const details = await thepiratebay.torrent(record.torrentId);
  if (!details || !allowedCategories.includes(details.subcategory)) {
    return createSkipTorrentEntry(record);
  }
  const isSeries = seriesCategories.includes(details.subcategory);
  return createTorrentEntry({
    infoHash: details.infoHash,
    provider: NAME,
    torrentId: details.torrentId,
    title: details.name,
    size: details.size,
    type: isSeries ? Type.SERIES : Type.MOVIE,
    imdbId: details.imdbId,
    uploadDate: details.uploadDate,
    seeders: details.seeders,
  });
}
module.exports = { scrape, NAME };

View File

@@ -1,119 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const decode = require('magnet-uri');
const { parse } = require('parse-torrent-title');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
const baseUrl = 'https://www.torrent9.st'
const defaultTimeout = 10000;
const pageSize = 50;
const Categories = {
MOVIE: 'films',
TV: 'series',
};
/**
 * Fetches and parses a Torrent9 details page, retrying up to `retries`
 * times before rejecting.
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} search`));
  }
  return singleRequest(`${baseUrl}/torrent/${torrentId}`)
      .then(parseTorrentPage)
      .then((parsed) => ({ torrentId, ...parsed }))
      .catch((err) => {
        console.warn(`Failed Torrent9 ${torrentId} request: `, err);
        return torrent(torrentId, config, retries - 1)
      });
}
function browse(config = {}, retries = 2, error = null) {
if (retries === 0) {
return Promise.reject(error || new Error(`Failed browse request`));
}
const page = config.page || 1;
const category = config.category;
const offset = (page - 1) * pageSize + 1;
return singleRequest(`${baseUrl}/torrents/${category}/${offset}`)
.then((body) => parseTableBody(body))
.catch((err) => browse(config, retries - 1, err));
}
function singleRequest(requestUrl) {
const headers = {
'user-agent': getRandomUserAgent(),
'accept-encoding': 'gzip, deflate',
'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4'
};
const options = { headers, timeout: defaultTimeout };
return axios.get(requestUrl, options)
.then(response => {
const body = response.data;
if (!body || !body.length) {
throw new Error(`No body: ${requestUrl} with status ${response.status}`);
}
return body;
})
.catch(error => Promise.reject(error.message || error));
}
/**
 * Parses a Torrent9 browse-page table into an array of
 * { name, torrentId, seeders } records. Rows that fail to parse are
 * logged and skipped.
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    const torrents = [];
    $('tbody tr').each((i, element) => {
      const row = $(element);
      const titleElement = row.find('td a');
      try {
        torrents.push({
          name: titleElement.text().trim(),
          torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
          // Fix: pass an explicit radix to parseInt.
          seeders: parseInt(row.find('span.seed_ok').first().text(), 10),
        });
      } catch (e) {
        // Fix: copy-paste error — this is the Torrent9 parser, the original
        // message blamed TorrentGalaxy.
        console.error('Failed parsing Torrent9 row: ', e);
      }
    });
    resolve(torrents);
  });
}
// Extracts torrent metadata from a Torrent9 details page. The selectors are
// positional (nth-of-type on the information lists), so they assume the
// site's fixed page layout.
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    const details = $('.movie-detail');
    const magnetLink = details.find('a[href^="magnet"]').first().attr('href');
    const torrentLink = details.find('div.download-btn:nth-of-type(1) a').first().attr('href');
    // Prefer the bolded name inside the description; fall back to page headings.
    const name = details.find('p strong').contents().filter((_, e) => e.type === 'text').text() || $('h5, h1').text();
    const languages = parse(name).languages;
    const torrent = {
      title: name.trim(),
      // Derive the info hash from the magnet link when one is present.
      infoHash: magnetLink ? decode(magnetLink).infoHash : undefined,
      magnetLink: magnetLink,
      torrentLink: torrentLink ? `${baseUrl}${torrentLink}` : undefined,
      seeders: parseInt(details.find('.movie-information ul:nth-of-type(1) li:nth-of-type(3)').text(), 10),
      category: details.find('ul:nth-of-type(4) a').attr('href').match(/\/(\w+)$/)[1],
      size: parseSize(details.find('ul:nth-of-type(2) li:nth-of-type(3)').text()),
      uploadDate: moment(details.find('ul:nth-of-type(3) li:nth-of-type(3)').text(), 'DD/MM/YYYY').toDate(),
      // NOTE(review): tags 'french' only when the parsed title does NOT already
      // name it — confirm this inversion is intentional.
      languages: languages && languages.includes('french') ? undefined : 'french',
    };
    resolve(torrent);
  });
}
module.exports = { torrent, browse, Categories };

View File

@@ -1,104 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const torrent9 = require('./torrent9_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'Torrent9';
const TYPE_MAPPING = typeMapping();
const api_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 5000 });
const limiter = new Bottleneck({ maxConcurrent: 10 });
const allowedCategories = [
torrent9.Categories.MOVIE,
torrent9.Categories.TV,
];
const clients = [
torrent9
];
async function scrape() {
const scrapeStart = moment();
const lastScrape = await repository.getProvider({ name: NAME });
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
return scrapeLatestTorrents()
.then(() => {
lastScrape.lastScraped = scrapeStart;
return lastScrape.save();
})
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
async function scrapeLatestTorrents() {
const scrapeFunctions = allowedCategories
.map(category => clients.map(client => () => scrapeLatestTorrentsForCategory(client, category)))
.reduce((a, b) => a.concat(b), []);
return Promises.sequence(scrapeFunctions)
.then(entries => entries.reduce((a, b) => a.concat(b), []));
}
async function scrapeLatestTorrentsForCategory(client, category, page = 1) {
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
return api_limiter.schedule(() => client.browse({ category, page }))
.catch(error => {
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
return Promise.resolve([]);
})
.then(results => Promise.all(results.map(r => limiter.schedule(() => processTorrentRecord(client, r)))))
.then(resolved => resolved.length > 0 && page < getUntilPage(category)
? scrapeLatestTorrentsForCategory(client, category, page + 1)
: Promise.resolve([]));
}
async function processTorrentRecord(client, record) {
if (await checkAndUpdateTorrent({ provider: NAME, torrentId: record.torrentId })) {
return record;
}
const foundTorrent = await api_limiter.schedule(() => client.torrent(record.torrentId)).catch(() => undefined);
if (!foundTorrent || !foundTorrent.infoHash) {
console.warn(`Failed retrieving torrent ${record.torrentId}`);
return record;
}
const torrent = {
provider: NAME,
infoHash: foundTorrent.infoHash,
magnetLink: foundTorrent.magnetLink,
torrentLink: foundTorrent.torrentLink,
torrentId: foundTorrent.torrentId,
title: foundTorrent.title,
type: TYPE_MAPPING[foundTorrent.category],
size: foundTorrent.size,
seeders: foundTorrent.seeders,
uploadDate: foundTorrent.uploadDate,
imdbId: foundTorrent.imdbId,
languages: foundTorrent.languages
};
if (await checkAndUpdateTorrent(torrent)) {
console.info(`Skipping torrent ${torrent.torrentId} - [${torrent.infoHash}] ${torrent.title}`);
return torrent;
}
return createTorrentEntry(torrent).then(() => torrent);
}
function typeMapping() {
const mapping = {};
mapping[torrent9.Categories.MOVIE] = Type.MOVIE;
mapping[torrent9.Categories.TV] = Type.SERIES;
return mapping;
}
function getUntilPage(category) {
if (category === torrent9.Categories.TV) {
return 2;
}
return 1;
}
module.exports = { scrape, NAME };

View File

@@ -1,124 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const decode = require('magnet-uri');
const { parse } = require('parse-torrent-title');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
const baseUrl = 'https://www.torrent9.st'
const defaultTimeout = 10000;
const Categories = {
MOVIE: 'films',
TV: 'series',
};
function torrent(torrentId, config = {}, retries = 2) {
if (!torrentId || retries === 0) {
return Promise.reject(new Error(`Failed ${torrentId} search`));
}
return singleRequest(`${baseUrl}/torrent/${torrentId}`)
.then((body) => parseTorrentPage(body))
.then((torrent) => ({ torrentId, ...torrent }))
.catch((err) => {
console.warn(`Failed Torrent9 ${torrentId} request: `, err);
return torrent(torrentId, config, retries - 1)
});
}
function browse(config = {}, retries = 2, error = null) {
if (retries === 0) {
return Promise.reject(error || new Error(`Failed browse request`));
}
const page = config.page || 1;
const category = config.category;
return singleRequest(`${baseUrl}/torrents_${category}.html,page-${page}`)
.then((body) => parseTableBody(body))
.catch((err) => browse(config, retries - 1, err));
}
function singleRequest(requestUrl) {
const headers = {
'user-agent': getRandomUserAgent(),
'accept-encoding': 'gzip, deflate',
'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4'
};
const options = { headers, timeout: defaultTimeout };
return axios.get(requestUrl, options)
.then(response => {
const body = response.data;
if (!body || !body.length) {
throw new Error(`No body: ${requestUrl} with status ${response.status}`);
}
return body;
})
.catch(error => Promise.reject(error.message || error));
}
function parseTableBody(body) {
return new Promise((resolve, reject) => {
const $ = cheerio.load(body);
if (!$) {
reject(new Error('Failed loading body'));
}
const torrents = [];
$('tr').each((i, element) => {
const row = $(element);
const titleElement = row.find('td a');
if (titleElement.length) {
torrents.push({
title: titleElement.attr('title').trim(),
torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
seeders: parseInt(row.find('span.seed_ok').first().text()),
});
}
});
resolve(torrents);
});
}
function parseTorrentPage(body) {
return new Promise((resolve, reject) => {
const $ = cheerio.load(body);
if (!$) {
reject(new Error('Failed loading body'));
}
const details = $('.movie-detail');
const magnetLink = details.find('a[href^="magnet"]').first().attr('href');
const name = getName(details) || $('h1').text();
const languages = parse(name).languages;
const torrent = {
title: name.trim(),
infoHash: magnetLink ? decode(magnetLink).infoHash : undefined,
magnetLink: magnetLink,
seeders: parseInt(details.find('.movie-information ul:nth-of-type(1) li:nth-of-type(3)').text(), 10),
category: details.find('ul:nth-of-type(4) a').attr('href').match(/_(\w+)\.html$/)[1],
size: parseSize(details.find('ul:nth-of-type(2) li:nth-of-type(3)').text()),
uploadDate: moment(details.find('ul:nth-of-type(3) li:nth-of-type(3)').text(), 'DD/MM/YYYY').toDate(),
languages: languages && languages.includes('french') ? undefined : 'french',
};
resolve(torrent);
});
}
function getName(details) {
const nameElement = details.find('p strong');
if (nameElement.length === 1) {
return nameElement.contents().filter((_, elem) => elem.type === 'text').text()
}
const description = nameElement.parent().text();
const nameMatch = description.match(
/(?:[A-Z]+[^A-Z0-9]*|[A-Z0-9-]+(?:[a-z]+\d+)?)\.([\w-]+\.){3,}\w+(?:-\w+)?(?=[A-Z])/);
return nameMatch && nameMatch[0];
}
module.exports = { torrent, browse, Categories };

View File

@@ -1,171 +0,0 @@
const axios = require('axios');
const cheerio = require('cheerio');
const moment = require('moment');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { parseSize } = require("../scraperHelper");
const defaultProxies = [
// 'https://torrentgalaxy.to',
// 'https://torrentgalaxy.mx',
'https://torrentgalaxy.su'
];
const defaultTimeout = 10000;
const Categories = {
ANIME: '28',
MOVIE_4K: '3',
MOVIE_PACKS: '4',
MOVIE_SD: '1',
MOVIE_HD: '42',
MOVIE_CAM: '45',
MOVIE_BOLLYWOOD: '46',
TV_SD: '5',
TV_HD: '41',
TV_PACKS: '6',
TV_SPORT: '7',
DOCUMENTARIES: '9'
};
/**
 * Fetches a TorrentGalaxy details page through the first proxy that
 * responds, retrying up to `retries` times before rejecting.
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} search`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const requests = proxyList
      .map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`));
  return Promises.first(requests)
      .then(parseTorrentPage)
      .then((parsed) => ({ torrentId, ...parsed }))
      .catch(() => torrent(torrentId, config, retries - 1));
}
function search(keyword, config = {}, retries = 2) {
if (!keyword || retries === 0) {
return Promise.reject(new Error(`Failed ${keyword} search`));
}
const proxyList = config.proxyList || defaultProxies;
const page = config.page || 1;
const category = config.category;
return Promises.first(proxyList
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}&search=${keyword}`)))
.then((body) => parseTableBody(body))
.catch(() => search(keyword, config, retries - 1));
}
function browse(config = {}, retries = 2, error = null) {
if (retries === 0) {
return Promise.reject(error || new Error(`Failed browse request`));
}
const proxyList = config.proxyList || defaultProxies;
const page = config.page || 1;
const category = config.category;
return Promises.first(proxyList
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`)))
.then((body) => parseTableBody(body))
.catch((err) => browse(config, retries - 1, err));
}
function singleRequest(requestUrl) {
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: defaultTimeout };
return axios.get(requestUrl, options)
.then((response) => {
const body = response.data;
if (!body) {
throw new Error(`No body: ${requestUrl} with status ${response.status}`);
} else if (body.includes('Access Denied')) {
console.log(`Access Denied: ${requestUrl}`);
throw new Error(`Access Denied: ${requestUrl}`);
} else if (body.includes('502: Bad gateway') ||
body.includes('403 Forbidden') ||
body.includes('Origin DNS error')) {
throw new Error(`Invalid body contents: ${requestUrl}`);
}
return body;
});
}
// Parses a TorrentGalaxy listing table into torrent records. The first div
// in .tgxtable is the header row and is skipped; any row that fails to parse
// is logged and dropped.
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      reject(new Error('Failed loading body'));
    }
    const torrents = [];
    $('.tgxtable > div').each((i, element) => {
      // Skip the table header row.
      if (i === 0) return;
      const row = $(element);
      // Second anchor in the collapsible cell is the magnet link.
      const magnetLink = row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(2)').attr('href');
      // The imdb id is embedded in a search link somewhere in the row markup.
      const imdbIdMatch = row.html().match(/search=(tt\d+)/i);
      try {
        torrents.push({
          name: row.find('.tgxtablecell div a[title]').first().text(),
          infoHash: decode(magnetLink).infoHash,
          magnetLink: magnetLink,
          torrentLink: row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(1)').first().attr('href'),
          torrentId: row.find('.tgxtablecell div a[title]').first().attr('href').match(/torrent\/(\d+)/)[1],
          // A check icon marks uploads verified by the site.
          verified: !!row.find('i.fa-check').length,
          category: row.find('div:nth-of-type(n+2) .shrink a').first().attr('href').match(/cat=(\d+)$/)[1],
          // Seeders are rendered green, leechers red.
          seeders: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'green\'] b').first().text()),
          leechers: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'#ff0000\'] b').first().text()),
          languages: row.find('.tgxtablecell img[title]').first().attr('title'),
          size: parseSize(row.find('.collapsehide span.badge-secondary').first().text()),
          uploadDate: parseDate(row.find('div.collapsehide:nth-of-type(12)').first().text()),
          imdbId: imdbIdMatch && imdbIdMatch[1],
        });
      } catch (e) {
        console.error('Failed parsing TorrentGalaxy row: ', e);
      }
    });
    resolve(torrents);
  });
}
function parseTorrentPage(body) {
return new Promise((resolve, reject) => {
const $ = cheerio.load(body);
if (!$) {
reject(new Error('Failed loading body'));
}
const content = $('div[class="torrentpagetable limitwidth"]').first();
const magnetLink = $('a[class="btn btn-danger"]').attr('href');
const imdbIdContent = $('a[title="IMDB link"]').attr('href');
const imdbIdMatch = imdbIdContent && imdbIdContent.match(/imdb\.com\/title\/(tt\d+)/i);
const torrent = {
name: content.find('.linebreakup a').first().text(),
infoHash: decode(magnetLink).infoHash,
magnetLink: magnetLink,
verified: !content.find('i.fa-exclamation-triangle').length,
torrentLink: $('a[class="btn btn-success"]').attr('href'),
seeders: parseInt(content.find('font[color=\'green\']').first().text(), 10),
category: content.find('div:nth-of-type(4) a:nth-of-type(2)').first().attr('href').match(/cat=(\d+)$/)[1],
languages: content.find('div:nth-of-type(5) div:nth-of-type(2)').first().text().trim(),
size: parseSize(content.find('div:nth-of-type(6) div:nth-of-type(2)').first().text()),
uploadDate: parseDate(content.find('div:nth-of-type(9) div:nth-of-type(2)').first().text()),
imdbId: imdbIdMatch && imdbIdMatch[1],
};
resolve(torrent);
});
}
/**
 * Converts a TorrentGalaxy timestamp string to a Date. Handles both
 * relative forms ("25 Mins ago" / "3 Hours ago") and absolute
 * "DD/MM/YY HH:mm" dates.
 */
function parseDate(dateString) {
  if (dateString.includes('ago')) {
    const amount = Number.parseInt(dateString, 10);
    const unit = dateString.includes('Min') ? 'minutes' : 'hours';
    return moment().subtract(amount, unit).toDate();
  }
  // Normalise "DD/MM/YY HH:mm" into "DD-MM-20YY HH:mm" before parsing.
  const normalized = dateString.replace(/\//g, '-').replace(/-(\d{2})\s/, '-20$1 ')
  return moment(normalized, 'DD-MM-YYYY HH:mm').toDate();
}
module.exports = { torrent, search, browse, Categories };

View File

@@ -1,132 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const torrentGalaxy = require('./torrentgalaxy_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'TorrentGalaxy';
const TYPE_MAPPING = typeMapping();
const api_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 5000 });
const limiter = new Bottleneck({ maxConcurrent: 10 });
const allowedCategories = [
torrentGalaxy.Categories.ANIME,
torrentGalaxy.Categories.MOVIE_4K,
torrentGalaxy.Categories.MOVIE_PACKS,
torrentGalaxy.Categories.MOVIE_SD,
torrentGalaxy.Categories.MOVIE_HD,
torrentGalaxy.Categories.MOVIE_CAM,
torrentGalaxy.Categories.MOVIE_BOLLYWOOD,
torrentGalaxy.Categories.TV_SD,
torrentGalaxy.Categories.TV_HD,
torrentGalaxy.Categories.TV_PACKS,
torrentGalaxy.Categories.DOCUMENTARIES,
];
const packCategories = [
torrentGalaxy.Categories.MOVIE_PACKS,
torrentGalaxy.Categories.TV_PACKS
];
async function scrape() {
const scrapeStart = moment();
const lastScrape = await repository.getProvider({ name: NAME });
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
// const ids = ['14212584'];
// return Promise.all(ids.map(id => limiter.schedule(() => torrentGalaxy.torrent(id)
// .then(torrent => processTorrentRecord(torrent)))))
// .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
return scrapeLatestTorrents()
.then(() => {
lastScrape.lastScraped = scrapeStart;
return lastScrape.save();
})
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
async function updateSeeders(torrent) {
return limiter.schedule(() => torrentGalaxy.torrent(torrent.torrentId));
}
async function scrapeLatestTorrents() {
return Promises.sequence(allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category)))
.then(entries => entries.reduce((a, b) => a.concat(b), []));
}
async function scrapeLatestTorrentsForCategory(category, page = 1) {
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
return api_limiter.schedule(() => torrentGalaxy.browse({ category, page }))
.catch(error => {
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
return Promise.resolve([]);
})
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
.then(resolved => resolved.length > 0 && page < getMaxPage(category)
? scrapeLatestTorrentsForCategory(category, page + 1)
: Promise.resolve());
}
// Maps a parsed TorrentGalaxy row onto a torrent entity and persists it.
// Unverified uploads and unmapped categories are rejected; old, seedless,
// non-pack torrents are skipped to avoid ingesting dead entries.
async function processTorrentRecord(record) {
  if (!record || !TYPE_MAPPING[record.category] || !record.verified) {
    return Promise.resolve('Invalid torrent record');
  }
  const torrent = {
    provider: NAME,
    infoHash: record.infoHash,
    torrentId: record.torrentId,
    torrentLink: record.torrentLink,
    // Normalise whitespace runs in the scraped title.
    title: record.name.replace(/\t|\s+/g, ' '),
    type: TYPE_MAPPING[record.category],
    size: record.size,
    seeders: record.seeders,
    uploadDate: record.uploadDate,
    imdbId: record.imdbId,
    pack: packCategories.includes(record.category),
    // An 'Other' language tag carries no signal, so it is dropped.
    languages: !(record.languages || '').includes('Other') ? record.languages : undefined
  };
  // Already stored: checkAndUpdateTorrent refreshed it, nothing more to do.
  if (await checkAndUpdateTorrent(torrent)) {
    return torrent;
  }
  // Skip seedless torrents older than 18 months, unless they are packs.
  const isOld = moment(torrent.uploadDate).isBefore(moment().subtract(18, 'month'));
  if (torrent.seeders === 0 && isOld && !torrent.pack) {
    console.log(`Skipping old unseeded torrent [${torrent.infoHash}] ${torrent.title}`)
    return torrent;
  }
  return createTorrentEntry(torrent).then(() => torrent);
}
function typeMapping() {
const mapping = {};
mapping[torrentGalaxy.Categories.MOVIE_SD] = Type.MOVIE;
mapping[torrentGalaxy.Categories.MOVIE_HD] = Type.MOVIE;
mapping[torrentGalaxy.Categories.MOVIE_4K] = Type.MOVIE;
mapping[torrentGalaxy.Categories.MOVIE_CAM] = Type.MOVIE;
mapping[torrentGalaxy.Categories.MOVIE_PACKS] = Type.MOVIE;
mapping[torrentGalaxy.Categories.MOVIE_BOLLYWOOD] = Type.MOVIE;
mapping[torrentGalaxy.Categories.DOCUMENTARIES] = Type.MOVIE;
mapping[torrentGalaxy.Categories.TV_SD] = Type.SERIES;
mapping[torrentGalaxy.Categories.TV_HD] = Type.SERIES;
mapping[torrentGalaxy.Categories.TV_PACKS] = Type.SERIES;
mapping[torrentGalaxy.Categories.TV_SPORT] = Type.SERIES;
mapping[torrentGalaxy.Categories.ANIME] = Type.ANIME;
return mapping;
}
// Crawl depth per category: the high-traffic movie/TV feeds get 5 pages,
// every other category gets a single page.
function getMaxPage(category) {
  const deepCategories = [
    torrentGalaxy.Categories.TV_SD,
    torrentGalaxy.Categories.TV_HD,
    torrentGalaxy.Categories.MOVIE_SD,
    torrentGalaxy.Categories.MOVIE_HD,
  ];
  return deepCategories.includes(category) ? 5 : 1;
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -1,98 +0,0 @@
const axios = require('axios');
const Promises = require('../../lib/promises');
const { getRandomUserAgent } = require('./../../lib/requestHelper');
const defaultProxies = [
'https://yts.mx'
];
const defaultTimeout = 30000;
const limit = 50;
function torrent(torrentId, config = {}, retries = 2) {
if (!torrentId || retries === 0) {
return Promise.reject(new Error(`Failed ${torrentId} search`));
}
return Promises.first(defaultProxies
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/movie_details.json?movie_id=${torrentId}`, config)))
.then(body => parseResults(body))
.catch(error => torrent(torrentId, config, retries - 1));
}
/**
 * Searches the YTS list endpoint across proxies, retrying up to `retries`
 * times before rejecting.
 */
function search(query, config = {}, retries = 2) {
  if (!query || retries === 0) {
    return Promise.reject(new Error(`Failed ${query} search`));
  }
  // Fix: encode the query so spaces and special characters survive URL
  // interpolation instead of producing a malformed request.
  const encodedQuery = encodeURIComponent(query);
  return Promises.first(defaultProxies
      .map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&query_term=${encodedQuery}`, config)))
      .then(results => parseResults(results))
      .catch(error => search(query, config, retries - 1));
}
function browse(config = {}, retries = 2) {
if (retries === 0) {
return Promise.reject(new Error(`Failed browse request`));
}
const page = config.page || 1;
return Promises.first(defaultProxies
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&page=${page}`, config)))
.then(results => parseResults(results))
.catch(error => browse(config, retries - 1));
}
/**
 * Returns the total number of browse pages reported by the YTS API.
 * Fix: use Math.ceil instead of Math.round so a final partial page of
 * results is not dropped from a full scrape.
 */
function maxPage() {
  return Promises.first(defaultProxies
      .map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}`)))
      .then(results => Math.ceil((results?.data?.movie_count || 0) / limit))
}
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
return axios.get(requestUrl, options)
.then(response => {
if (!response.data) {
return Promise.reject(`No body: ${requestUrl}`);
}
return Promise.resolve(response.data);
});
}
function parseResults(results) {
if (!results || !results.data || (!results.data.movie && !Array.isArray(results.data.movies))) {
console.log('Incorrect results: ', results);
return Promise.reject('Incorrect results')
}
return (results.data.movies || [results.data.movie])
.filter(movie => Array.isArray(movie.torrents))
.map(movie => parseMovie(movie))
.reduce((a, b) => a.concat(b), []);
}
/**
 * Fans one YTS movie entry out into one record per available
 * torrent/quality variant.
 */
function parseMovie(movie) {
  return movie.torrents.map(torrent => {
    const infoHash = torrent.hash.trim().toLowerCase();
    return {
      // NOTE(review): the trailing space in the name matches the original
      // output byte-for-byte — confirm whether downstream depends on it.
      name: `${movie.title} ${movie.year} ${torrent.quality} ${formatType(torrent.type)} `,
      torrentId: `${movie.id}-${infoHash}`,
      infoHash: infoHash,
      torrentLink: torrent.url,
      seeders: torrent.seeds,
      size: torrent.size_bytes,
      uploadDate: new Date(torrent.date_uploaded_unix * 1000),
      imdbId: movie.imdb_code
    };
  });
}
// Maps a YTS torrent "type" field to the conventional release-source tag.
// Known values get their canonical casing; anything else is upper-cased.
// Tolerates a missing/empty type by returning '' instead of throwing
// (the previous version crashed on undefined via type.toUpperCase()).
function formatType(type) {
  if (!type) {
    return '';
  }
  if (type === 'web') {
    return 'WEBRip';
  }
  if (type === 'bluray') {
    return 'BluRay';
  }
  return type.toUpperCase();
}
// Public API of the YTS client: torrent detail lookup, search, catalog
// browsing, and page-count discovery for full scrapes.
module.exports = { torrent, search, browse, maxPage };

View File

@@ -1,15 +0,0 @@
const moment = require("moment");
const yts = require('./yts_api');
const scraper = require('./yts_scraper')
// Runs a full YTS catalog scrape: asks the API how many pages the catalog
// currently has, then delegates the page-by-page walk to the scraper.
async function scrape() {
const scrapeStart = moment();
console.log(`[${scrapeStart}] starting ${scraper.NAME} full scrape...`);
return yts.maxPage()
.then(maxPage => scraper.scrape(maxPage))
.then(() => console.log(`[${moment()}] finished ${scraper.NAME} full scrape`));
}
// Re-export the provider name so the scheduler can identify this job.
module.exports = { scrape, NAME: scraper.NAME };

View File

@@ -1,67 +0,0 @@
const moment = require('moment');
const Bottleneck = require('bottleneck');
const yts = require('./yts_api');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
const NAME = 'YTS';
// Default number of catalog pages to walk when no maxPage is supplied.
const UNTIL_PAGE = 10;
// Caps concurrent torrent-processing work across this scraper.
const limiter = new Bottleneck({ maxConcurrent: 10 });
// Walks the YTS catalog up to maxPage, then stamps the provider row with
// the scrape start time so the next run knows when this one began.
async function scrape(maxPage) {
const scrapeStart = moment();
// NOTE(review): assumes the provider row already exists — if getProvider
// can return null/undefined, `lastScrape.lastScraped` below would throw;
// confirm against the repository implementation.
const lastScrape = await repository.getProvider({ name: NAME });
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
return scrapeLatestTorrentsForCategory(maxPage)
.then(() => {
lastScrape.lastScraped = scrapeStart;
return lastScrape.save();
})
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
// Refreshes a stored torrent's details via the YTS torrent endpoint,
// funneled through the shared rate limiter.
async function updateSeeders(torrent) {
return limiter.schedule(() => yts.torrent(torrent.torrentId));
}
// Recursively scrapes catalog pages starting at `page` until maxPage is
// reached or a page yields no torrents. A page-level failure is logged and
// treated as an empty page so one bad page does not abort the whole walk.
async function scrapeLatestTorrentsForCategory(maxPage = UNTIL_PAGE, page = 1) {
  console.log(`Scraping ${NAME} page ${page}`);
  return yts.browse(({ page }))
      .catch(error => {
        console.warn(`Failed ${NAME} scraping for [${page}] due: `, error);
        return Promise.resolve([]);
      })
      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
      .then(resolved => resolved.length > 0 && page < maxPage
          ? scrapeLatestTorrentsForCategory(maxPage, page + 1)
          : Promise.resolve());
}
// Converts one scraped record into a stored torrent entry. Records that
// already exist are only refreshed; invalid records resolve with a
// descriptive marker instead of failing the whole page.
async function processTorrentRecord(record) {
  // Validate before any downstream call so a null/blank record never
  // reaches checkAndUpdateTorrent (the original checked this too late).
  if (!record || !record.size) {
    return Promise.resolve('Invalid torrent record');
  }
  if (await checkAndUpdateTorrent(record)) {
    return record;
  }
  const torrent = {
    infoHash: record.infoHash,
    provider: NAME,
    torrentId: record.torrentId,
    // Collapse tabs/runs of whitespace left by name assembly.
    title: record.name.replace(/\t|\s+/g, ' ').trim(),
    type: Type.MOVIE,
    size: record.size,
    seeders: record.seeders,
    uploadDate: record.uploadDate,
    imdbId: record.imdbId,
  };
  return createTorrentEntry(torrent).then(() => torrent);
}
module.exports = { scrape, updateSeeders, NAME };

View File

@@ -0,0 +1 @@
*.ts

View File

@@ -0,0 +1,39 @@
// ESLint configuration for the addon: Node + ES2024 environment, import
// ordering via import-helpers, and several base rules relaxed on purpose.
/** @type {import("eslint").Linter.Config} */
module.exports = {
env: {
es2024: true,
node: true,
},
globals: {
Atomics: "readonly",
SharedArrayBuffer: "readonly",
},
parserOptions: {
sourceType: "module",
},
plugins: ["import", "import-helpers"],
rules: {
"default-case": "off",
"import/no-duplicates": "off",
"import/no-extraneous-dependencies": ["off", { devDependencies: ["backend", "frontend", "mobile"] }],
"import/order": "off",
"import-helpers/order-imports": [
"warn",
{
alphabetize: {
order: "asc",
},
},
],
"lines-between-class-members": ["error", "always", { exceptAfterSingleLine: true }],
"no-continue": "off",
"no-param-reassign": "off",
"no-plusplus": ["error", { allowForLoopAfterthoughts: true }],
"no-restricted-syntax": "off",
"no-unused-expressions": ["off", { allowShortCircuit: true }],
"no-unused-vars": "off",
"no-use-before-define": "off",
"one-var": ["error", { uninitialized: "consecutive" }],
"prefer-destructuring": "warn",
},
};

View File

@@ -0,0 +1,29 @@
# --- Build Stage ---
FROM node:lts-alpine AS builder
RUN apk update && apk upgrade && \
    apk add --no-cache git
WORKDIR /app
# Copy manifests first so the dependency layer is cached across source edits.
COPY package*.json ./
# npm ci installs exactly what the committed lockfile pins (reproducible
# builds), unlike npm install which may rewrite the lockfile.
RUN npm ci
COPY . .
RUN npm run build
# --- Runtime Stage ---
FROM node:lts-alpine
# Install pm2
RUN npm install pm2 -g
WORKDIR /app
# key=value form: the space-separated legacy ENV form is deprecated.
ENV NODE_ENV=production
COPY --from=builder /app ./
RUN npm prune --omit=dev
EXPOSE 7001
ENTRYPOINT [ "pm2-runtime", "start", "ecosystem.config.cjs"]

View File

@@ -0,0 +1 @@
# addon-jackett

View File

@@ -0,0 +1,4 @@
#!/bin/bash
# Build and publish the addon image. Fail fast (-e) so a broken build is
# never pushed to the registry; -u and pipefail catch script mistakes early.
set -euo pipefail
docker build -t ippexdeploymentscr.azurecr.io/dave/stremio-addon-jackett:latest . --platform linux/amd64
docker push ippexdeploymentscr.azurecr.io/dave/stremio-addon-jackett:latest

View File

@@ -0,0 +1,14 @@
module.exports = {
apps: [
{
name: 'stremio-jackett',
script: 'npm start',
cwd: '/app',
watch: ['./dist/index.js'],
autorestart: true,
env: {
...process.env,
},
},
],
};

View File

@@ -0,0 +1,68 @@
import { build } from 'esbuild';
import { copy } from 'esbuild-plugin-copy';
import { readFileSync, rmSync } from 'fs';

// Bundles the addon with esbuild: devDependencies stay external, static
// assets are copied next to the bundle, and import.meta usages are rewritten
// to their CommonJS equivalents so the emitted .cjs works under Node.
const { devDependencies } = JSON.parse(readFileSync('./package.json', 'utf8'));
const start = Date.now();
try {
  const outdir = 'dist';
  rmSync(outdir, { recursive: true, force: true });
  build({
    bundle: true,
    entryPoints: [
      './src/index.js',
      // "./src/**/*.css",
      // "./src/**/*.hbs",
      // "./src/**/*.html"
    ],
    external: [...(devDependencies && Object.keys(devDependencies))],
    keepNames: true,
    loader: {
      '.css': 'copy',
      '.hbs': 'copy',
      '.html': 'copy',
    },
    minify: true,
    outbase: './src',
    outdir,
    outExtension: {
      '.js': '.cjs',
    },
    platform: 'node',
    plugins: [
      {
        name: 'populate-import-meta',
        setup: ({ onLoad }) => {
          // `\\.` escapes the dot so only real .js/.ts files match; the
          // original template used `\.`, which is just `.` inside a string
          // and matched any character before "js"/"ts".
          onLoad({ filter: new RegExp(`${import.meta.dirname}/src/.*\\.(js|ts)$`) }, args => {
            const contents = readFileSync(args.path, 'utf8');
            // Rewrite the specific forms first: the generic `import.meta`
            // replacement would otherwise consume them and leave the
            // .filename/.dirname replacements as dead code.
            const transformedContents = contents
              .replace(/import\.meta\.filename/g, '__filename')
              .replace(/import\.meta\.dirname/g, '__dirname')
              .replace(/import\.meta/g, `{dirname:__dirname,filename:__filename}`);
            return { contents: transformedContents, loader: 'default' };
          });
        },
      },
      copy({
        assets: [
          {
            from: ['./static/**'],
            to: ['./static'],
          },
        ],
      }),
    ],
  }).then(() => {
    // biome-ignore lint/style/useTemplate: <explanation>
    console.log('⚡ ' + '\x1b[32m' + `Done in ${Date.now() - start}ms`);
  }).catch((e) => {
    // The surrounding try/catch cannot see this un-awaited promise's
    // rejection, so handle build failures here.
    console.log(e);
    process.exit(1);
  });
} catch (e) {
  console.log(e);
  process.exit(1);
}

View File

@@ -0,0 +1,21 @@
{
"compilerOptions": {
"baseUrl": "./src",
"checkJs": true,
"isolatedModules": true,
"lib": ["es6"],
"module": "ESNext",
"moduleResolution": "node",
"outDir": "./dist",
"pretty": true,
"removeComments": true,
"resolveJsonModule": true,
"rootDir": "./src",
"skipLibCheck": true,
"sourceMap": true,
"target": "ES6",
"types": ["node"],
"typeRoots": ["node_modules/@types", "src/@types"]
},
"exclude": ["node_modules"]
}

6024
src/node/addon-jackett/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,13 +1,13 @@
{
"name": "stremio-torrentio",
"version": "1.0.14",
"exports": "./index.js",
"type": "commonjs",
"name": "jackettio-addon",
"version": "0.0.1",
"type": "module",
"scripts": {
"start": "node index.js"
"build": "node esbuild.js",
"dev": "tsx watch --ignore node_modules src/index.js",
"start": "node dist/index.cjs",
"lint": "eslint . --ext .ts,.js"
},
"author": "TheBeastLT <pauliox@beyond.lt>",
"license": "MIT",
"dependencies": {
"@putdotio/api-client": "^8.42.0",
"all-debrid-api": "^1.1.0",
@@ -17,34 +17,32 @@
"cache-manager-mongodb": "^0.3.0",
"cors": "^2.8.5",
"debrid-link-api": "^1.0.1",
"express": "^4.18.2",
"express-rate-limit": "^6.7.0",
"google-it": "^1.6.4",
"google-sr": "^3.2.1",
"he": "^1.2.0",
"jaro-winkler": "^0.2.8",
"magnet-uri": "^6.2.0",
"name-to-imdb": "^3.0.4",
"named-queue": "^2.2.1",
"needle": "^3.3.1",
"node-schedule": "^2.1.1",
"nodejs-bing": "^0.1.0",
"nyaapi": "^2.4.4",
"offcloud-api": "^1.0.2",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#022408972c2a040f846331a912a6a8487746a654",
"pg": "^8.11.3",
"pg-hstore": "^2.3.4",
"premiumize-api": "^1.0.3",
"prom-client": "^12.0.0",
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#d1f7eaa8593b947edbfbc8a92a176448b48ef445",
"request-ip": "^3.3.0",
"router": "^1.3.8",
"rss-parser": "^3.13.0",
"sequelize": "^6.31.1",
"stremio-addon-sdk": "^1.6.10",
"sugar-date": "^2.0.6",
"swagger-stats": "^0.99.7",
"torrent-stream": "^1.2.1",
"ua-parser-js": "^1.0.36",
"user-agents": "^1.0.1444"
"url-join": "^5.0.0",
"user-agents": "^1.0.1444",
"video-name-parser": "^1.4.6",
"xml-js": "^1.6.11",
"xml2js": "^0.6.2"
},
"devDependencies": {
"@types/node": "^20.11.6",
"@types/stremio-addon-sdk": "^1.6.10",
"esbuild": "^0.19.12",
"esbuild-plugin-copy": "^2.1.1",
"eslint": "^8.56.0",
"eslint-plugin-import": "^2.29.1",
"eslint-plugin-import-helpers": "^1.3.1",
"tsx": "^4.7.0"
}
}

View File

@@ -0,0 +1,126 @@
import Bottleneck from 'bottleneck';
import {addonBuilder} from 'stremio-addon-sdk';
import {searchJackett} from "./jackett/jackett.js";
import {cacheWrapStream} from './lib/cache.js';
import {getMetaData} from "./lib/cinemetaProvider.js";
import {dummyManifest} from './lib/manifest.js';
import {cacheConfig, processConfig} from "./lib/settings.js";
import applySorting from './lib/sort.js';
import {toStreamInfo} from './lib/streamInfo.js';
import {Type} from './lib/types.js';
import {applyMochs, getMochCatalog, getMochItemMeta} from './moch/moch.js';
import StaticLinks from './moch/static.js';
const builder = new addonBuilder(dummyManifest());
// Load shedding: at most 200 stream searches run concurrently, and once 220
// are queued, new arrivals are dropped (Bottleneck OVERFLOW strategy).
const limiter = new Bottleneck({
maxConcurrent: 200,
highWater: 220,
strategy: Bottleneck.strategy.OVERFLOW
});
// Stream handler: only IMDb (tt…) and Kitsu ids are served; everything else
// gets an empty stream list. Results are cached per id.
builder.defineStreamHandler((args) => {
if (!args.id.match(/tt\d+/i) && !args.id.match(/kitsu:\d+/i)) {
return Promise.resolve({ streams: [] });
}
if (processConfig.DEBUG) {
console.log(`Incoming stream ${args.id} request`)
console.log('args', args);
}
// Only the Jackett search + stream mapping runs under the limiter; sorting,
// moch decoration and cache-param enrichment run outside the limiter but
// still inside the cache wrapper, so they are skipped on cache hits.
return cacheWrapStream(args.id, () => limiter.schedule(() =>
streamHandler(args)
.then(records => records.map(record => toStreamInfo(record, args.type))))
.then(streams => applySorting(streams, args.extra))
.then(streams => applyMochs(streams, args.extra))
.then(streams => enrichCacheParams(streams))
.catch(error => {
console.log(`Failed request ${args.id}: ${error}`);
return Promise.reject(`Failed request ${args.id}: ${error}`);
}));
});
// Catalog handler: catalog ids are namespaced "jackettio-<mochKey>"; strip
// the prefix and delegate to the matching debrid (moch) provider.
builder.defineCatalogHandler(async (args) => {
  const mochKey = args.id.replace("jackettio-", '');
  console.log(`Incoming catalog ${args.id} request with skip=${args.extra.skip || 0}`)
  try {
    const metas = await getMochCatalog(mochKey, args.extra);
    return {
      metas: metas,
      cacheMaxAge: cacheConfig.CATALOG_CACHE_MAX_AGE
    };
  } catch (error) {
    return Promise.reject(`Failed retrieving catalog ${args.id}: ${JSON.stringify(error)}`);
  }
})
// Meta handler: ids look like "<mochKey>:<metaId>". The synthetic
// "Downloads" meta is never cached so fresh downloads show up immediately.
builder.defineMetaHandler(async (args) => {
  const [mochKey, metaId] = args.id.split(':');
  console.log(`Incoming debrid meta ${args.id} request`)
  try {
    const meta = await getMochItemMeta(mochKey, metaId, args.extra);
    return {
      meta: meta,
      cacheMaxAge: metaId === 'Downloads' ? 0 : cacheConfig.CACHE_MAX_AGE
    };
  } catch (error) {
    return Promise.reject(`Failed retrieving catalog meta ${args.id}: ${JSON.stringify(error)}`);
  }
})
// Routes a stream request to the movie or series search path; any other
// content type is rejected.
async function streamHandler(args) {
  switch (args.type) {
    case Type.MOVIE:
      return movieRecordsHandler(args);
    case Type.SERIES:
      return seriesRecordsHandler(args);
    default:
      return Promise.reject('not supported type');
  }
}
// Searches Jackett for a series episode. Only fully-qualified ids of the
// form tt<imdb>:<season>:<episode> are handled; anything else yields [].
async function seriesRecordsHandler(args) {
  if (!args.id.match(/^tt\d+:\d+:\d+$/)) {
    return [];
  }
  const [, seasonPart, episodePart] = args.id.split(':');
  const season = seasonPart !== undefined ? parseInt(seasonPart, 10) : 1;
  const episode = episodePart !== undefined ? parseInt(episodePart, 10) : 1;
  const metaData = await getMetaData(args);
  return await searchJackett({
    type: Type.SERIES,
    season: season,
    episode: episode,
    name: metaData.name,
  });
}
// Searches Jackett for a movie. Only bare IMDb ids (tt<digits>) are
// handled; anything else yields [].
async function movieRecordsHandler(args) {
  if (!args.id.match(/^tt\d+$/)) {
    return [];
  }
  const metaData = await getMetaData(args);
  const { name, year } = metaData;
  return await searchJackett({
    type: Type.MOVIE,
    name: name,
    year: year,
  });
}
// Chooses cache lifetimes for a stream response: empty results get the
// short "empty" TTL, and responses consisting only of failed-access
// placeholder links are not cached at all.
function enrichCacheParams(streams) {
  let cacheAge = cacheConfig.CACHE_MAX_AGE;
  if (streams.length === 0) {
    cacheAge = cacheConfig.CACHE_MAX_AGE_EMPTY;
  } else {
    const allFailed = streams.every(stream => stream?.url?.endsWith(StaticLinks.FAILED_ACCESS));
    if (allFailed) {
      cacheAge = 0;
    }
  }
  return {
    streams: streams,
    cacheMaxAge: cacheAge,
    staleRevalidate: cacheConfig.STALE_REVALIDATE_AGE,
    staleError: cacheConfig.STALE_ERROR_AGE
  }
}
export default builder.getInterface();

View File

@@ -0,0 +1,13 @@
import express from 'express';
import { initBestTrackers } from './lib/magnetHelper.js';
import {processConfig} from "./lib/settings.js";
import serverless from './serverless.js';
const app = express();
// Runs behind a reverse proxy; honor X-Forwarded-* headers for client IPs.
app.enable('trust proxy');
// Static assets change only on deploy, so cache them aggressively.
app.use(express.static('static', { maxAge: '1y' }));
// All remaining routes go through the serverless-style router.
app.use((req, res) => serverless(req, res));
app.listen(processConfig.PORT, () => {
// Warm the best-trackers list before announcing readiness.
initBestTrackers()
.then(() => console.log(`Started addon at: http://localhost:${processConfig.PORT}`));
});

View File

@@ -0,0 +1,151 @@
import videoNameParser from "video-name-parser";
import {parseStringPromise as parseString} from "xml2js";
import {processConfig, jackettConfig} from "../lib/settings.js";
// Pulls the human-readable size (the text after the 💾 marker) out of a
// torrent title and converts it to bytes; returns 0 when no marker exists.
export function extractSize(title) {
  const match = title.match(/💾 ([\d.]+ \w+)/);
  if (!match) {
    return 0;
  }
  return parseSize(match[1]) || 0;
}

// Converts a human-readable size string ("1.4 GB", "700 MB", …) to bytes,
// flooring the result. Missing input yields 0; an unrecognized unit is
// treated as plain bytes. Thousands separators (commas) are stripped.
export function parseSize(sizeText) {
  if (!sizeText) {
    return 0;
  }
  const unitScales = [
    ['TB', 1024 ** 4],
    ['GB', 1024 ** 3],
    ['MB', 1024 ** 2],
    ['kB', 1024],
  ];
  const matched = unitScales.find(([unit]) => sizeText.includes(unit));
  const scale = matched ? matched[1] : 1;
  return Math.floor(parseFloat(sizeText.replace(/,/g, '')) * scale);
}
// Parses a release name with video-name-parser; the ".mp4" suffix makes the
// parser treat the input as a file name.
export const parseVideo = (name) => videoNameParser(`${name}.mp4`);
// Builds the canonical SxxEyy tag, zero-padding single-digit numbers.
export const episodeTag = (season, episode) => {
  const pad = (value) => (value < 10 ? `0${value}` : value);
  return `S${pad(season)}E${pad(episode)}`;
};
// Normalizes a title for loose matching: common punctuation becomes spaces,
// whitespace runs collapse, apostrophes and stray backslash escapes are
// stripped. Note: the result is not trimmed.
export const cleanName = (name) => {
  return name
      .replace(/[._\-()\[\]:,]/g, ' ')
      .replace(/\s+/g, ' ')
      .replace(/'/g, '')
      .replace(/\\\\/g, '\\')
      .replace(/\\\\'|\\'|\\\\"|\\"/g, '');
};
// Inserts newObject into sortedArray, which is kept sorted descending by
// sortingProperty and capped at maxSize elements. Returns true when the
// object was inserted, false when the array is full and the object would
// sort after every existing element. Mutates sortedArray in place.
export const insertIntoSortedArray = (sortedArray, newObject, sortingProperty, maxSize) => {
  const position = sortedArray.findIndex(item => item[sortingProperty] < newObject[sortingProperty]);
  if (position === -1) {
    // Sorts after everything currently present: append only if there is room.
    if (sortedArray.length >= maxSize) {
      return false;
    }
    sortedArray.push(newObject);
    return true;
  }
  // Keep descending order, then trim any overflow off the tail.
  sortedArray.splice(position, 0, newObject);
  if (sortedArray.length > maxSize) {
    sortedArray.pop();
  }
  return true;
};
// Derives the "extra" release-info tail of a torrent name (quality, codec,
// group, …) by stripping the parsed title, year, episode tag and the search
// query, then re-anchoring on the original string to preserve its casing.
export const extraTag = (name, searchQuery) => {
  // parseVideo already appends ".mp4"; the original passed name + '.mp4'
  // here, so the parser saw "name.mp4.mp4".
  const parsedName = parseVideo(name);
  let extraTag = cleanName(name);
  searchQuery = cleanName(searchQuery);
  // NOTE(review): searchQuery is interpolated into a RegExp unescaped —
  // regex metacharacters in a query could break or abuse the pattern.
  extraTag = extraTag.replace(new RegExp(searchQuery, 'gi'), '');
  extraTag = extraTag.replace(new RegExp(parsedName.name, 'gi'), '');
  if (parsedName.year) {
    extraTag = extraTag.replace(parsedName.year.toString(), '');
  }
  if (parsedName.season && parsedName.episode && parsedName.episode.length) {
    extraTag = extraTag.replace(new RegExp(episodeTag(parsedName.season, parsedName.episode[0]), 'gi'), '');
  }
  extraTag = extraTag.trim();
  let extraParts = extraTag.split(' ');
  if (parsedName.season && parsedName.episode && parsedName.episode.length) {
    // Re-attach multi-episode suffixes like "S01E01-02" if present in name.
    if (extraParts[0] && extraParts[0].length === 2 && !isNaN(extraParts[0])) {
      const possibleEpTag = `${episodeTag(parsedName.season, parsedName.episode[0])}-${extraParts[0]}`;
      if (name.toLowerCase().includes(possibleEpTag.toLowerCase())) {
        extraParts[0] = possibleEpTag;
      }
    }
  }
  const foundPart = name.toLowerCase().indexOf(extraParts[0].toLowerCase());
  if (foundPart > -1) {
    // Take the tail of the original name from the first surviving token, so
    // the tag keeps the original punctuation/casing.
    extraTag = name.substring(foundPart).replace(/[_()\[\],]/g, ' ');
    if ((extraTag.match(/\./g) || []).length > 1) {
      extraTag = extraTag.replace(/\./g, ' ');
    }
    extraTag = extraTag.replace(/\s+/g, ' ');
  }
  return extraTag;
};
// Parses a Torznab RSS payload into size-sorted result records. Only items
// exposing an infohash are kept, capped at jackettConfig.MAXIMUM_RESULTS.
export const transformData = async (data, query) => {
  // Log the query name, not the raw XML payload (the original logged `data`).
  console.log("Transforming data for query " + query.name);
  let results = [];
  const parsedData = await parseString(data);
  // Guard the whole path: a malformed or empty feed yields no items instead
  // of throwing on parsedData.rss.channel[0].
  if (!parsedData?.rss?.channel?.[0]?.item) {
    return [];
  }
  for (const rssItem of parsedData.rss.channel[0].item) {
    // Flatten <torznab:attr name=… value=…> pairs into a plain object.
    let torznabData = {};
    rssItem["torznab:attr"].forEach((torznabDataItem) =>
      Object.assign(torznabData, {
        [torznabDataItem.$.name]: torznabDataItem.$.value,
      })
    );
    if (torznabData.infohash) {
      const [title, pubDate, category, size] = [rssItem.title[0], rssItem.pubDate[0], rssItem.category[0], rssItem.size[0]];
      torznabData = {
        ...torznabData,
        title,
        pubDate,
        category,
        size,
        extraTag: extraTag(title, query.name)
      };
      // NOTE(review): rssItem.size[0] is a string, so sorting by 'size'
      // compares lexicographically — consider Number(size); confirm what
      // downstream consumers expect before changing the field type.
      if (insertIntoSortedArray(results, torznabData, 'size', jackettConfig.MAXIMUM_RESULTS)) {
        processConfig.DEBUG && console.log(torznabData);
      }
    }
  }
  return results;
};

Some files were not shown because too many files have changed in this diff Show More