mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
Merge pull request #26 from iPromKnight/master
Big rewrite of scraping solution
This commit is contained in:
1319
.editorconfig
Normal file
1319
.editorconfig
Normal file
File diff suppressed because it is too large
Load Diff
406
.gitignore
vendored
406
.gitignore
vendored
@@ -1,5 +1,405 @@
|
||||
/.idea
|
||||
**/node_modules
|
||||
**.env
|
||||
.now
|
||||
/scraper/manual/examples.js
|
||||
.DS_Store
|
||||
.idea
|
||||
|
||||
## Ignore Visual Studio temporary files, build results, and
|
||||
## files generated by popular Visual Studio add-ons.
|
||||
##
|
||||
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
|
||||
|
||||
# User-specific files
|
||||
*.rsuser
|
||||
*.suo
|
||||
*.user
|
||||
*.userosscache
|
||||
*.sln.docstates
|
||||
|
||||
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||
*.userprefs
|
||||
|
||||
# Mono auto generated files
|
||||
mono_crash.*
|
||||
|
||||
# Build results
|
||||
[Dd]ebug/
|
||||
[Dd]ebugPublic/
|
||||
[Rr]elease/
|
||||
[Rr]eleases/
|
||||
x64/
|
||||
x86/
|
||||
[Ww][Ii][Nn]32/
|
||||
[Aa][Rr][Mm]/
|
||||
[Aa][Rr][Mm]64/
|
||||
bld/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
[Ll]og/
|
||||
[Ll]ogs/
|
||||
|
||||
# Visual Studio 2015/2017 cache/options directory
|
||||
.vs/
|
||||
# Uncomment if you have tasks that create the project's static files in wwwroot
|
||||
#wwwroot/
|
||||
|
||||
# Visual Studio 2017 auto generated files
|
||||
Generated\ Files/
|
||||
|
||||
# MSTest test Results
|
||||
[Tt]est[Rr]esult*/
|
||||
[Bb]uild[Ll]og.*
|
||||
|
||||
# NUnit
|
||||
*.VisualState.xml
|
||||
TestResult.xml
|
||||
nunit-*.xml
|
||||
|
||||
# Build Results of an ATL Project
|
||||
[Dd]ebugPS/
|
||||
[Rr]eleasePS/
|
||||
dlldata.c
|
||||
|
||||
# Benchmark Results
|
||||
BenchmarkDotNet.Artifacts/
|
||||
|
||||
# .NET Core
|
||||
project.lock.json
|
||||
project.fragment.lock.json
|
||||
artifacts/
|
||||
|
||||
# ASP.NET Scaffolding
|
||||
ScaffoldingReadMe.txt
|
||||
|
||||
# StyleCop
|
||||
StyleCopReport.xml
|
||||
|
||||
# Files built by Visual Studio
|
||||
*_i.c
|
||||
*_p.c
|
||||
*_h.h
|
||||
*.ilk
|
||||
*.meta
|
||||
*.obj
|
||||
*.iobj
|
||||
*.pch
|
||||
*.pdb
|
||||
*.ipdb
|
||||
*.pgc
|
||||
*.pgd
|
||||
*.rsp
|
||||
*.sbr
|
||||
*.tlb
|
||||
*.tli
|
||||
*.tlh
|
||||
*.tmp
|
||||
*.tmp_proj
|
||||
*_wpftmp.csproj
|
||||
*.log
|
||||
*.tlog
|
||||
*.vspscc
|
||||
*.vssscc
|
||||
.builds
|
||||
*.pidb
|
||||
*.svclog
|
||||
*.scc
|
||||
|
||||
# Chutzpah Test files
|
||||
_Chutzpah*
|
||||
|
||||
# Visual C++ cache files
|
||||
ipch/
|
||||
*.aps
|
||||
*.ncb
|
||||
*.opendb
|
||||
*.opensdf
|
||||
*.sdf
|
||||
*.cachefile
|
||||
*.VC.db
|
||||
*.VC.VC.opendb
|
||||
|
||||
# Visual Studio profiler
|
||||
*.psess
|
||||
*.vsp
|
||||
*.vspx
|
||||
*.sap
|
||||
|
||||
# Visual Studio Trace Files
|
||||
*.e2e
|
||||
|
||||
# TFS 2012 Local Workspace
|
||||
$tf/
|
||||
|
||||
# Guidance Automation Toolkit
|
||||
*.gpState
|
||||
|
||||
# ReSharper is a .NET coding add-in
|
||||
_ReSharper*/
|
||||
*.[Rr]e[Ss]harper
|
||||
*.DotSettings.user
|
||||
|
||||
# TeamCity is a build add-in
|
||||
_TeamCity*
|
||||
|
||||
# DotCover is a Code Coverage Tool
|
||||
*.dotCover
|
||||
|
||||
# AxoCover is a Code Coverage Tool
|
||||
.axoCover/*
|
||||
!.axoCover/settings.json
|
||||
|
||||
# Coverlet is a free, cross platform Code Coverage Tool
|
||||
coverage*.json
|
||||
coverage*.xml
|
||||
coverage*.info
|
||||
|
||||
# Visual Studio code coverage results
|
||||
*.coverage
|
||||
*.coveragexml
|
||||
|
||||
# NCrunch
|
||||
_NCrunch_*
|
||||
.*crunch*.local.xml
|
||||
nCrunchTemp_*
|
||||
|
||||
# MightyMoose
|
||||
*.mm.*
|
||||
AutoTest.Net/
|
||||
|
||||
# Web workbench (sass)
|
||||
.sass-cache/
|
||||
|
||||
# Installshield output folder
|
||||
[Ee]xpress/
|
||||
|
||||
# DocProject is a documentation generator add-in
|
||||
DocProject/buildhelp/
|
||||
DocProject/Help/*.HxT
|
||||
DocProject/Help/*.HxC
|
||||
DocProject/Help/*.hhc
|
||||
DocProject/Help/*.hhk
|
||||
DocProject/Help/*.hhp
|
||||
DocProject/Help/Html2
|
||||
DocProject/Help/html
|
||||
|
||||
# Click-Once directory
|
||||
publish/
|
||||
|
||||
# Publish Web Output
|
||||
*.[Pp]ublish.xml
|
||||
*.azurePubxml
|
||||
# Note: Comment the next line if you want to checkin your web deploy settings,
|
||||
# but database connection strings (with potential passwords) will be unencrypted
|
||||
*.pubxml
|
||||
*.publishproj
|
||||
|
||||
# Microsoft Azure Web App publish settings. Comment the next line if you want to
|
||||
# checkin your Azure Web App publish settings, but sensitive information contained
|
||||
# in these scripts will be unencrypted
|
||||
PublishScripts/
|
||||
|
||||
# NuGet Packages
|
||||
*.nupkg
|
||||
# NuGet Symbol Packages
|
||||
*.snupkg
|
||||
# The packages folder can be ignored because of Package Restore
|
||||
**/[Pp]ackages/*
|
||||
# except build/, which is used as an MSBuild target.
|
||||
!**/[Pp]ackages/build/
|
||||
# Uncomment if necessary however generally it will be regenerated when needed
|
||||
#!**/[Pp]ackages/repositories.config
|
||||
# NuGet v3's project.json files produces more ignorable files
|
||||
*.nuget.props
|
||||
*.nuget.targets
|
||||
|
||||
# Microsoft Azure Build Output
|
||||
csx/
|
||||
*.build.csdef
|
||||
|
||||
# Microsoft Azure Emulator
|
||||
ecf/
|
||||
rcf/
|
||||
|
||||
# Windows Store app package directories and files
|
||||
AppPackages/
|
||||
BundleArtifacts/
|
||||
Package.StoreAssociation.xml
|
||||
_pkginfo.txt
|
||||
*.appx
|
||||
*.appxbundle
|
||||
*.appxupload
|
||||
|
||||
# Visual Studio cache files
|
||||
# files ending in .cache can be ignored
|
||||
*.[Cc]ache
|
||||
# but keep track of directories ending in .cache
|
||||
!?*.[Cc]ache/
|
||||
|
||||
# Others
|
||||
ClientBin/
|
||||
~$*
|
||||
*~
|
||||
*.dbmdl
|
||||
*.dbproj.schemaview
|
||||
*.jfm
|
||||
*.pfx
|
||||
*.publishsettings
|
||||
orleans.codegen.cs
|
||||
|
||||
# Including strong name files can present a security risk
|
||||
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
|
||||
#*.snk
|
||||
|
||||
# Since there are multiple workflows, uncomment next line to ignore bower_components
|
||||
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
|
||||
#bower_components/
|
||||
|
||||
# RIA/Silverlight projects
|
||||
Generated_Code/
|
||||
|
||||
# Backup & report files from converting an old project file
|
||||
# to a newer Visual Studio version. Backup files are not needed,
|
||||
# because we have git ;-)
|
||||
_UpgradeReport_Files/
|
||||
Backup*/
|
||||
UpgradeLog*.XML
|
||||
UpgradeLog*.htm
|
||||
ServiceFabricBackup/
|
||||
*.rptproj.bak
|
||||
|
||||
# SQL Server files
|
||||
*.mdf
|
||||
*.ldf
|
||||
*.ndf
|
||||
|
||||
# Business Intelligence projects
|
||||
*.rdl.data
|
||||
*.bim.layout
|
||||
*.bim_*.settings
|
||||
*.rptproj.rsuser
|
||||
*- [Bb]ackup.rdl
|
||||
*- [Bb]ackup ([0-9]).rdl
|
||||
*- [Bb]ackup ([0-9][0-9]).rdl
|
||||
|
||||
# Microsoft Fakes
|
||||
FakesAssemblies/
|
||||
|
||||
# GhostDoc plugin setting file
|
||||
*.GhostDoc.xml
|
||||
|
||||
# Node.js Tools for Visual Studio
|
||||
.ntvs_analysis.dat
|
||||
node_modules/
|
||||
|
||||
# Visual Studio 6 build log
|
||||
*.plg
|
||||
|
||||
# Visual Studio 6 workspace options file
|
||||
*.opt
|
||||
|
||||
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
|
||||
*.vbw
|
||||
|
||||
# Visual Studio 6 auto-generated project file (contains which files were open etc.)
|
||||
*.vbp
|
||||
|
||||
# Visual Studio 6 workspace and project file (working project files containing files to include in project)
|
||||
*.dsw
|
||||
*.dsp
|
||||
|
||||
# Visual Studio 6 technical files
|
||||
*.ncb
|
||||
*.aps
|
||||
|
||||
# Visual Studio LightSwitch build output
|
||||
**/*.HTMLClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/ModelManifest.xml
|
||||
**/*.Server/GeneratedArtifacts
|
||||
**/*.Server/ModelManifest.xml
|
||||
_Pvt_Extensions
|
||||
|
||||
# Paket dependency manager
|
||||
.paket/paket.exe
|
||||
paket-files/
|
||||
|
||||
# FAKE - F# Make
|
||||
.fake/
|
||||
|
||||
# CodeRush personal settings
|
||||
.cr/personal
|
||||
|
||||
# Python Tools for Visual Studio (PTVS)
|
||||
__pycache__/
|
||||
*.pyc
|
||||
|
||||
# Cake - Uncomment if you are using it
|
||||
# tools/**
|
||||
# !tools/packages.config
|
||||
|
||||
# Tabs Studio
|
||||
*.tss
|
||||
|
||||
# Telerik's JustMock configuration file
|
||||
*.jmconfig
|
||||
|
||||
# BizTalk build output
|
||||
*.btp.cs
|
||||
*.btm.cs
|
||||
*.odx.cs
|
||||
*.xsd.cs
|
||||
|
||||
# OpenCover UI analysis results
|
||||
OpenCover/
|
||||
|
||||
# Azure Stream Analytics local run output
|
||||
ASALocalRun/
|
||||
|
||||
# MSBuild Binary and Structured Log
|
||||
*.binlog
|
||||
|
||||
# NVidia Nsight GPU debugger configuration file
|
||||
*.nvuser
|
||||
|
||||
# MFractors (Xamarin productivity tool) working folder
|
||||
.mfractor/
|
||||
|
||||
# Local History for Visual Studio
|
||||
.localhistory/
|
||||
|
||||
# Visual Studio History (VSHistory) files
|
||||
.vshistory/
|
||||
|
||||
# BeatPulse healthcheck temp database
|
||||
healthchecksdb
|
||||
|
||||
# Backup folder for Package Reference Convert tool in Visual Studio 2017
|
||||
MigrationBackup/
|
||||
|
||||
# Ionide (cross platform F# VS Code tools) working folder
|
||||
.ionide/
|
||||
|
||||
# Fody - auto-generated XML schema
|
||||
FodyWeavers.xsd
|
||||
|
||||
# VS Code files for those working on multiple tools
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
*.code-workspace
|
||||
|
||||
# Local History for Visual Studio Code
|
||||
.history/
|
||||
|
||||
# Windows Installer files from build outputs
|
||||
*.cab
|
||||
*.msi
|
||||
*.msix
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# JetBrains Rider
|
||||
*.sln.iml
|
||||
|
||||
dist/
|
||||
|
||||
@@ -6,4 +6,10 @@
|
||||
```
|
||||
docker-compose up -d
|
||||
```
|
||||
Then open your browser to `127.0.0.1:7000`
|
||||
Then open your browser to `127.0.0.1:7000`
|
||||
|
||||
If you'd like to enable crawling of RealDebridManager's shared hashlists which will massively boost your database cached entries,
|
||||
enter a readonly github personal access token in 'env/producer.env' as the 'GithubSettings__PAT=<token_here>' value.
|
||||
|
||||
You can scale the number of consumers, by changing the consumer deploy replica count in the compose file on line 87. This is currently set to 3.
|
||||
If you'd like to adjust the number of concurrent processed ingestions per consumer, thats the job concurrency setting within 'env/consumer.env'.
|
||||
@@ -1,12 +0,0 @@
|
||||
FROM node:21-alpine
|
||||
|
||||
RUN apk update && apk upgrade && \
|
||||
apk add --no-cache git
|
||||
|
||||
WORKDIR /home/node/app
|
||||
|
||||
COPY package*.json ./
|
||||
RUN npm ci --only-production
|
||||
COPY . .
|
||||
|
||||
CMD [ "node", "--insecure-http-parser", "index.js" ]
|
||||
@@ -1,82 +0,0 @@
|
||||
import { DebridOptions } from '../moch/options.js';
|
||||
import { QualityFilter, Providers, SizeFilter } from './filter.js';
|
||||
import { LanguageOptions } from './languages.js';
|
||||
|
||||
export const PreConfigurations = {
|
||||
lite: {
|
||||
config: liteConfig(),
|
||||
serialized: configValue(liteConfig()),
|
||||
manifest: {
|
||||
id: 'com.stremio.torrentio.lite.addon',
|
||||
name: 'Torrentio Lite',
|
||||
description: 'Preconfigured Lite version of Torrentio addon.'
|
||||
+ ' To configure advanced options visit https://torrentio.strem.fun/lite'
|
||||
}
|
||||
},
|
||||
brazuca: {
|
||||
config: brazucaConfig(),
|
||||
serialized: configValue(brazucaConfig()),
|
||||
manifest: {
|
||||
id: 'com.stremio.torrentio.brazuca.addon',
|
||||
name: 'Torrentio Brazuca',
|
||||
description: 'Preconfigured version of Torrentio addon for Brazilian content.'
|
||||
+ ' To configure advanced options visit https://torrentio.strem.fun/brazuca',
|
||||
logo: 'https://i.ibb.co/8mgRZPp/GwxAcDV.png'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const keysToSplit = [Providers.key, LanguageOptions.key, QualityFilter.key, SizeFilter.key, DebridOptions.key];
|
||||
const keysToUppercase = [SizeFilter.key];
|
||||
|
||||
export function parseConfiguration(configuration) {
|
||||
if (!configuration) {
|
||||
return undefined;
|
||||
}
|
||||
if (PreConfigurations[configuration]) {
|
||||
return PreConfigurations[configuration].config;
|
||||
}
|
||||
const configValues = configuration.split('|')
|
||||
.reduce((map, next) => {
|
||||
const parameterParts = next.split('=');
|
||||
if (parameterParts.length === 2) {
|
||||
map[parameterParts[0].toLowerCase()] = parameterParts[1];
|
||||
}
|
||||
return map;
|
||||
}, {});
|
||||
keysToSplit
|
||||
.filter(key => configValues[key])
|
||||
.forEach(key => configValues[key] = configValues[key].split(',')
|
||||
.map(value => keysToUppercase.includes(key) ? value.toUpperCase() : value.toLowerCase()))
|
||||
return configValues;
|
||||
}
|
||||
|
||||
function liteConfig() {
|
||||
const config = {};
|
||||
config[Providers.key] = Providers.options
|
||||
.filter(provider => !provider.foreign)
|
||||
.map(provider => provider.key);
|
||||
config[QualityFilter.key] = ['scr', 'cam']
|
||||
config['limit'] = 1;
|
||||
return config;
|
||||
}
|
||||
|
||||
function brazucaConfig() {
|
||||
const config = {};
|
||||
config[Providers.key] = Providers.options
|
||||
.filter(provider => !provider.foreign || provider.foreign === '🇵🇹')
|
||||
.map(provider => provider.key);
|
||||
config[LanguageOptions.key] = ['portuguese'];
|
||||
return config;
|
||||
}
|
||||
|
||||
function configValue(config) {
|
||||
return Object.entries(config)
|
||||
.map(([key, value]) => `${key}=${Array.isArray(value) ? value.join(',') : value}`)
|
||||
.join('|');
|
||||
}
|
||||
|
||||
export function getManifestOverride(config) {
|
||||
const preConfig = Object.values(PreConfigurations).find(pre => pre.config === config);
|
||||
return preConfig ? preConfig.manifest : {};
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
import namedQueue from "named-queue";
|
||||
|
||||
export function createNamedQueue(concurrency) {
|
||||
const queue = new namedQueue((task, callback) => task.method()
|
||||
.then(result => callback(false, result))
|
||||
.catch((error => callback(error))), 200);
|
||||
queue.wrap = (id, method) => new Promise(((resolve, reject) => {
|
||||
queue.push({ id, method }, (error, result) => result ? resolve(result) : reject(error));
|
||||
}));
|
||||
return queue;
|
||||
}
|
||||
@@ -1,128 +0,0 @@
|
||||
import { QualityFilter } from './filter.js';
|
||||
import { containsLanguage, LanguageOptions } from './languages.js';
|
||||
import { Type } from './types.js';
|
||||
import { hasMochConfigured } from '../moch/moch.js';
|
||||
import { extractSeeders, extractSize } from './titleHelper.js';
|
||||
|
||||
const OTHER_QUALITIES = QualityFilter.options.find(option => option.key === 'other');
|
||||
const CAM_QUALITIES = QualityFilter.options.find(option => option.key === 'cam');
|
||||
const HEALTHY_SEEDERS = 5;
|
||||
const SEEDED_SEEDERS = 1;
|
||||
const MIN_HEALTHY_COUNT = 50;
|
||||
const MAX_UNHEALTHY_COUNT = 5;
|
||||
|
||||
export const SortOptions = {
|
||||
key: 'sort',
|
||||
options: {
|
||||
qualitySeeders: {
|
||||
key: 'quality',
|
||||
description: 'By quality then seeders'
|
||||
},
|
||||
qualitySize: {
|
||||
key: 'qualitysize',
|
||||
description: 'By quality then size'
|
||||
},
|
||||
seeders: {
|
||||
key: 'seeders',
|
||||
description: 'By seeders'
|
||||
},
|
||||
size: {
|
||||
key: 'size',
|
||||
description: 'By size'
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
export default function sortStreams(streams, config, type) {
|
||||
const languages = config[LanguageOptions.key];
|
||||
if (languages?.length && languages[0] !== 'english') {
|
||||
// No need to filter english since it's hard to predict which entries are english
|
||||
const streamsWithLanguage = streams.filter(stream => containsLanguage(stream, languages));
|
||||
const streamsNoLanguage = streams.filter(stream => !streamsWithLanguage.includes(stream));
|
||||
return _sortStreams(streamsWithLanguage, config, type).concat(_sortStreams(streamsNoLanguage, config, type));
|
||||
}
|
||||
return _sortStreams(streams, config, type);
|
||||
}
|
||||
|
||||
function _sortStreams(streams, config, type) {
|
||||
const sort = config?.sort?.toLowerCase() || undefined;
|
||||
const limit = /^[1-9][0-9]*$/.test(config.limit) && parseInt(config.limit) || undefined;
|
||||
const sortedStreams = sortBySeeders(streams, config, type);
|
||||
if (sort === SortOptions.options.seeders.key) {
|
||||
return sortedStreams.slice(0, limit);
|
||||
} else if (sort === SortOptions.options.size.key) {
|
||||
return sortBySize(sortedStreams, limit);
|
||||
}
|
||||
const nestedSort = sort === SortOptions.options.qualitySize.key ? sortBySize : noopSort;
|
||||
return sortByVideoQuality(sortedStreams, nestedSort, limit)
|
||||
}
|
||||
|
||||
function noopSort(streams) {
|
||||
return streams;
|
||||
}
|
||||
|
||||
function sortBySeeders(streams, config, type) {
|
||||
// streams are already presorted by seeders and upload date
|
||||
const healthy = streams.filter(stream => extractSeeders(stream.title) >= HEALTHY_SEEDERS);
|
||||
const seeded = streams.filter(stream => extractSeeders(stream.title) >= SEEDED_SEEDERS);
|
||||
|
||||
if (type === Type.SERIES && hasMochConfigured(config)) {
|
||||
return streams;
|
||||
} else if (healthy.length >= MIN_HEALTHY_COUNT) {
|
||||
return healthy;
|
||||
} else if (seeded.length >= MAX_UNHEALTHY_COUNT) {
|
||||
return seeded.slice(0, MIN_HEALTHY_COUNT);
|
||||
}
|
||||
return streams.slice(0, MAX_UNHEALTHY_COUNT);
|
||||
}
|
||||
|
||||
function sortBySize(streams, limit) {
|
||||
return streams
|
||||
.sort((a, b) => {
|
||||
const aSize = extractSize(a.title);
|
||||
const bSize = extractSize(b.title);
|
||||
return bSize - aSize;
|
||||
}).slice(0, limit);
|
||||
}
|
||||
|
||||
function sortByVideoQuality(streams, nestedSort, limit) {
|
||||
const qualityMap = streams
|
||||
.reduce((map, stream) => {
|
||||
const quality = extractQuality(stream.name);
|
||||
map[quality] = (map[quality] || []).concat(stream);
|
||||
return map;
|
||||
}, {});
|
||||
const sortedQualities = Object.keys(qualityMap)
|
||||
.sort((a, b) => {
|
||||
const aResolution = a?.match(/\d+p/) && parseInt(a, 10);
|
||||
const bResolution = b?.match(/\d+p/) && parseInt(b, 10);
|
||||
if (aResolution && bResolution) {
|
||||
return bResolution - aResolution; // higher resolution first;
|
||||
} else if (aResolution) {
|
||||
return -1; // remain higher if resolution is there
|
||||
} else if (bResolution) {
|
||||
return 1; // move downward if other stream has resolution
|
||||
}
|
||||
return a < b ? -1 : b < a ? 1 : 0; // otherwise sort by alphabetic order
|
||||
});
|
||||
return sortedQualities
|
||||
.map(quality => nestedSort(qualityMap[quality]).slice(0, limit))
|
||||
.reduce((a, b) => a.concat(b), []);
|
||||
}
|
||||
|
||||
function extractQuality(title) {
|
||||
const qualityDesc = title.split('\n')[1];
|
||||
const resolutionMatch = qualityDesc?.match(/\d+p/);
|
||||
if (resolutionMatch) {
|
||||
return resolutionMatch[0];
|
||||
} else if (/8k/i.test(qualityDesc)) {
|
||||
return '4320p'
|
||||
} else if (/4k|uhd/i.test(qualityDesc)) {
|
||||
return '2060p'
|
||||
} else if (CAM_QUALITIES.test(qualityDesc)) {
|
||||
return CAM_QUALITIES.label;
|
||||
} else if (OTHER_QUALITIES.test(qualityDesc)) {
|
||||
return OTHER_QUALITIES.label;
|
||||
}
|
||||
return qualityDesc;
|
||||
}
|
||||
2579
addon/package-lock.json
generated
2579
addon/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
FROM node:16-alpine
|
||||
|
||||
RUN apk update && apk upgrade && \
|
||||
apk add --no-cache git
|
||||
|
||||
WORKDIR /home/node/app
|
||||
|
||||
COPY ./catalogs .
|
||||
COPY ./addon ../addon
|
||||
RUN npm ci --only-production
|
||||
|
||||
CMD [ "node", "index.js" ]
|
||||
@@ -1,99 +0,0 @@
|
||||
import Bottleneck from 'bottleneck';
|
||||
import moment from 'moment';
|
||||
import { addonBuilder } from 'stremio-addon-sdk';
|
||||
import { Providers } from '../addon/lib/filter.js';
|
||||
import { createManifest, genres } from './lib/manifest.js';
|
||||
import { getMetas } from './lib/metadata.js';
|
||||
import { cacheWrapCatalog, cacheWrapIds } from './lib/cache.js';
|
||||
import * as repository from './lib/repository.js';
|
||||
|
||||
const CACHE_MAX_AGE = parseInt(process.env.CACHE_MAX_AGE) || 4 * 60 * 60; // 4 hours in seconds
|
||||
const STALE_REVALIDATE_AGE = 4 * 60 * 60; // 4 hours
|
||||
const STALE_ERROR_AGE = 7 * 24 * 60 * 60; // 7 days
|
||||
|
||||
const manifest = createManifest();
|
||||
const builder = new addonBuilder(manifest);
|
||||
const limiter = new Bottleneck({
|
||||
maxConcurrent: process.env.LIMIT_MAX_CONCURRENT || 20,
|
||||
highWater: process.env.LIMIT_QUEUE_SIZE || 50,
|
||||
strategy: Bottleneck.strategy.OVERFLOW
|
||||
});
|
||||
const defaultProviders = Providers.options
|
||||
.filter(provider => !provider.foreign)
|
||||
.map(provider => provider.label)
|
||||
.sort();
|
||||
|
||||
builder.defineCatalogHandler((args) => {
|
||||
const offset = parseInt(args.extra.skip || '0', 10);
|
||||
const genre = args.extra.genre || 'default';
|
||||
const catalog = manifest.catalogs.find(c => c.id === args.id);
|
||||
const providers = defaultProviders;
|
||||
console.log(`Incoming catalog ${args.id} request with genre=${genre} and skip=${offset}`)
|
||||
if (!catalog) {
|
||||
return Promise.reject(`No catalog found for with id: ${args.id}`)
|
||||
}
|
||||
|
||||
const cacheKey = createCacheKey(catalog.id, providers, genre, offset);
|
||||
return limiter.schedule(() => cacheWrapCatalog(cacheKey, () => getCatalog(catalog, providers, genre, offset)))
|
||||
.then(metas => ({
|
||||
metas: metas,
|
||||
cacheMaxAge: CACHE_MAX_AGE,
|
||||
staleRevalidate: STALE_REVALIDATE_AGE,
|
||||
staleError: STALE_ERROR_AGE
|
||||
}))
|
||||
.catch(error => Promise.reject(`Failed retrieving catalog ${args.id}: ${error.message}`));
|
||||
})
|
||||
|
||||
async function getCursor(catalog, providers, genre, offset) {
|
||||
if (offset === 0) {
|
||||
return undefined;
|
||||
}
|
||||
const previousOffset = offset - catalog.pageSize;
|
||||
const previousCacheKey = createCacheKey(catalog.id, providers, genre, previousOffset);
|
||||
return cacheWrapCatalog(previousCacheKey, () => Promise.reject("cursor not found"))
|
||||
.then(metas => metas[metas.length - 1])
|
||||
.then(meta => meta.id.replace('kitsu:', ''))
|
||||
}
|
||||
|
||||
async function getCatalog(catalog, providers, genre, offset) {
|
||||
const cursor = await getCursor(catalog, providers, genre, offset)
|
||||
const startDate = getStartDate(genre)?.toISOString();
|
||||
const endDate = getEndDate(genre)?.toISOString();
|
||||
const cacheKey = createCacheKey(catalog.id, providers, genre);
|
||||
|
||||
return cacheWrapIds(cacheKey, () => repository.getIds(providers, catalog.type, startDate, endDate))
|
||||
.then(ids => ids.slice(ids.indexOf(cursor) + 1))
|
||||
.then(ids => getMetas(ids, catalog.type))
|
||||
.then(metas => metas.slice(0, catalog.pageSize));
|
||||
}
|
||||
|
||||
function getStartDate(genre) {
|
||||
switch (genre) {
|
||||
case genres[0]: return moment().utc().subtract(1, 'day').startOf('day');
|
||||
case genres[1]: return moment().utc().startOf('isoWeek');
|
||||
case genres[2]: return moment().utc().subtract(7, 'day').startOf('isoWeek');
|
||||
case genres[3]: return moment().utc().startOf('month');
|
||||
case genres[4]: return moment().utc().subtract(30, 'day').startOf('month');
|
||||
case genres[5]: return undefined;
|
||||
default: return moment().utc().subtract(30, 'day').startOf('day');
|
||||
}
|
||||
}
|
||||
|
||||
function getEndDate(genre) {
|
||||
switch (genre) {
|
||||
case genres[0]: return moment().utc().subtract(1, 'day').endOf('day');
|
||||
case genres[1]: return moment().utc().endOf('isoWeek');
|
||||
case genres[2]: return moment().utc().subtract(7, 'day').endOf('isoWeek');
|
||||
case genres[3]: return moment().utc().endOf('month');
|
||||
case genres[4]: return moment().utc().subtract(30, 'day').endOf('month');
|
||||
case genres[5]: return undefined;
|
||||
default: return moment().utc().subtract(1, 'day').endOf('day');
|
||||
}
|
||||
}
|
||||
|
||||
function createCacheKey(catalogId, providers, genre, offset) {
|
||||
const dateKey = moment().format('YYYY-MM-DD');
|
||||
return [catalogId, providers.join(','), genre, dateKey, offset].filter(x => x !== undefined).join('|');
|
||||
}
|
||||
|
||||
export default builder.getInterface();
|
||||
@@ -1,9 +0,0 @@
|
||||
import express from 'express';
|
||||
import serverless from './serverless.js';
|
||||
|
||||
const app = express();
|
||||
|
||||
app.use((req, res, next) => serverless(req, res, next));
|
||||
app.listen(process.env.PORT || 7000, () => {
|
||||
console.log(`Started addon at: http://localhost:${process.env.PORT || 7000}`);
|
||||
});
|
||||
@@ -1,46 +0,0 @@
|
||||
import cacheManager from 'cache-manager';
|
||||
import mangodbStore from 'cache-manager-mongodb';
|
||||
|
||||
const CATALOG_TTL = process.env.STREAM_TTL || 24 * 60 * 60; // 24 hours
|
||||
|
||||
const MONGO_URI = process.env.MONGODB_URI;
|
||||
|
||||
const remoteCache = initiateRemoteCache();
|
||||
|
||||
function initiateRemoteCache() {
|
||||
if (MONGO_URI) {
|
||||
return cacheManager.caching({
|
||||
store: mangodbStore,
|
||||
uri: MONGO_URI,
|
||||
options: {
|
||||
collection: 'torrentio_catalog_collection',
|
||||
socketTimeoutMS: 120000,
|
||||
useNewUrlParser: true,
|
||||
useUnifiedTopology: false,
|
||||
ttl: CATALOG_TTL
|
||||
},
|
||||
ttl: CATALOG_TTL,
|
||||
ignoreCacheErrors: true
|
||||
});
|
||||
} else {
|
||||
return cacheManager.caching({
|
||||
store: 'memory',
|
||||
ttl: CATALOG_TTL
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function cacheWrap(cache, key, method, options) {
|
||||
if (!cache) {
|
||||
return method();
|
||||
}
|
||||
return cache.wrap(key, method, options);
|
||||
}
|
||||
|
||||
export function cacheWrapCatalog(key, method) {
|
||||
return cacheWrap(remoteCache, key, method, { ttl: CATALOG_TTL });
|
||||
}
|
||||
|
||||
export function cacheWrapIds(key, method) {
|
||||
return cacheWrap(remoteCache, `ids|${key}`, method, { ttl: CATALOG_TTL });
|
||||
}
|
||||
@@ -1,274 +0,0 @@
|
||||
const STYLESHEET = `
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
body,
|
||||
html {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
width: 100%;
|
||||
height: 100%
|
||||
}
|
||||
|
||||
html {
|
||||
background-size: auto 100%;
|
||||
background-size: cover;
|
||||
background-position: center center;
|
||||
background-repeat: repeat-y;
|
||||
}
|
||||
|
||||
body {
|
||||
display: flex;
|
||||
background-color: transparent;
|
||||
font-family: 'Open Sans', Arial, sans-serif;
|
||||
color: white;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 4.5vh;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-size: 2.2vh;
|
||||
font-weight: normal;
|
||||
font-style: italic;
|
||||
opacity: 0.8;
|
||||
}
|
||||
|
||||
h3 {
|
||||
font-size: 2.2vh;
|
||||
}
|
||||
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
p,
|
||||
label {
|
||||
margin: 0;
|
||||
text-shadow: 0 0 1vh rgba(0, 0, 0, 0.15);
|
||||
}
|
||||
|
||||
p {
|
||||
font-size: 1.75vh;
|
||||
}
|
||||
|
||||
ul {
|
||||
font-size: 1.75vh;
|
||||
margin: 0;
|
||||
margin-top: 1vh;
|
||||
padding-left: 3vh;
|
||||
}
|
||||
|
||||
a {
|
||||
color: green
|
||||
}
|
||||
|
||||
a.install-link {
|
||||
text-decoration: none
|
||||
}
|
||||
|
||||
button {
|
||||
border: 0;
|
||||
outline: 0;
|
||||
color: white;
|
||||
background: #8A5AAB;
|
||||
padding: 1.2vh 3.5vh;
|
||||
margin: auto;
|
||||
text-align: center;
|
||||
font-family: 'Open Sans', Arial, sans-serif;
|
||||
font-size: 2.2vh;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
display: block;
|
||||
box-shadow: 0 0.5vh 1vh rgba(0, 0, 0, 0.2);
|
||||
transition: box-shadow 0.1s ease-in-out;
|
||||
}
|
||||
|
||||
button:hover {
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
button:active {
|
||||
box-shadow: 0 0 0 0.5vh white inset;
|
||||
}
|
||||
|
||||
#addon {
|
||||
width: 90vh;
|
||||
margin: auto;
|
||||
padding-left: 10%;
|
||||
padding-right: 10%;
|
||||
background: rgba(0, 0, 0, 0.60);
|
||||
}
|
||||
|
||||
.logo {
|
||||
height: 14vh;
|
||||
width: 14vh;
|
||||
margin: auto;
|
||||
margin-bottom: 3vh;
|
||||
}
|
||||
|
||||
.logo img {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.name, .version {
|
||||
display: inline-block;
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.name {
|
||||
line-height: 5vh;
|
||||
}
|
||||
|
||||
.version {
|
||||
position: absolute;
|
||||
line-height: 5vh;
|
||||
margin-left: 1vh;
|
||||
opacity: 0.8;
|
||||
}
|
||||
|
||||
.contact {
|
||||
position: absolute;
|
||||
left: 0;
|
||||
bottom: 4vh;
|
||||
width: 100%;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.contact a {
|
||||
font-size: 1.4vh;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.separator {
|
||||
margin-bottom: 4vh;
|
||||
}
|
||||
|
||||
.label {
|
||||
font-size: 2.2vh;
|
||||
font-weight: 600;
|
||||
padding: 0;
|
||||
line-height: inherit;
|
||||
}
|
||||
|
||||
.btn-group, .multiselect-container {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.btn {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.multiselect-container {
|
||||
border: 0;
|
||||
border-radius: 0;
|
||||
}
|
||||
|
||||
.input, .btn {
|
||||
height: 3.8vh;
|
||||
width: 100%;
|
||||
margin: auto;
|
||||
margin-bottom: 10px;
|
||||
padding: 6px 12px;
|
||||
border: 0;
|
||||
border-radius: 0;
|
||||
outline: 0;
|
||||
color: #333;
|
||||
background-color: rgb(255, 255, 255);
|
||||
box-shadow: 0 0.5vh 1vh rgba(0, 0, 0, 0.2);
|
||||
}
|
||||
`;
|
||||
import { Providers } from '../../addon/lib/filter.js';
|
||||
|
||||
export default function landingTemplate(manifest, config = {}) {
|
||||
const providers = config.providers || [];
|
||||
|
||||
const background = manifest.background || 'https://dl.strem.io/addon-background.jpg';
|
||||
const logo = manifest.logo || 'https://dl.strem.io/addon-logo.png';
|
||||
const contactHTML = manifest.contactEmail ?
|
||||
`<div class="contact">
|
||||
<p>Contact ${manifest.name} creator:</p>
|
||||
<a href="mailto:${manifest.contactEmail}">${manifest.contactEmail}</a>
|
||||
</div>` : '<div class="separator"></div>';
|
||||
const providersHTML = Providers.options
|
||||
.map(provider => `<option value="${provider.key}">${provider.foreign || ''}${provider.label}</option>`)
|
||||
.join('\n');
|
||||
const stylizedTypes = manifest.types
|
||||
.map(t => t[0].toUpperCase() + t.slice(1) + (t !== 'series' ? 's' : ''));
|
||||
|
||||
return `
|
||||
<!DOCTYPE html>
|
||||
<html style="background-image: url(${background});">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>${manifest.name} - Stremio Addon</title>
|
||||
<link rel="shortcut icon" href="${logo}" type="image/x-icon">
|
||||
<link href="https://fonts.googleapis.com/css?family=Open+Sans:400,600,700&display=swap" rel="stylesheet">
|
||||
<script src="https://code.jquery.com/jquery-3.5.1.slim.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.1/dist/umd/popper.min.js"></script>
|
||||
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/js/bootstrap.min.js"></script>
|
||||
<link href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css" rel="stylesheet" >
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/bootstrap-multiselect/0.9.15/js/bootstrap-multiselect.min.js"></script>
|
||||
<link href="https://cdnjs.cloudflare.com/ajax/libs/bootstrap-multiselect/0.9.15/css/bootstrap-multiselect.css" rel="stylesheet"/>
|
||||
<style>${STYLESHEET}</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div id="addon">
|
||||
<div class="logo">
|
||||
<img src="${logo}">
|
||||
</div>
|
||||
<h1 class="name">${manifest.name}</h1>
|
||||
<h2 class="version">${manifest.version || '0.0.0'}</h2>
|
||||
<h2 class="description">${manifest.description || ''}</h2>
|
||||
|
||||
<div class="separator"></div>
|
||||
|
||||
<h3 class="gives">This addon has more :</h3>
|
||||
<ul>
|
||||
${stylizedTypes.map(t => `<li>${t}</li>`).join('')}
|
||||
</ul>
|
||||
|
||||
<div class="separator"></div>
|
||||
|
||||
<label class="label" for="iProviders">Providers:</label>
|
||||
<select id="iProviders" class="input" name="providers[]" multiple="multiple">
|
||||
${providersHTML}
|
||||
</select>
|
||||
|
||||
<div class="separator"></div>
|
||||
|
||||
<a id="installLink" class="install-link" href="#">
|
||||
<button name="Install">INSTALL</button>
|
||||
</a>
|
||||
${contactHTML}
|
||||
</div>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() {
|
||||
$('#iProviders').multiselect({
|
||||
nonSelectedText: 'All providers',
|
||||
onChange: () => generateInstallLink()
|
||||
});
|
||||
$('#iProviders').multiselect('select', [${providers.map(provider => '"' + provider + '"')}]);
|
||||
generateInstallLink();
|
||||
});
|
||||
|
||||
function generateInstallLink() {
|
||||
const providersValue = $('#iProviders').val().join(',') || '';
|
||||
const providers = providersValue.length && providersValue;
|
||||
const configurationValue = [
|
||||
['${Providers.key}', providers],
|
||||
]
|
||||
.filter(([_, value]) => value.length)
|
||||
.map(([key, value]) => key + '=' + value).join('|');
|
||||
const configuration = configurationValue && configurationValue.length ? '/' + configurationValue : '';
|
||||
installLink.href = 'stremio://' + window.location.host + configuration + '/manifest.json';
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
|
||||
</html>`
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
import { Type } from '../../addon/lib/types.js';
|
||||
|
||||
export const genres = [
|
||||
'Yesterday',
|
||||
'This Week',
|
||||
'Last Week',
|
||||
'This Month',
|
||||
'Last Month',
|
||||
'All Time'
|
||||
]
|
||||
|
||||
export function createManifest() {
|
||||
return {
|
||||
id: 'com.stremio.torrentio.catalog.addon',
|
||||
version: '1.0.2',
|
||||
name: 'Torrent Catalogs',
|
||||
description: 'Provides catalogs for movies/series/anime based on top seeded torrents. Requires Kitsu addon for anime.',
|
||||
logo: `https://i.ibb.co/w4BnkC9/GwxAcDV.png`,
|
||||
background: `https://i.ibb.co/VtSfFP9/t8wVwcg.jpg`,
|
||||
types: [Type.MOVIE, Type.SERIES, Type.ANIME],
|
||||
resources: ['catalog'],
|
||||
catalogs: [
|
||||
{
|
||||
id: 'top-movies',
|
||||
type: Type.MOVIE,
|
||||
name: "Top seeded",
|
||||
pageSize: 20,
|
||||
extra: [{ name: 'genre', options: genres }, { name: 'skip' }],
|
||||
genres: genres
|
||||
},
|
||||
{
|
||||
id: 'top-series',
|
||||
type: Type.SERIES,
|
||||
name: "Top seeded",
|
||||
pageSize: 20,
|
||||
extra: [{ name: 'genre', options: genres }, { name: 'skip' }],
|
||||
genres: genres
|
||||
},
|
||||
{
|
||||
id: 'top-anime',
|
||||
type: Type.ANIME,
|
||||
name: "Top seeded",
|
||||
pageSize: 20,
|
||||
extra: [{ name: 'genre', options: genres }, { name: 'skip' }],
|
||||
genres: genres
|
||||
}
|
||||
],
|
||||
behaviorHints: {
|
||||
// @TODO might enable configuration to configure providers
|
||||
configurable: false,
|
||||
configurationRequired: false
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
import axios from 'axios';
|
||||
import { Type } from '../../addon/lib/types.js';
|
||||
|
||||
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
|
||||
const KITSU_URL = 'https://anime-kitsu.strem.fun';
|
||||
const TIMEOUT = 30000;
|
||||
const MAX_SIZE = 40;
|
||||
|
||||
export async function getMetas(ids, type) {
|
||||
if (!ids.length || !type) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return _requestMetadata(ids, type)
|
||||
.catch((error) => {
|
||||
throw new Error(`failed metadata ${type} query due: ${error.message}`);
|
||||
});
|
||||
}
|
||||
|
||||
function _requestMetadata(ids, type) {
|
||||
const url = _getUrl(ids, type);
|
||||
return axios.get(url, { timeout: TIMEOUT })
|
||||
.then(response => response?.data?.metas || response?.data?.metasDetailed || [])
|
||||
.then(metas => metas.filter(meta => meta))
|
||||
.then(metas => metas.map(meta => _sanitizeMeta(meta)));
|
||||
}
|
||||
|
||||
function _getUrl(ids, type) {
|
||||
const joinedIds = ids.slice(0, MAX_SIZE).join(',');
|
||||
if (type === Type.ANIME) {
|
||||
return `${KITSU_URL}/catalog/${type}/kitsu-anime-list/lastVideosIds=${joinedIds}.json`
|
||||
}
|
||||
return `${CINEMETA_URL}/catalog/${type}/last-videos/lastVideosIds=${joinedIds}.json`
|
||||
}
|
||||
|
||||
function _sanitizeMeta(meta) {
|
||||
delete meta.videos;
|
||||
delete meta.credits_cast;
|
||||
delete meta.credits_crew;
|
||||
return meta;
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
import { Sequelize, QueryTypes } from 'sequelize';
|
||||
import { Type } from '../../addon/lib/types.js';
|
||||
|
||||
const DATABASE_URI = process.env.DATABASE_URI;
|
||||
|
||||
const database = new Sequelize(DATABASE_URI, { logging: false });
|
||||
|
||||
export async function getIds(providers, type, startDate, endDate) {
|
||||
const idName = type === Type.ANIME ? 'kitsuId' : 'imdbId';
|
||||
const episodeCondition = type === Type.SERIES
|
||||
? 'AND files."imdbSeason" IS NOT NULL AND files."imdbEpisode" IS NOT NULL'
|
||||
: '';
|
||||
const dateCondition = startDate && endDate
|
||||
? `AND "uploadDate" BETWEEN '${startDate}' AND '${endDate}'`
|
||||
: '';
|
||||
const providersCondition = providers && providers.length
|
||||
? `AND provider in (${providers.map(it => `'${it}'`).join(',')})`
|
||||
: '';
|
||||
const titleCondition = type === Type.MOVIE
|
||||
? 'AND torrents.title NOT LIKE \'%[Erotic]%\''
|
||||
: '';
|
||||
const sortCondition = type === Type.MOVIE ? 'sum(torrents.seeders)' : 'max(torrents.seeders)';
|
||||
const query = `SELECT files."${idName}"
|
||||
FROM (SELECT torrents."infoHash", torrents.seeders FROM torrents
|
||||
WHERE seeders > 0 AND type = '${type}' ${providersCondition} ${dateCondition} ${titleCondition}
|
||||
) as torrents
|
||||
JOIN files ON torrents."infoHash" = files."infoHash"
|
||||
WHERE files."${idName}" IS NOT NULL ${episodeCondition}
|
||||
GROUP BY files."${idName}"
|
||||
ORDER BY ${sortCondition} DESC
|
||||
LIMIT 5000`
|
||||
const results = await database.query(query, { type: QueryTypes.SELECT });
|
||||
return results.map(result => `${result.imdbId || result.kitsuId}`);
|
||||
}
|
||||
3039
catalogs/package-lock.json
generated
3039
catalogs/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -1,26 +0,0 @@
|
||||
{
|
||||
"name": "stremio-torrentio-catalogs",
|
||||
"version": "1.0.3",
|
||||
"exports": "./index.js",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"start": "node index.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": "16.x"
|
||||
},
|
||||
"author": "TheBeastLT <pauliox@beyond.lt>",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.1",
|
||||
"bottleneck": "^2.19.5",
|
||||
"cache-manager": "^3.4.4",
|
||||
"cache-manager-mongodb": "^0.3.0",
|
||||
"moment": "^2.29.4",
|
||||
"pg": "^8.8.0",
|
||||
"pg-hstore": "^2.3.4",
|
||||
"request-ip": "^3.3.0",
|
||||
"sequelize": "^6.29.0",
|
||||
"stremio-addon-sdk": "^1.6.10"
|
||||
}
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
import getRouter from 'stremio-addon-sdk/src/getRouter.js';
|
||||
import addonInterface from './addon.js';
|
||||
import qs from 'querystring';
|
||||
import { parseConfiguration } from '../addon/lib/configuration.js';
|
||||
import { createManifest } from './lib/manifest.js';
|
||||
|
||||
const router = getRouter(addonInterface);
|
||||
|
||||
// router.get('/', (_, res) => {
|
||||
// res.redirect('/configure')
|
||||
// res.end();
|
||||
// });
|
||||
//
|
||||
// router.get('/:configuration?/configure', (req, res) => {
|
||||
// const configValues = parseConfiguration(req.params.configuration || '');
|
||||
// const landingHTML = landingTemplate(createManifest(configValues), configValues);
|
||||
// res.setHeader('content-type', 'text/html');
|
||||
// res.end(landingHTML);
|
||||
// });
|
||||
|
||||
router.get('/:configuration?/manifest.json', (req, res) => {
|
||||
const configValues = parseConfiguration(req.params.configuration || '');
|
||||
const manifestBuf = JSON.stringify(createManifest(configValues));
|
||||
res.setHeader('Content-Type', 'application/json; charset=utf-8');
|
||||
res.end(manifestBuf)
|
||||
});
|
||||
|
||||
router.get('/:configuration/:resource/:type/:id/:extra?.json', (req, res, next) => {
|
||||
const { configuration, resource, type, id } = req.params;
|
||||
const extra = req.params.extra ? qs.parse(req.url.split('/').pop().slice(0, -5)) : {}
|
||||
const configValues = { ...extra, ...parseConfiguration(configuration) };
|
||||
addonInterface.get(resource, type, id, configValues)
|
||||
.then(resp => {
|
||||
const cacheHeaders = {
|
||||
cacheMaxAge: 'max-age',
|
||||
staleRevalidate: 'stale-while-revalidate',
|
||||
staleError: 'stale-if-error'
|
||||
};
|
||||
const cacheControl = Object.keys(cacheHeaders)
|
||||
.map(prop => Number.isInteger(resp[prop]) && cacheHeaders[prop] + '=' + resp[prop])
|
||||
.filter(val => !!val).join(', ');
|
||||
|
||||
res.setHeader('Cache-Control', `${cacheControl}, public`);
|
||||
res.setHeader('Content-Type', 'application/json; charset=utf-8');
|
||||
res.end(JSON.stringify(resp));
|
||||
})
|
||||
.catch(err => {
|
||||
if (err.noHandler) {
|
||||
if (next) {
|
||||
next()
|
||||
} else {
|
||||
res.writeHead(404);
|
||||
res.end(JSON.stringify({ err: 'not found' }));
|
||||
}
|
||||
} else {
|
||||
console.error(err);
|
||||
res.writeHead(500);
|
||||
res.end(JSON.stringify({ err: 'handler error' }));
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
export default function (req, res) {
|
||||
router(req, res, function () {
|
||||
res.statusCode = 404;
|
||||
res.end();
|
||||
});
|
||||
};
|
||||
103
docker-compose.yaml
Normal file
103
docker-compose.yaml
Normal file
@@ -0,0 +1,103 @@
|
||||
version: '3.8'
|
||||
name: torrentio-selfhostio
|
||||
|
||||
x-restart: &restart-policy
|
||||
"unless-stopped"
|
||||
|
||||
x-basehealth: &base-health
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
x-rabbithealth: &rabbitmq-health
|
||||
test: rabbitmq-diagnostics -q ping
|
||||
<<: *base-health
|
||||
|
||||
x-mongohealth: &mongodb-health
|
||||
test: ["CMD","mongosh", "--eval", "db.adminCommand('ping')"]
|
||||
<<: *base-health
|
||||
|
||||
x-postgreshealth: &postgresdb-health
|
||||
test: pg_isready
|
||||
<<: *base-health
|
||||
|
||||
x-apps: &selfhostio-app
|
||||
depends_on:
|
||||
mongodb:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
rabbitmq:
|
||||
condition: service_healthy
|
||||
restart: *restart-policy
|
||||
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:latest
|
||||
environment:
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_DB: selfhostio
|
||||
PGUSER: postgres # needed for healthcheck.
|
||||
ports:
|
||||
- "5432:5432"
|
||||
volumes:
|
||||
- postgres:/var/lib/postgresql/data
|
||||
healthcheck: *postgresdb-health
|
||||
restart: *restart-policy
|
||||
|
||||
mongodb:
|
||||
image: mongo:latest
|
||||
environment:
|
||||
MONGO_INITDB_ROOT_USERNAME: mongo
|
||||
MONGO_INITDB_ROOT_PASSWORD: mongo
|
||||
ports:
|
||||
- "27017:27017"
|
||||
volumes:
|
||||
- mongo:/data/db
|
||||
restart: *restart-policy
|
||||
healthcheck: *mongodb-health
|
||||
|
||||
rabbitmq:
|
||||
image: rabbitmq:3-management
|
||||
ports:
|
||||
- "5672:5672"
|
||||
- "15672:15672"
|
||||
volumes:
|
||||
- rabbitmq:/var/lib/rabbitmq
|
||||
restart: *restart-policy
|
||||
healthcheck: *rabbitmq-health
|
||||
|
||||
producer:
|
||||
build:
|
||||
context: src/producer
|
||||
dockerfile: Dockerfile
|
||||
env_file:
|
||||
- env/producer.env
|
||||
<<: *selfhostio-app
|
||||
|
||||
consumer:
|
||||
build:
|
||||
context: src/node/consumer
|
||||
dockerfile: Dockerfile
|
||||
env_file:
|
||||
- env/consumer.env
|
||||
deploy:
|
||||
replicas: 3
|
||||
<<: *selfhostio-app
|
||||
|
||||
addon:
|
||||
build:
|
||||
context: src/node/addon
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "7000:7000"
|
||||
env_file:
|
||||
- env/addon.env
|
||||
<<: *selfhostio-app
|
||||
|
||||
volumes:
|
||||
postgres:
|
||||
mongo:
|
||||
rabbitmq:
|
||||
@@ -1,61 +0,0 @@
|
||||
---
|
||||
|
||||
version: '3.9'
|
||||
name: torrentio-self-host
|
||||
|
||||
services:
|
||||
|
||||
mongodb:
|
||||
restart: unless-stopped
|
||||
image: docker.io/bitnami/mongodb:7.0
|
||||
ports:
|
||||
- "27017:27017"
|
||||
volumes:
|
||||
- mongo-data:/bitnami/mongodb
|
||||
|
||||
scraper:
|
||||
build: ./scraper
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- mongodb
|
||||
- postgres
|
||||
environment:
|
||||
- PORT=7001
|
||||
- MONGODB_URI=mongodb://mongodb:27017/torrentio
|
||||
- DATABASE_URI=postgres://postgres@postgres:5432/torrentio
|
||||
- ENABLE_SYNC=true
|
||||
|
||||
torrentio:
|
||||
build: ./addon
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- mongodb
|
||||
- postgres
|
||||
ports:
|
||||
- "7000:7000"
|
||||
environment:
|
||||
- MONGODB_URI=mongodb://mongodb:27017/torrentio
|
||||
- DATABASE_URI=postgres://postgres@postgres:5432/torrentio
|
||||
- ENABLE_SYNC=true
|
||||
|
||||
postgres:
|
||||
image: postgres:14-alpine
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 5432:5432
|
||||
volumes:
|
||||
- postgres-data:/var/lib/postgresql/data
|
||||
environment:
|
||||
- POSTGRES_HOST_AUTH_METHOD=trust
|
||||
- POSTGRES_USER=postgres
|
||||
- POSTGRES_DB=torrentio
|
||||
|
||||
flaresolverr:
|
||||
image: ghcr.io/flaresolverr/flaresolverr:latest
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 8191:8191
|
||||
|
||||
volumes:
|
||||
mongo-data:
|
||||
postgres-data:
|
||||
4
env/addon.env
vendored
Normal file
4
env/addon.env
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
TZ=London/Europe
|
||||
DATABASE_URI=postgres://postgres:postgres@postgres/selfhostio
|
||||
MONGODB_URI=mongodb://mongo:mongo@mongodb/selfhostio?tls=false&authSource=admin
|
||||
DEBUG_MODE=false
|
||||
11
env/consumer.env
vendored
Normal file
11
env/consumer.env
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
TZ=London/Europe
|
||||
MONGODB_URI=mongodb://mongo:mongo@mongodb/selfhostio?tls=false&authSource=admin
|
||||
DATABASE_URI=postgres://postgres:postgres@postgres/selfhostio
|
||||
RABBIT_URI=amqp://guest:guest@rabbitmq:5672/?heartbeat=30
|
||||
QUEUE_NAME=ingested
|
||||
JOB_CONCURRENCY=5
|
||||
JOBS_ENABLED=true
|
||||
ENABLE_SYNC=true
|
||||
MAX_SINGLE_TORRENT_CONNECTIONS=10
|
||||
TORRENT_TIMEOUT=30000
|
||||
UDP_TRACKERS_ENABLED=true
|
||||
10
env/producer.env
vendored
Normal file
10
env/producer.env
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
ScrapeConfiguration__StorageConnectionString=host=postgres;username=postgres;password=postgres;database=selfhostio;
|
||||
RabbitMqConfiguration__Host=rabbitmq
|
||||
RabbitMqConfiguration__QueueName=ingested
|
||||
RabbitMqConfiguration__Username=guest
|
||||
RabbitMqConfiguration__Password=guest
|
||||
RabbitMqConfiguration__Durable=true
|
||||
RabbitMqConfiguration__MaxQueueSize=0
|
||||
RabbitMqConfiguration__MaxPublishBatchSize=500
|
||||
RabbitMqConfiguration__PublishIntervalInSeconds=10
|
||||
GithubSettings__PAT=
|
||||
@@ -1,12 +0,0 @@
|
||||
FROM node:16-alpine
|
||||
|
||||
RUN apk update && apk upgrade && \
|
||||
apk add --no-cache git
|
||||
|
||||
WORKDIR /home/node/app
|
||||
|
||||
COPY package*.json ./
|
||||
RUN npm ci --only-production
|
||||
COPY . .
|
||||
|
||||
CMD [ "node", "--insecure-http-parser", "index.js" ]
|
||||
@@ -1,37 +0,0 @@
|
||||
# Torrentio Scraper
|
||||
|
||||
## Initial dumps
|
||||
|
||||
### The Pirate Bay
|
||||
|
||||
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
|
||||
|
||||
https://thepiratebay.org/static/dump/csv/
|
||||
|
||||
### Kickass
|
||||
|
||||
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
|
||||
|
||||
https://web.archive.org/web/20150416071329/http://kickass.to/api
|
||||
|
||||
### RARBG
|
||||
|
||||
Scrape movie and tv catalog using [www.webscraper.io](https://www.webscraper.io/) for available `imdbIds` and use those via the api to search for torrents.
|
||||
|
||||
Movies sitemap
|
||||
```json
|
||||
{"_id":"rarbg-movies","startUrl":["https://rarbgmirror.org/catalog/movies/[1-4235]"],"selectors":[{"id":"rarbg-movie-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width='110']","multiple":true,"regex":"tt[0-9]+","delay":0}]}
|
||||
```
|
||||
|
||||
TV sitemap
|
||||
```json
|
||||
{"_id":"rarbg-tv","startUrl":["https://rarbgmirror.org/catalog/tv/[1-609]"],"selectors":[{"id":"rarbg-tv-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width='110']","multiple":true,"regex":"tt[0-9]+","delay":0}]}
|
||||
```
|
||||
|
||||
### Migrating Database
|
||||
|
||||
When migrating database to a new one it is important to alter the `files_id_seq` sequence to the maximum file id value plus 1.
|
||||
|
||||
```sql
|
||||
ALTER SEQUENCE files_id_seq RESTART WITH <last_file_id + 1>;
|
||||
```
|
||||
@@ -1,14 +0,0 @@
|
||||
const express = require("express");
|
||||
const server = express();
|
||||
const { connect } = require('./lib/repository');
|
||||
const { startScraper } = require('./scheduler/scheduler')
|
||||
|
||||
server.get('/', function (req, res) {
|
||||
res.sendStatus(200);
|
||||
});
|
||||
|
||||
server.listen(process.env.PORT || 7000, async () => {
|
||||
await connect();
|
||||
console.log('Scraper started');
|
||||
startScraper();
|
||||
});
|
||||
@@ -1,72 +0,0 @@
|
||||
const cacheManager = require('cache-manager');
|
||||
const mangodbStore = require('cache-manager-mongodb');
|
||||
|
||||
const GLOBAL_KEY_PREFIX = 'stremio-torrentio';
|
||||
const IMDB_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|imdb_id`;
|
||||
const KITSU_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|kitsu_id`;
|
||||
const METADATA_PREFIX = `${GLOBAL_KEY_PREFIX}|metadata`;
|
||||
const TRACKERS_KEY_PREFIX = `${GLOBAL_KEY_PREFIX}|trackers`;
|
||||
|
||||
const GLOBAL_TTL = process.env.METADATA_TTL || 7 * 24 * 60 * 60; // 7 days
|
||||
const MEMORY_TTL = process.env.METADATA_TTL || 2 * 60 * 60; // 2 hours
|
||||
const TRACKERS_TTL = 2 * 24 * 60 * 60; // 2 days
|
||||
|
||||
const MONGO_URI = process.env.MONGODB_URI;
|
||||
|
||||
const memoryCache = initiateMemoryCache();
|
||||
const remoteCache = initiateRemoteCache();
|
||||
|
||||
function initiateRemoteCache() {
|
||||
if (MONGO_URI) {
|
||||
return cacheManager.caching({
|
||||
store: mangodbStore,
|
||||
uri: MONGO_URI,
|
||||
options: {
|
||||
collection: 'torrentio_scraper_collection',
|
||||
useUnifiedTopology: true
|
||||
},
|
||||
ttl: GLOBAL_TTL,
|
||||
ignoreCacheErrors: true
|
||||
});
|
||||
} else {
|
||||
return cacheManager.caching({
|
||||
store: 'memory',
|
||||
ttl: GLOBAL_TTL
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function initiateMemoryCache() {
|
||||
return cacheManager.caching({
|
||||
store: 'memory',
|
||||
ttl: MEMORY_TTL
|
||||
});
|
||||
}
|
||||
|
||||
function cacheWrap(cache, key, method, options) {
|
||||
return cache.wrap(key, method, options);
|
||||
}
|
||||
|
||||
function cacheWrapImdbId(key, method) {
|
||||
return cacheWrap(remoteCache, `${IMDB_ID_PREFIX}:${key}`, method, { ttl: GLOBAL_TTL });
|
||||
}
|
||||
|
||||
function cacheWrapKitsuId(key, method) {
|
||||
return cacheWrap(remoteCache, `${KITSU_ID_PREFIX}:${key}`, method, { ttl: GLOBAL_TTL });
|
||||
}
|
||||
|
||||
function cacheWrapMetadata(id, method) {
|
||||
return cacheWrap(memoryCache, `${METADATA_PREFIX}:${id}`, method, { ttl: MEMORY_TTL });
|
||||
}
|
||||
|
||||
function cacheTrackers(method) {
|
||||
return cacheWrap(memoryCache, `${TRACKERS_KEY_PREFIX}`, method, { ttl: TRACKERS_TTL });
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
cacheWrapImdbId,
|
||||
cacheWrapKitsuId,
|
||||
cacheWrapMetadata,
|
||||
cacheTrackers
|
||||
};
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
const UserAgent = require('user-agents');
|
||||
const userAgent = new UserAgent();
|
||||
|
||||
function getRandomUserAgent() {
|
||||
return userAgent.random().toString();
|
||||
}
|
||||
|
||||
function defaultOptionsWithProxy() {
|
||||
if (process.env.PROXY_HOST && process.env.PROXY_TYPE) {
|
||||
return {
|
||||
proxy: {
|
||||
host: process.env.PROXY_HOST.match(/\/\/(.*):/)[1],
|
||||
port: process.env.PROXY_HOST.match(/:(\d+)/)[1]
|
||||
},
|
||||
headers: {
|
||||
'user-agent': getRandomUserAgent(),
|
||||
'proxy-type': process.env.PROXY_TYPE
|
||||
}
|
||||
}
|
||||
}
|
||||
return { headers: { 'user-agent': getRandomUserAgent() } };
|
||||
}
|
||||
|
||||
module.exports = { getRandomUserAgent, defaultOptionsWithProxy };
|
||||
@@ -1,208 +0,0 @@
|
||||
const torrentStream = require('torrent-stream');
|
||||
const axios = require('axios');
|
||||
const parseTorrent = require('parse-torrent');
|
||||
const BTClient = require('bittorrent-tracker')
|
||||
const async = require('async');
|
||||
const decode = require('magnet-uri');
|
||||
const { Type } = require('./types');
|
||||
const { delay } = require('./promises')
|
||||
const { isVideo, isSubtitle } = require('./extension');
|
||||
const { cacheTrackers } = require('./cache');
|
||||
|
||||
const TRACKERS_URL = 'https://ngosang.github.io/trackerslist/trackers_all.txt';
|
||||
const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20;
|
||||
const SEEDS_CHECK_TIMEOUT = 15 * 1000; // 15 secs
|
||||
const ADDITIONAL_TRACKERS = [
|
||||
'http://tracker.trackerfix.com:80/announce',
|
||||
'udp://9.rarbg.me:2780',
|
||||
'udp://9.rarbg.to:2870'
|
||||
];
|
||||
const ANIME_TRACKERS = [
|
||||
"http://nyaa.tracker.wf:7777/announce",
|
||||
"udp://anidex.moe:6969/announce",
|
||||
"udp://tracker-udp.anirena.com:80/announce",
|
||||
"udp://tracker.uw0.xyz:6969/announce"
|
||||
];
|
||||
|
||||
async function updateCurrentSeeders(torrentsInput) {
|
||||
return new Promise(async (resolve) => {
|
||||
const torrents = Array.isArray(torrentsInput) ? torrentsInput : [torrentsInput];
|
||||
const perTorrentResults = Object.fromEntries(new Map(torrents.map(torrent => [torrent.infoHash, {}])));
|
||||
const perTrackerInfoHashes = await Promise.all(torrents.map(torrent => getTorrentTrackers(torrent)
|
||||
.then(torrentTrackers => ({ infoHash: torrent.infoHash, trackers: torrentTrackers }))))
|
||||
.then(allTorrentTrackers => allTorrentTrackers
|
||||
.reduce((allTrackersMap, torrentTrackers) => {
|
||||
torrentTrackers.trackers.forEach(tracker =>
|
||||
allTrackersMap[tracker] = (allTrackersMap[tracker] || []).concat(torrentTrackers.infoHash));
|
||||
return allTrackersMap;
|
||||
}, {}));
|
||||
let successCounter = 0;
|
||||
const callback = () => {
|
||||
console.log(`Total successful tracker responses: ${successCounter}`)
|
||||
resolve(perTorrentResults);
|
||||
}
|
||||
setTimeout(callback, SEEDS_CHECK_TIMEOUT);
|
||||
|
||||
async.each(Object.keys(perTrackerInfoHashes), function (tracker, ready) {
|
||||
BTClient.scrape({ infoHash: perTrackerInfoHashes[tracker], announce: tracker }, (error, response) => {
|
||||
if (response) {
|
||||
const results = Array.isArray(torrentsInput) ? Object.entries(response) : [[response.infoHash, response]];
|
||||
results
|
||||
.filter(([infoHash]) => perTorrentResults[infoHash])
|
||||
.forEach(([infoHash, seeders]) =>
|
||||
perTorrentResults[infoHash][tracker] = [seeders.complete, seeders.incomplete])
|
||||
successCounter++;
|
||||
} else if (error) {
|
||||
perTrackerInfoHashes[tracker]
|
||||
.filter(infoHash => perTorrentResults[infoHash])
|
||||
.forEach(infoHash => perTorrentResults[infoHash][tracker] = [0, 0, error.message])
|
||||
}
|
||||
ready();
|
||||
})
|
||||
}, callback);
|
||||
}).then(perTorrentResults => {
|
||||
const torrents = Array.isArray(torrentsInput) ? torrentsInput : [torrentsInput];
|
||||
torrents.forEach(torrent => {
|
||||
const results = perTorrentResults[torrent.infoHash];
|
||||
const newSeeders = Math.max(...Object.values(results).map(values => values[0]).concat(0));
|
||||
if (torrent.seeders !== newSeeders) {
|
||||
console.log(`Updating seeders for [${torrent.infoHash}] ${torrent.title} - ${torrent.seeders} -> ${newSeeders}`)
|
||||
torrent.seeders = newSeeders;
|
||||
}
|
||||
})
|
||||
return torrentsInput;
|
||||
});
|
||||
}
|
||||
|
||||
async function updateTorrentSize(torrent) {
|
||||
return filesAndSizeFromTorrentStream(torrent, SEEDS_CHECK_TIMEOUT)
|
||||
.then(result => {
|
||||
torrent.size = result.size;
|
||||
torrent.files = result.files;
|
||||
return torrent;
|
||||
});
|
||||
}
|
||||
|
||||
async function sizeAndFiles(torrent) {
|
||||
return filesAndSizeFromTorrentStream(torrent, 30000);
|
||||
}
|
||||
|
||||
async function torrentFiles(torrent, timeout) {
|
||||
return getFilesFromObject(torrent)
|
||||
.catch(() => filesFromTorrentFile(torrent))
|
||||
.catch(() => filesFromTorrentStream(torrent, timeout))
|
||||
.then(files => ({
|
||||
contents: files,
|
||||
videos: filterVideos(files),
|
||||
subtitles: filterSubtitles(files)
|
||||
}));
|
||||
}
|
||||
|
||||
function getFilesFromObject(torrent) {
|
||||
if (Array.isArray(torrent.files)) {
|
||||
return Promise.resolve(torrent.files);
|
||||
}
|
||||
if (typeof torrent.files === 'function') {
|
||||
return torrent.files();
|
||||
}
|
||||
return Promise.reject("No files in the object");
|
||||
}
|
||||
|
||||
async function filesFromTorrentFile(torrent) {
|
||||
if (!torrent.torrentLink) {
|
||||
return Promise.reject(new Error("no torrentLink"));
|
||||
}
|
||||
|
||||
return axios.get(torrent.torrentLink, { timeout: 10000, responseType: 'arraybuffer' })
|
||||
.then((response) => {
|
||||
if (!response.data || response.status !== 200) {
|
||||
throw new Error('torrent not found')
|
||||
}
|
||||
return response.data
|
||||
})
|
||||
.then((body) => parseTorrent(body))
|
||||
.then((info) => info.files.map((file, fileId) => ({
|
||||
fileIndex: fileId,
|
||||
name: file.name,
|
||||
path: file.path.replace(/^[^\/]+\//, ''),
|
||||
size: file.length
|
||||
})));
|
||||
}
|
||||
|
||||
async function filesFromTorrentStream(torrent, timeout) {
|
||||
return filesAndSizeFromTorrentStream(torrent, timeout).then(result => result.files);
|
||||
}
|
||||
|
||||
function filesAndSizeFromTorrentStream(torrent, timeout = 30000) {
|
||||
if (!torrent.infoHash && !torrent.magnetLink) {
|
||||
return Promise.reject(new Error("no infoHash or magnetLink"));
|
||||
}
|
||||
const magnet = torrent.magnetLink || decode.encode({ infoHash: torrent.infoHash, announce: torrent.trackers });
|
||||
return new Promise((resolve, rejected) => {
|
||||
const engine = new torrentStream(magnet, { connections: MAX_PEER_CONNECTIONS });
|
||||
|
||||
engine.ready(() => {
|
||||
const files = engine.files
|
||||
.map((file, fileId) => ({
|
||||
fileIndex: fileId,
|
||||
name: file.name,
|
||||
path: file.path.replace(/^[^\/]+\//, ''),
|
||||
size: file.length
|
||||
}));
|
||||
const size = engine.torrent.length;
|
||||
|
||||
engine.destroy();
|
||||
resolve({ files, size });
|
||||
});
|
||||
setTimeout(() => {
|
||||
engine.destroy();
|
||||
rejected(new Error('No available connections for torrent!'));
|
||||
}, timeout);
|
||||
});
|
||||
}
|
||||
|
||||
function filterVideos(files) {
|
||||
if (files.length === 1 && !Number.isInteger(files[0].fileIndex)) {
|
||||
return files;
|
||||
}
|
||||
const videos = files.filter(file => isVideo(file.path));
|
||||
const maxSize = Math.max(...videos.map(video => video.size));
|
||||
const minSampleRatio = videos.length <= 3 ? 3 : 10;
|
||||
const minAnimeExtraRatio = 5;
|
||||
const minRedundantRatio = videos.length <= 3 ? 30 : Number.MAX_VALUE;
|
||||
const isSample = video => video.path.match(/sample|bonus|promo/i) && maxSize / parseInt(video.size) > minSampleRatio;
|
||||
const isRedundant = video => maxSize / parseInt(video.size) > minRedundantRatio;
|
||||
const isExtra = video => video.path.match(/extras?\//i);
|
||||
const isAnimeExtra = video => video.path.match(/(?:\b|_)(?:NC)?(?:ED|OP|PV)(?:v?\d\d?)?(?:\b|_)/i)
|
||||
&& maxSize / parseInt(video.size) > minAnimeExtraRatio;
|
||||
const isWatermark = video => video.path.match(/^[A-Z-]+(?:\.[A-Z]+)?\.\w{3,4}$/)
|
||||
&& maxSize / parseInt(video.size) > minAnimeExtraRatio
|
||||
return videos
|
||||
.filter(video => !isSample(video))
|
||||
.filter(video => !isExtra(video))
|
||||
.filter(video => !isAnimeExtra(video))
|
||||
.filter(video => !isRedundant(video))
|
||||
.filter(video => !isWatermark(video));
|
||||
}
|
||||
|
||||
function filterSubtitles(files) {
|
||||
return files.filter(file => isSubtitle(file.path));
|
||||
}
|
||||
|
||||
async function getTorrentTrackers(torrent) {
|
||||
const magnetTrackers = torrent.magnetLink && decode(torrent.magnetLink).tr || [];
|
||||
const torrentTrackers = torrent.trackers && torrent.trackers.split(',') || [];
|
||||
const defaultTrackers = await getDefaultTrackers(torrent);
|
||||
return Array.from(new Set([].concat(magnetTrackers).concat(torrentTrackers).concat(defaultTrackers)));
|
||||
}
|
||||
|
||||
async function getDefaultTrackers(torrent, retry = 3) {
|
||||
return cacheTrackers(() => axios.get(TRACKERS_URL, { timeout: SEEDS_CHECK_TIMEOUT })
|
||||
.then(response => response.data && response.data.trim())
|
||||
.then(body => body && body.split('\n\n') || []))
|
||||
.catch(() => retry > 0 ? delay(5000).then(() => getDefaultTrackers(torrent, retry - 1)) : [])
|
||||
.then(trackers => trackers.concat(ADDITIONAL_TRACKERS))
|
||||
.then(trackers => torrent.type === Type.ANIME ? trackers.concat(ANIME_TRACKERS) : trackers);
|
||||
}
|
||||
|
||||
module.exports = { updateCurrentSeeders, updateTorrentSize, sizeAndFiles, torrentFiles }
|
||||
@@ -1,5 +0,0 @@
|
||||
exports.Type = {
|
||||
MOVIE: 'movie',
|
||||
SERIES: 'series',
|
||||
ANIME: 'anime'
|
||||
};
|
||||
@@ -1,145 +0,0 @@
|
||||
const Bottleneck = require('bottleneck');
|
||||
const { parse } = require('parse-torrent-title');
|
||||
const Promises = require('../lib/promises');
|
||||
const { mostCommonValue } = require('../lib/promises');
|
||||
const repository = require('../lib/repository');
|
||||
const { getImdbId, getKitsuId } = require('../lib/metadata');
|
||||
const { parseTorrentFiles } = require('../lib/torrentFiles');
|
||||
const { createTorrentContents } = require('../lib/torrentEntries');
|
||||
const { assignSubtitles } = require('../lib/torrentSubtitles');
|
||||
const { Type } = require('../lib/types');
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 40 });
|
||||
|
||||
/**
 * Re-parses stored movie-collection file rows (entries saved without a file
 * index whose parsed title reports `complete`) into individual per-movie
 * file rows, deleting the original aggregate row on success.
 * @returns {Promise} resolves once every collection has been processed
 */
async function updateMovieCollections() {
  const collectionFiles = await repository.getFilesBasedOnTitle('logy')
      .then(files => files.filter(file => file.fileIndex === null))
      .then(files => files.filter(file => parse(file.title).complete));

  // Bug fix: the original fired the mapped promises without awaiting or
  // returning them, so the function resolved before any collection was
  // actually updated and failures were unobservable.
  return Promise.all(collectionFiles.map(original => repository.getTorrent({ infoHash: original.infoHash })
      .then(torrent => parseTorrentFiles({ ...torrent.get(), imdbId: original.imdbId }))
      .then(files => Promise.all(files.map(file => {
        console.log(file);
        return repository.createFile(file);
      })))
      .then(createdFiles => {
        if (createdFiles && createdFiles.length) {
          console.log(`Updated movie collection ${original.title}`);
          return repository.deleteFile(original);
        }
        console.log(`Failed updating movie collection ${original.title}`);
      })));
}
|
||||
|
||||
/**
 * Finds torrents stored as movies whose title contains a standalone season
 * token (e.g. "S01"/"s12"), re-decomposes their files into episodes, and
 * switches the torrent type to SERIES.
 * @returns {Promise} resolves when every matching torrent has been updated
 */
async function reapplySeriesSeasonsSavedAsMovies() {
  return repository.getTorrentsBasedOnTitle('(?:[^a-zA-Z0-9]|^)[Ss][012]?[0-9](?:[^0-9]|$)', Type.MOVIE)
      .then(torrents => Promise.all(torrents
          // Double-check with the title parser that a season is really present.
          .filter(torrent => parse(torrent.title).seasons)
          // includeSourceFiles=false: reuse the stored file rows rather than
          // rebuilding a file listing for the parser.
          .map(torrent => limiter.schedule(() => reapplyEpisodeDecomposing(torrent.infoHash, false)
              .then(() => {
                torrent.type = Type.SERIES;
                return torrent.save();
              })))))
      .then(() => console.log('Finished updating multiple torrents'));
}
|
||||
|
||||
/**
 * Re-runs episode decomposition for every anime torrent whose title matches
 * the given regex, throttled by the shared limiter.
 * @param {string} regex - title pattern passed to the repository query
 * @returns {Promise} resolves when all matching torrents were reprocessed
 */
async function reapplyDecomposingToTorrentsOnRegex(regex) {
  const matching = await repository.getTorrentsBasedOnTitle(regex, Type.ANIME);
  await Promise.all(matching.map(torrent =>
      limiter.schedule(() => reapplyEpisodeDecomposing(torrent.infoHash, true))));
  return console.log('Finished updating multiple torrents');
}
|
||||
|
||||
/**
 * Re-runs episode decomposition for a single torrent and reconciles the
 * freshly parsed files with the rows already stored for it, updating rows
 * in place where possible and creating new ones otherwise.
 * @param {string} infoHash - torrent to reprocess
 * @param {boolean} includeSourceFiles - when true, rebuild a file listing
 *   from the stored rows so the parser need not re-open the torrent
 * @returns {Promise} resolves when all file rows are persisted
 */
async function reapplyEpisodeDecomposing(infoHash, includeSourceFiles = true) {
  const torrent = await repository.getTorrent({ infoHash });
  const storedFiles = await repository.getFiles({ infoHash });
  // Index stored rows by fileIndex (the `null` key groups rows without one)
  // so re-parsed files can be matched back to their originals below.
  const fileIndexMap = storedFiles
      .reduce((map, next) => {
        const fileIndex = next.fileIndex !== undefined ? next.fileIndex : null;
        map[fileIndex] = (map[fileIndex] || []).concat(next);
        return map;
      }, {});
  // One representative row per index is turned back into a file descriptor;
  // when includeSourceFiles is false this stays `false` and the parser gets
  // no file listing.
  const files = includeSourceFiles && Object.values(fileIndexMap)
      .map(sameIndexFiles => sameIndexFiles[0])
      .map(file => ({
        fileIndex: file.fileIndex,
        name: file.title.replace(/.*\//, ''),
        path: file.title,
        size: file.size
      }));
  // kitsu lookup is deliberately disabled here; the imdb id wins.
  const kitsuId = undefined;
  const imdbId = kitsuId
      ? undefined
      : mostCommonValue(storedFiles.map(file => file.imdbId))
          || await getImdbId(parse(torrent.title)).catch(() => undefined);

  if (!imdbId && !kitsuId) {
    console.log(`imdbId or kitsuId not found: ${torrent.provider} ${torrent.title}`);
    return Promise.resolve();
  }

  return parseTorrentFiles({ ...torrent.get(), imdbId, kitsuId, files })
      .then(torrentContents => torrentContents.videos)
      .then(newFiles => newFiles.map(file => {
        // Carry database identity (and a manually-set imdb id) over from the
        // matching stored row so the update happens in place instead of
        // creating a duplicate; shift() consumes one stored row per match.
        const fileIndex = file.fileIndex !== undefined ? file.fileIndex : null;
        const mapping = fileIndexMap[fileIndex];
        if (mapping) {
          const originalFile = mapping.shift();
          if (originalFile) {
            if (!originalFile.imdbId) {
              originalFile.imdbId = file.imdbId
            }
            originalFile.imdbSeason = file.imdbSeason;
            originalFile.imdbEpisode = file.imdbEpisode;
            originalFile.kitsuId = file.kitsuId;
            originalFile.kitsuEpisode = file.kitsuEpisode;
            return originalFile;
          }
        }
        return file;
      }))
      // Rows with an id already exist -> save(); new ones are created.
      .then(updatedFiles => Promise.all(updatedFiles
          .map(file => file.id ? file.save() : repository.createFile(file))))
      .then(() => console.log(`Updated files for [${torrent.infoHash}] ${torrent.title}`));
}
|
||||
|
||||
/**
 * Pairs stored-but-unassigned subtitle files with the video files of their
 * torrent and persists the resulting assignments.
 * @returns {Promise} resolves when every torrent's subtitles are upserted
 */
async function assignSubs() {
  // Bucket the unassigned subtitles by torrent info hash.
  const subsByHash = await repository.getUnassignedSubtitles()
      .then(subs => {
        const grouped = {};
        for (const sub of subs) {
          grouped[sub.infoHash] = (grouped[sub.infoHash] || []).concat(sub);
        }
        return grouped;
      });

  return Promise.all(Object.keys(subsByHash).map(async infoHash => {
    const videos = await repository.getFiles({ infoHash });
    const assignedContents = assignSubtitles({ videos, subtitles: subsByHash[infoHash] });
    const videosWithSubs = assignedContents.videos.filter(video => video.subtitles);
    return Promise.all(videosWithSubs.map(video => repository.upsertSubtitles(video, video.subtitles)));
  }));
}
|
||||
|
||||
/**
 * Parses contents for torrents that have none stored yet, in batches.
 * A batch of exactly 500 indicates the backlog query returned a full page,
 * so another pass is started until a partial batch comes back.
 * @returns {Promise} resolves once no full batch remains
 */
async function openTorrentContents() {
  const limiter = new Bottleneck({ maxConcurrent: 15 });
  const unopenedTorrents = await repository.getNoContentsTorrents();

  return Promise.all(unopenedTorrents.map(torrent => limiter.schedule(() => createTorrentContents(torrent))))
      // Bug fix: `Promise.resolve` was referenced without being called, so
      // the terminal branch resolved to a function instead of a settled value.
      .then(() => unopenedTorrents.length === 500 ? openTorrentContents() : Promise.resolve());
}
|
||||
|
||||
// Manual maintenance entry points for this script. Exactly one invocation is
// kept active at a time; the others stay commented out for ad-hoc use.
// const infoHashes = [
// ]
// Promises.sequence(infoHashes.map(infoHash => () => reapplyEpisodeDecomposing(infoHash)))
//     .then(() => console.log('Finished'));

//findAllFiles().then(() => console.log('Finished'));
//updateMovieCollections().then(() => console.log('Finished'));
reapplyEpisodeDecomposing('96cc18f564f058384c18b4966a183d81808ce3fb', true).then(() => console.log('Finished'));
//reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
//reapplyDecomposingToTorrentsOnRegex('.*Title.*').then(() => console.log('Finished'));
//reapplyManualHashes().then(() => console.log('Finished'));
// assignSubs().then(() => console.log('Finished'));
// openTorrentContents().then(() => console.log('Finished'));
||||
@@ -1,14 +0,0 @@
|
||||
const { scheduleScraping, scrapeAll } = require('./scraper')
|
||||
const { scheduleUpdateSeeders, scheduleUpdateSeedersForNewTorrents } = require('./seeders')
|
||||
|
||||
/**
 * Entry point for the scraping service.
 * When ENABLE_SCHEDULING is set (any non-empty value), the providers are put
 * on their cron schedules together with both seeders-update loops; otherwise
 * a single full scrape of every provider is kicked off immediately.
 */
function startScraper() {
  if (!process.env.ENABLE_SCHEDULING) {
    scrapeAll();
    return;
  }
  scheduleScraping();
  scheduleUpdateSeeders();
  scheduleUpdateSeedersForNewTorrents();
}
|
||||
|
||||
module.exports = { startScraper }
|
||||
@@ -1,28 +0,0 @@
|
||||
const schedule = require('node-schedule');
|
||||
const scrapers = require('./scrapers');
|
||||
const { sequence } = require('../lib/promises')
|
||||
|
||||
/**
 * Registers one scheduled job per distinct cron expression, running all
 * providers that share that expression in a single sequential pass.
 */
function scheduleScraping() {
  const cronGroups = {};
  for (const provider of scrapers) {
    cronGroups[provider.cron] = (cronGroups[provider.cron] || []).concat(provider);
  }
  for (const [cron, providers] of Object.entries(cronGroups)) {
    schedule.scheduleJob(cron, () => _scrapeProviders(providers));
  }
}
|
||||
|
||||
// Runs every registered provider's scraper once, sequentially.
function scrapeAll() {
  return _scrapeProviders(scrapers)
}
|
||||
|
||||
// Runs the given providers one after another (not in parallel), so a slow
// or failing provider cannot starve the others of resources.
async function _scrapeProviders(providers) {
  return sequence(providers.map(provider => () => _singleScrape(provider)));
}
|
||||
|
||||
/**
 * Runs one provider's scrape, converting any failure into a logged warning
 * and a resolved promise so sibling providers still run.
 * @param {{name: string, scraper: {scrape: Function}}} provider
 */
async function _singleScrape(provider) {
  try {
    return await provider.scraper.scrape();
  } catch (error) {
    console.warn(`Failed ${provider.name} scraping due: `, error);
    return Promise.resolve();
  }
}
|
||||
|
||||
module.exports = { scheduleScraping, scrapeAll }
|
||||
@@ -1,47 +0,0 @@
|
||||
// const thepiratebayScraper = require('../scrapers/thepiratebay/thepiratebay_scraper');
|
||||
// const thepiratebayFakeRemoval = require('../scrapers/thepiratebay/thepiratebay_fakes_removal');
|
||||
const ytsScraper = require('../scrapers/yts/yts_scraper');
|
||||
const ytsFullScraper = require('../scrapers/yts/yts_full_scraper');
|
||||
const eztvScraper = require('../scrapers/eztv/eztv_scraper');
|
||||
const leetxScraper = require('../scrapers/1337x/1337x_scraper');
|
||||
// const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
|
||||
const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper');
|
||||
const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper');
|
||||
// const erairawsScraper = require('../scrapers/erairaws/erairaws_scraper');
|
||||
// const torrentGalaxyScraper = require('../scrapers/torrentgalaxy/torrentgalaxy_scraper');
|
||||
// const rutorScraper = require('../scrapers/rutor/rutor_scraper');
|
||||
// const Comando = require('../scrapers/comando/comando_scraper')
|
||||
// const ComoEuBaixo = require('../scrapers/comoeubaixo/comoeubaixo_scraper')
|
||||
// const Lapumia = require('../scrapers/lapumia/lapumia_scraper')
|
||||
// const OndeBaixa = require('../scrapers/ondebaixa/ondebaixa_scraper');
|
||||
// const AnimesTorrent = require('../scrapers/animestorrent/animestorrent_scraper')
|
||||
// const DarkMahou = require('../scrapers/darkmahou/darkmahou_scraper')
|
||||
// const torrent9Scraper = require('../scrapers/torrent9/torrent9_scraper');
|
||||
|
||||
// Active scraper registry: each entry pairs a scraper module with the cron
// expression it runs on; commented entries are disabled providers.
// NOTE(review): the cron strings use a Quartz-style "?" day-of-month field —
// confirm the scheduler in use accepts that syntax.
module.exports = [
  { scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' },
  { scraper: ytsFullScraper, name: ytsFullScraper.NAME, cron: '0 0 0 * * 0' },
  { scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' },
  { scraper: nyaaSiScraper, name: nyaaSiScraper.NAME, cron: '0 0 */4 ? * *' },
  { scraper: nyaaPantsuScraper, name: nyaaPantsuScraper.NAME, cron: '0 0 */4 ? * *' },
  // { scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */1 ? * *' },
  // { scraper: rutorScraper, name: rutorScraper.NAME, cron: '0 0 */4 ? * *' },
  // { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */2 ? * *' },
  // { scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 0 */12 ? * *' },
  // { scraper: torrentGalaxyScraper, name: torrentGalaxyScraper.NAME, cron: '0 0 */4 ? * *' },
  { scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' }
  // { scraper: torrent9Scraper, name: torrent9Scraper.NAME, cron: '0 0 */4 ? * *' },
  // { scraper: Comando, name: Comando.NAME, cron: '0 0 */4 ? * *' },
  // { scraper: ComoEuBaixo, name: ComoEuBaixo.NAME, cron: '0 0 */4 ? * *' },
  // { scraper: Lapumia, name: Lapumia.NAME, cron: '0 0 */4 ? * *' },
  // { scraper: OndeBaixa, name: OndeBaixa.NAME, cron: '0 0 */4 ? * *' },
  // { scraper: AnimesTorrent, name: AnimesTorrent.NAME, cron: '0 0 */4 ? * *' },
  // { scraper: DarkMahou, name: DarkMahou.NAME, cron: '0 0 */4 ? * *' },
  // { scraper: erairawsScraper, name: erairawsScraper.NAME, cron: '0 0 */4 ? * *' },
  // { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
  // { scraper: require('../scrapers/1337x/1337x_search_scraper') }
  // { scraper: require('../scrapers/rarbg/rarbg_dump_scraper') }
  // { scraper: require('../scrapers/thepiratebay/thepiratebay_dump_scraper') }
  // { scraper: require('../scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper') }
  // { scraper: require('../scrapers/thepiratebay/thepiratebay_update_size_scraper') }
];
|
||||
@@ -1,64 +0,0 @@
|
||||
const Bottleneck = require('bottleneck');
|
||||
const repository = require('../lib/repository')
|
||||
const { delay } = require('../lib/promises')
|
||||
const { updateCurrentSeeders } = require('../lib/torrent')
|
||||
const { updateTorrentSeeders } = require('../lib/torrentEntries')
|
||||
|
||||
const DELAY_MS = 0; // 0 seconds
|
||||
const DELAY_NEW_MS = 30_000; // 30 seconds
|
||||
const DELAY_NO_NEW_MS = 300_000; // 300 seconds
|
||||
const DELAY_FAILED_TORRENTS_MS = 5_000; // 5 seconds
|
||||
const updateLimiter = new Bottleneck({ maxConcurrent: 5 });
|
||||
const statistics = {};
|
||||
const statisticsNew = {};
|
||||
|
||||
/**
 * Endless self-rescheduling loop refreshing seeder counts for the general
 * torrent backlog: fetch a batch, probe current seeders, persist, record
 * statistics, then start over (DELAY_MS is 0, so back-to-back). Errors are
 * logged and the loop continues.
 */
function scheduleUpdateSeeders() {
  console.log('Starting seeders update...')
  getTorrents()
      .then(torrents => updateCurrentSeeders(torrents))
      .then(updatedTorrents => Promise.all(
          updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated)))))
      .then(torrents => updateStatistics(torrents, statistics))
      .then(() => console.log('Finished seeders update:', statistics))
      .catch(error => console.warn('Failed seeders update:', error))
      // Recursion instead of a timer keeps exactly one pass in flight.
      .then(() => delay(DELAY_MS))
      .then(() => scheduleUpdateSeeders());
}
|
||||
|
||||
/**
 * Endless self-rescheduling loop refreshing seeder counts for newly added
 * torrents: fetch the new batch, probe seeders, persist, record statistics,
 * wait DELAY_NEW_MS and repeat. Errors are logged and the loop continues.
 */
function scheduleUpdateSeedersForNewTorrents() {
  console.log('Starting seeders update for new torrents...')
  getNewTorrents()
      .then(torrents => updateCurrentSeeders(torrents))
      .then(updatedTorrents => Promise.all(
          updatedTorrents.map(updated => updateLimiter.schedule(() => updateTorrentSeeders(updated)))))
      .then(torrents => updateStatistics(torrents, statisticsNew))
      .then(() => console.log('Finished seeders update for new torrents:', statisticsNew))
      .catch(error => console.warn('Failed seeders update for new torrents:', error))
      // Recursion instead of a timer keeps exactly one pass in flight.
      .then(() => delay(DELAY_NEW_MS))
      .then(() => scheduleUpdateSeedersForNewTorrents());
}
|
||||
|
||||
/**
 * Fetches the next backlog batch of torrents whose seeders need updating,
 * retrying forever with a short delay if the repository call fails.
 * @returns {Promise<Array>} torrents to update
 */
async function getTorrents() {
  try {
    return await repository.getUpdateSeedersTorrents();
  } catch (error) {
    return delay(DELAY_FAILED_TORRENTS_MS).then(() => getTorrents());
  }
}
|
||||
|
||||
/**
 * Fetches the next batch of newly added torrents for seeder updates.
 * Repository failures retry after a short delay; an empty result waits the
 * longer DELAY_NO_NEW_MS before polling again, so the promise only resolves
 * with a non-empty batch.
 * @returns {Promise<Array>} non-empty list of new torrents
 */
async function getNewTorrents() {
  const torrents = await repository.getUpdateSeedersNewTorrents()
      .catch(() => delay(DELAY_FAILED_TORRENTS_MS).then(() => getNewTorrents()));
  if (!torrents.length) {
    console.log('No new torrents to update seeders')
    return delay(DELAY_NO_NEW_MS).then(() => getNewTorrents())
  }
  return torrents;
}
|
||||
|
||||
/**
 * Accumulates into statisticsObject, keyed by today's date (YYYY-MM-DD),
 * the number of torrents contained in the given nested result lists.
 * @param {Array<Array>} updatedTorrents - per-batch result arrays
 * @param {Object} statisticsObject - mutable date -> count accumulator
 */
function updateStatistics(updatedTorrents, statisticsObject) {
  let totalTorrents = 0;
  for (const nested of updatedTorrents) {
    totalTorrents += nested.length;
  }
  const date = new Date().toISOString().replace(/T.*/, '');
  statisticsObject[date] = (statisticsObject[date] || 0) + totalTorrents;
}
|
||||
|
||||
module.exports = { scheduleUpdateSeeders, scheduleUpdateSeedersForNewTorrents }
|
||||
@@ -1,206 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const Sugar = require('sugar-date');
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
const defaultProxies = [
|
||||
'https://1337x.to'
|
||||
];
|
||||
const defaultTimeout = 50000;
|
||||
const maxSearchPage = 50;
|
||||
|
||||
let FlaresolverrUserAgent = '';
|
||||
let FlaresolverrCookies = '';
|
||||
|
||||
const Categories = {
|
||||
MOVIE: 'Movies',
|
||||
TV: 'TV',
|
||||
ANIME: 'Anime',
|
||||
DOCUMENTARIES: 'Documentaries',
|
||||
APPS: 'Apps',
|
||||
GAMES: 'Games',
|
||||
MUSIC: 'Music',
|
||||
PORN: 'XXX',
|
||||
OTHER: 'Other',
|
||||
};
|
||||
|
||||
/**
 * Fetches and parses a single 1337x torrent detail page.
 * @param {string} torrentId - torrent slug, or a "/torrent/<slug>" path
 * @param {Object} config - { proxyList?, timeout? }
 * @param {number} retries - remaining attempts; any failure retries with retries-1
 * @returns {Promise<Object>} parsed torrent, with its slug as `torrentId`
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const slug = torrentId.startsWith('/torrent/') ? torrentId.replace('/torrent/', '') : torrentId;

  // Race all proxies; the first body returned wins.
  return Promises.first(proxyList
      .map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${slug}`, config)))
      .then((body) => parseTorrentPage(body))
      .then((torrent) => ({ torrentId: slug, ...torrent }))
      .catch((err) => torrent(slug, config, retries - 1));
}
|
||||
|
||||
/**
 * Searches 1337x for a keyword, optionally within a category, following
 * result pages while they come back full.
 * @param {string} keyword
 * @param {Object} config - { proxyList?, page?, category?, extendToPage?, timeout? }
 * @param {number} retries - remaining attempts on failure
 * @returns {Promise<Array>} accumulated torrent rows
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const page = config.page || 1;
  const category = config.category;
  const extendToPage = Math.min(maxSearchPage, (config.extendToPage || 1))
  const requestUrl = proxyUrl => category
      ? `${proxyUrl}/category-search/${keyword}/${category}/${page}/`
      : `${proxyUrl}/search/${keyword}/${page}/`;

  return Promises.first(proxyList
      .map(proxyUrl => singleRequest(requestUrl(proxyUrl), config)))
      .then(body => parseTableBody(body))
      // A full page (40 rows) suggests more results exist; recurse into the
      // next page up to extendToPage, tolerating failures of deeper pages.
      .then(torrents => torrents.length === 40 && page < extendToPage
          ? search(keyword, { ...config, page: page + 1 }).catch(() => [])
              .then(nextTorrents => torrents.concat(nextTorrents))
          : torrents)
      .catch((err) => search(keyword, config, retries - 1));
}
|
||||
|
||||
/**
 * Lists torrents for a 1337x category page, optionally sorted.
 * @param {Object} config - { proxyList?, page?, category, sort?, timeout? }
 * @param {number} retries - remaining retry attempts
 * @returns {Promise<Array>} parsed torrent rows
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const proxyList = config.proxyList || defaultProxies;
  const page = config.page || 1;
  const category = config.category;
  const sort = config.sort;
  const requestUrl = proxyUrl => sort
      ? `${proxyUrl}/sort-cat/${category}/${sort}/desc/${page}/`
      : `${proxyUrl}/cat/${category}/${page}/`;

  return Promises.first(proxyList
      .map((proxyUrl) => singleRequest(requestUrl(proxyUrl), config)))
      .then((body) => parseTableBody(body))
      .catch((err) => {
        console.error(err);
        // Bug fix: the retry promise was not returned, so after any failure
        // callers received undefined instead of the retried result (and the
        // final rejection was lost).
        return browse(config, retries - 1);
      });
}
|
||||
|
||||
/**
 * Fetches a page either through FlareSolverr (while no Cloudflare clearance
 * is cached, to solve the challenge) or directly using the user agent and
 * cookies captured from a previous FlareSolverr solution.
 * @param {string} requestUrl - absolute URL to fetch
 * @param {Object} config - { timeout? }
 * @returns {Promise<string>} the page HTML
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  let options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };

  // No cached clearance yet: proxy the request through FlareSolverr.
  if (FlaresolverrUserAgent === '' || FlaresolverrCookies === '') {
    console.log("using flaresolverr");
    return axios.post('http://flaresolverr:8191/v1', {
      cmd: 'request.get',
      url: requestUrl,
    }, options)
        .then((response) => {
          if (response.data.status !== 'ok') {
            throw new Error(`FlareSolverr did not return status 'ok': ${response.data.message}`)
          }

          const body = response.data.solution.response;
          if (!body) {
            throw new Error(`No body: ${requestUrl}`);
          } else if (body.includes('502: Bad gateway') ||
              body.includes('403 Forbidden') ||
              !(body.includes('1337x</title>'))) {
            // Missing "1337x</title>" marker means we got a block/interstitial page.
            throw new Error(`Invalid body contents: ${requestUrl}`);
          }
          // Cache the solved user agent and cookies for direct follow-ups.
          // NOTE(review): cookies are appended on every solve without being
          // cleared first, so the header can accumulate duplicates — verify.
          FlaresolverrUserAgent = response.data.solution.userAgent;
          response.data.solution.cookies.forEach(cookie => {
            FlaresolverrCookies = FlaresolverrCookies + `${cookie.name}=${cookie.value}; `;
          });

          return body;
        });
  }
  else {
    // Clearance cached: hit the site directly with the captured identity.
    console.log("using direct request");
    options.headers['User-Agent'] = FlaresolverrUserAgent;
    options.headers['Cookie'] = FlaresolverrCookies;
    return axios.get(requestUrl, options)
        .then((response) => {
          const body = response.data;
          if (!body) {
            throw new Error(`No body: ${requestUrl}`);
          } else if (body.includes('502: Bad gateway') ||
              body.includes('403 Forbidden') ||
              !(body.includes('1337x</title>'))) {
            throw new Error(`Invalid body contents: ${requestUrl}`);
          }
          return body;
        })
  }
}
|
||||
|
||||
/**
 * Extracts the torrent rows from a 1337x listing page.
 * @param {string} body - listing page HTML
 * @returns {Promise<Array<{name, torrentId, seeders, leechers, size}>>}
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      // Bug fix: previously execution fell through after reject() and the
      // parsing code below still ran.
      return reject(new Error('Failed loading body'));
    }

    const torrents = [];

    $('.table > tbody > tr').each((i, element) => {
      const row = $(element);
      torrents.push({
        name: row.find('a').eq(1).text(),
        torrentId: row.find('a').eq(1).attr('href').replace('/torrent/', ''),
        seeders: parseInt(row.children('td.coll-2').text()),
        leechers: parseInt(row.children('td.coll-3').text()),
        size: parseSize(row.children('td.coll-4').text())
      });
    });

    resolve(torrents);
  });
}
|
||||
|
||||
/**
 * Parses a 1337x torrent detail page into a torrent record, including the
 * file listing and an imdb id scraped from the description when present.
 * @param {string} body - detail page HTML
 * @returns {Promise<Object>} parsed torrent
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      // Bug fix: previously execution fell through after reject() and the
      // parsing code below still ran.
      return reject(new Error('Failed loading body'));
    }

    const details = $('.torrent-detail-page');
    const magnetLink = details.find('a:contains(\'Magnet Download\')').attr('href');
    const imdbIdMatch = details.find('div[id=\'description\']').html().match(/imdb\.com\/title\/(tt\d+)/i);

    const torrent = {
      // Magnet names are URL-encoded with '+' for spaces.
      name: escapeHTML(decode(magnetLink).name.replace(/\+/g, ' ')),
      infoHash: decode(magnetLink).infoHash,
      magnetLink: magnetLink,
      seeders: parseInt(details.find('strong:contains(\'Seeders\')').next().text(), 10),
      leechers: parseInt(details.find('strong:contains(\'Leechers\')').next().text(), 10),
      category: details.find('strong:contains(\'Category\')').next().text(),
      languages: details.find('strong:contains(\'Language\')').next().text(),
      size: parseSize(details.find('strong:contains(\'Total size\')').next().text()),
      uploadDate: parseDate(details.find('strong:contains(\'Date uploaded\')').next().text()),
      imdbId: imdbIdMatch && imdbIdMatch[1],
      // File entries are rendered as "path/to/name (size)".
      files: details.find('div[id=\'files\']').first().find('li')
          .map((i, elem) => $(elem).text())
          .map((i, text) => ({
            fileIndex: i,
            name: text.match(/^(.+)\s\(.+\)$/)[1].replace(/^.+\//g, ''),
            path: text.match(/^(.+)\s\(.+\)$/)[1],
            size: parseSize(text.match(/^.+\s\((.+)\)$/)[1])
          })).get()
    };
    resolve(torrent);
  });
}
|
||||
|
||||
/**
 * Converts a 1337x human-readable upload date into a Date.
 * Very old uploads are rendered like "1 decade ago", which the date parser
 * does not handle; those are approximated as ten years back.
 * @param {string} dateString - e.g. "2 years ago", "Jan. 5th '21"
 * @returns {Date}
 */
function parseDate(dateString) {
  const isDecadesAgo = /decade.*ago/i.test(dateString);
  return Sugar.Date.create(isDecadesAgo ? '10 years ago' : dateString);
}
|
||||
|
||||
module.exports = { torrent, search, browse, Categories, FlaresolverrCookies, FlaresolverrUserAgent };
|
||||
@@ -1,115 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const leetx = require('./1337x_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = '1337x';
|
||||
const UNTIL_PAGE = 10;
|
||||
const TYPE_MAPPING = typeMapping();
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
/**
 * Runs a full 1337x scrape pass and stamps the provider row with the scrape
 * start time afterwards.
 * @returns {Promise} resolves when the pass and bookkeeping are done
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  return scrapeLatestTorrents()
      .then(() => {
        lastScrape.lastScraped = scrapeStart;
        return lastScrape.save();
      })
      .then(() => console.log(`[${moment()}] finished ${NAME} scrape`))
      .then(() => {
        // NOTE(review): this assigns to properties of the imported module
        // object, but the api module reads its own module-local variables in
        // singleRequest — this reset likely has no effect; verify against
        // 1337x_api.
        leetx.FlaresolverrCookies = '';
        leetx.FlaresolverrUserAgent = '';
      });
}
|
||||
|
||||
// Re-fetches the torrent's detail page to obtain fresh seeder/leecher
// counts, throttled by the shared limiter.
async function updateSeeders(torrent) {
  return limiter.schedule(() => leetx.torrent(torrent.torrentId));
}
|
||||
|
||||
/**
 * Scrapes the allowed 1337x categories sequentially and flattens the
 * per-category results into a single list.
 */
async function scrapeLatestTorrents() {
  const allowedCategories = [
    leetx.Categories.MOVIE,
    leetx.Categories.TV,
    leetx.Categories.ANIME,
    leetx.Categories.DOCUMENTARIES
  ];
  const entries = await Promises.sequence(
      allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category)));
  return entries.reduce((a, b) => a.concat(b), []);
}
|
||||
|
||||
/**
 * Scrapes one category listing page, processes every row, then walks
 * forward page by page until a page yields nothing or the per-category
 * page limit is reached.
 * @param {string} category - a leetx.Categories value
 * @param {number} page - 1-based listing page
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  return leetx.browse(({ category, page }))
      .catch(error => {
        // A failed listing page ends this category's walk gracefully.
        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
        return Promise.resolve([]);
      })
      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
      .then(resolved => resolved.length > 0 && page < untilPage(category)
          ? scrapeLatestTorrentsForCategory(category, page + 1)
          : Promise.resolve());
}
|
||||
|
||||
/**
 * Processes one browse-listing row: skips records already stored, fetches
 * the full detail page, validates it, and persists a new torrent entry.
 * @param {Object} record - row from the listing (name, torrentId, seeders, ...)
 */
async function processTorrentRecord(record) {
  // Fast path: if the listing row alone matches a known torrent, refresh it
  // and skip the detail-page request entirely.
  if (await checkAndUpdateTorrent({ provider: NAME, ...record })) {
    return record;
  }

  const torrentFound = await leetx.torrent(record.torrentId).catch(() => undefined);

  if (!torrentFound || !TYPE_MAPPING[torrentFound.category]) {
    return Promise.resolve('Invalid torrent record');
  }
  // An unparsable upload date coerces to NaN here; skip such records.
  if (isNaN(torrentFound.uploadDate)) {
    console.warn(`Incorrect upload date for [${torrentFound.infoHash}] ${torrentFound.name}`);
    return;
  }
  // Second chance with the full record (now that the info hash is known).
  if (await checkAndUpdateTorrent(torrentFound)) {
    return torrentFound;
  }

  const torrent = {
    infoHash: torrentFound.infoHash,
    provider: NAME,
    torrentId: torrentFound.torrentId,
    // Collapse tabs and repeated whitespace in the title.
    title: torrentFound.name.replace(/\t|\s+/g, ' ').trim(),
    type: TYPE_MAPPING[torrentFound.category],
    size: torrentFound.size,
    seeders: torrentFound.seeders,
    uploadDate: torrentFound.uploadDate,
    imdbId: torrentFound.imdbId,
    languages: torrentFound.languages || undefined
  };

  return createTorrentEntry(torrent);
}
|
||||
|
||||
/**
 * Builds the 1337x-category -> internal Type lookup table.
 * @returns {Object} category name keyed map of Type values
 */
function typeMapping() {
  return {
    [leetx.Categories.MOVIE]: Type.MOVIE,
    [leetx.Categories.DOCUMENTARIES]: Type.MOVIE,
    [leetx.Categories.TV]: Type.SERIES,
    [leetx.Categories.ANIME]: Type.ANIME,
  };
}
|
||||
|
||||
/**
 * Maximum number of listing pages to walk for a category; low-volume
 * categories get shallower limits.
 * @param {string} category - a leetx.Categories value
 * @returns {number} last page to scrape (inclusive)
 */
function untilPage(category) {
  switch (category) {
    case leetx.Categories.ANIME:
      return 5;
    case leetx.Categories.DOCUMENTARIES:
      return 1;
    default:
      return UNTIL_PAGE;
  }
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,128 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require("magnet-uri");
|
||||
const { getRandomUserAgent } = require("../../lib/requestHelper");
|
||||
|
||||
const defaultTimeout = 10000;
|
||||
const maxSearchPage = 50;
|
||||
|
||||
const baseUrl = 'https://animestorrent.com';
|
||||
|
||||
const Categories = {
|
||||
MOVIE: 'filme',
|
||||
ANIME: 'tv',
|
||||
OVA: 'ova'
|
||||
};
|
||||
|
||||
/**
 * Fetches an AnimesTorrent detail page and parses every magnet on it into
 * a record carrying the page slug as torrentId.
 * @param {string} torrentId - detail-page URL (the slug is its 4th segment)
 * @param {Object} config - { timeout? }
 * @param {number} retries - remaining attempts on failure
 * @returns {Promise<Array<Object>>} one record per magnet link found
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  // assumes torrentId is an absolute URL like https://host/slug/ — TODO confirm
  const slug = torrentId.split("/")[3];
  return singleRequest(`${baseUrl}/${slug}`, config)
      .then((body) => parseTorrentPage(body))
      .then((torrent) => torrent.map((el) => ({ torrentId: slug, ...el })))
      .catch((err) => torrent(slug, config, retries - 1));
}
|
||||
|
||||
/**
 * Searches AnimesTorrent for a keyword, following result pages while they
 * come back full.
 * @param {string} keyword
 * @param {Object} config - { page?, extendToPage?, timeout? }
 * @param {number} retries - remaining attempts on failure
 * @returns {Promise<Array>} accumulated result rows
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const extendToPage = Math.min(maxSearchPage, config.extendToPage || 1);

  return singleRequest(`${baseUrl}/page/${page}/?s=${keyword}`, config)
      .then((body) => parseTableBody(body))
      // A full page (40 rows) suggests more results exist; recurse into the
      // next page up to extendToPage, tolerating failures of deeper pages.
      .then((torrents) =>
          torrents.length === 40 && page < extendToPage
              ? search(keyword, { ...config, page: page + 1 })
                  .catch(() => [])
                  .then((nextTorrents) => torrents.concat(nextTorrents))
              : torrents)
      .catch((err) => search(keyword, config, retries - 1));
}
|
||||
|
||||
/**
 * Lists AnimesTorrent entries for a page, optionally within a category.
 * @param {Object} config - { page?, category?, timeout? }
 * @param {number} retries - remaining retry attempts
 * @returns {Promise<Array>} parsed listing rows
 */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const category = config.category;
  const requestUrl = category
      ? `${baseUrl}/tipo/${category}/page/${page}/`
      : `${baseUrl}/page/${page}/`;

  return singleRequest(requestUrl, config)
      .then((body) => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
|
||||
|
||||
/**
 * Fetches a page with a randomized user agent and validates the body.
 * @param {string} requestUrl - absolute URL to fetch
 * @param {Object} config - { timeout? }
 * @returns {Promise<string>} page HTML
 * @throws rejects when the body is missing or looks like a block page
 */
function singleRequest(requestUrl, config = {}) {
  const options = {
    headers: { 'User-Agent': getRandomUserAgent() },
    timeout: config.timeout || defaultTimeout,
  };

  return axios.get(requestUrl, options).then((response) => {
    const body = response.data;
    if (!body) {
      throw new Error(`No body: ${requestUrl}`);
    }
    const looksBlocked =
        body.includes("502: Bad gateway") || body.includes("403 Forbidden");
    if (looksBlocked) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  });
}
|
||||
|
||||
/**
 * Extracts the result list from an AnimesTorrent browse/search page.
 * @param {string} body - listing page HTML
 * @returns {Promise<Array<{name: string, torrentId: string}>>}
 *   torrentId here is the detail-page URL, not a numeric id.
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      reject(new Error("Failed loading body"));
    }

    const torrents = [];

    $("article.bs").each((i, element) => {
      const row = $(element);
      torrents.push({
        name: row.find("span.ntitle").text(),
        torrentId: row.find("div > a").attr("href"),
      });
    });
    resolve(torrents);
  });
}
|
||||
|
||||
/**
 * Parses an AnimesTorrent detail page into one record per magnet link found
 * (a page may host several qualities/episodes).
 * @param {string} body - detail page HTML
 * @returns {Promise<Array<Object>>} parsed records
 */
function parseTorrentPage(body) {
  // Bug fix: the executor was needlessly `async`; a thrown parsing error
  // then rejected an internal promise nobody observed while the returned
  // promise never settled. A plain executor lets errors reject it normally.
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      // Also return here so parsing does not continue after rejection.
      return reject(new Error("Failed loading body"));
    }
    let magnets = [];
    $(`a[href^="magnet"]`).each((i, section) => {
      const magnet = $(section).attr("href");
      magnets.push(magnet);
    });
    const details = $('div.infox')
    const torrents = magnets.map((magnetLink) => {
      return {
        title: decode(magnetLink).name,
        originalName: details.find('h1.entry-title').text(),
        // The release year is the text node following the "Lançamento:" label.
        year: details.find('b:contains(\'Lançamento:\')')[0]
          ? details.find('b:contains(\'Lançamento:\')')[0].nextSibling.nodeValue.trim()
          : '',
        infoHash: decode(magnetLink).infoHash,
        magnetLink: magnetLink,
        category: details.find('b:contains(\'Tipo:\')').next().attr('href').split('/')[4],
        uploadDate: new Date($("time[itemprop=dateModified]").attr("datetime")),
      };
    })
    resolve(torrents);
  });
}
|
||||
|
||||
module.exports = { torrent, search, browse, Categories };
|
||||
@@ -1,108 +0,0 @@
|
||||
const moment = require("moment");
|
||||
const Bottleneck = require("bottleneck");
|
||||
const animetorrrent = require("./animestorrent_api");
|
||||
const { Type } = require("../../lib/types");
|
||||
const repository = require("../../lib/repository");
|
||||
const Promises = require("../../lib/promises");
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
|
||||
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
|
||||
const { getKitsuId } = require("../../lib/metadata");
|
||||
|
||||
const NAME = "AnimesTorrent";
|
||||
const UNTIL_PAGE = 5;
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 5 });
|
||||
|
||||
/**
 * Runs a full AnimesTorrent scrape pass and stamps the provider row with
 * the scrape start time afterwards.
 * @returns {Promise} resolves when the pass and bookkeeping are done
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  return scrapeLatestTorrents()
      .then(() => {
        lastScrape.lastScraped = scrapeStart;
        return lastScrape.save();
      })
      .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
|
||||
|
||||
/** Re-fetches the torrent's detail page (rate limited) to refresh seeder data. */
async function updateSeeders(torrent) {
  const fetchTask = () => animetorrrent.torrent(torrent.torrentId);
  return limiter.schedule(fetchTask);
}
|
||||
|
||||
/**
 * Crawls every allowed category sequentially and flattens the per-category
 * results into a single list.
 */
async function scrapeLatestTorrents() {
  const allowedCategories = [
    animetorrrent.Categories.MOVIE,
    animetorrrent.Categories.ANIME,
    animetorrrent.Categories.OVA
  ];
  const categoryTasks = allowedCategories
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(categoryTasks);
  return perCategory.reduce((all, part) => all.concat(part), []);
}
|
||||
|
||||
/**
 * Scrapes one listing page of a category and recurses onto the next page
 * while results keep coming and the page cap for the category is not hit.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  let entries;
  try {
    entries = await animetorrrent.browse({ category, page });
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    entries = [];
  }
  const processed = await Promise.all(
      entries.map((torrent) => limiter.schedule(() => processEntry(torrent))));
  if (processed.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
|
||||
|
||||
/** Fetches a listing entry's detail records and stores each one; errors are swallowed. */
async function processEntry(entry) {
  try {
    const records = await animetorrrent.torrent(entry.torrentId);
    return await Promises.sequence(records.map((record) => () => processTorrentRecord(record)));
  } catch (err) {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Stores one scraped torrent record: skips already-known entries, backfills
 * size/seeders when missing, resolves a Kitsu id from the original title,
 * then persists the normalized entry.
 */
async function processTorrentRecord(foundTorrent) {
  const alreadyKnown = await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  if (alreadyKnown) {
    return foundTorrent;
  }

  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }
  if (!foundTorrent.imdbId && !foundTorrent.kitsuId) {
    const lookupInfo = { title: foundTorrent.originalName, year: foundTorrent.year };
    foundTorrent.kitsuId = await getKitsuId(lookupInfo).catch(() => undefined);
  }

  const newEntry = {
    infoHash: foundTorrent.infoHash,
    provider: NAME,
    torrentId: foundTorrent.torrentId,
    title: foundTorrent.title,
    type: Type.ANIME,
    imdbId: foundTorrent.imdbId,
    kitsuId: foundTorrent.kitsuId,
    uploadDate: foundTorrent.uploadDate,
    seeders: foundTorrent.seeders,
    size: foundTorrent.size,
    files: foundTorrent.files,
    languages: foundTorrent.languages
  };
  return createTorrentEntry(newEntry);
}
|
||||
|
||||
/** Maximum listing-page depth to crawl for the given category. */
function untilPage(category) {
  switch (category) {
    case animetorrrent.Categories.ANIME:
      return 5;
    case animetorrrent.Categories.OVA:
      return 3;
    default:
      return UNTIL_PAGE;
  }
}
|
||||
|
||||
// Scraper entry points consumed by the scheduler.
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,163 +0,0 @@
|
||||
const axios = require('axios');
const moment = require("moment")
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtLanguages, sanitizePtOriginalName } = require('../scraperHelper')

// Request timeout (ms) and hard cap on search pagination depth.
const defaultTimeout = 30000;
const maxSearchPage = 50

const baseUrl = 'https://comando.to';

// Site category slugs as they appear in listing URLs.
const Categories = {
  MOVIE: 'filmes',
  TV: 'series',
  ANIME: 'animes',
  DOCUMENTARIES: 'documentario'
};
|
||||
|
||||
/**
 * Fetches and parses one Comando detail page, retrying up to `retries`
 * times on failure. Resolves to records tagged with the page slug.
 */
async function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    throw new Error(`Failed ${torrentId} query`);
  }
  const slug = torrentId.split("/")[3];
  try {
    const body = await singleRequest(`${baseUrl}/${slug}`, config);
    const records = await parseTorrentPage(body);
    return records.map(el => ({ torrentId: slug, ...el }));
  } catch (err) {
    console.warn(`Failed Comando ${slug} request: `, err);
    return torrent(torrentId, config, retries - 1);
  }
}
|
||||
|
||||
/**
 * Keyword search; follows pagination while result pages stay full (40 items)
 * up to `extendToPage`, and retries the whole call on failure.
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const lastPage = Math.min(maxSearchPage, (config.extendToPage || 1));

  return singleRequest(`${baseUrl}/page/${page}/?s=${keyword}`, config)
      .then(body => parseTableBody(body))
      .then(torrents => {
        const pageIsFull = torrents.length === 40;
        if (!pageIsFull || page >= lastPage) {
          return torrents;
        }
        return search(keyword, { ...config, page: page + 1 })
            .catch(() => [])
            .then(nextTorrents => torrents.concat(nextTorrents));
      })
      .catch(() => search(keyword, config, retries - 1));
}
|
||||
|
||||
/** Lists one page of a category; retries the whole request on failure. */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const listingUrl = `${baseUrl}/category/${config.category}/page/${page}/`;

  return singleRequest(listingUrl, config)
      .then(body => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
|
||||
|
||||
/**
 * Performs a single GET with a random User-Agent; rejects (with the error
 * message string) on empty bodies or known gateway/forbidden error pages.
 */
async function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };

  try {
    const response = await axios.get(requestUrl, options);
    const body = response.data;
    if (!body) {
      throw new Error(`No body: ${requestUrl}`);
    }
    if (body.includes('502: Bad gateway') || body.includes('403 Forbidden')) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  } catch (error) {
    // Preserve the historical contract of rejecting with the message string.
    return Promise.reject(error.message || error);
  }
}
|
||||
|
||||
/**
 * Parses a listing page into { name, torrentId } entries.
 *
 * Fix: `reject(...)` was not followed by `return`, so on a bad document the
 * function kept executing and also called `resolve` on the settled promise.
 *
 * @param {string} body - raw HTML of a listing page.
 * @returns {Promise<Array<{name: string, torrentId: string}>>}
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      return reject(new Error('Failed loading body'));
    }

    const torrents = [];

    $('article').each((i, element) => {
      const row = $(element);
      torrents.push({
        name: row.find("h2 > a").text(),
        torrentId: row.find("h2 > a").attr("href")
      });
    });
    resolve(torrents);
  });
}
|
||||
|
||||
/**
 * Parses a Comando detail page into torrent records, one per PT-dubbed
 * magnet link found between section headers.
 *
 * NOTE(review): `languagesElem[0].nextSibling` assumes the language label is
 * always present; if it is missing the thrown TypeError rejects the promise
 * via the executor — confirm this is the intended failure mode.
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      reject(new Error('Failed loading body'));
    }
    // Magnets grouped under <h2><strong> headers whose text passes the
    // PT-dubbed filter; each header's following siblings (up to the next
    // h2/hr) are searched for magnet anchors.
    const magnets = $('h2 > strong')
        .filter((i, elem) => isPtDubbed($(elem).text())).parent()
        .map((i, elem) => $(elem).nextUntil('h2, hr'))
        .map((i, elem) => $(elem).find('a[href^="magnet"]'))
        .map((i, section) => $(section).attr("href")).get();
    // Detail paragraph: the parent of any bold label matching these markers.
    const details = $('strong, b').filter((i, elem) => $(elem).text().match(/Servidor|Orig(?:\.|inal)/)).parent();
    const imdbIdMatch = details.find('a[href*="imdb.com"]').attr('href')
    const torrents = magnets.map(magnetLink => {
      const decodedMagnet = decode(magnetLink);
      const originalNameElem = details.find('strong, b')
          .filter((i, elem) => $(elem).text().match(/Baixar|Orig(?:\.|inal)|^Título:/));
      const languagesElem = details.find('strong, b')
          .filter((i, elem) => $(elem).text().match(/^\s*([IÍ]dioma|[AÁ]udio)/));
      const originalName = parseOriginalName(originalNameElem);
      // Prefer the (sanitized) magnet display name; fall back to the page's
      // original title when the magnet has none.
      const title = decodedMagnet.name && escapeHTML(decodedMagnet.name.replace(/\+/g, ' '));
      return {
        title: title ? sanitizePtName(title) : originalName,
        originalName: sanitizePtOriginalName(originalName),
        year: details.find('a[href*="comando.to/category/"]').text(),
        infoHash: decodedMagnet.infoHash,
        magnetLink: magnetLink,
        category: parseCategory($('div.entry-categories').html()),
        // Upload date is rendered in pt-br long form ('LL'), e.g. '10 de março de 2023'.
        uploadDate: new Date(moment($('a.updated').text(), 'LL', 'pt-br').format()),
        imdbId: imdbIdMatch ? imdbIdMatch.split('/')[4] : null,
        languages: sanitizePtLanguages(languagesElem[0].nextSibling.nodeValue)
      }
    });
    resolve(torrents.filter((x) => x));
  });
}
|
||||
|
||||
/**
 * Extracts the original (non-PT) title text that follows a label element,
 * stripping the leading 'Label: ' prefix. Returns '' when no label matched.
 */
function parseOriginalName(originalNameElem) {
  const labelNode = originalNameElem[0];
  if (!labelNode) {
    return '';
  }
  const rawName = originalNameElem.next().text()
      || labelNode.nextSibling.nodeValue
      || originalNameElem.text();
  return rawName.replace(/[^:]*: ?/, '').trim();
}
|
||||
|
||||
/**
 * Maps the page's category-links HTML fragment to a Categories value;
 * returns undefined when no known category link is present.
 */
function parseCategory(categorys) {
  const $ = cheerio.load(categorys)
  const matchers = [
    ['a:contains(\'animes\')', Categories.ANIME],
    ['a:contains(\'Filmes\')', Categories.MOVIE],
    ['a:contains(\'Series\')', Categories.TV],
  ];
  const matched = matchers.find(([selector]) => $(selector).text());
  return matched ? matched[1] : undefined;
}
|
||||
|
||||
// Public provider API consumed by the matching scraper module.
module.exports = { torrent, search, browse, Categories };
|
||||
@@ -1,112 +0,0 @@
|
||||
const moment = require("moment");
const Bottleneck = require("bottleneck");
const comando = require("./comando_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { getImdbId } = require("../../lib/metadata");

// Provider display name, used in logs and as the repository record key.
const NAME = "Comando";
// Default maximum number of listing pages crawled per category.
const UNTIL_PAGE = 5;
// Site-category -> stremio Type lookup, built once at load time.
const TYPE_MAPPING = typeMapping();

// Caps concurrent requests against the site.
const limiter = new Bottleneck({ maxConcurrent: 5 });
|
||||
|
||||
/**
 * Runs one full scrape cycle for this provider, then stamps the provider
 * record with the time the cycle started.
 */
async function scrape() {
  const startedAt = moment();
  const providerRecord = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  providerRecord.lastScraped = startedAt;
  await providerRecord.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
|
||||
|
||||
/** Re-fetches the torrent's detail page (rate limited) to refresh seeder data. */
async function updateSeeders(torrent) {
  const fetchTask = () => comando.torrent(torrent.torrentId);
  return limiter.schedule(fetchTask);
}
|
||||
|
||||
/**
 * Crawls every allowed category sequentially and flattens the per-category
 * results into a single list.
 */
async function scrapeLatestTorrents() {
  const allowedCategories = [
    comando.Categories.MOVIE,
    comando.Categories.TV
  ];
  const categoryTasks = allowedCategories
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(categoryTasks);
  return perCategory.reduce((all, part) => all.concat(part), []);
}
|
||||
|
||||
/**
 * Scrapes one listing page of a category and recurses onto the next page
 * while results keep coming and the page cap for the category is not hit.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  let entries;
  try {
    entries = await comando.browse({ category, page });
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    entries = [];
  }
  const processed = await Promise.all(
      entries.map((torrent) => limiter.schedule(() => processEntry(torrent))));
  if (processed.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
|
||||
|
||||
/** Fetches a listing entry's detail records and stores each one; errors are swallowed. */
async function processEntry(entry) {
  try {
    const records = await comando.torrent(entry.torrentId);
    return await Promises.sequence(records.map((record) => () => processTorrentRecord(record)));
  } catch (err) {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Stores one scraped torrent record: skips already-known entries, backfills
 * size/seeders when missing, resolves an IMDb id from the original title,
 * then persists the normalized entry.
 */
async function processTorrentRecord(foundTorrent) {
  const alreadyKnown = await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  if (alreadyKnown) {
    return foundTorrent;
  }

  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }
  if (!foundTorrent.imdbId && foundTorrent.originalName) {
    const lookupInfo = { title: foundTorrent.originalName, year: foundTorrent.year };
    foundTorrent.imdbId = await getImdbId(lookupInfo, TYPE_MAPPING[foundTorrent.category]).catch(() => undefined);
  }

  const newEntry = {
    infoHash: foundTorrent.infoHash,
    provider: NAME,
    torrentId: foundTorrent.torrentId,
    title: foundTorrent.title,
    type: TYPE_MAPPING[foundTorrent.category],
    imdbId: foundTorrent.imdbId,
    uploadDate: foundTorrent.uploadDate,
    seeders: foundTorrent.seeders,
    size: foundTorrent.size,
    files: foundTorrent.files,
    languages: foundTorrent.languages
  };
  return createTorrentEntry(newEntry);
}
|
||||
|
||||
/** Builds the site-category -> stremio Type lookup table. */
function typeMapping() {
  return {
    [comando.Categories.MOVIE]: Type.MOVIE,
    [comando.Categories.DOCUMENTARIES]: Type.SERIES,
    [comando.Categories.TV]: Type.SERIES,
    [comando.Categories.ANIME]: Type.ANIME,
  };
}
|
||||
|
||||
/** Maximum listing-page depth to crawl for the given category. */
function untilPage(category) {
  return category === comando.Categories.TV ? 5 : UNTIL_PAGE;
}
|
||||
|
||||
// Scraper entry points consumed by the scheduler.
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,135 +0,0 @@
|
||||
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require('magnet-uri');
const { escapeHTML } = require('../../lib/metadata');
const { getRandomUserAgent } = require('../../lib/requestHelper');
const { isPtDubbed, sanitizePtName, sanitizePtLanguages } = require('../scraperHelper')

// Request timeout (ms) and hard cap on search pagination depth.
const defaultTimeout = 30000;
const maxSearchPage = 50

const baseUrl = 'https://comoeubaixo.com';

// Site category slugs as they appear in listing URLs.
const Categories = {
  MOVIE: 'filmes',
  TV: 'series',
  ANIME: 'anime',
  DESENHOS: 'desenhos'
};
|
||||
|
||||
/**
 * Fetches and parses one ComoEuBaixo detail page, retrying up to `retries`
 * times on failure. Resolves to records tagged with the page slug.
 */
async function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    throw new Error(`Failed ${torrentId} query`);
  }
  const slug = encodeURIComponent(torrentId.split("/")[3]);
  try {
    const body = await singleRequest(`${baseUrl}/${slug}/`, config);
    const records = await parseTorrentPage(body);
    return records.map(el => ({ torrentId: slug, ...el }));
  } catch (err) {
    console.warn(`Failed ComoEuBaixo ${torrentId} request: `, err);
    return torrent(torrentId, config, retries - 1);
  }
}
|
||||
|
||||
/**
 * Keyword search; follows pagination while result pages stay full (40 items)
 * up to `extendToPage`, and retries the whole call on failure.
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const lastPage = Math.min(maxSearchPage, (config.extendToPage || 1));

  return singleRequest(`${baseUrl}/${keyword}/${page}/`, config)
      .then(body => parseTableBody(body))
      .then(torrents => {
        const pageIsFull = torrents.length === 40;
        if (!pageIsFull || page >= lastPage) {
          return torrents;
        }
        return search(keyword, { ...config, page: page + 1 })
            .catch(() => [])
            .then(nextTorrents => torrents.concat(nextTorrents));
      })
      .catch(() => search(keyword, config, retries - 1));
}
|
||||
|
||||
/** Lists one page, optionally category-scoped; retries the whole request on failure. */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const listingUrl = config.category
      ? `${baseUrl}/${config.category}/${page}/`
      : `${baseUrl}/${page}/`;

  return singleRequest(listingUrl, config)
      .then(body => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
|
||||
|
||||
/**
 * Performs a single GET with a random User-Agent; rejects (with the error
 * message string) on empty bodies or known gateway/forbidden error pages.
 *
 * Fix: Buffers expose `length`, not `size`; the previous empty-body check
 * (`!body.size`) was truthy for EVERY Buffer body, so any binary response
 * was incorrectly rejected as "No body".
 *
 * @param {string} requestUrl - absolute URL to fetch.
 * @param {Object} [config] - optional { timeout } override in ms.
 * @returns {Promise<string|Buffer>} the response body.
 */
function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };

  return axios.get(requestUrl, options)
      .then((response) => {
        const body = response.data;
        if (!body || (Buffer.isBuffer(body) && !body.length)) {
          throw new Error(`No body: ${requestUrl}`);
        } else if (body.includes('502: Bad gateway') ||
            body.includes('403 Forbidden')) {
          throw new Error(`Invalid body contents: ${requestUrl}`);
        }
        return body;
      })
      .catch(error => Promise.reject(error.message || error));
}
|
||||
|
||||
/**
 * Parses a listing page into { name, torrentId, isTorrent } entries.
 *
 * Fix: `reject(...)` was not followed by `return`, so on a bad document the
 * function kept executing and also called `resolve` on the settled promise.
 *
 * @param {string} body - raw HTML of a listing page.
 * @returns {Promise<Array<Object>>}
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      return reject(new Error('Failed loading body'));
    }

    const torrents = [];

    $('div.capa_larga.align-middle').each((i, element) => {
      const row = $(element);
      torrents.push({
        name: row.find("a").text(),
        torrentId: row.find("a").attr("href"),
        // Listing mixes direct downloads and torrents; only torrent rows
        // carry a 'Torrent' paragraph.
        isTorrent: !!row.find("p:contains(\'Torrent\')")[0]
      });
    });
    resolve(torrents);
  });
}
|
||||
|
||||
/**
 * Parses a ComoEuBaixo detail page into torrent records, one per PT-dubbed
 * magnet link.
 *
 * NOTE(review): several chains here assume labels always exist on the page
 * ('Gêneros: ', 'Baixar', 'Idioma', the imdb link); a missing one throws a
 * TypeError which rejects the promise via the executor — confirm this is
 * the intended failure mode.
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      reject(new Error('Failed loading body'));
    }
    // Magnet anchors whose title text passes the PT-dubbed filter.
    const magnets = $(`a[href^="magnet"]`)
        .filter((i, elem) => isPtDubbed($(elem).attr('title')))
        .map((i, elem) => $(elem).attr("href")).get();
    const details = $('div#informacoes')
    // Category comes from the first path segment of the genre link.
    const category = details.find('strong:contains(\'Gêneros: \')').next().attr('href').split('/')[0]
    const torrents = magnets.map(magnetLink => {
      const decodedMagnet = decode(magnetLink);
      const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '));
      const originalTitle = details.find('strong:contains(\'Baixar\')')[0].nextSibling.nodeValue.split('-')[0];
      const year = details.find('strong:contains(\'Data de Lançamento: \')').next().text().trim();
      // Used when the magnet's own display name is too short to be useful.
      const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`;
      return {
        title: name.length > 5 ? name : fallBackTitle,
        infoHash: decodedMagnet.infoHash,
        magnetLink: magnetLink,
        category: category,
        uploadDate: new Date($('time').attr('datetime')),
        imdbId: details.find('a[href*="imdb.com"]').attr('href').split('/')[4],
        languages: sanitizePtLanguages(details.find('strong:contains(\'Idioma\')')[0].nextSibling.nodeValue)
      };
    })
    resolve(torrents.filter((x) => x));
  });
}
|
||||
|
||||
// Public provider API consumed by the matching scraper module.
module.exports = { torrent, search, browse, Categories };
|
||||
@@ -1,115 +0,0 @@
|
||||
const moment = require("moment");
const Bottleneck = require("bottleneck");
const comoeubaixo = require("./comoeubaixo_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");

// Provider display name, used in logs and as the repository record key.
const NAME = "ComoEuBaixo";
// Default maximum number of listing pages crawled per category.
const UNTIL_PAGE = 5;
// Site-category -> stremio Type lookup, built once at load time.
const TYPE_MAPPING = typeMapping();

// Caps concurrent requests against the site.
const limiter = new Bottleneck({ maxConcurrent: 5 });
|
||||
|
||||
/**
 * Runs one full scrape cycle for this provider, then stamps the provider
 * record with the time the cycle started.
 */
async function scrape() {
  const startedAt = moment();
  const providerRecord = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  providerRecord.lastScraped = startedAt;
  await providerRecord.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
|
||||
|
||||
/** Re-fetches the torrent's detail page (rate limited) to refresh seeder data. */
async function updateSeeders(torrent) {
  const fetchTask = () => comoeubaixo.torrent(torrent.torrentId);
  return limiter.schedule(fetchTask);
}
|
||||
|
||||
/**
 * Crawls every allowed category sequentially and flattens the per-category
 * results into a single list.
 */
async function scrapeLatestTorrents() {
  const allowedCategories = [
    comoeubaixo.Categories.MOVIE,
    comoeubaixo.Categories.TV,
    comoeubaixo.Categories.DESENHOS
  ];
  const categoryTasks = allowedCategories
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(categoryTasks);
  return perCategory.reduce((all, part) => all.concat(part), []);
}
|
||||
|
||||
/**
 * Scrapes one listing page of a category and recurses onto the next page
 * while results keep coming and the page cap for the category is not hit.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  let entries;
  try {
    entries = await comoeubaixo.browse({ category, page });
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    entries = [];
  }
  const processed = await Promise.all(
      entries.map((torrent) => limiter.schedule(() => processEntry(torrent))));
  if (processed.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
|
||||
|
||||
/**
 * Processes one listing entry. Non-torrent rows (direct downloads) are
 * returned untouched; torrent rows have their detail records stored.
 */
async function processEntry(entry) {
  if (!entry.isTorrent) {
    return entry;
  }
  try {
    const records = await comoeubaixo.torrent(entry.torrentId);
    return await Promises.sequence(records.map((record) => () => processTorrentRecord(record)));
  } catch (err) {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Stores one scraped torrent record: skips already-known entries, backfills
 * size/seeders when missing, then persists the normalized entry.
 */
async function processTorrentRecord(foundTorrent) {
  const alreadyKnown = await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  if (alreadyKnown) {
    return foundTorrent;
  }

  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }

  const newEntry = {
    infoHash: foundTorrent.infoHash,
    provider: NAME,
    torrentId: foundTorrent.torrentId,
    title: foundTorrent.title,
    type: TYPE_MAPPING[foundTorrent.category],
    imdbId: foundTorrent.imdbId,
    uploadDate: foundTorrent.uploadDate,
    seeders: foundTorrent.seeders,
    size: foundTorrent.size,
    files: foundTorrent.files,
    languages: foundTorrent.languages
  };
  return createTorrentEntry(newEntry);
}
|
||||
|
||||
/** Builds the site-category -> stremio Type lookup table. */
function typeMapping() {
  return {
    [comoeubaixo.Categories.MOVIE]: Type.MOVIE,
    [comoeubaixo.Categories.TV]: Type.SERIES,
    [comoeubaixo.Categories.ANIME]: Type.ANIME,
    [comoeubaixo.Categories.DESENHOS]: Type.SERIES,
  };
}
|
||||
|
||||
/**
 * Maximum listing-page depth to crawl for the given category.
 *
 * Every category currently uses the same depth: the previous per-category
 * branches (DESENHOS, TV) all returned UNTIL_PAGE, so they were dead code
 * and have been collapsed. The parameter is kept so per-category depths can
 * be reintroduced without touching callers.
 */
function untilPage(category) {
  return UNTIL_PAGE;
}
|
||||
|
||||
// Scraper entry points consumed by the scheduler.
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,127 +0,0 @@
|
||||
const axios = require('axios');
const cheerio = require("cheerio");
const decode = require("magnet-uri");
const { getRandomUserAgent } = require("../../lib/requestHelper");

// Request timeout (ms) and hard cap on search pagination depth.
const defaultTimeout = 10000;
const maxSearchPage = 50;

const baseUrl = 'https://darkmahou.com';

// Site category slugs as they appear in listing URLs.
const Categories = {
  MOVIE: 'movie',
  ANIME: 'tv',
  OVA: 'ova'
};
|
||||
|
||||
/**
 * Fetches and parses one DarkMahou detail page, retrying up to `retries`
 * times on failure. Resolves to records tagged with the page slug.
 *
 * Fix: the retry previously recursed with `slug` instead of `torrentId`;
 * the slug contains no '/' segments, so `slug.split("/")[3]` was undefined
 * on every retry and each retry requested `${baseUrl}/undefined`.
 *
 * @param {string} torrentId - full detail-page URL.
 * @param {Object} [config] - request options forwarded to singleRequest.
 * @param {number} [retries] - attempts remaining.
 * @returns {Promise<Array<Object>>}
 */
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} query`));
  }
  const slug = torrentId.split("/")[3];
  return singleRequest(`${baseUrl}/${slug}`, config)
      .then((body) => parseTorrentPage(body))
      .then((torrents) => torrents.map((el) => ({ torrentId: slug, ...el })))
      .catch((err) => torrent(torrentId, config, retries - 1));
}
|
||||
|
||||
/**
 * Keyword search; follows pagination while result pages stay full (40 items)
 * up to `extendToPage`, and retries the whole call on failure.
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword || retries === 0) {
    return Promise.reject(new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const lastPage = Math.min(maxSearchPage, config.extendToPage || 1);

  return singleRequest(`${baseUrl}/page/${page}/?s=${keyword}`, config)
      .then((body) => parseTableBody(body))
      .then((torrents) => {
        const pageIsFull = torrents.length === 40;
        if (!pageIsFull || page >= lastPage) {
          return torrents;
        }
        return search(keyword, { ...config, page: page + 1 })
            .catch(() => [])
            .then((nextTorrents) => torrents.concat(nextTorrents));
      })
      .catch(() => search(keyword, config, retries - 1));
}
|
||||
|
||||
/** Lists one page, optionally category-scoped; retries the whole request on failure. */
function browse(config = {}, retries = 2) {
  if (retries === 0) {
    return Promise.reject(new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const listingUrl = config.category
      ? `${baseUrl}/category/${config.category}/page/${page}/`
      : `${baseUrl}/page/${page}/`;

  return singleRequest(listingUrl, config)
      .then((body) => parseTableBody(body))
      .catch(() => browse(config, retries - 1));
}
|
||||
|
||||
/**
 * Performs a single GET with a random User-Agent; rejects on empty bodies
 * or known gateway/forbidden error pages.
 */
async function singleRequest(requestUrl, config = {}) {
  const timeout = config.timeout || defaultTimeout;
  const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };

  const response = await axios.get(requestUrl, options);
  const body = response.data;
  if (!body) {
    throw new Error(`No body: ${requestUrl}`);
  }
  if (body.includes("502: Bad gateway") || body.includes("403 Forbidden")) {
    throw new Error(`Invalid body contents: ${requestUrl}`);
  }
  return body;
}
|
||||
|
||||
/**
 * Parses a listing page into { name, torrentId } entries.
 *
 * Fix: `reject(...)` was not followed by `return`, so on a bad document the
 * function kept executing and also called `resolve` on the settled promise.
 *
 * @param {string} body - raw HTML of a listing page.
 * @returns {Promise<Array<{name: string, torrentId: string}>>}
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      return reject(new Error("Failed loading body"));
    }

    const torrents = [];

    $("article.bs").each((i, element) => {
      const row = $(element);
      torrents.push({
        name: row.find("span.ntitle").text(),
        torrentId: row.find("div > a").attr("href"),
      });
    });
    resolve(torrents);
  });
}
|
||||
|
||||
/**
 * Parses a DarkMahou detail page into torrent records.
 *
 * Fixes over the previous version:
 *  - drops the `new Promise(async …)` executor anti-pattern (nothing was
 *    awaited inside it);
 *  - `reject` is now followed by `return`;
 *  - the magnet URI is decoded once per link instead of twice;
 *  - the 'Lançado:' label access is guarded (pages without it used to throw
 *    and reject the whole page; the sibling AnimesTorrent parser already
 *    guards the equivalent lookup).
 *
 * @param {string} body - raw HTML of the detail page.
 * @returns {Promise<Array<Object>>} one record per magnet link found.
 */
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);
    if (!$) {
      return reject(new Error("Failed loading body"));
    }

    const magnets = [];
    $(`a[href^="magnet"]`).each((i, section) => {
      magnets.push($(section).attr("href"));
    });

    const details = $('div.infox');
    // Hoisted: same label element for every magnet on the page.
    const releasedLabel = details.find('b:contains(\'Lançado:\')')[0];

    const torrent = magnets.map((magnetLink) => {
      const decodedMagnet = decode(magnetLink);
      return {
        title: decodedMagnet.name,
        originalName: details.find('h1.entry-title').text(),
        year: releasedLabel ? (releasedLabel.nextSibling.nodeValue || '') : '',
        infoHash: decodedMagnet.infoHash,
        magnetLink: magnetLink,
        category: details.find('b:contains(\'Tipo:\')').next().attr('href').split('/')[4],
        uploadDate: new Date($("time[itemprop=dateModified]").attr("datetime")),
      };
    });
    resolve(torrent.filter((x) => x));
  });
}
|
||||
|
||||
// Public provider API consumed by the matching scraper module.
module.exports = { torrent, search, browse, Categories };
|
||||
@@ -1,108 +0,0 @@
|
||||
const moment = require("moment");
const Bottleneck = require("bottleneck");
const darkmahou = require("./darkmahou_api");
const { Type } = require("../../lib/types");
const repository = require("../../lib/repository");
const Promises = require("../../lib/promises");
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
const { getKitsuId } = require("../../lib/metadata");

// Provider display name, used in logs and as the repository record key.
const NAME = "DarkMahou";
// Default maximum number of listing pages crawled per category.
const UNTIL_PAGE = 5;

// Caps concurrent requests against the site.
const limiter = new Bottleneck({ maxConcurrent: 5 });
|
||||
|
||||
/**
 * Runs one full scrape cycle for this provider, then stamps the provider
 * record with the time the cycle started.
 */
async function scrape() {
  const startedAt = moment();
  const providerRecord = await repository.getProvider({ name: NAME });
  console.log(`[${startedAt}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  providerRecord.lastScraped = startedAt;
  await providerRecord.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
|
||||
|
||||
/** Re-fetches the torrent's detail page (rate limited) to refresh seeder data. */
async function updateSeeders(torrent) {
  const fetchTask = () => darkmahou.torrent(torrent.torrentId);
  return limiter.schedule(fetchTask);
}
|
||||
|
||||
/**
 * Crawls every allowed category sequentially and flattens the per-category
 * results into a single list.
 */
async function scrapeLatestTorrents() {
  const allowedCategories = [
    darkmahou.Categories.MOVIE,
    darkmahou.Categories.ANIME,
    darkmahou.Categories.OVA
  ];
  const categoryTasks = allowedCategories
      .map((category) => () => scrapeLatestTorrentsForCategory(category));
  const perCategory = await Promises.sequence(categoryTasks);
  return perCategory.reduce((all, part) => all.concat(part), []);
}
|
||||
|
||||
/**
 * Scrapes one listing page of a category and recurses onto the next page
 * while results keep coming and the page cap for the category is not hit.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  let entries;
  try {
    entries = await darkmahou.browse({ category, page });
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
    entries = [];
  }
  const processed = await Promise.all(
      entries.map((torrent) => limiter.schedule(() => processEntry(torrent))));
  if (processed.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
  return undefined;
}
|
||||
|
||||
/** Fetches a listing entry's detail records and stores each one; errors are swallowed. */
async function processEntry(entry) {
  try {
    const records = await darkmahou.torrent(entry.torrentId);
    return await Promises.sequence(records.map((record) => () => processTorrentRecord(record)));
  } catch (err) {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Stores one scraped torrent record: skips already-known entries, backfills
 * size/seeders when missing, resolves a Kitsu id from the original title,
 * then persists the normalized entry.
 */
async function processTorrentRecord(foundTorrent) {
  const alreadyKnown = await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent });
  if (alreadyKnown) {
    return foundTorrent;
  }

  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }
  if (!foundTorrent.imdbId && !foundTorrent.kitsuId) {
    const lookupInfo = { title: foundTorrent.originalName, year: foundTorrent.year };
    foundTorrent.kitsuId = await getKitsuId(lookupInfo).catch(() => undefined);
  }

  const newEntry = {
    infoHash: foundTorrent.infoHash,
    provider: NAME,
    torrentId: foundTorrent.torrentId,
    title: foundTorrent.title,
    type: Type.ANIME,
    imdbId: foundTorrent.imdbId,
    kitsuId: foundTorrent.kitsuId,
    uploadDate: foundTorrent.uploadDate,
    seeders: foundTorrent.seeders,
    size: foundTorrent.size,
    files: foundTorrent.files,
    languages: foundTorrent.languages
  };
  return createTorrentEntry(newEntry);
}
|
||||
|
||||
/** Maximum listing-page depth to crawl for the given category. */
function untilPage(category) {
  switch (category) {
    case darkmahou.Categories.ANIME:
      return 5;
    case darkmahou.Categories.OVA:
      return 4;
    default:
      return UNTIL_PAGE;
  }
}
|
||||
|
||||
// Scraper entry points consumed by the scheduler.
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,93 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require("magnet-uri");
|
||||
const Promises = require("../../lib/promises");
|
||||
const { getRandomUserAgent } = require("../../lib/requestHelper");
|
||||
|
||||
const defaultTimeout = 10000;
|
||||
|
||||
const baseUrl = 'https://www.erai-raws.info';
|
||||
|
||||
const Categories = {
|
||||
ANIMES: 'anime',
|
||||
EPISODES: 'episodes'
|
||||
};
|
||||
|
||||
function browse(config = {}, retries = 2) {
|
||||
if (retries === 0) {
|
||||
return Promise.reject(new Error(`Failed browse request`));
|
||||
}
|
||||
const page = config.page || 1;
|
||||
const category = config.category;
|
||||
|
||||
return singleRequest(`${baseUrl}/${category}/page/${page}/`, config)
|
||||
.then((body) => parseTableBody(body)
|
||||
.then(animes => Promises.sequence(animes.map(anime => () => singleRequest(anime.animeLink))))
|
||||
.then(animeBodies => Promise.all(animeBodies.map(animeBody => parseTorrentPage(animeBody))))
|
||||
.then(animeInfos => animeInfos.reduce((a, b) => a.concat(b), [])))
|
||||
.catch((err) => browse(config, retries - 1));
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout, };
|
||||
|
||||
return axios.get(requestUrl, options).then((response) => {
|
||||
const body = response.data;
|
||||
if (!body || (Buffer.isBuffer(body) && !body.size)) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (
|
||||
body.includes("502: Bad gateway") ||
|
||||
body.includes("403 Forbidden")
|
||||
) {
|
||||
throw new Error(`Invalid body contents: ${requestUrl}`);
|
||||
}
|
||||
return body;
|
||||
});
|
||||
}
|
||||
|
||||
function parseTableBody(body) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error("Failed loading body"));
|
||||
}
|
||||
|
||||
const links = $('[itemprop=\'headline\'] a, .content-area a.aa_ss_ops_new')
|
||||
.map((i, element) => ({
|
||||
name: $(element).text(),
|
||||
animeLink: $(element).attr("href"),
|
||||
})).get();
|
||||
resolve(links);
|
||||
});
|
||||
}
|
||||
|
||||
function parseTorrentPage(body) {
|
||||
return new Promise(async (resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error("Failed loading body"));
|
||||
}
|
||||
const entries = $('.tab-content table, .content-area table')
|
||||
.map((i, entry) => {
|
||||
const languages = $(entry).find('.tooltip3').map((_, l) => $(l).attr('data-title')).get().join('/');
|
||||
const magnets = $(entry).find('a[href^="magnet"]').map((_, m) => $(m).attr('href')).get();
|
||||
return { languages, magnets }
|
||||
}).get();
|
||||
const torrents = entries
|
||||
.map(entry => entry.magnets
|
||||
.map(magnet => decode(magnet))
|
||||
.map(decodedMagnet => ({
|
||||
title: decodedMagnet.name,
|
||||
infoHash: decodedMagnet.infoHash,
|
||||
trackers: decodedMagnet.tr,
|
||||
languages: entry.languages
|
||||
})))
|
||||
.reduce((a, b) => a.concat(b), []);
|
||||
resolve(torrents);
|
||||
});
|
||||
}
|
||||
|
||||
module.exports = { browse, Categories };
|
||||
@@ -1,50 +0,0 @@
|
||||
const Parser = require('rss-parser');
|
||||
const decode = require("magnet-uri");
|
||||
|
||||
const parser = new Parser({
|
||||
customFields: {
|
||||
item: [['erai:subtitles', 'subtitles']]
|
||||
}
|
||||
});
|
||||
const baseUrl = 'https://www.erai-raws.info';
|
||||
const rssKey = process.env.ERAI_RSS_KEY;
|
||||
|
||||
const Categories = {
|
||||
ANIMES: 'anime',
|
||||
EPISODES: 'episodes'
|
||||
};
|
||||
|
||||
function browse() {
|
||||
return parser.parseURL(`${baseUrl}/feed/?type=magnet&${rssKey}`)
|
||||
.then(result => result.items
|
||||
.map(item => {
|
||||
const decodedMagnet = decode(item.link);
|
||||
const languages = parseLanguages(item.subtitles);
|
||||
return {
|
||||
title: decodedMagnet.name,
|
||||
infoHash: decodedMagnet.infoHash,
|
||||
trackers: decodedMagnet.tr,
|
||||
languages: languages
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
const languageMapping = {
|
||||
'us': 'English',
|
||||
'br': 'Portuguese(Brazil)',
|
||||
'mx': 'Spanish(Latin_America)',
|
||||
'es': 'Spanish',
|
||||
'sa': 'Arabic',
|
||||
'fr': 'French',
|
||||
'de': 'German',
|
||||
'it': 'Italian',
|
||||
'ru': 'Russian'
|
||||
}
|
||||
function parseLanguages(languages) {
|
||||
return languages.split('][')
|
||||
.map(lang => lang.replace(/[\[\]]/g, ''))
|
||||
.map(lang => languageMapping[lang] || lang)
|
||||
.join('/');
|
||||
}
|
||||
|
||||
module.exports = { browse, Categories };
|
||||
@@ -1,47 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const erairaws = require('./erairaws_rss_api');
|
||||
const { checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'EraiRaws';
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
async function scrape() {
|
||||
const scrapeStart = moment();
|
||||
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
|
||||
|
||||
return scrapeLatestTorrents()
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
return scrapeLatestTorrentsForCategory(erairaws.Categories.EPISODES)
|
||||
.then((entries) => entries.reduce((a, b) => a.concat(b), []));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
||||
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
|
||||
return erairaws.browse({ category, page })
|
||||
.catch((error) => {
|
||||
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then((torrents) => Promise.all(torrents.map((torrent) => limiter.schedule(() => processRecord(torrent)))))
|
||||
.then((resolved) => resolved.length > 0 && page < untilPage(category)
|
||||
? scrapeLatestTorrentsForCategory(category, page + 1)
|
||||
: Promise.resolve([]));
|
||||
}
|
||||
|
||||
async function processRecord(foundTorrent) {
|
||||
return checkAndUpdateTorrent({ provider: NAME, ...foundTorrent }).then(() => foundTorrent);
|
||||
}
|
||||
|
||||
function untilPage(category) {
|
||||
if (category === erairaws.Categories.ANIMES) {
|
||||
return 45;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
module.exports = { scrape, NAME };
|
||||
@@ -1,117 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
const { decode } = require("magnet-uri");
|
||||
const Promises = require('../../lib/promises');
|
||||
const { getRandomUserAgent } = require('./../../lib/requestHelper');
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
const defaultProxies = [
|
||||
'https://eztv.re'
|
||||
];
|
||||
const defaultTimeout = 120000;
|
||||
const minDelay = 3000;
|
||||
const jitterDelay = minDelay;
|
||||
const limit = 100;
|
||||
const maxPage = 5;
|
||||
|
||||
function torrent(torrentId, config = {}, retries = 1) {
|
||||
if (!torrentId) {
|
||||
return Promise.reject(new Error(`Failed ${torrentId} search`));
|
||||
}
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/ep/${torrentId}`, config)))
|
||||
.then(body => parseTorrentPage(body))
|
||||
.then(torrent => ({ torrentId, ...torrent }))
|
||||
.catch(error => retries ? jitter().then(() => torrent(torrentId, config, retries - 1)) : Promise.reject(error));
|
||||
}
|
||||
|
||||
function search(imdbId, config = {}, retries = 1) {
|
||||
if (!imdbId) {
|
||||
return Promise.reject(new Error(`Failed ${imdbId} search`));
|
||||
}
|
||||
const id = imdbId.replace('tt', '');
|
||||
const page = config.page || 1;
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}&imdb_id=${id}`, config)))
|
||||
.then(results => parseResults(results))
|
||||
.then(torrents => torrents.length === limit && page < maxPage
|
||||
? search(imdbId, { ...config, page: page + 1 }).catch(() => [])
|
||||
.then(nextTorrents => torrents.concat(nextTorrents))
|
||||
: torrents)
|
||||
.catch(error => retries ? jitter().then(() => search(imdbId, config, retries - 1)) : Promise.reject(error));
|
||||
}
|
||||
|
||||
function browse(config = {}, retries = 1) {
|
||||
const page = config.page || 1;
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/get-torrents?limit=${limit}&page=${page}`, config)))
|
||||
.then(results => parseResults(results))
|
||||
.catch(error => retries ? jitter().then(() => browse(config, retries - 1)) : Promise.reject(error));
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
|
||||
|
||||
return axios.get(requestUrl, options)
|
||||
.then(response => {
|
||||
if (!response.data) {
|
||||
return Promise.reject(`No body: ${requestUrl}`);
|
||||
}
|
||||
return Promise.resolve(response.data);
|
||||
});
|
||||
}
|
||||
|
||||
function parseResults(results) {
|
||||
if (!results || !Array.isArray(results.torrents)) {
|
||||
return Promise.reject(`Incorrect results ${results}`)
|
||||
}
|
||||
return results.torrents.map(torrent => parseTorrent(torrent));
|
||||
}
|
||||
|
||||
function parseTorrent(torrent) {
|
||||
return {
|
||||
name: torrent.title.replace(/EZTV$/, ''),
|
||||
torrentId: torrent.episode_url.replace(/.*\/ep\//, ''),
|
||||
infoHash: torrent.hash.trim().toLowerCase(),
|
||||
magnetLink: torrent.magnet_url,
|
||||
torrentLink: torrent.torrent_url,
|
||||
seeders: torrent.seeds,
|
||||
size: torrent.size_bytes,
|
||||
uploadDate: new Date(torrent.date_released_unix * 1000),
|
||||
imdbId: torrent.imdb_id !== '0' && 'tt' + torrent.imdb_id || undefined
|
||||
}
|
||||
}
|
||||
|
||||
function parseTorrentPage(body) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error('Failed loading body'));
|
||||
}
|
||||
const content = $('table[class="forum_header_border_normal"]');
|
||||
const magnetLink = content.find('a[title="Magnet Link"]').attr('href');
|
||||
const torrent = {
|
||||
name: content.find('h1 > span').text().replace(/EZTV$/, ''),
|
||||
infoHash: decode(magnetLink).infoHash,
|
||||
magnetLink: magnetLink,
|
||||
torrentLink: content.find('a[title="Download Torrent"]').attr('href'),
|
||||
seeders: parseInt(content.find('span[class="stat_red"]').first().text(), 10) || 0,
|
||||
size: parseSize(content.find('b:contains(\'Filesize:\')')[0].nextSibling.data),
|
||||
uploadDate: moment(content.find('b:contains(\'Released:\')')[0].nextSibling.data, 'Do MMM YYYY').toDate(),
|
||||
showUrl: content.find('.episode_left_column a').attr('href')
|
||||
};
|
||||
resolve(torrent);
|
||||
});
|
||||
}
|
||||
|
||||
function jitter() {
|
||||
return Promises.delay(minDelay + Math.round(Math.random() * jitterDelay))
|
||||
}
|
||||
|
||||
module.exports = { torrent, search, browse };
|
||||
@@ -1,85 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const { parse } = require('parse-torrent-title');
|
||||
const eztv = require('./eztv_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
const { isEpisodeImdbId } = require('../../lib/metadata');
|
||||
|
||||
const NAME = 'EZTV';
|
||||
const UNTIL_PAGE = 10;
|
||||
|
||||
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
async function scrape() {
|
||||
const scrapeStart = moment();
|
||||
const lastScrape = await repository.getProvider({ name: NAME });
|
||||
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
|
||||
|
||||
return scrapeLatestTorrents()
|
||||
.then(() => {
|
||||
lastScrape.lastScraped = scrapeStart;
|
||||
return lastScrape.save();
|
||||
})
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
}
|
||||
|
||||
async function updateSeeders(torrent, getImdbIdsMethod) {
|
||||
// return getImdbIdsMethod()
|
||||
// .then(imdbIds => Promise.all(imdbIds.map(imdbId => limiter.schedule(() => eztv.search(imdbId)))))
|
||||
// .then(results => results.reduce((a, b) => a.concat(b), []))
|
||||
// .catch(() => limiter.schedule(() => eztv.torrent(torrent.torrentId)));
|
||||
return Promise.resolve([]);
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
return scrapeLatestTorrentsForCategory();
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(page = 1) {
|
||||
console.log(`Scrapping ${NAME} page ${page}`);
|
||||
return eztv.browse(({ page }))
|
||||
.catch(error => {
|
||||
console.warn(`Failed ${NAME} scrapping for [${page}] due: `, error);
|
||||
// return Promises.delay(30000).then(() => scrapeLatestTorrentsForCategory(page))
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))
|
||||
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE
|
||||
? scrapeLatestTorrentsForCategory(page + 1)
|
||||
: Promise.resolve());
|
||||
}
|
||||
|
||||
async function processTorrentRecord(record) {
|
||||
if (await checkAndUpdateTorrent(record)) {
|
||||
return record;
|
||||
}
|
||||
|
||||
if (!record || !record.size) {
|
||||
return Promise.resolve('Invalid torrent record');
|
||||
}
|
||||
|
||||
// imdb id for talk shows is usually incorrect on eztv
|
||||
const parsedTitle = parse(record.name);
|
||||
const dateEpisode = !parsedTitle.season && parsedTitle.date;
|
||||
if (dateEpisode && await isEpisodeImdbId(record.imdbId)) {
|
||||
delete record.imdbId;
|
||||
}
|
||||
|
||||
const torrent = {
|
||||
infoHash: record.infoHash,
|
||||
provider: NAME,
|
||||
torrentId: record.torrentId,
|
||||
title: record.name.replace(/\t|\s+/g, ' ').trim(),
|
||||
type: Type.SERIES,
|
||||
size: record.size,
|
||||
seeders: record.seeders,
|
||||
uploadDate: record.uploadDate,
|
||||
imdbId: record.imdbId,
|
||||
};
|
||||
|
||||
return createTorrentEntry(torrent).then(() => torrent);
|
||||
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,137 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
const Promises = require('../../lib/promises');
|
||||
|
||||
const defaultUrl = 'https://horriblesubs.info';
|
||||
const defaultTimeout = 10000;
|
||||
|
||||
function allShows(config = {}) {
|
||||
return _getContent('/shows/', config)
|
||||
.then(($) => $('div[class="ind-show"]')
|
||||
.map((index, element) => $(element).children('a'))
|
||||
.map((index, element) => ({
|
||||
title: element.attr('title'),
|
||||
url: `${config.proxyUrl || defaultUrl}${element.attr('href')}`
|
||||
})).get());
|
||||
}
|
||||
|
||||
async function showData(showInfo, config = {}) {
|
||||
const showEndpoint = (showInfo.url || showInfo).match(/\/show.+/)[0];
|
||||
const title = showInfo.title;
|
||||
const showId = await _getShowId(showEndpoint);
|
||||
const packEntries = await _getShowEntries(showId, title, 'batch', config);
|
||||
const singleEntries = await _getShowEntries(showId, title, 'show', config);
|
||||
|
||||
return {
|
||||
title: title,
|
||||
url: showInfo.url || showInfo,
|
||||
showId: showId,
|
||||
singleEpisodes: singleEntries,
|
||||
packEpisodes: packEntries
|
||||
};
|
||||
}
|
||||
|
||||
async function getLatestEntries(config = {}) {
|
||||
return _getAllLatestEntries(config)
|
||||
.then((entries) => Promises.sequence(entries.map((entry) => () => _findLatestEntry(entry, config))))
|
||||
.then((entries) => entries.filter((entry) => entry))
|
||||
}
|
||||
|
||||
function _getContent(endpoint, config = {},) {
|
||||
const baseUrl = config.proxyUrl || defaultUrl;
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const url = endpoint.startsWith('http')
|
||||
? endpoint.replace(/https?:\/\/[^/]+/, baseUrl)
|
||||
: `${baseUrl}${endpoint}`;
|
||||
|
||||
return axios.get(url, { timeout: timeout })
|
||||
.then((response) => response.data)
|
||||
.then((body) => cheerio.load(body));
|
||||
}
|
||||
|
||||
function _getShowId(showEndpoint) {
|
||||
return _getContent(showEndpoint.replace(/(?:#\d+)?\/?$/, '/'))
|
||||
.then($ => $('div.entry-content').find('script').html().match(/var hs_showid = (\d+)/)[1]);
|
||||
}
|
||||
|
||||
function _getShowEntries(animeId, animeTitle, type, config) {
|
||||
return _getAllEntries(animeId, type, config)
|
||||
.then((entries) => entries.filter((entry) => entry.title === animeTitle));
|
||||
}
|
||||
|
||||
function _getAllEntries(animeId, type, config, page = 0, autoExtend = true) {
|
||||
const entriesEndpoint = `/api.php?method=getshows&type=${type}&showid=${animeId}&nextid=${page}`;
|
||||
return _getEntries(entriesEndpoint, config)
|
||||
.then((entries) => !autoExtend || !entries.length ? entries :
|
||||
_getAllEntries(animeId, type, config, page + 1)
|
||||
.then((nextEntries) => entries.concat(nextEntries)));
|
||||
}
|
||||
|
||||
function _getEntries(endpoint, config) {
|
||||
return _getContent(endpoint, config)
|
||||
.then(($) => $('div[class="rls-info-container"]')
|
||||
.map((index, element) => ({
|
||||
title: $(element).find('a[class="rls-label"]').contents()
|
||||
.filter((i, el) => el.nodeType === 3).first().text().trim(),
|
||||
episode: $(element).find('a[class="rls-label"]').find('strong').text(),
|
||||
uploadDate: _parseDate($(element).find('a[class="rls-label"]').find('span[class="rls-date"]').text()),
|
||||
mirrors: $(element).find('div[class="rls-links-container"]').children()
|
||||
.map((indexLink, elementLink) => ({
|
||||
resolution: $(elementLink).attr('id').match(/\d+p$/)[0],
|
||||
magnetLink: $(elementLink).find('a[title="Magnet Link"]').attr('href'),
|
||||
torrentLink: $(elementLink).find('a[title="Torrent Link"]').attr('href')
|
||||
})).get()
|
||||
})).get());
|
||||
}
|
||||
|
||||
function _getAllLatestEntries(config, page = 0) {
|
||||
const pageParam = page === 0 ? '' : `&nextid=${page}`;
|
||||
const entriesEndpoint = `/api.php?method=getlatest${pageParam}`;
|
||||
return _getContent(entriesEndpoint, config)
|
||||
.then(($) => $('li a')
|
||||
.map((index, element) => ({
|
||||
urlEndpoint: $(element).attr('href'),
|
||||
episode: $(element).find('strong').text()
|
||||
})).get())
|
||||
.then((entries) => entries.length < 12
|
||||
? entries
|
||||
: _getAllLatestEntries(config, page + 1)
|
||||
.then((nextEntries) => entries.concat(nextEntries)));
|
||||
}
|
||||
|
||||
async function _findLatestEntry(entry, config) {
|
||||
const showId = await _getShowId(entry.urlEndpoint);
|
||||
let foundEntry;
|
||||
let page = 0;
|
||||
let reachedEnd = false;
|
||||
|
||||
while (!foundEntry && !reachedEnd) {
|
||||
const allEntries = await _getAllEntries(showId, 'show', config, page, false);
|
||||
foundEntry = allEntries.filter((e) => e.episode === entry.episode)[0];
|
||||
page = page + 1;
|
||||
reachedEnd = allEntries.length === 0;
|
||||
}
|
||||
|
||||
if (!foundEntry) {
|
||||
return;
|
||||
}
|
||||
return {
|
||||
title: foundEntry.title,
|
||||
url: entry.urlEndpoint,
|
||||
showId: showId,
|
||||
singleEpisodes: [foundEntry]
|
||||
};
|
||||
}
|
||||
|
||||
function _parseDate(date) {
|
||||
if (date.match(/today/i)) {
|
||||
return moment().toDate();
|
||||
} else if (date.match(/yesterday/i)) {
|
||||
return moment().subtract(1, 'day').toDate();
|
||||
}
|
||||
return moment(date, 'MM/DD/YYYY').toDate();
|
||||
}
|
||||
|
||||
module.exports = { allShows, showData, getLatestEntries, _getShowId };
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,186 +0,0 @@
|
||||
const fs = require('fs');
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const decode = require('magnet-uri');
|
||||
const horriblesubs = require('./horriblesubs_api.js');
|
||||
const repository = require('../../lib/repository');
|
||||
const { Type } = require('../../lib/types');
|
||||
const { updateCurrentSeeders, updateTorrentSize } = require('../../lib/torrent');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
const { getMetadata, getKitsuId } = require('../../lib/metadata');
|
||||
const showMappings = require('./horriblesubs_mapping.json');
|
||||
|
||||
const NAME = 'HorribleSubs';
|
||||
const NEXT_FULL_SCRAPE_OFFSET = 5 * 24 * 60 * 60; // 5 days;
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 5 });
|
||||
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
async function scrape() {
|
||||
const scrapeStart = moment();
|
||||
const lastScrape = await repository.getProvider({ name: NAME });
|
||||
const lastScraped = lastScrape.lastScraped && moment(lastScrape.lastScraped);
|
||||
|
||||
if (!lastScraped || lastScraped.add(NEXT_FULL_SCRAPE_OFFSET, 'seconds') < scrapeStart) {
|
||||
console.log(`[${scrapeStart}] scrapping all ${NAME} shows...`);
|
||||
return _scrapeAllShows()
|
||||
.then(() => {
|
||||
lastScrape.lastScraped = scrapeStart;
|
||||
return lastScrape.save();
|
||||
})
|
||||
.then(() => console.log(`[${moment()}] finished scrapping all ${NAME} shows`));
|
||||
} else {
|
||||
console.log(`[${scrapeStart}] scrapping latest ${NAME} entries...`);
|
||||
return _scrapeLatestEntries()
|
||||
.then(() => console.log(`[${moment()}] finished scrapping latest ${NAME} entries`));
|
||||
}
|
||||
}
|
||||
|
||||
async function updateSeeders(torrent) {
|
||||
return Promise.resolve([]);
|
||||
}
|
||||
|
||||
async function _scrapeLatestEntries() {
|
||||
const latestEntries = await horriblesubs.getLatestEntries();
|
||||
|
||||
return Promise.all(latestEntries
|
||||
.map((entryData) => limiter.schedule(() => _parseShowData(entryData)
|
||||
.catch((err) => console.log(err)))));
|
||||
}
|
||||
|
||||
async function _scrapeAllShows() {
|
||||
const shows = await horriblesubs.allShows();
|
||||
|
||||
return Promise.all(shows
|
||||
.map((show) => limiter.schedule(() => horriblesubs.showData(show)
|
||||
.then((showData) => _parseShowData(showData, false))
|
||||
.catch((err) => console.log(err)))));
|
||||
}
|
||||
|
||||
async function compareSearchKitsuIds() {
|
||||
console.log(`${NAME}: initiating kitsu compare...`);
|
||||
const shows = await horriblesubs.allShows()
|
||||
.then((shows) => Promise.all(shows.slice(0, 1).map((show) => limiter.schedule(() => enrichShow(show)))));
|
||||
|
||||
const incorrect = shows.filter(
|
||||
(show) => showMappings[show.title] && showMappings[show.title].kitsu_id !== show.kitsu_id);
|
||||
const incorrectRatio = incorrect.length / shows.length;
|
||||
console.log(incorrect);
|
||||
console.log(`Ratio: ${incorrectRatio}`);
|
||||
}
|
||||
|
||||
async function initMapping() {
|
||||
console.log(`${NAME}: initiating kitsu mapping...`);
|
||||
const shows = await horriblesubs.allShows()
|
||||
.then((shows) => shows.filter((show) => !showMappings[show.title]))
|
||||
.then((shows) => Promise.all(shows.map((show) => limiter.schedule(() => enrichShow(show)))))
|
||||
.then((shows) => shows.reduce((map, show) => (map[show.title] = show, map), showMappings));
|
||||
|
||||
fs.writeFile(
|
||||
"./scraper/scrapers/horriblesubs/horriblesubs_mapping.json",
|
||||
JSON.stringify(shows), 'utf8',
|
||||
(err) => {
|
||||
if (err) {
|
||||
console.log("An error occurred while writing JSON Object to File.", err);
|
||||
} else {
|
||||
console.log(`${NAME}: finished kitsu mapping`);
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
async function enrichShow(show) {
|
||||
console.log(`${NAME}: getting show info for ${show.title}...`);
|
||||
const showId = await horriblesubs._getShowId(show.url)
|
||||
.catch(() => show.title);
|
||||
const metadata = await getKitsuId({ title: show.title })
|
||||
.then((kitsuId) => getMetadata(kitsuId))
|
||||
.catch((error) => {
|
||||
console.log(`Failed getting kitsu meta: ${error.message}`);
|
||||
return {};
|
||||
});
|
||||
|
||||
return {
|
||||
showId: showId,
|
||||
kitsu_id: metadata.kitsuId,
|
||||
...show,
|
||||
kitsuTitle: metadata.title,
|
||||
imdb_id: metadata.imdbId
|
||||
}
|
||||
}
|
||||
|
||||
async function _parseShowData(showData, updateSeeders = true) {
|
||||
console.log(`${NAME}: scrapping ${showData.title} data...`);
|
||||
const showMapping = showMappings[showData.title];
|
||||
const kitsuId = showMapping && showMapping.kitsu_id;
|
||||
if (!showMapping) {
|
||||
throw new Error(`No kitsu mapping found for ${showData.title}`);
|
||||
}
|
||||
if (!kitsuId) {
|
||||
throw new Error(`No kitsuId found for ${showData.title}`);
|
||||
}
|
||||
|
||||
// sometimes horriblesubs entry contains multiple season in it, so need to split it per kitsu season entry
|
||||
const kitsuIdsMapping = Array.isArray(kitsuId) && await Promise.all(kitsuId.map(kitsuId => getMetadata(kitsuId)))
|
||||
.then((metas) => metas.reduce((map, meta) => {
|
||||
const epOffset = Object.keys(map).length;
|
||||
[...Array(meta.totalCount || 1).keys()]
|
||||
.map(ep => ep + 1)
|
||||
.forEach(ep => map[ep + epOffset] = { kitsuId: meta.kitsuId, episode: ep, title: meta.title });
|
||||
return map;
|
||||
}, {})) || {};
|
||||
const formatTitle = (episodeInfo, mirror) => {
|
||||
const mapping = kitsuIdsMapping[episodeInfo.episode.replace(/^0+/, '')];
|
||||
if (mapping) {
|
||||
return `${mapping.title} - ${mapping.episode} [${mirror.resolution}]`;
|
||||
}
|
||||
return `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`;
|
||||
};
|
||||
const getKitsuId = inputEpisode => {
|
||||
const episodeString = inputEpisode.includes('-') && inputEpisode.split('-')[0] || inputEpisode;
|
||||
const episode = parseInt(episodeString, 10);
|
||||
if (kitsuIdsMapping[episode]) {
|
||||
return kitsuIdsMapping[episode].kitsuId;
|
||||
} else if (Array.isArray(kitsuId)) {
|
||||
console.warn(`Unmapped episode number for ${showData.title} - ${inputEpisode}`);
|
||||
return undefined;
|
||||
}
|
||||
return kitsuId;
|
||||
};
|
||||
|
||||
return Promise.all([].concat(showData.singleEpisodes || []).concat(showData.packEpisodes || [])
|
||||
.map(episodeInfo => episodeInfo.mirrors
|
||||
.filter(mirror => mirror.magnetLink && mirror.magnetLink.length)
|
||||
.map(mirror => ({
|
||||
provider: NAME,
|
||||
...mirror,
|
||||
infoHash: decode(mirror.magnetLink).infoHash,
|
||||
trackers: decode(mirror.magnetLink).tr.join(','),
|
||||
title: formatTitle(episodeInfo, mirror),
|
||||
type: Type.ANIME,
|
||||
kitsuId: getKitsuId(episodeInfo.episode),
|
||||
uploadDate: episodeInfo.uploadDate,
|
||||
})))
|
||||
.reduce((a, b) => a.concat(b), [])
|
||||
.filter(torrent => torrent.kitsuId)
|
||||
.map(torrent => entryLimiter.schedule(() => processTorrentRecord(torrent, updateSeeders))))
|
||||
.then(() => console.log(`${NAME}: finished scrapping ${showData.title} data`));
|
||||
}
|
||||
|
||||
async function processTorrentRecord(torrent, updateSeeders = true) {
|
||||
const existingTorrent = await repository.getTorrent(torrent).catch(() => undefined);
|
||||
|
||||
if (existingTorrent && existingTorrent.provider === NAME) {
|
||||
if (updateSeeders) {
|
||||
return updateCurrentSeeders(torrent).then(updatedSeeders => checkAndUpdateTorrent(updatedSeeders))
|
||||
}
|
||||
return Promise.resolve(torrent)
|
||||
}
|
||||
|
||||
return updateTorrentSize(torrent)
|
||||
.then(updated => updateCurrentSeeders(updated))
|
||||
.then(updated => createTorrentEntry(updated, true))
|
||||
.catch(error => console.warn(`Failed creating entry for ${torrent.title}:`, error));
|
||||
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,161 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
const defaultProxies = [
|
||||
'https://katcr.co'
|
||||
];
|
||||
const defaultTimeout = 10000;
|
||||
|
||||
const Categories = {
|
||||
MOVIE: 'movies',
|
||||
TV: 'tv',
|
||||
ANIME: 'anime',
|
||||
APPS: 'applications',
|
||||
GAMES: 'games',
|
||||
MUSIC: 'music',
|
||||
BOOKS: 'books',
|
||||
PORN: 'xxx',
|
||||
OTHER: 'other',
|
||||
};
|
||||
|
||||
function torrent(torrentId, config = {}, retries = 2) {
|
||||
if (!torrentId || retries === 0) {
|
||||
return Promise.reject(new Error(`Failed ${torrentId} search`));
|
||||
}
|
||||
const proxyList = config.proxyList || defaultProxies;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`, config)))
|
||||
.then((body) => parseTorrentPage(body))
|
||||
.then((torrent) => ({ torrentId, ...torrent }))
|
||||
.catch((err) => torrent(torrentId, config, retries - 1));
|
||||
}
|
||||
|
||||
function search(keyword, config = {}, retries = 2) {
|
||||
if (!keyword || retries === 0) {
|
||||
return Promise.reject(new Error(`Failed ${keyword} search`));
|
||||
}
|
||||
const proxyList = config.proxyList || defaultProxies;
|
||||
const page = config.page || 1;
|
||||
const category = config.category;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/99/${category}`, config)))
|
||||
.then((body) => parseTableBody(body))
|
||||
.catch((err) => search(keyword, config, retries - 1));
|
||||
}
|
||||
|
||||
function browse(config = {}, retries = 2) {
|
||||
if (retries === 0) {
|
||||
return Promise.reject(new Error(`Failed browse request`));
|
||||
}
|
||||
const proxyList = config.proxyList || defaultProxies;
|
||||
const page = config.page || 1;
|
||||
const category = config.category;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/category/${category}/page/${page}`, config)))
|
||||
.then((body) => parseTableBody(body))
|
||||
.catch((err) => browse(config, retries - 1));
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { timeout: timeout };
|
||||
|
||||
return axios.get(requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (body.includes('Access Denied')) {
|
||||
console.log(`Access Denied: ${requestUrl}`);
|
||||
throw new Error(`Access Denied: ${requestUrl}`);
|
||||
} else if (body.includes('502: Bad gateway') ||
|
||||
body.includes('403 Forbidden') ||
|
||||
body.includes('Origin DNS error') ||
|
||||
!body.includes('Kickass Torrents</title>')) {
|
||||
throw new Error(`Invalid body contents: ${requestUrl}`);
|
||||
}
|
||||
return body;
|
||||
});
|
||||
}
|
||||
|
||||
function parseTableBody(body) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error('Failed loading body'));
|
||||
}
|
||||
|
||||
const torrents = [];
|
||||
|
||||
$('.table > tbody > tr').each((i, element) => {
|
||||
const row = $(element);
|
||||
const magnetLink = row.find('a[title="Torrent magnet link"]').attr('href');
|
||||
torrents.push({
|
||||
name: row.find('a[class="torrents_table__torrent_title"]').first().children('b').text(),
|
||||
infoHash: decode(magnetLink).infoHash,
|
||||
magnetLink: magnetLink,
|
||||
torrentId: row.find('a[class="torrents_table__torrent_title"]').first().attr('href').replace('/torrent/', ''),
|
||||
category: row.find('span[class="torrents_table__upload_info"]').first().children('a').first().attr('href')
|
||||
.match(/category\/([^\/]+)/)[1],
|
||||
seeders: parseInt(row.find('td[data-title="Seed"]').first().text()),
|
||||
leechers: parseInt(row.find('td[data-title="Leech"]').first().text()),
|
||||
size: parseSize(row.find('td[data-title="Size"]').first().text()),
|
||||
uploadDate: moment(row.find('td[data-title="Age"]').first().attr('title')).toDate()
|
||||
});
|
||||
});
|
||||
|
||||
resolve(torrents);
|
||||
});
|
||||
}
|
||||
|
||||
function parseTorrentPage(body) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error('Failed loading body'));
|
||||
}
|
||||
const content = $('div[class="col"]').first();
|
||||
const info = content.find('div[class="torrent_stats"]').parent();
|
||||
const description = content.find('div[id="main"]');
|
||||
const magnetLink = info.find('a[title="Download verified Magnet"]').attr('href');
|
||||
const imdbIdMatch = description.html().match(/imdb\.com\/title\/(tt\d+)/i);
|
||||
|
||||
const torrent = {
|
||||
name: info.find('h1').first().text(),
|
||||
infoHash: decode(magnetLink).infoHash,
|
||||
magnetLink: magnetLink,
|
||||
seeders: parseInt(info.find('span[class="torrent_stats__seed_count mr-2"]').first().text().match(/\d+/)[0], 10),
|
||||
leechers: parseInt(info.find('span[class="torrent_stats__leech_count mr-2"]').first().text().match(/\d+/)[0], 10),
|
||||
category: info.find('small').first().children('a').first().attr('href').match(/\/category\/([^\/]+)/)[1],
|
||||
languages: description.find('span:contains(\'Audio\')').next().children().eq(0).text(),
|
||||
size: parseSize(description.find('ul[class="file_list"]').first().find('li').first().contents().eq(2).text()
|
||||
.match(/\(Size: (.+)\)/)[1]),
|
||||
uploadDate: moment(info.find('time').first().text()).toDate(),
|
||||
imdbId: imdbIdMatch && imdbIdMatch[1],
|
||||
files: content.find('ul[class="file_list"]').first().find('li > ul > li[class="file_list__file"]')
|
||||
.map((i, elem) => $(elem))
|
||||
.map((i, ele) => ({
|
||||
fileIndex: i,
|
||||
name: ele.find('span > ul > li').contents().eq(1).text().trim().replace(/^.+\//g, ''),
|
||||
path: ele.find('span > ul > li').contents().eq(1).text().trim(),
|
||||
size: parseSize(ele.contents().eq(2).text())
|
||||
})).get()
|
||||
};
|
||||
if (torrent.files.length >= 50) {
|
||||
// a max of 50 files are displayed on the page
|
||||
delete torrent.files;
|
||||
}
|
||||
resolve(torrent);
|
||||
});
|
||||
}
|
||||
|
||||
module.exports = { torrent, search, browse, Categories };
|
||||
@@ -1,98 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const LineByLineReader = require('line-by-line');
|
||||
const fs = require('fs');
|
||||
const { Type } = require('../../lib/types');
|
||||
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'KickassTorrents';
|
||||
const CSV_FILE_PATH = '/tmp/kickass.csv';
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 40 });
|
||||
|
||||
async function scrape() {
|
||||
console.log(`starting to scrape KAT dump: ${JSON.stringify(lastDump)}`);
|
||||
|
||||
let entriesProcessed = 0;
|
||||
const lr = new LineByLineReader(CSV_FILE_PATH);
|
||||
lr.on('line', (line) => {
|
||||
if (entriesProcessed % 1000 === 0) {
|
||||
console.log(`Processed ${entriesProcessed} entries`);
|
||||
}
|
||||
const row = line.match(/(?<=^|\|)(".*"|[^|]+)(?=\||$)/g);
|
||||
if (row.length !== 11) {
|
||||
console.log(`Invalid row: ${line}`);
|
||||
return;
|
||||
}
|
||||
const torrent = {
|
||||
infoHash: row[0].toLowerCase(),
|
||||
title: row[1]
|
||||
.replace(/^"|"$/g, '')
|
||||
.replace(/&/g, '&')
|
||||
.replace(/&\w{2,6};/g, ' ')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim(),
|
||||
category: row[2],
|
||||
size: parseInt(row[5], 10),
|
||||
seeders: parseInt(row[8], 10),
|
||||
uploadDate: moment.unix(parseInt(row[10], 10)).toDate(),
|
||||
};
|
||||
|
||||
if (!limiter.empty()) {
|
||||
lr.pause()
|
||||
}
|
||||
|
||||
limiter.schedule(() => processTorrentRecord(torrent)
|
||||
.catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
|
||||
.then(() => limiter.empty())
|
||||
.then((empty) => empty && lr.resume())
|
||||
.then(() => entriesProcessed++);
|
||||
});
|
||||
lr.on('error', (err) => {
|
||||
console.log(err);
|
||||
});
|
||||
lr.on('end', () => {
|
||||
fs.unlink(CSV_FILE_PATH);
|
||||
console.log(`finished to scrape KAT dump: ${JSON.stringify(lastDump)}!`);
|
||||
});
|
||||
}
|
||||
|
||||
const categoryMapping = {
|
||||
"Movies": Type.MOVIE,
|
||||
"TV": Type.SERIES,
|
||||
"Anime": Type.ANIME
|
||||
};
|
||||
|
||||
async function processTorrentRecord(record) {
|
||||
if (!categoryMapping[record.category] || record.seeders === 0) {
|
||||
return createSkipTorrentEntry(record);
|
||||
}
|
||||
if (await getStoredTorrentEntry(record)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const torrentFound = await findTorrent(record).catch(() => undefined);
|
||||
|
||||
if (!torrentFound) {
|
||||
return createSkipTorrentEntry(record);
|
||||
}
|
||||
|
||||
const torrent = {
|
||||
infoHash: record.infoHash,
|
||||
provider: NAME,
|
||||
title: torrentFound.name,
|
||||
size: record.size,
|
||||
type: categoryMapping[record.category],
|
||||
imdbId: torrentFound.imdbId,
|
||||
uploadDate: record.uploadDate,
|
||||
seeders: torrentFound.seeders,
|
||||
};
|
||||
|
||||
return createTorrentEntry(torrent);
|
||||
}
|
||||
|
||||
async function findTorrent(record) {
|
||||
return Promise.reject("not found");
|
||||
}
|
||||
|
||||
module.exports = { scrape, NAME };
|
||||
@@ -1,91 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const kickass = require('./kickass_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'KickassTorrents';
|
||||
const UNTIL_PAGE = 10;
|
||||
const TYPE_MAPPING = typeMapping();
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
async function scrape() {
|
||||
const scrapeStart = moment();
|
||||
const lastScrape = await repository.getProvider({ name: NAME });
|
||||
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
|
||||
|
||||
return scrapeLatestTorrents()
|
||||
.then(() => {
|
||||
lastScrape.lastScraped = scrapeStart;
|
||||
return lastScrape.save();
|
||||
})
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
}
|
||||
|
||||
async function updateSeeders(torrent) {
|
||||
return limiter.schedule(() => kickass.torrent(torrent.torrentId));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
const allowedCategories = [
|
||||
kickass.Categories.MOVIE,
|
||||
kickass.Categories.TV,
|
||||
kickass.Categories.ANIME,
|
||||
];
|
||||
|
||||
return Promises.sequence(allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category)))
|
||||
.then(entries => entries.reduce((a, b) => a.concat(b), []));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
||||
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
|
||||
return kickass.browse(({ category, page }))
|
||||
.catch(error => {
|
||||
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
|
||||
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE
|
||||
? scrapeLatestTorrentsForCategory(category, page + 1)
|
||||
: Promise.resolve());
|
||||
}
|
||||
|
||||
async function processTorrentRecord(record) {
|
||||
if (await checkAndUpdateTorrent(record)) {
|
||||
return record;
|
||||
}
|
||||
|
||||
const torrentFound = await kickass.torrent(record.torrentId).catch(() => undefined);
|
||||
|
||||
if (!torrentFound || !TYPE_MAPPING[torrentFound.category]) {
|
||||
return Promise.resolve('Invalid torrent record');
|
||||
}
|
||||
|
||||
const torrent = {
|
||||
infoHash: torrentFound.infoHash,
|
||||
provider: NAME,
|
||||
torrentId: torrentFound.torrentId,
|
||||
title: torrentFound.name.replace(/\t|\s+/g, ' '),
|
||||
type: TYPE_MAPPING[torrentFound.category],
|
||||
size: torrentFound.size,
|
||||
seeders: torrentFound.seeders,
|
||||
uploadDate: torrentFound.uploadDate,
|
||||
imdbId: torrentFound.imdbId,
|
||||
languages: torrentFound.languages || undefined
|
||||
};
|
||||
|
||||
return createTorrentEntry(torrent).then(() => torrent);
|
||||
}
|
||||
|
||||
function typeMapping() {
|
||||
const mapping = {};
|
||||
mapping[kickass.Categories.MOVIE] = Type.MOVIE;
|
||||
mapping[kickass.Categories.TV] = Type.SERIES;
|
||||
mapping[kickass.Categories.ANIME] = Type.ANIME;
|
||||
return mapping;
|
||||
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,145 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const moment = require("moment")
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require('magnet-uri');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||
const { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages } = require('../scraperHelper')
|
||||
|
||||
const defaultTimeout = 10000;
|
||||
const maxSearchPage = 50
|
||||
|
||||
const baseUrl = 'https://lapumia.org';
|
||||
|
||||
const Categories = {
|
||||
MOVIE: null,
|
||||
TV: 'series',
|
||||
ANIME: 'animes',
|
||||
};
|
||||
|
||||
function torrent(torrentId, config = {}, retries = 2) {
|
||||
if (!torrentId || retries === 0) {
|
||||
return Promise.reject(new Error(`Failed ${torrentId} query`));
|
||||
}
|
||||
return singleRequest(`${baseUrl}/${torrentId}`, config)
|
||||
.then((body) => parseTorrentPage(body))
|
||||
.then((torrent) => torrent.map(el => ({ torrentId, ...el })))
|
||||
.catch((err) => {
|
||||
console.warn(`Failed Lapumia ${torrentId} request: `, err);
|
||||
return torrent(torrentId, config, retries - 1)
|
||||
});
|
||||
}
|
||||
|
||||
function search(keyword, config = {}, retries = 2) {
|
||||
if (!keyword || retries === 0) {
|
||||
return Promise.reject(new Error(`Failed ${keyword} search`));
|
||||
}
|
||||
const page = config.page || 1;
|
||||
const extendToPage = Math.min(maxSearchPage, (config.extendToPage || 1))
|
||||
|
||||
return singleRequest(`${baseUrl}/page/${page}/?s=${keyword}`, config)
|
||||
.then(body => parseTableBody(body))
|
||||
.then(torrents => torrents.length === 10 && page < extendToPage
|
||||
? search(keyword, { ...config, page: page + 1 }).catch(() => [])
|
||||
.then(nextTorrents => torrents.concat(nextTorrents))
|
||||
: torrents)
|
||||
.catch((err) => search(keyword, config, retries - 1));
|
||||
}
|
||||
|
||||
function browse(config = {}, retries = 2) {
|
||||
if (retries === 0) {
|
||||
return Promise.reject(new Error(`Failed browse request`));
|
||||
}
|
||||
const page = config.page || 1;
|
||||
const category = config.category;
|
||||
const requestUrl = category ? `${baseUrl}/${category}/page/${page}/` : `${baseUrl}/page/${page}/`
|
||||
|
||||
return singleRequest(requestUrl, config)
|
||||
.then((body) => parseTableBody(body))
|
||||
.catch((err) => browse(config, retries - 1));
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { userAgent: getRandomUserAgent(), timeout: timeout, follow: 2 };
|
||||
|
||||
return axios.get(requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl}`);
|
||||
} else if (body.includes('502: Bad gateway') ||
|
||||
body.includes('403 Forbidden')) {
|
||||
throw new Error(`Invalid body contents: ${requestUrl}`);
|
||||
}
|
||||
return body;
|
||||
})
|
||||
.catch(error => Promise.reject(error.message || error));
|
||||
}
|
||||
|
||||
function parseTableBody(body) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error('Failed loading body'));
|
||||
}
|
||||
|
||||
const torrents = [];
|
||||
|
||||
$('div.post').each((i, element) => {
|
||||
const row = $(element);
|
||||
try {
|
||||
torrents.push({
|
||||
name: row.find("div > a").text(),
|
||||
torrentId: row.find("div > a").attr("href").split('/')[3]
|
||||
});
|
||||
} catch (e) {
|
||||
console.log("Failed parsing Lupumia table entry")
|
||||
}
|
||||
});
|
||||
resolve(torrents);
|
||||
});
|
||||
}
|
||||
|
||||
function parseTorrentPage(body) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error('Failed loading body'));
|
||||
}
|
||||
const magnets = $('h2 > span')
|
||||
.filter((i, elem) => isPtDubbed($(elem).text())).parent()
|
||||
.map((i, elem) => $(elem).nextUntil('h2, hr'))
|
||||
.map((i, elem) => $(elem).find('a[href^="magnet"]'))
|
||||
.map((i, section) => $(section).attr("href")).get();
|
||||
const category = parseCategory($('div.category').html());
|
||||
const details = $('div.content')
|
||||
const torrents = magnets.filter(magnetLink => decode(magnetLink).name).map(magnetLink => ({
|
||||
title: sanitizePtName(escapeHTML(decode(magnetLink).name.replace(/\+/g, ' '))),
|
||||
originalName: sanitizePtOriginalName(details.find('b:contains(\'Titulo Original:\')')[0].nextSibling.nodeValue),
|
||||
year: details.find('b:contains(\'Ano de Lançamento:\')')[0].nextSibling.nodeValue.trim(),
|
||||
infoHash: decode(magnetLink).infoHash,
|
||||
magnetLink: magnetLink,
|
||||
category: category,
|
||||
uploadDate: new Date(moment($('div.infos').text().split('•')[0].trim(), 'LL', 'pt-br').format()),
|
||||
imdbId: $('.imdbRatingPlugin').attr('data-title') || null,
|
||||
languages: sanitizePtLanguages(details.find('b:contains(\'Idioma\')')[0].nextSibling.nodeValue)
|
||||
}))
|
||||
resolve(torrents.filter((x) => x));
|
||||
});
|
||||
}
|
||||
|
||||
function parseCategory(categorys) {
|
||||
const $ = cheerio.load(categorys)
|
||||
if ($('a:contains(\'Animes\')').text()) {
|
||||
return Categories.ANIME
|
||||
}
|
||||
if ($('a:contains(\'Series\')').text()) {
|
||||
return Categories.TV
|
||||
}
|
||||
return Categories.MOVIE
|
||||
}
|
||||
|
||||
module.exports = { torrent, search, browse, Categories };
|
||||
@@ -1,112 +0,0 @@
|
||||
const moment = require("moment");
|
||||
const Bottleneck = require("bottleneck");
|
||||
const lapumia = require("./lapumia_api");
|
||||
const { Type } = require("../../lib/types");
|
||||
const repository = require("../../lib/repository");
|
||||
const Promises = require("../../lib/promises");
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
|
||||
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
|
||||
const { getImdbId } = require("../../lib/metadata");
|
||||
|
||||
const NAME = "Lapumia";
|
||||
const UNTIL_PAGE = 5;
|
||||
const TYPE_MAPPING = typeMapping();
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 5 });
|
||||
|
||||
async function scrape() {
|
||||
const scrapeStart = moment();
|
||||
const lastScrape = await repository.getProvider({ name: NAME });
|
||||
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
|
||||
|
||||
return scrapeLatestTorrents()
|
||||
.then(() => {
|
||||
lastScrape.lastScraped = scrapeStart;
|
||||
return lastScrape.save();
|
||||
})
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
}
|
||||
|
||||
async function updateSeeders(torrent) {
|
||||
return limiter.schedule(() => lapumia.torrent(torrent.torrentId));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
const allowedCategories = [
|
||||
lapumia.Categories.MOVIE,
|
||||
lapumia.Categories.TV
|
||||
];
|
||||
|
||||
return Promises.sequence(allowedCategories
|
||||
.map((category) => () => scrapeLatestTorrentsForCategory(category)))
|
||||
.then((entries) => entries.reduce((a, b) => a.concat(b), []));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
||||
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
|
||||
return lapumia
|
||||
.browse({ category, page })
|
||||
.catch((error) => {
|
||||
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then((torrents) => Promise.all(torrents.map((torrent) => limiter.schedule(() => processEntry(torrent)))))
|
||||
.then((resolved) => resolved.length > 0 && page < untilPage(category)
|
||||
? scrapeLatestTorrentsForCategory(category, page + 1)
|
||||
: Promise.resolve());
|
||||
}
|
||||
|
||||
async function processEntry(entry) {
|
||||
return lapumia.torrent(entry.torrentId)
|
||||
.then(records => Promises.sequence(records.map(record => () => processTorrentRecord(record))))
|
||||
.catch(() => undefined);
|
||||
}
|
||||
|
||||
async function processTorrentRecord(foundTorrent) {
|
||||
if (await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent })) {
|
||||
return foundTorrent;
|
||||
}
|
||||
|
||||
if (!foundTorrent.size) {
|
||||
await updateTorrentSize(foundTorrent);
|
||||
}
|
||||
if (!Number.isInteger(foundTorrent.seeders)) {
|
||||
await updateCurrentSeeders(foundTorrent);
|
||||
}
|
||||
if (!foundTorrent.imdbId && TYPE_MAPPING[foundTorrent.category] !== Type.ANIME) {
|
||||
const info = { title: foundTorrent.originalName, year: foundTorrent.year };
|
||||
foundTorrent.imdbId = await getImdbId(info, TYPE_MAPPING[foundTorrent.category]).catch(() => undefined);
|
||||
}
|
||||
|
||||
const torrent = {
|
||||
infoHash: foundTorrent.infoHash,
|
||||
provider: NAME,
|
||||
torrentId: foundTorrent.torrentId,
|
||||
title: foundTorrent.title,
|
||||
type: TYPE_MAPPING[foundTorrent.category],
|
||||
imdbId: foundTorrent.imdbId,
|
||||
uploadDate: foundTorrent.uploadDate,
|
||||
seeders: foundTorrent.seeders,
|
||||
size: foundTorrent.size,
|
||||
files: foundTorrent.files,
|
||||
languages: foundTorrent.languages
|
||||
};
|
||||
return createTorrentEntry(torrent);
|
||||
}
|
||||
|
||||
function typeMapping() {
|
||||
const mapping = {};
|
||||
mapping[lapumia.Categories.MOVIE] = Type.MOVIE;
|
||||
mapping[lapumia.Categories.TV] = Type.SERIES;
|
||||
mapping[lapumia.Categories.ANIME] = Type.ANIME;
|
||||
return mapping;
|
||||
}
|
||||
|
||||
function untilPage(category) {
|
||||
if (lapumia.Categories.ANIME === category) {
|
||||
return 2;
|
||||
}
|
||||
return UNTIL_PAGE;
|
||||
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,73 +0,0 @@
|
||||
const { pantsu } = require('nyaapi')
|
||||
|
||||
const Categories = {
|
||||
ANIME: {
|
||||
ALL: '3_',
|
||||
ENGLISH: '3_5',
|
||||
RAW: '3_6',
|
||||
MUSIC_VIDEO: '3_12',
|
||||
NON_ENGLISH: '3_13',
|
||||
},
|
||||
LIVE_ACTION: {
|
||||
ALL: '5_',
|
||||
ENGLISH: '5_9',
|
||||
RAW: '5_11',
|
||||
PROMOTIONAL_VIDEO: '5_10',
|
||||
NON_ENGLISH: '5_18',
|
||||
}
|
||||
}
|
||||
|
||||
function torrent(torrentId) {
|
||||
if (!torrentId) {
|
||||
return Promise.reject(new Error(`Failed ${torrentId} search`));
|
||||
}
|
||||
|
||||
return pantsu.infoRequest(torrentId)
|
||||
.then(result => parseTorrent(result))
|
||||
.catch(error => handleError(error, torrentId));
|
||||
}
|
||||
|
||||
function search(query) {
|
||||
return pantsu.search(query)
|
||||
.then(results => results.map(torrent => parseTorrent(torrent)))
|
||||
.catch(error => handleError(error, query));
|
||||
}
|
||||
|
||||
function browse(config = {}) {
|
||||
const page = config.page || 1;
|
||||
const category = config.category || Categories.ANIME.ENGLISH;
|
||||
|
||||
return pantsu.list(category, page)
|
||||
.then(results => results.map(torrent => parseTorrent(torrent)))
|
||||
.catch(error => handleError(error, category));
|
||||
}
|
||||
|
||||
function handleError(error, identifier) {
|
||||
if (error.statusCode && error.statusCode >= 400) {
|
||||
return Promise.reject(new Error(`${error.statusCode}: [${identifier}] failed retrieval on NyaaPantsu`));
|
||||
}
|
||||
return Promise.reject(error);
|
||||
}
|
||||
|
||||
function parseTorrent(torrent) {
|
||||
return {
|
||||
title: torrent.name.replace(/\t|\s+/g, ' ').trim(),
|
||||
torrentId: torrent.id,
|
||||
infoHash: torrent.hash.trim().toLowerCase(),
|
||||
magnetLink: torrent.magnet,
|
||||
torrentLink: torrent.torrent,
|
||||
seeders: torrent.seeders,
|
||||
size: torrent.filesize,
|
||||
uploadDate: new Date(torrent.date),
|
||||
category: `${torrent.category}_${torrent.sub_category}`,
|
||||
languages: torrent.languages ? torrent.languages.join(',') : undefined,
|
||||
files: torrent.file_list && torrent.file_list.length ? torrent.file_list.map((file, fileId) => ({
|
||||
fileIndex: fileId,
|
||||
name: file.path.replace(/([^\/]+$)/, '$1'),
|
||||
path: file.path,
|
||||
size: file.filesize
|
||||
})) : undefined
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { torrent, search, browse, Categories };
|
||||
@@ -1,97 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const pantsu = require('./nyaa_pantsu_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const Promises = require('../../lib/promises');
|
||||
const repository = require('../../lib/repository');
|
||||
const { updateCurrentSeeders, updateTorrentSize } = require('../../lib/torrent');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'NyaaPantsu';
|
||||
const UNTIL_PAGE = 5
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 5 });
|
||||
|
||||
async function scrape() {
|
||||
const scrapeStart = moment();
|
||||
const lastScrape = await repository.getProvider({ name: NAME });
|
||||
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
|
||||
|
||||
// const ids = ['1033095'];
|
||||
// return Promise.all(ids.map(id => limiter.schedule(() => pantsu.torrent(id)
|
||||
// .then(torrent => processTorrentRecord(torrent)))))
|
||||
// .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
return scrapeLatestTorrents()
|
||||
.then(() => {
|
||||
lastScrape.lastScraped = scrapeStart;
|
||||
return lastScrape.save();
|
||||
})
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
}
|
||||
|
||||
async function updateSeeders(torrent) {
|
||||
return limiter.schedule(() => pantsu.torrent(torrent.torrentId))
|
||||
.then(foundTorrent => {
|
||||
if (Number.isInteger(foundTorrent.seeders)) {
|
||||
return [foundTorrent];
|
||||
}
|
||||
return []
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
const allowedCategories = [
|
||||
pantsu.Categories.ANIME.ENGLISH
|
||||
];
|
||||
|
||||
return Promises.sequence(allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category)))
|
||||
.then(entries => entries.reduce((a, b) => a.concat(b), []));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
||||
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
|
||||
return pantsu.browse(({ page }))
|
||||
.catch(error => {
|
||||
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)
|
||||
.catch(error => {
|
||||
console.warn(`Failed processing [${torrent.infoHash}] ${torrent.title} due: `, error);
|
||||
return Promise.resolve();
|
||||
})))))
|
||||
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE
|
||||
? scrapeLatestTorrentsForCategory(category, page + 1)
|
||||
: Promise.resolve());
|
||||
}
|
||||
|
||||
async function processTorrentRecord(record) {
|
||||
if (!record || await checkAndUpdateTorrent(record)) {
|
||||
return record;
|
||||
}
|
||||
|
||||
if (!record.size) {
|
||||
await updateTorrentSize(record)
|
||||
}
|
||||
if (record.seeders === null || record.seeders === undefined) {
|
||||
await updateCurrentSeeders(record);
|
||||
}
|
||||
|
||||
const torrent = {
|
||||
infoHash: record.infoHash,
|
||||
torrentLink: record.torrentLink,
|
||||
provider: NAME,
|
||||
torrentId: record.torrentId,
|
||||
title: record.title,
|
||||
type: Type.ANIME,
|
||||
size: record.size,
|
||||
seeders: record.seeders,
|
||||
uploadDate: record.uploadDate,
|
||||
languages: record.languages,
|
||||
files: record.files || undefined
|
||||
};
|
||||
|
||||
return createTorrentEntry(torrent).then(() => torrent);
|
||||
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,66 +0,0 @@
|
||||
const { si } = require('nyaapi')
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
const Categories = {
|
||||
ANIME: {
|
||||
ALL: '1_0',
|
||||
MUSIC_VIDEO: '1_1',
|
||||
ENGLISH: '1_2',
|
||||
NON_ENGLISH: '1_3',
|
||||
RAW: '1_4'
|
||||
},
|
||||
LIVE_ACTION: {
|
||||
ALL: '4_0',
|
||||
ENGLISH: '4_1',
|
||||
PROMOTIONAL_VIDEO: '4_2',
|
||||
NON_ENGLISH: '4_3',
|
||||
RAW: '4_4'
|
||||
}
|
||||
}
|
||||
|
||||
function torrent(torrentId) {
|
||||
if (!torrentId) {
|
||||
return Promise.reject(new Error(`Failed ${torrentId} search`));
|
||||
}
|
||||
|
||||
return si.infoRequest(torrentId)
|
||||
.then(result => parseTorrent(result))
|
||||
.then(result => ({ ...result, torrentId }))
|
||||
.catch(error => {
|
||||
if (error.statusCode && error.statusCode === 404) {
|
||||
return Promise.reject(new Error(`404: [${torrentId}] not found on NyaaSi`));
|
||||
}
|
||||
return Promise.reject(error);
|
||||
});
|
||||
}
|
||||
|
||||
function search(query) {
|
||||
return si.search(query, null, { category: Categories.ANIME.ENGLISH})
|
||||
.then(results => results.map(torrent => parseTorrent(torrent)));
|
||||
}
|
||||
|
||||
function browse(config = {}) {
|
||||
const page = config.page || 1;
|
||||
const category = config.category || Categories.ANIME.ENGLISH;
|
||||
const sort = config.sort || 'id'
|
||||
|
||||
return si.list(category, page, { sort })
|
||||
.then(response => response.results || [])
|
||||
.then(results => results.map(torrent => parseTorrent(torrent)));
|
||||
}
|
||||
|
||||
function parseTorrent(torrent) {
|
||||
return {
|
||||
title: torrent.name.replace(/\t|\s+/g, ' ').trim(),
|
||||
torrentId: torrent.id,
|
||||
infoHash: torrent.hash.trim().toLowerCase(),
|
||||
magnetLink: torrent.magnet,
|
||||
torrentLink: torrent.torrent,
|
||||
seeders: parseInt(torrent.seeders),
|
||||
size: parseSize(torrent.filesize),
|
||||
uploadDate: new Date(torrent.date),
|
||||
category: torrent.sub_category,
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { torrent, search, browse, Categories };
|
||||
@@ -1,87 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const nyaasi = require('./nyaa_si_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const Promises = require('../../lib/promises');
|
||||
const repository = require('../../lib/repository');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'NyaaSi';
|
||||
const UNTIL_PAGE = 10
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
async function scrape() {
|
||||
const scrapeStart = moment();
|
||||
const lastScrape = await repository.getProvider({ name: NAME });
|
||||
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
|
||||
|
||||
// const ids = ['1292786'];
|
||||
// return Promise.all(ids.map(id => limiter.schedule(() => nyaasi.torrent(id)
|
||||
// .then(torrent => processTorrentRecord(torrent)))))
|
||||
// .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
// const queries = ['Sagrada Reset', 'Sakurada Reset'];
|
||||
// return Promise.all(queries.map(query => limiter.schedule(() => nyaasi.search(query)
|
||||
// .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))))))))
|
||||
// .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
return scrapeLatestTorrents()
|
||||
.then(() => {
|
||||
lastScrape.lastScraped = scrapeStart;
|
||||
return lastScrape.save();
|
||||
})
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
}
|
||||
|
||||
async function updateSeeders(torrent) {
|
||||
// return limiter.schedule(() => nyaasi.torrent(torrent.torrentId))
|
||||
// .then(foundTorrent => Number.isInteger(foundTorrent.seeders) ? [foundTorrent] : []);
|
||||
return Promise.resolve([]);
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
const allowedCategories = [
|
||||
nyaasi.Categories.ANIME.ENGLISH
|
||||
];
|
||||
|
||||
return Promises.sequence(allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category)))
|
||||
.then(entries => entries.reduce((a, b) => a.concat(b), []));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
||||
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
|
||||
return nyaasi.browse({ page, sort: 'id' })
|
||||
.catch(error => {
|
||||
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)
|
||||
.catch(error => {
|
||||
console.warn(`Failed processing [${torrent.infoHash}] ${torrent.title} due: `, error);
|
||||
return Promise.resolve();
|
||||
})))))
|
||||
.then(resolved => resolved.length > 0 && page < UNTIL_PAGE
|
||||
? scrapeLatestTorrentsForCategory(category, page + 1)
|
||||
: Promise.resolve());
|
||||
}
|
||||
|
||||
async function processTorrentRecord(record) {
|
||||
if (!record || await checkAndUpdateTorrent(record)) {
|
||||
return record;
|
||||
}
|
||||
|
||||
const torrent = {
|
||||
infoHash: record.infoHash,
|
||||
torrentLink: record.torrentLink,
|
||||
provider: NAME,
|
||||
torrentId: record.torrentId,
|
||||
title: record.title,
|
||||
type: Type.ANIME,
|
||||
size: record.size,
|
||||
seeders: record.seeders,
|
||||
uploadDate: record.uploadDate,
|
||||
};
|
||||
|
||||
return createTorrentEntry(torrent).then(() => torrent);
|
||||
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,151 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require("cheerio");
|
||||
const decode = require('magnet-uri');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||
const { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages } = require('../scraperHelper')
|
||||
|
||||
const defaultTimeout = 30000;  // HTTP request timeout in ms
const maxSearchPage = 50  // hard cap on how deep a paginated search may go

const baseUrl = 'https://ondebaixa.com';

// Site URL slugs for the supported browse categories.
const Categories = {
  MOVIE: 'filmes',
  TV: 'series',
  ANIME: 'anime',
  DESENHOS: 'desenhos'
};
|
||||
|
||||
/**
 * Fetches and parses a single OndeBaixa torrent detail page, retrying on
 * failure. The page slug is the 4th path segment of the given torrentId URL.
 * Fix: the last transport/parse error is now carried through the retries
 * (trailing `error` parameter, backward compatible) so the final rejection
 * surfaces the root cause instead of a generic message — consistent with
 * the rutor_api retry helpers.
 *
 * @param {string} torrentId detail-page URL/path of the torrent
 * @param {object} config request config forwarded to singleRequest
 * @param {number} retries remaining attempts
 * @param {Error|null} error last error seen (internal, for the final reject)
 * @returns {Promise<Array>} parsed torrent records with `torrentId` attached
 */
function torrent(torrentId, config = {}, retries = 2, error = null) {
  if (!torrentId || retries === 0) {
    return Promise.reject(error || new Error(`Failed ${torrentId} query`));
  }
  const slug = encodeURIComponent(torrentId.split("/")[3]);
  return singleRequest(`${baseUrl}/${slug}/`, config)
      .then((body) => parseTorrentPage(body))
      .then((torrent) => torrent.map(el => ({ torrentId: slug, ...el })))
      .catch((err) => {
        console.warn(`Failed OndeBaixo ${slug} request: `, err);
        return torrent(torrentId, config, retries - 1, err);
      });
}
|
||||
|
||||
/**
 * Searches OndeBaixa for a keyword, following pagination while a page is
 * full (40 rows) up to `config.extendToPage` (capped at maxSearchPage).
 * Fix: the last error is carried through retry attempts (trailing `error`
 * parameter, backward compatible) so exhausting retries rejects with the
 * real cause rather than a generic message.
 *
 * @param {string} keyword search term (already URL-safe)
 * @param {object} config { page, extendToPage, timeout }
 * @param {number} retries remaining attempts
 * @param {Error|null} error last error seen (internal)
 * @returns {Promise<Array<{name, torrentId}>>}
 */
function search(keyword, config = {}, retries = 2, error = null) {
  if (!keyword || retries === 0) {
    return Promise.reject(error || new Error(`Failed ${keyword} search`));
  }
  const page = config.page || 1;
  const extendToPage = Math.min(maxSearchPage, (config.extendToPage || 1))

  return singleRequest(`${baseUrl}/${keyword}/${page}/`, config)
      .then(body => parseTableBody(body))
      // A full page (40 entries) suggests more results on the next page.
      .then(torrents => torrents.length === 40 && page < extendToPage
          ? search(keyword, { ...config, page: page + 1 }).catch(() => [])
              .then(nextTorrents => torrents.concat(nextTorrents))
          : torrents)
      .catch((err) => search(keyword, config, retries - 1, err));
}
|
||||
|
||||
/**
 * Fetches one listing page, optionally restricted to a category.
 * Fix: the last error is carried through retry attempts (trailing `error`
 * parameter, backward compatible) so the final rejection keeps the cause.
 *
 * @param {object} config { page, category, timeout }
 * @param {number} retries remaining attempts
 * @param {Error|null} error last error seen (internal)
 * @returns {Promise<Array<{name, torrentId}>>}
 */
function browse(config = {}, retries = 2, error = null) {
  if (retries === 0) {
    return Promise.reject(error || new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const category = config.category;
  const requestUrl = category ? `${baseUrl}/${category}/${page}/` : `${baseUrl}/${page}/`;

  return singleRequest(requestUrl, config)
      .then((body) => parseTableBody(body))
      .catch((err) => browse(config, retries - 1, err));
}
|
||||
|
||||
/**
 * Performs one GET with a randomized User-Agent and validates the response
 * body, rejecting on empty bodies or known error pages. Rejections carry the
 * error message string (not the Error object), matching caller expectations.
 */
async function singleRequest(requestUrl, config = {}) {
  const options = {
    headers: { 'User-Agent': getRandomUserAgent() },
    timeout: config.timeout || defaultTimeout
  };
  try {
    const response = await axios.get(requestUrl, options);
    const body = response.data;
    if (!body) {
      throw new Error(`No body: ${requestUrl}`);
    }
    if (body.includes('502: Bad gateway') || body.includes('403 Forbidden')) {
      throw new Error(`Invalid body contents: ${requestUrl}`);
    }
    return body;
  } catch (error) {
    return Promise.reject(error.message || error);
  }
}
|
||||
|
||||
/**
 * Parses a listing page into { name, torrentId } stubs — one per
 * `div.capa_larga` row, taking the first anchor's text and href.
 * Fix: added the missing `return` after reject(); previously execution fell
 * through and resolve() was also called (the rejection won the settle race,
 * but the scraping loop still ran pointlessly).
 *
 * @param {string} body raw HTML of a listing page
 * @returns {Promise<Array<{name: string, torrentId: string}>>}
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      reject(new Error('Failed loading body'));
      return;
    }

    const torrents = [];

    $('div.capa_larga.align-middle').each((i, element) => {
      const row = $(element);
      torrents.push({
        name: row.find("a").text(),
        torrentId: row.find("a").attr("href")
      });
    });
    resolve(torrents);
  });
}
|
||||
|
||||
// Parses an OndeBaixa torrent detail page into one record per PT-dubbed
// magnet link found on the page. Shared page metadata (original title, year,
// category, languages) is read from the "div#informacoes" details section.
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      // NOTE(review): missing `return` — execution continues and resolve() below still runs.
      reject(new Error('Failed loading body'));
    }
    // Keep only magnets whose anchor title passes the PT-dubbed filter.
    const magnets = $(`a[href^="magnet"]`)
        .filter((i, elem) => isPtDubbed($(elem).attr('title')))
        .map((i, elem) => $(elem).attr("href")).get();
    const details = $('div#informacoes')
    const category = details.find('span:contains(\'Gêneros: \')').next().html()
    const torrents = magnets.map(magnetLink => {
      const decodedMagnet = decode(magnetLink);
      // Magnet display names encode spaces as '+'.
      const name = sanitizePtName(escapeHTML(decodedMagnet.name || '').replace(/\+/g, ' '));
      const originalTitle = details.find('span:contains(\'Título Original: \')').next().text().trim();
      const year = details.find('span:contains(\'Ano de Lançamento: \')').next().text().trim();
      const fallBackTitle = `${originalTitle.trim()} ${year.trim()} ${name.trim()}`;
      return {
        // Very short magnet names are unreliable; fall back to page metadata.
        title: name.length > 5 ? name : fallBackTitle,
        originalName: sanitizePtOriginalName(originalTitle),
        year: year,
        infoHash: decodedMagnet.infoHash,
        magnetLink: magnetLink,
        category: parseCategory(category),
        uploadDate: new Date($('time').attr('datetime')),
        languages: sanitizePtLanguages(details.find('span:contains(\'Idioma\')').next().text())
      }
    });
    // filter(Boolean)-style guard; map() never yields falsy here, so this is defensive.
    resolve(torrents.filter((x) => x));
  });
}
|
||||
|
||||
// Maps the "Gêneros" links fragment of a detail page to an internal category.
// Checked in priority order: anime wins over series/filmes; desenhos are
// treated as TV. Returns undefined when no known slug is linked.
function parseCategory(body) {
  const $ = cheerio.load(body)
  const hasLink = (slug) => Boolean($(`a[href*='${slug}']`).text());
  if (hasLink('anime')) {
    return Categories.ANIME
  }
  if (hasLink('series')) {
    return Categories.TV
  }
  if (hasLink('filmes')) {
    return Categories.MOVIE
  }
  if (hasLink('desenhos')) {
    return Categories.TV
  }
}

module.exports = { torrent, search, browse, Categories };
|
||||
@@ -1,113 +0,0 @@
|
||||
const moment = require("moment");
|
||||
const Bottleneck = require("bottleneck");
|
||||
const ondebaixa = require("./ondebaixa_api");
|
||||
const { Type } = require("../../lib/types");
|
||||
const repository = require("../../lib/repository");
|
||||
const Promises = require("../../lib/promises");
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require("../../lib/torrentEntries");
|
||||
const { updateCurrentSeeders, updateTorrentSize } = require("../../lib/torrent");
|
||||
const { getImdbId } = require("../../lib/metadata");
|
||||
|
||||
const NAME = "OndeBaixa";  // provider name recorded on torrent entries
const UNTIL_PAGE = 5;  // max listing pages scraped per category per run
// typeMapping is a hoisted function declaration, so calling it here is safe.
const TYPE_MAPPING = typeMapping();

const limiter = new Bottleneck({ maxConcurrent: 5 });  // caps concurrent entry processing
|
||||
/**
 * Entry point for one OndeBaixa scrape run: scrapes the latest torrents and,
 * on success, records the run's start time on the provider row.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
|
||||
|
||||
/**
 * Refreshes seeder info by re-fetching the torrent's detail page through the
 * shared rate limiter.
 */
async function updateSeeders(torrent) {
  const fetchTorrent = () => ondebaixa.torrent(torrent.torrentId);
  return limiter.schedule(fetchTorrent);
}
|
||||
|
||||
/**
 * Scrapes the allowed categories sequentially (one at a time) and flattens
 * the per-category results into a single array. ANIME is intentionally not
 * in the list.
 */
async function scrapeLatestTorrents() {
  const allowedCategories = [
    ondebaixa.Categories.MOVIE,
    ondebaixa.Categories.TV,
    ondebaixa.Categories.DESENHOS
  ];

  const categoryScrapers = allowedCategories.map(
      (category) => () => scrapeLatestTorrentsForCategory(category));
  const entries = await Promises.sequence(categoryScrapers);
  return entries.reduce((merged, entry) => merged.concat(entry), []);
}
|
||||
|
||||
/**
 * Scrapes one listing page of a category, processing each entry through the
 * shared limiter, then recurses onto the next page while results keep coming
 * and the per-category page cap is not reached.
 */
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  const torrents = await ondebaixa.browse({ category, page })
      .catch((error) => {
        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
        return [];
      });
  const resolved = await Promise.all(
      torrents.map((entry) => limiter.schedule(() => processEntry(entry))));
  if (resolved.length > 0 && page < untilPage(category)) {
    return scrapeLatestTorrentsForCategory(category, page + 1);
  }
}
|
||||
|
||||
/**
 * Resolves a listing stub into full records via the detail page and persists
 * them one after another. Failures are swallowed (best-effort) so one bad
 * entry never aborts the page.
 */
async function processEntry(entry) {
  try {
    const records = await ondebaixa.torrent(entry.torrentId);
    return await Promises.sequence(records.map(record => () => processTorrentRecord(record)));
  } catch (error) {
    return undefined;
  }
}
|
||||
|
||||
// Persists one parsed torrent record: refreshes known torrents, backfills
// missing size/seeders/imdbId on the input object, then creates the entry.
// Note: mutates `foundTorrent` (imdbId assignment and the update helpers).
async function processTorrentRecord(foundTorrent) {
  // Already known — checkAndUpdateTorrent handled the refresh.
  if (await checkAndUpdateTorrent({ provider: NAME, ...foundTorrent })) {
    return foundTorrent;
  }

  // Backfill metadata the detail page did not provide.
  if (!foundTorrent.size) {
    await updateTorrentSize(foundTorrent);
  }
  if (!Number.isInteger(foundTorrent.seeders)) {
    await updateCurrentSeeders(foundTorrent);
  }
  // imdb lookup is skipped for anime — presumably matched via a different
  // catalog downstream; confirm against the metadata lib.
  if (!foundTorrent.imdbId && TYPE_MAPPING[foundTorrent.category] !== Type.ANIME) {
    const info = { title: foundTorrent.originalName, year: foundTorrent.year };
    foundTorrent.imdbId = await getImdbId(info, TYPE_MAPPING[foundTorrent.category]).catch(() => undefined);
  }

  const torrent = {
    infoHash: foundTorrent.infoHash,
    provider: NAME,
    torrentId: foundTorrent.torrentId,
    title: foundTorrent.title,
    type: TYPE_MAPPING[foundTorrent.category],
    imdbId: foundTorrent.imdbId,
    uploadDate: foundTorrent.uploadDate,
    seeders: foundTorrent.seeders,
    size: foundTorrent.size,
    files: foundTorrent.files,
    languages: foundTorrent.languages
  };
  return createTorrentEntry(torrent);
}
|
||||
|
||||
// Builds the site-category → internal content type lookup table.
function typeMapping() {
  return {
    [ondebaixa.Categories.MOVIE]: Type.MOVIE,
    [ondebaixa.Categories.TV]: Type.SERIES,
    [ondebaixa.Categories.DESENHOS]: Type.SERIES,
    [ondebaixa.Categories.ANIME]: Type.ANIME
  };
}
|
||||
|
||||
// Page-depth limit for a category. `category` is currently unused — all
// categories share the same UNTIL_PAGE cap; kept for future per-category tuning.
function untilPage(category) {
  return UNTIL_PAGE;
}

module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,161 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { getRandomUserAgent } = require("../../lib/requestHelper");
|
||||
|
||||
const baseUrl = 'https://torrentapi.org/pubapi_v2.php';
const appId = 'torrentio-addon';  // app_id sent with every API call
const defaultTimeout = 30000;  // request timeout in ms
const retryDelay = 3000;  // back-off between rate-limited retries in ms

// Cached API token; torrentapi tokens expire and are refreshed on error_code 4.
let token;
|
||||
// Query option values accepted by the torrentapi endpoints.
// `category` entries are arrays of torrentapi numeric category ids so related
// ids can be grouped (see MOVIES_HIGH_RES) and flattened by callers.
const Options = {
  category: {
    MOVIES_XVID: [14],
    MOVIES_XVID_720P: [48],
    MOVIES_X264: [17],
    MOVIES_X264_1080P: [44],
    MOVIES_X264_720P: [45],
    MOVIES_X264_3D: [47],
    MOVIES_X264_4K: [50],
    MOVIES_X265_1080P: [54],
    MOVIES_X265_4K: [51],
    MOVIES_X265_4K_HDR: [52],
    MOVIES_FULL_BD: [42],
    MOVIES_BD_REMUX: [46],
    MOVIES_HIGH_RES: [47, 50, 51, 52, 46],
    TV_EPISODES: [18],
    TV_UHD_EPISODES: [49],
    TV_HD_EPISODES: [41],
    MUSIC_MP3: [23],
    MUSIC_FLAC: [25],
    GAMES_PC_ISO: [27],
    GAMES_PC_RIP: [28],
    GAMES_PS3: [40],
    GAMES_XBOX_360: [32],
    SOFTWARE_PC_ISO: [33],
    EBOOKS: [35],
    XXX: [4],
  },
  // Server-side sort orders.
  sort: {
    LAST: 'last',
    SEEDERS: 'seeders',
    LEECHERS: 'leechers'
  },
  // Response formats; json_extended includes size/pubdate/episode_info.
  format: {
    JSON: 'json',
    JSON_EXTENDED: 'json_extended'
  },
  // Whether to restrict results to "ranked" (scene) releases.
  ranked: {
    TRUE: 1,
    FALSE: 0
  }
}
|
||||
|
||||
/**
 * Searches torrentapi by imdb id. Optional params: category (array of ids),
 * limit, sort, min_seeders, min_leechers, format, ranked.
 */
function search(imdbId, params = {}) {
  if (!imdbId) {
    return Promise.reject(new Error(`Must define imdbId`));
  }
  const query = {
    mode: 'search',
    search_imdb: imdbId,
    category: params.category && params.category.join(';') || null,
    limit: params.limit || 100,
    sort: params.sort || Options.sort.SEEDERS,
    min_seeders: params.min_seeders || undefined,
    min_leechers: params.min_leechers || undefined,
    format: params.format || Options.format.JSON_EXTENDED,
    ranked: params.ranked || Options.ranked.FALSE
  };

  return singleRequest(query).then(parseResults);
}
|
||||
|
||||
/**
 * Lists the latest torrents from torrentapi, newest first by default.
 * Optional params mirror search(): category, limit, sort, min_seeders,
 * min_leechers, format, ranked.
 */
function browse(params = {}) {
  const query = {
    mode: 'list',
    category: params.category && params.category.join(';') || null,
    limit: params.limit || 100,
    sort: params.sort || Options.sort.LAST,
    min_seeders: params.min_seeders || undefined,
    min_leechers: params.min_leechers || undefined,
    format: params.format || Options.format.JSON_EXTENDED,
    ranked: params.ranked || Options.ranked.FALSE
  };

  return singleRequest(query).then(parseResults);
}
|
||||
|
||||
/**
 * Performs one authenticated torrentapi request with rate-limit handling.
 * Fixes:
 *  - the expired-token branch previously recursed without decrementing
 *    `retries` (and the default re-set it to 15), allowing unbounded
 *    recursion if the API kept returning error_code 4; it now consumes a
 *    retry and rejects when exhausted;
 *  - the caller's `params` object is no longer mutated (token/app_id were
 *    appended and nullish keys deleted in place);
 *  - "RARGB" typo fixed in the failure message.
 *
 * @param {object} params query params (mode, search_imdb, category, ...)
 * @param {object} config { timeout }
 * @param {number} retries remaining attempts for rate-limit/empty responses
 */
async function singleRequest(params = {}, config = {}, retries = 15) {
  const timeout = config.timeout || defaultTimeout;
  const headers = {
    'user-agent': getRandomUserAgent(),
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4'
  };
  // Work on a copy so the caller's object is never mutated.
  const query = { ...params, token: await getToken(), app_id: appId };

  Object.keys(query)
      .filter(key => query[key] === undefined || query[key] === null)
      .forEach(key => delete query[key]);
  const options = { headers, timeout, params: query };
  return axios.get(baseUrl, options)
      .then(response => {
        if (response.data && response.data.error_code === 4) {
          // Token expired — drop the cache and retry, consuming a retry so an
          // endlessly-expiring token cannot recurse forever.
          token = undefined;
          return retries > 0
              ? singleRequest(params, config, retries - 1)
              : Promise.reject(response.data);
        }
        if ((!response.data || !response.data.length || [5, 20].includes(response.data.error_code)) && retries > 0) {
          // Too many requests / no data yet — back off and retry.
          return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
        }
        if (response.status !== 200 || (response.data && response.data.error)) {
          // Something went wrong server-side.
          return Promise.reject(response.data || `Failed RARBG request with status=${response.status}`);
        }

        return response.data;
      })
      .catch(error => {
        if (error.response && [429].includes(error.response.status) && retries > 0) {
          return Promises.delay(retryDelay).then(() => singleRequest(params, config, retries - 1));
        }
        return Promise.reject(error.message || error);
      });
}
|
||||
|
||||
/**
 * Maps a raw torrentapi response to internal torrent records.
 * Fix: rejects with a proper Error (with a stack trace) instead of a bare
 * string, so downstream logging and `instanceof Error` checks work.
 *
 * @param {{torrent_results: Array}} results raw API payload
 * @returns {Array|Promise<never>} parsed records, or a rejection on bad input
 */
function parseResults(results) {
  if (!results || !Array.isArray(results.torrent_results)) {
    return Promise.reject(new Error(`Incorrect results ${JSON.stringify(results)}`));
  }
  return results.torrent_results.map(result => parseResult(result));
}
|
||||
|
||||
// Normalises one raw torrentapi result into the internal torrent shape.
// The infoHash is extracted from the magnet link in `download`.
function parseResult(result) {
  const { title, download, seeders, leechers, category, size } = result;
  return {
    title,
    infoHash: decode(download).infoHash,
    magnetLink: download,
    seeders,
    leechers,
    category,
    size,
    uploadDate: new Date(result.pubdate),
    imdbId: result.episode_info && result.episode_info.imdb
  };
}
|
||||
|
||||
/**
 * Returns the cached API token, requesting a fresh one on first use (or
 * after singleRequest clears it on expiry).
 */
async function getToken() {
  if (token) {
    return token;
  }
  const params = { get_token: 'get_token', app_id: appId };
  const options = { timeout: defaultTimeout, params };
  const response = await axios.get(baseUrl, options);
  token = response.data.token;
  return token;
}

module.exports = { search, browse, Options };
|
||||
@@ -1,80 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const rarbg = require('./rarbg_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'RARBG';

// One API call every 3s (torrentapi rate limiting); up to 20 entries persisted concurrently.
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 3000 });
const entryLimiter = new Bottleneck({ maxConcurrent: 20 });
// Per-quality category id arrays, flattened into one flat list of ids.
const allowedCategories = [
  rarbg.Options.category.MOVIES_XVID,
  rarbg.Options.category.MOVIES_XVID_720P,
  rarbg.Options.category.MOVIES_X265_1080P,
  rarbg.Options.category.MOVIES_X264,
  rarbg.Options.category.MOVIES_X264_720P,
  rarbg.Options.category.MOVIES_X264_1080P,
  rarbg.Options.category.MOVIES_HIGH_RES,
  rarbg.Options.category.TV_EPISODES,
  rarbg.Options.category.TV_UHD_EPISODES,
  rarbg.Options.category.TV_HD_EPISODES
].reduce((a, b) => a.concat(b), [])
|
||||
|
||||
/**
 * One-off dump scrape: iterates a bundled list of series imdb ids, fetching
 * torrents for each (rate limited) and persisting every record.
 */
async function scrape() {
  console.log(`[${moment()}] starting ${NAME} dump scrape...`);
  const seriesImdbIds = require('./rargb_series_imdb_ids_2021-02-27.json');

  const jobs = seriesImdbIds.map(imdbId =>
      limiter.schedule(() => getTorrentsForImdbId(imdbId))
          .then(torrents => Promise.all(
              torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t))))));
  await Promise.all(jobs);
  console.log(`[${moment()}] finished ${NAME} dump scrape`);
}
|
||||
|
||||
/**
 * Fetches all allowed-category torrents for one imdb id; failures are logged
 * and degrade to an empty list so the dump run continues.
 */
async function getTorrentsForImdbId(imdbId) {
  try {
    const torrents = await rarbg.search(imdbId, { category: allowedCategories });
    console.log(`Completed ${imdbId} request`);
    return torrents;
  } catch (error) {
    console.warn(`Failed ${NAME} request for ${imdbId}: `, error);
    return [];
  }
}
|
||||
|
||||
/**
 * Persists one parsed record. Known torrents are refreshed by
 * checkAndUpdateTorrent; new ones get a full entry created.
 */
async function processTorrentRecord(record) {
  if (await checkAndUpdateTorrent(record)) {
    return record;
  }

  const { infoHash, title, seeders, size, uploadDate, imdbId } = record;
  return createTorrentEntry({
    provider: NAME,
    infoHash,
    title,
    type: getType(record.category),
    seeders,
    size,
    uploadDate,
    imdbId
  });
}
|
||||
|
||||
// Raw RARBG category labels that denote series content; anything else is a movie.
const seriesCategories = [
  'TV Episodes',
  'Movies/TV-UHD-episodes',
  'TV HD Episodes',
];

// Maps a raw category label to the internal content type.
function getType(category) {
  return seriesCategories.includes(category) ? Type.SERIES : Type.MOVIE;
}

module.exports = { scrape, NAME };
|
||||
@@ -1,95 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const rarbg = require('./rarbg_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'RARBG';

// One API call every 2.5s (torrentapi rate limiting); up to 10 entries persisted concurrently.
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
/**
 * Entry point for one RARBG scrape run: scrapes the latest torrents and, on
 * success, records the run's start time on the provider row.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
|
||||
|
||||
// Seeder refresh is currently disabled for RARBG; always resolves to an empty
// list. The commented block sketches the intended imdb-based implementation.
async function updateSeeders(torrent, getImdbIdsMethod) {
  // return getImdbIdsMethod()
  //     .then(imdbIds => Promise.all(imdbIds.map(imdbId => limiter.schedule(() => search(imdbId)))))
  //     .then(results => results.reduce((a, b) => a.concat(b), []));
  return Promise.resolve([]);
}
|
||||
|
||||
/**
 * Scrapes each allowed category sequentially through the API rate limiter
 * and flattens the per-category results into one array.
 */
async function scrapeLatestTorrents() {
  const allowedCategories = [
    rarbg.Options.category.MOVIES_XVID,
    rarbg.Options.category.MOVIES_XVID_720P,
    rarbg.Options.category.MOVIES_X265_1080P,
    rarbg.Options.category.MOVIES_X264,
    rarbg.Options.category.MOVIES_X264_720P,
    rarbg.Options.category.MOVIES_X264_1080P,
    rarbg.Options.category.MOVIES_HIGH_RES,
    rarbg.Options.category.TV_EPISODES,
    rarbg.Options.category.TV_UHD_EPISODES,
    rarbg.Options.category.TV_HD_EPISODES
  ];

  const tasks = allowedCategories.map(
      category => () => limiter.schedule(() => scrapeLatestTorrentsForCategory(category)));
  const entries = await Promises.sequence(tasks);
  return entries.reduce((merged, entry) => merged.concat(entry), []);
}
|
||||
|
||||
/**
 * Scrapes the latest torrents of one category and persists each record
 * through the entry limiter. Any failure (browse or processing) is logged
 * and degrades to an empty result, matching the original promise chain.
 */
async function scrapeLatestTorrentsForCategory(category) {
  console.log(`Scrapping ${NAME} ${category} category`);
  try {
    const torrents = await rarbg.browse({ category: category });
    return await Promise.all(
        torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t))));
  } catch (error) {
    console.warn(`Failed ${NAME} scrapping for ${category} due: `, error);
    return [];
  }
}
|
||||
|
||||
/**
 * Persists one parsed record. Known torrents are refreshed by
 * checkAndUpdateTorrent; new ones get a full entry created.
 */
async function processTorrentRecord(record) {
  if (await checkAndUpdateTorrent(record)) {
    return record;
  }

  const { infoHash, title, seeders, size, uploadDate, imdbId } = record;
  return createTorrentEntry({
    provider: NAME,
    infoHash,
    title,
    type: getType(record.category),
    seeders,
    size,
    uploadDate,
    imdbId
  });
}
|
||||
|
||||
// Raw RARBG category labels that denote series content; anything else is a movie.
const seriesCategories = [
  'TV Episodes',
  'Movies/TV-UHD-episodes',
  'TV HD Episodes',
];

// Maps a raw category label to the internal content type.
function getType(category) {
  return seriesCategories.includes(category) ? Type.SERIES : Type.MOVIE;
}

module.exports = { scrape, updateSeeders, NAME };
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,207 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
const decode = require('magnet-uri');
|
||||
const { defaultOptionsWithProxy } = require('../../lib/requestHelper');
|
||||
|
||||
const baseUrl = 'http://www.rutor.info';
const defaultTimeout = 10000;  // request timeout in ms

// Numeric rutor.info category ids used in browse URLs.
const Categories = {
  ALL: '0',
  FOREIGN_FILMS: '1',
  RUSSIAN_FILMS: '5',
  SCIENCE_FILMS: '12',
  FOREIGN_SERIES: '4',
  RUSSIAN_SERIES: '16',
  RUSSIAN_TV: '6',
  RUSSIAN_ANIMATION: '7',
  ANIME: '10',
  FOREIGN_RELEASES: '17'
};
|
||||
|
||||
/**
 * Fetches and parses one rutor torrent detail page, retrying on failure and
 * rejecting with the last seen error once retries are exhausted.
 */
function torrent(torrentId, config = {}, retries = 2, error = null) {
  if (!torrentId || retries === 0) {
    return Promise.reject(error || new Error(`Failed ${torrentId} search`));
  }

  const fetchPage = () => singleRequest(`${baseUrl}/torrent/${torrentId}`);
  return fetchPage()
      .then((body) => parseTorrentPage(body, torrentId))
      .catch((err) => torrent(torrentId, config, retries - 1, err));
}
|
||||
|
||||
/**
 * Runs a rutor search (all categories) for the given query, retrying on
 * failure and rejecting with the last seen error once retries are exhausted.
 */
function search(query, retries = 2, error = null) {
  if (retries === 0) {
    return Promise.reject(error || new Error(`Failed browse request`));
  }

  const searchUrl = `${baseUrl}/search/0/0/0/0/${encodeURIComponent(query)}`;
  return singleRequest(searchUrl)
      .then((body) => parseTableBody(body))
      .catch((err) => search(query, retries - 1, err));
}
|
||||
|
||||
/**
 * Fetches one rutor listing page for a category (rutor pages are 0-based,
 * hence `page - 1`), retrying on failure and rejecting with the last error.
 */
function browse(config = {}, retries = 2, error = null) {
  if (retries === 0) {
    return Promise.reject(error || new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const category = config.category;

  const browseUrl = `${baseUrl}/browse/${page - 1}/${category}/0/0`;
  return singleRequest(browseUrl)
      .then((body) => parseTableBody(body))
      .catch((err) => browse(config, retries - 1, err));
}
|
||||
|
||||
// Fetches the file-listing fragment for a torrent and parses it into
// { fileIndex, name, path, size } entries.
function files(torrentId) {
  const listingUrl = `${baseUrl}/descriptions/${torrentId}.files`;
  return singleRequest(listingUrl).then(parseFiles);
}
|
||||
|
||||
/**
 * Performs one proxied GET and validates the body, throwing on empty bodies
 * and on known block/error pages (Access Denied, 502, 403, DNS errors).
 */
async function singleRequest(requestUrl) {
  const options = { ...defaultOptionsWithProxy(), timeout: defaultTimeout };

  const response = await axios.get(requestUrl, options);
  const body = response.data;
  if (!body) {
    throw new Error(`No body: ${requestUrl} with status ${response.status}`);
  }
  if (body.includes('Access Denied')) {
    console.log(`Access Denied: ${requestUrl}`);
    throw new Error(`Access Denied: ${requestUrl}`);
  }
  if (body.includes('502: Bad gateway') ||
      body.includes('403 Forbidden') ||
      body.includes('Origin DNS error')) {
    throw new Error(`Invalid body contents: ${requestUrl}`);
  }
  return body;
}
|
||||
|
||||
// Parses a rutor listing table (#index) into torrent stubs.
// Cell layout per row (by position): [0] upload date, [1] links cell with
// [download, magnet, details] anchors in that order; from the right:
// last cell = peers, second-to-last = size.
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      // NOTE(review): no early return — resolve() below still runs after reject().
      reject(new Error('Failed loading body'));
    }

    const torrents = $('#index').find('tr:not(.backgr)').map((i, elem) => {
      const row = $(elem).find('td');
      const links = $(row[1]).find('a');
      const peers = $(row[row.length - 1]);
      const magnetLink = $(links[1]).attr('href');

      return {
        title: $(links[2]).text(),
        infoHash: decode(magnetLink).infoHash,
        magnetLink: magnetLink,
        torrentLink: $(links[0]).attr('href'),
        // Numeric id from the details anchor, e.g. /torrent/123456.
        torrentId: $(links[2]).attr('href').match(/torrent\/(\d+)/)[1],
        seeders: parseInt(peers.find('.green').text()),
        leechers: parseInt(peers.find('.red').text()),
        uploadDate: parseRussianDate($(row[0]).text()),
        // NOTE(review): the first replace() argument is likely a non-breaking
        // space being normalised to a plain space — confirm the source bytes.
        size: $(row[row.length - 2]).html().replace(' ', ' '),
      }
    }).get();

    resolve(torrents);
  });
}
|
||||
|
||||
// Parses a rutor torrent detail page into a full torrent record.
// Several fields are read at fixed offsets from the END of the #details
// table (rows.length - 8/-5/-4) — presumably because the number of leading
// rows varies between pages; confirm against live markup before changing.
function parseTorrentPage(body, torrentId) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      // NOTE(review): no early return — execution continues past reject().
      reject(new Error('Failed loading body'));
    }
    const rows = $('#details > tr')
    const details = $(rows[0]).find('td:nth-of-type(2)');
    const magnetLink = $('#download a:nth-of-type(1)').attr('href');
    // imdb id (ttNNN...) extracted from any imdb link inside the description.
    const imdbIdMatch = details.html().match(/imdb\.com\/title\/(tt\d+)/i);

    const parsedTorrent = {
      title: $('#all h1').first().text(),
      torrentId: torrentId,
      infoHash: decode(magnetLink).infoHash,
      // Dedupe the magnet's tracker URLs into a CSV string.
      trackers: Array.from(new Set(decode(magnetLink).tr)).join(','),
      magnetLink: magnetLink,
      torrentLink: $('#download a:nth-of-type(2)').attr('href'),
      seeders: parseInt($(rows[rows.length - 8]).find('td:nth-of-type(2)').first().text(), 10),
      // Category slug from the "Категория" row's link, e.g. /kino → "kino".
      category: $('tr:contains(\'Категория\') a').first().attr('href').match(/\/([\w-]+)$/)[1],
      languages: parseLanguages(details.text()),
      size: parseSize($(rows[rows.length - 4]).find('td:nth-of-type(2)').text()),
      uploadDate: parseDate($(rows[rows.length - 5]).find('td:nth-of-type(2)').first().text()),
      imdbId: imdbIdMatch && imdbIdMatch[1]
    };
    resolve(parsedTorrent);
  });
}
|
||||
|
||||
// Parses the plain-HTML file-listing fragment (one table row per line) into
// { fileIndex, name, path, size } entries. The first <td> of each row is
// dropped; the remaining cells are [file path, size string].
function parseFiles(body) {
  if (!body) {
    throw new Error("No files in the body");
  }
  return body.split('\n')
      // NOTE(review): match() returns null for lines without <td> cells —
      // .slice(1) would then throw; confirm every line of the fragment has cells.
      .map((item) => item.match(/<td>([^<]+)<\/td>/g).slice(1))
      .map((item, index) => ({
        fileIndex: index,
        // NOTE(review): both regexes strip everything up to the last '/', so
        // `path` ends up identical to `name` — possibly `path` was meant to
        // keep the directory prefix; confirm intended behavior.
        name: item[0].replace(/^.+\//g, ''),
        path: item[0].replace(/^.+\//, ''),
        size: parseSize(item[1])
      }));
}
|
||||
|
||||
// Parses "DD-MM-YYYY HH:mm:ss (…)" strings, dropping the parenthesised part.
function parseDate(dateString) {
  const cleaned = dateString.replace(/\s\(.*\)/, '');
  return moment(cleaned, 'DD-MM-YYYY HH:mm:ss').toDate();
}
|
||||
|
||||
// Russian month abbreviations → English equivalents, so moment can parse
// rutor listing dates with its default (English) locale.
const russianMonths = {
  'Янв': 'Jan',
  'Фев': 'Feb',
  'Мар': 'Mar',
  'Апр': 'Apr',
  'Май': 'May',
  'Июн': 'Jun',
  'Июл': 'Jul',
  'Авг': 'Aug',
  'Сен': 'Sep',
  'Окт': 'Oct',
  'Ноя': 'Nov',
  'Дек': 'Dec'
};
|
||||
|
||||
// Parses listing dates like "12 Окт 23": the Russian month abbreviation is
// swapped for its English equivalent and non-breaking spaces normalised
// before handing the string to moment.
function parseRussianDate(dateString) {
  const rusMonth = Object.keys(russianMonths).find(month => dateString.includes(month));
  const normalized = dateString
      .trim()
      .replace(rusMonth, russianMonths[rusMonth])
      .replace(/\u00a0/g, ' ');
  return moment(normalized, 'DD MMM YY').toDate();
}
|
||||
|
||||
/**
 * Extracts the exact byte count from size strings like
 * "1.37 GB (1471510158 Bytes)".
 * Fix: previously threw a TypeError when the "(N Bytes)" part was missing
 * (match() returns null); now returns undefined for such strings, which
 * downstream size-backfill logic treats as "unknown".
 *
 * @param {string} sizeString human-readable size with a "(N Bytes)" suffix
 * @returns {number|undefined} byte count, or undefined when not present
 */
function parseSize(sizeString) {
  const match = sizeString && sizeString.match(/\((\d+) Bytes\)/);
  return match ? parseInt(match[1], 10) : undefined;
}
|
||||
|
||||
// Regexes detecting audio languages in rutor description text. Each pattern
// requires a language/audio label ("Язык"/"Звук"/"Аудио"/…) before the
// language token to avoid matching subtitle listings.
const languageMatchers = {
  'russian': /(?:Язык|Звук|Аудио|audio|language).*(russian|\brus?\b|[Рр]усский)/i,
  'english': /(?:Язык|Звук|Аудио|audio|language).*(english|\beng?\b|[Аа]нглийский)/i,
  'ukrainian': /(?:Язык|Звук|Аудио|audio|language).*(ukrainian|\bukr\b|украинский)/i,
  'french': /(?:Язык|Звук|Аудио|audio|language).*(french|\bfr\b|французский)/i,
  'spanish': /(?:Язык|Звук|Аудио|audio|language).*(spanish|\bspa\b|испанский)/i,
  'italian': /(?:Язык|Звук|Аудио|audio|language).*(italian|\bita\b|итальянский)/i,
  'german': /(?:Язык|Звук|Аудио|audio|language).*(german|\bger\b|Немецкий)/i,
  'korean': /(?:Язык|Звук|Аудио|audio|language).*(korean|Корейский)/i,
  'arabic': /(?:Язык|Звук|Аудио|audio|language).*(arabic|Арабский)/i,
  'portuguese': /(?:Язык|Звук|Аудио|audio|language).*(portuguese|Португальский)/i,
  'japanese': /(?:Язык|Звук|Аудио|audio|language).*(japanese|\bjap\b|\bjp\b|[Яя]понский)/i,
}

/**
 * Extracts the audio languages mentioned in a torrent description.
 * Russian is always assumed present (rutor is a Russian tracker); the text
 * after a subtitles section marker ("Text"/"Текст") is ignored so subtitle
 * languages are not counted as audio. More than four languages collapses
 * to the single tag 'multi-audio'.
 */
function parseLanguages(details) {
  const subsInfoMatch = details.match(/\r?\n(Text|Текст)(?:\s?#?\d{1,2})?\r?\n/i);
  const audioPart = subsInfoMatch ? details.substring(0, subsInfoMatch.index) : details;
  const detected = Object.entries(languageMatchers)
      .filter(([, matcher]) => matcher.test(audioPart))
      .map(([language]) => language);
  const languages = [...new Set(['russian', ...detected])];
  return languages.length > 4 ? 'multi-audio' : languages.join(',');
}
|
||||
|
||||
module.exports = { torrent, browse, search, Categories };
|
||||
@@ -1,133 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const { parse } = require('parse-torrent-title');
|
||||
const rutor = require('./rutor_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'Rutor';
// rutor category slug → internal content type.
const TYPE_MAPPING = {
  'kino': Type.MOVIE,
  'nashe_kino': Type.MOVIE,
  'nauchno_popularnoe': Type.MOVIE,
  'inostrannoe': Type.MOVIE,
  'seriali': Type.SERIES,
  'nashi_seriali': Type.SERIES,
  'tv': Type.SERIES,
  'multiki': Type.MOVIE,
  'anime': Type.ANIME
};

// Listing requests: one per 5s; detail-page requests: one per 2.5s;
// record processing runs up to 10-wide.
const api_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 5000 });
const api_entry_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
const limiter = new Bottleneck({ maxConcurrent: 10 });
// Categories included in a scrape run (ALL and RUSSIAN_TV are excluded).
const allowedCategories = [
  rutor.Categories.FOREIGN_FILMS,
  rutor.Categories.FOREIGN_RELEASES,
  rutor.Categories.RUSSIAN_FILMS,
  rutor.Categories.FOREIGN_SERIES,
  rutor.Categories.RUSSIAN_SERIES,
  rutor.Categories.SCIENCE_FILMS,
  rutor.Categories.RUSSIAN_ANIMATION,
  rutor.Categories.ANIME
];
|
||||
|
||||
/**
 * Entry point for one Rutor scrape run: scrapes the latest torrents and, on
 * success, records the run's start time on the provider row.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  await scrapeLatestTorrents();
  lastScrape.lastScraped = scrapeStart;
  await lastScrape.save();
  console.log(`[${moment()}] finished ${NAME} scrape`);
}
|
||||
|
||||
// Seeder refresh is not supported for Rutor; always resolves to an empty list.
async function updateSeeders(torrent) {
  return Promise.resolve([]);
}
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
return Promises.sequence(allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category)))
|
||||
.then(entries => entries.reduce((a, b) => a.concat(b), []));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
||||
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
|
||||
return api_limiter.schedule(() => rutor.browse({ category, page }))
|
||||
.catch(error => {
|
||||
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
|
||||
.then(resolved => resolved.length > 0 && page < getMaxPage(category)
|
||||
? scrapeLatestTorrentsForCategory(category, page + 1)
|
||||
: Promise.resolve());
|
||||
}
|
||||
|
||||
/**
 * Processes one browse-listing record: skips known or stale entries,
 * fetches the full torrent page, validates it and persists a new entry.
 * @param {object} record - partial record from the category listing.
 */
async function processTorrentRecord(record) {
  // already known torrent — nothing further to do
  if (await checkAndUpdateTorrent(record)) {
    return record;
  }
  const staleCutoff = moment().subtract(18, 'month');
  const isStale = moment(record.uploadDate).isBefore(staleCutoff);
  if (record.seeders === 0 && isStale) {
    console.log(`Skipping old unseeded torrent [${record.infoHash}] ${record.title}`)
    return record;
  }

  // fetch full details through the entry rate limiter; failures become undefined
  const foundTorrent = await api_entry_limiter.schedule(() => rutor.torrent(record.torrentId).catch(() => undefined));

  if (!foundTorrent || !TYPE_MAPPING[foundTorrent.category]) {
    return Promise.resolve(`${NAME}: Invalid torrent record: ${record.torrentId}`);
  }
  if (!foundTorrent.imdbId && disallowWithoutImdbId(foundTorrent)) {
    return Promise.resolve(`${NAME}: No imdbId defined: ${record.torrentId}`);
  }

  const torrent = {
    provider: NAME,
    infoHash: foundTorrent.infoHash,
    torrentId: foundTorrent.torrentId,
    torrentLink: foundTorrent.torrentLink,
    trackers: foundTorrent.trackers,
    title: foundTorrent.title,
    type: TYPE_MAPPING[foundTorrent.category],
    size: foundTorrent.size,
    seeders: foundTorrent.seeders,
    uploadDate: foundTorrent.uploadDate,
    imdbId: foundTorrent.imdbId,
    languages: foundTorrent.languages || undefined,
  };

  return createTorrentEntry(torrent).then(() => torrent);
}
|
||||
|
||||
// How many listing pages to walk for a category (high-volume foreign
// categories get an extra page, everything else only the first).
function getMaxPage(category) {
  const deepCategories = [
    rutor.Categories.FOREIGN_FILMS,
    rutor.Categories.FOREIGN_SERIES
  ];
  return deepCategories.includes(category) ? 2 : 1;
}

// Decides whether a torrent without an imdb id should be rejected.
function disallowWithoutImdbId(torrent) {
  if (['kino', 'anime'].includes(torrent.category)) {
    return false; // allow to search foreign movie and anime ids via search
  }
  // allow to search id for non russian series titles via search
  const latinOnlyTitle = !parse(torrent.title).title.match(/[\u0400-\u04ff]/i);
  return !(torrent.category === 'seriali' && latinOnlyTitle);
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,41 +0,0 @@
|
||||
// Detects markers of a Portuguese dubbed / dual-audio release in a name.
// Returns the match result (truthy array on match, null otherwise).
function isPtDubbed(name) {
  const dubMarkers = /dublado|dual|nacional|multi/;
  return name.toLowerCase().match(dubMarkers);
}
|
||||
|
||||
// Cleans a scraped Portuguese release name: moves a stray resolution token
// to the end and strips site watermarks / metadata prefixes.
function sanitizePtName(name) {
  const steps = [
    [/(.*)\b(\d{3,4}P)\b(?!.*\d{3,4}[Pp])(.*)/, '$1$3 $2'], // add resolution to the end if missing
    [/^[\[{]?(?:ACESSE.*|WWW\.)?[A-Z]+\.(COM|NET|ORG|TO|TV|ME)\b\s*[-\]}]+[\s.]*/i, ''], // replace watermarks
    [/^(\d*(?:\.\d{1,2})?(?:[4A-Z-]{3,}|P)[-.]+)+/, ''], // replace metadata prefixes
    [/^[\[{]?(?:ACESSE.*|WWW\.)?[A-Z]+\.(COM|NET|ORG|TO|TV|ME)\b\s*[-\]}]+[\s.]*/i, ''], // replace watermarks uncovered by the previous step
    [/^(COM|NET|ORG|TO|TV|ME)\b\s*-+[\s.]*/, ''], // replace dangling site endings
  ];
  return steps
      .reduce((value, [pattern, replacement]) => value.replace(pattern, replacement), name)
      .trim();
}
|
||||
|
||||
// Drops a trailing season marker ("S2") or a "N. temporada" fragment from
// an original title. Note: trims BEFORE replacing, matching the original.
function sanitizePtOriginalName(name) {
  const seasonMarker = /S\d+$|\d.\s?[Tt]emporada/;
  return name.trim().replace(seasonMarker, '');
}
|
||||
|
||||
// Normalizes a scraped Portuguese language string.
// NOTE(review): the first regex literal appears mojibake-corrupted in this
// copy of the file (it presumably matched a mis-encoded byte sequence that
// should render as 'ê') — confirm against the pristine source before editing.
function sanitizePtLanguages(languages) {
  return languages
      .replace(/<2F><>/g, 'ê') // repair a known mis-encoded character sequence
      .replace(/ /g, '') // strip a space-like character — possibly a non-breaking space; TODO confirm
      .trim();
}
|
||||
|
||||
/**
 * Converts a human readable size string ("1.5 GB", "700 Mo") to bytes.
 * Recognizes binary-ish G/M/K suffixes (including French Go/Mo/Ko);
 * an unrecognized suffix is treated as plain bytes.
 * @param {string} sizeText - textual size, may contain ' or , separators.
 * @returns {number|undefined} whole bytes, or undefined for empty input.
 */
function parseSize(sizeText) {
  if (!sizeText) {
    return undefined;
  }
  const value = parseFloat(sizeText.replace(/[',]/g, ''));
  let multiplier;
  if (/Gi?B|Go/.test(sizeText)) {
    multiplier = 1024 * 1024 * 1024;
  } else if (/Mi?B|Mo/.test(sizeText)) {
    multiplier = 1024 * 1024;
  } else if (/[Kk]i?B|Ko/.test(sizeText)) {
    multiplier = 1024;
  } else {
    multiplier = 1;
  }
  return Math.floor(value * multiplier);
}
|
||||
|
||||
module.exports = { parseSize, isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages }
|
||||
@@ -1,129 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
|
||||
// apibay (The Pirate Bay JSON API) endpoint configuration.
const baseUrl = 'https://apibay.org';
const timeout = 5000; // request timeout in milliseconds

// apibay numeric category ids, grouped by top level category.
const Categories = {
  AUDIO: { ALL: 100, MUSIC: 101, AUDIO_BOOKS: 102, SOUND_CLIPS: 103, FLAC: 104, OTHER: 199 },
  VIDEO: {
    ALL: 200, MOVIES: 201, MOVIES_DVDR: 202, MUSIC_VIDEOS: 203, MOVIE_CLIPS: 204,
    TV_SHOWS: 205, HANDHELD: 206, MOVIES_HD: 207, TV_SHOWS_HD: 208, MOVIES_3D: 209, OTHER: 299
  },
  APPS: { ALL: 300, WINDOWS: 301, MAC: 302, UNIX: 303, HANDHELD: 304, IOS: 305, ANDROID: 306, OTHER_OS: 399 },
  GAMES: {
    ALL: 400, PC: 401, MAC: 402, PSx: 403, XBOX360: 404, Wii: 405,
    HANDHELD: 406, IOS: 407, ANDROID: 408, OTHER: 499
  },
  PORN: {
    ALL: 500, MOVIES: 501, MOVIES_DVDR: 502, PICTURES: 503,
    GAMES: 504, MOVIES_HD: 505, MOVIE_CLIPS: 506, OTHER: 599
  },
  OTHER: { ALL: 600, E_BOOKS: 601, COMICS: 602, PICTURES: 603, COVERS: 604, PHYSIBLES: 605, OTHER: 699 }
};
|
||||
|
||||
// Fetches a single torrent by id from apibay, retrying on any failure.
function torrent(torrentId, retries = 2) {
  if (!torrentId) {
    return Promise.reject(new Error('No valid torrentId provided'));
  }

  return _request(`t.php?id=${torrentId}`)
      .then(toTorrent)
      .catch(error => {
        if (retries) {
          return torrent(torrentId, retries - 1);
        }
        return Promise.reject(error);
      });
}
|
||||
|
||||
/**
 * Searches apibay by keyword within an optional category.
 * Fix: the keyword is now URI-encoded before being interpolated into the
 * query string — the original passed it raw, so spaces and reserved
 * characters corrupted the request URL.
 * @param {string} keyword - search phrase.
 * @param {object} [config] - optional { category } (defaults to all video).
 * @param {number} [retries=2] - remaining retry attempts.
 */
function search(keyword, config = {}, retries = 2) {
  if (!keyword) {
    return Promise.reject(new Error('No valid keyword provided'));
  }
  const q = encodeURIComponent(keyword);
  const cat = config.category || Categories.VIDEO.ALL;

  return _request(`q.php?q=${q}&cat=${cat}`)
      .then(results => results.map((result) => toTorrent(result)))
      .catch(error => retries ? search(keyword, config, retries - 1) : Promise.reject(error));
}
|
||||
|
||||
// Browses an apibay category listing. The site's pages are zero-based
// while this API takes 1-based pages.
function browse(config = {}, retries = 2) {
  const category = config.category || 0;
  const pageIndex = config.page - 1 || 0;

  return _request(`q.php?q=category:${category}:${pageIndex}`)
      .then(results => results.map((result) => toTorrent(result)))
      .catch(error => retries ? browse(config, retries - 1) : Promise.reject(error));
}
|
||||
|
||||
/**
 * Issues a GET against an apibay endpoint and resolves with the parsed
 * JSON body. Fix: rejects with a real Error — the original rejected with
 * a bare string, which breaks `error.message`/stack handling upstream.
 * @param {string} endpoint - path + query relative to baseUrl.
 */
async function _request(endpoint) {
  const url = `${baseUrl}/${endpoint}`;
  return axios.get(url, { timeout: timeout })
      .then(response => {
        if (typeof response.data === 'object') {
          return response.data;
        }
        return Promise.reject(new Error(`Unexpected response body`));
      });
}
|
||||
|
||||
/**
 * Maps a raw apibay result object to the internal torrent shape.
 * Fix: every parseInt call now passes an explicit radix of 10.
 * @param {object} result - raw apibay JSON record.
 * @returns {object} normalized torrent record.
 */
function toTorrent(result) {
  return {
    torrentId: result.id,
    name: escapeHTML(result.name),
    infoHash: result.info_hash.toLowerCase(),
    size: parseInt(result.size, 10),
    seeders: parseInt(result.seeders, 10),
    leechers: parseInt(result.leechers, 10),
    subcategory: parseInt(result.category, 10),
    uploadDate: new Date(result.added * 1000), // apibay reports epoch seconds
    imdbId: result.imdb || undefined,
    filesCount: result.num_files && parseInt(result.num_files, 10) || undefined
  };
}
|
||||
|
||||
module.exports = { torrent, search, browse, Categories };
|
||||
@@ -1,175 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const { ungzip } = require('node-gzip');
|
||||
const LineByLineReader = require('line-by-line');
|
||||
const fs = require('fs');
|
||||
const thepiratebay = require('./thepiratebay_api.js');
|
||||
const bing = require('nodejs-bing');
|
||||
const { Type } = require('../../lib/types');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'ThePirateBay';
const CSV_FILE_PATH = '/tmp/tpb_dump.csv'; // local path for the downloaded dump

// Caps concurrent processing of parsed dump rows.
const limiter = new Bottleneck({ maxConcurrent: 40 });

/**
 * Downloads the latest TPB csv dump and streams it line by line,
 * scheduling each parsed row for processing through the limiter while
 * applying backpressure to the reader.
 */
async function scrape() {
  // hardcoded placeholder dump descriptor; live dump discovery is disabled
  const lastDump = { updatedAt: 2147000000 };
  //const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
  //const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
  const checkPoint = 0;

  if (!lastDump) {
    return;
  }
  console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
  await downloadDump(lastDump);

  let entriesProcessed = 0;
  const reader = new LineByLineReader(CSV_FILE_PATH);
  reader.on('line', (line) => {
    if (line.includes("#ADDED")) {
      return; // header line
    }
    if (entriesProcessed % 1000 === 0) {
      console.log(`Processed ${entriesProcessed} entries`);
    }
    if (entriesProcessed <= checkPoint) {
      entriesProcessed++; // fast-forward to the resume point
      return;
    }

    // ';' separated columns, quoted fields allowed
    const row = line.match(/(?<=^|;)(".*"|[^;]+)(?=;|$)/g);
    if (row.length !== 4) {
      console.log(`Invalid row: ${line}`);
      return;
    }
    const torrent = {
      uploadDate: moment(row[0], 'YYYY-MMM-DD HH:mm:ss').toDate(),
      infoHash: Buffer.from(row[1], 'base64').toString('hex'),
      title: escapeHTML(row[2])
          .replace(/^"|"$/g, '')
          .replace(/&#?\w{2,6};/g, ' ')
          .replace(/\s+/g, ' ')
          .trim(),
      size: parseInt(row[3], 10)
    };

    // backpressure: pause reading while the limiter queue is busy
    if (!limiter.empty()) {
      reader.pause()
    }

    limiter.schedule(() => processTorrentRecord(torrent)
        .catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
        .then(() => limiter.empty())
        .then((empty) => empty && reader.resume())
        .then(() => entriesProcessed++);
  });
  reader.on('error', (err) => {
    console.log(err);
  });
  reader.on('end', () => {
    console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`);
  });
}
|
||||
|
||||
// Video sub-categories we keep; everything else is recorded as skipped.
const allowedCategories = [
  thepiratebay.Categories.VIDEO.MOVIES,
  thepiratebay.Categories.VIDEO.MOVIES_HD,
  thepiratebay.Categories.VIDEO.MOVIES_DVDR,
  thepiratebay.Categories.VIDEO.MOVIES_3D,
  thepiratebay.Categories.VIDEO.TV_SHOWS,
  thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
// Sub-categories stored with the SERIES type.
const seriesCategories = [
  thepiratebay.Categories.VIDEO.TV_SHOWS,
  thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];

/**
 * Resolves a dump row against the live site and persists it, or records
 * it as skipped when it cannot be matched to an allowed video category.
 */
async function processTorrentRecord(record) {
  if (await getStoredTorrentEntry(record)) {
    return; // already stored
  }

  const torrentFound = await findTorrent(record);

  if (!torrentFound || !allowedCategories.includes(torrentFound.subcategory)) {
    return createSkipTorrentEntry(record);
  }

  const isSeries = seriesCategories.includes(torrentFound.subcategory);
  const torrent = {
    infoHash: torrentFound.infoHash,
    provider: NAME,
    torrentId: torrentFound.torrentId,
    title: torrentFound.name,
    size: torrentFound.size,
    type: isSeries ? Type.SERIES : Type.MOVIE,
    imdbId: isSeries && torrentFound.imdbId || undefined,
    uploadDate: torrentFound.uploadDate || record.uploadDate,
    seeders: torrentFound.seeders,
  };

  return createTorrentEntry(torrent);
}
|
||||
|
||||
// Resolves the record on TPB directly, falling back to a Bing lookup.
async function findTorrent(record) {
  return findTorrentInSource(record)
      .catch(() => findTorrentViaBing(record));
}

/**
 * Searches TPB by title (scanning up to 5 result pages) for an entry whose
 * magnet link contains the record's info hash, then fetches its details.
 */
async function findTorrentInSource(record) {
  let match;
  let page = 0;
  while (!match && page < 5) {
    const candidates = await thepiratebay.search(record.title.replace(/[\W\s]+/, ' '), { page: page });
    match = candidates.filter(candidate => candidate.magnetLink.toLowerCase().includes(record.infoHash))[0];
    // an empty result page means there is nothing further to scan
    page = candidates.length === 0 ? 1000 : page + 1;
  }
  if (!match) {
    return Promise.reject(new Error(`Failed to find torrent ${record.title}`));
  }
  // fetch the full torrent details, retrying once on failure
  return Promise.resolve(match)
      .then((found) => thepiratebay.torrent(found.torrentId)
          .catch(() => thepiratebay.torrent(found.torrentId)));
}
|
||||
|
||||
/**
 * Last-resort lookup: searches Bing for the info hash and extracts a TPB
 * torrent id from a matching result link.
 * Fix: the original indexed `link.match(...)[1]` without a null check and
 * threw a TypeError whenever the result link had no torrent-id segment;
 * it now falls through to undefined in that case.
 */
async function findTorrentViaBing(record) {
  return bing.web(`${record.infoHash}`)
      .then((results) => results
          .find(result => result.description.includes('Direct download via magnet link') ||
              result.description.includes('Get this torrent')))
      .then((result) => {
        if (!result) {
          console.warn(`Failed to find torrent ${record.title}`);
          return Promise.resolve(undefined);
        }
        const idMatch = result.link.match(/torrent\/(\w+)\//);
        return idMatch ? idMatch[1] : undefined;
      })
      .then((torrentId) => torrentId && thepiratebay.torrent(torrentId))
}
|
||||
|
||||
/**
 * Downloads and unzips the csv dump to CSV_FILE_PATH.
 * No-op (returns undefined) when a previously downloaded file exists.
 */
function downloadDump(dump) {
  try {
    if (fs.existsSync(CSV_FILE_PATH)) {
      console.log('dump file already exist...');
      return;
    }
  } catch (err) {
    // existence check failed — log and attempt the download anyway
    console.error(err)
  }

  console.log('downloading dump file...');
  return axios.get(dump.url, { timeout: 2000, responseType: 'stream' })
      .then((response) => response.data)
      .then((body) => {
        console.log('unzipping dump file...');
        return ungzip(body);
      })
      .then((unzipped) => {
        console.log('writing dump file...');
        return fs.promises.writeFile(CSV_FILE_PATH, unzipped);
      })
}
|
||||
|
||||
module.exports = { scrape, NAME };
|
||||
@@ -1,43 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const { Sequelize } = require('sequelize');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const thepiratebay = require('./thepiratebay_api.js');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
|
||||
const NAME = 'ThePirateBay';
// The live site reports an all-zero info hash for removed/fake entries.
const EMPTY_HASH = '0000000000000000000000000000000000000000';

const Op = Sequelize.Op;
const limiter = new Bottleneck({ maxConcurrent: 10 });

/**
 * Sweeps TPB movie entries created in the last 14 days and deletes the
 * ones the live site marks with the empty-hash fake marker.
 */
async function scrape() {
  console.log(`Starting ${NAME} fake removal...`);
  const windowStart = moment().subtract(14, 'day');
  const windowEnd = moment();
  const whereQuery = {
    provider: NAME,
    type: Type.MOVIE,
    createdAt: { [Op.between]: [windowStart, windowEnd] }
  };
  return repository.getTorrentsBasedOnQuery(whereQuery)
      .then(torrents => {
        console.log(`Checking for ${NAME} fake entries in ${torrents.length} torrents`);
        return Promise.all(torrents.map(torrent => limiter.schedule(() => removeIfFake(torrent))))
      })
      .then(results => {
        const removed = results.filter(result => result);
        console.log(`Finished ${NAME} fake removal with ${removed.length} removals in ${results.length} torrents`);
      });
}

/**
 * Deletes the torrent when the live site reports the empty-hash marker.
 * Resolves with the deletion result, or null when nothing was removed.
 */
async function removeIfFake(torrent) {
  const liveInfo = await thepiratebay.torrent(torrent.torrentId).catch(() => null);
  if (liveInfo && liveInfo.infoHash === EMPTY_HASH) {
    console.log(`Removing ${NAME} fake torrent [${torrent.torrentId}][${torrent.infoHash}] ${torrent.title}`);
    return repository.deleteTorrent(torrent).catch(() => null);
  }
  return Promise.resolve(null);
}
|
||||
|
||||
module.exports = { scrape, NAME };
|
||||
@@ -1,98 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const thepiratebay = require('./thepiratebay_api.js');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'ThePirateBay';

// Caps concurrent record processing.
const limiter = new Bottleneck({ maxConcurrent: 10 });

// Video sub-categories scraped by this provider.
const allowedCategories = [
  thepiratebay.Categories.VIDEO.MOVIES,
  thepiratebay.Categories.VIDEO.MOVIES_HD,
  thepiratebay.Categories.VIDEO.MOVIES_3D,
  thepiratebay.Categories.VIDEO.TV_SHOWS,
  thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
// Sub-categories stored with the SERIES type.
const seriesCategories = [
  thepiratebay.Categories.VIDEO.TV_SHOWS,
  thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];

/**
 * Runs a full TPB scrape pass and stamps the provider row with the
 * scrape start time once all categories have been walked.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  return scrapeLatestTorrents()
      .then(() => {
        lastScrape.lastScraped = scrapeStart;
        return lastScrape.save();
      })
      .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}

/**
 * Seeder refresh is disabled for this provider; resolves to an empty list.
 */
async function updateSeeders(torrent) {
  // return limiter.schedule(() => thepiratebay.torrent(torrent.torrentId));
  return Promise.resolve([]);
}
|
||||
|
||||
// Walks all allowed categories sequentially and flattens the results.
async function scrapeLatestTorrents() {
  return Promises.sequence(allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category)))
      .then(entries => entries.reduce((a, b) => a.concat(b), []));
}

// Scrapes one listing page and recurses to the next until the results
// run out or the per-category page cap is reached.
async function scrapeLatestTorrentsForCategory(category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  return thepiratebay.browse({ category, page })
      .catch(error => {
        // browse failures are non-fatal: log and continue with no results
        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
        return Promise.resolve([]);
      })
      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
      .then(resolved => resolved.length > 0 && page < getUntilPage(category)
          ? scrapeLatestTorrentsForCategory(category, page + 1)
          : Promise.resolve());
}

// Validates one browse record and persists it as a torrent entry.
async function processTorrentRecord(record) {
  if (await checkAndUpdateTorrent(record)) {
    return record; // already known
  }

  if (!record || !allowedCategories.includes(record.subcategory)) {
    return Promise.resolve('Invalid torrent record');
  }

  const isSeries = seriesCategories.includes(record.subcategory);
  const torrent = {
    infoHash: record.infoHash,
    provider: NAME,
    torrentId: record.torrentId,
    title: record.name.replace(/\t|\s+/g, ' '),
    type: isSeries ? Type.SERIES : Type.MOVIE,
    size: record.size,
    seeders: record.seeders,
    uploadDate: record.uploadDate,
    imdbId: isSeries && record.imdbId || undefined,
    languages: record.languages && record.languages.trim() || undefined
  };

  return createTorrentEntry(torrent);
}

// Page depth scanned per category: TV goes deepest, 3D only one page.
function getUntilPage(category) {
  if (category === thepiratebay.Categories.VIDEO.MOVIES_3D) {
    return 1;
  }
  const tvCategories = [
    thepiratebay.Categories.VIDEO.TV_SHOWS,
    thepiratebay.Categories.VIDEO.TV_SHOWS_HD
  ];
  return tvCategories.includes(category) ? 10 : 5;
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,112 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const LineByLineReader = require('line-by-line');
|
||||
const decode = require('magnet-uri');
|
||||
const thepiratebay = require('./thepiratebay_api.js');
|
||||
const { Type } = require('../../lib/types');
|
||||
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'ThePirateBay';
const CSV_FILE_PATH = '/tmp/tpb.csv'; // pre-downloaded csv export

// Caps concurrent processing of parsed csv rows.
const limiter = new Bottleneck({ maxConcurrent: 40 });

/**
 * Streams the local TPB csv export line by line, resuming from a
 * hardcoded checkpoint line count, and schedules each parsed row for
 * processing with reader backpressure.
 */
async function scrape() {
  // await processTorrentRecord({ torrentId: 26877339, category: 'Video' });
  console.log(`starting to scrape tpb dump...`);
  //const checkPoint = moment('2013-06-16 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
  const checkPoint = 4115000;

  let entriesProcessed = 0;
  const reader = new LineByLineReader(CSV_FILE_PATH);
  reader.on('line', (line) => {
    if (entriesProcessed % 1000 === 0) {
      console.log(`Processed ${entriesProcessed} entries`);
    }
    if (entriesProcessed <= checkPoint) {
      entriesProcessed++; // fast-forward to the resume checkpoint
      return;
    }

    // ',' separated columns, quoted fields allowed
    const row = line.match(/(?<=^|,)(".*"|[^,]*)(?=,|$)/g);
    if (row.length !== 10) {
      console.log(`Invalid row: ${line}`);
      return;
    }
    const torrent = {
      torrentId: row[0],
      title: row[1]
          .replace(/^"|"$/g, '')
          .replace(/&/g, '&')
          .replace(/&\w{2,6};/g, ' ')
          .replace(/\s+/g, ' ')
          .trim(),
      size: parseInt(row[2], 10),
      category: row[4],
      subcategory: row[5],
      infoHash: row[7].toLowerCase() || decode(row[9]).infoHash,
      magnetLink: row[9],
      uploadDate: moment(row[8]).toDate(),
    };

    // backpressure: pause reading while the limiter queue is busy
    if (!limiter.empty()) {
      reader.pause()
    }

    limiter.schedule(() => processTorrentRecord(torrent)
        .catch((error) => console.log(`failed ${torrent.title} due: ${error}`)))
        .then(() => limiter.empty())
        .then((empty) => empty && reader.resume())
        .then(() => entriesProcessed++);
  });
  reader.on('error', (err) => {
    console.log(err);
  });
  reader.on('end', () => {
    console.log(`finished to scrape tpb dump!`);
  });
}
|
||||
|
||||
// Video sub-categories we keep; everything else is recorded as skipped.
const allowedCategories = [
  thepiratebay.Categories.VIDEO.MOVIES,
  thepiratebay.Categories.VIDEO.MOVIES_HD,
  thepiratebay.Categories.VIDEO.MOVIES_DVDR,
  thepiratebay.Categories.VIDEO.MOVIES_3D,
  thepiratebay.Categories.VIDEO.TV_SHOWS,
  thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
// Sub-categories stored with the SERIES type.
const seriesCategories = [
  thepiratebay.Categories.VIDEO.TV_SHOWS,
  thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];

/**
 * Validates one csv row against the live site and persists it, skipping
 * non-video rows and rows whose live category is not allowed.
 */
async function processTorrentRecord(record) {
  if (record.category !== 'Video') {
    return createSkipTorrentEntry(record);
  }
  if (await getStoredTorrentEntry(record)) {
    return; // already stored
  }

  const torrentFound = await thepiratebay.torrent(record.torrentId);

  if (!torrentFound || !allowedCategories.includes(torrentFound.subcategory)) {
    return createSkipTorrentEntry(record);
  }

  const torrent = {
    infoHash: torrentFound.infoHash,
    provider: NAME,
    torrentId: torrentFound.torrentId,
    title: torrentFound.name,
    size: torrentFound.size,
    type: seriesCategories.includes(torrentFound.subcategory) ? Type.SERIES : Type.MOVIE,
    imdbId: torrentFound.imdbId,
    uploadDate: torrentFound.uploadDate,
    seeders: torrentFound.seeders,
  };

  return createTorrentEntry(torrent);
}
|
||||
|
||||
module.exports = { scrape, NAME };
|
||||
@@ -1,119 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
const decode = require('magnet-uri');
|
||||
const { parse } = require('parse-torrent-title');
|
||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
// torrent9 site configuration.
const baseUrl = 'https://www.torrent9.st'
const defaultTimeout = 10000; // request timeout in milliseconds
const pageSize = 50; // listing entries per page

// Site category slugs used in listing URLs.
const Categories = {
  MOVIE: 'films',
  TV: 'series',
};
|
||||
|
||||
// Fetches and parses a single torrent detail page, retrying on failure.
function torrent(torrentId, config = {}, retries = 2) {
  if (!torrentId || retries === 0) {
    return Promise.reject(new Error(`Failed ${torrentId} search`));
  }

  return singleRequest(`${baseUrl}/torrent/${torrentId}`)
      .then((body) => parseTorrentPage(body))
      .then((parsed) => ({ torrentId, ...parsed }))
      .catch((err) => {
        console.warn(`Failed Torrent9 ${torrentId} request: `, err);
        return torrent(torrentId, config, retries - 1)
      });
}

// Fetches one listing page for a category; the site paginates through a
// 1-based offset in steps of pageSize.
function browse(config = {}, retries = 2, error = null) {
  if (retries === 0) {
    return Promise.reject(error || new Error(`Failed browse request`));
  }
  const page = config.page || 1;
  const category = config.category;
  const offset = (page - 1) * pageSize + 1;

  return singleRequest(`${baseUrl}/torrents/${category}/${offset}`)
      .then((body) => parseTableBody(body))
      .catch((err) => browse(config, retries - 1, err));
}
|
||||
|
||||
// Issues a GET with browser-like headers and resolves with a non-empty
// response body; rejects with the error message on any failure.
function singleRequest(requestUrl) {
  const headers = {
    'user-agent': getRandomUserAgent(),
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4'
  };

  return axios.get(requestUrl, { headers, timeout: defaultTimeout })
      .then(response => {
        const body = response.data;
        if (!body || !body.length) {
          throw new Error(`No body: ${requestUrl} with status ${response.status}`);
        }
        return body;
      })
      .catch(error => Promise.reject(error.message || error));
}
|
||||
|
||||
/**
 * Parses a listing-page body into { name, torrentId, seeders } records.
 * Fixes: the row-failure log said "TorrentGalaxy" — a copy/paste leftover
 * in this Torrent9 client — and parseInt now passes an explicit radix.
 */
function parseTableBody(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      reject(new Error('Failed loading body'));
    }

    const torrents = [];

    $('tbody tr').each((i, element) => {
      const row = $(element);
      const titleElement = row.find('td a');
      try {
        torrents.push({
          name: titleElement.text().trim(),
          torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
          seeders: parseInt(row.find('span.seed_ok').first().text(), 10),
        });
      } catch (e) {
        // a single malformed row should not abort the whole page
        console.error('Failed parsing Torrent9 row: ', e);
      }
    });

    resolve(torrents);
  });
}
|
||||
|
||||
// Parses a torrent detail-page body into a torrent record.
// NOTE(review): `details.find('ul:nth-of-type(4) a').attr('href')` is
// dereferenced without a null check and will throw when the category link
// is missing — confirm the site always renders it.
function parseTorrentPage(body) {
  return new Promise((resolve, reject) => {
    const $ = cheerio.load(body);

    if (!$) {
      reject(new Error('Failed loading body'));
    }
    // all metadata lives inside the .movie-detail container
    const details = $('.movie-detail');
    const magnetLink = details.find('a[href^="magnet"]').first().attr('href');
    const torrentLink = details.find('div.download-btn:nth-of-type(1) a').first().attr('href');
    // title: prefer the bare text node inside "p strong", fall back to the page heading
    const name = details.find('p strong').contents().filter((_, e) => e.type === 'text').text() || $('h5, h1').text();
    const languages = parse(name).languages;
    const torrent = {
      title: name.trim(),
      infoHash: magnetLink ? decode(magnetLink).infoHash : undefined,
      magnetLink: magnetLink,
      torrentLink: torrentLink ? `${baseUrl}${torrentLink}` : undefined,
      seeders: parseInt(details.find('.movie-information ul:nth-of-type(1) li:nth-of-type(3)').text(), 10),
      category: details.find('ul:nth-of-type(4) a').attr('href').match(/\/(\w+)$/)[1],
      size: parseSize(details.find('ul:nth-of-type(2) li:nth-of-type(3)').text()),
      uploadDate: moment(details.find('ul:nth-of-type(3) li:nth-of-type(3)').text(), 'DD/MM/YYYY').toDate(),
      // NOTE(review): this looks inverted — it yields 'french' only when the
      // parsed title does NOT already mention french, and undefined when it
      // does; confirm the intended semantics before changing it.
      languages: languages && languages.includes('french') ? undefined : 'french',
    };
    resolve(torrent);
  });
}
|
||||
|
||||
module.exports = { torrent, browse, Categories };
|
||||
@@ -1,104 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const torrent9 = require('./torrent9_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'Torrent9';
// Maps torrent9 category slugs to internal types (built by typeMapping()).
const TYPE_MAPPING = typeMapping();

// Throttles requests against the site.
const api_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 5000 });
// Caps concurrent local record processing.
const limiter = new Bottleneck({ maxConcurrent: 10 });
// Categories this provider scrapes.
const allowedCategories = [
  torrent9.Categories.MOVIE,
  torrent9.Categories.TV,
];
// Site clients to scrape (a single client today).
const clients = [
  torrent9
];

/**
 * Runs a full Torrent9 scrape pass and stamps the provider row with the
 * scrape start time once every category has been walked.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  return scrapeLatestTorrents()
      .then(() => {
        lastScrape.lastScraped = scrapeStart;
        return lastScrape.save();
      })
      .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}

/**
 * Scrapes every (category, client) pair sequentially and flattens results.
 */
async function scrapeLatestTorrents() {
  const scrapeFunctions = allowedCategories
      .map(category => clients.map(client => () => scrapeLatestTorrentsForCategory(client, category)))
      .reduce((a, b) => a.concat(b), []);
  return Promises.sequence(scrapeFunctions)
      .then(entries => entries.reduce((a, b) => a.concat(b), []));
}
|
||||
|
||||
// Scrapes one listing page for a client/category pair and recurses to the
// next page until results run out or the per-category cap is reached.
async function scrapeLatestTorrentsForCategory(client, category, page = 1) {
  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
  return api_limiter.schedule(() => client.browse({ category, page }))
      .catch(error => {
        // browse failures are non-fatal: log and continue with no results
        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
        return Promise.resolve([]);
      })
      .then(results => Promise.all(results.map(r => limiter.schedule(() => processTorrentRecord(client, r)))))
      .then(resolved => resolved.length > 0 && page < getUntilPage(category)
          ? scrapeLatestTorrentsForCategory(client, category, page + 1)
          : Promise.resolve([]));
}

/**
 * Processes one listing record: fetches full details from the client,
 * then persists a new entry unless the torrent is already known.
 */
async function processTorrentRecord(client, record) {
  // skip if the id alone already identifies a known torrent
  if (await checkAndUpdateTorrent({ provider: NAME, torrentId: record.torrentId })) {
    return record;
  }

  const foundTorrent = await api_limiter.schedule(() => client.torrent(record.torrentId)).catch(() => undefined);
  if (!foundTorrent || !foundTorrent.infoHash) {
    console.warn(`Failed retrieving torrent ${record.torrentId}`);
    return record;
  }

  const torrent = {
    provider: NAME,
    infoHash: foundTorrent.infoHash,
    magnetLink: foundTorrent.magnetLink,
    torrentLink: foundTorrent.torrentLink,
    torrentId: foundTorrent.torrentId,
    title: foundTorrent.title,
    type: TYPE_MAPPING[foundTorrent.category],
    size: foundTorrent.size,
    seeders: foundTorrent.seeders,
    uploadDate: foundTorrent.uploadDate,
    imdbId: foundTorrent.imdbId,
    languages: foundTorrent.languages
  };

  // re-check with the full record (the info hash may already be stored)
  if (await checkAndUpdateTorrent(torrent)) {
    console.info(`Skipping torrent ${torrent.torrentId} - [${torrent.infoHash}] ${torrent.title}`);
    return torrent;
  }

  return createTorrentEntry(torrent).then(() => torrent);
}

// Builds the category-slug -> internal-type lookup table.
function typeMapping() {
  return {
    [torrent9.Categories.MOVIE]: Type.MOVIE,
    [torrent9.Categories.TV]: Type.SERIES,
  };
}

// Page depth scanned per category: series get two pages, movies one.
function getUntilPage(category) {
  return category === torrent9.Categories.TV ? 2 : 1;
}
|
||||
|
||||
module.exports = { scrape, NAME };
|
||||
@@ -1,124 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
const decode = require('magnet-uri');
|
||||
const { parse } = require('parse-torrent-title');
|
||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
const baseUrl = 'https://www.torrent9.st'
|
||||
const defaultTimeout = 10000;
|
||||
|
||||
const Categories = {
|
||||
MOVIE: 'films',
|
||||
TV: 'series',
|
||||
};
|
||||
|
||||
function torrent(torrentId, config = {}, retries = 2) {
|
||||
if (!torrentId || retries === 0) {
|
||||
return Promise.reject(new Error(`Failed ${torrentId} search`));
|
||||
}
|
||||
|
||||
return singleRequest(`${baseUrl}/torrent/${torrentId}`)
|
||||
.then((body) => parseTorrentPage(body))
|
||||
.then((torrent) => ({ torrentId, ...torrent }))
|
||||
.catch((err) => {
|
||||
console.warn(`Failed Torrent9 ${torrentId} request: `, err);
|
||||
return torrent(torrentId, config, retries - 1)
|
||||
});
|
||||
}
|
||||
|
||||
function browse(config = {}, retries = 2, error = null) {
|
||||
if (retries === 0) {
|
||||
return Promise.reject(error || new Error(`Failed browse request`));
|
||||
}
|
||||
const page = config.page || 1;
|
||||
const category = config.category;
|
||||
|
||||
return singleRequest(`${baseUrl}/torrents_${category}.html,page-${page}`)
|
||||
.then((body) => parseTableBody(body))
|
||||
.catch((err) => browse(config, retries - 1, err));
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl) {
|
||||
const headers = {
|
||||
'user-agent': getRandomUserAgent(),
|
||||
'accept-encoding': 'gzip, deflate',
|
||||
'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,lt;q=0.7,ar;q=0.6,fr;q=0.5,de;q=0.4'
|
||||
};
|
||||
const options = { headers, timeout: defaultTimeout };
|
||||
|
||||
return axios.get(requestUrl, options)
|
||||
.then(response => {
|
||||
const body = response.data;
|
||||
if (!body || !body.length) {
|
||||
throw new Error(`No body: ${requestUrl} with status ${response.status}`);
|
||||
}
|
||||
return body;
|
||||
})
|
||||
.catch(error => Promise.reject(error.message || error));
|
||||
}
|
||||
|
||||
function parseTableBody(body) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error('Failed loading body'));
|
||||
}
|
||||
|
||||
const torrents = [];
|
||||
|
||||
$('tr').each((i, element) => {
|
||||
const row = $(element);
|
||||
const titleElement = row.find('td a');
|
||||
if (titleElement.length) {
|
||||
torrents.push({
|
||||
title: titleElement.attr('title').trim(),
|
||||
torrentId: titleElement.attr('href').match(/torrent\/(.*)/)[1],
|
||||
seeders: parseInt(row.find('span.seed_ok').first().text()),
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
resolve(torrents);
|
||||
});
|
||||
}
|
||||
|
||||
function parseTorrentPage(body) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error('Failed loading body'));
|
||||
}
|
||||
const details = $('.movie-detail');
|
||||
const magnetLink = details.find('a[href^="magnet"]').first().attr('href');
|
||||
const name = getName(details) || $('h1').text();
|
||||
const languages = parse(name).languages;
|
||||
const torrent = {
|
||||
title: name.trim(),
|
||||
infoHash: magnetLink ? decode(magnetLink).infoHash : undefined,
|
||||
magnetLink: magnetLink,
|
||||
seeders: parseInt(details.find('.movie-information ul:nth-of-type(1) li:nth-of-type(3)').text(), 10),
|
||||
category: details.find('ul:nth-of-type(4) a').attr('href').match(/_(\w+)\.html$/)[1],
|
||||
size: parseSize(details.find('ul:nth-of-type(2) li:nth-of-type(3)').text()),
|
||||
uploadDate: moment(details.find('ul:nth-of-type(3) li:nth-of-type(3)').text(), 'DD/MM/YYYY').toDate(),
|
||||
languages: languages && languages.includes('french') ? undefined : 'french',
|
||||
};
|
||||
resolve(torrent);
|
||||
});
|
||||
}
|
||||
|
||||
function getName(details) {
|
||||
const nameElement = details.find('p strong');
|
||||
if (nameElement.length === 1) {
|
||||
return nameElement.contents().filter((_, elem) => elem.type === 'text').text()
|
||||
}
|
||||
const description = nameElement.parent().text();
|
||||
const nameMatch = description.match(
|
||||
/(?:[A-Z]+[^A-Z0-9]*|[A-Z0-9-]+(?:[a-z]+\d+)?)\.([\w-]+\.){3,}\w+(?:-\w+)?(?=[A-Z])/);
|
||||
return nameMatch && nameMatch[0];
|
||||
}
|
||||
|
||||
module.exports = { torrent, browse, Categories };
|
||||
@@ -1,171 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { getRandomUserAgent } = require('../../lib/requestHelper');
|
||||
const { parseSize } = require("../scraperHelper");
|
||||
|
||||
const defaultProxies = [
|
||||
// 'https://torrentgalaxy.to',
|
||||
// 'https://torrentgalaxy.mx',
|
||||
'https://torrentgalaxy.su'
|
||||
];
|
||||
const defaultTimeout = 10000;
|
||||
|
||||
const Categories = {
|
||||
ANIME: '28',
|
||||
MOVIE_4K: '3',
|
||||
MOVIE_PACKS: '4',
|
||||
MOVIE_SD: '1',
|
||||
MOVIE_HD: '42',
|
||||
MOVIE_CAM: '45',
|
||||
MOVIE_BOLLYWOOD: '46',
|
||||
TV_SD: '5',
|
||||
TV_HD: '41',
|
||||
TV_PACKS: '6',
|
||||
TV_SPORT: '7',
|
||||
DOCUMENTARIES: '9'
|
||||
};
|
||||
|
||||
function torrent(torrentId, config = {}, retries = 2) {
|
||||
if (!torrentId || retries === 0) {
|
||||
return Promise.reject(new Error(`Failed ${torrentId} search`));
|
||||
}
|
||||
const proxyList = config.proxyList || defaultProxies;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${torrentId}`)))
|
||||
.then((body) => parseTorrentPage(body))
|
||||
.then((torrent) => ({ torrentId, ...torrent }))
|
||||
.catch((err) => torrent(torrentId, config, retries - 1));
|
||||
}
|
||||
|
||||
function search(keyword, config = {}, retries = 2) {
|
||||
if (!keyword || retries === 0) {
|
||||
return Promise.reject(new Error(`Failed ${keyword} search`));
|
||||
}
|
||||
const proxyList = config.proxyList || defaultProxies;
|
||||
const page = config.page || 1;
|
||||
const category = config.category;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}&search=${keyword}`)))
|
||||
.then((body) => parseTableBody(body))
|
||||
.catch(() => search(keyword, config, retries - 1));
|
||||
}
|
||||
|
||||
function browse(config = {}, retries = 2, error = null) {
|
||||
if (retries === 0) {
|
||||
return Promise.reject(error || new Error(`Failed browse request`));
|
||||
}
|
||||
const proxyList = config.proxyList || defaultProxies;
|
||||
const page = config.page || 1;
|
||||
const category = config.category;
|
||||
|
||||
return Promises.first(proxyList
|
||||
.map((proxyUrl) => singleRequest(`${proxyUrl}/torrents.php?cat=${category}&page=${page - 1}`)))
|
||||
.then((body) => parseTableBody(body))
|
||||
.catch((err) => browse(config, retries - 1, err));
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl) {
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: defaultTimeout };
|
||||
|
||||
return axios.get(requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.data;
|
||||
if (!body) {
|
||||
throw new Error(`No body: ${requestUrl} with status ${response.status}`);
|
||||
} else if (body.includes('Access Denied')) {
|
||||
console.log(`Access Denied: ${requestUrl}`);
|
||||
throw new Error(`Access Denied: ${requestUrl}`);
|
||||
} else if (body.includes('502: Bad gateway') ||
|
||||
body.includes('403 Forbidden') ||
|
||||
body.includes('Origin DNS error')) {
|
||||
throw new Error(`Invalid body contents: ${requestUrl}`);
|
||||
}
|
||||
return body;
|
||||
});
|
||||
}
|
||||
|
||||
function parseTableBody(body) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error('Failed loading body'));
|
||||
}
|
||||
|
||||
const torrents = [];
|
||||
|
||||
$('.tgxtable > div').each((i, element) => {
|
||||
if (i === 0) return;
|
||||
const row = $(element);
|
||||
const magnetLink = row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(2)').attr('href');
|
||||
const imdbIdMatch = row.html().match(/search=(tt\d+)/i);
|
||||
try {
|
||||
torrents.push({
|
||||
name: row.find('.tgxtablecell div a[title]').first().text(),
|
||||
infoHash: decode(magnetLink).infoHash,
|
||||
magnetLink: magnetLink,
|
||||
torrentLink: row.find('div:nth-of-type(n+2) .collapsehide > a:nth-of-type(1)').first().attr('href'),
|
||||
torrentId: row.find('.tgxtablecell div a[title]').first().attr('href').match(/torrent\/(\d+)/)[1],
|
||||
verified: !!row.find('i.fa-check').length,
|
||||
category: row.find('div:nth-of-type(n+2) .shrink a').first().attr('href').match(/cat=(\d+)$/)[1],
|
||||
seeders: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'green\'] b').first().text()),
|
||||
leechers: parseInt(row.find('div:nth-of-type(n+2) .collapsehide [color=\'#ff0000\'] b').first().text()),
|
||||
languages: row.find('.tgxtablecell img[title]').first().attr('title'),
|
||||
size: parseSize(row.find('.collapsehide span.badge-secondary').first().text()),
|
||||
uploadDate: parseDate(row.find('div.collapsehide:nth-of-type(12)').first().text()),
|
||||
imdbId: imdbIdMatch && imdbIdMatch[1],
|
||||
});
|
||||
} catch (e) {
|
||||
console.error('Failed parsing TorrentGalaxy row: ', e);
|
||||
}
|
||||
});
|
||||
|
||||
resolve(torrents);
|
||||
});
|
||||
}
|
||||
|
||||
function parseTorrentPage(body) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
if (!$) {
|
||||
reject(new Error('Failed loading body'));
|
||||
}
|
||||
const content = $('div[class="torrentpagetable limitwidth"]').first();
|
||||
const magnetLink = $('a[class="btn btn-danger"]').attr('href');
|
||||
const imdbIdContent = $('a[title="IMDB link"]').attr('href');
|
||||
const imdbIdMatch = imdbIdContent && imdbIdContent.match(/imdb\.com\/title\/(tt\d+)/i);
|
||||
|
||||
const torrent = {
|
||||
name: content.find('.linebreakup a').first().text(),
|
||||
infoHash: decode(magnetLink).infoHash,
|
||||
magnetLink: magnetLink,
|
||||
verified: !content.find('i.fa-exclamation-triangle').length,
|
||||
torrentLink: $('a[class="btn btn-success"]').attr('href'),
|
||||
seeders: parseInt(content.find('font[color=\'green\']').first().text(), 10),
|
||||
category: content.find('div:nth-of-type(4) a:nth-of-type(2)').first().attr('href').match(/cat=(\d+)$/)[1],
|
||||
languages: content.find('div:nth-of-type(5) div:nth-of-type(2)').first().text().trim(),
|
||||
size: parseSize(content.find('div:nth-of-type(6) div:nth-of-type(2)').first().text()),
|
||||
uploadDate: parseDate(content.find('div:nth-of-type(9) div:nth-of-type(2)').first().text()),
|
||||
imdbId: imdbIdMatch && imdbIdMatch[1],
|
||||
};
|
||||
resolve(torrent);
|
||||
});
|
||||
}
|
||||
|
||||
function parseDate(dateString) {
|
||||
if (dateString.includes('ago')) {
|
||||
const amount = parseInt(dateString, 10);
|
||||
const unit = dateString.includes('Min') ? 'minutes' : 'hours';
|
||||
return moment().subtract(amount, unit).toDate();
|
||||
}
|
||||
const preparedDate = dateString.replace(/\//g, '-').replace(/-(\d{2})\s/, '-20$1 ')
|
||||
return moment(preparedDate, 'DD-MM-YYYY HH:mm').toDate();
|
||||
}
|
||||
|
||||
module.exports = { torrent, search, browse, Categories };
|
||||
@@ -1,132 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const torrentGalaxy = require('./torrentgalaxy_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'TorrentGalaxy';
|
||||
const TYPE_MAPPING = typeMapping();
|
||||
|
||||
const api_limiter = new Bottleneck({ maxConcurrent: 1, minTime: 5000 });
|
||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
const allowedCategories = [
|
||||
torrentGalaxy.Categories.ANIME,
|
||||
torrentGalaxy.Categories.MOVIE_4K,
|
||||
torrentGalaxy.Categories.MOVIE_PACKS,
|
||||
torrentGalaxy.Categories.MOVIE_SD,
|
||||
torrentGalaxy.Categories.MOVIE_HD,
|
||||
torrentGalaxy.Categories.MOVIE_CAM,
|
||||
torrentGalaxy.Categories.MOVIE_BOLLYWOOD,
|
||||
torrentGalaxy.Categories.TV_SD,
|
||||
torrentGalaxy.Categories.TV_HD,
|
||||
torrentGalaxy.Categories.TV_PACKS,
|
||||
torrentGalaxy.Categories.DOCUMENTARIES,
|
||||
];
|
||||
const packCategories = [
|
||||
torrentGalaxy.Categories.MOVIE_PACKS,
|
||||
torrentGalaxy.Categories.TV_PACKS
|
||||
];
|
||||
|
||||
async function scrape() {
|
||||
const scrapeStart = moment();
|
||||
const lastScrape = await repository.getProvider({ name: NAME });
|
||||
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
|
||||
|
||||
// const ids = ['14212584'];
|
||||
// return Promise.all(ids.map(id => limiter.schedule(() => torrentGalaxy.torrent(id)
|
||||
// .then(torrent => processTorrentRecord(torrent)))))
|
||||
// .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
return scrapeLatestTorrents()
|
||||
.then(() => {
|
||||
lastScrape.lastScraped = scrapeStart;
|
||||
return lastScrape.save();
|
||||
})
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
}
|
||||
|
||||
async function updateSeeders(torrent) {
|
||||
return limiter.schedule(() => torrentGalaxy.torrent(torrent.torrentId));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrents() {
|
||||
return Promises.sequence(allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category)))
|
||||
.then(entries => entries.reduce((a, b) => a.concat(b), []));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(category, page = 1) {
|
||||
console.log(`Scrapping ${NAME} ${category} category page ${page}`);
|
||||
return api_limiter.schedule(() => torrentGalaxy.browse({ category, page }))
|
||||
.catch(error => {
|
||||
console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
|
||||
.then(resolved => resolved.length > 0 && page < getMaxPage(category)
|
||||
? scrapeLatestTorrentsForCategory(category, page + 1)
|
||||
: Promise.resolve());
|
||||
}
|
||||
|
||||
async function processTorrentRecord(record) {
|
||||
if (!record || !TYPE_MAPPING[record.category] || !record.verified) {
|
||||
return Promise.resolve('Invalid torrent record');
|
||||
}
|
||||
|
||||
const torrent = {
|
||||
provider: NAME,
|
||||
infoHash: record.infoHash,
|
||||
torrentId: record.torrentId,
|
||||
torrentLink: record.torrentLink,
|
||||
title: record.name.replace(/\t|\s+/g, ' '),
|
||||
type: TYPE_MAPPING[record.category],
|
||||
size: record.size,
|
||||
seeders: record.seeders,
|
||||
uploadDate: record.uploadDate,
|
||||
imdbId: record.imdbId,
|
||||
pack: packCategories.includes(record.category),
|
||||
languages: !(record.languages || '').includes('Other') ? record.languages : undefined
|
||||
};
|
||||
|
||||
if (await checkAndUpdateTorrent(torrent)) {
|
||||
return torrent;
|
||||
}
|
||||
const isOld = moment(torrent.uploadDate).isBefore(moment().subtract(18, 'month'));
|
||||
if (torrent.seeders === 0 && isOld && !torrent.pack) {
|
||||
console.log(`Skipping old unseeded torrent [${torrent.infoHash}] ${torrent.title}`)
|
||||
return torrent;
|
||||
}
|
||||
|
||||
return createTorrentEntry(torrent).then(() => torrent);
|
||||
}
|
||||
|
||||
function typeMapping() {
|
||||
const mapping = {};
|
||||
mapping[torrentGalaxy.Categories.MOVIE_SD] = Type.MOVIE;
|
||||
mapping[torrentGalaxy.Categories.MOVIE_HD] = Type.MOVIE;
|
||||
mapping[torrentGalaxy.Categories.MOVIE_4K] = Type.MOVIE;
|
||||
mapping[torrentGalaxy.Categories.MOVIE_CAM] = Type.MOVIE;
|
||||
mapping[torrentGalaxy.Categories.MOVIE_PACKS] = Type.MOVIE;
|
||||
mapping[torrentGalaxy.Categories.MOVIE_BOLLYWOOD] = Type.MOVIE;
|
||||
mapping[torrentGalaxy.Categories.DOCUMENTARIES] = Type.MOVIE;
|
||||
mapping[torrentGalaxy.Categories.TV_SD] = Type.SERIES;
|
||||
mapping[torrentGalaxy.Categories.TV_HD] = Type.SERIES;
|
||||
mapping[torrentGalaxy.Categories.TV_PACKS] = Type.SERIES;
|
||||
mapping[torrentGalaxy.Categories.TV_SPORT] = Type.SERIES;
|
||||
mapping[torrentGalaxy.Categories.ANIME] = Type.ANIME;
|
||||
return mapping;
|
||||
}
|
||||
|
||||
function getMaxPage(category) {
|
||||
switch (category) {
|
||||
case torrentGalaxy.Categories.TV_SD:
|
||||
case torrentGalaxy.Categories.TV_HD:
|
||||
case torrentGalaxy.Categories.MOVIE_SD:
|
||||
case torrentGalaxy.Categories.MOVIE_HD:
|
||||
return 5;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
@@ -1,98 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { getRandomUserAgent } = require('./../../lib/requestHelper');
|
||||
|
||||
const defaultProxies = [
|
||||
'https://yts.mx'
|
||||
];
|
||||
const defaultTimeout = 30000;
|
||||
const limit = 50;
|
||||
|
||||
function torrent(torrentId, config = {}, retries = 2) {
|
||||
if (!torrentId || retries === 0) {
|
||||
return Promise.reject(new Error(`Failed ${torrentId} search`));
|
||||
}
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/movie_details.json?movie_id=${torrentId}`, config)))
|
||||
.then(body => parseResults(body))
|
||||
.catch(error => torrent(torrentId, config, retries - 1));
|
||||
}
|
||||
|
||||
function search(query, config = {}, retries = 2) {
|
||||
if (!query || retries === 0) {
|
||||
return Promise.reject(new Error(`Failed ${query} search`));
|
||||
}
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&query_term=${query}`, config)))
|
||||
.then(results => parseResults(results))
|
||||
.catch(error => search(query, config, retries - 1));
|
||||
}
|
||||
|
||||
function browse(config = {}, retries = 2) {
|
||||
if (retries === 0) {
|
||||
return Promise.reject(new Error(`Failed browse request`));
|
||||
}
|
||||
const page = config.page || 1;
|
||||
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}&page=${page}`, config)))
|
||||
.then(results => parseResults(results))
|
||||
.catch(error => browse(config, retries - 1));
|
||||
}
|
||||
|
||||
function maxPage() {
|
||||
return Promises.first(defaultProxies
|
||||
.map(proxyUrl => singleRequest(`${proxyUrl}/api/v2/list_movies.json?limit=${limit}`)))
|
||||
.then(results => Math.round((results?.data?.movie_count || 0) / limit))
|
||||
}
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { headers: { 'User-Agent': getRandomUserAgent() }, timeout: timeout };
|
||||
|
||||
return axios.get(requestUrl, options)
|
||||
.then(response => {
|
||||
if (!response.data) {
|
||||
return Promise.reject(`No body: ${requestUrl}`);
|
||||
}
|
||||
return Promise.resolve(response.data);
|
||||
});
|
||||
}
|
||||
|
||||
function parseResults(results) {
|
||||
if (!results || !results.data || (!results.data.movie && !Array.isArray(results.data.movies))) {
|
||||
console.log('Incorrect results: ', results);
|
||||
return Promise.reject('Incorrect results')
|
||||
}
|
||||
return (results.data.movies || [results.data.movie])
|
||||
.filter(movie => Array.isArray(movie.torrents))
|
||||
.map(movie => parseMovie(movie))
|
||||
.reduce((a, b) => a.concat(b), []);
|
||||
}
|
||||
|
||||
function parseMovie(movie) {
|
||||
return movie.torrents.map(torrent => ({
|
||||
name: `${movie.title} ${movie.year} ${torrent.quality} ${formatType(torrent.type)} `,
|
||||
torrentId: `${movie.id}-${torrent.hash.trim().toLowerCase()}`,
|
||||
infoHash: torrent.hash.trim().toLowerCase(),
|
||||
torrentLink: torrent.url,
|
||||
seeders: torrent.seeds,
|
||||
size: torrent.size_bytes,
|
||||
uploadDate: new Date(torrent.date_uploaded_unix * 1000),
|
||||
imdbId: movie.imdb_code
|
||||
}));
|
||||
}
|
||||
|
||||
function formatType(type) {
|
||||
if (type === 'web') {
|
||||
return 'WEBRip';
|
||||
}
|
||||
if (type === 'bluray') {
|
||||
return 'BluRay';
|
||||
}
|
||||
return type.toUpperCase();
|
||||
}
|
||||
|
||||
module.exports = { torrent, search, browse, maxPage };
|
||||
@@ -1,15 +0,0 @@
|
||||
const moment = require("moment");
|
||||
const yts = require('./yts_api');
|
||||
const scraper = require('./yts_scraper')
|
||||
|
||||
|
||||
async function scrape() {
|
||||
const scrapeStart = moment();
|
||||
console.log(`[${scrapeStart}] starting ${scraper.NAME} full scrape...`);
|
||||
|
||||
return yts.maxPage()
|
||||
.then(maxPage => scraper.scrape(maxPage))
|
||||
.then(() => console.log(`[${moment()}] finished ${scraper.NAME} full scrape`));
|
||||
}
|
||||
|
||||
module.exports = { scrape, NAME: scraper.NAME };
|
||||
@@ -1,67 +0,0 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const yts = require('./yts_api');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'YTS';
|
||||
const UNTIL_PAGE = 10;
|
||||
|
||||
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||
|
||||
async function scrape(maxPage) {
|
||||
const scrapeStart = moment();
|
||||
const lastScrape = await repository.getProvider({ name: NAME });
|
||||
console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
|
||||
|
||||
return scrapeLatestTorrentsForCategory(maxPage)
|
||||
.then(() => {
|
||||
lastScrape.lastScraped = scrapeStart;
|
||||
return lastScrape.save();
|
||||
})
|
||||
.then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
|
||||
}
|
||||
|
||||
async function updateSeeders(torrent) {
|
||||
return limiter.schedule(() => yts.torrent(torrent.torrentId));
|
||||
}
|
||||
|
||||
async function scrapeLatestTorrentsForCategory(maxPage = UNTIL_PAGE, page = 1) {
|
||||
console.log(`Scrapping ${NAME} page ${page}`);
|
||||
return yts.browse(({ page }))
|
||||
.catch(error => {
|
||||
console.warn(`Failed ${NAME} scrapping for [${page}] due: `, error);
|
||||
return Promise.resolve([]);
|
||||
})
|
||||
.then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
|
||||
.then(resolved => resolved.length > 0 && page < maxPage
|
||||
? scrapeLatestTorrentsForCategory(maxPage, page + 1)
|
||||
: Promise.resolve());
|
||||
}
|
||||
|
||||
async function processTorrentRecord(record) {
|
||||
if (await checkAndUpdateTorrent(record)) {
|
||||
return record;
|
||||
}
|
||||
|
||||
if (!record || !record.size) {
|
||||
return Promise.resolve('Invalid torrent record');
|
||||
}
|
||||
|
||||
const torrent = {
|
||||
infoHash: record.infoHash,
|
||||
provider: NAME,
|
||||
torrentId: record.torrentId,
|
||||
title: record.name.replace(/\t|\s+/g, ' ').trim(),
|
||||
type: Type.MOVIE,
|
||||
size: record.size,
|
||||
seeders: record.seeders,
|
||||
uploadDate: record.uploadDate,
|
||||
imdbId: record.imdbId,
|
||||
};
|
||||
|
||||
return createTorrentEntry(torrent).then(() => torrent);
|
||||
}
|
||||
|
||||
module.exports = { scrape, updateSeeders, NAME };
|
||||
1
src/node/addon-jackett/.eslintignore
Normal file
1
src/node/addon-jackett/.eslintignore
Normal file
@@ -0,0 +1 @@
|
||||
*.ts
|
||||
39
src/node/addon-jackett/.eslintrc.cjs
Normal file
39
src/node/addon-jackett/.eslintrc.cjs
Normal file
@@ -0,0 +1,39 @@
|
||||
/** @type {import("eslint").ESLint.Options} */
|
||||
module.exports = {
|
||||
env: {
|
||||
es2024: true,
|
||||
node: true,
|
||||
},
|
||||
globals: {
|
||||
Atomics: "readonly",
|
||||
SharedArrayBuffer: "readonly",
|
||||
},
|
||||
parserOptions: {
|
||||
sourceType: "module",
|
||||
},
|
||||
plugins: ["import", "import-helpers"],
|
||||
rules: {
|
||||
"default-case": "off",
|
||||
"import/no-duplicates": "off",
|
||||
"import/no-extraneous-dependencies": ["off", { devDependencies: ["backend", "frontend", "mobile"] }],
|
||||
"import/order": "off",
|
||||
"import-helpers/order-imports": [
|
||||
"warn",
|
||||
{
|
||||
alphabetize: {
|
||||
order: "asc",
|
||||
},
|
||||
},
|
||||
],
|
||||
"lines-between-class-members": ["error", "always", { exceptAfterSingleLine: true }],
|
||||
"no-continue": "off",
|
||||
"no-param-reassign": "off",
|
||||
"no-plusplus": ["error", { allowForLoopAfterthoughts: true }],
|
||||
"no-restricted-syntax": "off",
|
||||
"no-unused-expressions": ["off", { allowShortCircuit: true }],
|
||||
"no-unused-vars": "off",
|
||||
"no-use-before-define": "off",
|
||||
"one-var": ["error", { uninitialized: "consecutive" }],
|
||||
"prefer-destructuring": "warn",
|
||||
},
|
||||
};
|
||||
29
src/node/addon-jackett/Dockerfile
Normal file
29
src/node/addon-jackett/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
||||
# --- Build Stage ---
|
||||
FROM node:lts-alpine AS builder
|
||||
|
||||
RUN apk update && apk upgrade && \
|
||||
apk add --no-cache git
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY package*.json ./
|
||||
RUN npm install
|
||||
COPY . .
|
||||
RUN npm run build
|
||||
|
||||
# --- Runtime Stage ---
|
||||
FROM node:lts-alpine
|
||||
|
||||
# Install pm2
|
||||
RUN npm install pm2 -g
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENV NODE_ENV production
|
||||
|
||||
COPY --from=builder /app ./
|
||||
RUN npm prune --omit=dev
|
||||
|
||||
EXPOSE 7001
|
||||
|
||||
ENTRYPOINT [ "pm2-runtime", "start", "ecosystem.config.cjs"]
|
||||
1
src/node/addon-jackett/README.md
Normal file
1
src/node/addon-jackett/README.md
Normal file
@@ -0,0 +1 @@
|
||||
# addon-jackett
|
||||
4
src/node/addon-jackett/build.sh
Normal file
4
src/node/addon-jackett/build.sh
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
docker build -t ippexdeploymentscr.azurecr.io/dave/stremio-addon-jackett:latest . --platform linux/amd64
|
||||
docker push ippexdeploymentscr.azurecr.io/dave/stremio-addon-jackett:latest
|
||||
14
src/node/addon-jackett/ecosystem.config.cjs
Normal file
14
src/node/addon-jackett/ecosystem.config.cjs
Normal file
@@ -0,0 +1,14 @@
|
||||
module.exports = {
|
||||
apps: [
|
||||
{
|
||||
name: 'stremio-jackett',
|
||||
script: 'npm start',
|
||||
cwd: '/app',
|
||||
watch: ['./dist/index.js'],
|
||||
autorestart: true,
|
||||
env: {
|
||||
...process.env,
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
68
src/node/addon-jackett/esbuild.js
Normal file
68
src/node/addon-jackett/esbuild.js
Normal file
@@ -0,0 +1,68 @@
|
||||
import { build } from 'esbuild';
|
||||
import { copy } from 'esbuild-plugin-copy';
|
||||
import { readFileSync, rmSync } from 'fs';
|
||||
|
||||
const { devDependencies } = JSON.parse(readFileSync('./package.json', 'utf8'));
|
||||
|
||||
const start = Date.now();
|
||||
|
||||
try {
|
||||
const outdir = 'dist';
|
||||
|
||||
rmSync(outdir, { recursive: true, force: true });
|
||||
|
||||
build({
|
||||
bundle: true,
|
||||
entryPoints: [
|
||||
'./src/index.js',
|
||||
// "./src/**/*.css",
|
||||
// "./src/**/*.hbs",
|
||||
// "./src/**/*.html"
|
||||
],
|
||||
external: [...(devDependencies && Object.keys(devDependencies))],
|
||||
keepNames: true,
|
||||
loader: {
|
||||
'.css': 'copy',
|
||||
'.hbs': 'copy',
|
||||
'.html': 'copy',
|
||||
},
|
||||
minify: true,
|
||||
outbase: './src',
|
||||
outdir,
|
||||
outExtension: {
|
||||
'.js': '.cjs',
|
||||
},
|
||||
platform: 'node',
|
||||
plugins: [
|
||||
{
|
||||
name: 'populate-import-meta',
|
||||
setup: ({ onLoad }) => {
|
||||
onLoad({ filter: new RegExp(`${import.meta.dirname}/src/.*\.(js|ts)$`) }, args => {
|
||||
const contents = readFileSync(args.path, 'utf8');
|
||||
|
||||
const transformedContents = contents
|
||||
.replace(/import\.meta/g, `{dirname:__dirname,filename:__filename}`)
|
||||
.replace(/import\.meta\.filename/g, '__filename')
|
||||
.replace(/import\.meta\.dirname/g, '__dirname');
|
||||
|
||||
return { contents: transformedContents, loader: 'default' };
|
||||
});
|
||||
},
|
||||
},
|
||||
copy({
|
||||
assets: [
|
||||
{
|
||||
from: ['./static/**'],
|
||||
to: ['./static'],
|
||||
},
|
||||
],
|
||||
}),
|
||||
],
|
||||
}).then(() => {
|
||||
// biome-ignore lint/style/useTemplate: <explanation>
|
||||
console.log('⚡ ' + '\x1b[32m' + `Done in ${Date.now() - start}ms`);
|
||||
});
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
process.exit(1);
|
||||
}
|
||||
21
src/node/addon-jackett/jsconfig.json
Normal file
21
src/node/addon-jackett/jsconfig.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"baseUrl": "./src",
|
||||
"checkJs": true,
|
||||
"isolatedModules": true,
|
||||
"lib": ["es6"],
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "node",
|
||||
"outDir": "./dist",
|
||||
"pretty": true,
|
||||
"removeComments": true,
|
||||
"resolveJsonModule": true,
|
||||
"rootDir": "./src",
|
||||
"skipLibCheck": true,
|
||||
"sourceMap": true,
|
||||
"target": "ES6",
|
||||
"types": ["node"],
|
||||
"typeRoots": ["node_modules/@types", "src/@types"]
|
||||
},
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
6024
src/node/addon-jackett/package-lock.json
generated
Normal file
6024
src/node/addon-jackett/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,13 +1,13 @@
|
||||
{
|
||||
"name": "stremio-torrentio",
|
||||
"version": "1.0.14",
|
||||
"exports": "./index.js",
|
||||
"type": "commonjs",
|
||||
"name": "jackettio-addon",
|
||||
"version": "0.0.1",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"start": "node index.js"
|
||||
"build": "node esbuild.js",
|
||||
"dev": "tsx watch --ignore node_modules src/index.js",
|
||||
"start": "node dist/index.cjs",
|
||||
"lint": "eslint . --ext .ts,.js"
|
||||
},
|
||||
"author": "TheBeastLT <pauliox@beyond.lt>",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@putdotio/api-client": "^8.42.0",
|
||||
"all-debrid-api": "^1.1.0",
|
||||
@@ -17,34 +17,32 @@
|
||||
"cache-manager-mongodb": "^0.3.0",
|
||||
"cors": "^2.8.5",
|
||||
"debrid-link-api": "^1.0.1",
|
||||
"express": "^4.18.2",
|
||||
"express-rate-limit": "^6.7.0",
|
||||
"google-it": "^1.6.4",
|
||||
"google-sr": "^3.2.1",
|
||||
"he": "^1.2.0",
|
||||
"jaro-winkler": "^0.2.8",
|
||||
"magnet-uri": "^6.2.0",
|
||||
"name-to-imdb": "^3.0.4",
|
||||
"named-queue": "^2.2.1",
|
||||
"needle": "^3.3.1",
|
||||
"node-schedule": "^2.1.1",
|
||||
"nodejs-bing": "^0.1.0",
|
||||
"nyaapi": "^2.4.4",
|
||||
"offcloud-api": "^1.0.2",
|
||||
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#022408972c2a040f846331a912a6a8487746a654",
|
||||
"pg": "^8.11.3",
|
||||
"pg-hstore": "^2.3.4",
|
||||
"premiumize-api": "^1.0.3",
|
||||
"prom-client": "^12.0.0",
|
||||
"real-debrid-api": "git://github.com/TheBeastLT/node-real-debrid.git#d1f7eaa8593b947edbfbc8a92a176448b48ef445",
|
||||
"request-ip": "^3.3.0",
|
||||
"router": "^1.3.8",
|
||||
"rss-parser": "^3.13.0",
|
||||
"sequelize": "^6.31.1",
|
||||
"stremio-addon-sdk": "^1.6.10",
|
||||
"sugar-date": "^2.0.6",
|
||||
"swagger-stats": "^0.99.7",
|
||||
"torrent-stream": "^1.2.1",
|
||||
"ua-parser-js": "^1.0.36",
|
||||
"user-agents": "^1.0.1444"
|
||||
"url-join": "^5.0.0",
|
||||
"user-agents": "^1.0.1444",
|
||||
"video-name-parser": "^1.4.6",
|
||||
"xml-js": "^1.6.11",
|
||||
"xml2js": "^0.6.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.11.6",
|
||||
"@types/stremio-addon-sdk": "^1.6.10",
|
||||
"esbuild": "^0.19.12",
|
||||
"esbuild-plugin-copy": "^2.1.1",
|
||||
"eslint": "^8.56.0",
|
||||
"eslint-plugin-import": "^2.29.1",
|
||||
"eslint-plugin-import-helpers": "^1.3.1",
|
||||
"tsx": "^4.7.0"
|
||||
}
|
||||
}
|
||||
126
src/node/addon-jackett/src/addon.js
Normal file
126
src/node/addon-jackett/src/addon.js
Normal file
@@ -0,0 +1,126 @@
|
||||
import Bottleneck from 'bottleneck';
|
||||
import {addonBuilder} from 'stremio-addon-sdk';
|
||||
import {searchJackett} from "./jackett/jackett.js";
|
||||
import {cacheWrapStream} from './lib/cache.js';
|
||||
import {getMetaData} from "./lib/cinemetaProvider.js";
|
||||
import {dummyManifest} from './lib/manifest.js';
|
||||
import {cacheConfig, processConfig} from "./lib/settings.js";
|
||||
import applySorting from './lib/sort.js';
|
||||
import {toStreamInfo} from './lib/streamInfo.js';
|
||||
import {Type} from './lib/types.js';
|
||||
import {applyMochs, getMochCatalog, getMochItemMeta} from './moch/moch.js';
|
||||
import StaticLinks from './moch/static.js';
|
||||
|
||||
const builder = new addonBuilder(dummyManifest());
|
||||
const limiter = new Bottleneck({
|
||||
maxConcurrent: 200,
|
||||
highWater: 220,
|
||||
strategy: Bottleneck.strategy.OVERFLOW
|
||||
});
|
||||
|
||||
builder.defineStreamHandler((args) => {
|
||||
if (!args.id.match(/tt\d+/i) && !args.id.match(/kitsu:\d+/i)) {
|
||||
return Promise.resolve({ streams: [] });
|
||||
}
|
||||
|
||||
if (processConfig.DEBUG) {
|
||||
console.log(`Incoming stream ${args.id} request`)
|
||||
console.log('args', args);
|
||||
}
|
||||
|
||||
return cacheWrapStream(args.id, () => limiter.schedule(() =>
|
||||
streamHandler(args)
|
||||
.then(records => records.map(record => toStreamInfo(record, args.type))))
|
||||
.then(streams => applySorting(streams, args.extra))
|
||||
.then(streams => applyMochs(streams, args.extra))
|
||||
.then(streams => enrichCacheParams(streams))
|
||||
.catch(error => {
|
||||
console.log(`Failed request ${args.id}: ${error}`);
|
||||
return Promise.reject(`Failed request ${args.id}: ${error}`);
|
||||
}));
|
||||
});
|
||||
|
||||
|
||||
builder.defineCatalogHandler((args) => {
|
||||
const mochKey = args.id.replace("jackettio-", '');
|
||||
console.log(`Incoming catalog ${args.id} request with skip=${args.extra.skip || 0}`)
|
||||
return getMochCatalog(mochKey, args.extra)
|
||||
.then(metas => ({
|
||||
metas: metas,
|
||||
cacheMaxAge: cacheConfig.CATALOG_CACHE_MAX_AGE
|
||||
}))
|
||||
.catch(error => {
|
||||
return Promise.reject(`Failed retrieving catalog ${args.id}: ${JSON.stringify(error)}`);
|
||||
});
|
||||
})
|
||||
|
||||
builder.defineMetaHandler((args) => {
|
||||
const [mochKey, metaId] = args.id.split(':');
|
||||
console.log(`Incoming debrid meta ${args.id} request`)
|
||||
return getMochItemMeta(mochKey, metaId, args.extra)
|
||||
.then(meta => ({
|
||||
meta: meta,
|
||||
cacheMaxAge: metaId === 'Downloads' ? 0 : cacheConfig.CACHE_MAX_AGE
|
||||
}))
|
||||
.catch(error => {
|
||||
return Promise.reject(`Failed retrieving catalog meta ${args.id}: ${JSON.stringify(error)}`);
|
||||
});
|
||||
})
|
||||
|
||||
async function streamHandler(args) {
|
||||
if (args.type === Type.MOVIE) {
|
||||
return movieRecordsHandler(args);
|
||||
} else if (args.type === Type.SERIES) {
|
||||
return seriesRecordsHandler(args);
|
||||
}
|
||||
return Promise.reject('not supported type');
|
||||
}
|
||||
|
||||
async function seriesRecordsHandler(args) {
|
||||
if (args.id.match(/^tt\d+:\d+:\d+$/)) {
|
||||
const parts = args.id.split(':');
|
||||
const season = parts[1] !== undefined ? parseInt(parts[1], 10) : 1;
|
||||
const episode = parts[2] !== undefined ? parseInt(parts[2], 10) : 1;
|
||||
|
||||
const metaData = await getMetaData(args);
|
||||
return await searchJackett({
|
||||
type: Type.SERIES,
|
||||
season: season,
|
||||
episode: episode,
|
||||
name: metaData.name,
|
||||
});
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
async function movieRecordsHandler(args) {
|
||||
if (args.id.match(/^tt\d+$/)) {
|
||||
|
||||
const metaData = await getMetaData(args);
|
||||
return await searchJackett({
|
||||
type: Type.MOVIE,
|
||||
name: metaData.name,
|
||||
year: metaData.year,
|
||||
});
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
function enrichCacheParams(streams) {
|
||||
let cacheAge = cacheConfig.CACHE_MAX_AGE;
|
||||
if (!streams.length) {
|
||||
cacheAge = cacheConfig.CACHE_MAX_AGE_EMPTY;
|
||||
} else if (streams.every(stream => stream?.url?.endsWith(StaticLinks.FAILED_ACCESS))) {
|
||||
cacheAge = 0;
|
||||
}
|
||||
return {
|
||||
streams: streams,
|
||||
cacheMaxAge: cacheAge,
|
||||
staleRevalidate: cacheConfig.STALE_REVALIDATE_AGE,
|
||||
staleError: cacheConfig.STALE_ERROR_AGE
|
||||
}
|
||||
}
|
||||
|
||||
export default builder.getInterface();
|
||||
13
src/node/addon-jackett/src/index.js
Normal file
13
src/node/addon-jackett/src/index.js
Normal file
@@ -0,0 +1,13 @@
|
||||
import express from 'express';
|
||||
import { initBestTrackers } from './lib/magnetHelper.js';
|
||||
import {processConfig} from "./lib/settings.js";
|
||||
import serverless from './serverless.js';
|
||||
|
||||
const app = express();
|
||||
app.enable('trust proxy');
|
||||
app.use(express.static('static', { maxAge: '1y' }));
|
||||
app.use((req, res) => serverless(req, res));
|
||||
app.listen(processConfig.PORT, () => {
|
||||
initBestTrackers()
|
||||
.then(() => console.log(`Started addon at: http://localhost:${processConfig.PORT}`));
|
||||
});
|
||||
151
src/node/addon-jackett/src/jackett/jacketParser.js
Normal file
151
src/node/addon-jackett/src/jackett/jacketParser.js
Normal file
@@ -0,0 +1,151 @@
|
||||
import videoNameParser from "video-name-parser";
|
||||
import {parseStringPromise as parseString} from "xml2js";
|
||||
import {processConfig, jackettConfig} from "../lib/settings.js";
|
||||
|
||||
export function extractSize(title) {
|
||||
const seedersMatch = title.match(/💾 ([\d.]+ \w+)/);
|
||||
return seedersMatch && parseSize(seedersMatch[1]) || 0;
|
||||
}
|
||||
|
||||
export function parseSize(sizeText) {
|
||||
if (!sizeText) {
|
||||
return 0;
|
||||
}
|
||||
let scale = 1;
|
||||
if (sizeText.includes('TB')) {
|
||||
scale = 1024 * 1024 * 1024 * 1024
|
||||
} else if (sizeText.includes('GB')) {
|
||||
scale = 1024 * 1024 * 1024
|
||||
} else if (sizeText.includes('MB')) {
|
||||
scale = 1024 * 1024;
|
||||
} else if (sizeText.includes('kB')) {
|
||||
scale = 1024;
|
||||
}
|
||||
return Math.floor(parseFloat(sizeText.replace(/,/g, '')) * scale);
|
||||
}
|
||||
|
||||
export const parseVideo = (name) => {
|
||||
return videoNameParser(name + '.mp4');
|
||||
};
|
||||
|
||||
export const episodeTag = (season, episode) => {
|
||||
const paddedSeason = season < 10 ? `0${season}` : season;
|
||||
const paddedEpisode = episode < 10 ? `0${episode}` : episode;
|
||||
return `S${paddedSeason}E${paddedEpisode}`;
|
||||
};
|
||||
|
||||
export const cleanName = (name) => {
|
||||
name = name.replace(/[._\-–()\[\]:,]/g, ' ');
|
||||
name = name.replace(/\s+/g, ' ');
|
||||
name = name.replace(/'/g, '');
|
||||
name = name.replace(/\\\\/g, '\\').replace(/\\\\'|\\'|\\\\"|\\"/g, '');
|
||||
return name;
|
||||
};
|
||||
|
||||
export const insertIntoSortedArray = (sortedArray, newObject, sortingProperty, maxSize) => {
|
||||
const indexToInsert = sortedArray.findIndex(item => item[sortingProperty] < newObject[sortingProperty]);
|
||||
|
||||
if (indexToInsert === -1) {
|
||||
if (sortedArray.length < maxSize) {
|
||||
sortedArray.push(newObject);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
} else {
|
||||
// Insert the new object at the correct position to maintain the sorted order (descending)
|
||||
sortedArray.splice(indexToInsert, 0, newObject);
|
||||
// Trim the array if it exceeds maxSize
|
||||
if (sortedArray.length > maxSize) {
|
||||
sortedArray.pop();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
export const extraTag = (name, searchQuery) => {
|
||||
const parsedName = parseVideo(name + '.mp4');
|
||||
let extraTag = cleanName(name);
|
||||
searchQuery = cleanName(searchQuery);
|
||||
|
||||
extraTag = extraTag.replace(new RegExp(searchQuery, 'gi'), '');
|
||||
extraTag = extraTag.replace(new RegExp(parsedName.name, 'gi'), '');
|
||||
|
||||
if (parsedName.year) {
|
||||
extraTag = extraTag.replace(parsedName.year.toString(), '');
|
||||
}
|
||||
|
||||
if (parsedName.season && parsedName.episode && parsedName.episode.length) {
|
||||
extraTag = extraTag.replace(new RegExp(episodeTag(parsedName.season, parsedName.episode[0]), 'gi'), '');
|
||||
}
|
||||
|
||||
extraTag = extraTag.trim();
|
||||
|
||||
let extraParts = extraTag.split(' ');
|
||||
|
||||
if (parsedName.season && parsedName.episode && parsedName.episode.length) {
|
||||
if (extraParts[0] && extraParts[0].length === 2 && !isNaN(extraParts[0])) {
|
||||
const possibleEpTag = `${episodeTag(parsedName.season, parsedName.episode[0])}-${extraParts[0]}`;
|
||||
if (name.toLowerCase().includes(possibleEpTag.toLowerCase())) {
|
||||
extraParts[0] = possibleEpTag;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const foundPart = name.toLowerCase().indexOf(extraParts[0].toLowerCase());
|
||||
|
||||
if (foundPart > -1) {
|
||||
extraTag = name.substring(foundPart).replace(/[_()\[\],]/g, ' ');
|
||||
|
||||
if ((extraTag.match(/\./g) || []).length > 1) {
|
||||
extraTag = extraTag.replace(/\./g, ' ');
|
||||
}
|
||||
|
||||
extraTag = extraTag.replace(/\s+/g, ' ');
|
||||
}
|
||||
|
||||
return extraTag;
|
||||
};
|
||||
|
||||
|
||||
export const transformData = async (data, query) => {
|
||||
console.log("Transforming data for query " + data);
|
||||
|
||||
let results = [];
|
||||
|
||||
const parsedData = await parseString(data);
|
||||
|
||||
if (!parsedData.rss.channel[0]?.item) {
|
||||
return [];
|
||||
}
|
||||
|
||||
for (const rssItem of parsedData.rss.channel[0].item) {
|
||||
let torznabData = {};
|
||||
|
||||
rssItem["torznab:attr"].forEach((torznabDataItem) =>
|
||||
Object.assign(torznabData, {
|
||||
[torznabDataItem.$.name]: torznabDataItem.$.value,
|
||||
})
|
||||
);
|
||||
|
||||
if (torznabData.infohash) {
|
||||
|
||||
const [title, pubDate, category, size] = [rssItem.title[0], rssItem.pubDate[0], rssItem.category[0], rssItem.size[0]];
|
||||
|
||||
torznabData = {
|
||||
...torznabData,
|
||||
title,
|
||||
pubDate,
|
||||
category,
|
||||
size,
|
||||
extraTag: extraTag(title, query.name)
|
||||
};
|
||||
|
||||
if (insertIntoSortedArray(results, torznabData, 'size', jackettConfig.MAXIMUM_RESULTS)) {
|
||||
processConfig.DEBUG && console.log(torznabData);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return results;
|
||||
};
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user