36 Commits

Author SHA1 Message Date
iPromKnight
9c6c1ac249 Update compose versions 1.0.1, ready for tag push (#163) 2024-03-25 20:38:11 +00:00
iPromKnight
0ddfac57f7 Build on Tag Pushes. (#162)
* enable tag and pr builds

* Build on Tag Pushes
2024-03-25 20:27:37 +00:00
iPromKnight
9fbd750cd2 enable tag and pr builds (#161) 2024-03-25 20:24:14 +00:00
Knight Crawler
5fc2027cfa Option to manually trigger each workflow (#159)
Co-authored-by: purple_emily <purple_emily@protonmail.com>
2024-03-20 20:26:32 +00:00
purple_emily
2d39476c65 Push dev builds & ready to tag semver (#153) 2024-03-14 14:27:19 +00:00
iPromKnight
e7f987a0d7 Merge pull request #151 from Gabisonfire/feature/tissue-corn-sanitizer
Improve producer matching - Add tissue service
2024-03-12 10:31:18 +00:00
iPromKnight
79a6aa3cb0 Improve producer matching - Add tissue service
Tissue service will sanitize the existing database of ingested torrents by matching existing titles with new banned word lists. Now with added kleenex
2024-03-12 10:29:13 +00:00
iPromKnight
e24d81dd96 Merge pull request #149 from Gabisonfire/improve-consumer
Simplification of parsing in consumer
2024-03-11 11:00:23 +00:00
iPromKnight
aeb83c19f8 Simplification of parsing in consumer
should speed up massively especially if imdbIds are found from mongodb
2024-03-11 10:56:04 +00:00
iPromKnight
e23ee974e2 Merge pull request #148 from Gabisonfire/hotfix/nyaa
Fix nyaa category
2024-03-11 09:03:22 +00:00
iPromKnight
5c310427b4 Fix nyaa category 2024-03-11 08:59:55 +00:00
iPromKnight
b3d9be0b7a Merge pull request #147 from Gabisonfire/force_build
accidentally skipped build on last pr
2024-03-10 23:37:28 +00:00
iPromKnight
dda81ec5bf accidentally skipped build on last pr
tired..
2024-03-10 23:37:16 +00:00
iPromKnight
8eae288f10 Merge pull request #146 from Gabisonfire/hotfix/default_season_1
[skip ci] Final hotfix
2024-03-10 23:34:41 +00:00
iPromKnight
75ac89489e [skip ci] Final hotfix 2024-03-10 23:34:35 +00:00
iPromKnight
fa27b0cda9 Merge pull request #145 from Gabisonfire/hotfix/series_consumer
Fix series parsing
2024-03-10 22:28:00 +00:00
iPromKnight
500dd0d725 patch type 2024-03-10 22:28:06 +00:00
iPromKnight
6f4bc10f5a Fix series parsing 2024-03-10 21:38:55 +00:00
iPromKnight
1b3c190ed1 Merge pull request #144 from Gabisonfire/reduce_cpu_cycles
reduce cpu cycles in parsing in producer
2024-03-10 15:14:37 +00:00
iPromKnight
02150482df reduce cpu cycles in parsing in producer 2024-03-10 15:14:17 +00:00
iPromKnight
f18cd5b1ac Merge pull request #143 from Gabisonfire/extra_terms
Few extra terms getting through
2024-03-10 14:54:58 +00:00
iPromKnight
2e774058ff Few extra terms getting through 2024-03-10 14:54:25 +00:00
iPromKnight
4e84d7c9c3 Merge pull request #142 from Gabisonfire/feature/dmm-improvements
remove log line of adult content
2024-03-10 13:55:20 +00:00
iPromKnight
ad04d323b4 remove log line of adult content 2024-03-10 13:54:35 +00:00
iPromKnight
7d0b779bc8 Merge pull request #129 from Gabisonfire/feature/dmm-improvements
Improvements for DMM
2024-03-10 13:52:53 +00:00
iPromKnight
e2b45e799d [skip ci] Remove Debug logged adult terms found 2024-03-10 13:49:51 +00:00
iPromKnight
6c03f79933 Complete 2024-03-10 13:48:27 +00:00
iPromKnight
c8a1ebd8ae Bump large file to 2500kb because of Jav list.
Doesn't make sense to enable lfs for this file.
2024-03-10 13:48:14 +00:00
iPromKnight
320fccc8e8 [skip ci] More work on parsing - seasons to fix still and use banned words 2024-03-10 12:48:19 +00:00
iPromKnight
51246ed352 Ignore producer data dir from codespell hook 2024-03-10 12:48:19 +00:00
iPromKnight
8d82a17876 re-disable services other than dmm while developing
re-enable

disable again - will squash dont worry

enable again

disable again
2024-03-10 12:48:19 +00:00
iPromKnight
f719520b3b [skip ci] Ignore all run profiles to prevent pat leaking
reenable these, testing only producer should build
2024-03-10 12:48:19 +00:00
iPromKnight
bacb50e060 [skip ci] remove extra package no longer in use 2024-03-10 12:48:19 +00:00
iPromKnight
6600fceb1a Wip Blacklisting dmm porn
Create adult text classifier ML Model

wip - starting to write PTN in c#

More work on season, show and movie parsing

Remove ML project
2024-03-10 12:48:16 +00:00
purple_emily
5aba05f2b4 Merge pull request #141 from Gabisonfire/generic-fixes
Typo at the end of the staging environment
2024-03-10 12:45:58 +00:00
purple_emily
601dbdf64f Typo at the end of the staging environment 2024-03-10 12:27:21 +00:00
107 changed files with 690853 additions and 1241 deletions

View File

@@ -70,8 +70,10 @@ jobs:
flavor: |
latest=auto
tags: |
type=edge,branch=master,commit=${{ github.sha }}
type=ref,event=tag
type=ref,event=pr
type=sha,commit=${{ github.sha }}
type=semver,pattern={{version}}
type=raw,value=latest,enable={{is_default_branch}}
- name: Build image for scanning
@@ -130,7 +132,7 @@ jobs:
sarif_file: 'trivy-results-os.sarif'
- name: Push Service Image to repo
if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master'
# if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master'
uses: docker/build-push-action@v5
with:
context: ${{ needs.set-vars.outputs.CONTEXT }}

View File

@@ -4,6 +4,9 @@ on:
push:
paths:
- 'src/node/addon/**'
tags:
- 'v*.*.*'
workflow_dispatch:
jobs:
process:

View File

@@ -4,6 +4,9 @@ on:
push:
paths:
- 'src/node/consumer/**'
tags:
- 'v*.*.*'
workflow_dispatch:
jobs:
process:

View File

@@ -1,86 +0,0 @@
name: Build documentation
# TODO: Only run on ./docs folder change
on:
push:
branches: ["master"]
paths:
- 'docs/**'
# Specify to run a workflow manually from the Actions tab on GitHub
workflow_dispatch:
permissions:
id-token: write
pages: write
env:
INSTANCE: Writerside/kc
ARTIFACT: webHelpKC2-all.zip
DOCS_FOLDER: ./docs
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Build Writerside docs using Docker
uses: JetBrains/writerside-github-action@v4
with:
instance: ${{ env.INSTANCE }}
artifact: ${{ env.ARTIFACT }}
location: ${{ env.DOCS_FOLDER }}
- name: Upload artifact
uses: actions/upload-artifact@v3
with:
name: docs
path: |
artifacts/${{ env.ARTIFACT }}
artifacts/report.json
retention-days: 7
test:
needs: build
runs-on: ubuntu-latest
steps:
- name: Download artifacts
uses: actions/download-artifact@v3
with:
name: docs
path: artifacts
- name: Test documentation
uses: JetBrains/writerside-checker-action@v1
with:
instance: ${{ env.INSTANCE }}
deploy:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
needs: [build, test]
runs-on: ubuntu-latest
steps:
- name: Download artifacts
uses: actions/download-artifact@v3
with:
name: docs
- name: Unzip artifact
run: unzip -O UTF-8 -qq '${{ env.ARTIFACT }}' -d dir
- name: Setup Pages
uses: actions/configure-pages@v4
- name: Package and upload Pages artifact
uses: actions/upload-pages-artifact@v3
with:
path: dir
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4

View File

@@ -4,6 +4,9 @@ on:
push:
paths:
- 'src/node/addon-jackett/**'
tags:
- 'v*.*.*'
workflow_dispatch:
jobs:
process:

View File

@@ -4,6 +4,9 @@ on:
push:
paths:
- 'src/metadata/**'
tags:
- 'v*.*.*'
workflow_dispatch:
jobs:
process:

View File

@@ -4,6 +4,9 @@ on:
push:
paths:
- 'src/migrator/**'
tags:
- 'v*.*.*'
workflow_dispatch:
jobs:
process:

View File

@@ -4,6 +4,9 @@ on:
push:
paths:
- 'src/producer/**'
tags:
- 'v*.*.*'
workflow_dispatch:
jobs:
process:

17
.github/workflows/build_tissue.yaml vendored Normal file
View File

@@ -0,0 +1,17 @@
name: Build and Push Tissue Service
on:
push:
paths:
- 'src/tissue/**'
tags:
- 'v*.*.*'
workflow_dispatch:
jobs:
process:
uses: ./.github/workflows/base_image_workflow.yaml
secrets: inherit
with:
CONTEXT: ./src/tissue/
IMAGE_NAME: knightcrawler-tissue

3
.gitignore vendored
View File

@@ -355,6 +355,9 @@ MigrationBackup/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# Jetbrains ide's run profiles (Could contain sensitive information)
**/.run/
# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json

View File

@@ -3,6 +3,7 @@ repos:
rev: v4.5.0
hooks:
- id: check-added-large-files
args: ['--maxkb=2500']
- id: check-json
- id: check-toml
- id: check-xml
@@ -15,5 +16,6 @@ repos:
rev: v2.2.6
hooks:
- id: codespell
exclude: ^src/node/consumer/test/
exclude: |
(?x)^(src/node/consumer/test/.*|src/producer/Data/.*|src/tissue/Data/.*)$
args: ["-L", "strem,chage"]

View File

@@ -7,9 +7,6 @@
## Contents
> [!CAUTION]
> Until we reach `v1.0.0`, please consider releases as alpha.
> [!IMPORTANT]
> The latest change renames the project and requires a [small migration](#selfhostio-to-knightcrawler-migration).
- [Contents](#contents)

View File

@@ -82,7 +82,7 @@ services:
- knightcrawler-network
producer:
image: gabisonfire/knightcrawler-producer:latest
image: gabisonfire/knightcrawler-producer:1.0.1
labels:
logging: "promtail"
env_file: .env
@@ -91,7 +91,7 @@ services:
- knightcrawler-network
consumer:
image: gabisonfire/knightcrawler-consumer:latest
image: gabisonfire/knightcrawler-consumer:1.0.1
env_file: .env
labels:
logging: "promtail"
@@ -102,7 +102,7 @@ services:
- knightcrawler-network
metadata:
image: gabisonfire/knightcrawler-metadata:latest
image: gabisonfire/knightcrawler-metadata:1.0.1
env_file: .env
labels:
logging: "promtail"
@@ -114,7 +114,7 @@ services:
<<: *knightcrawler-app
env_file: .env
hostname: knightcrawler-addon
image: gabisonfire/knightcrawler-addon:latest
image: gabisonfire/knightcrawler-addon:1.0.1
labels:
logging: "promtail"
networks:

View File

@@ -4,7 +4,7 @@
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Once you have confirmed Caddy works you should comment out
## the below line:
acme_ca https://acme-staging-v02.api.letsencrypt.org/director
acme_ca https://acme-staging-v02.api.letsencrypt.org/directory
}
(security-headers) {

View File

@@ -1,14 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<buildprofiles xsi:noNamespaceSchemaLocation="https://resources.jetbrains.com/writerside/1.0/build-profiles.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<variables>
<header-logo>knight-crawler-logo.png</header-logo>
</variables>
<build-profile instance="kc">
<variables>
<noindex-content>true</noindex-content>
</variables>
</build-profile>
</buildprofiles>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 568 KiB

View File

@@ -1,13 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE instance-profile
SYSTEM "https://resources.jetbrains.com/writerside/1.0/product-profile.dtd">
<instance-profile id="kc" name="Knight Crawler"
start-page="Overview.md">
<toc-element topic="Overview.md"/>
<toc-element topic="Getting-started.md">
</toc-element>
<toc-element topic="External-access.md"/>
<toc-element topic="Supported-Debrid-services.md"/>
</instance-profile>

View File

@@ -1,57 +0,0 @@
# External access
This guide outlines how to use Knight Crawler on devices like your TV. While it's currently limited to the device of
installation, we can change that. With some extra effort, we'll show you how to make it accessible on other devices.
This limitation is set by Stremio, as [explained here](https://github.com/Stremio/stremio-features/issues/687#issuecomment-1890546094).
## What to keep in mind
Before we make Knight Crawler available outside your home network, we've got to talk about safety. No software is
perfect, including ours. Knight Crawler is built on lots of different parts, some made by other people. So, if we keep
it just for your home network, it's a bit safer. But if you want to use it over the internet, just know that keeping
your devices secure is up to you. We won't be responsible for any problems or lost data if you use Knight Crawler that way.
## Initial setup
To enable external access for Knight Crawler, whether it's within your home network or over the internet, you'll
need to follow these initial setup steps:
- Set up Caddy, a powerful and easy-to-use web server.
- Disable the open port in the Knight Crawler <path>docker-compose.yaml</path> file.
### Caddy
A basic Caddy configuration is included with Knight Crawler in the deployment directory.
<path>deployment/docker/optional-services/caddy</path>
```Generic
deployment/
└── docker/
└── optional-services/
└── caddy/
├── config/
│ ├── snippets/
│ │ └── cloudflare-replace-X-Forwarded-For
│ └── Caddyfile
├── logs/
└── docker-compose.yaml
```
ports:
- "8080:8080"
By disabling the default port, Knight Crawler will only be accessible internally within your network, ensuring added security.
## Home network access
## Internet access
### Through a VPN
### On the public web
## Troubleshooting?
## Additional Resources?

View File

@@ -1,192 +0,0 @@
# Getting started
Knight Crawler is provided as an all-in-one solution. This means we include all the necessary software you need to get started
out of the box.
## Before you start
Make sure that you have:
- A place to host Knight Crawler
- [Docker](https://docs.docker.com/get-docker/) and [Compose](https://docs.docker.com/compose/install/) installed
- A [GitHub](https://github.com/) account _(optional)_
## Download the files
Installing Knight Crawler is as simple as downloading a copy of the [deployment directory](https://github.com/Gabisonfire/knightcrawler/tree/master/deployment/docker).
A basic installation requires only two files:
- <path>deployment/docker/.env.example</path>
- <path>deployment/docker/docker-compose.yaml</path>.
For this guide I will be placing them in a directory on my home drive <path>~/knightcrawler</path>.
Rename the <path>.env.example</path> file to be <path>.env</path>
```
~/
└── knightcrawler/
├── .env
└── docker-compose.yaml
```
## Initial configuration
Below are a few recommended configuration changes.
Open the <path>.env</path> file in your favourite editor.
> If you are using an external database, configure it in the <path>.env</path> file. Don't forget to disable the ones
> included in the <path>docker-compose.yaml</path>.
### Database credentials
It is strongly recommended that you change the credentials for the databases included with Knight Crawler. This is best done
before running Knight Crawler for the first time. It is much harder to change the passwords once the services have been started
for the first time.
```Bash
POSTGRES_PASSWORD=postgres
...
MONGODB_PASSWORD=mongo
...
RABBITMQ_PASSWORD=guest
```
Here's a few options on generating a secure password:
```Bash
# Linux
tr -cd '[:alnum:]' < /dev/urandom | fold -w 64 | head -n 1
# Or you could use openssl
openssl rand -hex 32
```
```Python
# Python
import secrets
print(secrets.token_hex(32))
```
### Your time zone
```Bash
TZ=Europe/London
```
A list of time zones can be found on [Wikipedia](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
### Consumers
```Bash
JOB_CONCURRENCY=5
...
MAX_CONNECTIONS_PER_TORRENT=10
...
CONSUMER_REPLICAS=3
```
These are totally subjective to your machine and network capacity. The above default is pretty minimal and will work on
most machines.
`JOB_CONCURRENCY` is how many films and tv shows the consumers should process at once. As this affects every consumer
this will likely cause exponential
strain on your system. It's probably best to leave this at 5, but you can try experimenting with it if you wish.
`MAX_CONNECTIONS_PER_TORRENT` is how many peers the consumer will attempt to connect to when it is trying to collect
metadata.
Increasing this value can speed up processing, but you will eventually reach a point where more connections are being
made than
your router can handle. This will then cause a cascading fail where your internet stops working. If you are going to
increase this value
then try increasing it by 10 at a time.
> Increasing this value increases the max connections for every parallel job, for every consumer. For example
> with the default values above this means that Knight Crawler will be on average making `(5 x 3) x 10 = 150`
> connections at any one time.
>
{style="warning"}
`CONSUMER_REPLICAS` is how many consumers should be initially started. You can increase or decrease the number of consumers whilst the
service is running by running the command `docker compose up -d --scale consumer=<number>`.
### GitHub personal access token
This step is optional but strongly recommended. [Debrid Media Manager](https://debridmediamanager.com/start) is a media library manager
for Debrid services. When a user of this service chooses to export/share their library publicly it is saved to a public GitHub repository.
This is, essentially, a repository containing a vast amount of ready to go films and tv shows. Knight Crawler comes with the ability to
read these exported lists, but it requires a GitHub account to make it work.
Knight Crawler needs a personal access token with read-only access to public repositories. This means we can not access any private
repositories you have.
1. Navigate to GitHub settings ([GitHub token settings](https://github.com/settings/tokens?type=beta)):
- Navigate to `GitHub settings`.
- Click on `Developer Settings`.
- Select `Personal access tokens`.
- Choose `Fine-grained tokens`.
2. Press `Generate new token`.
3. Fill out the form with the following information:
```Generic
Token name:
KnightCrawler
Expiration:
90 days
Description:
<blank>
Repository access:
(checked) Public Repositories (read-only)
```
4. Click `Generate token`.
5. Take the new token and add it to the bottom of the <path>.env</path> file:
```Bash
# Producer
GITHUB_PAT=<YOUR TOKEN HERE>
```
## Start Knight Crawler
To start Knight Crawler use the following command:
```Bash
docker compose up -d
```
Then we can follow the logs to watch it start:
```Bash
docker compose logs -f --since 1m
```
> Knight Crawler will only be accessible on the machine you run it on, to make it accessible from other machines navigate to [External access](External-access.md).
>
{style="note"}
To stop following the logs press <shortcut>Ctrl+C</shortcut> at any time.
The Knight Crawler configuration page should now be accessible in your web browser at [http://localhost:7000](http://localhost:7000)
## Start more consumers
If you wish to speed up the processing of the films and tv shows that Knight Crawler finds, then you'll likely want to
increase the number of consumers.
The below command can be used to both increase or decrease the number of running consumers. Gradually increase the number
until you encounter any issues and then decrease until stable.
```Bash
docker compose up -d --scale consumer=<number>
```
## Stop Knight Crawler
Knight Crawler can be stopped with the following command:
```Bash
docker compose down
```

View File

@@ -1,30 +0,0 @@
# Overview
<img alt="The image shows a Knight in silvery armour looking forwards." src="knight-crawler-logo.png" title="Knight Crawler logo" width="100"/>
Knight Crawler is a self-hosted [Stremio](https://www.stremio.com/) addon for streaming torrents via
a [Debrid](Supported-Debrid-services.md "Click for a list of Debrid services we support") service.
We are active on [Discord](https://discord.gg/8fQdxay9z2) for both support and casual conversation.
> Knight Crawler is currently alpha software.
>
> Users are responsible for ensuring their data is backed up regularly.
>
> Please read the changelogs before updating to the latest version.
>
{style="warning"}
## What does Knight Crawler do?
Knight Crawler is an addon for [Stremio](https://www.stremio.com/). It began as a fork of the very popular
[Torrentio](https://github.com/TheBeastLT/torrentio-scraper) addon. Knight crawler essentially does the following:
1. It searches the internet for available films and tv shows.
2. It collects as much information as it can about each film and tv show it finds.
3. It then stores this information to a database for easy access.
When you choose a film or tv show to watch on Stremio, a request will be sent to your installation of Knight Crawler.
Knight Crawler will query the database and return a list of all the copies it has stored in the database as Debrid
links.
This enables playback to begin immediately for your chosen media.

View File

@@ -1,3 +0,0 @@
# Supported Debrid services
Start typing here...

View File

@@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE ihp SYSTEM "https://resources.jetbrains.com/writerside/1.0/ihp.dtd">
<ihp version="2.0">
<topics dir="topics" web-path="topics"/>
<images dir="images" web-path="knightcrawler"/>
<instance src="kc.tree"/>
</ihp>

View File

@@ -9,13 +9,13 @@
"version": "0.0.1",
"license": "MIT",
"dependencies": {
"@ctrl/video-filename-parser": "^5.2.0",
"@tirke/node-cache-manager-mongodb": "^1.6.0",
"amqplib": "^0.10.3",
"axios": "^1.6.1",
"bottleneck": "^2.19.5",
"cache-manager": "^5.4.0",
"fuse.js": "^7.0.0",
"google-sr": "^3.2.1",
"inversify": "^6.0.2",
"magnet-uri": "^6.2.0",
"moment": "^2.30.1",
@@ -793,6 +793,17 @@
"@jridgewell/sourcemap-codec": "^1.4.10"
}
},
"node_modules/@ctrl/video-filename-parser": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/@ctrl/video-filename-parser/-/video-filename-parser-5.2.0.tgz",
"integrity": "sha512-6F9inbv+wXc82tG4jGcZX9j0YgpHyyF9dP+6M0GbWbQwLLwpgVcKs70DvmK7unE8cb6FFGlD/n1QDKk86mEJ7A==",
"workspaces": [
"demo"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/aix-ppc64": {
"version": "0.20.0",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.20.0.tgz",
@@ -2923,11 +2934,6 @@
"resolved": "https://registry.npmjs.org/bncode/-/bncode-0.5.3.tgz",
"integrity": "sha512-0P5VuWobU5Gwbeio8n9Jsdv0tE1IikrV9n4f7RsnXHNtxmdd/oeIO6QyoSEUAEyo5P6i3XMfBppi82WqNsT4JA=="
},
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="
},
"node_modules/bottleneck": {
"version": "2.19.5",
"resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz",
@@ -3189,42 +3195,6 @@
"integrity": "sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==",
"dev": true
},
"node_modules/cheerio": {
"version": "1.0.0-rc.12",
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz",
"integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==",
"dependencies": {
"cheerio-select": "^2.1.0",
"dom-serializer": "^2.0.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1",
"htmlparser2": "^8.0.1",
"parse5": "^7.0.0",
"parse5-htmlparser2-tree-adapter": "^7.0.0"
},
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/cheeriojs/cheerio?sponsor=1"
}
},
"node_modules/cheerio-select": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz",
"integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==",
"dependencies": {
"boolbase": "^1.0.0",
"css-select": "^5.1.0",
"css-what": "^6.1.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/chokidar": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
@@ -3533,32 +3503,6 @@
"node": ">= 8"
}
},
"node_modules/css-select": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz",
"integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==",
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^6.1.0",
"domhandler": "^5.0.2",
"domutils": "^3.0.1",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/css-what": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==",
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/cyclist": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/cyclist/-/cyclist-0.1.1.tgz",
@@ -3629,14 +3573,6 @@
"node": ">=0.10.0"
}
},
"node_modules/deepmerge-ts": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/deepmerge-ts/-/deepmerge-ts-5.1.0.tgz",
"integrity": "sha512-eS8dRJOckyo9maw9Tu5O5RUi/4inFLrnoLkBe3cPfDMx3WZioXtmOew4TXQaxq7Rhl4xjDtR7c6x8nNTxOvbFw==",
"engines": {
"node": ">=16.0.0"
}
},
"node_modules/defaults": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/defaults/-/defaults-1.0.4.tgz",
@@ -3745,57 +3681,6 @@
"node": ">=6.0.0"
}
},
"node_modules/dom-serializer": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
"integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.2",
"entities": "^4.2.0"
},
"funding": {
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
}
},
"node_modules/domelementtype": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
"integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
]
},
"node_modules/domhandler": {
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
"integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
"dependencies": {
"domelementtype": "^2.3.0"
},
"engines": {
"node": ">= 4"
},
"funding": {
"url": "https://github.com/fb55/domhandler?sponsor=1"
}
},
"node_modules/domutils": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz",
"integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==",
"dependencies": {
"dom-serializer": "^2.0.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3"
},
"funding": {
"url": "https://github.com/fb55/domutils?sponsor=1"
}
},
"node_modules/dottie": {
"version": "2.0.6",
"resolved": "https://registry.npmjs.org/dottie/-/dottie-2.0.6.tgz",
@@ -3857,17 +3742,6 @@
"once": "^1.4.0"
}
},
"node_modules/entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/error-ex": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz",
@@ -4984,23 +4858,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/google-sr": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/google-sr/-/google-sr-3.2.1.tgz",
"integrity": "sha512-1WGy6mxMTKo+jbIrmq1mwm+2Egvmx9ttsXzCiR0Y2LMcpeG4shqc8C4g12msi4arRn9qEwG1qrFQ1W9jo3dDzw==",
"dependencies": {
"axios": "^1.4.0",
"cheerio": "1.0.0-rc.12",
"deepmerge-ts": "^5.1.0",
"google-sr-selectors": "^0.0.2",
"tslib": "^2.6.1"
}
},
"node_modules/google-sr-selectors": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/google-sr-selectors/-/google-sr-selectors-0.0.2.tgz",
"integrity": "sha512-7h+vo7NSDf+pZB/InDon4mwhXeTvy/9yvAChGnjppcdHgTwlUWDpYPWGUn781J3PrjBj6rZAginsSTGqG5uUZw=="
},
"node_modules/gopd": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz",
@@ -5141,24 +4998,6 @@
"integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==",
"dev": true
},
"node_modules/htmlparser2": {
"version": "8.0.2",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz",
"integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==",
"funding": [
"https://github.com/fb55/htmlparser2?sponsor=1",
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1",
"entities": "^4.4.0"
}
},
"node_modules/human-signals": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz",
@@ -6994,17 +6833,6 @@
"node": ">=8"
}
},
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
"dependencies": {
"boolbase": "^1.0.0"
},
"funding": {
"url": "https://github.com/fb55/nth-check?sponsor=1"
}
},
"node_modules/object-inspect": {
"version": "1.13.1",
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz",
@@ -7317,29 +7145,6 @@
"node": ">=0.2.6"
}
},
"node_modules/parse5": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
"dependencies": {
"entities": "^4.4.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5-htmlparser2-tree-adapter": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz",
"integrity": "sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==",
"dependencies": {
"domhandler": "^5.0.2",
"parse5": "^7.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/path-exists": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@@ -9407,7 +9212,8 @@
"node_modules/tslib": {
"version": "2.6.2",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==",
"dev": true
},
"node_modules/tsx": {
"version": "4.7.0",

View File

@@ -14,13 +14,13 @@
},
"license": "MIT",
"dependencies": {
"@ctrl/video-filename-parser": "^5.2.0",
"@tirke/node-cache-manager-mongodb": "^1.6.0",
"amqplib": "^0.10.3",
"axios": "^1.6.1",
"bottleneck": "^2.19.5",
"cache-manager": "^5.4.0",
"fuse.js": "^7.0.0",
"google-sr": "^3.2.1",
"inversify": "^6.0.2",
"magnet-uri": "^6.2.0",
"moment": "^2.30.1",

View File

@@ -10,7 +10,6 @@ import {IMetadataService} from "@interfaces/metadata_service";
import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
import {IocTypes} from "@setup/ioc_types";
import axios from 'axios';
import {ResultTypes, search} from 'google-sr';
import {inject, injectable} from "inversify";
import nameToImdb from 'name-to-imdb';
@@ -46,10 +45,7 @@ export class MetadataService implements IMetadataService {
const name = this.escapeTitle(info.title!);
const year = info.year || (info.date && info.date.slice(0, 4));
const key = `${name}_${year || 'NA'}_${info.type}`;
const query = `${name} ${year || ''} ${info.type} imdb`;
const fallbackQuery = `${name} ${info.type} imdb`;
const googleQuery = year ? query : fallbackQuery;
const imdbInMongo = await this.mongoRepository.getImdbId(name, info.type, year);
if (imdbInMongo) {
@@ -62,8 +58,7 @@ export class MetadataService implements IMetadataService {
);
return imdbId && 'tt' + imdbId.replace(/tt0*([1-9][0-9]*)$/, '$1').padStart(7, '0');
} catch (error) {
const imdbIdFallback = await this.getIMDbIdFromGoogle(googleQuery);
return imdbIdFallback && 'tt' + imdbIdFallback.toString().replace(/tt0*([1-9][0-9]*)$/, '$1').padStart(7, '0');
return undefined;
}
}
@@ -146,9 +141,9 @@ export class MetadataService implements IMetadataService {
? body.meta.videos.map(video => ({
name: video.name,
season: video.season,
episode: video.episode,
episode: video.number,
imdbSeason: video.season,
imdbEpisode: video.episode,
imdbEpisode: video.number,
}))
: [],
episodeCount: body.meta?.videos
@@ -209,34 +204,15 @@ export class MetadataService implements IMetadataService {
private getIMDbIdFromNameToImdb = (name: string, info: IMetaDataQuery): Promise<string | Error> => {
const {year} = info;
const {type} = info;
const type = info.type === TorrentType.Movie ? 'movie' : 'series';
return new Promise((resolve, reject) => {
nameToImdb({name, year, type}, function (err: Error, res: string) {
nameToImdb({name, year, type}, function (err: Error | null, res: string) {
if (res) {
resolve(res);
} else {
reject(err || new Error('Failed IMDbId search'));
reject(err || new Error('Failed to get IMDb id from name-to-imdb'));
}
});
});
};
/**
 * Last-resort IMDb id lookup: runs a Google search for the supplied query and
 * returns the first `ttNNNNNNN` id found in an organic-result link pointing at
 * imdb.com/title/. Returns undefined when no such link is present.
 * Throws a generic Error when the search request itself fails.
 */
private getIMDbIdFromGoogle = async (query: string): Promise<string | undefined> => {
    const imdbTitlePattern = /imdb\.com\/title\/(tt\d+)/;
    try {
        const results = await search({query});
        for (const result of results) {
            // Only organic search results carry a usable link.
            if (result.type !== ResultTypes.SearchResult) {
                continue;
            }
            if (!result.link.includes('imdb.com/title/')) {
                continue;
            }
            const match = result.link.match(imdbTitlePattern);
            if (match) {
                return match[1];
            }
        }
        return undefined;
    } catch (error) {
        throw new Error('Failed to find IMDb ID from Google search');
    }
};
}

View File

@@ -81,17 +81,27 @@ export class TorrentEntriesService implements ITorrentEntriesService {
return;
}
if (fileCollection.videos.some(video => parse(torrent.title).season !== undefined && video.imdbEpisode === undefined && video.imdbSeason === undefined && video.kitsuEpisode === undefined)) {
this.logger.warn(`Unsatisfied episode and season found for ${torrent.provider} [${torrent.infoHash}] ${torrent.title} - skipping torrent`);
return;
}
const newTorrent: ITorrentCreationAttributes = ({
...torrent,
contents: fileCollection.contents,
subtitles: fileCollection.subtitles
});
newTorrent.type = newTorrent.type.toLowerCase();
return this.repository.createTorrent(newTorrent)
.then(() => PromiseHelpers.sequence(fileCollection.videos!.map(video => () => {
const newVideo: IFileCreationAttributes = {...video, infoHash: video.infoHash, title: video.title};
if (!newVideo.kitsuId) {
newVideo.kitsuId = 0;
newVideo.kitsuEpisode = 0;
}
if (newVideo.imdbEpisode && !newVideo.imdbSeason) {
newVideo.imdbSeason = 1;
}
return this.repository.createFile(newVideo)
})))

View File

@@ -1,10 +1,6 @@
import {TorrentType} from '@enums/torrent_types';
import {ExtensionHelpers} from '@helpers/extension_helpers';
import {PromiseHelpers} from '@helpers/promises_helpers';
import {ICommonVideoMetadata} from "@interfaces/common_video_metadata";
import {ILoggingService} from "@interfaces/logging_service";
import {IMetaDataQuery} from "@interfaces/metadata_query";
import {IMetadataResponse} from "@interfaces/metadata_response";
import {IMetadataService} from "@interfaces/metadata_service";
import {IParsedTorrent} from "@interfaces/parsed_torrent";
import {ITorrentDownloadService} from "@interfaces/torrent_download_service";
@@ -14,27 +10,20 @@ import {IContentAttributes} from "@repository/interfaces/content_attributes";
import {IFileAttributes} from "@repository/interfaces/file_attributes";
import {configurationService} from '@services/configuration_service';
import {IocTypes} from "@setup/ioc_types";
import Bottleneck from 'bottleneck';
import {inject, injectable} from "inversify";
import moment from 'moment';
import {parse} from 'parse-torrent-title';
import { filenameParse } from '@ctrl/video-filename-parser';
import {ParsedShow} from "@ctrl/video-filename-parser/dist/src/filenameParse";
const MIN_SIZE: number = 5 * 1024 * 1024; // 5 MB
const MULTIPLE_FILES_SIZE = 4 * 1024 * 1024 * 1024; // 4 GB
type SeasonEpisodeMap = Record<number, Record<number, ICommonVideoMetadata>>;
@injectable()
export class TorrentFileService implements ITorrentFileService {
@inject(IocTypes.IMetadataService) metadataService: IMetadataService;
@inject(IocTypes.ITorrentDownloadService) torrentDownloadService: ITorrentDownloadService;
@inject(IocTypes.ILoggingService) logger: ILoggingService;
private readonly imdb_limiter: Bottleneck = new Bottleneck({
maxConcurrent: configurationService.metadataConfig.IMDB_CONCURRENT,
minTime: configurationService.metadataConfig.IMDB_INTERVAL_MS
});
async parseTorrentFiles(torrent: IParsedTorrent): Promise<ITorrentFileCollection> {
if (!torrent.title) {
return Promise.reject(new Error('Torrent title is missing'));
@@ -44,30 +33,17 @@ export class TorrentFileService implements ITorrentFileService {
return Promise.reject(new Error('Torrent infoHash is missing'));
}
const parsedTorrentName = parse(torrent.title);
const query: IMetaDataQuery = {
id: torrent.kitsuId || torrent.imdbId,
type: torrent.type || TorrentType.Movie,
};
const metadata = await this.metadataService.getMetadata(query)
.then(meta => Object.assign({}, meta))
.catch(() => undefined);
let fileCollection: ITorrentFileCollection;
if (metadata === undefined || metadata instanceof Error) {
return Promise.reject(new Error('Failed to retrieve metadata'));
const isSeries = parse(torrent.title).seasons || this.isSeries(torrent.title);
if (!isSeries){
fileCollection = await this.parseMovieFiles(torrent);
} else {
fileCollection = await this.parseSeriesFiles(torrent);
}
if (torrent.type !== TorrentType.Anime && metadata && metadata.type && metadata.type !== torrent.type) {
// it's actually a movie/series
torrent.type = metadata.type;
}
if (torrent.type === TorrentType.Movie && (!parsedTorrentName.seasons ||
parsedTorrentName.season === 5 && [1, 5].includes(parsedTorrentName.episode || 0))) {
return this.parseMovieFiles(torrent, metadata);
}
return this.parseSeriesFiles(torrent, metadata)
return fileCollection;
}
isPackTorrent(torrent: IParsedTorrent): boolean {
@@ -100,7 +76,7 @@ export class TorrentFileService implements ITorrentFileService {
return parsedVideos.map(video => ({...video, isMovie: this.isMovieVideo(torrent, video, parsedVideos, hasMovies)}));
};
private parseMovieFiles = async (torrent: IParsedTorrent, metadata: IMetadataResponse): Promise<ITorrentFileCollection> => {
private parseMovieFiles = async (torrent: IParsedTorrent): Promise<ITorrentFileCollection> => {
const fileCollection: ITorrentFileCollection = await this.getMoviesTorrentContent(torrent);
if (fileCollection.videos === undefined || fileCollection.videos.length === 0) {
return {...fileCollection, videos: this.getDefaultFileEntries(torrent)};
@@ -115,26 +91,24 @@ export class TorrentFileService implements ITorrentFileService {
fileIndex: video.fileIndex,
title: video.title || video.path || video.fileName || '',
size: video.size || torrent.size,
imdbId: torrent.imdbId?.toString() || metadata && metadata.imdbId?.toString(),
kitsuId: parseInt(torrent.kitsuId?.toString() || metadata && metadata.kitsuId?.toString() || '0')
imdbId: torrent.imdbId?.toString(),
kitsuId: parseInt(torrent.kitsuId?.toString() || '0')
}));
return {...fileCollection, videos: parsedVideos};
}
const parsedVideos = await PromiseHelpers.sequence(filteredVideos.map(video => () => this.isFeaturette(video)
? Promise.resolve(video)
: this.findMovieImdbId(video.title).then(imdbId => ({...video, imdbId: imdbId?.toString() || ''}))))
.then(videos => videos.map((video: IFileAttributes) => ({
infoHash: torrent.infoHash,
fileIndex: video.fileIndex,
title: video.title || video.path,
size: video.size,
imdbId: video.imdbId,
})));
const parsedVideos = filteredVideos.map(video => ({
infoHash: torrent.infoHash,
fileIndex: video.fileIndex,
title: video.title || video.path,
size: video.size,
imdbId: torrent.imdbId.toString() || ''
}));
return {...fileCollection, videos: parsedVideos};
};
private parseSeriesFiles = async (torrent: IParsedTorrent, metadata: IMetadataResponse): Promise<ITorrentFileCollection> => {
private parseSeriesFiles = async (torrent: IParsedTorrent): Promise<ITorrentFileCollection> => {
const fileCollection: ITorrentFileCollection = await this.getSeriesTorrentContent(torrent);
if (fileCollection.videos === undefined || fileCollection.videos.length === 0) {
return {...fileCollection, videos: this.getDefaultFileEntries(torrent)};
@@ -143,14 +117,13 @@ export class TorrentFileService implements ITorrentFileService {
const parsedVideos: IFileAttributes[] = await Promise.resolve(fileCollection.videos)
.then(videos => videos.filter(video => videos?.length === 1 || video.size! > MIN_SIZE))
.then(videos => this.parseSeriesVideos(torrent, videos))
.then(videos => this.decomposeEpisodes(torrent, videos, metadata))
.then(videos => this.assignKitsuOrImdbEpisodes(torrent, videos, metadata))
.then(videos => Promise.all(videos.map(video => video.isMovie
? this.mapSeriesMovie(torrent, video)
: this.mapSeriesEpisode(torrent, video, videos))))
.then(videos => videos
.reduce((a, b) => a.concat(b), [])
.map(video => this.isFeaturette(video) ? this.clearInfoFields(video) : video));
.map(video => this.isFeaturette(video) ? this.clearInfoFields(video) : video))
.then(videos => Promise.all(videos.flatMap(video => this.mapSeriesEpisode(torrent, video, videos))))
.then(videos => videos.flat());
return {...torrent.fileCollection, videos: parsedVideos};
};
@@ -195,6 +168,10 @@ export class TorrentFileService implements ITorrentFileService {
title: file.path || file.title,
size: file.size,
imdbId: torrent?.imdbId?.toString() || file?.imdbId?.toString() || '',
kitsuId: torrent?.kitsuId || file.kitsuId || 0,
imdbSeason: file.imdbSeason,
imdbEpisode: file.imdbEpisode,
kitsuEpisode: file.kitsuEpisode,
}]);
}
return Promise.resolve([]);
@@ -206,439 +183,16 @@ export class TorrentFileService implements ITorrentFileService {
title: file.path || file.title,
size: file.size,
imdbId: file?.imdbId?.toString() || torrent?.imdbId?.toString() || '',
imdbSeason: file.season,
imdbSeason: file.imdbSeason,
season: file.season,
imdbEpisode: file.episodes && file.episodes[index],
episode: file.episodes && file.episodes[index],
kitsuEpisode: file.episodes && file.episodes[index],
kitsuEpisode: file.kitsuId && file.kitsuId !== 0 && file.episodes && file.episodes[index],
episodes: file.episodes,
kitsuId: parseInt(file.kitsuId?.toString() || torrent.kitsuId?.toString() || '0') || 0,
})))
};
/**
 * Maps a file inside a series torrent that was classified as a standalone
 * movie (e.g. an OVA or a bundled film) to its own file entry.
 * For anime torrents a Kitsu id is resolved first; otherwise an IMDb id is
 * looked up. The entry is then enriched with metadata when available.
 * Always resolves to a one-element array so callers can flatten uniformly.
 */
private mapSeriesMovie = async (torrent: IParsedTorrent, file: IFileAttributes): Promise<IFileAttributes[]> => {
    // A failed Kitsu lookup is logged and treated as "no id" rather than fatal.
    const kitsuId = torrent.type === TorrentType.Anime ? await this.findMovieKitsuId(file)
        .then(result => {
            if (result instanceof Error) {
                this.logger.warn(`Failed to retrieve kitsuId due to error: ${result.message}`);
                return undefined;
            }
            return result;
        }) : undefined;
    // Only fall back to an IMDb lookup when no Kitsu id was found.
    const imdbId = !kitsuId ? await this.findMovieImdbId(file) : undefined;
    const query: IMetaDataQuery = {
        id: kitsuId || imdbId,
        type: TorrentType.Movie
    };
    const metadataOrError = await this.metadataService.getMetadata(query);
    if (metadataOrError instanceof Error) {
        this.logger.warn(`Failed to retrieve metadata due to error: ${metadataOrError.message}`);
        // Metadata is optional enrichment: fall back to the ids resolved above.
        return [{
            infoHash: torrent.infoHash,
            fileIndex: file.fileIndex,
            title: file.path || file.title,
            size: file.size,
            imdbId: imdbId,
            kitsuId: parseInt(kitsuId?.toString() || '0') || 0,
            episodes: undefined,
            imdbSeason: undefined,
            imdbEpisode: undefined,
            kitsuEpisode: undefined
        }];
    }
    const metadata = metadataOrError;
    // A "movie" can still map onto an episode entry when the metadata lists
    // videos; a single-video title is treated as episode 1.
    const hasEpisode = metadata.videos && metadata.videos.length && (file.episode || metadata.videos.length === 1);
    const episodeVideo = hasEpisode && metadata.videos && metadata.videos[(file.episode || 1) - 1];
    // An episode mapping is only meaningful when the metadata carries an id to
    // scope it. (The original expression duplicated `episodeVideo.episode ||
    // episodeVideo.episode` and repeated the whole &&/|| condition twice.)
    const hasScopedEpisode = episodeVideo && (metadata.imdbId || metadata.kitsuId);
    return [{
        infoHash: torrent.infoHash,
        fileIndex: file.fileIndex,
        title: file.path || file.title,
        size: file.size,
        imdbId: metadata.imdbId?.toString() || imdbId || '',
        kitsuId: parseInt(metadata.kitsuId?.toString() || kitsuId?.toString() || '0') || 0,
        imdbSeason: episodeVideo && metadata.imdbId ? episodeVideo.season : undefined,
        imdbEpisode: hasScopedEpisode && episodeVideo ? episodeVideo.episode : undefined,
        kitsuEpisode: hasScopedEpisode && episodeVideo ? episodeVideo.episode : undefined,
    }];
};
/**
 * Normalises raw per-file episode information into season/episode pairs,
 * choosing one of several decomposition strategies (concatenated SxxEyy
 * numbers, air dates, or absolute numbering). Mutates `files` in place and
 * returns the same array. Anime with a kitsuId may first be upgraded to
 * Cinemeta metadata when the local metadata looks wrong for the files.
 */
private decomposeEpisodes = async (torrent: IParsedTorrent, files: IFileAttributes[], metadata: IMetadataResponse = {episodeCount: []}): Promise<IFileAttributes[]> => {
    // Nothing to decompose when no file carries episode numbers or an air date.
    if (files.every(file => !file.episodes && !file.date)) {
        return files;
    }
    this.preprocessEpisodes(files);
    if (torrent.type === TorrentType.Anime && torrent.kitsuId) {
        if (this.needsCinemetaMetadataForAnime(files, metadata)) {
            // In some cases anime could be resolved to wrong kitsuId
            // because of imdb season naming/absolute per series naming/multiple seasons
            // So in these cases we need to fetch cinemeta based metadata and decompose episodes using that
            await this.updateToCinemetaMetadata(metadata);
            if (files.some(file => Number.isInteger(file.season))) {
                // sometimes multi season anime torrents don't include season 1 naming
                files
                    .filter(file => !Number.isInteger(file.season) && file.episodes)
                    .forEach(file => file.season = 1);
            }
        } else {
            // otherwise for anime type episodes are always absolute and for a single season
            files
                .filter(file => file.episodes && file.season !== 0)
                .forEach(file => file.season = 1);
            // Anime handled entirely here; skip the generic strategies below.
            return files;
        }
    }
    // Flatten all non-movie episode numbers to drive strategy selection.
    const sortedEpisodes = files
        .map(file => !file.isMovie && file.episodes || [])
        .reduce((a, b) => a.concat(b), [])
        .sort((a, b) => a - b);
    // Strategies are mutually exclusive; the first matching one wins.
    if (this.isConcatSeasonAndEpisodeFiles(files, sortedEpisodes, metadata)) {
        this.decomposeConcatSeasonAndEpisodeFiles(files, metadata);
    } else if (this.isDateEpisodeFiles(files, metadata)) {
        this.decomposeDateEpisodeFiles(files, metadata);
    } else if (this.isAbsoluteEpisodeFiles(torrent, files, metadata)) {
        this.decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
    }
    // decomposeEpisodeTitleFiles(torrent, files, metadata);
    return files;
};
/**
 * Re-interprets "S02E00"-style special-episode naming in place: a file named
 * with episode 0 actually carries the season number as its episode, so it is
 * rewritten as season 0 (specials) with that number as the episode.
 */
private preprocessEpisodes = (files: IFileAttributes[]): void => {
    // reverse special episode naming when they named with 0 episode, ie. S02E00
    for (const file of files) {
        if (!Number.isInteger(file.season) || file.episode !== 0) {
            continue;
        }
        file.episode = file.season;
        file.episodes = [file.season ?? 0];
        file.season = 0;
    }
};
/**
 * Heuristic: do the files use concatenated season+episode numbering
 * (e.g. 511 == S05E11)? Decides whether decomposeConcatSeasonAndEpisodeFiles
 * should run. Thresholds require most files (80%) to fit the scheme, or a
 * small fraction (5%) to exceed the series' total episode count.
 */
private isConcatSeasonAndEpisodeFiles = (files: IFileAttributes[], sortedEpisodes: number[], metadata: IMetadataResponse): boolean => {
    if (metadata.kitsuId !== undefined) {
        // anime does not use this naming scheme in 99% of cases;
        return false;
    }
    // decompose concat season and episode files (ex. 101=S01E01) in case:
    // 1. file has a season, but individual files are concatenated with that season (ex. path Season 5/511 - Prize
    // Fighters.avi)
    // 2. file does not have a season and the episode does not go out of range for the concat season
    // episode count
    const thresholdAbove = Math.max(Math.ceil(files.length * 0.05), 5);
    const thresholdSorted = Math.max(Math.ceil(files.length * 0.8), 8);
    const threshold = Math.max(Math.ceil(files.length * 0.8), 5);
    // Episode numbers > 100 whose "season digit" has a plausible episode count:
    // larger than the episode count for that season, but the remainder fits in it.
    const sortedConcatEpisodes = sortedEpisodes
        .filter(ep => ep > 100)
        .filter(ep => metadata.episodeCount && metadata.episodeCount[this.div100(ep) - 1] < ep)
        .filter(ep => metadata.episodeCount && metadata.episodeCount[this.div100(ep) - 1] >= this.mod100(ep));
    // Files whose season (when present) matches the hundreds digit of every episode.
    const concatFileEpisodes = files
        .filter(file => !file.isMovie && file.episodes)
        .filter(file => !file.season || file.episodes?.every(ep => this.div100(ep) === file.season));
    // Files whose every episode exceeds the series' total count — a strong
    // signal of concatenated numbering even when only a few files show it.
    const concatAboveTotalEpisodeCount = files
        .filter(file => !file.isMovie && file.episodes && file.episodes.every(ep => ep > 100))
        .filter(file => file.episodes?.every(ep => ep > metadata.totalCount!));
    return sortedConcatEpisodes.length >= thresholdSorted && concatFileEpisodes.length >= threshold
        || concatAboveTotalEpisodeCount.length >= thresholdAbove;
};
/**
 * True when every file is identified by an air date rather than a season that
 * exists in the metadata — i.e. the torrent should be decomposed by date.
 */
private isDateEpisodeFiles = (files: IFileAttributes[], metadata: IMetadataResponse): boolean => {
    return files.every(file => {
        const seasonNotInMetadata = !file.season || (metadata.episodeCount && !metadata.episodeCount[file.season - 1]);
        return seasonNotInMetadata && file.date;
    });
};
/**
 * Heuristic: do the files use absolute episode numbering (episodes counted
 * across the whole series rather than per season)? True when no non-movie
 * file has a season, when an anime's seasons exceed the known season count,
 * or when at least a fifth of the files have episodes past their season's count.
 */
private isAbsoluteEpisodeFiles = (torrent: IParsedTorrent, files: IFileAttributes[], metadata: IMetadataResponse): boolean => {
    const threshold = Math.ceil(files.length / 5);
    const isAnime = torrent.type === TorrentType.Anime && torrent.kitsuId;
    const nonMovieEpisodes = files.filter(file => !file.isMovie && file.episodes);
    // Files whose every episode number exceeds the episode count of their own season.
    const absoluteEpisodes = files
        .filter(file => file.season && file.episodes)
        .filter(file => file.episodes?.every(ep =>
            metadata.episodeCount && file.season && metadata.episodeCount[file.season - 1] < ep));
    return nonMovieEpisodes.every(file => !file.season)
        || (isAnime && nonMovieEpisodes.every(file =>
            metadata.episodeCount && file.season && file.season > metadata.episodeCount.length))
        || absoluteEpisodes.length >= threshold;
};
/**
 * True when a video looks like a freshly-aired episode that the (possibly
 * stale) metadata does not know about yet: non-anime, a continuing series,
 * season at/after the last known season, and every episode number beyond that
 * season's known count.
 * NOTE(review): `video.season >= metadata.episodeCount.length` also matches the
 * last *known* season, not only newer ones — confirm the boundary is intended.
 */
private isNewEpisodeNotInMetadata = (torrent: IParsedTorrent, video: IFileAttributes, metadata: IMetadataResponse): boolean => {
    const isAnime = torrent.type === TorrentType.Anime && torrent.kitsuId;
    return !!(!isAnime && !video.isMovie && video.episodes && video.season !== 1
        && metadata.status && /continuing|current/i.test(metadata.status)
        && metadata.episodeCount && video.season && video.season >= metadata.episodeCount.length
        && video.episodes.every(ep => metadata.episodeCount && video.season && ep > (metadata.episodeCount[video.season - 1] || 0)));
};
/**
 * Splits concatenated season+episode numbers in place (e.g. 511 -> S05E11).
 * Only rewrites files where the hundreds digit is a plausible season (its
 * season has fewer than 100 episodes) and the digit agrees with the file's
 * parsed season when one exists.
 */
private decomposeConcatSeasonAndEpisodeFiles = (files: IFileAttributes[], metadata: IMetadataResponse): void => {
    files
        .filter(file => file.episodes && file.season !== 0 && file.episodes.every(ep => ep > 100))
        // The target season must exist and have < 100 episodes, otherwise a
        // 3-digit number could be a legitimate absolute episode.
        .filter(file => file.episodes && metadata?.episodeCount &&
            ((file.season || this.div100(file.episodes[0])) - 1) >= 0 &&
            metadata.episodeCount[(file.season || this.div100(file.episodes[0])) - 1] < 100)
        .filter(file => (file.season && file.episodes && file.episodes.every(ep => this.div100(ep) === file.season)) || !file.season)
        .forEach(file => {
            if (file.episodes) {
                file.season = this.div100(file.episodes[0]);
                file.episodes = file.episodes.map(ep => this.mod100(ep));
            }
        });
};
/**
 * Converts absolute episode numbers into per-season numbering in place, using
 * the cumulative episode counts from metadata. Files already fitting inside a
 * known season, specials (season 0), movies, and fresh episodes not yet in the
 * metadata are left untouched.
 */
private decomposeAbsoluteEpisodeFiles = (torrent: IParsedTorrent, videos: IFileAttributes[], metadata: IMetadataResponse): void => {
    // No per-season counts known: default season-less episode files to season 1.
    if (metadata.episodeCount?.length === 0) {
        videos
            .filter(file => !Number.isInteger(file.season) && file.episodes && !file.isMovie)
            .forEach(file => {
                file.season = 1;
            });
        return;
    }
    if (!metadata.episodeCount) return;
    videos
        .filter(file => file.episodes && !file.isMovie && file.season !== 0)
        .filter(file => !this.isNewEpisodeNotInMetadata(torrent, file, metadata))
        // Only decompose when the first episode falls outside the file's own season.
        .filter(file => {
            if (!file.episodes || !metadata.episodeCount) return false;
            return !file.season || (metadata.episodeCount[file.season - 1] || 0) < file.episodes[0];
        })
        .forEach(file => {
            if (!file.episodes || !metadata.episodeCount) return;
            // First season whose cumulative episode count reaches the absolute number.
            let seasonIdx = metadata.episodeCount
                .map((_, i) => i)
                .find(i => metadata.episodeCount && file.episodes && metadata.episodeCount.slice(0, i + 1).reduce((a, b) => a + b) >= file.episodes[0]);
            // NOTE(review): `seasonIdx || 1 || metadata.episodeCount.length` makes the
            // last operand unreachable, and maps both "not found" (undefined) and
            // season index 0 to the same result — presumably the fallback was meant
            // to be the last season; confirm intent before changing.
            seasonIdx = (seasonIdx || 1 || metadata.episodeCount.length) - 1;
            file.season = seasonIdx + 1;
            // Re-base the episode numbers against the start of the found season.
            file.episodes = file.episodes
                .map(ep => ep - (metadata.episodeCount?.slice(0, seasonIdx).reduce((a, b) => a + b, 0) || 0));
        });
};
/**
 * Assigns season/episode to date-named files in place by matching each file's
 * date against the metadata videos' release dates, shifted into the show's
 * broadcast time zone. Files whose date has no matching video are left as-is.
 */
private decomposeDateEpisodeFiles = (files: IFileAttributes[], metadata: IMetadataResponse): void => {
    if (!metadata || !metadata.videos || !metadata.videos.length) {
        return;
    }
    const timeZoneOffset = this.getTimeZoneOffset(metadata.country);
    // Index metadata videos by their local (offset-adjusted) release date.
    const offsetVideos: { [key: string]: ICommonVideoMetadata } = metadata.videos
        .reduce((map: { [key: string]: ICommonVideoMetadata }, video: ICommonVideoMetadata) => {
            const releaseDate = moment(video.released).utcOffset(timeZoneOffset).format('YYYY-MM-DD');
            map[releaseDate] = video;
            return map;
        }, {});
    files
        .filter(file => file.date)
        .forEach(file => {
            const video = offsetVideos[file.date!];
            if (video) {
                file.season = video.season;
                file.episodes = [video.episode || 0];
            }
        });
};
/**
 * UTC offset used when matching air dates to metadata release dates.
 * US shows are keyed to Pacific time (-08:00); everything else defaults to UTC.
 */
private getTimeZoneOffset = (country: string | undefined): string => {
    const unitedStatesAliases = ['United States', 'USA'];
    return country !== undefined && unitedStatesAliases.includes(country)
        ? '-08:00'
        : '00:00';
};
/**
 * Reconciles per-file season/episode numbers with the metadata's video list,
 * assigning imdbId/kitsuId and remapping episode numbers in place.
 * Three regimes: no usable metadata (anime episodes cleared), kitsu-style
 * metadata with its own season numbering, or imdb-style metadata where the
 * file numbering may be offset by a season or be a sequel title.
 * Returns the same (mutated) `files` array.
 */
private assignKitsuOrImdbEpisodes = (torrent: IParsedTorrent, files: IFileAttributes[], metadata: IMetadataResponse): IFileAttributes[] => {
    if (!metadata || !metadata.videos || !metadata.videos.length) {
        if (torrent.type === TorrentType.Anime) {
            // assign episodes as kitsu episodes for anime when no metadata available for imdb mapping
            files
                .filter(file => file.season && file.episodes)
                .forEach(file => {
                    file.season = undefined;
                    file.episodes = undefined;
                })
            // NOTE(review): this movie-imdbId fallback only runs for anime torrents
            // because of the enclosing `if`; the comment suggests it may have been
            // meant for all types — confirm against the caller.
            if (metadata.type === TorrentType.Movie && files.every(file => !file.imdbId)) {
                // sometimes a movie has episode naming, thus not recognized as a movie and imdbId not assigned
                files.forEach(file => file.imdbId = metadata.imdbId?.toString());
            }
        }
        return files;
    }
    // season -> episode -> video lookup built from the metadata's video list.
    const seriesMapping = metadata.videos
        .filter(video => video.season !== undefined && Number.isInteger(video.season) && video.episode !== undefined && Number.isInteger(video.episode))
        .reduce<SeasonEpisodeMap>((map, video) => {
            if (video.season !== undefined && video.episode !== undefined) {
                const episodeMap = map[video.season] || {};
                episodeMap[video.episode] = video;
                map[video.season] = episodeMap;
            }
            return map;
        }, {});
    if (metadata.videos.some(video => Number.isInteger(video.season)) || !metadata.imdbId) {
        // Metadata uses its own season numbering (kitsu-style): remap each file
        // through the lookup, or clear season/episodes when no mapping exists.
        files.filter(file => file && Number.isInteger(file.season) && file.episodes)
            .map(file => {
                const seasonMapping = file && file.season && seriesMapping[file.season] || null;
                const episodeMapping = seasonMapping && file && file.episodes && file.episodes[0] && seasonMapping[file.episodes[0]] || null;
                if (episodeMapping && Number.isInteger(episodeMapping.season)) {
                    file.imdbId = metadata.imdbId?.toString();
                    file.season = episodeMapping.season;
                    file.episodes = file.episodes && file.episodes.map(ep => (seasonMapping && seasonMapping[ep]) ? Number(seasonMapping[ep].episode) : 0);
                } else {
                    file.season = undefined;
                    file.episodes = undefined;
                }
            });
    } else if (metadata.videos.some(video => video.episode)) {
        // imdb episode info is base
        files
            .filter(file => Number.isInteger(file.season) && file.episodes)
            .forEach(file => {
                if (!file.season || !file.episodes) {
                    return;
                }
                if (seriesMapping[file.season]) {
                    // Direct hit: the file's season exists in the metadata.
                    const seasonMapping = seriesMapping[file.season];
                    file.imdbId = metadata.imdbId?.toString();
                    file.kitsuId = seasonMapping[file.episodes[0]] && parseInt(seasonMapping[file.episodes[0]].id || '0') || 0;
                    file.episodes = file.episodes.map(ep => seasonMapping[ep]?.episode)
                        .filter((ep): ep is number => ep !== undefined);
                } else if (seriesMapping[file.season - 1]) {
                    // sometimes a second season might be a continuation of the previous season
                    const seasonMapping = seriesMapping[file.season - 1] as ICommonVideoMetadata;
                    const episodes = Object.values(seasonMapping);
                    // NOTE(review): `firstKitsuId` is a whole episode object (or 0),
                    // yet it is compared with `ep.id` below, so `skippedCount` is
                    // likely always 0 — verify against the original intent.
                    const firstKitsuId = episodes.length && episodes[0];
                    const differentTitlesCount = new Set(episodes.map(ep => ep.id)).size
                    const skippedCount = episodes.filter(ep => ep.id === firstKitsuId).length;
                    const emptyArray: number[] = [];
                    // All episode numbers other files claim for this same season.
                    const seasonEpisodes = files
                        .filter((otherFile: IFileAttributes) => otherFile.season === file.season && otherFile.episodes)
                        .reduce((a, b) => a.concat(b.episodes || []), emptyArray);
                    const isAbsoluteOrder = seasonEpisodes.every(ep => ep > skippedCount && ep <= episodes.length)
                    const isNormalOrder = seasonEpisodes.every(ep => ep + skippedCount <= episodes.length)
                    if (differentTitlesCount >= 1 && (isAbsoluteOrder || isNormalOrder)) {
                        // Capture season/episode before mutating them below.
                        const {season} = file;
                        const [episode] = file.episodes;
                        file.imdbId = metadata.imdbId?.toString();
                        file.season = file.season - 1;
                        file.episodes = file.episodes.map(ep => isAbsoluteOrder ? ep : ep + skippedCount);
                        const currentEpisode = seriesMapping[season][episode];
                        file.kitsuId = currentEpisode ? parseInt(currentEpisode.id || '0') : 0;
                        if (typeof season === 'number' && Array.isArray(file.episodes)) {
                            file.episodes = file.episodes.map(ep =>
                                seriesMapping[season]
                                && seriesMapping[season][ep]
                                && seriesMapping[season][ep].episode
                                || ep);
                        }
                    }
                } else if (Object.values(seriesMapping).length === 1 && seriesMapping[1]) {
                    // sometimes series might be named with sequel season but it's not a season on imdb and a new title
                    // eslint-disable-next-line prefer-destructuring
                    const seasonMapping = seriesMapping[1];
                    file.imdbId = metadata.imdbId?.toString();
                    file.season = 1;
                    file.kitsuId = parseInt(seasonMapping[file.episodes[0]].id || '0') || 0;
                    file.episodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].episode)
                        .filter((ep): ep is number => ep !== undefined);
                }
            });
    }
    return files;
};
/**
 * Decides whether an anime torrent's kitsu-derived metadata should be replaced
 * with Cinemeta metadata: when the metadata spans multiple seasons, or when a
 * file's season/episode falls outside the range the metadata knows about.
 * Requires an imdbId and a non-empty video list to even consider the upgrade.
 */
private needsCinemetaMetadataForAnime = (files: IFileAttributes[], metadata: IMetadataResponse): boolean => {
    if (!metadata || !metadata.imdbId || !metadata.videos || !metadata.videos.length) {
        return false;
    }
    const seasons = metadata.videos
        .map(video => video.season)
        .filter((season): season is number => season !== null && season !== undefined);
    // Using || 0 instead of || Number.MAX_VALUE to match previous logic
    // NOTE(review): Math.min()/Math.max() over an empty spread yield +/-Infinity,
    // which is truthy, so the "|| 0" fallback only fires for NaN — confirm intent.
    const minSeason = Math.min(...seasons) || 0;
    const maxSeason = Math.max(...seasons) || 0;
    const differentSeasons = new Set(seasons.filter(season => Number.isInteger(season))).size;
    const total = metadata.totalCount || Number.MAX_VALUE;
    // Bug fix: the previous expression `file.season || 0 < minSeason || ...`
    // parsed as `file.season || (0 < minSeason) || ...` due to operator
    // precedence, making it truthy for nearly any file. The intent is to
    // compare the file's season (defaulting to 0) against the known range.
    return differentSeasons > 1 || files
        .filter(file => !file.isMovie && file.episodes)
        .some(file => (file.season || 0) < minSeason
            || (file.season || 0) > maxSeason
            || file.episodes?.every(ep => ep > total));
};
/**
 * Refreshes the given metadata object in place with Cinemeta data fetched via
 * the metadata service (keyed by the metadata's imdbId). On fetch failure or
 * when the new data has no videos, the original metadata is returned unchanged;
 * otherwise videos, episodeCount and totalCount are overwritten.
 */
private updateToCinemetaMetadata = async (metadata: IMetadataResponse): Promise<IMetadataResponse> => {
    const query: IMetaDataQuery = {
        id: metadata.imdbId,
        type: metadata.type
    };
    return await this.metadataService.getMetadata(query)
        .then((newMetadataOrError) => {
            if (newMetadataOrError instanceof Error) {
                // handle error
                this.logger.warn(`Failed ${metadata.imdbId} metadata cinemeta update due: ${newMetadataOrError.message}`);
                return metadata; // or throw newMetadataOrError to propagate error up the call stack
            }
            // At this point TypeScript infers newMetadataOrError to be of type MetadataResponse
            const newMetadata = newMetadataOrError;
            if (!newMetadata.videos || !newMetadata.videos.length) {
                return metadata;
            } else {
                // Mutate in place so callers holding the same reference see the update.
                metadata.videos = newMetadata.videos;
                metadata.episodeCount = newMetadata.episodeCount;
                metadata.totalCount = newMetadata.totalCount;
                return metadata;
            }
        })
};
/**
 * Resolves an IMDb id for a movie title (raw string or pre-parsed file
 * attributes) via the metadata service, rate-limited through the shared IMDb
 * Bottleneck limiter. Resolves to undefined when the lookup fails.
 */
private findMovieImdbId = (title: IFileAttributes | string): Promise<string | undefined> => {
    const parsedTitle = typeof title === 'string' ? parse(title) : title;
    // Bug fix: log the parsed title — interpolating an IFileAttributes object
    // would previously print "[object Object]".
    this.logger.debug(`Finding movie imdbId for ${parsedTitle.title}`);
    return this.imdb_limiter.schedule(async () => {
        const imdbQuery = {
            title: parsedTitle.title,
            year: parsedTitle.year,
            type: TorrentType.Movie
        };
        try {
            return await this.metadataService.getImdbId(imdbQuery);
        } catch (e) {
            // Lookup failures are non-fatal; caller treats undefined as "no id".
            return undefined;
        }
    });
};
/**
 * Resolves a Kitsu id for an (anime) movie title — raw string or pre-parsed
 * file attributes — via the metadata service. Resolves to undefined when the
 * lookup throws; the service itself may also resolve to an Error value.
 */
private findMovieKitsuId = async (title: IFileAttributes | string): Promise<number | Error | undefined> => {
    const info = typeof title === 'string' ? parse(title) : title;
    const {title: parsedName, year, season} = info;
    try {
        return await this.metadataService.getKitsuId({
            title: parsedName,
            year,
            season,
            type: TorrentType.Movie
        });
    } catch (e) {
        // Lookup failures are non-fatal; caller treats undefined as "no id".
        return undefined;
    }
};
/** True when any entry in the torrent is a disk image, per ExtensionHelpers.isDisk. */
private isDiskTorrent = (contents: IContentAttributes[]): boolean => {
    return contents.some(content => ExtensionHelpers.isDisk(content.path));
};
private isSingleMovie = (videos: IFileAttributes[]): boolean => videos.length === 1 ||
@@ -649,51 +203,80 @@ export class TorrentFileService implements ITorrentFileService {
/** True when the video's path places it in bonus content (featurettes/extras folders). */
private isFeaturette = (video: IFileAttributes): boolean => {
    const featurettePathPattern = /featurettes?\/|extras-grym/i;
    return featurettePathPattern.test(video.path!);
};
private parseSeriesVideo = (video: IFileAttributes): IFileAttributes => {
const videoInfo = parse(video.title);
// the episode may be in a folder containing season number
if (!Number.isInteger(videoInfo.season) && video.path?.includes('/')) {
const folders = video.path?.split('/');
const pathInfo = parse(folders[folders.length - 2]);
videoInfo.season = pathInfo.season;
}
if (!Number.isInteger(videoInfo.season) && video.season) {
videoInfo.season = video.season;
}
if (!Number.isInteger(videoInfo.season) && videoInfo.seasons && videoInfo.seasons.length > 1) {
// in case single file was interpreted as having multiple seasons
[videoInfo.season] = videoInfo.seasons;
}
if (!Number.isInteger(videoInfo.season) && video.path?.includes('/') && video.seasons
&& video.seasons.length > 1) {
// russian season are usually named with 'series name-2` i.e. Улицы разбитых фонарей-6/22. Одиночный выстрел.mkv
const folderPathSeasonMatch = video.path?.match(/[\u0400-\u04ff]-(\d{1,2})(?=.*\/)/);
videoInfo.season = folderPathSeasonMatch && parseInt(folderPathSeasonMatch[1], 10) || undefined;
}
// sometimes video file does not have correct date format as in torrent title
if (!videoInfo.episodes && !videoInfo.date && video.date) {
videoInfo.date = video.date;
}
// limit number of episodes in case of incorrect parsing
if (videoInfo.episodes && videoInfo.episodes.length > 20) {
videoInfo.episodes = [videoInfo.episodes[0]];
[videoInfo.episode] = videoInfo.episodes;
}
// force episode to any found number if it was not parsed
if (!videoInfo.episodes && !videoInfo.date) {
const epMatcher = videoInfo.title.match(
/(?<!season\W*|disk\W*|movie\W*|film\W*)(?:^|\W|_)(\d{1,4})(?:a|b|c|v\d)?(?:_|\W|$)(?!disk|movie|film)/i);
videoInfo.episodes = epMatcher && [parseInt(epMatcher[1], 10)] || undefined;
videoInfo.episode = videoInfo.episodes && videoInfo.episodes[0];
}
if (!videoInfo.episodes && !videoInfo.date) {
const epMatcher = video.title.match(new RegExp(`(?:\\(${videoInfo.year}\\)|part)[._ ]?(\\d{1,3})(?:\\b|_)`, "i"));
videoInfo.episodes = epMatcher && [parseInt(epMatcher[1], 10)] || undefined;
videoInfo.episode = videoInfo.episodes && videoInfo.episodes[0];
let pathParts = video.path?.split('/');
let fileName = pathParts[pathParts.length - 1];
let regexList = [
{ regex: /(saison|season|se|s)\s?(\d{2})/gi, format: (match: RegExpMatchArray) => `S${match[2]}` },
{ regex: /(episode|ep|e)\s?(\d{2})/gi, format: (match: RegExpMatchArray) => `E${match[2]}` },
];
let formattedValues: string[] = [];
for (let i = 0; i < regexList.length; i++) {
let regexEntry = regexList[i];
let match = regexEntry.regex.exec(fileName);
if (match) {
let formattedValue = regexEntry.format(match);
fileName = fileName.replace(match[0], '');
formattedValues.push(formattedValue);
}
}
return {...video, ...videoInfo};
fileName = fileName.trim();
let splitFilename = fileName.split(/\.(?=[^.]*$)/);
fileName = `${splitFilename[0]} ${formattedValues.join('')}.${splitFilename[1]}`;
const parsedInfo = filenameParse(fileName, true);
if ('isTv' in parsedInfo) {
const parsedShow = parsedInfo as ParsedShow;
return this.mapParsedShowToVideo(video, parsedShow);
} else {
return {
title: video.title,
path: video.path,
size: video.size,
fileIndex: video.fileIndex,
imdbId: video.imdbId ? video.imdbId : undefined,
kitsuId: video.kitsuId && video.kitsuId !== 0 ? video.kitsuId : 0,
};
}
};
/**
 * Heuristic: a title is treated as a series when it contains a season marker
 * ("saison"/"season"/"se"/"s" + 2 digits) or an episode marker
 * ("episode"/"ep"/"e" + 2 digits). Regexes are created per call, so the `g`
 * flag's lastIndex state cannot leak between invocations.
 */
private isSeries = (title: string): boolean => {
    const seasonPattern = /(saison|season|se|s)\s?(\d{2})/gi;
    const episodePattern = /(episode|ep|e)\s?(\d{2})/gi;
    return seasonPattern.test(title) || episodePattern.test(title);
};
/**
 * Builds a file-attributes entry from a @ctrl/video-filename-parser ParsedShow
 * result, taking season/episode from the parse and identity fields from the
 * original video. Missing kitsuId defaults to 0.
 * NOTE(review): imdbSeason/imdbEpisode/kitsuEpisode are taken from the incoming
 * `video` (pre-parse values), not from `parsedShow` — confirm that asymmetry
 * with season/episode is intentional.
 */
private mapParsedShowToVideo(video: IFileAttributes, parsedShow: ParsedShow & { isTv: true }) : IFileAttributes {
    let response : IFileAttributes = {
        title: video.title,
        season: parsedShow.seasons[0],
        episode: parsedShow.episodeNumbers[0],
        path: video.path,
        size: video.size,
        fileIndex: video.fileIndex,
        imdbId: video.imdbId ? video.imdbId : undefined,
        kitsuId: video.kitsuId && video.kitsuId !== 0 ? video.kitsuId : 0,
        imdbSeason: video.season,
        imdbEpisode: video.episode,
        kitsuEpisode: video.episode,
    };
    // An episode without a season is normalised to season 0 (specials).
    if (!response.imdbSeason && response.imdbEpisode) {
        response.imdbSeason = 0;
    }
    return response;
}
private isMovieVideo = (torrent: IParsedTorrent, video: IFileAttributes, otherVideos: IFileAttributes[], hasMovies: boolean): boolean => {
if (Number.isInteger(torrent.season) && Array.isArray(torrent.episodes)) {
// not movie if video has season
@@ -726,8 +309,4 @@ export class TorrentFileService implements ITorrentFileService {
video.kitsuEpisode = undefined;
return video;
};
// Split an "absolute" episode number of the form SSEE into its parts:
// div100 yields the season component, mod100 the episode component.
private div100 = (episode: number): number => {
    // `>> 0` truncates toward zero, identical to the original expression form.
    return (episode / 100) >> 0;
};
private mod100 = (episode: number): number => {
    return episode % 100;
};
}

View File

@@ -41,7 +41,7 @@ export class TorrentSubtitleService implements ITorrentSubtitleService {
return Object.assign(video, {
fileName: fileName,
folderName: folderName,
...this.parseFilename(video.title.toString() || '')
...this.parseFilename(video.path || '')
});
}

View File

@@ -103,10 +103,12 @@ describe('Configuration Tests', () => {
it('should populate metadataConfig correctly', async() => {
process.env.IMDB_CONCURRENT = '1';
process.env.IMDB_INTERVAL_MS = '1000';
process.env.TITLE_MATCH_THRESHOLD = '0.1';
const {configurationService} = await import("@services/configuration_service");
const {metadataConfig} = configurationService;
expect(metadataConfig.IMDB_CONCURRENT).toBe(1);
expect(metadataConfig.IMDB_INTERVAL_MS).toBe(1000);
expect(metadataConfig.TITLE_MATCH_THRESHOLD).toBe(0.1);
});
it('should populate rabbitConfig correctly', async () => {

View File

@@ -5,20 +5,28 @@ import {MongoRepository} from "@mongo/mongo_repository";
import {IocTypes} from "@setup/ioc_types";
import {Container, inject} from "inversify";
jest.mock('@services/configuration_service', () => {
const metadataConfig = {
TITLE_MATCH_THRESHOLD: 0.25,
}
const cacheConfig = {
MONGODB_HOST: 'localhost',
MONGODB_PORT: '27017',
MONGODB_DB: 'knightcrawler',
MONGODB_USER: 'mongo',
MONGODB_PASSWORD: 'mongo',
get MONGO_URI(): string {
return `mongodb://${this.MONGODB_USER}:${this.MONGODB_PASSWORD}@${this.MONGODB_HOST}:${this.MONGODB_PORT}/${this.MONGODB_DB}?authSource=admin`;
},
};
jest.doMock('@services/configuration_service', () => {
return {
configurationService: {
cacheConfig: {
MONGODB_HOST: 'localhost',
MONGODB_PORT: '27017',
MONGODB_DB: 'knightcrawler',
MONGODB_USER: 'mongo',
MONGODB_PASSWORD: 'mongo',
get MONGO_URI(): string {
return `mongodb://${this.MONGODB_USER}:${this.MONGODB_PASSWORD}@${this.MONGODB_HOST}:${this.MONGODB_PORT}/${this.MONGODB_DB}?authSource=admin`;
}
},
}
cacheConfig: cacheConfig,
metadataConfig: metadataConfig,
},
}
});

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

234602
src/producer/Data/jav.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,24 @@
namespace Producer.Extensions;

/// <summary>
/// String helpers used when normalising torrent titles for loose matching.
/// </summary>
public static partial class StringExtensions
{
    // Matches every character that is not an ASCII letter, digit or space.
    [GeneratedRegex("[^a-zA-Z0-9 ]")]
    private static partial Regex NotAlphaNumeric();

    private static readonly char[] separator = [' '];

    /// <summary>Fluent wrapper around <see cref="string.IsNullOrEmpty(string)"/>.</summary>
    public static bool IsNullOrEmpty(this string? value) =>
        string.IsNullOrEmpty(value);

    /// <summary>
    /// Lower-cases a title and collapses all punctuation/whitespace runs into
    /// single spaces, e.g. "The.Matrix-1999!" becomes "the matrix 1999".
    /// </summary>
    public static string NormalizeTitle(this string title)
    {
        var alphanumericTitle = NotAlphaNumeric().Replace(title, " ");
        // ToLowerInvariant: normalisation must not depend on the host culture
        // (under tr-TR, ToLower() maps 'I' to dotless 'ı' and breaks matching).
        var words = alphanumericTitle.Split(separator, StringSplitOptions.RemoveEmptyEntries)
            .Select(word => word.ToLowerInvariant());

        return string.Join(" ", words);
    }
}

View File

@@ -4,14 +4,12 @@ public partial class DebridMediaManagerCrawler(
IHttpClientFactory httpClientFactory,
ILogger<DebridMediaManagerCrawler> logger,
IDataStorage storage,
GithubConfiguration githubConfiguration) : BaseCrawler(logger, storage)
GithubConfiguration githubConfiguration,
IParsingService parsingService) : BaseCrawler(logger, storage)
{
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
private static partial Regex HashCollectionMatcher();
[GeneratedRegex(@"[sS]([0-9]{1,2})|seasons?[\s-]?([0-9]{1,2})", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SeasonMatcher();
private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
@@ -95,30 +93,83 @@ public partial class DebridMediaManagerCrawler(
private Torrent? ParseTorrent(JsonElement item)
{
var torrent = new Torrent
{
Source = Source,
Name = item.GetProperty("filename").GetString(),
Size = item.GetProperty("bytes").GetInt64().ToString(),
InfoHash = item.GetProperty("hash").ToString(),
Seeders = 0,
Leechers = 0,
};
if (string.IsNullOrEmpty(torrent.Name))
if (!item.TryGetProperty("filename", out var filenameElement) ||
!item.TryGetProperty("bytes", out var bytesElement) ||
!item.TryGetProperty("hash", out var hashElement))
{
return null;
}
torrent.Category = SeasonMatcher().IsMatch(torrent.Name) ? "tv" : "movies";
var torrentTitle = filenameElement.GetString();
if (torrentTitle.IsNullOrEmpty())
{
return null;
}
var torrentType = parsingService.GetTypeByName(torrentTitle);
var torrent = new Torrent
{
Source = Source,
Size = bytesElement.GetInt64().ToString(),
InfoHash = hashElement.ToString(),
Seeders = 0,
Leechers = 0,
};
return torrentType switch
{
TorrentType.Movie => HandleMovieType(torrent, torrentTitle),
TorrentType.Tv => HandleTvType(torrent, torrentTitle),
_ => null,
};
}
/// <summary>
/// Finalises an ingested torrent as a movie entry, rejecting titles that are
/// empty or contain banned terms.
/// </summary>
/// <param name="torrent">The partially populated torrent to enrich.</param>
/// <param name="title">The raw torrent title.</param>
/// <returns>The enriched torrent, or <c>null</c> when it must be skipped.</returns>
private Torrent? HandleMovieType(Torrent torrent, string title)
{
    if (title.IsNullOrEmpty())
    {
        return null;
    }

    if (!parsingService.HasNoBannedTerms(title))
    {
        LogBannedTermsFound(torrent);
        return null;
    }

    torrent.Category = "movies";
    torrent.Name = title;
    return torrent;
}
/// <summary>
/// Finalises an ingested torrent as a TV entry, rejecting titles that are
/// empty or contain banned terms.
/// </summary>
/// <param name="torrent">The partially populated torrent to enrich.</param>
/// <param name="title">The raw torrent title.</param>
/// <returns>The enriched torrent, or <c>null</c> when it must be skipped.</returns>
private Torrent? HandleTvType(Torrent torrent, string title)
{
    if (title.IsNullOrEmpty())
    {
        return null;
    }

    if (!parsingService.HasNoBannedTerms(title))
    {
        LogBannedTermsFound(torrent);
        return null;
    }

    torrent.Category = "tv";
    torrent.Name = title;
    return torrent;
}
// Warn (not error) when a torrent is skipped because its title hit the banned-terms list.
private void LogBannedTermsFound(Torrent torrent) => logger.LogWarning("Banned terms found in torrent title for ingested infoHash: {InfoHash}. Skipping", torrent.InfoHash);
private async Task InsertTorrentsForPage(JsonDocument json)
{
var torrents = json.RootElement.EnumerateArray()
.Select(ParseTorrent)
.Where(t => t is not null)
.ToList();
if (torrents.Count == 0)

View File

@@ -15,7 +15,6 @@ public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawl
[nameof(Torrent.Seeders)] = "seeders",
[nameof(Torrent.Leechers)] = "leechers",
[nameof(Torrent.InfoHash)] = "infoHash",
[nameof(Torrent.Category)] = "category",
};
protected override Torrent ParseTorrent(XElement itemNode) =>
@@ -27,6 +26,6 @@ public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawl
Seeders = int.Parse(itemNode.Element(XmlNamespace + Mappings[nameof(Torrent.Seeders)])?.Value ?? "0"),
Leechers = int.Parse(itemNode.Element(XmlNamespace + Mappings[nameof(Torrent.Leechers)])?.Value ?? "0"),
InfoHash = itemNode.Element(XmlNamespace + Mappings[nameof(Torrent.InfoHash)])?.Value,
Category = itemNode.Element(Mappings[nameof(Torrent.Category)])?.Value.ToLowerInvariant(),
Category = "anime",
};
}

View File

@@ -190,7 +190,6 @@ public partial class TorrentioCrawler(
Source = $"{Source}_{instance.Name}",
InfoHash = infoHash,
Category = "movies", // we only handle movies for now...
Imdb = imdbId,
};
var span = title.AsSpan();

View File

@@ -5,38 +5,43 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
protected override string Url => "https://apibay.org/precompiled/data_top100_recent.json";
protected override string Source => "TPB";
// ReSharper disable once UnusedMember.Local
private readonly Dictionary<string, Dictionary<string, int>> TpbCategories = new()
{
{"VIDEO", new() {
{"ALL", 200},
{"MOVIES", 201},
{"MOVIES_DVDR", 202},
{"MUSIC_VIDEOS", 203},
{"MOVIE_CLIPS", 204},
{"TV_SHOWS", 205},
{"HANDHELD", 206},
{"MOVIES_HD", 207},
{"TV_SHOWS_HD", 208},
{"MOVIES_3D", 209},
{"OTHER", 299},
}},
{"PORN", new() {
{"ALL", 500},
{"MOVIES", 501},
{"MOVIES_DVDR", 502},
{"PICTURES", 503},
{"GAMES", 504},
{"MOVIES_HD", 505},
{"MOVIE_CLIPS", 506},
{"OTHER", 599},
}},
};
// // ReSharper disable once UnusedMember.Local
// private readonly Dictionary<string, Dictionary<string, int>> TpbCategories = new()
// {
// {
// "VIDEO", new()
// {
// {"ALL", 200},
// {"MOVIES", 201},
// {"MOVIES_DVDR", 202},
// {"MUSIC_VIDEOS", 203},
// {"MOVIE_CLIPS", 204},
// {"TV_SHOWS", 205},
// {"HANDHELD", 206},
// {"MOVIES_HD", 207},
// {"TV_SHOWS_HD", 208},
// {"MOVIES_3D", 209},
// {"OTHER", 299},
// }
// },
// {
// "PORN", new()
// {
// {"ALL", 500},
// {"MOVIES", 501},
// {"MOVIES_DVDR", 502},
// {"PICTURES", 503},
// {"GAMES", 504},
// {"MOVIES_HD", 505},
// {"MOVIE_CLIPS", 506},
// {"OTHER", 599},
// }
// },
// };
private static readonly HashSet<int> TvSeriesCategories = [ 205, 208 ];
private static readonly HashSet<int> MovieCategories = [ 201, 202, 207, 209 ];
private static readonly HashSet<int> PornCategories = [ 500, 501, 502, 505, 506 ];
private static readonly HashSet<int> AllowedCategories = [ ..MovieCategories, ..TvSeriesCategories ];
protected override IReadOnlyDictionary<string, string> Mappings
@@ -47,7 +52,6 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
[nameof(Torrent.Seeders)] = "seeders",
[nameof(Torrent.Leechers)] = "leechers",
[nameof(Torrent.InfoHash)] = "info_hash",
[nameof(Torrent.Imdb)] = "imdb",
[nameof(Torrent.Category)] = "category",
};
@@ -67,7 +71,6 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
Size = item.GetProperty(Mappings["Size"]).GetInt64().ToString(),
Seeders = item.GetProperty(Mappings["Seeders"]).GetInt32(),
Leechers = item.GetProperty(Mappings["Leechers"]).GetInt32(),
Imdb = item.GetProperty(Mappings["Imdb"]).GetString(),
};
HandleInfoHash(item, torrent, "InfoHash");

View File

@@ -0,0 +1,13 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Smart-enum of audio channel layouts recognised in release titles; the enum
/// value is the human-readable layout string (e.g. "7.1").
/// </summary>
public sealed class AudioChannels : SmartEnum<AudioChannels, string>
{
    public static readonly AudioChannels SEVEN = new("SEVEN", "7.1");
    public static readonly AudioChannels SIX = new("SIX", "5.1");
    public static readonly AudioChannels STEREO = new("STEREO", "stereo");
    public static readonly AudioChannels MONO = new ("MONO", "mono");

    // Private constructor: the set of channel layouts is closed.
    private AudioChannels(string name, string value) : base(name, value)
    {
    }
}

View File

@@ -0,0 +1,50 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Extracts the audio channel layout (7.1 / 5.1 / stereo / mono) from a
/// release title.
/// </summary>
public static partial class AudioChannelsParser
{
    // "7.1", "7 1", "71", … — when a separator is present it must be a
    // non-word character ([\W], matching the style of SixChannelExp). The
    // previous pattern used an unescaped '.' which matched ANY character,
    // producing false positives such as "7x1".
    [GeneratedRegex(@"\b(?<eight>7[\W]?[01])\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex EightChannelExp();

    [GeneratedRegex(@"\b(?<six>(6[\W]0(?:ch)?)(?=[^\d]|$)|(5[\W][01](?:ch)?)(?=[^\d]|$)|5ch|6ch)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SixChannelExp();

    [GeneratedRegex(@"(?<stereo>((2[\W]0(?:ch)?)(?=[^\d]|$))|(stereo))", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex StereoChannelExp();

    [GeneratedRegex(@"(?<mono>(1[\W]0(?:ch)?)(?=[^\d]|$)|(mono)|(1ch))", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex MonoChannelExp();

    // Regex.ToString() returns the pattern text, so this builds one combined
    // alternation over all four named-group patterns.
    private static readonly Regex ChannelExp = new(string.Join("|", EightChannelExp(), SixChannelExp(), StereoChannelExp(), MonoChannelExp()), RegexOptions.IgnoreCase);

    /// <summary>
    /// Parses <paramref name="title"/> for a channel layout.
    /// </summary>
    /// <param name="channels">The detected layout, or null when nothing matched.</param>
    /// <param name="source">The exact substring that matched, or null.</param>
    public static void Parse(string title, out AudioChannels? channels, out string? source)
    {
        channels = null;
        source = null;

        var channelResult = ChannelExp.Match(title);
        if (!channelResult.Success)
        {
            return;
        }

        // Exactly one named group succeeds per match; map it to the enum.
        var groups = channelResult.Groups;
        if (groups["eight"].Success)
        {
            channels = AudioChannels.SEVEN;
            source = groups["eight"].Value;
        }
        else if (groups["six"].Success)
        {
            channels = AudioChannels.SIX;
            source = groups["six"].Value;
        }
        else if (groups["stereo"].Success)
        {
            channels = AudioChannels.STEREO;
            source = groups["stereo"].Value;
        }
        else if (groups["mono"].Success)
        {
            channels = AudioChannels.MONO;
            source = groups["mono"].Value;
        }
    }
}

View File

@@ -0,0 +1,22 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Smart-enum of audio codecs recognised in release titles; the enum value is
/// the display name (e.g. "Dolby Digital Plus" for EAC3).
/// </summary>
public sealed class AudioCodec : SmartEnum<AudioCodec, string>
{
    public static readonly AudioCodec MP3 = new("MP3", "MP3");
    public static readonly AudioCodec MP2 = new("MP2", "MP2");
    public static readonly AudioCodec DOLBY = new("DOLBY", "Dolby Digital");
    public static readonly AudioCodec EAC3 = new("EAC3", "Dolby Digital Plus");
    public static readonly AudioCodec AAC = new("AAC", "AAC");
    public static readonly AudioCodec FLAC = new("FLAC", "FLAC");
    public static readonly AudioCodec DTS = new("DTS", "DTS");
    public static readonly AudioCodec DTSHD = new("DTSHD", "DTS-HD");
    public static readonly AudioCodec TRUEHD = new("TRUEHD", "Dolby TrueHD");
    public static readonly AudioCodec OPUS = new("OPUS", "Opus");
    public static readonly AudioCodec VORBIS = new("VORBIS", "Vorbis");
    public static readonly AudioCodec PCM = new("PCM", "PCM");
    public static readonly AudioCodec LPCM = new("LPCM", "LPCM");

    // Private constructor: the set of codecs is closed.
    private AudioCodec(string name, string value) : base(name, value)
    {
    }
}

View File

@@ -0,0 +1,138 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Extracts the audio codec from a release title via one combined alternation
/// of per-codec named-group patterns.
/// </summary>
public static partial class AudioCodecsParser
{
    [GeneratedRegex(@"\b(?<mp3>(LAME(?:\d)+-?(?:\d)+)|(mp3))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex Mp3CodecExp();

    [GeneratedRegex(@"\b(?<mp2>(mp2))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex Mp2CodecExp();

    [GeneratedRegex(@"\b(?<dolby>(Dolby)|(Dolby-?Digital)|(DD)|(AC3D?))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DolbyCodecExp();

    [GeneratedRegex(@"\b(?<dolbyatmos>(Dolby-?Atmos))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DolbyAtmosCodecExp();

    [GeneratedRegex(@"\b(?<aac>(AAC))(\d?.?\d?)(ch)?\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex AacAtmosCodecExp();

    // NOTE(review): the trailing \b means "DD+" followed by a space still
    // fails this pattern (and then matches DolbyCodecExp's plain "DD");
    // "DD+5.1" etc. now resolve correctly via the reorder below — confirm
    // whether "DD+ " should also map to EAC3.
    [GeneratedRegex(@"\b(?<eac3>(EAC3|DDP|DD\+))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex Eac3CodecExp();

    [GeneratedRegex(@"\b(?<flac>(FLAC))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex FlacCodecExp();

    [GeneratedRegex(@"\b(?<dts>(DTS))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DtsCodecExp();

    [GeneratedRegex(@"\b(?<dtshd>(DTS-?HD)|(DTS(?=-?MA)|(DTS-X)))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DtsHdCodecExp();

    [GeneratedRegex(@"\b(?<truehd>(True-?HD))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex TrueHdCodecExp();

    [GeneratedRegex(@"\b(?<opus>(Opus))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex OpusCodecExp();

    [GeneratedRegex(@"\b(?<vorbis>(Vorbis))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex VorbisCodecExp();

    [GeneratedRegex(@"\b(?<pcm>(PCM))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex PcmCodecExp();

    [GeneratedRegex(@"\b(?<lpcm>(LPCM))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex LpcmCodecExp();

    // .NET alternation is leftmost-first at each position, so the MORE
    // specific patterns must precede the less specific ones: previously the
    // plain Dolby pattern ("Dolby", "DD") came before DolbyAtmos/EAC3, which
    // made "Dolby-Atmos" and "DD+5.1" resolve to plain Dolby Digital and left
    // the dolbyatmos group unreachable. DtsHd already precedes Dts.
    private static readonly Regex AudioCodecExp = new(
        string.Join(
            "|", Mp3CodecExp(), Mp2CodecExp(), DolbyAtmosCodecExp(), Eac3CodecExp(), DolbyCodecExp(), AacAtmosCodecExp(), FlacCodecExp(),
            DtsHdCodecExp(),
            DtsCodecExp(), TrueHdCodecExp(), OpusCodecExp(), VorbisCodecExp(), PcmCodecExp(), LpcmCodecExp()), RegexOptions.IgnoreCase);

    /// <summary>
    /// Parses <paramref name="title"/> for an audio codec.
    /// </summary>
    /// <param name="codec">The detected codec, or null when nothing matched.</param>
    /// <param name="source">The exact substring that matched, or null.</param>
    public static void Parse(string title, out AudioCodec? codec, out string? source)
    {
        codec = null;
        source = null;

        var audioResult = AudioCodecExp.Match(title);
        if (!audioResult.Success)
        {
            return;
        }

        // Only the matched alternative's named group succeeds, so the order of
        // these checks does not alter which codec is reported.
        var groups = audioResult.Groups;
        if (groups["aac"].Success)
        {
            codec = AudioCodec.AAC;
            source = groups["aac"].Value;
        }
        else if (groups["dolbyatmos"].Success)
        {
            // Atmos is carried over a Dolby Digital Plus stream here.
            codec = AudioCodec.EAC3;
            source = groups["dolbyatmos"].Value;
        }
        else if (groups["dolby"].Success)
        {
            codec = AudioCodec.DOLBY;
            source = groups["dolby"].Value;
        }
        else if (groups["dtshd"].Success)
        {
            codec = AudioCodec.DTSHD;
            source = groups["dtshd"].Value;
        }
        else if (groups["dts"].Success)
        {
            codec = AudioCodec.DTS;
            source = groups["dts"].Value;
        }
        else if (groups["flac"].Success)
        {
            codec = AudioCodec.FLAC;
            source = groups["flac"].Value;
        }
        else if (groups["truehd"].Success)
        {
            codec = AudioCodec.TRUEHD;
            source = groups["truehd"].Value;
        }
        else if (groups["mp3"].Success)
        {
            codec = AudioCodec.MP3;
            source = groups["mp3"].Value;
        }
        else if (groups["mp2"].Success)
        {
            codec = AudioCodec.MP2;
            source = groups["mp2"].Value;
        }
        else if (groups["pcm"].Success)
        {
            codec = AudioCodec.PCM;
            source = groups["pcm"].Value;
        }
        else if (groups["lpcm"].Success)
        {
            codec = AudioCodec.LPCM;
            source = groups["lpcm"].Value;
        }
        else if (groups["opus"].Success)
        {
            codec = AudioCodec.OPUS;
            source = groups["opus"].Value;
        }
        else if (groups["vorbis"].Success)
        {
            codec = AudioCodec.VORBIS;
            source = groups["vorbis"].Value;
        }
        else if (groups["eac3"].Success)
        {
            codec = AudioCodec.EAC3;
            source = groups["eac3"].Value;
        }
    }
}

View File

@@ -0,0 +1,19 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Common result of parsing a torrent/release title; specialised parse results
/// derive from this. All members are null/empty until the corresponding
/// sub-parser has populated them.
/// </summary>
public class BaseParsed
{
    // The raw title the parsers ran against.
    public string? ReleaseTitle { get; set; }
    public string? Title { get; set; }
    public string? Year { get; set; }
    public Edition? Edition { get; set; }
    public Resolution? Resolution { get; set; }
    public VideoCodec? VideoCodec { get; set; }
    public AudioCodec? AudioCodec { get; set; }
    public AudioChannels? AudioChannels { get; set; }
    public Revision? Revision { get; set; }
    // Release group, e.g. the "-GROUP" suffix.
    public string? Group { get; set; }
    public List<Language> Languages { get; set; } = [];
    public List<Source> Sources { get; set; } = [];
    // Multi-language / complete-collection flags; null when undetermined.
    public bool? Multi { get; set; }
    public bool? Complete { get; set; }
}

View File

@@ -0,0 +1,14 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Detects "complete" release markers (full DVDs, complete collections) in a title.
/// </summary>
public static partial class Complete
{
    // NOTE(review): the '.' before DVDR is an unescaped any-character match —
    // presumably intended as a separator (e.g. "NTSC.DVDR"); confirm before tightening.
    [GeneratedRegex(@"\b(NTSC|PAL)?.DVDR\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex CompleteDvdExp();

    [GeneratedRegex(@"\b(COMPLETE)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex CompleteExp();

    // Tri-state by design: returns true or null (never false) so callers can
    // distinguish "detected" from "no information".
    public static bool? IsCompleteDvd(string title) => CompleteDvdExp().IsMatch(title) ? true : null;

    public static bool IsComplete(string title) => CompleteExp().IsMatch(title) || IsCompleteDvd(title) == true;
}

View File

@@ -0,0 +1,26 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Edition flags detected in a release title (see <c>EditionParser</c> for the
/// patterns). Nullable by declaration, though the parser assigns a concrete
/// bool to every member it sets.
/// </summary>
public class Edition
{
    public bool? Internal { get; set; }
    public bool? Limited { get; set; }
    public bool? Remastered { get; set; }
    public bool? Extended { get; set; }
    public bool? Theatrical { get; set; }
    public bool? Directors { get; set; }
    public bool? Unrated { get; set; }
    public bool? Imax { get; set; }
    public bool? FanEdit { get; set; }
    public bool? Hdr { get; set; }
    // Black & white.
    public bool? Bw { get; set; }
    // 3D variants: half side-by-side, side-by-side, half over-under.
    public bool? ThreeD { get; set; }
    public bool? Hsbs { get; set; }
    public bool? Sbs { get; set; }
    public bool? Hou { get; set; }
    public bool? Uhd { get; set; }
    // Original aspect ratio.
    public bool? Oar { get; set; }
    public bool? DolbyVision { get; set; }
    public bool? HardcodedSubs { get; set; }
    public bool? DeletedScenes { get; set; }
    public bool? BonusContent { get; set; }
}

View File

@@ -0,0 +1,101 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Detects release-edition flags (remastered, extended, 3D, HDR, …) by probing
/// the title with one regex per flag.
/// </summary>
public static partial class EditionParser
{
    [GeneratedRegex(@"\b(INTERNAL)\b", RegexOptions.IgnoreCase)]
    private static partial Regex InternalExp();

    [GeneratedRegex(@"\b(Remastered|Anniversary|Restored)\b", RegexOptions.IgnoreCase)]
    private static partial Regex RemasteredExp();

    [GeneratedRegex(@"\b(IMAX)\b", RegexOptions.IgnoreCase)]
    private static partial Regex ImaxExp();

    [GeneratedRegex(@"\b(Uncensored|Unrated)\b", RegexOptions.IgnoreCase)]
    private static partial Regex UnratedExp();

    [GeneratedRegex(@"\b(Extended|Uncut|Ultimate|Rogue|Collector)\b", RegexOptions.IgnoreCase)]
    private static partial Regex ExtendedExp();

    [GeneratedRegex(@"\b(Theatrical)\b", RegexOptions.IgnoreCase)]
    private static partial Regex TheatricalExp();

    [GeneratedRegex(@"\b(Directors?)\b", RegexOptions.IgnoreCase)]
    private static partial Regex DirectorsExp();

    [GeneratedRegex(@"\b(Despecialized|Fan.?Edit)\b", RegexOptions.IgnoreCase)]
    private static partial Regex FanExp();

    [GeneratedRegex(@"\b(LIMITED)\b", RegexOptions.IgnoreCase)]
    private static partial Regex LimitedExp();

    [GeneratedRegex(@"\b(HDR)\b", RegexOptions.IgnoreCase)]
    private static partial Regex HdrExp();

    [GeneratedRegex(@"\b(3D)\b", RegexOptions.IgnoreCase)]
    private static partial Regex ThreeD();

    [GeneratedRegex(@"\b(Half-?SBS|HSBS)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Hsbs();

    // Plain SBS, but not when preceded by H/HALF- (that is the Hsbs case).
    [GeneratedRegex(@"\b((?<!H|HALF-)SBS)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Sbs();

    [GeneratedRegex(@"\b(HOU)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Hou();

    [GeneratedRegex(@"\b(UHD)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Uhd();

    [GeneratedRegex(@"\b(OAR)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Oar();

    [GeneratedRegex(@"\b(DV(\b(HDR10|HLG|SDR))?)\b", RegexOptions.IgnoreCase)]
    private static partial Regex DolbyVision();

    // Hardsubs: either "<something>SUBS" (excluding SOFTSUBS/HORRIBLESUBS) or HC/SUBBED.
    [GeneratedRegex(@"\b((?<hcsub>(\w+(?<!SOFT|HORRIBLE)SUBS?))|(?<hc>(HC|SUBBED)))\b", RegexOptions.IgnoreCase)]
    private static partial Regex HardcodedSubsExp();

    [GeneratedRegex(@"\b((Bonus.)?Deleted.Scenes)\b", RegexOptions.IgnoreCase)]
    private static partial Regex DeletedScenes();

    [GeneratedRegex(@"\b((Bonus|Extras|Behind.the.Scenes|Making.of|Interviews|Featurettes|Outtakes|Bloopers|Gag.Reel).(?!(Deleted.Scenes)))\b", RegexOptions.IgnoreCase)]
    private static partial Regex BonusContent();

    [GeneratedRegex(@"\b(BW)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Bw();

    /// <summary>
    /// Parses the edition flags of <paramref name="title"/>.
    /// </summary>
    public static Edition Parse(string title)
    {
        // Strip the detected film/show title first so edition keywords that are
        // part of the actual title are not counted as flags.
        TitleParser.Parse(title, out var parsedTitle, out _);
        // NOTE(review): string.Replace throws on an empty oldValue — assumes
        // TitleParser never yields an empty parsedTitle; confirm.
        var withoutTitle = title.Replace(".", " ").Replace(parsedTitle, "").ToLower();

        var result = new Edition
        {
            Internal = InternalExp().IsMatch(withoutTitle),
            Limited = LimitedExp().IsMatch(withoutTitle),
            Remastered = RemasteredExp().IsMatch(withoutTitle),
            Extended = ExtendedExp().IsMatch(withoutTitle),
            Theatrical = TheatricalExp().IsMatch(withoutTitle),
            Directors = DirectorsExp().IsMatch(withoutTitle),
            Unrated = UnratedExp().IsMatch(withoutTitle),
            Imax = ImaxExp().IsMatch(withoutTitle),
            FanEdit = FanExp().IsMatch(withoutTitle),
            Hdr = HdrExp().IsMatch(withoutTitle),
            ThreeD = ThreeD().IsMatch(withoutTitle),
            Hsbs = Hsbs().IsMatch(withoutTitle),
            Sbs = Sbs().IsMatch(withoutTitle),
            Hou = Hou().IsMatch(withoutTitle),
            Uhd = Uhd().IsMatch(withoutTitle),
            Oar = Oar().IsMatch(withoutTitle),
            DolbyVision = DolbyVision().IsMatch(withoutTitle),
            HardcodedSubs = HardcodedSubsExp().IsMatch(withoutTitle),
            DeletedScenes = DeletedScenes().IsMatch(withoutTitle),
            BonusContent = BonusContent().IsMatch(withoutTitle),
            Bw = Bw().IsMatch(withoutTitle),
        };

        return result;
    }
}

View File

@@ -0,0 +1,78 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Strips a known video-container file extension from the end of a release
/// title; unknown dotted suffixes (e.g. ".2024") are left untouched.
/// </summary>
public static partial class FileExtensionParser
{
    // A 2-4 character dotted suffix at the very end of the string.
    [GeneratedRegex(@"\.[a-z0-9]{2,4}$", RegexOptions.IgnoreCase)]
    private static partial Regex FileExtensionExp();

    // Known container extensions. A case-insensitive HashSet replaces the
    // previous List + linear Any(Equals) scan: same matching semantics
    // (OrdinalIgnoreCase), O(1) lookup.
    private static readonly HashSet<string> _fileExtensions = new(StringComparer.OrdinalIgnoreCase)
    {
        // Unknown
        ".webm",
        // SDTV
        ".m4v", ".3gp", ".nsv", ".ty", ".strm", ".rm", ".rmvb", ".m3u", ".ifo",
        ".mov", ".qt", ".divx", ".xvid", ".bivx", ".nrg", ".pva", ".wmv",
        ".asf", ".asx", ".ogm", ".ogv", ".m2v", ".avi", ".bin", ".dat",
        ".dvr-ms", ".mpg", ".mpeg", ".mp4", ".avc", ".vp3", ".svq3", ".nuv",
        ".viv", ".dv", ".fli", ".flv", ".wpl",
        // DVD
        ".img", ".iso", ".vob",
        // HD
        ".mkv", ".mk3d", ".ts", ".wtv",
        // Bluray
        ".m2ts",
    };

    /// <summary>
    /// Removes a trailing extension when it is a known container format,
    /// otherwise returns the title unchanged.
    /// </summary>
    public static string RemoveFileExtension(string title) =>
        FileExtensionExp().Replace(
            title, match => _fileExtensions.Contains(match.Value) ? string.Empty : match.Value);
}

View File

@@ -0,0 +1,71 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Extracts the release group from a torrent title: known exception groups,
/// anime "[SubGroup]" prefixes, then the generic scene "-GROUP" pattern.
/// </summary>
public static partial class GroupParser
{
    [GeneratedRegex(@"^\[\s*[a-z]+(\.[a-z]+)+\s*\][- ]*|^www\.[a-z]+\.(?:com|net)[ -]*", RegexOptions.IgnoreCase)]
    private static partial Regex WebsitePrefixExp();

    [GeneratedRegex(@"(-(RP|1|NZBGeek|Obfuscated|Obfuscation|Scrambled|sample|Pre|postbot|xpost|Rakuv[a-z0-9]*|WhiteRev|BUYMORE|AsRequested|AlternativeToRequested|GEROV|Z0iDS3N|Chamele0n|4P|4Planet|AlteZachen|RePACKPOST))+$", RegexOptions.IgnoreCase)]
    private static partial Regex CleanReleaseGroupExp();

    [GeneratedRegex(@"-(?<releasegroup>[a-z0-9]+)(?<!WEB-DL|WEB-RIP|480p|720p|1080p|2160p|DTS-(HD|X|MA|ES)|([a-zA-Z]{3}-ENG))(?:\b|[-._ ])", RegexOptions.IgnoreCase)]
    private static partial Regex ReleaseGroupRegexExp();

    [GeneratedRegex(@"^(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeReleaseGroupExp();

    [GeneratedRegex(@"(\[)?(?<releasegroup>(Joy|YIFY|YTS.(MX|LT|AG)|FreetheFish|VH-PROD|FTW-HS|DX-TV|Blu-bits|afm72|Anna|Bandi|Ghost|Kappa|MONOLITH|Qman|RZeroX|SAMPA|Silence|theincognito|D-Z0N3|t3nzin|Vyndros|HDO|DusIctv|DHD|SEV|CtrlHD|-ZR-|ADC|XZVN|RH|Kametsu|r00t|HONE))(\])?$", RegexOptions.IgnoreCase)]
    private static partial Regex ExceptionReleaseGroupRegex();

    /// <summary>
    /// Returns the release group found in <paramref name="title"/>, or null
    /// when none is detected.
    /// </summary>
    public static string? Parse(string title)
    {
        // Drop leading "[site.tld]" / "www.site.com" noise first.
        var nowebsiteTitle = WebsitePrefixExp().Replace(title, "");

        // Remove the detected film/show title so its words cannot be mistaken
        // for a group name.
        TitleParser.Parse(nowebsiteTitle, out var releaseTitle, out _);
        releaseTitle = releaseTitle.Replace(" ", ".");

        var trimmed = nowebsiteTitle.Replace(" ", ".");
        if (releaseTitle != nowebsiteTitle)
        {
            trimmed = trimmed.Replace(releaseTitle, "");
        }

        trimmed = trimmed.Replace(".-.", ".");
        trimmed = TitleParser.SimplifyTitle(FileExtensionParser.RemoveFileExtension(trimmed.Trim()));
        if (trimmed.Length == 0)
        {
            return null;
        }

        // Known groups the generic pattern would miss or mangle.
        var exceptionResult = ExceptionReleaseGroupRegex().Match(trimmed);
        if (exceptionResult.Groups["releasegroup"].Success)
        {
            return exceptionResult.Groups["releasegroup"].Value;
        }

        var animeResult = AnimeReleaseGroupExp().Match(trimmed);
        if (animeResult.Success)
        {
            return animeResult.Groups["subgroup"].Value;
        }

        // Strip trailing obfuscation/repost suffixes before the generic scan.
        trimmed = CleanReleaseGroupExp().Replace(trimmed, "");

        // Use the source-generated regex directly. The previous code rebuilt an
        // identical Regex from ReleaseGroupRegexExp().ToString() on every call,
        // paying a per-call pattern compilation for no behavioural difference.
        var result = ReleaseGroupRegexExp().Match(trimmed);
        while (result.Success)
        {
            if (result.Groups["releasegroup"].Success)
            {
                return result.Groups["releasegroup"].Value;
            }

            result = result.NextMatch();
        }

        return null;
    }
}

View File

@@ -0,0 +1,24 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Title parsing and fuzzy title-matching operations used by the crawlers to
/// classify ingested torrent names and sanity-check candidate matches.
/// Member semantics are defined by the implementing service.
/// </summary>
public interface IParsingService
{
    // Classification / full parse of a raw torrent name.
    TorrentType GetTypeByName(string name);
    ParsedFilename Parse(string name);

    // Title normalisation helpers.
    string Naked(string title);
    List<string> GrabYears(string str);
    List<int> GrabPossibleSeasonNums(string str);
    bool HasYear(string test, List<string> years, bool strictCheck = false);
    string RemoveDiacritics(string str);
    string RemoveRepeats(string str);
    int RomanToDecimal(string roman);
    string ReplaceRomanWithDecimal(string input);

    // Matching predicates and scoring.
    bool StrictEqual(string title1, string title2);
    int CountTestTermsInTarget(string test, string target, bool shouldBeInSequence = false);
    bool FlexEq(string test, string target, List<string> years);
    bool MatchesTitle(string target, List<string> years, string test);
    bool IncludesMustHaveTerms(List<string> mustHaveTerms, string testTitle);
    bool HasNoBannedTerms(string targetTitle, string testTitle);
    bool HasNoBannedTerms(string targetTitle);
    bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle);
    int CountUncommonWords(string title);
}

View File

@@ -0,0 +1,7 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Minimal torrent-title parsing surface: full parse plus movie/TV classification.
/// </summary>
public interface ITorrentTitleParser
{
    ParsedFilename Parse(string name);
    TorrentType GetTypeByName(string name);
}

View File

@@ -0,0 +1,50 @@
namespace Producer.Features.ParseTorrentTitle;

/// <summary>
/// Smart-enum of audio/subtitle languages recognised in release titles; name
/// and value are identical display strings.
/// </summary>
public sealed class Language : SmartEnum<Language, string>
{
    public static readonly Language English = new("English", "English");
    public static readonly Language French = new("French", "French");
    public static readonly Language Spanish = new("Spanish", "Spanish");
    public static readonly Language German = new("German", "German");
    public static readonly Language Italian = new("Italian", "Italian");
    public static readonly Language Danish = new("Danish", "Danish");
    public static readonly Language Dutch = new("Dutch", "Dutch");
    public static readonly Language Japanese = new("Japanese", "Japanese");
    public static readonly Language Cantonese = new("Cantonese", "Cantonese");
    public static readonly Language Mandarin = new("Mandarin", "Mandarin");
    public static readonly Language Russian = new("Russian", "Russian");
    public static readonly Language Polish = new("Polish", "Polish");
    public static readonly Language Vietnamese = new("Vietnamese", "Vietnamese");
    public static readonly Language Nordic = new("Nordic", "Nordic");
    public static readonly Language Swedish = new("Swedish", "Swedish");
    public static readonly Language Norwegian = new("Norwegian", "Norwegian");
    public static readonly Language Finnish = new("Finnish", "Finnish");
    public static readonly Language Turkish = new("Turkish", "Turkish");
    public static readonly Language Portuguese = new("Portuguese", "Portuguese");
    public static readonly Language Flemish = new("Flemish", "Flemish");
    public static readonly Language Greek = new("Greek", "Greek");
    public static readonly Language Korean = new("Korean", "Korean");
    public static readonly Language Hungarian = new("Hungarian", "Hungarian");
    public static readonly Language Persian = new("Persian", "Persian");
    public static readonly Language Bengali = new("Bengali", "Bengali");
    public static readonly Language Bulgarian = new("Bulgarian", "Bulgarian");
    public static readonly Language Brazilian = new("Brazilian", "Brazilian");
    public static readonly Language Hebrew = new("Hebrew", "Hebrew");
    public static readonly Language Czech = new("Czech", "Czech");
    public static readonly Language Ukrainian = new("Ukrainian", "Ukrainian");
    public static readonly Language Catalan = new("Catalan", "Catalan");
    public static readonly Language Chinese = new("Chinese", "Chinese");
    public static readonly Language Thai = new("Thai", "Thai");
    public static readonly Language Hindi = new("Hindi", "Hindi");
    public static readonly Language Tamil = new("Tamil", "Tamil");
    public static readonly Language Arabic = new("Arabic", "Arabic");
    public static readonly Language Estonian = new("Estonian", "Estonian");
    public static readonly Language Icelandic = new("Icelandic", "Icelandic");
    public static readonly Language Latvian = new("Latvian", "Latvian");
    public static readonly Language Lithuanian = new("Lithuanian", "Lithuanian");
    public static readonly Language Romanian = new("Romanian", "Romanian");
    public static readonly Language Slovak = new("Slovak", "Slovak");
    public static readonly Language Serbian = new("Serbian", "Serbian");

    // Private constructor: the set of languages is closed.
    private Language(string name, string value) : base(name, value) { }
}

View File

@@ -0,0 +1,340 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Extracts spoken/subtitle languages from a torrent release title.
/// Detection is two-phase: plain lower-case substring checks first, then
/// word-boundary regex checks for short/ambiguous tags (EN, FR, PL, ...).
/// </summary>
public static partial class LanguageParser
{
    [GeneratedRegex(@"\bWEB-?DL\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex WebDL();
    [GeneratedRegex(@"(?<!(WEB-))\b(MULTi|DUAL|DL)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex MultiExp();
    [GeneratedRegex(@"\b(english|eng|EN|FI)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex EnglishRegex();
    [GeneratedRegex(@"\b(DK|DAN|danish)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DanishRegex();
    [GeneratedRegex(@"\b(SE|SWE|swedish)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SwedishRegex();
    [GeneratedRegex(@"\b(ice|Icelandic)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex IcelandicRegex();
    [GeneratedRegex(@"\b(chi|chinese)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex ChineseRegex();
    [GeneratedRegex(@"\b(ita|italian)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex ItalianRegex();
    [GeneratedRegex(@"\b(german|videomann)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex GermanRegex();
    [GeneratedRegex(@"\b(flemish)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex FlemishRegex();
    [GeneratedRegex(@"\b(greek)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex GreekRegex();
    [GeneratedRegex(@"\b(FR|FRENCH|VOSTFR|VO|VFF|VFQ|VF2|TRUEFRENCH|SUBFRENCH)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex FrenchRegex();
    [GeneratedRegex(@"\b(russian|rus)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex RussianRegex();
    [GeneratedRegex(@"\b(norwegian|NO)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex NorwegianRegex();
    [GeneratedRegex(@"\b(HUNDUB|HUN|hungarian)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex HungarianRegex();
    [GeneratedRegex(@"\b(HebDub)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex HebrewRegex();
    [GeneratedRegex(@"\b(CZ|SK)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex CzechRegex();
    [GeneratedRegex(@"(?<ukrainian>\bukr\b)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex UkrainianRegex();
    [GeneratedRegex(@"\b(PL|PLDUB|POLISH)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex PolishRegex();
    [GeneratedRegex(@"\b(nl|dutch)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DutchRegex();
    [GeneratedRegex(@"\b(HIN|Hindi)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex HindiRegex();
    [GeneratedRegex(@"\b(TAM|Tamil)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex TamilRegex();
    [GeneratedRegex(@"\b(Arabic)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex ArabicRegex();
    [GeneratedRegex(@"\b(Latvian)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex LatvianRegex();
    [GeneratedRegex(@"\b(Lithuanian)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex LithuanianRegex();
    [GeneratedRegex(@"\b(RO|Romanian|rodubbed)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex RomanianRegex();
    [GeneratedRegex(@"\b(SK|Slovak)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SlovakRegex();
    [GeneratedRegex(@"\b(Brazilian)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex BrazilianRegex();
    [GeneratedRegex(@"\b(Persian)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex PersianRegex();
    [GeneratedRegex(@"\b(Bengali)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex BengaliRegex();
    [GeneratedRegex(@"\b(Bulgarian)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex BulgarianRegex();
    [GeneratedRegex(@"\b(Serbian)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SerbianRegex();

    // Plain substring checks, evaluated first. Order is preserved from the original
    // if-chain so the resulting language order is unchanged.
    private static readonly (string Token, Language Language)[] SubstringChecks =
    [
        ("spanish", Language.Spanish),
        ("japanese", Language.Japanese),
        ("cantonese", Language.Cantonese),
        ("mandarin", Language.Mandarin),
        ("korean", Language.Korean),
        ("vietnamese", Language.Vietnamese),
        ("finnish", Language.Finnish),
        ("turkish", Language.Turkish),
        ("portuguese", Language.Portuguese),
        ("hebrew", Language.Hebrew),
        ("czech", Language.Czech),
        ("ukrainian", Language.Ukrainian),
        ("catalan", Language.Catalan),
        ("estonian", Language.Estonian),
        ("thai", Language.Thai),
    ];

    // Regex checks, evaluated after the substring checks, again in the original order.
    private static readonly (Regex Matcher, Language Language)[] RegexChecks =
    [
        (EnglishRegex(), Language.English),
        (DanishRegex(), Language.Danish),
        (SwedishRegex(), Language.Swedish),
        (IcelandicRegex(), Language.Icelandic),
        (ChineseRegex(), Language.Chinese),
        (ItalianRegex(), Language.Italian),
        (GermanRegex(), Language.German),
        (FlemishRegex(), Language.Flemish),
        (GreekRegex(), Language.Greek),
        (FrenchRegex(), Language.French),
        (RussianRegex(), Language.Russian),
        (NorwegianRegex(), Language.Norwegian),
        (HungarianRegex(), Language.Hungarian),
        (HebrewRegex(), Language.Hebrew),
        (CzechRegex(), Language.Czech),
        (UkrainianRegex(), Language.Ukrainian),
        (PolishRegex(), Language.Polish),
        (DutchRegex(), Language.Dutch),
        (HindiRegex(), Language.Hindi),
        (TamilRegex(), Language.Tamil),
        (ArabicRegex(), Language.Arabic),
        (LatvianRegex(), Language.Latvian),
        (LithuanianRegex(), Language.Lithuanian),
        (RomanianRegex(), Language.Romanian),
        (SlovakRegex(), Language.Slovak),
        (BrazilianRegex(), Language.Brazilian),
        (PersianRegex(), Language.Persian),
        (BengaliRegex(), Language.Bengali),
        (BulgarianRegex(), Language.Bulgarian),
        (SerbianRegex(), Language.Serbian),
    ];

    /// <summary>
    /// Detects every language tagged in <paramref name="title"/>.
    /// The parsed media title is stripped out first so language tokens in the
    /// actual title (e.g. a film called "The German") are not misdetected.
    /// </summary>
    /// <param name="title">Raw release title.</param>
    /// <param name="languages">Detected languages, in detection order, without duplicates.</param>
    public static void Parse(string title, out List<Language> languages)
    {
        TitleParser.Parse(title, out var parsedTitle, out _);
        var languageTitle = title.Replace(".", " ").Replace(parsedTitle, "").ToLower();
        languages = [];
        foreach (var (token, language) in SubstringChecks)
        {
            // Dedupe guard: languages with both a substring and a regex rule
            // (Hebrew, Czech, Ukrainian) were previously added twice when a title
            // carried both forms of the tag.
            if (languageTitle.Contains(token) && !languages.Contains(language))
            {
                languages.Add(language);
            }
        }
        foreach (var (matcher, language) in RegexChecks)
        {
            if (matcher.IsMatch(languageTitle) && !languages.Contains(language))
            {
                languages.Add(language);
            }
        }
    }

    /// <summary>
    /// True when the title carries a MULTi/DUAL/DL tag; "DL" inside "WEB-DL" is
    /// excluded by stripping WEB-DL tokens first. Returns null (not false) when
    /// no multi tag is found.
    /// </summary>
    public static bool? IsMulti(string title)
    {
        var noWebTitle = WebDL().Replace(title, "");
        return MultiExp().IsMatch(noWebTitle) ? true : null;
    }
}

View File

@@ -0,0 +1,10 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Result of parsing a torrent title: a movie result, a TV result, and the
/// detected torrent type. Which of <see cref="Movie"/>/<see cref="Show"/> is
/// populated is decided by the parser that builds this object.
/// </summary>
public class ParsedFilename
{
    // Movie-shaped parse result; null when the title did not parse as a movie.
    public ParsedMovie? Movie { get; set; }
    // TV-shaped parse result; null when the title did not parse as a show.
    public ParsedTv? Show { get; set; }
    // Detected torrent type; null when detection did not run or failed.
    public TorrentType? Type { get; set; }
    // A parse that produced neither a movie nor a show is unusable downstream.
    public bool IsInvalid => Movie is null && Show is null;
}

View File

@@ -0,0 +1,5 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Movie-specific parse result. Adds nothing beyond the shared BaseParsed
/// members; exists so movie and TV results are distinct types.
/// </summary>
public class ParsedMovie : BaseParsed
{
}

View File

@@ -0,0 +1,15 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// TV-specific parse result: series title plus the season/episode information
/// extracted from the release name.
/// </summary>
public class ParsedTv : BaseParsed
{
    // Series title as extracted from the release name.
    public string? SeriesTitle { get; set; }
    // Season numbers found in the title (presumably more than one for season packs — TODO confirm).
    public List<int> Seasons { get; set; } = [];
    // Episode numbers found in the title.
    public List<int> EpisodeNumbers { get; set; } = [];
    // Air date for date-based naming schemes; null otherwise.
    public DateTime? AirDate { get; set; }
    public bool FullSeason { get; set; }
    public bool IsPartialSeason { get; set; }
    public bool IsMultiSeason { get; set; }
    public bool IsSeasonExtra { get; set; }
    public bool IsSpecial { get; set; }
    // Part number when a season is split into parts (e.g. "S01 Part 2").
    public int SeasonPart { get; set; }
}

View File

@@ -0,0 +1,29 @@
namespace Producer.Features.ParseTorrentTitle;
// Compiled regex declarations for ParsingService (source-generated at build time).
public partial class ParsingService
{
    // Any character outside [a-z0-9]; used to strip a lower-cased title bare.
    [GeneratedRegex("[^a-z0-9]")]
    private static partial Regex NakedMatcher();
    // Four consecutive digits; candidate years (NB: also matches inside longer digit runs).
    [GeneratedRegex(@"\d{4}")]
    private static partial Regex GrabYearsMatcher();
    // Any digit run; candidate season numbers.
    [GeneratedRegex(@"\d+")]
    private static partial Regex GrabPossibleSeasonNumsMatcher();
    // A character followed by one or more repeats of itself; replaced with a single occurrence.
    [GeneratedRegex(@"(.)\1+")]
    private static partial Regex RemoveRepeatsMatcher();
    // Lower-case roman-numeral shape (note: all parts optional, so it can match empty).
    [GeneratedRegex(@"m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})")]
    private static partial Regex ReplaceRomanWithDecimalMatcher();
    // One or more whitespace characters.
    [GeneratedRegex(@"\s+")]
    private static partial Regex WhitespaceMatcher();
    // One or more non-word characters; used to split titles into words.
    [GeneratedRegex(@"\W+")]
    private static partial Regex WordMatcher();
    // Possessive 's, " & ", or any single non-word char; stripped during word processing.
    [GeneratedRegex(@"'s|\s&\s|\W")]
    private static partial Regex WordProcessingMatcher();
}

View File

@@ -0,0 +1,353 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Fuzzy title-matching service: normalisation helpers (diacritics, repeats,
/// roman numerals), scoring-based title comparison, and banned-word screening
/// backed by the injected IWordCollections.
/// </summary>
public partial class ParsingService : IParsingService
{
    private readonly IWordCollections _wordCollections;
    private readonly ITorrentTitleParser _torrentTitleParser;
    private static readonly char[] WhitespaceSeparator = [' '];
    // Union of adult compound phrases, JAV terms and performer names; built once in the ctor.
    private HashSet<string> _compoundWords = [];

    public ParsingService(IWordCollections wordCollections, ITorrentTitleParser torrentTitleParser)
    {
        _wordCollections = wordCollections;
        _torrentTitleParser = torrentTitleParser;
        _compoundWords.UnionWith(_wordCollections.AdultCompoundPhrases);
        _compoundWords.UnionWith(_wordCollections.Jav);
        _compoundWords.UnionWith(_wordCollections.AdultStars);
    }

    /// <summary>Lower-cases the title and strips every character outside [a-z0-9].</summary>
    public string Naked(string title) =>
        NakedMatcher().Replace(title.ToLower(), "");

    /// <summary>
    /// Returns every 4-digit run that looks like a release year (1901..current year).
    /// NOTE(review): \d{4} has no boundaries, so it also matches the first four
    /// digits of longer digit runs — confirm that is intended.
    /// </summary>
    public List<string> GrabYears(string str)
    {
        var matches = GrabYearsMatcher().Matches(str);
        return matches
            .Select(m => m.Value)
            .Where(n => int.Parse(n) > 1900 && int.Parse(n) <= DateTime.Now.Year)
            .ToList();
    }

    /// <summary>Returns every integer in the string that could be a season number (1..500).</summary>
    public List<int> GrabPossibleSeasonNums(string str)
    {
        var matches = GrabPossibleSeasonNumsMatcher().Matches(str);
        return matches
            .Select(m => int.Parse(m.Value))
            .Where(n => n is > 0 and <= 500)
            .ToList();
    }

    /// <summary>
    /// True when <paramref name="test"/> contains one of the given years.
    /// Unless <paramref name="strictCheck"/> is set, a year one off in either
    /// direction also counts.
    /// </summary>
    public bool HasYear(string test, List<string> years, bool strictCheck = false) =>
        strictCheck
            ? years.Any(test.Contains)
            : years.Any(year =>
            {
                var intYear = int.Parse(year);
                return test.Contains(year) ||
                       test.Contains($"{intYear + 1}") ||
                       test.Contains($"{intYear - 1}");
            });

    /// <summary>
    /// Strips combining accent marks: NFD-decompose, drop NonSpacingMark
    /// characters, recompose to NFC.
    /// </summary>
    public string RemoveDiacritics(string str)
    {
        var normalizedString = str.Normalize(NormalizationForm.FormD);
        var stringBuilder = new StringBuilder();
        foreach (var c in normalizedString)
        {
            var unicodeCategory = CharUnicodeInfo.GetUnicodeCategory(c);
            if (unicodeCategory != UnicodeCategory.NonSpacingMark)
            {
                stringBuilder.Append(c);
            }
        }
        return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
    }

    /// <summary>Collapses runs of a repeated character to a single occurrence ("aab" -> "ab").</summary>
    public string RemoveRepeats(string str) => RemoveRepeatsMatcher().Replace(str, "$1");

    /// <summary>
    /// Converts a roman numeral to its integer value using right-to-left
    /// subtraction (IV = 4). Throws KeyNotFoundException on non-roman characters.
    /// </summary>
    public int RomanToDecimal(string roman)
    {
        var romanNumerals = new Dictionary<char, int>
        {
            {'I', 1},
            {'V', 5},
            {'X', 10},
            {'L', 50},
            {'C', 100},
            {'D', 500},
            {'M', 1000}
        };
        var total = 0;
        var prevValue = 0;
        for (var i = roman.Length - 1; i >= 0; i--)
        {
            var currentValue = romanNumerals[roman[i].ToString().ToUpper()[0]];
            // A smaller value left of a larger one subtracts (e.g. the I in IV).
            total = currentValue < prevValue ? total - currentValue : total + currentValue;
            prevValue = currentValue;
        }
        return total;
    }

    /// <summary>Replaces roman-numeral-shaped substrings with their decimal value.</summary>
    public string ReplaceRomanWithDecimal(string input) => ReplaceRomanWithDecimalMatcher().Replace(input, match => RomanToDecimal(match.Value).ToString());

    /// <summary>
    /// Whitespace-insensitive equality, also tried under the Naked, RemoveRepeats
    /// and RemoveDiacritics normalisations (non-empty results only).
    /// </summary>
    public bool StrictEqual(string title1, string title2)
    {
        title1 = WhitespaceMatcher().Replace(title1, "");
        title2 = WhitespaceMatcher().Replace(title2, "");
        return (title1.Length > 0 && title1 == title2) ||
               (Naked(title1).Length > 0 && Naked(title1) == Naked(title2)) ||
               (RemoveRepeats(title1).Length > 0 && RemoveRepeats(title1) == RemoveRepeats(title2)) ||
               (RemoveDiacritics(title1).Length > 0 && RemoveDiacritics(title1) == RemoveDiacritics(title2));
    }

    /// <summary>
    /// Counts how many words of <paramref name="target"/> can be found in
    /// <paramref name="test"/>, trying each word verbatim and under the
    /// diacritics/repeats/naked/roman normalisations. With
    /// <paramref name="shouldBeInSequence"/> it instead returns the length of the
    /// longest run of consecutive matches.
    /// NOTE(review): testStr is sliced by (prevOffset + prevLength) both inside
    /// wrapReplace and again in the Where lambda before each term — the double
    /// slice looks unintended; confirm against the original JS port.
    /// </summary>
    public int CountTestTermsInTarget(string test, string target, bool shouldBeInSequence = false)
    {
        var replaceCount = 0;
        var prevReplaceCount = 0;
        var prevOffset = 0;
        var prevLength = 0;
        const int wordTolerance = 5;
        var wordsInTitle = WordMatcher().Split(target).Where(e => !string.IsNullOrEmpty(e)).ToList();
        const int magicLength = 3;
        var testStr = test;
        var inSequenceTerms = 1;
        var longestSequence = 0;
        // Invoked for every regex hit: records position/length of the hit, bumps
        // the counters, and (in sequence mode) closes the current run when the hit
        // lands further than wordTolerance characters into the remaining string.
        MatchEvaluator replacer = match =>
        {
            if (shouldBeInSequence && prevLength > 0 && match.Index >= wordTolerance)
            {
                if (inSequenceTerms > longestSequence)
                {
                    longestSequence = inSequenceTerms;
                }
                inSequenceTerms = 0;
            }
            prevOffset = match.Index;
            prevLength = match.Length;
            replaceCount++;
            inSequenceTerms++;
            return match.Value;
        };
        // Searches the remainder of testStr for newTerm, anchored with \b at the
        // start/end for the first/last word. NOTE(review): the Regex.Replace
        // result (the sliced remainder) is assigned back to testStr, discarding
        // the already-consumed prefix — that is how the scan advances.
        Action<string, bool, bool> wrapReplace = (newTerm, first, last) =>
        {
            var prefix = first ? @"\b" : "";
            var suffix = last ? @"\b" : "";
            testStr = Regex.Replace(testStr[(prevOffset + prevLength)..], $"{prefix}{newTerm}{suffix}", replacer);
        };
        var actual = wordsInTitle.Where((term, idx) =>
        {
            var first = idx == 0;
            var last = idx == wordsInTitle.Count - 1;
            testStr = testStr[(prevOffset + prevLength)..];
            // Try the term verbatim, then each normalised form (length-gated to
            // avoid matching trivially short fragments), then the roman->decimal form.
            wrapReplace(term, first, last);
            if (replaceCount > prevReplaceCount)
            {
                prevReplaceCount = replaceCount;
                return true;
            }
            if (RemoveDiacritics(term).Length >= magicLength)
            {
                wrapReplace(RemoveDiacritics(term), first, last);
                if (replaceCount > prevReplaceCount)
                {
                    prevReplaceCount = replaceCount;
                    return true;
                }
            }
            if (RemoveRepeats(term).Length >= magicLength)
            {
                wrapReplace(RemoveRepeats(term), first, last);
                if (replaceCount > prevReplaceCount)
                {
                    prevReplaceCount = replaceCount;
                    return true;
                }
            }
            if (Naked(term).Length >= magicLength)
            {
                wrapReplace(Naked(term), first, last);
                if (replaceCount > prevReplaceCount)
                {
                    prevReplaceCount = replaceCount;
                    return true;
                }
            }
            if (ReplaceRomanWithDecimal(term) == term)
            {
                return false;
            }
            wrapReplace(ReplaceRomanWithDecimal(term), first, last);
            if (replaceCount <= prevReplaceCount)
            {
                return false;
            }
            prevReplaceCount = replaceCount;
            return true;
        }).ToList();
        if (shouldBeInSequence)
        {
            return inSequenceTerms > longestSequence ? inSequenceTerms : longestSequence;
        }
        return actual.Count;
    }

    /// <summary>
    /// Flexible containment check: does the whitespace-stripped test contain the
    /// target under any normalisation, or strictly equal the movie/show title the
    /// torrent parser extracts from it? Year presence relaxes the minimum length.
    /// NOTE(review): .Movie.Title / .Show.Title dereference nullable properties of
    /// ParsedFilename without a null check — possible NullReferenceException when
    /// the parse fails; confirm the parser always populates both.
    /// </summary>
    public bool FlexEq(string test, string target, List<string> years)
    {
        var movieTitle = _torrentTitleParser.Parse(test).Movie.Title.ToLower();
        var tvTitle = _torrentTitleParser.Parse(test).Show.Title.ToLower();
        var target2 = WhitespaceMatcher().Replace(target, "");
        var test2 = WhitespaceMatcher().Replace(test, "");
        var magicLength = HasYear(test, years) ? 3 : 5;
        if (Naked(target2).Length >= magicLength && test2.Contains(Naked(target2)))
        {
            return true;
        }
        if (RemoveRepeats(target2).Length >= magicLength && test2.Contains(RemoveRepeats(target2)))
        {
            return true;
        }
        if (RemoveDiacritics(target2).Length >= magicLength && test2.Contains(RemoveDiacritics(target2)))
        {
            return true;
        }
        if (target2.Length >= Math.Ceiling(magicLength * 1.5) && test2.Contains(target2))
        {
            return true;
        }
        return StrictEqual(target, movieTitle) || StrictEqual(target, tvTitle);
    }

    /// <summary>
    /// Main fuzzy decision: fast FlexEq path first, then a weighted score of key
    /// terms (x2), common terms (x1) and year presence, accepted when the found
    /// score reaches ~85% of the total.
    /// NOTE(review): `sequenceCheck >= 0` is trivially true (counts are never
    /// negative), so the FlexEq branch effectively always returns true — looks
    /// like a threshold was intended.
    /// </summary>
    public bool MatchesTitle(string target, List<string> years, string test)
    {
        target = target.ToLower();
        test = test.ToLower();
        var splits = WordMatcher().Split(target).Where(e => !string.IsNullOrEmpty(e)).ToList();
        var containsYear = HasYear(test, years);
        if (FlexEq(test, target, years))
        {
            var sequenceCheck = CountTestTermsInTarget(test, string.Join(' ', splits), true);
            return containsYear || sequenceCheck >= 0;
        }
        var totalTerms = splits.Count;
        if (totalTerms == 0 || (totalTerms <= 2 && !containsYear))
        {
            return false;
        }
        // Key terms: non-common words longer than 1 char, or any word longer than 5,
        // plus whitespace-split chunks longer than 2 chars.
        var keyTerms = splits.Where(s => (s.Length > 1 && !_wordCollections.CommonWords.Contains(s)) || s.Length > 5).ToList();
        keyTerms.AddRange(target.Split(WhitespaceSeparator, StringSplitOptions.RemoveEmptyEntries).Where(e => e.Length > 2));
        var keySet = new HashSet<string>(keyTerms);
        var commonTerms = splits.Where(s => !keySet.Contains(s)).ToList();
        var hasYearScore = totalTerms * 1.5;
        var totalScore = keyTerms.Count * 2 + commonTerms.Count + hasYearScore;
        if (keyTerms.Count == 0 && totalTerms <= 2 && !containsYear)
        {
            return false;
        }
        var foundKeyTerms = CountTestTermsInTarget(test, string.Join(' ', keyTerms));
        var foundCommonTerms = CountTestTermsInTarget(test, string.Join(' ', commonTerms));
        var score = foundKeyTerms * 2 + foundCommonTerms + (containsYear ? hasYearScore : 0);
        // score / 0.85 >= totalScore  <=>  score >= 0.85 * totalScore (modulo Floor).
        return Math.Floor(score / 0.85) >= totalScore;
    }

    /// <summary>
    /// True when every required term (verbatim, or diacritics/repeats-normalised)
    /// occurs in the test title. Each found term is consumed (removed) so the same
    /// span cannot satisfy two terms.
    /// </summary>
    public bool IncludesMustHaveTerms(List<string> mustHaveTerms, string testTitle) =>
        mustHaveTerms.All(term =>
        {
            var newTitle = testTitle.Replace(term, "");
            if (newTitle != testTitle)
            {
                testTitle = newTitle;
                return true;
            }
            newTitle = testTitle.Replace(RemoveDiacritics(term), "");
            if (newTitle != testTitle)
            {
                testTitle = newTitle;
                return true;
            }
            newTitle = testTitle.Replace(RemoveRepeats(term), "");
            if (newTitle != testTitle)
            {
                testTitle = newTitle;
                return true;
            }
            return false;
        });

    /// <summary>
    /// Adult-content screen over the normalised target title.
    /// NOTE(review): the <paramref name="testTitle"/> parameter is never used, and
    /// the `!targetTitle.Contains(word)` guard compares normalised words against
    /// the un-normalised title — verify both against the intended behaviour.
    /// </summary>
    public bool HasNoBannedTerms(string targetTitle, string testTitle)
    {
        var normalisedTitle = targetTitle.NormalizeTitle();
        var normalisedWords = normalisedTitle.Split(' ');
        var hasBannedWords = normalisedWords.Where(word => word.Length >= 3).Any(word => !targetTitle.Contains(word) && _wordCollections.AdultWords.Contains(word));
        var hasCompounds = _compoundWords.Any(term => normalisedTitle.Contains(term, StringComparison.OrdinalIgnoreCase));
        return !hasBannedWords &&
               !hasCompounds;
    }

    /// <summary>
    /// Single-title adult-content screen.
    /// NOTE(review): `normalisedWords.Contains(word, ...)` is trivially true since
    /// word is drawn from normalisedWords — the effective check is just
    /// AdultWords membership.
    /// </summary>
    public bool HasNoBannedTerms(string targetTitle)
    {
        var normalisedTitle = targetTitle.NormalizeTitle();
        var normalisedWords = normalisedTitle.Split(' ');
        var hasBannedWords = normalisedWords.Where(word => word.Length >= 3).Any(word => normalisedWords.Contains(word, StringComparer.OrdinalIgnoreCase) && _wordCollections.AdultWords.Contains(word));
        var hasCompounds = _compoundWords.Any(term => normalisedTitle.Contains(term, StringComparison.OrdinalIgnoreCase));
        return !hasBannedWords &&
               !hasCompounds;
    }

    /// <summary>Combined gate: fuzzy title match AND no banned terms.</summary>
    public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle) => MatchesTitle(targetTitle, years, testTitle) && HasNoBannedTerms(targetTitle, testTitle);

    /// <summary>
    /// Counts words (length &gt; 3 after lower-casing and punctuation stripping)
    /// that are not in the common-word list.
    /// </summary>
    public int CountUncommonWords(string title)
    {
        var processedTitle = WhitespaceMatcher().Split(title)
            .Select(word => WordProcessingMatcher().Replace(word.ToLower(), ""))
            .Where(word => word.Length > 3)
            .ToList();
        return processedTitle.Count(word => !_wordCollections.CommonWords.Contains(word));
    }

    /// <summary>Delegates to the injected torrent title parser.</summary>
    public ParsedFilename Parse(string name) => _torrentTitleParser.Parse(name);

    /// <summary>Delegates torrent-type detection to the injected parser.</summary>
    public TorrentType GetTypeByName(string name) => _torrentTitleParser.GetTypeByName(name);
}

View File

@@ -0,0 +1,9 @@
namespace Producer.Features.ParseTorrentTitle;
public class QualityModel
{
public List<Source> Sources { get; set; } = [];
public QualityModifier? Modifier { get; set; }
public Resolution? Resolution { get; set; }
public Revision Revision { get; set; } = new();
}

View File

@@ -0,0 +1,10 @@
namespace Producer.Features.ParseTorrentTitle;
public sealed class QualityModifier : SmartEnum<QualityModifier, string>
{
public static readonly QualityModifier REMUX = new("REMUX", "REMUX");
public static readonly QualityModifier BRDISK = new("BRDISK", "BRDISK");
public static readonly QualityModifier RAWHD = new("RAWHD", "RAWHD");
private QualityModifier(string name, string value) : base(name, value) { }
}

View File

@@ -0,0 +1,230 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Derives a <see cref="QualityModel"/> (sources, resolution, modifier, revision)
/// from a release title, combining the resolution/source/codec sub-parsers with
/// modifier regexes and per-source fallback resolutions.
/// </summary>
public static partial class QualityParser
{
    [GeneratedRegex(@"\b(?<proper>proper|repack|rerip)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex ProperRegex();
    // Deliberately case-sensitive: only an upper-case "REAL" tag counts.
    [GeneratedRegex(@"\b(?<real>REAL)\b", RegexOptions.None, "en-GB")]
    private static partial Regex RealRegex();
    [GeneratedRegex(@"(?<version>v\d\b|\[v\d\])", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex VersionExp();
    [GeneratedRegex(@"\b(?<remux>(BD|UHD)?Remux)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex RemuxExp();
    [GeneratedRegex(@"\b(COMPLETE|ISO|BDISO|BDMux|BD25|BD50|BR.?DISK)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex BdiskExp();
    [GeneratedRegex(@"\b(?<rawhd>RawHD|1080i[-_. ]HDTV|Raw[-_. ]HD|MPEG[-_. ]?2)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex RawHdExp();
    [GeneratedRegex(@"hr[-_. ]ws", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex HighDefPdtvRegex();

    /// <summary>
    /// Parses <paramref name="title"/> into a quality model. Modifier checks run
    /// first; then the first matching source group wins and may set a fallback
    /// resolution when the resolution parser found none.
    /// </summary>
    public static void Parse(string title, out QualityModel result)
    {
        var normalizedTitle = title.Trim().Replace("_", " ").Replace("[", " ").Replace("]", " ").Trim().ToLower();
        ParseQualityModifyers(title, out var revision);
        ResolutionParser.Parse(normalizedTitle, out var resolution, out _);
        SourceParser.ParseSourceGroups(normalizedTitle, out var sourceGroups);
        SourceParser.Parse(normalizedTitle, out var source);
        VideoCodecsParser.Parse(normalizedTitle, out var codec, out _);
        result = new()
        {
            Sources = source,
            Resolution = resolution,
            Revision = revision,
            Modifier = null,
        };
        if (BdiskExp().IsMatch(normalizedTitle) && sourceGroups["bluray"])
        {
            result.Modifier = QualityModifier.BRDISK;
            result.Sources = [Source.BLURAY];
        }
        if (RemuxExp().IsMatch(normalizedTitle) && !sourceGroups["webdl"] && !sourceGroups["hdtv"])
        {
            result.Modifier = QualityModifier.REMUX;
            result.Sources = [Source.BLURAY];
        }
        if (RawHdExp().IsMatch(normalizedTitle) && result.Modifier != QualityModifier.BRDISK && result.Modifier != QualityModifier.REMUX)
        {
            result.Modifier = QualityModifier.RAWHD;
            result.Sources = [Source.TV];
        }
        if (sourceGroups["bluray"])
        {
            result.Sources = [Source.BLURAY];
            if (codec == VideoCodec.XVID)
            {
                // XviD blurays are almost certainly DVD re-encodes.
                result.Resolution = Resolution.R480P;
                result.Sources = [Source.DVD];
            }
            if (resolution == null)
            {
                // assume bluray is at least 720p
                result.Resolution = Resolution.R720P;
            }
            if (resolution == null && result.Modifier == QualityModifier.BRDISK)
            {
                result.Resolution = Resolution.R1080P;
            }
            if (resolution == null && result.Modifier == QualityModifier.REMUX)
            {
                result.Resolution = Resolution.R2160P;
            }
            return;
        }
        if (sourceGroups["webdl"] || sourceGroups["webrip"])
        {
            result.Sources = source;
            // (A second, identical `resolution == null` 480p assignment was removed —
            // it was a verbatim duplicate with no effect.)
            if (resolution == null)
            {
                result.Resolution = Resolution.R480P;
            }
            if (resolution == null && title.Contains("[WEBDL]"))
            {
                result.Resolution = Resolution.R720P;
            }
            return;
        }
        if (sourceGroups["hdtv"])
        {
            result.Sources = [Source.TV];
            if (resolution == null)
            {
                result.Resolution = Resolution.R480P;
            }
            if (resolution == null && title.Contains("[HDTV]"))
            {
                result.Resolution = Resolution.R720P;
            }
            return;
        }
        if (sourceGroups["pdtv"] || sourceGroups["sdtv"] || sourceGroups["dsr"] || sourceGroups["tvrip"])
        {
            result.Sources = [Source.TV];
            if (HighDefPdtvRegex().IsMatch(normalizedTitle))
            {
                result.Resolution = Resolution.R720P;
                return;
            }
            result.Resolution = Resolution.R480P;
            return;
        }
        if (sourceGroups["bdrip"] || sourceGroups["brrip"])
        {
            if (codec == VideoCodec.XVID)
            {
                result.Resolution = Resolution.R480P;
                result.Sources = [Source.DVD];
                return;
            }
            if (resolution == null)
            {
                // bdrips are at least 480p
                result.Resolution = Resolution.R480P;
            }
            result.Sources = [Source.BLURAY];
            return;
        }
        if (sourceGroups["workprint"])
        {
            result.Sources = [Source.WORKPRINT];
            return;
        }
        if (sourceGroups["cam"])
        {
            result.Sources = [Source.CAM];
            return;
        }
        if (sourceGroups["ts"])
        {
            result.Sources = [Source.TELESYNC];
            return;
        }
        if (sourceGroups["tc"])
        {
            result.Sources = [Source.TELECINE];
            return;
        }
        // No source tag at all: an HD resolution strongly suggests a web release.
        if (result.Modifier == null && (resolution == Resolution.R2160P || resolution == Resolution.R1080P || resolution == Resolution.R720P))
        {
            result.Sources = [Source.WEBDL];
        }
    }

    /// <summary>
    /// Extracts the revision (proper/repack version, explicit vN tag, REAL count).
    /// </summary>
    private static void ParseQualityModifyers(string title, out Revision revision)
    {
        var normalizedTitle = title.Trim().Replace("_", " ").Trim().ToLower();
        revision = new()
        {
            Version = 1,
            Real = 0,
        };
        if (ProperRegex().IsMatch(normalizedTitle))
        {
            revision.Version = 2;
        }
        var versionResult = VersionExp().Match(normalizedTitle);
        if (versionResult.Success)
        {
            // get numbers from version regex
            var digits = Regex.Match(versionResult.Groups["version"].Value, @"\d");
            if (digits.Success)
            {
                var value = int.Parse(digits.Value);
                revision.Version = value;
            }
        }
        // Count case-sensitive REAL tags in the ORIGINAL (non-lowered) title.
        // BUG FIX: the previous `while (regex.IsMatch(title)) realCount += 1;`
        // never advanced through the input, so any title containing "REAL"
        // spun forever. Matches().Count gives the intended occurrence count.
        revision.Real = RealRegex().Matches(title).Count;
    }
}

View File

@@ -0,0 +1,13 @@
namespace Producer.Features.ParseTorrentTitle;
public sealed class Resolution : SmartEnum<Resolution, string>
{
public static readonly Resolution R2160P = new("R2160P", "2160P");
public static readonly Resolution R1080P = new("R1080P", "1080P");
public static readonly Resolution R720P = new("R720P", "720P");
public static readonly Resolution R576P = new("R576P", "576P");
public static readonly Resolution R540P = new("R540P", "540P");
public static readonly Resolution R480P = new("R480P", "480P");
private Resolution(string name, string value) : base(name, value) { }
}

View File

@@ -0,0 +1,55 @@
namespace Producer.Features.ParseTorrentTitle;
public partial class ResolutionParser
{
[GeneratedRegex(@"(?<R2160P>2160p|4k[-_. ](?:UHD|HEVC|BD)|(?:UHD|HEVC|BD)[-_. ]4k|\b(4k)\b|COMPLETE.UHD|UHD.COMPLETE)", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex R2160pExp();
[GeneratedRegex(@"(?<R1080P>1080(i|p)|1920x1080)(10bit)?", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex R1080pExp();
[GeneratedRegex(@"(?<R720P>720(i|p)|1280x720|960p)(10bit)?", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex R720pExp();
[GeneratedRegex(@"(?<R576P>576(i|p))", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex R576pExp();
[GeneratedRegex(@"(?<R540P>540(i|p))", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex R540pExp();
[GeneratedRegex(@"(?<R480P>480(i|p)|640x480|848x480)", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex R480Exp();
private static readonly Regex ResolutionExp = new(string.Join("|", R2160pExp(), R1080pExp(), R720pExp(), R576pExp(), R540pExp(), R480Exp()), RegexOptions.IgnoreCase);
public static void Parse(string title, out Resolution? resolution, out string? source)
{
resolution = null;
source = null;
var result = ResolutionExp.Match(title);
if (result.Success)
{
foreach (var resolutionEnum in Resolution.List)
{
if (!result.Groups[resolutionEnum.Name].Success)
{
continue;
}
resolution = resolutionEnum;
source = result.Groups[resolutionEnum.Name].Value;
return;
}
}
// Fallback to guessing from some sources
// Make safe assumptions like dvdrip is probably 480p
SourceParser.Parse(title, out var sourceList);
if (sourceList.Contains(Source.DVD))
{
resolution = Resolution.R480P;
}
}
}

View File

@@ -0,0 +1,7 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Release revision info produced by QualityParser: Version is 1 by default,
/// 2 for proper/repack/rerip, or the explicit vN tag; Real counts upper-case
/// "REAL" tags in the title.
/// </summary>
public class Revision
{
    public int Version { get; set; }
    public int Real { get; set; }
}

View File

@@ -0,0 +1,16 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Season/episode information parsed from a release title.
/// NOTE(review): apart from ReleaseTitle this duplicates the shape of ParsedTv —
/// consider sharing a base type.
/// </summary>
public class Season
{
    // The raw release title this result was parsed from.
    public string? ReleaseTitle { get; set; }
    public string? SeriesTitle { get; set; }
    public List<int> Seasons { get; set; } = [];
    public List<int> EpisodeNumbers { get; set; } = [];
    // Air date for date-based naming schemes; null otherwise.
    public DateTime? AirDate { get; set; }
    public bool FullSeason { get; set; }
    public bool IsPartialSeason { get; set; }
    public bool IsMultiSeason { get; set; }
    public bool IsSeasonExtra { get; set; }
    public bool IsSpecial { get; set; }
    // Part number when a season is split into parts (e.g. "S01 Part 2").
    public int SeasonPart { get; set; }
}

View File

@@ -0,0 +1,44 @@
namespace Producer.Features.ParseTorrentTitle;
// Reject-list for SeasonParser: patterns matching titles that are just hashes or
// junk uploader filenames and must not be parsed as season/episode info.
public static partial class SeasonParser
{
    [GeneratedRegex(@"^[0-9a-zA-Z]{32}", RegexOptions.IgnoreCase)]
    private static partial Regex GenericMatchForMd5AndMixedCaseHashesExp();
    [GeneratedRegex(@"^[a-z0-9]{24}$", RegexOptions.IgnoreCase)]
    private static partial Regex GenericMatchForShorterLowerCaseHashesExp();
    // NOTE(review): IgnoreCase makes [A-Z] vs [a-z] in these two patterns
    // interchangeable; they differ only in length (11 vs 12 letters).
    [GeneratedRegex(@"^[A-Z]{11}\d{3}$", RegexOptions.IgnoreCase)]
    private static partial Regex FormatSeenOnSomeNZBGeekReleasesExp();
    [GeneratedRegex(@"^[a-z]{12}\d{3}$", RegexOptions.IgnoreCase)]
    private static partial Regex FormatSeenOnSomeNZBGeekReleasesExp2();
    [GeneratedRegex(@"^Backup_\d{5,}S\d{2}-\d{2}$", RegexOptions.IgnoreCase)]
    private static partial Regex BackupFilenameExp();
    [GeneratedRegex(@"^123$", RegexOptions.IgnoreCase)]
    private static partial Regex StartedAppearingDecember2014Exp();
    [GeneratedRegex(@"^abc$", RegexOptions.IgnoreCase)]
    private static partial Regex StartedAppearingJanuary2015Exp();
    [GeneratedRegex(@"^b00bs$", RegexOptions.IgnoreCase)]
    private static partial Regex StartedAppearingJanuary2015Exp2();
    [GeneratedRegex(@"^\d{6}_\d{2}$", RegexOptions.IgnoreCase)]
    private static partial Regex StartedAppearingAugust2018Exp();
    // Factories (not instances) so the generated regexes stay lazily created.
    private static List<Func<Regex>> _rejectedRegex =
    [
        GenericMatchForMd5AndMixedCaseHashesExp,
        GenericMatchForShorterLowerCaseHashesExp,
        FormatSeenOnSomeNZBGeekReleasesExp,
        FormatSeenOnSomeNZBGeekReleasesExp2,
        BackupFilenameExp,
        StartedAppearingDecember2014Exp,
        StartedAppearingJanuary2015Exp,
        StartedAppearingJanuary2015Exp2,
        StartedAppearingAugust2018Exp
    ];
}

View File

@@ -0,0 +1,248 @@
namespace Producer.Features.ParseTorrentTitle;
public static partial class SeasonParser
{
[GeneratedRegex(@"^(?<airyear>19[6-9]\d|20\d\d)(?<sep>[-_]?)(?<airmonth>0\d|1[0-2])\k<sep>(?<airday>[0-2]\d|3[01])(?!\d)", RegexOptions.IgnoreCase)]
private static partial Regex DailyEpisodesWithoutTitleExp();
[GeneratedRegex(@"^(?:\W*S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}", RegexOptions.IgnoreCase)]
private static partial Regex MultiPartEpisodesWithoutTitleExp();
[GeneratedRegex(@"^(?<title>.+?)[-_. ]S(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:[E-_. ]?[ex]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+(?:[-_. ]?[ex]?(?<episode1>(?<!\d+)\d{1,2}(?!\d+)))+", RegexOptions.IgnoreCase)]
private static partial Regex MultiEpisodeWithSingleEpisodeNumbersExp();
[GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:-|[ex]|\W[ex]|_){1,2}(?<episode1>\d{2,3}(?!\d+)))+).+?(?:\[.+?\])(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex MultiEpisodeWithTitleAndTrailingInfoInSlashesExp();
[GeneratedRegex(@"(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[-_]|[ex]){1,2}(?<episode>\d{2,3}(?!\d+))){2,})", RegexOptions.IgnoreCase)]
private static partial Regex EpisodesWithoutTitleMultiExp();
[GeneratedRegex(@"^(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[-_ ]?[ex])(?<episode>\d{2,3}(?!\d+))))", RegexOptions.IgnoreCase)]
private static partial Regex EpisodesWithoutTitleSingleExp();
[GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)(?<title>.+?)[-_. ](?:Episode)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase)]
private static partial Regex AnimeSubGroupTitleEpisodeAbsoluteEpisodeNumberExp();
// Anime: [SubGroup] Title 01 ... S01E01 — absolute episode number(s) before a season/episode token; optional 8-char hash.
[GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+(?<absoluteepisode>\d{2,3}(\.\d{1,2})?))+(?:_|-|\s|\.)+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+).*?(?<hash>[([]\w{8}[)\]])?(?:$|\.)", RegexOptions.IgnoreCase)]
private static partial Regex AnimeSubGroupTitleAbsoluteEpisodeNumberSeasonEpisodeExp();
// Anime: [SubGroup] Title S01E01 ... 01 — season/episode token before the absolute episode number(s).
[GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:[-_\W](?<![()[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(?:(?:_|-|\s|\.)+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+)))+.*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
private static partial Regex AnimeSubGroupTitleSeasonEpisodeAbsoluteEpisodeNumberExp();
// Anime: [SubGroup] Title S01E01 with no absolute episode number.
[GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:[-_\W](?<![()[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(?:\s|\.).*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
private static partial Regex AnimeSubGroupTitleSeasonEpisodeExp();
// Anime: [SubGroup] title that itself ends in digits, followed by a 3-digit absolute episode number; optional special/ova/ovd marker.
[GeneratedRegex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>[^-]+?\d+?)[-_. ]+(?:[-_. ]?(?<absoluteepisode>\d{3}(\.\d{1,2})?(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)", RegexOptions.IgnoreCase)]
private static partial Regex AnimeSubGroupTitleWithTrailingNumberAbsoluteEpisodeNumberExp();
// Anime: [SubGroup] Title - 01 — dash-separated absolute episode number(s).
[GeneratedRegex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)(?:[. ]-[. ](?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+|[-])))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)", RegexOptions.IgnoreCase)]
private static partial Regex AnimeSubGroupTitleAbsoluteEpisodeNumberExp();
// Anime: [SubGroup] Title 01 / #01 / (01) — absolute episode number with optional special marker.
[GeneratedRegex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)[-_. ]+\(?(?:[-_. ]?#?(?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+)))+\)?(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)", RegexOptions.IgnoreCase)]
private static partial Regex AnimeSubGroupTitleAbsoluteEpisodeNumberSpecialExp();
// Multi-episode releases where the SxxEyy token repeats, e.g. Title.S01E01.S01E02.
[GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[ex]|[-_. ]e){1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}", RegexOptions.IgnoreCase)]
private static partial Regex MultiEpisodeRepeatedExp();
// Single episode with title, guarding against x264/x265 being read as an episode number.
[GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:[ex]|\W[ex]){1,2}(?<episode>(?!265|264)\d{2,3}(?!\d+|(?:[ex]|\W[ex]|_|-){1,2})))", RegexOptions.IgnoreCase)]
private static partial Regex SingleEpisodesWithTitleExp();
// Anime: Title S01E01 ... 001 ... [SubGroup] — sub group trailing at the end (.mkv).
[GeneratedRegex(@"^(?<title>.+?)(?:[-_\W](?<![()[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>(?<!\d+)\d{2}(?!\d+)))).+?(?:[-_. ]?(?<absoluteepisode>(?<!\d+)\d{3}(\.\d{1,2})?(?!\d+)))+.+?\[(?<subgroup>.+?)\](?:$|\.mkv)", RegexOptions.IgnoreCase)]
private static partial Regex AnimeTitleSeasonEpisodeNumberAbsoluteEpisodeNumberSubGroupExp();
// Anime: Title Episode 01 ... [SubGroup] with optional trailing hash.
[GeneratedRegex(@"^(?<title>.+?)[-_. ]Episode(?:[-_. ]+(?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:.+?)\[(?<subgroup>.+?)\].*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
private static partial Regex AnimeTitleEpisodeAbsoluteEpisodeNumberSubGroupHashExp();
// Anime: Title 001.5 ... [SubGroup] — 3-digit absolute episode with mandatory decimal part.
[GeneratedRegex(@"^(?<title>.+?)(?:(?:_|-|\s|\.)+(?<absoluteepisode>\d{3}(\.\d{1,2})(?!\d+)))+(?:.+?)\[(?<subgroup>.+?)\].*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
private static partial Regex AnimeTitleAbsoluteEpisodeNumberSubGroupHashExp();
// Anime: Title 01 ... [8-char hash] — no sub group, hash is required.
[GeneratedRegex(@"^(?<title>.+?)(?:(?:_|-|\s|\.)+(?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?[-_. ]+.*?(?<hash>\[\w{8}\])(?:$|\.)", RegexOptions.IgnoreCase)]
private static partial Regex AnimeTitleAbsoluteEpisodeNumberHashExp();
// Air date (yyyy mm dd) plus an SxxEyy token; only the season/episode is captured for numbering.
// FIX: the pattern ended in a literal "/i" — a JavaScript regex-flag leftover from the port that
// forced the expression to match a trailing "/i" which never exists; case-insensitivity is already
// supplied via RegexOptions.IgnoreCase.
[GeneratedRegex(@"^(?<title>.+?)?\W*(?<airdate>\d{4}\W+[0-1][0-9]\W+[0-3][0-9])(?!\W+[0-3][0-9])[-_. ](?:s?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+)))(?:[ex](?<episode>(?<!\d+)(?:\d{1,3})(?!\d+)))", RegexOptions.IgnoreCase)]
private static partial Regex EpisodesWithAirdateAndSeasonEpisodeNumberCaptureSeasonEpisodeOnlyExp();
// Air date split into year/month/day groups plus an SxxEyy token further along the title.
// FIX: removed the literal "/i" suffix (JavaScript flag leftover) that made the pattern
// require a trailing "/i" in the title; RegexOptions.IgnoreCase already handles casing.
[GeneratedRegex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])(?!\W+[0-3][0-9]).+?(?:s?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+)))(?:[ex](?<episode>(?<!\d+)(?:\d{1,3})(?!\d+)))", RegexOptions.IgnoreCase)]
private static partial Regex EpisodesWithAirdateAndSeasonEpisodeNumberExp();
// Title S01E01(-E02...) — single and multi-episode with 1-2 digit season.
[GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:e|\We|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:-|e|\We|_){1,2}(?<episode1>\d{2,3}(?!\d+)))*)\W?(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex EpisodesWithTitleSingleEpisodesMultiEpisodeExp();
// Title S2024E01(-E02...) — 4-digit season, S..E separator style.
[GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S(?<season>(?<!\d+)(?:\d{4})(?!\d+))(?:e|\We|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:-|e|\We|_){1,2}(?<episode1>\d{2,3}(?!\d+)))*)\W?(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex EpisodesWithTitle4DigitSeasonNumberSingleEpisodesMultiEpisodeExp();
// Title 2024x01(-x02...) — 4-digit season, "x" separator style.
[GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+(?<season>(?<!\d+)(?:\d{4})(?!\d+))(?:x|\Wx){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:-|x|\Wx|_){1,2}(?<episode1>\d{2,3}(?!\d+)))*)\W?(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex EpisodesWithTitle4DigitSeasonNumberSingleEpisodesMultiEpisodeExp2();
// Title S01-S03 — multi-season pack range.
[GeneratedRegex(@"^(?<title>.+?)[-_. ]+S(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))\W?-\W?S?(?<season1>(?<!\d+)(?:\d{1,2})(?!\d+))", RegexOptions.IgnoreCase)]
private static partial Regex MultiSeasonPackExp();
// Title S01 Part 1 — partial season pack (seasonpart group drives IsPartialSeason downstream).
[GeneratedRegex(@"^(?<title>.+?)(?:\W+S(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<seasonpart>\d{1,2}(?!\d+)))+)", RegexOptions.IgnoreCase)]
private static partial Regex PartialSeasonPackExp();
// Mini-series whose title ends in a year, followed by Part/e episode numbers.
[GeneratedRegex(@"^(?<title>.+?\d{4})(?:\W+(?:(?:Part\W?|e)(?<episode>\d{1,2}(?!\d+)))+)", RegexOptions.IgnoreCase)]
private static partial Regex MiniSeriesWithYearInTitleExp();
// Mini-series multi-episode: Title.e01e02...
[GeneratedRegex(@"^(?<title>.+?)(?:[-._ ][e])(?<episode>\d{2,3}(?!\d+))(?:(?:-?[e])(?<episode1>\d{2,3}(?!\d+)))+", RegexOptions.IgnoreCase)]
private static partial Regex MiniSeriesMultiEpisodesExp();
// Mini-series single episode: Title Part 1 / Title e1.
[GeneratedRegex(@"^(?<title>.+?)(?:\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<episode>\d{1,2}(?!\d+)))+)", RegexOptions.IgnoreCase)]
private static partial Regex MiniSeriesEpisodesExp();
// Mini-series with spelled-out part numbers: "Title Part One".
// FIX: the original pattern contained "(>[-._ ])", which requires a literal '>' in the title —
// a mangled port of Sonarr's atomic group "(?>[-._ ])" that asserts a separator after the word.
[GeneratedRegex(@"^(?<title>.+?)(?:\W+(?:Part[-._ ](?<episode>One|Two|Three|Four|Five|Six|Seven|Eight|Nine)(?>[-._ ])))", RegexOptions.IgnoreCase)]
private static partial Regex MiniSeriesEpisodesExp2();
// Mini-series "1of5" style episode numbering.
[GeneratedRegex(@"^(?<title>.+?)(?:\W+(?:(?<episode>(?<!\d+)\d{1,2}(?!\d+))of\d+)+)", RegexOptions.IgnoreCase)]
private static partial Regex MiniSeriesEpisodesExp3();
// "Title Season 01 Episode 03" long-hand format.
[GeneratedRegex(@"(?:.*(?:""|^))(?<title>.*?)(?:[-_\W](?<![()[]))+(?:\W?Season\W?)(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)+(?:Episode\W)(?:[-_. ]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+", RegexOptions.IgnoreCase)]
private static partial Regex SupportsSeason01Episode03Exp();
// Multi-episode with the season/episode token in square brackets: Title [S01E01E02].
[GeneratedRegex(@"(?:.*(?:^))(?<title>.*?)[-._ ]+\[S(?<season>(?<!\d+)\d{2}(?!\d+))(?:[E-]{1,2}(?<episode>(?<!\d+)\d{2}(?!\d+)))+\]", RegexOptions.IgnoreCase)]
private static partial Regex MultiEpisodeWithEpisodesInSquareBracketsExp();
// Multi-episode with no space between the title and S01E01E02.
[GeneratedRegex(@"(?:.*(?:^))(?<title>.*?)S(?<season>(?<!\d+)\d{2}(?!\d+))(?:E(?<episode>(?<!\d+)\d{2}(?!\d+)))+", RegexOptions.IgnoreCase)]
private static partial Regex MultiEpisodeReleaseWithNoSpaceBetweenSeriesTitleAndSeasonExp();
// Single episode S01 Ep01-style token.
[GeneratedRegex(@"(?:.*(?:""|^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)?Ep?[ ._]?(?<episode>(?<!\d+)\d{1,2}(?!\d+))", RegexOptions.IgnoreCase)]
private static partial Regex SingleEpisodeSeasonOrEpisodeExp();
// Three-digit season numbers: S123E01.
[GeneratedRegex(@"(?:.*(?:""|^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{3}(?!\d+))(?:\W|_)?E(?<episode>(?<!\d+)\d{1,2}(?!\d+))", RegexOptions.IgnoreCase)]
private static partial Regex ThreeDigitSeasonExp();
// Five-digit episode numbers with a title: Title S01E00001.
[GeneratedRegex(@"^(?:(?<title>.+?)(?:_|-|\s|\.)+)(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:(?:-|[ex]|\W[ex]|_){1,2}(?<episode>(?<!\d+)\d{5}(?!\d+)))", RegexOptions.IgnoreCase)]
private static partial Regex FiveDigitEpisodeNumberWithTitleExp();
// Five-digit multi-episode numbers with an "ep" separator.
[GeneratedRegex(@"^(?:(?<title>.+?)(?:_|-|\s|\.)+)(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:(?:[-_. ]{1,3}ep){1,2}(?<episode>(?<!\d+)\d{5}(?!\d+)))", RegexOptions.IgnoreCase)]
private static partial Regex FiveDigitMultiEpisodeWithTitleExp();
// Season and episode separated by a dash: Title.S01 - E01.
[GeneratedRegex(@"^(?<title>.+?)(?:_|-|\s|\.)+S(?<season>\d{2}(?!\d+))(\W-\W)E(?<episode>(?<!\d+)\d{2}(?!\d+))(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex SeparatedSeasonAndEpisodeNumbersExp();
// Anime title that already contains "S1"-style season, then a widely separated absolute episode.
[GeneratedRegex(@"^(?<title>.+?S\d{1,2})[-_. ]{3,}(?:EP)?(?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+))", RegexOptions.IgnoreCase)]
private static partial Regex AnimeTitleWithSeasonNumberAbsoluteEpisodeNumberExp();
// French-style anime titles: "Title - Episode 1" with a single digit.
[GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)[-_. ]+?(?:Episode[-_. ]+?)(?<absoluteepisode>\d{1}(\.\d{1,2})?(?!\d+))", RegexOptions.IgnoreCase)]
private static partial Regex AnimeFrenchTitlesWithSingleEpisodeNumbersExp();
// Season-only releases (Title S01 / Title Season 1), optionally flagged EXTRAS/SUBPACK.
[GeneratedRegex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{1,2}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex SeasonOnlyReleasesExp();
// Season-only releases with a 4-digit season number.
[GeneratedRegex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{4}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex FourDigitSeasonOnlyReleasesExp();
// Title [S01E01] — whole season/episode token in square brackets.
[GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+\[S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:-|[ex]|\W[ex]|_){1,2}(?<episode>(?<!\d+)\d{2}(?!\d+|i|p)))+\])\W?(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex EpisodesWithTitleAndSeasonEpisodeInSquareBracketsExp();
// Bare "103"/"113" naming: single-digit season immediately followed by a 2-digit episode.
[GeneratedRegex(@"^(?<title>.+?)?(?:(?:[_.](?<![()[!]))+(?<season>(?<!\d+)[1-9])(?<episode>[1-9][0-9]|[0][1-9])(?![a-z]|\d+))+(?:[_.]|$)", RegexOptions.IgnoreCase)]
private static partial Regex Supports103_113NamingExp();
// 4-digit episode numbers without a title: S01E0001.
[GeneratedRegex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex FourDigitEpisodeNumberEpisodesWithoutTitleSingleAndMultiExp();
// 4-digit episode numbers with a title.
[GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)\W?(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex FourDigitEpisodeNumberEpisodesWithTitleSingleAndMultiExp();
// Daily episodes: Title yyyy mm dd.
[GeneratedRegex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})[-_. ]+(?<airmonth>[0-1][0-9])[-_. ]+(?<airday>[0-3][0-9])(?![-_. ]+[0-3][0-9])", RegexOptions.IgnoreCase)]
private static partial Regex EpisodesWithAirdateExp();
// Daily episodes: Title mm dd yyyy.
[GeneratedRegex(@"^(?<title>.+?)?\W*(?<airmonth>[0-1][0-9])[-_. ]+(?<airday>[0-3][0-9])[-_. ]+(?<airyear>\d{4})(?!\d+)", RegexOptions.IgnoreCase)]
private static partial Regex EpisodesWithAirdateExp2();
// Bare "1103"/"1113" naming: 2-digit season fused to a 2-digit episode.
[GeneratedRegex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![()[!]))*(?<season>(?<!\d+|\(|\[|e|x)\d{2})(?<episode>(?<!e|x)\d{2}(?!p|i|\d+|\)|\]|\W\d+|\W(?:e|ep|x)\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex Supports1103_1113NamingExp();
// Single-digit episode numbers: Title S01E1.
[GeneratedRegex(@"^(?<title>.*?)(?:(?:[-_\W](?<![()[!]))+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:-|[ex]){1,2}(?<episode>\d{1}))+)+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase)]
private static partial Regex EpisodesWithSingleDigitEpisodeNumberExp();
// iTunes "Season 1 05 Title" naming.
[GeneratedRegex(@"^(?:Season(?:_|-|\s|\.)(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:_|-|\s|\.)(?<episode>(?<!\d+)\d{1,2})", RegexOptions.IgnoreCase)]
private static partial Regex ITunesSeason1_05TitleQualityExp();
// iTunes "1-05 Title" naming.
[GeneratedRegex(@"^(?:(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:-(?<episode>\d{2,3}(?!\d+))))", RegexOptions.IgnoreCase)]
private static partial Regex ITunes1_05TitleQualityExp();
// Anime range: Title ep01-02 — captures both endpoints of the absolute-episode range.
[GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:_|\s|\.)+(?:e|ep)(?<absoluteepisode>\d{2,3}(\.\d{1,2})?)-(?<absoluteepisode1>(?<!\d+)\d{1,2}(\.\d{1,2})?(?!\d+|-)).*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
private static partial Regex AnimeRange_TitleAbsoluteEpisodeNumberExp();
// Anime: Title ep01 — e/ep-prefixed absolute episode numbers.
[GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:(?:_|-|\s|\.)+(?:e|ep)(?<absoluteepisode>\d{2,4}(\.\d{1,2})?))+.*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
private static partial Regex Anime_TitleAbsoluteEpisodeNumberExp();
// Anime: Title Episode 01 — long-hand "Episode" keyword.
[GeneratedRegex(@"^(?<title>.+?)[-_. ](?:Episode)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase)]
private static partial Regex Anime_TitleEpisodeAbsoluteEpisodeNumberExp();
// Anime range: Title 01-02 — bare 1-2 digit range with both endpoints captured.
[GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)[_. ]+(?<absoluteepisode>(?<!\d+)\d{1,2}(\.\d{1,2})?(?!\d+))-(?<absoluteepisode1>(?<!\d+)\d{1,2}(\.\d{1,2})?(?!\d+|-))(?:_|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase)]
private static partial Regex AnimeRange_TitleAbsoluteEpisodeNumberExp2();
// Anime: Title 01 — bare absolute episode numbers with [-_. ] separators.
[GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase)]
private static partial Regex Anime_TitleAbsoluteEpisodeNumberExp2();
// Anime: Title 01 — broader non-word separators variant.
[GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase)]
private static partial Regex Anime_TitleAbsoluteEpisodeNumberExp3();
// "Extant"-style naming: Title.10102 = season 1, episodes 01+02.
[GeneratedRegex(@"^(?<title>.+?)[-_. ](?<season>[0]?\d?)(?:(?<episode>\d{2}){2}(?!\d+))[-_. ]", RegexOptions.IgnoreCase)]
private static partial Regex ExtantTerribleMultiEpisodeNamingExp();
// Ordered list of every accepted title pattern; callers try them top to bottom, so the
// more specific patterns must stay ahead of the generic fallbacks.
// readonly added: the list is built once and never reassigned.
private static readonly List<Func<Regex>> _validRegexes =
[
    DailyEpisodesWithoutTitleExp,
    MultiPartEpisodesWithoutTitleExp,
    MultiEpisodeWithSingleEpisodeNumbersExp,
    MultiEpisodeWithTitleAndTrailingInfoInSlashesExp,
    EpisodesWithoutTitleMultiExp,
    EpisodesWithoutTitleSingleExp,
    AnimeSubGroupTitleEpisodeAbsoluteEpisodeNumberExp,
    AnimeSubGroupTitleAbsoluteEpisodeNumberSeasonEpisodeExp,
    AnimeSubGroupTitleSeasonEpisodeAbsoluteEpisodeNumberExp,
    AnimeSubGroupTitleSeasonEpisodeExp,
    AnimeSubGroupTitleWithTrailingNumberAbsoluteEpisodeNumberExp,
    AnimeSubGroupTitleAbsoluteEpisodeNumberExp,
    AnimeSubGroupTitleAbsoluteEpisodeNumberSpecialExp,
    MultiEpisodeRepeatedExp,
    SingleEpisodesWithTitleExp,
    AnimeTitleSeasonEpisodeNumberAbsoluteEpisodeNumberSubGroupExp,
    AnimeTitleEpisodeAbsoluteEpisodeNumberSubGroupHashExp,
    AnimeTitleAbsoluteEpisodeNumberSubGroupHashExp,
    AnimeTitleAbsoluteEpisodeNumberHashExp,
    EpisodesWithAirdateAndSeasonEpisodeNumberCaptureSeasonEpisodeOnlyExp,
    EpisodesWithAirdateAndSeasonEpisodeNumberExp,
    EpisodesWithTitleSingleEpisodesMultiEpisodeExp,
    EpisodesWithTitle4DigitSeasonNumberSingleEpisodesMultiEpisodeExp,
    EpisodesWithTitle4DigitSeasonNumberSingleEpisodesMultiEpisodeExp2,
    MultiSeasonPackExp,
    PartialSeasonPackExp,
    MiniSeriesWithYearInTitleExp,
    MiniSeriesMultiEpisodesExp,
    MiniSeriesEpisodesExp,
    MiniSeriesEpisodesExp2,
    MiniSeriesEpisodesExp3,
    SupportsSeason01Episode03Exp,
    MultiEpisodeWithEpisodesInSquareBracketsExp,
    MultiEpisodeReleaseWithNoSpaceBetweenSeriesTitleAndSeasonExp,
    SingleEpisodeSeasonOrEpisodeExp,
    ThreeDigitSeasonExp,
    FiveDigitEpisodeNumberWithTitleExp,
    SeparatedSeasonAndEpisodeNumbersExp,
    AnimeTitleWithSeasonNumberAbsoluteEpisodeNumberExp,
    AnimeFrenchTitlesWithSingleEpisodeNumbersExp,
    SeasonOnlyReleasesExp,
    FourDigitSeasonOnlyReleasesExp,
    EpisodesWithTitleAndSeasonEpisodeInSquareBracketsExp,
    Supports103_113NamingExp,
    FourDigitEpisodeNumberEpisodesWithoutTitleSingleAndMultiExp,
    FourDigitEpisodeNumberEpisodesWithTitleSingleAndMultiExp,
    EpisodesWithAirdateExp,
    EpisodesWithAirdateExp2,
    Supports1103_1113NamingExp,
    EpisodesWithSingleDigitEpisodeNumberExp,
    ITunesSeason1_05TitleQualityExp,
    ITunes1_05TitleQualityExp,
    AnimeRange_TitleAbsoluteEpisodeNumberExp,
    Anime_TitleAbsoluteEpisodeNumberExp,
    Anime_TitleEpisodeAbsoluteEpisodeNumberExp,
    AnimeRange_TitleAbsoluteEpisodeNumberExp2,
    Anime_TitleAbsoluteEpisodeNumberExp2,
    Anime_TitleAbsoluteEpisodeNumberExp3,
    ExtantTerribleMultiEpisodeNamingExp,
];
}

View File

@@ -0,0 +1,307 @@
namespace Producer.Features.ParseTorrentTitle;
public static partial class SeasonParser
{
    [GeneratedRegex(@"^(?:\[.+?\])+", RegexOptions.None)]
    private static partial Regex RequestInfoExp();
    [GeneratedRegex(@"(?<=[_.-])(?<airdate>(?<!\d)(?<airyear>[1-9]\d{1})(?<airmonth>[0-1][0-9])(?<airday>[0-3][0-9]))(?=[_.-])", RegexOptions.IgnoreCase)]
    private static partial Regex SixDigitAirDateMatchExp();

    /// <summary>
    /// Attempts to extract season/episode/air-date information from a release title.
    /// Returns null when the title fails pre-validation or no known pattern matches.
    /// </summary>
    public static Season? Parse(string title)
    {
        if (!PreValidation(title))
        {
            return null;
        }

        var simpleTitle = TitleParser.SimplifyTitle(title);

        // Parse daily episodes carrying a six-digit yymmdd date, e.g. `At.Midnight.140722.720p.HDTV.x264-YesTV`,
        // rewriting it to 20yy.mm.dd so the airdate patterns below can pick it up.
        // FIX: Match.Groups.Count is never 0 in .NET, so the old `Groups.Count > 0` check always passed;
        // test Match.Success instead.
        var sixDigitAirDateMatch = SixDigitAirDateMatchExp().Match(title);
        if (sixDigitAirDateMatch.Success)
        {
            var airYear = sixDigitAirDateMatch.Groups["airyear"].Value;
            var airMonth = sixDigitAirDateMatch.Groups["airmonth"].Value;
            var airDay = sixDigitAirDateMatch.Groups["airday"].Value;
            if (airMonth != "00" || airDay != "00")
            {
                var fixedDate = $"20{airYear}.{airMonth}.{airDay}";
                if (!string.IsNullOrEmpty(sixDigitAirDateMatch.Groups["airdate"].Value))
                {
                    simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate);
                }
            }
        }

        foreach (var exp in _validRegexes)
        {
            var match = exp().Match(simpleTitle);
            if (!match.Success)
            {
                continue;
            }

            var result = ParseMatchCollection(match, simpleTitle);
            if (result is null)
            {
                // FIX: ParseMatchCollection returns null for invalid episode ranges; previously this
                // was dereferenced unguarded. Try the next pattern instead.
                continue;
            }

            // A "full season" whose release tokens mention Special is really a special episode.
            if (result.FullSeason && result.ReleaseTokens != null && result.ReleaseTokens.Contains("Special", StringComparison.OrdinalIgnoreCase))
            {
                result.FullSeason = false;
                result.IsSpecial = true;
            }

            return new()
            {
                ReleaseTitle = title,
                SeriesTitle = result.SeriesName,
                Seasons = result.SeasonNumbers ?? [],
                EpisodeNumbers = result.EpisodeNumbers ?? [],
                AirDate = result.AirDate,
                FullSeason = result.FullSeason,
                IsPartialSeason = result.IsPartialSeason ?? false,
                IsMultiSeason = result.IsMultiSeason ?? false,
                IsSeasonExtra = result.IsSeasonExtra ?? false,
                IsSpecial = result.IsSpecial ?? false,
                SeasonPart = result.SeasonPart ?? 0,
            };
        }

        return null;
    }

    /// <summary>
    /// Translates the capture groups of a successful match into a ParsedMatch.
    /// Returns null when the captured episode range is invalid (first > last).
    /// </summary>
    private static ParsedMatch? ParseMatchCollection(Match match, string simpleTitle)
    {
        var groups = match.Groups;
        if (groups.Count == 0)
        {
            throw new("No match");
        }

        // Normalise separators, then strip a leading `[RequestInfo]` prefix.
        // FIX: the old code passed Regex.ToString() (the pattern text) to String.Replace,
        // which can never match a real title; use Regex.Replace.
        var rawTitle = groups["title"].Value
            .Replace(".", " ")
            .Replace("_", " ");
        var seriesName = RequestInfoExp().Replace(rawTitle, "").Trim();

        var result = new ParsedMatch
        {
            SeriesName = seriesName,
        };

        var lastSeasonEpisodeStringIndex = IndexOfEnd(simpleTitle, groups["title"].Value);

        // FIX: this condition was inverted. Season/episode captures belong to releases *without*
        // a plausible air year; only a real year (>= 1900) takes the air-date branch below
        // (which needs airYear to build a DateTime).
        if (!int.TryParse(groups["airyear"].Value, out var airYear) || airYear < 1900)
        {
            // Group.Value is "" (never null) for unmatched groups, hence the IsNullOrEmpty filters.
            var seasons = new List<string> {groups["season"].Value, groups["season1"].Value}
                .Where(x => !string.IsNullOrEmpty(x))
                .Select(
                    x =>
                    {
                        lastSeasonEpisodeStringIndex = Math.Max(
                            IndexOfEnd(simpleTitle, x),
                            lastSeasonEpisodeStringIndex
                        );
                        return int.Parse(x);
                    })
                .ToList();

            if (seasons.Count > 1)
            {
                seasons = CompleteRange(seasons);
            }

            result.SeasonNumbers = seasons;
            if (seasons.Count > 1)
            {
                result.IsMultiSeason = true;
            }

            var episodeCaptures = new List<string> {groups["episode"].Value, groups["episode1"].Value}
                .Where(x => !string.IsNullOrEmpty(x))
                .ToList();
            var absoluteEpisodeCaptures = new List<string> {groups["absoluteepisode"].Value, groups["absoluteepisode1"].Value}
                .Where(x => !string.IsNullOrEmpty(x))
                .ToList();

            // handle 0 episode possibly indicating a full season release
            if (episodeCaptures.Any())
            {
                var first = int.Parse(episodeCaptures[0]);
                var last = int.Parse(episodeCaptures[^1]);
                if (first > last)
                {
                    // Inverted range — reject this match so the caller can try the next pattern.
                    return null;
                }

                var count = last - first + 1;
                result.EpisodeNumbers = Enumerable.Range(first, count).ToList();
            }

            if (absoluteEpisodeCaptures.Any())
            {
                // NOTE(review): double.Parse uses the current culture; the patterns always emit
                // '.' decimals, so CultureInfo.InvariantCulture would be safer — confirm hosts run invariant.
                var first = double.Parse(absoluteEpisodeCaptures[0]);
                var last = double.Parse(absoluteEpisodeCaptures[^1]);
                if (first % 1 != 0 || last % 1 != 0)
                {
                    if (absoluteEpisodeCaptures.Count != 1)
                    {
                        return null;
                    }

                    // specialAbsoluteEpisodeNumbers in radarr
                    result.EpisodeNumbers = new()
                        {(int) first};
                    result.IsSpecial = true;
                    lastSeasonEpisodeStringIndex = Math.Max(
                        IndexOfEnd(simpleTitle, absoluteEpisodeCaptures[0]),
                        lastSeasonEpisodeStringIndex
                    );
                }
                else
                {
                    var count = (int) (last - first + 1);
                    // AbsoluteEpisodeNumbers in radarr
                    result.EpisodeNumbers = Enumerable.Range((int) first, count).ToList();
                    // FIX: Group.Value is never null, so the old `!= null` test flagged every
                    // release as special; require an actual "special" capture.
                    if (!string.IsNullOrEmpty(groups["special"].Value))
                    {
                        result.IsSpecial = true;
                    }
                }
            }

            if (!episodeCaptures.Any() && !absoluteEpisodeCaptures.Any())
            {
                // Check to see if this is an "Extras" or "SUBPACK" release, if it is, set
                // IsSeasonExtra so they can be filtered out
                // FIX: `Value != null` is always true; test for a non-empty capture instead.
                if (!string.IsNullOrEmpty(groups["extras"].Value))
                {
                    result.IsSeasonExtra = true;
                }

                // Partial season packs will have a seasonpart group so they can be differentiated
                // from a full season/single episode release
                var seasonPart = groups["seasonpart"].Value;
                if (!string.IsNullOrEmpty(seasonPart))
                {
                    // FIX: the old null check always passed, so int.Parse("") threw a
                    // FormatException on every season-only release.
                    result.SeasonPart = int.Parse(seasonPart);
                    result.IsPartialSeason = true;
                }
                else
                {
                    result.FullSeason = true;
                }
            }

            if (absoluteEpisodeCaptures.Any() && result.EpisodeNumbers == null)
            {
                // Anime releases carrying only absolute numbering fall back to season 0.
                result.SeasonNumbers = new()
                    {0};
            }
        }
        else
        {
            if (int.TryParse(groups["airmonth"].Value, out var airMonth) && int.TryParse(groups["airday"].Value, out var airDay))
            {
                // Swap day and month if month is bigger than 12 (scene fail)
                if (airMonth > 12)
                {
                    (airDay, airMonth) = (airMonth, airDay);
                }

                var airDate = new DateTime(airYear, airMonth, airDay);

                // dates in the future is most likely parser error
                if (airDate > DateTime.Now)
                {
                    throw new("Parsed date is in the future");
                }

                if (airDate < new DateTime(1970, 1, 1))
                {
                    throw new("Parsed date error");
                }

                lastSeasonEpisodeStringIndex = Math.Max(
                    IndexOfEnd(simpleTitle, groups["airyear"].Value),
                    lastSeasonEpisodeStringIndex
                );
                lastSeasonEpisodeStringIndex = Math.Max(
                    IndexOfEnd(simpleTitle, groups["airmonth"].Value),
                    lastSeasonEpisodeStringIndex
                );
                lastSeasonEpisodeStringIndex = Math.Max(
                    IndexOfEnd(simpleTitle, groups["airday"].Value),
                    lastSeasonEpisodeStringIndex
                );
                result.AirDate = airDate;
            }
        }

        // Everything after the last season/episode/date token (quality, group, ...) becomes the release tokens.
        if (lastSeasonEpisodeStringIndex == simpleTitle.Length || lastSeasonEpisodeStringIndex == -1)
        {
            result.ReleaseTokens = simpleTitle;
        }
        else
        {
            result.ReleaseTokens = simpleTitle.Substring(lastSeasonEpisodeStringIndex);
        }

        result.SeriesTitle = seriesName;
        // TODO: seriesTitleInfo
        return result;
    }

    // A title is parsed only when none of the shared rejection patterns (declared in another
    // part of this partial class) match it.
    private static bool PreValidation(string title) =>
        _rejectedRegex.Select(exp => exp().Match(title)).All(match => !match.Success);

    // Expands e.g. [1, 4] into [1, 2, 3, 4]; assumes the (distinct) input is ascending.
    private static List<int> CompleteRange(List<int> arr)
    {
        var uniqArr = arr.Distinct().ToList();
        var first = uniqArr[0];
        var last = uniqArr[^1];
        if (first > last)
        {
            return arr;
        }

        var count = last - first + 1;
        return Enumerable.Range(first, count).ToList();
    }

    // Index just past the end of the first occurrence of str2 in str1, or -1 when absent.
    private static int IndexOfEnd(string str1, string str2)
    {
        var io = str1.IndexOf(str2, StringComparison.Ordinal);
        return io == -1 ? -1 : io + str2.Length;
    }

    // Intermediate parse state; nullable members mean "not observed for this title".
    private record ParsedMatch
    {
        public string? SeriesName { get; set; }
        public string? SeriesTitle { get; set; }
        public List<int>? SeasonNumbers { get; set; }
        public bool? IsMultiSeason { get; set; }
        public List<int>? EpisodeNumbers { get; set; }
        public bool? IsSpecial { get; set; }
        public bool? IsSeasonExtra { get; set; }
        public int? SeasonPart { get; set; }
        public bool? IsPartialSeason { get; set; }
        public bool FullSeason { get; set; }
        public DateTime? AirDate { get; set; }
        public string? ReleaseTokens { get; set; }
    }
}

View File

@@ -0,0 +1,12 @@
namespace Producer.Features.ParseTorrentTitle;
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers the torrent-title parsing services (parsing service and title parser)
    /// as singletons on the supplied container.
    /// </summary>
    public static IServiceCollection RegisterParseTorrentTitle(this IServiceCollection services) =>
        services
            .AddSingleton<IParsingService, ParsingService>()
            .AddSingleton<ITorrentTitleParser, TorrentTitleParser>();
}

View File

@@ -0,0 +1,20 @@
namespace Producer.Features.ParseTorrentTitle;
// Release source (media provenance) modelled as a string-backed SmartEnum.
// Name and value are identical for each member so the enum round-trips cleanly to/from text.
public sealed class Source : SmartEnum<Source, string>
{
public static readonly Source BLURAY = new("BLURAY", "BLURAY");
public static readonly Source WEBDL = new("WEBDL", "WEBDL");
public static readonly Source WEBRIP = new("WEBRIP", "WEBRIP");
public static readonly Source DVD = new("DVD", "DVD");
public static readonly Source CAM = new("CAM", "CAM");
public static readonly Source SCREENER = new("SCREENER", "SCREENER");
public static readonly Source PPV = new("PPV", "PPV");
public static readonly Source TELESYNC = new("TELESYNC", "TELESYNC");
public static readonly Source TELECINE = new("TELECINE", "TELECINE");
public static readonly Source WORKPRINT = new("WORKPRINT", "WORKPRINT");
public static readonly Source TV = new("TV", "TV");
// Private: the set of sources is closed to the static members above.
private Source(string name, string value) : base(name, value)
{
}
}

View File

@@ -0,0 +1,151 @@
namespace Producer.Features.ParseTorrentTitle;
public static partial class SourceParser
{
    [GeneratedRegex(@"\b(?<bluray>M?Blu-?Ray|HDDVD|BD|UHDBD|BDISO|BDMux|BD25|BD50|BR.?DISK|Bluray(1080|720)p?|BD(1080|720)p?)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex BlurayExp();
    [GeneratedRegex(@"\b(?<webdl>WEB[-_. ]DL|HDRIP|WEBDL|WEB-DLMux|NF|APTV|NETFLIX|NetflixU?HD|DSNY|DSNP|HMAX|AMZN|AmazonHD|iTunesHD|MaxdomeHD|WebHD|WEB$|[. ]WEB[. ](?:[xh]26[45]|DD5[. ]1)|\d+0p[. ]WEB[. ]|\b\s\/\sWEB\s\/\s\b|AMZN[. ]WEB[. ])\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex WebdlExp();
    [GeneratedRegex(@"\b(?<webrip>WebRip|Web-Rip|WEBCap|WEBMux)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex WebripExp();
    [GeneratedRegex(@"\b(?<hdtv>HDTV)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex HdtvExp();
    [GeneratedRegex(@"\b(?<bdrip>BDRip)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex BdripExp();
    [GeneratedRegex(@"\b(?<brrip>BRRip)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex BrripExp();
    [GeneratedRegex(@"\b(?<scr>SCR|SCREENER|DVDSCR|(DVD|WEB).?SCREENER)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex ScrExp();
    [GeneratedRegex(@"\b(?<dvdr>DVD-R|DVDR)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DvdrExp();
    [GeneratedRegex(@"\b(?<dvd>DVD9?|DVDRip|NTSC|PAL|xvidvd|DvDivX)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DvdExp();
    [GeneratedRegex(@"\b(?<dsr>WS[-_. ]DSR|DSR)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DsrExp();
    [GeneratedRegex(@"\b(?<regional>R[0-9]{1}|REGIONAL)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex RegionalExp();
    [GeneratedRegex(@"\b(?<ppv>PPV)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex PpvExp();
    [GeneratedRegex(@"\b(?<ts>TS|TELESYNC|HD-TS|HDTS|PDVD|TSRip|HDTSRip)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex TsExp();
    [GeneratedRegex(@"\b(?<tc>TC|TELECINE|HD-TC|HDTC)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex TcExp();
    [GeneratedRegex(@"\b(?<cam>CAMRIP|CAM|HDCAM|HD-CAM)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex CamExp();
    [GeneratedRegex(@"\b(?<workprint>WORKPRINT|WP)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex WorkprintExp();
    [GeneratedRegex(@"\b(?<pdtv>PDTV)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex PdtvExp();
    [GeneratedRegex(@"\b(?<sdtv>SDTV)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SdtvExp();
    [GeneratedRegex(@"\b(?<tvrip>TVRip)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex TvripExp();

    /// <summary>
    /// Derives the list of release sources detected in a title. The list preserves a fixed
    /// priority order (Blu-ray first, screener last).
    /// </summary>
    public static void Parse(string title, out List<Source> result)
    {
        ParseSourceGroups(title, out var detected);
        result = [];

        // Each entry pairs a detection predicate with the source it implies;
        // the order of this table is the order sources are appended in.
        var rules = new (bool Matched, Source Value)[]
        {
            (detected["bluray"] || detected["bdrip"] || detected["brrip"], Source.BLURAY),
            (detected["webrip"], Source.WEBRIP),
            // A web-dl marker only counts when no webrip marker is present.
            (!detected["webrip"] && detected["webdl"], Source.WEBDL),
            // DVD screeners must not be reported as DVD.
            (detected["dvdr"] || (detected["dvd"] && !detected["scr"]), Source.DVD),
            (detected["ppv"], Source.PPV),
            (detected["workprint"], Source.WORKPRINT),
            (detected["pdtv"] || detected["sdtv"] || detected["dsr"] || detected["tvrip"] || detected["hdtv"], Source.TV),
            (detected["cam"], Source.CAM),
            (detected["ts"], Source.TELESYNC),
            (detected["tc"], Source.TELECINE),
            (detected["scr"], Source.SCREENER),
        };

        foreach (var (matched, value) in rules)
        {
            if (matched)
            {
                result.Add(value);
            }
        }
    }

    /// <summary>
    /// Evaluates every source-detection regex against the title and reports the outcome
    /// keyed by detector name.
    /// </summary>
    public static void ParseSourceGroups(string title, out Dictionary<string, bool> groups)
    {
        // Underscores and square brackets act as separators in release names.
        var normalizedName = title.Replace("_", " ").Replace("[", " ").Replace("]", " ").Trim();

        var detectors = new (string Key, Regex Exp)[]
        {
            ("bluray", BlurayExp()),
            ("webdl", WebdlExp()),
            ("webrip", WebripExp()),
            ("hdtv", HdtvExp()),
            ("bdrip", BdripExp()),
            ("brrip", BrripExp()),
            ("scr", ScrExp()),
            ("dvdr", DvdrExp()),
            ("dvd", DvdExp()),
            ("dsr", DsrExp()),
            ("regional", RegionalExp()),
            ("ppv", PpvExp()),
            ("ts", TsExp()),
            ("tc", TcExp()),
            ("cam", CamExp()),
            ("workprint", WorkprintExp()),
            ("pdtv", PdtvExp()),
            ("sdtv", SdtvExp()),
            ("tvrip", TvripExp()),
        };

        groups = new();
        foreach (var (key, exp) in detectors)
        {
            groups[key] = exp.IsMatch(normalizedName);
        }
    }
}

View File

@@ -0,0 +1,197 @@
namespace Producer.Features.ParseTorrentTitle;
public static partial class TitleParser
{
// Title + edition keyword(s) + year, e.g. "Movie.Directors.Cut.1999".
[GeneratedRegex(@"^(?<title>(?![([]).+?)?(?:(?:[-_\W](?<![)[!]))*\(?\b(?<edition>(((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Anniversary|The.Uncut|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\b\)?.{1,3}(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex MovieTitleYearRegex1();
// Title with the year in parentheses: "Movie (1999)".
[GeneratedRegex(@"^(?<title>(?![([]).+?)?(?:(?:[-_\W](?<![)[!]))*\((?<year>(1(8|9)|20)\d{2}(?!p|i|(1(8|9)|20)\d{2}|\]|\W(1(8|9)|20)\d{2})))+", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex MovieTitleYearRegex2();
// Title with a bare year: "Movie.1999.".
[GeneratedRegex(@"^(?<title>(?![([]).+?)?(?:(?:[-_\W](?<![)[!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|(1(8|9)|20)\d{2}|\]|\W(1(8|9)|20)\d{2})))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex MovieTitleYearRegex3();
// Title followed by a bracketed token captured as the "year" (e.g. "[1999]" style markers).
[GeneratedRegex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![()[!]))*(?<year>(\[\w *\])))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex MovieTitleYearRegex4();
// Looser year variant that tolerates titles beginning with brackets.
[GeneratedRegex(@"^(?<title>(?![([]).+?)?(?:(?:[-_\W](?<![)!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex MovieTitleYearRegex5();
// Loosest year variant — any title prefix allowed.
[GeneratedRegex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![)[!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex MovieTitleYearRegex6();
// Resolution/codec/channel noise stripped by SimplifyTitle.
[GeneratedRegex(@"\s*(?:480[ip]|576[ip]|720[ip]|1080[ip]|2160[ip]|HVEC|[xh][\W_]?26[45]|DD\W?5\W1|[<>?*:|]|848x480|1280x720|1920x1080)((8|10)b(it))?", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SimpleTitleRegex();
// Leading "[ www.site.com ]" / "www.site.com - " tracker branding.
[GeneratedRegex(@"^\[\s*[a-z]+(\.[a-z]+)+\s*\][- ]*|^www\.[a-z]+\.(?:com|net)[ -]*", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex WebsitePrefixRegex();
// Leading "[REQ]" marker.
[GeneratedRegex(@"^\[(?:REQ)\]", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex CleanTorrentPrefixRegex();
// Trailing tracker tags such as "[rarbg]".
[GeneratedRegex(@"\[(?:ettv|rartv|rarbg|cttv)\]$", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex CleanTorrentSuffixRegex();
// Common source keywords (Bluray/HDTV/CAM/...).
[GeneratedRegex(@"\b(Bluray|(dvdr?|BD)rip|HDTV|HDRip|TS|R5|CAM|SCR|(WEB|DVD)?.?SCREENER|DiVX|xvid|web-?dl)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex CommonSourcesRegex();
// Web-DL / streaming-service markers (same pattern as SourceParser's webdl detector).
[GeneratedRegex(@"\b(?<webdl>WEB[-_. ]DL|HDRIP|WEBDL|WEB-DLMux|NF|APTV|NETFLIX|NetflixU?HD|DSNY|DSNP|HMAX|AMZN|AmazonHD|iTunesHD|MaxdomeHD|WebHD|WEB$|[. ]WEB[. ](?:[xh]26[45]|DD5[. ]1)|\d+0p[. ]WEB[. ]|\b\s\/\sWEB\s\/\s\b|AMZN[. ]WEB[. ])\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex WebdlExp();
// Any bracketed "[...]" request-info chunk.
[GeneratedRegex(@"\[.+?\]", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex RequestInfoRegex();
// Edition keywords (Director's Cut, Remastered, IMAX, ...).
[GeneratedRegex(
    @"\b((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Anniversary|The.Uncut|DC|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Special|Despecialized|unrated|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1)))){1,3}",
    RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex EditionExp();
// Language / dub markers.
[GeneratedRegex(@"\b(TRUE.?FRENCH|videomann|SUBFRENCH|PLDUB|MULTI)", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex LanguageExp();
// Scene flags that carry no title information.
[GeneratedRegex(@"\b(PROPER|REAL|READ.NFO)", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SceneGarbageExp();
// Trailing "-GROUP" release-group suffix.
[GeneratedRegex(@"-([a-z0-9]+)$", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex GrouplessTitleRegex();
/// <summary>
/// Splits a release title into the movie/series title and, when present, the year.
/// When no year pattern matches, falls back to cutting the title at the first
/// quality/codec artifact found in it; year is null in that case.
/// </summary>
public static void Parse(string title, out string parsedTitle, out string? year)
{
    var simpleTitle = SimplifyTitle(title);

    // Removing the group from the end could be trouble if a title is "title-year"
    // FIX: String.Replace was being fed Regex.ToString() (the pattern text), which never
    // occurs in a real title, so the trailing "-GROUP" was never stripped. Use Regex.Replace.
    var grouplessTitle = GrouplessTitleRegex().Replace(simpleTitle, "");

    var movieTitleYearRegex = new List<Regex>
    {
        MovieTitleYearRegex1(), MovieTitleYearRegex2(), MovieTitleYearRegex3(), MovieTitleYearRegex4(), MovieTitleYearRegex5(),
        MovieTitleYearRegex6()
    };
    foreach (var exp in movieTitleYearRegex)
    {
        var match = exp.Match(grouplessTitle);
        if (match.Success)
        {
            parsedTitle = ReleaseTitleCleaner(match.Groups["title"].Value);
            year = match.Groups["year"].Value;
            return;
        }
    }

    // year not found, attack using codec or resolution
    // attempt to parse using the first found artifact like codec
    ResolutionParser.Parse(title, out var resolution, out _);
    VideoCodecsParser.Parse(title, out var videoCodec, out _);
    AudioChannelsParser.Parse(title, out var channels, out _);
    AudioCodecsParser.Parse(title, out var audioCodec, out _);

    var resolutionPosition = title.IndexOf(resolution?.Value ?? string.Empty, StringComparison.Ordinal);
    var videoCodecPosition = title.IndexOf(videoCodec?.Value ?? string.Empty, StringComparison.Ordinal);
    var channelsPosition = title.IndexOf(channels?.Value ?? string.Empty, StringComparison.Ordinal);
    var audioCodecPosition = title.IndexOf(audioCodec?.Value ?? string.Empty, StringComparison.Ordinal);

    // IndexOf("") returns 0, so artifacts that were not detected collapse to 0 and are
    // excluded by the `x > 0` filter (an artifact at position 0 would leave no title anyway).
    var positions = new List<int> {resolutionPosition, audioCodecPosition, channelsPosition, videoCodecPosition}.Where(x => x > 0).ToList();
    if (positions.Count != 0)
    {
        var firstPosition = positions.Min();
        parsedTitle = ReleaseTitleCleaner(title[..firstPosition]);
        year = null;
        return;
    }

    parsedTitle = title.Trim();
    year = null;
}
/// <summary>
/// Strips common release-name noise (simple-title junk, website prefixes, torrent
/// prefixes/suffixes, source tags, WEB-DL tags, and up to two codec tokens).
/// </summary>
public static string SimplifyTitle(string title)
{
    // FIX: each of these originally called string.Replace with the regex pattern
    // text — a literal that never occurs in titles, so the whole method was a
    // near no-op. The regexes are now actually applied.
    var simpleTitle = SimpleTitleRegex().Replace(title, "");
    simpleTitle = WebsitePrefixRegex().Replace(simpleTitle, "");
    simpleTitle = CleanTorrentPrefixRegex().Replace(simpleTitle, "");
    simpleTitle = CleanTorrentSuffixRegex().Replace(simpleTitle, "");
    simpleTitle = CommonSourcesRegex().Replace(simpleTitle, "");
    simpleTitle = WebdlExp().Replace(simpleTitle, "");
    // Allow filtering of up to two codecs.
    // maybe parseVideoCodec should be an array
    VideoCodecsParser.Parse(simpleTitle, out _, out var source1);
    if (!string.IsNullOrEmpty(source1))
    {
        simpleTitle = simpleTitle.Replace(source1, "");
    }
    VideoCodecsParser.Parse(simpleTitle, out _, out var source2);
    if (!string.IsNullOrEmpty(source2))
    {
        simpleTitle = simpleTitle.Replace(source2, "");
    }
    return simpleTitle.Trim();
}
/// <summary>
/// Final cleanup of an extracted title: removes request tags, source/edition/
/// language markers and scene garbage, then rebuilds dot-separated words while
/// preserving acronyms (e.g. "S.W.A.T").
/// </summary>
/// <returns>The cleaned title, or null when the input is empty or just "(".</returns>
public static string ReleaseTitleCleaner(string title)
{
    // string.IsNullOrEmpty already covers Length == 0 (redundant check removed).
    if (string.IsNullOrEmpty(title) || title == "(")
    {
        // NOTE(review): returns null from a non-nullable signature; callers should
        // treat the result as potentially null.
        return null;
    }
    var trimmedTitle = title.Replace("_", " ");
    // FIX: these originally called string.Replace with the regex pattern text (a
    // literal that never matches); apply the regexes instead.
    trimmedTitle = RequestInfoRegex().Replace(trimmedTitle, "").Trim();
    trimmedTitle = CommonSourcesRegex().Replace(trimmedTitle, "").Trim();
    trimmedTitle = WebdlExp().Replace(trimmedTitle, "").Trim();
    trimmedTitle = EditionExp().Replace(trimmedTitle, "").Trim();
    trimmedTitle = LanguageExp().Replace(trimmedTitle, "").Trim();
    trimmedTitle = SceneGarbageExp().Replace(trimmedTitle, "").Trim();
    // FIX: language names were likewise "replaced" as literal regex source text;
    // match them as real word-boundary patterns instead.
    trimmedTitle = Language.List.Aggregate(trimmedTitle, (current, lang) => Regex.Replace(current, $@"\b{lang.Value.ToUpper()}", "").Trim());
    // Look for the gap formed by removing items above.
    // FIX: splitting on a single space truncated any spaced title to its first
    // word; the gap marker left by the removals is a double space.
    trimmedTitle = trimmedTitle.Split("  ")[0];
    trimmedTitle = trimmedTitle.Split("..")[0];
    var parts = trimmedTitle.Split('.');
    var result = "";
    var n = 0;
    var previousAcronym = false;
    var nextPart = "";
    foreach (var part in parts)
    {
        if (parts.Length >= n + 2)
        {
            nextPart = parts[n + 1];
        }
        // Single non-numeric letters (other than "a") are acronym pieces and are
        // re-joined with dots rather than spaces.
        if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out _))
        {
            result += part + ".";
            previousAcronym = true;
        }
        else if (part.ToLower() == "a" && (previousAcronym || nextPart.Length == 1))
        {
            // "a" joins an acronym when surrounded by single-letter parts.
            result += part + ".";
            previousAcronym = true;
        }
        else
        {
            if (previousAcronym)
            {
                // Close the acronym run with a space before the next full word.
                result += " ";
                previousAcronym = false;
            }
            result += part + " ";
        }
        n++;
    }
    return result.Trim();
}
}

View File

@@ -0,0 +1,142 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Parses a torrent/release name into movie or TV metadata, dispatching on a set
/// of TV-detection heuristics.
/// </summary>
public partial class TorrentTitleParser : ITorrentTitleParser
{
    // TV-detection heuristics: season/episode words, Sxx/Exx markers, "tv"/"complete",
    // localized season words, and dashed numbering such as "x - 01" or "01 - 2020".
    [GeneratedRegex(@"(season|episode)s?.?\d?", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SeasonEpisode();
    [GeneratedRegex(@"[se]\d\d", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SeasonShort();
    [GeneratedRegex(@"\b(tv|complete)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex TvOrComplete();
    [GeneratedRegex(@"\b(saison|stage).?\d", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SeasonStage();
    [GeneratedRegex(@"[a-z]\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex Season();
    [GeneratedRegex(@"\d{2,4}\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SeasonTwo();
    // FIX: hoisted so GetTypeByName no longer allocates a fresh array per call;
    // [GeneratedRegex] accessors return cached instances, so this is safe.
    private static readonly Regex[] TvRegexes =
    [
        SeasonEpisode(), SeasonShort(), TvOrComplete(), SeasonStage(), Season(), SeasonTwo(),
    ];
    /// <summary>Parses a release name into a <see cref="ParsedFilename"/> (movie or TV).</summary>
    public ParsedFilename Parse(string name)
    {
        VideoCodecsParser.Parse(name, out var videoCodec, out _);
        AudioCodecsParser.Parse(name, out var audioCodec, out _);
        AudioChannelsParser.Parse(name, out var audioChannels, out _);
        LanguageParser.Parse(name, out var languages);
        QualityParser.Parse(name, out var quality);
        var group = GroupParser.Parse(name);
        var edition = EditionParser.Parse(name);
        var multi = LanguageParser.IsMulti(name);
        var complete = Complete.IsComplete(name);
        // Shared metadata extracted once and copied into the movie/show result.
        var baseParsed = new BaseParsed
        {
            Resolution = quality.Resolution,
            Sources = quality.Sources,
            VideoCodec = videoCodec,
            AudioCodec = audioCodec,
            AudioChannels = audioChannels,
            Revision = quality.Revision,
            Group = group,
            Edition = edition,
            Languages = languages,
            Multi = multi,
            Complete = complete,
        };
        var isTv = GetTypeByName(name) == TorrentType.Tv;
        return !isTv ? ParseMovie(name, baseParsed) : ParseSeason(name, baseParsed);
    }
    // Builds the TV result from SeasonParser output plus the shared metadata.
    private static ParsedFilename ParseSeason(string name, BaseParsed baseParsed)
    {
        var season = SeasonParser.Parse(name);
        if (season == null)
        {
            // NOTE(review): an empty ParsedFilename leaves Type at its default —
            // confirm callers handle this "unparseable TV" case.
            return new();
        }
        return new()
        {
            Show = new()
            {
                EpisodeNumbers = season.EpisodeNumbers,
                FullSeason = season.FullSeason,
                IsPartialSeason = season.IsPartialSeason,
                IsSpecial = season.IsSpecial,
                SeasonPart = season.SeasonPart,
                IsSeasonExtra = season.IsSeasonExtra,
                SeriesTitle = season.SeriesTitle,
                IsMultiSeason = season.IsMultiSeason,
                AirDate = season.AirDate,
                Seasons = season.Seasons,
                ReleaseTitle = season.ReleaseTitle,
                Edition = baseParsed.Edition,
                Resolution = baseParsed.Resolution,
                Sources = baseParsed.Sources,
                VideoCodec = baseParsed.VideoCodec,
                Complete = baseParsed.Complete,
                AudioCodec = baseParsed.AudioCodec,
                Languages = baseParsed.Languages,
                AudioChannels = baseParsed.AudioChannels,
                Group = baseParsed.Group,
                Multi = baseParsed.Multi,
                Revision = baseParsed.Revision,
            },
            Type = TorrentType.Tv,
        };
    }
    // Builds the movie result: title/year come from TitleParser, the rest from baseParsed.
    private static ParsedFilename ParseMovie(string name, BaseParsed baseParsed)
    {
        TitleParser.Parse(name, out var title, out var year);
        baseParsed.Title = title;
        baseParsed.Year = year;
        return new()
        {
            Movie = new()
            {
                ReleaseTitle = name,
                Title = baseParsed.Title,
                Year = baseParsed.Year,
                Edition = baseParsed.Edition,
                Resolution = baseParsed.Resolution,
                Sources = baseParsed.Sources,
                VideoCodec = baseParsed.VideoCodec,
                Complete = baseParsed.Complete,
                AudioCodec = baseParsed.AudioCodec,
                Languages = baseParsed.Languages,
                AudioChannels = baseParsed.AudioChannels,
                Group = baseParsed.Group,
                Multi = baseParsed.Multi,
                Revision = baseParsed.Revision,
            },
            Type = TorrentType.Movie,
        };
    }
    /// <summary>Classifies a name as TV when any TV heuristic matches, otherwise Movie.</summary>
    public TorrentType GetTypeByName(string name) =>
        Array.Exists(TvRegexes, regex => regex.IsMatch(name)) ? TorrentType.Tv : TorrentType.Movie;
}

View File

@@ -0,0 +1,7 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Coarse classification of a parsed torrent name, used to choose between the
/// movie and TV parsing paths.
/// </summary>
public enum TorrentType
{
    Movie,
    Tv,
}

View File

@@ -0,0 +1,16 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// SmartEnum of recognised video codecs; the Value is the canonical token form.
/// </summary>
public sealed class VideoCodec : SmartEnum<VideoCodec, string>
{
    public static readonly VideoCodec X265 = new("X265", "x265");
    public static readonly VideoCodec X264 = new("X264", "x264");
    public static readonly VideoCodec H264 = new("H264", "h264");
    public static readonly VideoCodec H265 = new("H265", "h265");
    // NOTE(review): WMV's value is upper-case while the others are lower-case —
    // presumably harmless since the codec regexes match case-insensitively; confirm.
    public static readonly VideoCodec WMV = new("WMV", "WMV");
    public static readonly VideoCodec XVID = new("XVID", "xvid");
    public static readonly VideoCodec DVDR = new("DVDR", "dvdr");
    private VideoCodec(string name, string value) : base(name, value)
    {
    }
}

View File

@@ -0,0 +1,89 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Detects the video codec mentioned in a release name via a combined,
/// group-named regex, and reports both the codec and the matched text.
/// </summary>
public partial class VideoCodecsParser
{
    [GeneratedRegex(@"(?<x265>x265)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex X265Exp();
    [GeneratedRegex(@"(?<h265>h265)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex H265Exp();
    [GeneratedRegex(@"(?<x264>x264)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex X264Exp();
    [GeneratedRegex(@"(?<h264>h264)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex H264Exp();
    [GeneratedRegex(@"(?<wmv>WMV)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex WMVExp();
    [GeneratedRegex(@"(?<xvidhd>XvidHD)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex XvidhdExp();
    [GeneratedRegex(@"(?<xvid>X-?vid)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex XvidExp();
    [GeneratedRegex(@"(?<divx>divx)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DivxExp();
    [GeneratedRegex(@"(?<hevc>HEVC)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex HevcExp();
    [GeneratedRegex(@"(?<dvdr>DVDR)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DvdrExp();
    // All codec patterns combined into one alternation; group names identify the hit.
    private static readonly Regex CodecExp = new(
        string.Join(
            "|", X265Exp(), H265Exp(), X264Exp(), H264Exp(), WMVExp(), XvidhdExp(), XvidExp(), DivxExp(), HevcExp(), DvdrExp()), RegexOptions.IgnoreCase);
    // Priority-ordered mapping from capture-group name to codec. The order mirrors
    // the original if/else chain: h264 before h265, x265/hevc before x264, and the
    // three Xvid-family groups before wmv/dvdr.
    private static readonly (string Group, VideoCodec Codec)[] GroupToCodec =
    [
        ("h264", VideoCodec.H264),
        ("h265", VideoCodec.H265),
        ("x265", VideoCodec.X265),
        ("hevc", VideoCodec.X265),
        ("x264", VideoCodec.X264),
        ("xvidhd", VideoCodec.XVID),
        ("xvid", VideoCodec.XVID),
        ("divx", VideoCodec.XVID),
        ("wmv", VideoCodec.WMV),
        ("dvdr", VideoCodec.DVDR),
    ];
    /// <summary>
    /// Finds the first codec token in <paramref name="title"/>; outputs null/null
    /// when nothing matches.
    /// </summary>
    public static void Parse(string title, out VideoCodec? codec, out string? source)
    {
        codec = null;
        source = null;
        var match = CodecExp.Match(title);
        if (!match.Success)
        {
            return;
        }
        foreach (var (groupName, mappedCodec) in GroupToCodec)
        {
            var group = match.Groups[groupName];
            if (!group.Success)
            {
                continue;
            }
            codec = mappedCodec;
            source = group.Value;
            return;
        }
    }
}

View File

@@ -0,0 +1,16 @@
namespace Producer.Features.Wordlists;
/// <summary>
/// Read-only access to the word lists loaded from the Data folder, used to flag
/// banned torrent titles.
/// </summary>
public interface IWordCollections
{
    /// <summary>Single banned words.</summary>
    HashSet<string> AdultWords { get; }
    /// <summary>Multi-word banned phrases.</summary>
    HashSet<string> AdultCompoundPhrases { get; }
    /// <summary>Common-word list (from common-words.txt).</summary>
    HashSet<string> CommonWords { get; }
    /// <summary>JAV-related terms (from jav.txt).</summary>
    HashSet<string> Jav { get; }
    /// <summary>Performer names treated as banned phrases.</summary>
    HashSet<string> AdultStars { get; }
    /// <summary>Populates all collections from disk; call before first use.</summary>
    Task LoadAsync();
}

View File

@@ -0,0 +1,21 @@
namespace Producer.Features.Wordlists;
/// <summary>
/// Hosted service that loads every word collection at application startup and
/// logs the resulting counts for visibility.
/// </summary>
public class PopulationService(IWordCollections wordCollections, ILogger<PopulationService> logger) : IHostedService
{
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Loading word collections...");
        await wordCollections.LoadAsync();
        logger.LogInformation("Common Words Count: {Count}", wordCollections.CommonWords.Count);
        logger.LogInformation("Adult Words Count: {Count}", wordCollections.AdultWords.Count);
        logger.LogInformation("Adult Compound Phrases Count: {Count}", wordCollections.AdultCompoundPhrases.Count);
        logger.LogInformation("Jav Count: {Count}", wordCollections.Jav.Count);
        logger.LogInformation("Adult Stars Count: {Count}", wordCollections.AdultStars.Count);
        logger.LogInformation("Word collections loaded.");
    }
    // Nothing to tear down; the collections live for the process lifetime.
    public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
}

View File

@@ -0,0 +1,12 @@
namespace Producer.Features.Wordlists;
/// <summary>
/// DI wiring for the word-list feature: the singleton store plus the hosted
/// service that populates it on startup.
/// </summary>
public static class ServiceCollectionExtensions
{
    public static IServiceCollection RegisterWordCollections(this IServiceCollection services) =>
        services
            .AddSingleton<IWordCollections, WordCollections>()
            .AddHostedService<PopulationService>();
}

View File

@@ -0,0 +1,65 @@
namespace Producer.Features.Wordlists;
/// <summary>
/// Loads the banned/common word lists from text files under "Data" next to the
/// application binary (one entry per line).
/// </summary>
public class WordCollections : IWordCollections
{
    private const string AdultWordsFile = "adult-words.txt";
    private const string AdultCompoundPhrasesFile = "adult-compound-words.txt";
    private const string AdultStarsFile = "adult-stars.txt";
    private const string JavFile = "jav.txt";
    private const string CommonWordsFile = "common-words.txt";
    public HashSet<string> AdultWords { get; private set; } = [];
    public HashSet<string> AdultCompoundPhrases { get; private set; } = [];
    public HashSet<string> AdultStars { get; private set; } = [];
    public HashSet<string> Jav { get; private set; } = [];
    public HashSet<string> CommonWords { get; private set; } = [];
    /// <summary>Reads all five word files concurrently and replaces the collections.</summary>
    public async Task LoadAsync()
    {
        // The five per-file loader methods were identical except for the file name;
        // collapsed into one helper and awaited together.
        var results = await Task.WhenAll(
            LoadSetAsync(AdultWordsFile),
            LoadSetAsync(AdultCompoundPhrasesFile),
            LoadSetAsync(CommonWordsFile),
            LoadSetAsync(JavFile),
            LoadSetAsync(AdultStarsFile));
        AdultWords = results[0];
        AdultCompoundPhrases = results[1];
        CommonWords = results[2];
        Jav = results[3];
        AdultStars = results[4];
    }
    // Reads one word file into a set; duplicates in the file collapse silently.
    private static async Task<HashSet<string>> LoadSetAsync(string fileName) =>
        [..await File.ReadAllLinesAsync(GetPath(fileName))];
    private static string GetPath(string fileName) => Path.Combine(AppContext.BaseDirectory, "Data", fileName);
}

View File

@@ -1,12 +1,17 @@
// Global using directives
global using System.Globalization;
global using System.Reflection;
global using System.Text;
global using System.Text.Json;
global using System.Text.RegularExpressions;
global using System.Threading.Channels;
global using System.Xml.Linq;
global using Ardalis.SmartEnum;
global using Dapper;
global using FuzzySharp;
global using FuzzySharp.Extractor;
global using FuzzySharp.PreProcess;
global using LZStringCSharp;
global using MassTransit;
global using Microsoft.AspNetCore.Builder;
@@ -25,4 +30,6 @@ global using Producer.Features.Crawlers.Torrentio;
global using Producer.Features.CrawlerSupport;
global using Producer.Features.DataProcessing;
global using Producer.Features.JobSupport;
global using Producer.Features.ParseTorrentTitle;
global using Producer.Features.Wordlists;
global using Serilog;

View File

@@ -9,7 +9,9 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Ardalis.SmartEnum" Version="8.0.0" />
<PackageReference Include="Dapper" Version="2.1.28" />
<PackageReference Include="FuzzySharp" Version="2.0.2" />
<PackageReference Include="LZStringCSharp" Version="1.4.0" />
<PackageReference Include="MassTransit" Version="8.1.3" />
<PackageReference Include="MassTransit.RabbitMQ" Version="8.1.3" />
@@ -32,4 +34,11 @@
</None>
</ItemGroup>
<ItemGroup>
<Content Remove="Data\**" />
<None Include="Data\**">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

View File

@@ -10,6 +10,8 @@ builder.Services
.RegisterMassTransit()
.AddDataStorage()
.AddCrawlers()
.RegisterWordCollections()
.RegisterParseTorrentTitle()
.AddQuartz(builder.Configuration);
var host = builder.Build();

View File

@@ -0,0 +1,31 @@
{
"Serilog": {
"Using": [ "Serilog.Sinks.Console" ],
"MinimumLevel": {
"Default": "Information",
"Override": {
"Microsoft": "Warning",
"System": "Warning",
"System.Net.Http.HttpClient.Scraper.LogicalHandler": "Warning",
"System.Net.Http.HttpClient.Scraper.ClientHandler": "Warning",
"Quartz.Impl.StdSchedulerFactory": "Warning",
"Quartz.Core.QuartzScheduler": "Warning",
"Quartz.Simpl.RAMJobStore": "Warning",
"Quartz.Core.JobRunShell": "Warning",
"Quartz.Core.SchedulerSignalerImpl": "Warning"
}
},
"WriteTo": [
{
"Name": "Console",
"Args": {
"outputTemplate": "{Timestamp:HH:mm:ss} [{Level}] [{SourceContext}] {Message}{NewLine}{Exception}"
}
}
],
"Enrich": [ "FromLogContext", "WithMachineName", "WithThreadId" ],
"Properties": {
"Application": "Producer"
}
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

234602
src/tissue/Data/jav.txt Normal file

File diff suppressed because it is too large Load Diff

17
src/tissue/Dockerfile Normal file
View File

@@ -0,0 +1,17 @@
# Build stage: restore and publish for the requested target architecture
# (multi-arch build; $BUILDPLATFORM/$TARGETARCH are supplied by buildx).
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0 AS build
ARG TARGETARCH
WORKDIR /App
COPY . ./
RUN dotnet restore -a $TARGETARCH
RUN dotnet publish -c Release --no-restore -o out -a $TARGETARCH

# Runtime stage: small Alpine ASP.NET image, running as a dedicated non-root user.
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
WORKDIR /App
COPY --from=build /App/out .
RUN addgroup -S tissue && adduser -S -G tissue tissue
USER tissue
# Liveness check: healthy while a dotnet process is running in the container.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD pgrep -f dotnet || exit 1
ENTRYPOINT ["dotnet", "Tissue.dll"]

View File

@@ -0,0 +1,44 @@
namespace Tissue.Extensions;
/// <summary>
/// Configuration bootstrap helpers: wires the JSON/env/user-secrets sources and
/// binds strongly typed settings objects into DI.
/// </summary>
public static class ConfigurationExtensions
{
    private const string ConfigurationFolder = "Configuration";
    private const string LoggingConfig = "logging.json";
    public static IConfigurationBuilder AddScrapeConfiguration(this IConfigurationBuilder configuration)
    {
        // Builder methods return the same instance, so the calls can be chained.
        configuration
            .SetBasePath(Path.Combine(AppContext.BaseDirectory, ConfigurationFolder))
            .AddJsonFile(LoggingConfig, false, true)
            .AddEnvironmentVariables()
            .AddUserSecrets<Program>();
        return configuration;
    }
    public static TConfiguration LoadConfigurationFromConfig<TConfiguration>(this IServiceCollection services, IConfiguration configuration, string sectionName)
        where TConfiguration : class
    {
        var settings = configuration.GetSection(sectionName).Get<TConfiguration>();
        ArgumentNullException.ThrowIfNull(settings, nameof(settings));
        services.TryAddSingleton(settings);
        return settings;
    }
    public static TConfiguration LoadConfigurationFromEnv<TConfiguration>(this IServiceCollection services)
        where TConfiguration : class
    {
        var settings = Activator.CreateInstance<TConfiguration>();
        ArgumentNullException.ThrowIfNull(settings, nameof(settings));
        services.TryAddSingleton(settings);
        return settings;
    }
}

View File

@@ -0,0 +1,68 @@
namespace Tissue.Extensions;
/// <summary>
/// Helpers for reading typed values from "{PREFIX}_{NAME}" environment variables.
/// </summary>
public static class EnvironmentExtensions
{
    /// <summary>
    /// Reads a boolean; trimmed, case-insensitive "true", "yes" or "1" mean true,
    /// anything else false, and a missing/empty variable yields the fallback.
    /// </summary>
    public static bool GetEnvironmentVariableAsBool(this string prefix, string varName, bool fallback = false)
    {
        var fullVarName = GetFullVariableName(prefix, varName);
        var str = Environment.GetEnvironmentVariable(fullVarName);
        if (string.IsNullOrEmpty(str))
        {
            return fallback;
        }
        // Invariant lower-casing: env-var parsing must not depend on the OS culture.
        return str.Trim().ToLowerInvariant() switch
        {
            "true" or "yes" or "1" => true,
            _ => false,
        };
    }
    /// <summary>Reads an integer; missing or unparseable values yield the fallback.</summary>
    public static int GetEnvironmentVariableAsInt(this string prefix, string varName, int fallback = 0)
    {
        var fullVarName = GetFullVariableName(prefix, varName);
        var str = Environment.GetEnvironmentVariable(fullVarName);
        if (string.IsNullOrEmpty(str))
        {
            return fallback;
        }
        return int.TryParse(str, out var result) ? result : fallback;
    }
    /// <summary>Reads a string; throws when the variable is missing or empty.</summary>
    /// <exception cref="InvalidOperationException">The variable is not set.</exception>
    public static string GetRequiredEnvironmentVariableAsString(this string prefix, string varName)
    {
        var fullVarName = GetFullVariableName(prefix, varName);
        var str = Environment.GetEnvironmentVariable(fullVarName);
        if (string.IsNullOrEmpty(str))
        {
            throw new InvalidOperationException($"Environment variable {fullVarName} is not set");
        }
        return str;
    }
    /// <summary>Reads a string; a missing or empty variable yields the fallback.</summary>
    // FIX: the return type is now string? — the original returned a nullable
    // fallback from a non-nullable signature.
    public static string? GetOptionalEnvironmentVariableAsString(this string prefix, string varName, string? fallback = null)
    {
        var fullVarName = GetFullVariableName(prefix, varName);
        var str = Environment.GetEnvironmentVariable(fullVarName);
        if (string.IsNullOrEmpty(str))
        {
            return fallback;
        }
        return str;
    }
    private static string GetFullVariableName(string prefix, string varName) => $"{prefix}_{varName}";
}

View File

@@ -0,0 +1,28 @@
namespace Tissue.Extensions;
/// <summary>
/// DI wiring for the Tissue app: Serilog-backed logging and the one-shot
/// Kleenex hosted service.
/// </summary>
public static class ServiceCollectionExtensions
{
    public static IServiceCollection AddSerilogLogging(this IServiceCollection services, IConfiguration configuration)
    {
        return services.AddLogging(builder =>
        {
            // Drop the default providers and route everything through Serilog,
            // configured entirely from the bound configuration.
            builder.ClearProviders();
            var serilogLogger = new LoggerConfiguration()
                .ReadFrom.Configuration(configuration)
                .CreateLogger();
            builder.AddSerilog(serilogLogger);
        });
    }
    public static IServiceCollection AddKleenexService(this IServiceCollection services) =>
        services.AddHostedService<KleenexService>();
}

View File

@@ -0,0 +1,24 @@
namespace Tissue.Extensions;
/// <summary>
/// String helpers for title matching: null/empty shorthand and a normaliser that
/// reduces a title to lower-case alphanumeric words joined by single spaces.
/// </summary>
public static partial class StringExtensions
{
    [GeneratedRegex("[^a-zA-Z0-9 ]")]
    private static partial Regex NotAlphaNumeric();
    public static bool IsNullOrEmpty(this string? value) =>
        string.IsNullOrEmpty(value);
    private static readonly char[] separator = [' '];
    /// <summary>
    /// Replaces every non-alphanumeric character with a space, lower-cases each
    /// word, and re-joins with single spaces (collapsing runs of separators).
    /// </summary>
    public static string NormalizeTitle(this string title)
    {
        var alphanumericTitle = NotAlphaNumeric().Replace(title, " ");
        var words = alphanumericTitle.Split(separator, StringSplitOptions.RemoveEmptyEntries)
            // FIX: invariant lower-casing so matching does not vary with the host
            // culture (e.g. the Turkish dotless-i rule); see CA1311.
            .Select(word => word.ToLowerInvariant());
        var normalizedTitle = string.Join(" ", words);
        return normalizedTitle;
    }
}

View File

@@ -0,0 +1,44 @@
namespace Tissue.Features.DataProcessing;
/// <summary>
/// Dapper/Npgsql-backed access to the "torrents" table. All failures are logged
/// and swallowed so the one-shot cleanup job can finish gracefully.
/// </summary>
public class DapperDataStorage(PostgresConfiguration configuration, ILogger<DapperDataStorage> logger) : IDataStorage
{
    /// <summary>Reads every torrent row; returns an empty list on failure.</summary>
    public async Task<IReadOnlyCollection<Torrent>?> GetAllTorrents(CancellationToken cancellationToken = default)
    {
        // camelCase to match the deleteTorrentsSql naming below.
        const string getAllTorrentsSql = "SELECT * FROM torrents";
        try
        {
            await using var connection = await CreateAndOpenConnection(cancellationToken);
            var torrents = await connection.QueryAsync<Torrent>(getAllTorrentsSql);
            return torrents.ToList();
        }
        catch (Exception e)
        {
            // FIX: message previously read "Error while torrents from database".
            logger.LogError(e, "Error while reading torrents from database");
            return new List<Torrent>();
        }
    }
    /// <summary>Deletes all rows whose "infoHash" is in the given set (parameterized).</summary>
    public async Task DeleteTorrentsByInfoHashes(IReadOnlyCollection<string> infoHashes, CancellationToken cancellationToken = default)
    {
        const string deleteTorrentsSql = "DELETE FROM torrents WHERE \"infoHash\" = ANY(@infoHashes)";
        try
        {
            await using var connection = await CreateAndOpenConnection(cancellationToken);
            await connection.ExecuteAsync(deleteTorrentsSql, new { infoHashes });
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error while deleting torrents from database");
        }
    }
    // Opens a fresh Npgsql connection per operation; disposed by the callers' await using.
    private async Task<NpgsqlConnection> CreateAndOpenConnection(CancellationToken cancellationToken = default)
    {
        var connection = new NpgsqlConnection(configuration.StorageConnectionString);
        await connection.OpenAsync(cancellationToken);
        return connection;
    }
}

View File

@@ -0,0 +1,7 @@
namespace Tissue.Features.DataProcessing;
/// <summary>
/// Storage operations used by the cleanup job: bulk read and bulk delete of torrents.
/// </summary>
public interface IDataStorage
{
    /// <summary>Returns all stored torrents, or null/empty when none are available.</summary>
    Task<IReadOnlyCollection<Torrent>?> GetAllTorrents(CancellationToken cancellationToken = default);
    /// <summary>Deletes every torrent whose info hash appears in the collection.</summary>
    Task DeleteTorrentsByInfoHashes(IReadOnlyCollection<string> infoHashes, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,19 @@
namespace Tissue.Features.DataProcessing;
/// <summary>
/// Postgres connection settings sourced from POSTGRES_* environment variables;
/// required variables throw at construction when missing.
/// </summary>
public class PostgresConfiguration
{
    private const string Prefix = "POSTGRES";
    private const string HostVariable = "HOST";
    private const string UsernameVariable = "USER";
    private const string PasswordVariable = "PASSWORD";
    private const string DatabaseVariable = "DB";
    private const string PortVariable = "PORT";
    private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
    private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
    private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
    private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
    // FIX: renamed from the all-caps "PORT" to PascalCase for consistency with the
    // other properties (private, so no caller is affected).
    private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
    /// <summary>Connection string assembled from the values above.</summary>
    public string StorageConnectionString => $"Host={Host};Port={Port};Username={Username};Password={Password};Database={Database};";
}

View File

@@ -0,0 +1,12 @@
namespace Tissue.Features.DataProcessing;
/// <summary>
/// DI wiring for data access: binds PostgresConfiguration from the environment
/// and registers the Dapper-backed storage implementation.
/// </summary>
internal static class ServiceCollectionExtensions
{
    internal static IServiceCollection AddDataStorage(this IServiceCollection services)
    {
        // Configuration must exist before the storage that consumes it is resolved.
        services.LoadConfigurationFromEnv<PostgresConfiguration>();
        return services.AddTransient<IDataStorage, DapperDataStorage>();
    }
}

View File

@@ -0,0 +1,20 @@
namespace Tissue.Features.DataProcessing;
/// <summary>
/// Row model for the "torrents" table, hydrated by Dapper (see DapperDataStorage).
/// </summary>
public class Torrent
{
    // Unique torrent info hash; used as the key for deletions.
    public string? InfoHash { get; set; }
    // Source/indexer the torrent was ingested from.
    public string? Provider { get; set; }
    public string? TorrentId { get; set; }
    // Raw release title; scanned for banned terms by the cleanup job.
    public string? Title { get; set; }
    // Size — presumably bytes; confirm against the ingestion side.
    public long? Size { get; set; }
    public string? Type { get; set; }
    public DateTime UploadDate { get; set; }
    public short? Seeders { get; set; }
    public string? Trackers { get; set; }
    public string? Languages { get; set; }
    public string? Resolution { get; set; }
    public bool Reviewed { get; set; }
    public bool Opened { get; set; }
    public DateTime CreatedAt { get; set; }
    public DateTime UpdatedAt { get; set; }
}

View File

@@ -0,0 +1,93 @@
namespace Tissue.Features.Kleenex;
/// <summary>
/// One-shot hosted service: loads all torrents, deletes those whose titles match
/// the banned word/phrase lists, then exits the process (0 on success, 1 on error).
/// </summary>
public class KleenexService(IDataStorage dataStorage, ILogger<KleenexService> logger, IWordCollections wordCollections) : IHostedService
{
    // Compound phrases, JAV terms and performer names merged into a single lookup set.
    private readonly HashSet<string> _combinedCompounds = [];
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        try
        {
            logger.LogInformation("Kleenex service started");
            logger.LogInformation("Get ready to pop all that corn...");
            var torrents = await LoadTorrents(cancellationToken);
            // Must be populated before HasBannedTerms runs (via GetInfoHashesToDelete).
            _combinedCompounds.UnionWith(wordCollections.AdultCompoundPhrases);
            _combinedCompounds.UnionWith(wordCollections.Jav);
            _combinedCompounds.UnionWith(wordCollections.AdultStars);
            var infoHashesToDelete = GetInfoHashesToDelete(torrents);
            await dataStorage.DeleteTorrentsByInfoHashes(infoHashesToDelete, cancellationToken);
            logger.LogInformation("Deleted {TorrentCount} torrents", infoHashesToDelete.Count);
            logger.LogInformation("Kleenex service completed successfully");
            Environment.Exit(0);
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error while processing torrents");
            Environment.Exit(1);
        }
    }
    public Task StopAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Service Shutdown");
        return Task.CompletedTask;
    }
    // Collects the info hashes of torrents whose titles contain banned terms.
    // NOTE(review): exits the process directly when there is nothing to delete,
    // mirroring the original control flow.
    private List<string> GetInfoHashesToDelete(IReadOnlyCollection<Torrent> torrents)
    {
        var infoHashesToDelete = torrents
            // FIX: guard against null titles (previously a potential NRE inside
            // NormalizeTitle).
            .Where(torrent => torrent.Title is not null && HasBannedTerms(torrent.Title))
            .Select(torrent => torrent.InfoHash)
            // FIX: drop null hashes instead of silencing them with the null-forgiving
            // operator at the call site.
            .OfType<string>()
            .ToList();
        if (infoHashesToDelete.Count == 0)
        {
            logger.LogInformation("No torrents to delete");
            Environment.Exit(0);
        }
        return infoHashesToDelete;
    }
    // Loads every torrent; exits the process when the table is empty or unavailable.
    private async Task<IReadOnlyCollection<Torrent>> LoadTorrents(CancellationToken cancellationToken)
    {
        var torrents = await dataStorage.GetAllTorrents(cancellationToken);
        if (torrents is null || torrents.Count == 0)
        {
            logger.LogInformation("No torrents found");
            Environment.Exit(0);
        }
        return torrents is { Count: > 0 } ? LogFound(torrents) : torrents!;
    }
    private IReadOnlyCollection<Torrent> LogFound(IReadOnlyCollection<Torrent> torrents)
    {
        logger.LogInformation("Found {TorrentCount} torrents", torrents.Count);
        return torrents;
    }
    // True when the normalised title contains a banned single word (3+ chars) or
    // any banned compound phrase.
    private bool HasBannedTerms(string targetTitle)
    {
        var normalisedTitle = targetTitle.NormalizeTitle();
        var normalisedWords = normalisedTitle.Split(' ');
        // FIX: the original also required normalisedWords.Contains(word, ...) —
        // always true for a word taken from that same array — so it is dropped.
        var hasBannedWords = normalisedWords
            .Where(word => word.Length >= 3)
            .Any(word => wordCollections.AdultWords.Contains(word));
        var hasCompounds = _combinedCompounds.Any(term => normalisedTitle.Contains(term, StringComparison.OrdinalIgnoreCase));
        if (!hasBannedWords && !hasCompounds)
        {
            logger.LogInformation("No banned terms found in torrent title: {Title}", targetTitle);
            return false;
        }
        logger.LogWarning("Banned terms found in torrent title: {Title}", targetTitle);
        return true;
    }
}

Some files were not shown because too many files have changed in this diff Show More