52 Commits
v2.0.0 ... main

Author SHA1 Message Date
Giorgi
594320ed63 Fix DMM parser and a couple of other minor issues. (#226) 2024-11-19 19:00:06 +00:00
Gabisonfire
a7d5944d25 Adds missing body parameter (#232) 2024-11-15 08:35:46 -05:00
Gabisonfire
c053a5f8da Automate pull request for changelog (#231) 2024-11-15 08:34:09 -05:00
Gabisonfire
5611d3776f [skip ci] Update changelog (#230)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-11-15 08:27:29 -05:00
Gabisonfire
833ac11a96 Update git_cliff.yml (#229) 2024-11-15 08:26:33 -05:00
Gabisonfire
16d8707c48 Commit git cliff to the proper branch (#228) 2024-11-15 08:16:24 -05:00
Gabisonfire
6dfbaa4739 Fix git cliff branch protection issues (#227) 2024-11-15 08:11:50 -05:00
FunkeCoder23
03b5617312 Update release workflow (#219)
* add bare-bones contributing guide

* remove GITLAB_PAT token instructions as unneeded

* add git cliff workflow and config

add git cliff information to contributing.md

squash

---------

Co-authored-by: funkecoder23 <funkecoder@DESKTOP-AMVOBPG>
2024-11-15 07:39:15 -05:00
David Young
19cb42af77 Update all-debrid-api to v1.2.0 for new IP requirement (#212)
Signed-off-by: David Young <davidy@funkypenguin.co.nz>
2024-11-15 07:29:53 -05:00
FunkeCoder23
9344531b34 Add longer timeout for DMM (#218) 2024-07-01 11:40:09 +01:00
FunkeCoder23
723aa6b6a0 update eztv url (#220)
Co-authored-by: funkecoder23 <funkecoder@DESKTOP-AMVOBPG>
2024-05-18 21:39:14 -04:00
Davide Marcoli
e17b476801 Re-add filter step in the processing of the streams (#215) 2024-05-15 12:00:02 +01:00
David Young
2a414d8bc0 Always return title and filename (#210)
Signed-off-by: David Young <davidy@funkypenguin.co.nz>
2024-05-15 11:55:42 +01:00
iPromKnight
9b5f454e6e Python version bump in alpine (#209) 2024-04-22 12:49:59 +01:00
iPromKnight
ad9549c695 Version bump for release (#208) 2024-04-22 12:46:02 +01:00
David Young
1e85cb00ff INNER JOIN when selecting files and torrents to avoid null results (#207)
* INNER JOIN when selecting files and torrents to avoid null results

Signed-off-by: David Young <davidy@funkypenguin.co.nz>

* Extend fix to all torrent types

Signed-off-by: David Young <davidy@funkypenguin.co.nz>

---------

Signed-off-by: David Young <davidy@funkypenguin.co.nz>
2024-04-22 12:43:57 +01:00
iPromKnight
da640a4071 Fix namespaces on extracted scraper info (#204)
* Fix namespaces on extracted scrapers

* version bump
2024-04-11 18:56:29 +01:00
iPromKnight
e6a63fd72e Allow configuration of producer urls (#203)
* Allow configuration of urls in scrapers by mounting the scrapers.json file over the one in the container

* version bump
2024-04-11 18:23:42 +01:00
iPromKnight
02101ac50a Allow qbit concurrency to be configurable (#200) 2024-04-11 18:02:29 +01:00
iPromKnight
3c8ffd5082 Fix Duplicates (#199)
* Fix Duplicates

* Version
2024-04-02 20:31:22 +01:00
iPromKnight
79e0a0f102 DMM Offline (#198)
* Process DMM all locally

single call to github to download the repo archive.
remove need for PAT
update RTN to 0.2.13
change to batch_parse for title parsing from RTN

* introduce concurrent dictionary, and parallelism
2024-04-02 17:01:22 +01:00
purple_emily
6181207513 Fix incorrect file index stored (#197)
* Fix incorrect file index stored

* Update `rank-torrent-name` to latest version

* Knight Crawler version update
2024-04-01 23:08:32 +01:00
iPromKnight
684dbba2f0 RTN-025 and title category parsing (#195)
* update rtn to 025

* Implement movie / show type parsing

* switch to RTN in collectors

* ensure env for pythonnet is loaded, and that requirements copy for qbit

* version bump
2024-03-31 22:01:09 +01:00
iPromKnight
c75ecd2707 add qbit housekeeping service to remove stale torrents (#193)
* Add housekeeping service to clean stale torrents

* version bump
2024-03-30 11:52:23 +00:00
iPromKnight
c493ef3376 Hotfix category, and roll back RTN to 0.1.8 (#192)
* Hotfix categories

Also roll back RTN to 0.1.8, as a regression was introduced in 0.2

* bump version
2024-03-30 04:47:36 +00:00
iPromKnight
655a39e35c patch the query with execute (#191) 2024-03-30 01:54:06 +00:00
iPromKnight
cfeee62f6b patch ratio (#190)
* add configurable threshold, default 0.95

* version bump
2024-03-30 01:43:21 +00:00
iPromKnight
c6d4c06d70 hotfix categories from imdb result instead (#189)
* category mapping from imdb

* version bump
2024-03-30 01:26:02 +00:00
iPromKnight
08639a3254 Patch isMovie (#188)
* fix is movie

* version bump
2024-03-30 00:28:35 +00:00
iPromKnight
d430850749 Patch message contract names (#187)
* ensure unique message contract names per collector type

* version bump
2024-03-30 00:09:13 +00:00
iPromKnight
82c0ea459b change qbittorrent settings (#186) 2024-03-29 23:35:27 +00:00
iPromKnight
1e83b4c5d8 Patch the addon (#185) 2024-03-29 19:08:17 +00:00
iPromKnight
66609c2a46 trigram performance increased and housekeeping (#184)
* add new indexes, and change year column to int

* Change gist to gin, and change year to int

* Producer changes for new gin query

* Fully map the rtn response using json dump from Pydantic

Also updates Rtn to 0.1.9

* Add housekeeping script to reconcile imdb ids.

* Join Torrent onto the ingested torrent table

Ensure that a torrent can always find the details of where it came from, and how it was parsed.

* Version bump for release

* missing quote on table name
2024-03-29 19:01:48 +00:00
iPromKnight
2d78dc2735 version bump for release (#183) 2024-03-28 23:37:35 +00:00
iPromKnight
527d6cdf15 Upgrade RTN to 0.1.8, replace rabbitmq with drop-in replacement lavinmq - better performance, lower resource usage. (#182) 2024-03-28 23:35:41 +00:00
iPromKnight
bb260d78d6 Address Issues in build (#180)
- CIS-DI-0001
- CIS-DI-0006
- CIS-DI-0008
- DKL-LI-0003
2024-03-28 10:47:13 +00:00
iPromKnight
baec0450bf Hotfix ingestor github flow, and move to top level src folder (folder per service) (#179) 2024-03-28 10:20:26 +00:00
iPromKnight
4308a0ee71 [wip] bridge python and c# and bring in rank torrent name (#177)
* [wip] bridge python and c# and bring in rank torrent name

* Container restores package now

Includes two dev scripts to install the python packages locally for debugging purposes.

* Introduce slightly tuned title matching scoring, by making it length aware

this should help with sequels such as Terminator 2 vs. Terminator

* Version bump

Also fixes postgres healthcheck so that it utilises the user from the stack.env file
2024-03-28 10:13:50 +00:00
RohirrimRider
cc15a69517 fix torrent_ingestor ci (#178) 2024-03-27 21:38:59 -05:00
RohirrimRider
a6d3a4a066 init ingest torrents from annatar (#157)
* init ingest torrents from annatar

* works

* mv annatar to src/

* done

* add ci and readme

---------

Co-authored-by: Brett <eruiluvatar@pnbx.xyz>
2024-03-27 21:35:03 -05:00
iPromKnight
9430704205 rename committed .env file to stack.env (#176) 2024-03-27 12:57:14 +00:00
iPromKnight
6cc857bdc3 rename .env to stack.env (#175) 2024-03-27 12:37:11 +00:00
iPromKnight
cc2adbfca5 Recreate single docker-compose file (#174)
Clean it up - also comment all services
2024-03-27 12:21:40 +00:00
iPromKnight
9f928f9b66 Allow trackers url to be configurable + version bump (#173)
this allows people to use only the udp collection, only the tcp collection, or all.
2024-03-26 12:17:47 +00:00
iPromKnight
a50b5071b3 key prefixes per collector (#172)
* Ensure the collectors manage sagas in their own keyspace, as we do not want overlap (they have the same correlation ids internally from the exchange)

* version bump
2024-03-26 11:56:14 +00:00
iPromKnight
72db18f0ad add missing env (#171)
* add missing env

* version bump
2024-03-26 11:16:21 +00:00
iPromKnight
d70cef1b86 addon fix (#170)
* addon fix

* version bump
2024-03-26 10:25:43 +00:00
iPromKnight
e1e718cd22 includes qbit collector fix (#169) 2024-03-26 10:17:04 +00:00
iPromKnight
c3e58e4234 Fix redis connection strings for consistency across languages. (#168)
* Fix redis connection strings across languages

* compose version bump
2024-03-26 09:26:35 +00:00
iPromKnight
d584102d60 image updates for patched release (#167) 2024-03-26 00:27:54 +00:00
iPromKnight
fe4bb59502 fix indenting on env file (#166)
* fix images :/

* fix indenting
2024-03-26 00:22:33 +00:00
iPromKnight
472b3342d5 fix images :/ (#165) 2024-03-26 00:01:59 +00:00
131 changed files with 2826 additions and 475 deletions


@@ -12,6 +12,9 @@ A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1.
2.
3.
**Expected behavior**
A clear and concise description of what you expected to happen.
@@ -23,6 +26,7 @@ If the logs are short, make sure to triple backtick them, or use https://pastebi
**Hardware:**
- OS and distro: [e.g. Raspberry Pi OS, Ubuntu, Rocky]
- Server: [e.g. VM, Baremetal, Pi]
- Knightcrawler Version: [2.0.xx]
**Additional context**
Add any other context about the problem here.


@@ -0,0 +1,15 @@
name: Build and Push Torrent Ingestor Service
on:
push:
paths:
- 'src/torrent-ingestor/**'
jobs:
process:
uses: ./.github/workflows/base_image_workflow.yaml
secrets: inherit
with:
CONTEXT: ./src/torrent-ingestor
DOCKERFILE: ./src/torrent-ingestor/Dockerfile
IMAGE_NAME: knightcrawler-torrent-ingestor

.github/workflows/git_cliff.yml

@@ -0,0 +1,39 @@
on:
push:
branches:
- main
workflow_dispatch:
jobs:
changelog:
name: Generate changelog
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generate a changelog
uses: orhun/git-cliff-action@v3
with:
config: cliff.toml
args: --verbose
env:
OUTPUT: CHANGELOG.md
GITHUB_REPO: ${{ github.repository }}
- name: Commit
run: |
git config user.name 'github-actions[bot]'
git config user.email 'github-actions[bot]@users.noreply.github.com'
set +e
git checkout -b feat/changelog_$(date +"%d_%m")
git add CHANGELOG.md
git commit -m "[skip ci] Update changelog"
git push https://${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git feat/changelog_$(date +"%d_%m")
- name: create pull request
run: gh pr create -B main -H feat/changelog_$(date +"%d_%m") --title '[skip ci] Update changelog' --body 'Changelog update by git-cliff'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.gitignore

@@ -610,4 +610,9 @@ fabric.properties
**/caddy/logs/**
# Mac directory indexes
.DS_Store
.DS_Store
deployment/docker/stack.env
src/producer/src/python/
src/debrid-collector/python/
src/qbit-collector/python/

CHANGELOG.md

@@ -0,0 +1,22 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.0.0] - 2024-03-25
### Details
#### Changed
- Change POSTGRES_USERNAME to POSTGRES_USER. Oops by @purple-emily
- Change POSTGRES_DATABASE to POSTGRES_DB by @purple-emily
- Two movie commands instead of movie and tv by @purple-emily
- Cleanup RabbitMQ env vars, and Github Pat by @iPromKnight
#### Fixed
- HRD -> HDR by @mplewis
## New Contributors
* @mplewis made their first contribution
<!-- generated by git-cliff -->

CONTRIBUTING.md

@@ -0,0 +1,34 @@
We use [Meaningful commit messages](https://reflectoring.io/meaningful-commit-messages/)
Tl;dr:
1. It should answer the question: "What happens if the changes are applied?"
2. Use the imperative, present tense. It is easier to read and scan quickly:
```
Right: Add feature to alert admin for new user registration
Wrong: Added feature ... (past tense)
```
3. The summary should always be able to complete the following sentence:
`If applied, this commit will… `
We use [git-cliff] for our changelog.
The breaking flag is set to true when the commit has an exclamation mark after the commit type and scope, e.g.:
`feat(scope)!: this is a breaking change`
Keywords (Commit messages should start with these):
```
# Added
add
support
# Removed
remove
delete
# Fixed
test
fix
```
Any other commits will fall under the `Changed` category
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html)


@@ -51,11 +51,11 @@ Download and install [Docker Compose](https://docs.docker.com/compose/install/),
### Environment Setup
Before running the project, you need to set up the environment variables. Copy the `.env.example` file to `.env`:
Before running the project, you need to set up the environment variables. Edit the values in `stack.env`:
```sh
cd deployment/docker
cp .env.example .env
code stack.env
```
Then set any of the values you would like to customize.
@@ -67,33 +67,6 @@ Then set any of the values you would like to customize.
By default, Knight Crawler is configured to be *relatively* conservative in its resource usage. If running on a decent machine (16GB RAM, i5+ or equivalent), you can increase some settings to increase consumer throughput. This is especially helpful if you have a large backlog from [importing databases](#importing-external-dumps).
In your `.env` file, under the `# Consumer` section increase `CONSUMER_REPLICAS` from `3` to `15`.
You can also increase `JOB_CONCURRENCY` from `5` to `10`.
### DebridMediaManager setup (optional)
There are some optional steps you should take to maximise the number of movies/tv shows we can find.
We can search DebridMediaManager hash lists which are hosted on GitHub. This allows us to add hundreds of thousands of movies and tv shows, but it requires a Personal Access Token to be generated. The software only needs read access and only for public repositories. To generate one, please follow these steps:
1. Navigate to GitHub settings -> Developer Settings -> Personal access tokens -> Fine-grained tokens (click [here](https://github.com/settings/tokens?type=beta) for a direct link)
2. Press `Generate new token`
3. Fill out the form (example data below):
```
Token name:
KnightCrawler
Expiration:
90 days
Description:
<blank>
Repository access
(checked) Public Repositories (read-only)
```
4. Click `Generate token`
5. Take the new token and add it to the bottom of the [.env](deployment/docker/.env) file
```
GithubSettings__PAT=<YOUR TOKEN HERE>
```
### Configure external access
Please choose which applies to you:
@@ -143,7 +116,7 @@ Remove or comment out the port for the addon, and connect it to Caddy:
addon:
<<: *knightcrawler-app
env_file:
- .env
- stack.env
hostname: knightcrawler-addon
image: gabisonfire/knightcrawler-addon:latest
labels:

cliff.toml

@@ -0,0 +1,112 @@
# git-cliff ~ configuration file
# https://git-cliff.org/docs/configuration
[changelog]
# changelog header
header = """
# Changelog\n
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n
"""
# template for the changelog body
# https://keats.github.io/tera/docs/#introduction
body = """
{%- macro remote_url() -%}
https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}
{%- endmacro -%}
{% if version -%}
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else -%}
## [Unreleased]
{% endif -%}
### Details\
{% for group, commits in commits | group_by(attribute="group") %}
#### {{ group | upper_first }}
{%- for commit in commits %}
- {{ commit.message | upper_first | trim }}\
{% if commit.github.username %} by @{{ commit.github.username }}{%- endif -%}
{% if commit.github.pr_number %} in \
[#{{ commit.github.pr_number }}]({{ self::remote_url() }}/pull/{{ commit.github.pr_number }}) \
{%- endif -%}
{% endfor %}
{% endfor %}
{%- if github.contributors | filter(attribute="is_first_time", value=true) | length != 0 %}
## New Contributors
{%- endif -%}
{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %}
* @{{ contributor.username }} made their first contribution
{%- if contributor.pr_number %} in \
[#{{ contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \
{%- endif %}
{%- endfor %}\n
"""
# template for the changelog footer
footer = """
{%- macro remote_url() -%}
https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}
{%- endmacro -%}
{% for release in releases -%}
{% if release.version -%}
{% if release.previous.version -%}
[{{ release.version | trim_start_matches(pat="v") }}]: \
{{ self::remote_url() }}/compare/{{ release.previous.version }}..{{ release.version }}
{% endif -%}
{% else -%}
[unreleased]: {{ self::remote_url() }}/compare/{{ release.previous.version }}..HEAD
{% endif -%}
{% endfor %}
<!-- generated by git-cliff -->
"""
# remove the leading and trailing whitespace from the templates
trim = true
[git]
# parse the commits based on https://www.conventionalcommits.org
conventional_commits = true
# filter out the commits that are not conventional
filter_unconventional = true
# process each line of a commit as an individual commit
split_commits = false
# regex for preprocessing the commit messages
commit_preprocessors = [
# remove issue numbers from commits
{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "" },
]
# regex for parsing and grouping commits
commit_parsers = [
{ message = "^.*: add", group = "Added" },
{ message = "^add", group = "Added" },
{ message = "^.*: support", group = "Added" },
{ message = "^support", group = "Added" },
{ message = "^.*: remove", group = "Removed" },
{ message = "^remove", group = "Removed" },
{ message = "^.*: delete", group = "Removed" },
{ message = "^delete", group = "Removed" },
{ message = "^.*: test", group = "Fixed" },
{ message = "^test", group = "Fixed" },
{ message = "^.*: fix", group = "Fixed" },
{ message = "^fix", group = "Fixed" },
{ message = "^.*", group = "Changed" },
]
# protect breaking changes from being skipped due to matching a skipping commit_parser
protect_breaking_commits = false
# filter out the commits that are not matched by commit parsers
filter_commits = true
# regex for matching git tags
tag_pattern = "v[0-9].*"
# regex for skipping tags
skip_tags = "v0.1.0-beta.1"
# regex for ignoring tags
ignore_tags = ""
# sort the tags topologically
topo_order = false
# sort the commits inside sections by oldest/newest order
sort_commits = "oldest"
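As an illustration of how these parsers behave on the commits above: a message starting with "add" (e.g. "add qbit housekeeping service") is grouped under Added, "remove GITLAB_PAT token instructions" under Removed, "fix torrent_ingestor ci" under Fixed, and anything else, such as "version bump for release", falls through to the catch-all Changed.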


@@ -1,7 +0,0 @@
version: "3.9"
name: "knightcrawler"
include:
- components/network.yaml
- components/volumes.yaml
- components/infrastructure.yaml
- components/knightcrawler.yaml


@@ -12,11 +12,16 @@ enabled=false
program=
[BitTorrent]
Session\AnonymousModeEnabled=true
Session\BTProtocol=TCP
Session\ConnectionSpeed=150
Session\DefaultSavePath=/downloads/
Session\ExcludedFileNames=
Session\MaxActiveDownloads=10
Session\MaxActiveCheckingTorrents=20
Session\MaxActiveDownloads=20
Session\MaxActiveTorrents=50
Session\MaxActiveUploads=50
Session\MaxConcurrentHTTPAnnounces=1000
Session\MaxConnections=2000
Session\Port=6881
Session\QueueingSystemEnabled=true
@@ -50,9 +55,10 @@ MailNotification\req_auth=true
WebUI\Address=*
WebUI\AuthSubnetWhitelist=0.0.0.0/0
WebUI\AuthSubnetWhitelistEnabled=true
WebUI\HostHeaderValidation=false
WebUI\LocalHostAuth=false
WebUI\ServerDomains=*
[RSS]
AutoDownloader\DownloadRepacks=true
AutoDownloader\SmartEpisodeFilter=s(\\d+)e(\\d+), (\\d+)x(\\d+), "(\\d{4}[.\\-]\\d{1,2}[.\\-]\\d{1,2})", "(\\d{1,2}[.\\-]\\d{1,2}[.\\-]\\d{4})"
AutoDownloader\SmartEpisodeFilter=s(\\d+)e(\\d+), (\\d+)x(\\d+), "(\\d{4}[.\\-]\\d{1,2}[.\\-]\\d{1,2})", "(\\d{1,2}[.\\-]\\d{1,2}[.\\-]\\d{4})"


@@ -0,0 +1,244 @@
version: "3.9"
name: knightcrawler
networks:
knightcrawler-network:
name: knightcrawler-network
driver: bridge
volumes:
postgres:
lavinmq:
redis:
services:
## Postgres is the database that is used by the services.
## All downloaded metadata is stored in this database.
postgres:
env_file: stack.env
healthcheck:
test: [ "CMD", "sh", "-c", "pg_isready -h localhost -U $$POSTGRES_USER" ]
timeout: 10s
interval: 10s
retries: 3
start_period: 10s
image: postgres:latest
# # If you need the database to be accessible from outside, please open the below port.
# # Furthermore, please, please, please, change the username and password in the stack.env file.
# # If you want to enhance your security even more, create a new user for the database with a strong password.
# ports:
# - "5432:5432"
networks:
- knightcrawler-network
restart: unless-stopped
volumes:
- postgres:/var/lib/postgresql/data
## Redis is used as a cache for the services.
## It is used to store the infohashes that are currently being processed in sagas, as well as interim data.
redis:
env_file: stack.env
healthcheck:
test: ["CMD-SHELL", "redis-cli ping"]
timeout: 10s
interval: 10s
retries: 3
start_period: 10s
image: redis/redis-stack:latest
# # If you need redis to be accessible from outside, please open the below port.
# ports:
# - "6379:6379"
networks:
- knightcrawler-network
restart: unless-stopped
volumes:
- redis:/data
## LavinMQ is used as a message broker for the services.
## It is a high-performance drop-in replacement for RabbitMQ.
## It is used to communicate between the services.
lavinmq:
env_file: stack.env
# # If you need the message broker to be accessible from outside, please open the below ports.
# # Furthermore, please, please, please, look at the documentation for lavinmq / rabbitmq on how to secure the service.
# ports:
# - "5672:5672"
# - "15672:15672"
# - "15692:15692"
image: cloudamqp/lavinmq:latest
healthcheck:
test: ["CMD-SHELL", "lavinmqctl status"]
timeout: 10s
interval: 10s
retries: 3
start_period: 10s
restart: unless-stopped
networks:
- knightcrawler-network
volumes:
- lavinmq:/var/lib/lavinmq/
## The addon. This is what is used in stremio
addon:
depends_on:
metadata:
condition: service_completed_successfully
migrator:
condition: service_completed_successfully
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
hostname: knightcrawler-addon
image: gabisonfire/knightcrawler-addon:2.0.26
labels:
logging: promtail
networks:
- knightcrawler-network
ports:
- "7000:7000"
restart: unless-stopped
## The consumer is responsible for consuming infohashes and orchestrating download of metadata.
consumer:
depends_on:
metadata:
condition: service_completed_successfully
migrator:
condition: service_completed_successfully
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-consumer:2.0.26
labels:
logging: promtail
networks:
- knightcrawler-network
restart: unless-stopped
## The debrid collector is responsible for downloading metadata from debrid services. (Currently only RealDebrid is supported)
debridcollector:
depends_on:
metadata:
condition: service_completed_successfully
migrator:
condition: service_completed_successfully
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-debrid-collector:2.0.26
labels:
logging: promtail
networks:
- knightcrawler-network
restart: unless-stopped
## The metadata service is responsible for downloading IMDb's publicly available datasets.
## This is used to enrich the metadata during production of ingested infohashes.
metadata:
depends_on:
migrator:
condition: service_completed_successfully
env_file: stack.env
image: gabisonfire/knightcrawler-metadata:2.0.26
networks:
- knightcrawler-network
restart: "no"
## The migrator is responsible for migrating the database schema.
migrator:
depends_on:
postgres:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-migrator:2.0.26
networks:
- knightcrawler-network
restart: "no"
## The producer is responsible for producing infohashes by acquiring them from various sites, including DMM.
producer:
depends_on:
metadata:
condition: service_completed_successfully
migrator:
condition: service_completed_successfully
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-producer:2.0.26
labels:
logging: promtail
networks:
- knightcrawler-network
restart: unless-stopped
## QBit collector utilizes QBitTorrent to download metadata.
qbitcollector:
depends_on:
metadata:
condition: service_completed_successfully
migrator:
condition: service_completed_successfully
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
qbittorrent:
condition: service_healthy
deploy:
replicas: ${QBIT_REPLICAS:-0}
env_file: stack.env
image: gabisonfire/knightcrawler-qbit-collector:2.0.26
labels:
logging: promtail
networks:
- knightcrawler-network
restart: unless-stopped
## QBitTorrent is a torrent client that can be used to download torrents. In this case it's used to download metadata.
## The QBit collector requires this.
qbittorrent:
deploy:
replicas: ${QBIT_REPLICAS:-0}
env_file: stack.env
environment:
PGID: "1000"
PUID: "1000"
TORRENTING_PORT: "6881"
WEBUI_PORT: "8080"
healthcheck:
test: ["CMD-SHELL", "curl --fail http://localhost:8080"]
timeout: 10s
interval: 10s
retries: 3
start_period: 10s
image: lscr.io/linuxserver/qbittorrent:latest
networks:
- knightcrawler-network
ports:
- "6881:6881/tcp"
- "6881:6881/udp"
# if you want to expose the webui, uncomment the following line
# - "8001:8080"
restart: unless-stopped
volumes:
- ./config/qbit/qbittorrent.conf:/config/qBittorrent/qBittorrent.conf


@@ -16,7 +16,7 @@ rule_files:
scrape_configs:
- job_name: "rabbitmq"
static_configs:
- targets: ["rabbitmq:15692"]
- targets: ["lavinmq:15692"]
- job_name: "postgres-exporter"
static_configs:
- targets: ["postgres-exporter:9187"]


@@ -4,8 +4,8 @@ x-basehealth: &base-health
retries: 3
start_period: 10s
x-rabbithealth: &rabbitmq-health
test: rabbitmq-diagnostics -q ping
x-lavinhealth: &lavinmq-health
test: [ "CMD-SHELL", "lavinmqctl status" ]
<<: *base-health
x-redishealth: &redis-health
@@ -13,7 +13,7 @@ x-redishealth: &redis-health
<<: *base-health
x-postgreshealth: &postgresdb-health
test: pg_isready
test: [ "CMD", "sh", "-c", "pg_isready -h localhost -U $$POSTGRES_USER" ]
<<: *base-health
x-qbit: &qbit-health
@@ -35,7 +35,7 @@ services:
- postgres:/var/lib/postgresql/data
healthcheck: *postgresdb-health
restart: unless-stopped
env_file: ../.env
env_file: ../../.env
networks:
- knightcrawler-network
@@ -48,25 +48,23 @@ services:
- redis:/data
restart: unless-stopped
healthcheck: *redis-health
env_file: ../.env
env_file: ../../.env
networks:
- knightcrawler-network
rabbitmq:
image: rabbitmq:3-management
lavinmq:
env_file: stack.env
# # If you need the message broker to be accessible from outside, please open the below ports.
# # Furthermore, please, please, please, look at the documentation for rabbit on how to secure the service.
# # Furthermore, please, please, please, look at the documentation for lavinmq / rabbitmq on how to secure the service.
# ports:
# - "5672:5672"
# - "15672:15672"
# - "15692:15692"
volumes:
- rabbitmq:/var/lib/rabbitmq
image: cloudamqp/lavinmq:latest
healthcheck: *lavinmq-health
restart: unless-stopped
healthcheck: *rabbitmq-health
env_file: ../.env
networks:
- knightcrawler-network
volumes:
- lavinmq:/var/lib/lavinmq/
## QBitTorrent is a torrent client that can be used to download torrents. In this case it's used to download metadata.
## The QBit collector requires this.
@@ -80,10 +78,10 @@ services:
ports:
- 6881:6881
- 6881:6881/udp
env_file: ../.env
env_file: ../../.env
networks:
- knightcrawler-network
restart: unless-stopped
healthcheck: *qbit-health
volumes:
- ./config/qbit/qbittorrent.conf:/config/qBittorrent/qBittorrent.conf
- ../../config/qbit/qbittorrent.conf:/config/qBittorrent/qBittorrent.conf


@@ -1,7 +1,7 @@
x-apps: &knightcrawler-app
labels:
logging: "promtail"
env_file: ../.env
env_file: ../../.env
networks:
- knightcrawler-network
@@ -11,7 +11,7 @@ x-depends: &knightcrawler-app-depends
condition: service_healthy
postgres:
condition: service_healthy
rabbitmq:
lavinmq:
condition: service_healthy
migrator:
condition: service_completed_successfully
@@ -20,8 +20,8 @@ x-depends: &knightcrawler-app-depends
services:
metadata:
image: gabisonfire/knightcrawler-metadata:2.0.0
env_file: ../.env
image: gabisonfire/knightcrawler-metadata:2.0.26
env_file: ../../.env
networks:
- knightcrawler-network
restart: no
@@ -30,8 +30,8 @@ services:
condition: service_completed_successfully
migrator:
image: gabisonfire/knightcrawler-migrator:2.0.0
env_file: ../.env
image: gabisonfire/knightcrawler-migrator:2.0.26
env_file: ../../.env
networks:
- knightcrawler-network
restart: no
@@ -40,7 +40,7 @@ services:
condition: service_healthy
addon:
image: gabisonfire/knightcrawler-addon:2.0.0
image: gabisonfire/knightcrawler-addon:2.0.26
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
hostname: knightcrawler-addon
@@ -48,22 +48,22 @@ services:
- "7000:7000"
consumer:
image: gabisonfire/knightcrawler-consumer:2.0.0
image: gabisonfire/knightcrawler-consumer:2.0.26
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
debridcollector:
image: gabisonfire/knightcrawler-debridcollector:2.0.0
image: gabisonfire/knightcrawler-debrid-collector:2.0.26
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
producer:
image: gabisonfire/knightcrawler-producer:2.0.0
image: gabisonfire/knightcrawler-producer:2.0.26
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
qbitcollector:
image: gabisonfire/knightcrawler-qbitcollector:2.0.0
image: gabisonfire/knightcrawler-qbit-collector:2.0.26
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
depends_on:


@@ -1,4 +1,4 @@
volumes:
postgres:
redis:
rabbitmq:
lavinmq:


@@ -0,0 +1,7 @@
version: "3.9"
name: "knightcrawler"
include:
- ./components/network.yaml
- ./components/volumes.yaml
- ./components/infrastructure.yaml
- ./components/knightcrawler.yaml


@@ -9,10 +9,12 @@ POSTGRES_PASSWORD=postgres
POSTGRES_DB=knightcrawler
# Redis
REDIS_CONNECTION_STRING=redis:6379
REDIS_HOST=redis
REDIS_PORT=6379
REDIS_EXTRA=abortConnect=false,allowAdmin=true
# RabbitMQ
RABBITMQ_HOST=rabbitmq
# AMQP
RABBITMQ_HOST=lavinmq
RABBITMQ_USER=guest
RABBITMQ_PASSWORD=guest
RABBITMQ_CONSUMER_QUEUE_NAME=ingested
@@ -28,9 +30,12 @@ METADATA_INSERT_BATCH_SIZE=50000
COLLECTOR_QBIT_ENABLED=false
COLLECTOR_DEBRID_ENABLED=true
COLLECTOR_REAL_DEBRID_API_KEY=
QBIT_HOST=http://qbittorrent:8080
QBIT_TRACKERS_URL=https://raw.githubusercontent.com/ngosang/trackerslist/master/trackers_all_http.txt
QBIT_CONCURRENCY=8
# Number of replicas for the qBittorrent collector and qBitTorrent client. Should be 0 or 1.
QBIT_REPLICAS=0
# Addon
DEBUG_MODE=false
# Producer
GITHUB_PAT=


@@ -25,7 +25,9 @@ export const cinemetaConfig = {
}
export const cacheConfig = {
REDIS_CONNECTION_STRING: process.env.REDIS_CONNECTION_STRING || 'redis://localhost:6379/0',
REDIS_HOST: process.env.REDIS_HOST || 'redis',
REDIS_PORT: process.env.REDIS_PORT || '6379',
REDIS_EXTRA: process.env.REDIS_EXTRA || '',
NO_CACHE: parseBool(process.env.NO_CACHE, false),
IMDB_TTL: parseInt(process.env.IMDB_TTL || 60 * 60 * 4), // 4 Hours
STREAM_TTL: parseInt(process.env.STREAM_TTL || 60 * 60 * 4), // 4 Hours
@@ -40,3 +42,5 @@ export const cacheConfig = {
STALE_ERROR_AGE: parseInt(process.env.STALE_ERROR_AGE) || 7 * 24 * 60 * 60, // 7 days
GLOBAL_KEY_PREFIX: process.env.GLOBAL_KEY_PREFIX || 'jackettio-addon',
}
cacheConfig.REDIS_CONNECTION_STRING = 'redis://' + cacheConfig.REDIS_HOST + ':' + cacheConfig.REDIS_PORT + '?' + cacheConfig.REDIS_EXTRA;
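With the stack.env defaults shown above (REDIS_HOST=redis, REDIS_PORT=6379, REDIS_EXTRA=abortConnect=false,allowAdmin=true), this final line composes redis://redis:6379?abortConnect=false,allowAdmin=true, replacing whatever REDIS_CONNECTION_STRING previously held.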


@@ -12,7 +12,7 @@
"@redis/client": "^1.5.14",
"@redis/json": "^1.0.6",
"@redis/search": "^1.1.6",
"all-debrid-api": "^1.1.0",
"all-debrid-api": "^1.2.0",
"axios": "^1.6.1",
"bottleneck": "^2.19.5",
"cache-manager": "^3.4.4",


@@ -10,7 +10,7 @@
},
"dependencies": {
"@putdotio/api-client": "^8.42.0",
"all-debrid-api": "^1.1.0",
"all-debrid-api": "^1.2.0",
"axios": "^1.6.1",
"bottleneck": "^2.19.5",
"cache-manager": "^3.4.4",


@@ -3,6 +3,7 @@ import { addonBuilder } from 'stremio-addon-sdk';
import { cacheWrapStream } from './lib/cache.js';
import { dummyManifest } from './lib/manifest.js';
import * as repository from './lib/repository.js';
import applyFilters from "./lib/filter.js";
import applySorting from './lib/sort.js';
import { toStreamInfo, applyStaticInfo } from './lib/streamInfo.js';
import { Type } from './lib/types.js';
@@ -32,6 +33,7 @@ builder.defineStreamHandler((args) => {
.then(records => records
.sort((a, b) => b.torrent.seeders - a.torrent.seeders || b.torrent.uploadDate - a.torrent.uploadDate)
.map(record => toStreamInfo(record)))))
.then(streams => applyFilters(streams, args.extra))
.then(streams => applySorting(streams, args.extra))
.then(streams => applyStaticInfo(streams))
.then(streams => applyMochs(streams, args.extra))


@@ -1,8 +1,12 @@
export const cacheConfig = {
REDIS_CONNECTION_STRING: process.env.REDIS_CONNECTION_STRING || 'redis://localhost:6379/0',
REDIS_HOST: process.env.REDIS_HOST || 'redis',
REDIS_PORT: process.env.REDIS_PORT || '6379',
REDIS_EXTRA: process.env.REDIS_EXTRA || '',
NO_CACHE: parseBool(process.env.NO_CACHE, false),
}
cacheConfig.REDIS_CONNECTION_STRING = 'redis://' + cacheConfig.REDIS_HOST + ':' + cacheConfig.REDIS_PORT + '?' + cacheConfig.REDIS_EXTRA;
export const databaseConfig = {
POSTGRES_HOST: process.env.POSTGRES_HOST || 'postgres',
POSTGRES_PORT: process.env.POSTGRES_PORT || '5432',


@@ -14,13 +14,12 @@ const Torrent = database.define('torrent',
{
infoHash: { type: Sequelize.STRING(64), primaryKey: true },
provider: { type: Sequelize.STRING(32), allowNull: false },
torrentId: { type: Sequelize.STRING(128) },
ingestedTorrentId: { type: Sequelize.BIGINT, allowNull: false },
title: { type: Sequelize.STRING(256), allowNull: false },
size: { type: Sequelize.BIGINT },
type: { type: Sequelize.STRING(16), allowNull: false },
uploadDate: { type: Sequelize.DATE, allowNull: false },
seeders: { type: Sequelize.SMALLINT },
trackers: { type: Sequelize.STRING(4096) },
languages: { type: Sequelize.STRING(4096) },
resolution: { type: Sequelize.STRING(16) }
}
@@ -85,7 +84,7 @@ export function getImdbIdMovieEntries(imdbId) {
where: {
imdbId: { [Op.eq]: imdbId }
},
include: [Torrent],
include: { model: Torrent, required: true },
limit: 500,
order: [
[Torrent, 'size', 'DESC']
@@ -100,7 +99,7 @@ export function getImdbIdSeriesEntries(imdbId, season, episode) {
imdbSeason: { [Op.eq]: season },
imdbEpisode: { [Op.eq]: episode }
},
include: [Torrent],
include: { model: Torrent, required: true },
limit: 500,
order: [
[Torrent, 'size', 'DESC']
@@ -113,7 +112,7 @@ export function getKitsuIdMovieEntries(kitsuId) {
where: {
kitsuId: { [Op.eq]: kitsuId }
},
include: [Torrent],
include: { model: Torrent, required: true },
limit: 500,
order: [
[Torrent, 'size', 'DESC']
@@ -127,7 +126,7 @@ export function getKitsuIdSeriesEntries(kitsuId, episode) {
kitsuId: { [Op.eq]: kitsuId },
kitsuEpisode: { [Op.eq]: episode }
},
include: [Torrent],
include: { model: Torrent, required: true },
limit: 500,
order: [
[Torrent, 'size', 'DESC']


@@ -20,7 +20,7 @@ export function toStreamInfo(record) {
const title = joinDetailParts(
[
joinDetailParts([record.torrent.title.replace(/[, ]+/g, ' ')]),
joinDetailParts([!sameInfo && record.title || undefined]),
joinDetailParts([record.title || undefined]),
joinDetailParts([
joinDetailParts([formatSize(record.size)], '💾 ')
]),


@@ -17,7 +17,6 @@
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
<PackageReference Include="Polly" Version="8.3.1" />
<PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
<PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
@@ -29,10 +28,30 @@
<None Include="Configuration\logging.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="requirements.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<Content Remove="eng\**" />
<None Remove="eng\**" />
</ItemGroup>
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
<Content Remove="python\**" />
<None Include="python\**">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\shared\SharedContracts.csproj" />
</ItemGroup>
<ItemGroup>
<Compile Remove="eng\**" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Remove="eng\**" />
</ItemGroup>
</Project>


@@ -6,6 +6,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharedContracts", "..\share
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F53-28E6-404F-9EFE-DADFBEF8338B}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{72A042C3-B4F3-45C5-AC20-041FE8F41EFC}"
ProjectSection(SolutionItems) = preProject
eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
eng\install-python-reqs.sh = eng\install-python-reqs.sh
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU


@@ -9,12 +9,23 @@ RUN dotnet restore -a $TARGETARCH
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
WORKDIR /app
ENV PYTHONUNBUFFERED=1
RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
COPY --from=build /src/out .
RUN rm -rf /app/python && mkdir -p /app/python
RUN pip3 install -r /app/requirements.txt -t /app/python
RUN addgroup -S debrid && adduser -S -G debrid debrid
USER debrid
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD pgrep -f dotnet || exit 1
ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0
ENTRYPOINT ["dotnet", "DebridCollector.dll"]


@@ -1,5 +1,3 @@
using DebridCollector.Features.Configuration;
namespace DebridCollector.Extensions;
public static class ServiceCollectionExtensions
@@ -17,7 +15,8 @@ public static class ServiceCollectionExtensions
var serviceConfiguration = services.LoadConfigurationFromEnv<DebridCollectorConfiguration>();
services.AddRealDebridClient(serviceConfiguration);
services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>();
services.RegisterPythonEngine();
services.AddSingleton<IRankTorrentName, RankTorrentName>();
services.AddHostedService<DebridRequestProcessor>();
return services;
@@ -62,7 +61,10 @@ public static class ServiceCollectionExtensions
cfg.UseMessageRetry(r => r.Intervals(1000,2000,5000));
cfg.UseInMemoryOutbox();
})
.RedisRepository(redisConfiguration.ConnectionString)
.RedisRepository(redisConfiguration.ConnectionString, options =>
{
options.KeyPrefix = "debrid-collector:";
})
.Endpoint(
e =>
{


@@ -1,6 +1,4 @@
using DebridCollector.Features.Configuration;
namespace DebridCollector.Features.Debrid;
namespace DebridCollector.Features.Debrid;
public static class ServiceCollectionExtensions
{


@@ -3,10 +3,11 @@ namespace DebridCollector.Features.Worker;
public static class DebridMetaToTorrentMeta
{
public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection(
IParseTorrentTitle torrentTitle,
IRankTorrentName rankTorrentName,
Torrent torrent,
string ImdbId,
FileDataDictionary Metadata)
FileDataDictionary Metadata,
ILogger<WriteMetadataConsumer> logger)
{
try
{
@@ -15,34 +16,42 @@ public static class DebridMetaToTorrentMeta
foreach (var metadataEntry in Metadata.Where(m => Filetypes.VideoFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext))))
{
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
var file = new TorrentFile
{
ImdbId = ImdbId,
KitsuId = 0,
InfoHash = torrent.InfoHash,
FileIndex = validFileIndex ? fileIndex : 0,
FileIndex = validFileIndex ? fileIndexMinusOne : 0,
Title = metadataEntry.Value.Filename,
Size = metadataEntry.Value.Filesize.GetValueOrDefault(),
};
var parsedTitle = torrentTitle.Parse(file.Title);
var parsedTitle = rankTorrentName.Parse(file.Title, false);
file.ImdbSeason = parsedTitle.Seasons.FirstOrDefault();
file.ImdbEpisode = parsedTitle.Episodes.FirstOrDefault();
if (!parsedTitle.Success)
{
logger.LogWarning("Failed to parse title {Title} for metadata mapping", file.Title);
continue;
}
file.ImdbSeason = parsedTitle.Response?.Season?.FirstOrDefault() ?? 0;
file.ImdbEpisode = parsedTitle.Response?.Episode?.FirstOrDefault() ?? 0;
files.Add(file);
}
return files;
}
catch (Exception)
catch (Exception ex)
{
logger.LogWarning("Failed to map metadata to files collection: {Exception}", ex.Message);
return [];
}
}
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, FileDataDictionary Metadata)
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, FileDataDictionary Metadata, ILogger<WriteMetadataConsumer> logger)
{
try
{
@@ -58,13 +67,14 @@ public static class DebridMetaToTorrentMeta
foreach (var metadataEntry in Metadata.Where(m => Filetypes.SubtitleFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext))))
{
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
var fileId = torrentFiles.FirstOrDefault(
t => Path.GetFileNameWithoutExtension(t.Title) == Path.GetFileNameWithoutExtension(metadataEntry.Value.Filename))?.Id ?? 0;
var file = new SubtitleFile
{
InfoHash = InfoHash,
FileIndex = validFileIndex ? fileIndex : 0,
FileIndex = validFileIndex ? fileIndexMinusOne : 0,
FileId = fileId,
Title = metadataEntry.Value.Filename,
};
@@ -74,8 +84,9 @@ public static class DebridMetaToTorrentMeta
return files;
}
catch (Exception)
catch (Exception ex)
{
logger.LogWarning("Failed to map metadata to subtitles collection: {Exception}", ex.Message);
return [];
}
}


@@ -53,6 +53,12 @@ public class InfohashMetadataSagaStateMachine : MassTransitStateMachine<Infohash
.Then(
context =>
{
if (!context.Message.WithFiles)
{
logger.LogInformation("No files written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
return;
}
logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
})
.TransitionTo(Completed)


@@ -1,22 +1,22 @@
namespace DebridCollector.Features.Worker;
[EntityName("perform-metadata-request")]
[EntityName("perform-metadata-request-debrid-collector")]
public record PerformMetadataRequest(Guid CorrelationId, string InfoHash) : CorrelatedBy<Guid>;
[EntityName("torrent-metadata-response")]
[EntityName("torrent-metadata-response-debrid-collector")]
public record GotMetadata(TorrentMetadataResponse Metadata) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}
[EntityName("write-metadata")]
[EntityName("write-metadata-debrid-collector")]
public record WriteMetadata(Torrent Torrent, TorrentMetadataResponse Metadata, string ImdbId) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}
[EntityName("metadata-written")]
public record MetadataWritten(TorrentMetadataResponse Metadata) : CorrelatedBy<Guid>
[EntityName("metadata-written-debrid-colloctor")]
public record MetadataWritten(TorrentMetadataResponse Metadata, bool WithFiles) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}


@@ -1,25 +1,28 @@
namespace DebridCollector.Features.Worker;
public class WriteMetadataConsumer(IParseTorrentTitle parseTorrentTitle, IDataStorage dataStorage) : IConsumer<WriteMetadata>
public class WriteMetadataConsumer(IRankTorrentName rankTorrentName, IDataStorage dataStorage, ILogger<WriteMetadataConsumer> logger) : IConsumer<WriteMetadata>
{
public async Task Consume(ConsumeContext<WriteMetadata> context)
{
var request = context.Message;
var torrentFiles = DebridMetaToTorrentMeta.MapMetadataToFilesCollection(parseTorrentTitle, request.Torrent, request.ImdbId, request.Metadata.Metadata);
var torrentFiles = DebridMetaToTorrentMeta.MapMetadataToFilesCollection(rankTorrentName, request.Torrent, request.ImdbId, request.Metadata.Metadata, logger);
if (torrentFiles.Any())
if (!torrentFiles.Any())
{
await dataStorage.InsertFiles(torrentFiles);
var subtitles = await DebridMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata);
if (subtitles.Any())
{
await dataStorage.InsertSubtitles(subtitles);
}
await context.Publish(new MetadataWritten(request.Metadata, false));
return;
}
await context.Publish(new MetadataWritten(request.Metadata));
await dataStorage.InsertFiles(torrentFiles);
var subtitles = await DebridMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata, logger);
if (subtitles.Any())
{
await dataStorage.InsertSubtitles(subtitles);
}
await context.Publish(new MetadataWritten(request.Metadata, true));
}
}


@@ -4,17 +4,18 @@ global using System.Text.Json;
global using System.Text.Json.Serialization;
global using System.Threading.Channels;
global using DebridCollector.Extensions;
global using DebridCollector.Features.Configuration;
global using DebridCollector.Features.Debrid;
global using DebridCollector.Features.Worker;
global using MassTransit;
global using MassTransit.Mediator;
global using Microsoft.AspNetCore.Builder;
global using Microsoft.Extensions.DependencyInjection;
global using Polly;
global using Polly.Extensions.Http;
global using PromKnight.ParseTorrentTitle;
global using SharedContracts.Configuration;
global using SharedContracts.Dapper;
global using SharedContracts.Extensions;
global using SharedContracts.Models;
global using SharedContracts.Python;
global using SharedContracts.Python.RTN;
global using SharedContracts.Requests;


@@ -0,0 +1,2 @@
mkdir -p ../python
python -m pip install -r ../requirements.txt -t ../python/


@@ -0,0 +1,5 @@
#!/bin/bash
rm -rf ../python
mkdir -p ../python
python3 -m pip install -r ../requirements.txt -t ../python/


@@ -0,0 +1 @@
rank-torrent-name==0.2.13


@@ -8,12 +8,14 @@ public class PostgresConfiguration
private const string PasswordVariable = "PASSWORD";
private const string DatabaseVariable = "DB";
private const string PortVariable = "PORT";
private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};";
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
}


@@ -72,7 +72,7 @@ public class BasicsFile(ILogger<BasicsFile> logger, ImdbDbService dbService): IF
Category = csv.GetField(1),
Title = csv.GetField(2),
Adult = isAdultSet && adult == 1,
Year = csv.GetField(5),
Year = csv.GetField(5) == @"\N" ? 0 : int.Parse(csv.GetField(5)),
};
if (cancellationToken.IsCancellationRequested)


@@ -6,5 +6,5 @@ public class ImdbBasicEntry
public string? Category { get; set; }
public string? Title { get; set; }
public bool Adult { get; set; }
public string? Year { get; set; }
public int Year { get; set; }
}


@@ -17,7 +17,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
await writer.WriteAsync(entry.ImdbId, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Category, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Title, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Year, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Year, NpgsqlDbType.Integer);
await writer.WriteAsync(entry.Adult, NpgsqlDbType.Boolean);
}
catch (Npgsql.PostgresException e)
@@ -116,7 +116,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
ExecuteCommandAsync(
async connection =>
{
await using var command = new NpgsqlCommand($"CREATE INDEX title_gist ON {TableNames.MetadataTable} USING gist(title gist_trgm_ops)", connection);
await using var command = new NpgsqlCommand($"CREATE INDEX title_gin ON {TableNames.MetadataTable} USING gin(title gin_trgm_ops)", connection);
await command.ExecuteNonQueryAsync();
}, "Error while creating index on imdb_metadata table");
@@ -125,7 +125,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
async connection =>
{
logger.LogInformation("Dropping Trigrams index if it exists already");
await using var dropCommand = new NpgsqlCommand("DROP INDEX if exists title_gist", connection);
await using var dropCommand = new NpgsqlCommand("DROP INDEX if exists title_gin", connection);
await dropCommand.ExecuteNonQueryAsync();
}, $"Error while dropping index on {TableNames.MetadataTable} table");
@@ -134,7 +134,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
{
try
{
await using var connection = CreateNpgsqlConnection();
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync();
await operation(connection);
@@ -145,16 +145,6 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
}
}
private NpgsqlConnection CreateNpgsqlConnection()
{
var connectionStringBuilder = new NpgsqlConnectionStringBuilder(configuration.StorageConnectionString)
{
CommandTimeout = 3000,
};
return new(connectionStringBuilder.ConnectionString);
}
private async Task ExecuteCommandWithTransactionAsync(Func<NpgsqlConnection, NpgsqlTransaction, Task> operation, NpgsqlTransaction transaction, string errorMessage)
{
try


@@ -13,7 +13,7 @@
<PackageReference Include="Dapper" Version="2.1.35" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Npgsql" Version="8.0.2" />
<PackageReference Include="Npgsql" Version="8.0.3" />
<PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />


@@ -0,0 +1,35 @@
-- Purpose: Change the year column to integer and add a search function that allows for searching by year.
ALTER TABLE imdb_metadata
ALTER COLUMN year TYPE integer USING (CASE WHEN year = '\N' THEN 0 ELSE year::integer END);
-- Remove the old search function
DROP FUNCTION IF EXISTS search_imdb_meta(TEXT, TEXT, TEXT, INT);
-- Add the new search function that allows for searching by year with a plus/minus one year range
CREATE OR REPLACE FUNCTION search_imdb_meta(search_term TEXT, category_param TEXT DEFAULT NULL, year_param INT DEFAULT NULL, limit_param INT DEFAULT 10)
RETURNS TABLE(imdb_id character varying(16), title character varying(1000),category character varying(50),year INT, score REAL) AS $$
BEGIN
SET pg_trgm.similarity_threshold = 0.9;
RETURN QUERY
SELECT imdb_metadata.imdb_id, imdb_metadata.title, imdb_metadata.category, imdb_metadata.year, similarity(imdb_metadata.title, search_term) as score
FROM imdb_metadata
WHERE (imdb_metadata.title % search_term)
AND (imdb_metadata.adult = FALSE)
AND (category_param IS NULL OR imdb_metadata.category = category_param)
AND (year_param IS NULL OR imdb_metadata.year BETWEEN year_param - 1 AND year_param + 1)
ORDER BY score DESC
LIMIT limit_param;
END; $$
LANGUAGE plpgsql;
-- Drop the old indexes
DROP INDEX IF EXISTS idx_imdb_metadata_adult;
DROP INDEX IF EXISTS idx_imdb_metadata_category;
DROP INDEX IF EXISTS idx_imdb_metadata_year;
DROP INDEX IF EXISTS title_gist;
-- Add indexes for the new columns
CREATE INDEX idx_imdb_metadata_adult ON imdb_metadata(adult);
CREATE INDEX idx_imdb_metadata_category ON imdb_metadata(category);
CREATE INDEX idx_imdb_metadata_year ON imdb_metadata(year);
CREATE INDEX title_gin ON imdb_metadata USING gin(title gin_trgm_ops);
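With the year column converted to an integer, the rebuilt function can constrain matches to a one-year window around the requested year. A hypothetical lookup (illustrative values, not part of the migration):

-- Hypothetical example: movies similar to 'Terminator 2 Judgment Day' released 1990-1992, top 5 by score.
SELECT * FROM search_imdb_meta('Terminator 2 Judgment Day', 'movie', 1991, 5);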


@@ -0,0 +1,40 @@
-- Purpose: Add the jsonb column to the ingested_torrents table to store the response from RTN
ALTER TABLE ingested_torrents
ADD COLUMN IF NOT EXISTS rtn_response jsonb;
-- Purpose: Drop torrentId column from torrents table
ALTER TABLE torrents
DROP COLUMN IF EXISTS "torrentId";
-- Purpose: Drop Trackers column from torrents table
ALTER TABLE torrents
DROP COLUMN IF EXISTS "trackers";
-- Purpose: Create a foreign key relationship if it does not already exist between torrents and the source table ingested_torrents, but do not cascade on delete.
ALTER TABLE torrents
ADD COLUMN IF NOT EXISTS "ingestedTorrentId" bigint;
DO $$
BEGIN
IF EXISTS (
SELECT 1
FROM information_schema.table_constraints
WHERE constraint_name = 'fk_torrents_info_hash'
)
THEN
ALTER TABLE torrents
DROP CONSTRAINT fk_torrents_info_hash;
END IF;
END $$;
ALTER TABLE torrents
ADD CONSTRAINT fk_torrents_info_hash
FOREIGN KEY ("ingestedTorrentId")
REFERENCES ingested_torrents("id")
ON DELETE NO ACTION;
UPDATE torrents
SET "ingestedTorrentId" = ingested_torrents."id"
FROM ingested_torrents
WHERE torrents."infoHash" = ingested_torrents."info_hash"
AND torrents."provider" = ingested_torrents."source";
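A quick sanity check after this backfill, as a hypothetical query rather than part of the migration, is to count torrents that found no matching ingested row:

-- Hypothetical verification: torrents left unlinked after the backfill.
SELECT COUNT(*) AS unlinked_torrents
FROM torrents
WHERE "ingestedTorrentId" IS NULL;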


@@ -0,0 +1,55 @@
DROP FUNCTION IF EXISTS kc_maintenance_reconcile_dmm_imdb_ids();
CREATE OR REPLACE FUNCTION kc_maintenance_reconcile_dmm_imdb_ids()
RETURNS INTEGER AS $$
DECLARE
rec RECORD;
imdb_rec RECORD;
rows_affected INTEGER := 0;
BEGIN
RAISE NOTICE 'Starting Reconciliation of DMM IMDB Ids...';
FOR rec IN
SELECT
it."id" as "ingestion_id",
t."infoHash",
it."category" as "ingestion_category",
f."id" as "file_Id",
f."title" as "file_Title",
(rtn_response->>'raw_title')::text as "raw_title",
(rtn_response->>'parsed_title')::text as "parsed_title",
(rtn_response->>'year')::int as "year"
FROM torrents t
JOIN ingested_torrents it ON t."ingestedTorrentId" = it."id"
JOIN files f ON t."infoHash" = f."infoHash"
WHERE t."provider" = 'DMM'
LOOP
RAISE NOTICE 'Processing record with file_Id: %', rec."file_Id";
FOR imdb_rec IN
SELECT * FROM search_imdb_meta(
rec."parsed_title",
CASE
WHEN rec."ingestion_category" = 'tv' THEN 'tvSeries'
WHEN rec."ingestion_category" = 'movies' THEN 'movie'
END,
CASE
WHEN rec."year" = 0 THEN NULL
ELSE rec."year" END,
1)
LOOP
IF imdb_rec IS NOT NULL THEN
RAISE NOTICE 'Updating file_Id: % with imdbId: %, parsed title: %, imdb title: %', rec."file_Id", imdb_rec."imdb_id", rec."parsed_title", imdb_rec."title";
UPDATE "files"
SET "imdbId" = imdb_rec."imdb_id"
WHERE "id" = rec."file_Id";
rows_affected := rows_affected + 1;
ELSE
RAISE NOTICE 'No IMDB ID found for file_Id: %, parsed title: %, imdb title: %, setting imdbId to NULL', rec."file_Id", rec."parsed_title", imdb_rec."title";
UPDATE "files"
SET "imdbId" = NULL
WHERE "id" = rec."file_Id";
END IF;
END LOOP;
END LOOP;
RAISE NOTICE 'Finished reconciliation. Total rows affected: %', rows_affected;
RETURN rows_affected;
END;
$$ LANGUAGE plpgsql;
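The reconciliation runs on demand; assuming the function above has been created, a minimal invocation is:

-- Returns the number of files rows whose imdbId was updated.
SELECT kc_maintenance_reconcile_dmm_imdb_ids();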


@@ -0,0 +1,19 @@
-- Remove the old search function
DROP FUNCTION IF EXISTS search_imdb_meta(TEXT, TEXT, INT, INT);
-- Add the new search function that allows for searching by year with a plus/minus one year range
CREATE OR REPLACE FUNCTION search_imdb_meta(search_term TEXT, category_param TEXT DEFAULT NULL, year_param INT DEFAULT NULL, limit_param INT DEFAULT 10, similarity_threshold REAL DEFAULT 0.95)
RETURNS TABLE(imdb_id character varying(16), title character varying(1000),category character varying(50),year INT, score REAL) AS $$
BEGIN
SET pg_trgm.similarity_threshold = similarity_threshold;
RETURN QUERY
SELECT imdb_metadata.imdb_id, imdb_metadata.title, imdb_metadata.category, imdb_metadata.year, similarity(imdb_metadata.title, search_term) as score
FROM imdb_metadata
WHERE (imdb_metadata.title % search_term)
AND (imdb_metadata.adult = FALSE)
AND (category_param IS NULL OR imdb_metadata.category = category_param)
AND (year_param IS NULL OR imdb_metadata.year BETWEEN year_param - 1 AND year_param + 1)
ORDER BY score DESC
LIMIT limit_param;
END; $$
LANGUAGE plpgsql;

View File

@@ -0,0 +1,19 @@
-- Remove the old search function
DROP FUNCTION IF EXISTS search_imdb_meta(TEXT, TEXT, INT, INT);
-- Add the new search function that allows for searching by year with a plus/minus one year range
CREATE OR REPLACE FUNCTION search_imdb_meta(search_term TEXT, category_param TEXT DEFAULT NULL, year_param INT DEFAULT NULL, limit_param INT DEFAULT 10, similarity_threshold REAL DEFAULT 0.95)
RETURNS TABLE(imdb_id character varying(16), title character varying(1000), category character varying(50), year INT, score REAL) AS $$
BEGIN
EXECUTE format('SET pg_trgm.similarity_threshold = %L', similarity_threshold);
RETURN QUERY
SELECT imdb_metadata.imdb_id, imdb_metadata.title, imdb_metadata.category, imdb_metadata.year, similarity(imdb_metadata.title, search_term) as score
FROM imdb_metadata
WHERE (imdb_metadata.title % search_term)
AND (imdb_metadata.adult = FALSE)
AND (category_param IS NULL OR imdb_metadata.category = category_param)
AND (year_param IS NULL OR imdb_metadata.year BETWEEN year_param - 1 AND year_param + 1)
ORDER BY score DESC
LIMIT limit_param;
END; $$
LANGUAGE plpgsql;
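
A minimal invocation sketch from C# with Npgsql (parameter values are illustrative, and an open NpgsqlConnection named connection is assumed). Note that a non-NULL year matches a plus/minus one year window:

// Illustrative call only; an open NpgsqlConnection named "connection" is assumed.
await using var cmd = new NpgsqlCommand(
    "SELECT imdb_id, title, year, score FROM search_imdb_meta(@title, @category, @year, @limit);",
    connection);
cmd.Parameters.AddWithValue("title", "inception");
cmd.Parameters.AddWithValue("category", "movie");
cmd.Parameters.AddWithValue("year", 2010); // matches 2009..2011 via the BETWEEN filter
cmd.Parameters.AddWithValue("limit", 1);

await using var reader = await cmd.ExecuteReaderAsync();
while (await reader.ReadAsync())
{
    Console.WriteLine(
        $"{reader.GetString(0)}: {reader.GetString(1)} ({reader.GetInt32(2)}) score={reader.GetFloat(3)}");
}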

View File

@@ -0,0 +1,43 @@
-- Drop Duplicate Files in Files Table
DELETE FROM public.files
WHERE id NOT IN (
SELECT MAX(id)
FROM public.files
GROUP BY "infoHash", "fileIndex"
);
-- Add Index to files table
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1
FROM pg_constraint
WHERE conname = 'files_unique_infohash_fileindex'
) THEN
ALTER TABLE public.files
ADD CONSTRAINT files_unique_infohash_fileindex UNIQUE ("infoHash", "fileIndex");
END IF;
END $$;
-- Drop Duplicate subtitles in Subtitles Table
DELETE FROM public.subtitles
WHERE id NOT IN (
SELECT MAX(id)
FROM public.subtitles
GROUP BY "infoHash", "fileIndex"
);
-- Add Index to subtitles table
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1
FROM pg_constraint
WHERE conname = 'subtitles_unique_infohash_fileindex'
) THEN
ALTER TABLE public.subtitles
ADD CONSTRAINT subtitles_unique_infohash_fileindex UNIQUE ("infoHash", "fileIndex");
END IF;
END $$;

View File

@@ -0,0 +1,2 @@
**/python/
.idea/

View File

@@ -6,6 +6,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharedContracts", "..\share
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{FF5CA857-51E8-4446-8840-2A1D24ED3952}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{1AE7F597-24C4-4575-B59F-67A625D95C1E}"
ProjectSection(SolutionItems) = preProject
eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
eng\install-python-reqs.sh = eng\install-python-reqs.sh
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU

View File

@@ -0,0 +1,3 @@
Remove-Item -Recurse -Force ../src/python -ErrorAction SilentlyContinue
mkdir -p ../src/python
pip install -r ../src/requirements.txt -t ../src/python/

View File

@@ -0,0 +1,5 @@
#!/bin/bash
rm -rf ../src/python
mkdir -p ../src/python
python3 -m pip install -r ../src/requirements.txt -t ../src/python/

View File

@@ -0,0 +1,2 @@
**/python/
.idea/

View File

@@ -4,31 +4,38 @@
{
"Name": "SyncEzTvJob",
"IntervalSeconds": 60,
"Enabled": true
"Enabled": true,
"Url": "https://eztvx.to/ezrss.xml",
"XmlNamespace": "http://xmlns.ezrss.it/0.1/"
},
{
"Name": "SyncNyaaJob",
"IntervalSeconds": 60,
"Enabled": true
"Enabled": true,
"Url": "https://nyaa.si/?page=rss&c=1_2&f=0",
"XmlNamespace": "https://nyaa.si/xmlns/nyaa"
},
{
"Name": "SyncTpbJob",
"IntervalSeconds": 60,
"Enabled": true
"Enabled": true,
"Url": "https://apibay.org/precompiled/data_top100_recent.json"
},
{
"Name": "SyncYtsJob",
"IntervalSeconds": 60,
"Enabled": true
"Enabled": true,
"Url": "https://yts.am/rss"
},
{
"Name": "SyncTgxJob",
"IntervalSeconds": 60,
"Enabled": true
"Enabled": true,
"Url": "https://tgx.rs/rss"
},
{
"Name": "SyncDmmJob",
"IntervalSeconds": 1800,
"IntervalSeconds": 10800,
"Enabled": true
},
{

View File

@@ -8,13 +8,27 @@ WORKDIR /src/producer/src
RUN dotnet restore -a $TARGETARCH
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
WORKDIR /app
ENV PYTHONUNBUFFERED=1
RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
COPY --from=build /src/out .
RUN rm -rf /app/python && mkdir -p /app/python
RUN pip3 install -r /app/requirements.txt -t /app/python
RUN addgroup -S producer && adduser -S -G producer producer
USER producer
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD pgrep -f dotnet || exit 1
ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0
ENTRYPOINT ["dotnet", "Producer.dll"]

View File

@@ -6,6 +6,12 @@ public abstract class BaseJsonCrawler(IHttpClientFactory httpClientFactory, ILog
protected virtual async Task Execute(string collectionName)
{
if (string.IsNullOrWhiteSpace(Url))
{
logger.LogWarning("No URL provided for {Source} crawl", Source);
return;
}
logger.LogInformation("Starting {Source} crawl", Source);
using var client = httpClientFactory.CreateClient("Scraper");

View File

@@ -4,6 +4,12 @@ public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogg
{
public override async Task Execute()
{
if (string.IsNullOrWhiteSpace(Url))
{
logger.LogWarning("No URL provided for {Source} crawl", Source);
return;
}
logger.LogInformation("Starting {Source} crawl", Source);
using var client = httpClientFactory.CreateClient(Literals.CrawlerClient);

View File

@@ -7,4 +7,8 @@ public class Scraper
public int IntervalSeconds { get; set; } = 60;
public bool Enabled { get; set; } = true;
public string? Url { get; set; }
public string? XmlNamespace { get; set; }
}

View File

@@ -0,0 +1,70 @@
namespace Producer.Features.Crawlers.Dmm;
public class DMMFileDownloader(HttpClient client, ILogger<DMMFileDownloader> logger) : IDMMFileDownloader
{
private const string Filename = "main.zip";
private readonly IReadOnlyCollection<string> _filesToIgnore = [
"index.html",
"404.html",
"dedupe.sh",
"CNAME",
];
public const string ClientName = "DmmFileDownloader";
public async Task<string> DownloadFileToTempPath(CancellationToken cancellationToken)
{
logger.LogInformation("Downloading DMM Hashlists");
var response = await client.GetAsync(Filename, cancellationToken);
var tempDirectory = Path.Combine(Path.GetTempPath(), "DMMHashlists");
EnsureDirectoryIsClean(tempDirectory);
response.EnsureSuccessStatusCode();
await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
using var archive = new ZipArchive(stream);
logger.LogInformation("Extracting DMM Hashlists to {TempDirectory}", tempDirectory);
foreach (var entry in archive.Entries)
{
var entryPath = Path.Combine(tempDirectory, Path.GetFileName(entry.FullName));
if (!entry.FullName.EndsWith('/')) // It's a file
{
entry.ExtractToFile(entryPath, true);
}
}
foreach (var file in _filesToIgnore)
{
CleanRepoExtras(tempDirectory, file);
}
logger.LogInformation("Downloaded and extracted Repository to {TempDirectory}", tempDirectory);
return tempDirectory;
}
private static void CleanRepoExtras(string tempDirectory, string fileName)
{
var repoIndex = Path.Combine(tempDirectory, fileName);
if (File.Exists(repoIndex))
{
File.Delete(repoIndex);
}
}
private static void EnsureDirectoryIsClean(string tempDirectory)
{
if (Directory.Exists(tempDirectory))
{
Directory.Delete(tempDirectory, true);
}
Directory.CreateDirectory(tempDirectory);
}
}

View File

@@ -0,0 +1,6 @@
namespace Producer.Features.Crawlers.Dmm;
public class DMMHttpClient
{
}

View File

@@ -1,64 +1,99 @@
namespace Producer.Features.Crawlers.Dmm;
public partial class DebridMediaManagerCrawler(
IHttpClientFactory httpClientFactory,
IDMMFileDownloader dmmFileDownloader,
ILogger<DebridMediaManagerCrawler> logger,
IDataStorage storage,
GithubConfiguration githubConfiguration,
IParseTorrentTitle parseTorrentTitle,
IRankTorrentName rankTorrentName,
IDistributedCache cache) : BaseCrawler(logger, storage)
{
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
private static partial Regex HashCollectionMatcher();
private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
protected override string Url => "";
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
protected override string Url => "https://api.github.com/repos/debridmediamanager/hashlists/git/trees/main?recursive=1";
protected override string Source => "DMM";
private const int ParallelismCount = 4;
public override async Task Execute()
{
var client = httpClientFactory.CreateClient("Scraper");
client.DefaultRequestHeaders.Authorization = new("Bearer", githubConfiguration.PAT);
client.DefaultRequestHeaders.UserAgent.ParseAdd("curl");
var tempDirectory = await dmmFileDownloader.DownloadFileToTempPath(CancellationToken.None);
var jsonBody = await client.GetStringAsync(Url);
var files = Directory.GetFiles(tempDirectory, "*.html", SearchOption.AllDirectories);
var json = JsonDocument.Parse(jsonBody);
logger.LogInformation("Found {Files} files to parse", files.Length);
var entriesArray = json.RootElement.GetProperty("tree");
var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
logger.LogInformation("Found {Entries} total DMM pages", entriesArray.GetArrayLength());
foreach (var entry in entriesArray.EnumerateArray())
await Parallel.ForEachAsync(files, options, async (file, token) =>
{
await ParsePage(entry, client);
}
var fileName = Path.GetFileName(file);
var torrentDictionary = await ExtractPageContents(file, fileName);
if (torrentDictionary == null)
{
return;
}
await ParseTitlesWithRtn(fileName, torrentDictionary);
var results = await ParseTorrents(torrentDictionary);
if (results.Count <= 0)
{
return;
}
await InsertTorrents(results);
await Storage.MarkPageAsIngested(fileName, token);
});
}
private async Task ParsePage(JsonElement entry, HttpClient client)
private async Task ParseTitlesWithRtn(string fileName, IDictionary<string, DmmContent> page)
{
var (pageIngested, name) = await IsAlreadyIngested(entry);
logger.LogInformation("Parsing titles for {Page}", fileName);
if (string.IsNullOrEmpty(name) || pageIngested)
var batchProcessables = page.Select(value => new RtnBatchProcessable(value.Key, value.Value.Filename)).ToList();
var parsedResponses = rankTorrentName.BatchParse(
batchProcessables.Select<RtnBatchProcessable, string>(bp => bp.Filename).ToList(), trashGarbage: false);
// Filter out unsuccessful responses and match RawTitle to requesting title
var successfulResponses = parsedResponses
.Where(response => response != null && response.Success)
.GroupBy(response => response.Response.RawTitle!)
.ToDictionary(group => group.Key, group => group.First());
var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
await Parallel.ForEachAsync(batchProcessables.Select(t => t.InfoHash), options, (infoHash, _) =>
{
return;
}
if (page.TryGetValue(infoHash, out var dmmContent) &&
successfulResponses.TryGetValue(dmmContent.Filename, out var parsedResponse))
{
page[infoHash] = dmmContent with { ParseResponse = parsedResponse };
}
var pageSource = await client.GetStringAsync($"{DownloadBaseUrl}/{name}");
await ExtractPageContents(pageSource, name);
return ValueTask.CompletedTask;
});
}
private async Task ExtractPageContents(string pageSource, string name)
private async Task<ConcurrentDictionary<string, DmmContent>?> ExtractPageContents(string filePath, string filenameOnly)
{
var (pageIngested, name) = await IsAlreadyIngested(filenameOnly);
if (pageIngested)
{
return [];
}
var pageSource = await File.ReadAllTextAsync(filePath);
var match = HashCollectionMatcher().Match(pageSource);
if (!match.Success)
{
logger.LogWarning("Failed to match hash collection for {Name}", name);
await Storage.MarkPageAsIngested(name);
return;
await Storage.MarkPageAsIngested(filenameOnly);
return [];
}
var encodedJson = match.Groups.Values.ElementAtOrDefault(1);
@@ -66,34 +101,165 @@ public partial class DebridMediaManagerCrawler(
if (string.IsNullOrEmpty(encodedJson?.Value))
{
logger.LogWarning("Failed to extract encoded json for {Name}", name);
return;
return [];
}
await ProcessExtractedContentsAsTorrentCollection(encodedJson.Value, name);
}
var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson.Value);
private async Task ProcessExtractedContentsAsTorrentCollection(string encodedJson, string name)
{
var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson);
var json = JsonDocument.Parse(decodedJson);
await InsertTorrentsForPage(json);
var result = await Storage.MarkPageAsIngested(name);
if (!result.IsSuccess)
JsonElement arrayToProcess;
try
{
logger.LogWarning("Failed to mark page as ingested: [{Error}]", result.Failure.ErrorMessage);
return;
var json = JsonDocument.Parse(decodedJson);
if (json.RootElement.ValueKind == JsonValueKind.Object &&
json.RootElement.TryGetProperty("torrents", out var torrentsProperty) &&
torrentsProperty.ValueKind == JsonValueKind.Array)
{
arrayToProcess = torrentsProperty;
}
else if (json.RootElement.ValueKind == JsonValueKind.Array)
{
arrayToProcess = json.RootElement;
}
else
{
logger.LogWarning("Unexpected JSON format in {Name}", name);
return [];
}
}
catch (Exception ex)
{
logger.LogError("Failed to parse JSON {decodedJson} for {Name}: {Exception}", decodedJson, name, ex);
return [];
}
logger.LogInformation("Successfully marked page as ingested");
var torrents = await arrayToProcess.EnumerateArray()
.ToAsyncEnumerable()
.Select(ParsePageContent)
.Where(t => t is not null)
.ToListAsync();
if (torrents.Count == 0)
{
logger.LogWarning("No torrents found in {Name}", name);
await Storage.MarkPageAsIngested(filenameOnly);
return [];
}
var torrentDictionary = torrents
.Where(x => x is not null)
.GroupBy(x => x.InfoHash)
.ToConcurrentDictionary(g => g.Key, g => new DmmContent(g.First().Filename, g.First().Bytes, null));
logger.LogInformation("Parsed {Torrents} torrents for {Name}", torrentDictionary.Count, name);
return torrentDictionary;
}
private async Task<IngestedTorrent?> ParseTorrent(JsonElement item)
private async Task<List<IngestedTorrent>> ParseTorrents(IDictionary<string, DmmContent> page)
{
var ingestedTorrents = new List<IngestedTorrent>();
var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
await Parallel.ForEachAsync(page, options, async (kvp, ct) =>
{
var (infoHash, dmmContent) = kvp;
var parsedTorrent = dmmContent.ParseResponse;
if (parsedTorrent is not { Success: true })
{
return;
}
var torrentType = parsedTorrent.Response.IsMovie ? "movie" : "tvSeries";
var cacheKey = GetCacheKey(torrentType, parsedTorrent.Response.ParsedTitle, parsedTorrent.Response.Year);
var (cached, cachedResult) = await CheckIfInCacheAndReturn(cacheKey);
if (cached)
{
logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Response.ParsedTitle);
lock (ingestedTorrents)
{
ingestedTorrents.Add(MapToTorrent(cachedResult, dmmContent.Bytes, infoHash, parsedTorrent));
}
return;
}
int? year = parsedTorrent.Response.Year != 0 ? parsedTorrent.Response.Year : null;
var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Response.ParsedTitle, torrentType, year, ct);
if (imdbEntry is null)
{
return;
}
await AddToCache(cacheKey, imdbEntry);
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", imdbEntry.ImdbId, parsedTorrent.Response.ParsedTitle, imdbEntry.Title, imdbEntry.Score);
lock (ingestedTorrents)
{
ingestedTorrents.Add(MapToTorrent(imdbEntry, dmmContent.Bytes, infoHash, parsedTorrent));
}
});
return ingestedTorrents;
}
private IngestedTorrent MapToTorrent(ImdbEntry result, long size, string infoHash, ParseTorrentTitleResponse parsedTorrent) =>
new()
{
Source = Source,
Name = result.Title,
Imdb = result.ImdbId,
Size = size.ToString(),
InfoHash = infoHash,
Seeders = 0,
Leechers = 0,
Category = AssignCategory(result),
RtnResponse = parsedTorrent.Response.ToJson(),
};
private Task AddToCache(string cacheKey, ImdbEntry best)
{
var cacheOptions = new DistributedCacheEntryOptions
{
AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1),
};
return cache.SetStringAsync(cacheKey, JsonSerializer.Serialize(best), cacheOptions);
}
private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string cacheKey)
{
var cachedImdbId = await cache.GetStringAsync(cacheKey);
if (!string.IsNullOrEmpty(cachedImdbId))
{
return (true, JsonSerializer.Deserialize<ImdbEntry>(cachedImdbId));
}
return (false, null);
}
private async Task<(bool Success, string? Name)> IsAlreadyIngested(string filename)
{
var pageIngested = await Storage.PageIngested(filename);
return (pageIngested, filename);
}
private static string AssignCategory(ImdbEntry entry) =>
entry.Category.ToLower() switch
{
var category when string.Equals(category, "movie", StringComparison.OrdinalIgnoreCase) => "movies",
var category when string.Equals(category, "tvSeries", StringComparison.OrdinalIgnoreCase) => "tv",
_ => "unknown",
};
private static string GetCacheKey(string category, string title, int year) => $"{category.ToLowerInvariant()}:{year}:{title.ToLowerInvariant()}";
private static ExtractedDMMContent? ParsePageContent(JsonElement item)
{
if (!item.TryGetProperty("filename", out var filenameElement) ||
!item.TryGetProperty("bytes", out var bytesElement) ||
!item.TryGetProperty("hash", out var hashElement))
@@ -101,143 +267,10 @@ public partial class DebridMediaManagerCrawler(
return null;
}
var torrentTitle = filenameElement.GetString();
if (torrentTitle.IsNullOrEmpty())
{
return null;
}
var parsedTorrent = parseTorrentTitle.Parse(torrentTitle.CleanTorrentTitleForImdb());
var (cached, cachedResult) = await CheckIfInCacheAndReturn(parsedTorrent.Title);
if (cached)
{
logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Title);
return new()
{
Source = Source,
Name = cachedResult.Title,
Imdb = cachedResult.ImdbId,
Size = bytesElement.GetInt64().ToString(),
InfoHash = hashElement.ToString(),
Seeders = 0,
Leechers = 0,
Category = parsedTorrent.TorrentType switch
{
TorrentType.Movie => "movies",
TorrentType.Tv => "tv",
_ => "unknown",
},
};
}
var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Title, parsedTorrent.TorrentType, parsedTorrent.Year);
if (imdbEntry.Count == 0)
{
return null;
}
var scoredTitles = await ScoreTitles(parsedTorrent, imdbEntry);
if (!scoredTitles.Success)
{
return null;
}
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", scoredTitles.BestMatch.Value.ImdbId, parsedTorrent.Title, scoredTitles.BestMatch.Value.Title, scoredTitles.BestMatch.Score);
var torrent = new IngestedTorrent
{
Source = Source,
Name = scoredTitles.BestMatch.Value.Title,
Imdb = scoredTitles.BestMatch.Value.ImdbId,
Size = bytesElement.GetInt64().ToString(),
InfoHash = hashElement.ToString(),
Seeders = 0,
Leechers = 0,
Category = parsedTorrent.TorrentType switch
{
TorrentType.Movie => "movies",
TorrentType.Tv => "tv",
_ => "unknown",
},
};
return torrent;
return new(filenameElement.GetString(), bytesElement.GetInt64(), hashElement.GetString());
}
private async Task<(bool Success, ExtractedResult<ImdbEntry>? BestMatch)> ScoreTitles(TorrentMetadata parsedTorrent, List<ImdbEntry> imdbEntries)
{
var lowerCaseTitle = parsedTorrent.Title.ToLowerInvariant();
// Scoring directly operates on the List<ImdbEntry>, no need for lookup table.
var scoredResults = Process.ExtractAll(new(){Title = lowerCaseTitle}, imdbEntries, x => x.Title?.ToLowerInvariant(), scorer: new DefaultRatioScorer(), cutoff: 90);
var best = scoredResults.MaxBy(x => x.Score);
if (best is null)
{
return (false, null);
}
await AddToCache(lowerCaseTitle, best);
return (true, best);
}
private Task AddToCache(string lowerCaseTitle, ExtractedResult<ImdbEntry> best)
{
var cacheOptions = new DistributedCacheEntryOptions
{
AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(15),
};
return cache.SetStringAsync(lowerCaseTitle, JsonSerializer.Serialize(best.Value), cacheOptions);
}
private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string title)
{
var cachedImdbId = await cache.GetStringAsync(title.ToLowerInvariant());
if (!string.IsNullOrEmpty(cachedImdbId))
{
return (true, JsonSerializer.Deserialize<ImdbEntry>(cachedImdbId));
}
return (false, null);
}
private async Task InsertTorrentsForPage(JsonDocument json)
{
var torrents = await json.RootElement.EnumerateArray()
.ToAsyncEnumerable()
.SelectAwait(async x => await ParseTorrent(x))
.Where(t => t is not null)
.ToListAsync();
if (torrents.Count == 0)
{
logger.LogWarning("No torrents found in {Source} response", Source);
return;
}
await InsertTorrents(torrents!);
}
private async Task<(bool Success, string? Name)> IsAlreadyIngested(JsonElement entry)
{
var name = entry.GetProperty("path").GetString();
if (string.IsNullOrEmpty(name))
{
return (false, null);
}
var pageIngested = await Storage.PageIngested(name);
return (pageIngested, name);
}
private record DmmContent(string Filename, long Bytes, ParseTorrentTitleResponse? ParseResponse);
private record ExtractedDMMContent(string Filename, long Bytes, string InfoHash);
private record RtnBatchProcessable(string InfoHash, string Filename);
}

View File

@@ -1,9 +0,0 @@
namespace Producer.Features.Crawlers.Dmm;
public class GithubConfiguration
{
private const string Prefix = "GITHUB";
private const string PatVariable = "PAT";
public string? PAT { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(PatVariable);
}

View File

@@ -0,0 +1,6 @@
namespace Producer.Features.Crawlers.Dmm;
public interface IDMMFileDownloader
{
Task<string> DownloadFileToTempPath(CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,17 @@
namespace Producer.Features.Crawlers.Dmm;
public static class ServiceCollectionExtensions
{
public static IServiceCollection AddDmmSupport(this IServiceCollection services)
{
services.AddHttpClient<IDMMFileDownloader, DMMFileDownloader>(DMMFileDownloader.ClientName, client =>
{
client.BaseAddress = new("https://github.com/debridmediamanager/hashlists/zipball/main/");
client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip");
client.DefaultRequestHeaders.UserAgent.ParseAdd("curl");
client.Timeout = TimeSpan.FromMinutes(10); // 10 minute timeout, #217
});
return services;
}
}
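
A usage sketch for the registration above (a standalone ServiceCollection is assumed here; in the producer itself this wiring happens through the host builder):

// Hedged sketch: a standalone container rather than the producer's host builder.
var services = new ServiceCollection();
services.AddLogging();
services.AddDmmSupport();
await using var provider = services.BuildServiceProvider();

var downloader = provider.GetRequiredService<IDMMFileDownloader>();
// Downloads the hashlists zipball and returns the directory it was extracted to.
var tempDirectory = await downloader.DownloadFileToTempPath(CancellationToken.None);
Console.WriteLine($"DMM hashlists extracted to {tempDirectory}");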

View File

@@ -1,11 +1,10 @@
namespace Producer.Features.Crawlers.EzTv;
public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
protected override string Url => "https://eztv1.xyz/ezrss.xml";
protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncEzTvJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
protected override string Source => "EZTV";
private static readonly XNamespace XmlNamespace = "http://xmlns.ezrss.it/0.1/";
private XNamespace XmlNamespace => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncEzTvJob", StringComparison.OrdinalIgnoreCase))?.XmlNamespace ?? string.Empty;
protected override IReadOnlyDictionary<string, string> Mappings =>
new Dictionary<string, string>

View File

@@ -1,11 +1,10 @@
namespace Producer.Features.Crawlers.Nyaa;
public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
protected override string Url => "https://nyaa.si/?page=rss&c=1_2&f=0";
protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncNyaaJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
protected override string Source => "Nyaa";
private static readonly XNamespace XmlNamespace = "https://nyaa.si/xmlns/nyaa";
private XNamespace XmlNamespace => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncNyaaJob", StringComparison.OrdinalIgnoreCase))?.XmlNamespace ?? string.Empty;
protected override IReadOnlyDictionary<string, string> Mappings =>
new Dictionary<string, string>

View File

@@ -1,13 +1,13 @@
namespace Producer.Features.Crawlers.Tgx;
public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
[GeneratedRegex(@"Size:\s+(.+?)\s+Added")]
private static partial Regex SizeStringExtractor();
[GeneratedRegex(@"(?i)\b(\d+(\.\d+)?)\s*([KMGT]?B)\b", RegexOptions.None, "en-GB")]
private static partial Regex SizeStringParser();
protected override string Url => "https://tgx.rs/rss";
protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncTgxJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
protected override string Source => "TorrentGalaxy";
protected override IReadOnlyDictionary<string, string> Mappings

View File

@@ -1,8 +1,8 @@
namespace Producer.Features.Crawlers.Tpb;
public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage) : BaseJsonCrawler(httpClientFactory, logger, storage)
public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseJsonCrawler(httpClientFactory, logger, storage)
{
protected override string Url => "https://apibay.org/precompiled/data_top100_recent.json";
protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncTpbJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
protected override string Source => "TPB";

View File

@@ -1,9 +1,8 @@
namespace Producer.Features.Crawlers.Yts;
public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
protected override string Url => "https://yts.am/rss";
protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncYtsJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
protected override string Source => "YTS";
protected override IReadOnlyDictionary<string, string> Mappings
=> new Dictionary<string, string>

View File

@@ -0,0 +1,24 @@
namespace Producer.Features.DataProcessing
{
public class LengthAwareRatioScorer : IRatioScorer
{
private readonly IRatioScorer _defaultScorer = new DefaultRatioScorer();
public int Score(string input1, string input2)
{
var score = _defaultScorer.Score(input1, input2);
var lengthRatio = (double)Math.Min(input1.Length, input2.Length) / Math.Max(input1.Length, input2.Length);
var result = (int)(score * lengthRatio);
return result > 100 ? 100 : result;
}
public int Score(string input1, string input2, PreprocessMode preprocessMode)
{
var score = _defaultScorer.Score(input1, input2, preprocessMode);
var lengthRatio = (double)Math.Min(input1.Length, input2.Length) / Math.Max(input1.Length, input2.Length);
var result = (int)(score * lengthRatio);
return result > 100 ? 100 : result;
}
}
}
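
By construction, the length-aware score is the default score multiplied by min(length)/max(length), so equal-length inputs are unchanged and pairs with very different lengths are pulled down. A small illustrative comparison (exact numbers depend on FuzzySharp's DefaultRatioScorer):

using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive;
using Producer.Features.DataProcessing;

var plain = new DefaultRatioScorer();
var lengthAware = new LengthAwareRatioScorer();

// Equal lengths: the length ratio is 1, so both scorers agree.
Console.WriteLine(plain.Score("the matrix", "the matrix"));
Console.WriteLine(lengthAware.Score("the matrix", "the matrix"));

// Diverging lengths: the length-aware score is scaled by 10/24 here,
// penalising the short-query-vs-long-title match.
Console.WriteLine(plain.Score("the matrix", "the matrix resurrections"));
Console.WriteLine(lengthAware.Score("the matrix", "the matrix resurrections"));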

View File

@@ -9,7 +9,8 @@ internal static class ServiceCollectionExtensions
services.AddTransient<IDataStorage, DapperDataStorage>();
services.AddTransient<IMessagePublisher, TorrentPublisher>();
services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>();
services.RegisterPythonEngine();
services.AddSingleton<IRankTorrentName, RankTorrentName>();
services.AddStackExchangeRedisCache(options =>
{
options.Configuration = redisConfiguration.ConnectionString;

View File

@@ -5,7 +5,6 @@ internal static class ServiceCollectionExtensions
internal static IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration)
{
var scrapeConfiguration = services.LoadConfigurationFromConfig<ScrapeConfiguration>(configuration, ScrapeConfiguration.SectionName);
var githubConfiguration = services.LoadConfigurationFromEnv<GithubConfiguration>();
var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();
var jobTypes = Assembly.GetAssembly(typeof(BaseJob))
@@ -19,18 +18,13 @@ internal static class ServiceCollectionExtensions
services.AddTransient(type);
}
if (!string.IsNullOrEmpty(githubConfiguration.PAT))
{
services.AddTransient<SyncDmmJob>();
}
var openMethod = typeof(ServiceCollectionExtensions).GetMethod(nameof(AddJobWithTrigger), BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance);
services.AddQuartz(
quartz =>
{
RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration);
RegisterDmmJob(quartz, scrapeConfiguration);
RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
RegisterPublisher(quartz, rabbitConfiguration);
});
@@ -64,13 +58,8 @@ internal static class ServiceCollectionExtensions
}
}
private static void RegisterDmmJob(GithubConfiguration githubConfiguration, IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration)
{
if (!string.IsNullOrEmpty(githubConfiguration.PAT))
{
AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
}
}
private static void RegisterDmmJob(IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration) =>
AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
private static void RegisterTorrentioJob(
IServiceCollection services,

View File

@@ -1,12 +1,14 @@
// Global using directives
global using System.Collections.Concurrent;
global using System.IO.Compression;
global using System.Reflection;
global using System.Text;
global using System.Text.Json;
global using System.Text.RegularExpressions;
global using System.Xml.Linq;
global using FuzzySharp;
global using FuzzySharp.Extractor;
global using FuzzySharp.PreProcess;
global using FuzzySharp.SimilarityRatio.Scorer;
global using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive;
global using LZStringCSharp;
global using MassTransit;
@@ -23,11 +25,10 @@ global using Producer.Features.Crawlers.Torrentio;
global using Producer.Features.CrawlerSupport;
global using Producer.Features.DataProcessing;
global using Producer.Features.JobSupport;
global using PromKnight.ParseTorrentTitle;
global using Serilog;
global using SharedContracts.Configuration;
global using SharedContracts.Dapper;
global using SharedContracts.Extensions;
global using SharedContracts.Models;
global using SharedContracts.Requests;
global using StackExchange.Redis;
global using SharedContracts.Python;
global using SharedContracts.Python.RTN;
global using SharedContracts.Requests;

View File

@@ -19,6 +19,7 @@
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Polly" Version="8.3.0" />
<PackageReference Include="pythonnet" Version="3.0.3" />
<PackageReference Include="Quartz.Extensions.DependencyInjection" Version="3.8.0" />
<PackageReference Include="Quartz.Extensions.Hosting" Version="3.8.0" />
<PackageReference Include="Serilog" Version="3.1.1" />
@@ -32,11 +33,14 @@
<None Include="Configuration\*.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="requirements.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<Content Remove="Data\**" />
<None Include="Data\**">
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
<Content Remove="python\**" />
<None Include="python\**">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

View File

@@ -12,7 +12,8 @@ builder.Services
.RegisterMassTransit()
.AddDataStorage()
.AddCrawlers()
.AddDmmSupport()
.AddQuartz(builder.Configuration);
var app = builder.Build();
app.Run();
app.Run();

View File

@@ -0,0 +1 @@
rank-torrent-name==0.2.13

View File

@@ -9,12 +9,23 @@ RUN dotnet restore -a $TARGETARCH
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
WORKDIR /app
ENV PYTHONUNBUFFERED=1
RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
COPY --from=build /src/out .
RUN rm -rf /app/python && mkdir -p /app/python
RUN pip3 install -r /app/requirements.txt -t /app/python
RUN addgroup -S qbit && adduser -S -G qbit qbit
USER qbit
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD pgrep -f dotnet || exit 1
ENTRYPOINT ["dotnet", "QbitCollector.dll"]
ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0
ENTRYPOINT ["dotnet", "QBitCollector.dll"]

View File

@@ -13,11 +13,13 @@ public static class ServiceCollectionExtensions
internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services)
{
services.AddQBitTorrentClient();
services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>();
services.RegisterPythonEngine();
services.AddSingleton<IRankTorrentName, RankTorrentName>();
services.AddSingleton<QbitRequestProcessor>();
services.AddHttpClient();
services.AddSingleton<ITrackersService, TrackersService>();
services.AddHostedService<TrackersBackgroundService>();
services.AddHostedService<HousekeepingBackgroundService>();
return services;
}
@@ -42,6 +44,7 @@ public static class ServiceCollectionExtensions
{
var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();
var redisConfiguration = services.LoadConfigurationFromEnv<RedisConfiguration>();
var qbitConfiguration = services.LoadConfigurationFromEnv<QbitConfiguration>();
services.AddStackExchangeRedisCache(
option =>
@@ -78,8 +81,8 @@ public static class ServiceCollectionExtensions
e.ConfigureConsumer<WriteQbitMetadataConsumer>(context);
e.ConfigureConsumer<PerformQbitMetadataRequestConsumer>(context);
e.ConfigureSaga<QbitMetadataSagaState>(context);
e.ConcurrentMessageLimit = 5;
e.PrefetchCount = 5;
e.ConcurrentMessageLimit = qbitConfiguration.Concurrency;
e.PrefetchCount = qbitConfiguration.Concurrency;
});
});
});
@@ -96,16 +99,19 @@ public static class ServiceCollectionExtensions
cfg.UseTimeout(
timeout =>
{
timeout.Timeout = TimeSpan.FromMinutes(1);
timeout.Timeout = TimeSpan.FromMinutes(3);
});
})
.RedisRepository(redisConfiguration.ConnectionString);
.RedisRepository(redisConfiguration.ConnectionString, options =>
{
options.KeyPrefix = "qbit-collector:";
});
private static void AddQBitTorrentClient(this IServiceCollection services)
{
var qbitConfiguration = services.LoadConfigurationFromEnv<QbitConfiguration>();
var client = new QBittorrentClient(new(qbitConfiguration.Host));
client.Timeout = TimeSpan.FromSeconds(10);
client.Timeout = TimeSpan.FromSeconds(20);
services.AddSingleton<IQBittorrentClient>(client);
}

View File

@@ -0,0 +1,52 @@
namespace QBitCollector.Features.Qbit;
public class HousekeepingBackgroundService(IQBittorrentClient client, ILogger<HousekeepingBackgroundService> logger) : BackgroundService
{
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
logger.LogInformation("Service is Running.");
await DoWork();
using PeriodicTimer timer = new(TimeSpan.FromMinutes(2));
try
{
while (await timer.WaitForNextTickAsync(stoppingToken))
{
await DoWork();
}
}
catch (OperationCanceledException)
{
logger.LogInformation("Service stopping.");
}
}
private async Task DoWork()
{
try
{
logger.LogInformation("Cleaning Stale Entries in Qbit...");
var torrents = await client.GetTorrentListAsync();
foreach (var torrentInfo in torrents)
{
if (torrentInfo.AddedOn >= DateTimeOffset.UtcNow.AddMinutes(-1))
{
continue;
}
logger.LogInformation("Torrent [{InfoHash}] identified as stale because it was added at {AddedOn}", torrentInfo.Hash, torrentInfo.AddedOn);
await client.DeleteAsync(new[] {torrentInfo.Hash}, deleteDownloadedData: true);
logger.LogInformation("Cleaned up stale torrent: [{InfoHash}]", torrentInfo.Hash);
}
}
catch (Exception e)
{
logger.LogError(e, "Error cleaning up stale torrents this interval.");
}
}
}

View File

@@ -1,6 +1,6 @@
namespace QBitCollector.Features.Qbit;
public class QbitRequestProcessor(IQBittorrentClient client, ITrackersService trackersService, ILogger<QbitRequestProcessor> logger)
public class QbitRequestProcessor(IQBittorrentClient client, ITrackersService trackersService, ILogger<QbitRequestProcessor> logger, QbitConfiguration configuration)
{
public async Task<IReadOnlyList<TorrentContent>?> ProcessAsync(string infoHash, CancellationToken cancellationToken = default)
{
@@ -14,7 +14,7 @@ public class QbitRequestProcessor(IQBittorrentClient client, ITrackersService tr
using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
timeoutCts.CancelAfter(TimeSpan.FromSeconds(30));
timeoutCts.CancelAfter(TimeSpan.FromSeconds(60));
try
{
@@ -30,7 +30,7 @@ public class QbitRequestProcessor(IQBittorrentClient client, ITrackersService tr
break;
}
await Task.Delay(TimeSpan.FromSeconds(1), timeoutCts.Token);
await Task.Delay(TimeSpan.FromMilliseconds(200), timeoutCts.Token);
}
}
catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)

View File

@@ -3,7 +3,12 @@ namespace QBitCollector.Features.Qbit;
public class QbitConfiguration
{
private const string Prefix = "QBIT";
private const string ConnectionStringVariable = "HOST";
private const string HOST_VARIABLE = "HOST";
private const string TRACKERS_URL_VARIABLE = "TRACKERS_URL";
private const string CONCURRENCY_VARIABLE = "CONCURRENCY";
public string? Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(ConnectionStringVariable);
public string? Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HOST_VARIABLE);
public string? TrackersUrl { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(TRACKERS_URL_VARIABLE);
public int Concurrency { get; init; } = Prefix.GetEnvironmentVariableAsInt(CONCURRENCY_VARIABLE, 8);
}

View File

@@ -1,8 +1,7 @@
namespace QBitCollector.Features.Trackers;
public class TrackersService(IDistributedCache cache, HttpClient client, IMemoryCache memoryCache) : ITrackersService
public class TrackersService(IDistributedCache cache, HttpClient client, IMemoryCache memoryCache, QbitConfiguration configuration) : ITrackersService
{
private const string TrackersListUrl = "https://ngosang.github.io/trackerslist/trackers_all.txt";
private const string CacheKey = "trackers";
public async Task<List<string>> GetTrackers()
@@ -42,7 +41,7 @@ public class TrackersService(IDistributedCache cache, HttpClient client, IMemory
private async Task<List<string>> GetTrackersAsync()
{
var response = await client.GetStringAsync(TrackersListUrl);
var response = await client.GetStringAsync(configuration.TrackersUrl);
var lines = response.Split(["\r\n", "\r", "\n"], StringSplitOptions.None);

View File

@@ -3,10 +3,11 @@ namespace QBitCollector.Features.Worker;
public static class QbitMetaToTorrentMeta
{
public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection(
IParseTorrentTitle torrentTitle,
IRankTorrentName rankTorrentName,
Torrent torrent,
string ImdbId,
IReadOnlyList<TorrentContent> Metadata)
IReadOnlyList<TorrentContent> Metadata,
ILogger<WriteQbitMetadataConsumer> logger)
{
try
{
@@ -24,23 +25,31 @@ public static class QbitMetaToTorrentMeta
Size = metadataEntry.Size,
};
var parsedTitle = torrentTitle.Parse(file.Title);
var parsedTitle = rankTorrentName.Parse(file.Title, false);
if (!parsedTitle.Success)
{
logger.LogWarning("Failed to parse title {Title} for metadata mapping", file.Title);
continue;
}
file.ImdbSeason = parsedTitle.Seasons.FirstOrDefault();
file.ImdbEpisode = parsedTitle.Episodes.FirstOrDefault();
file.ImdbSeason = parsedTitle.Response?.Season?.FirstOrDefault() ?? 0;
file.ImdbEpisode = parsedTitle.Response?.Episode?.FirstOrDefault() ?? 0;
files.Add(file);
}
return files;
}
catch (Exception)
catch (Exception ex)
{
logger.LogWarning("Failed to map metadata to files collection: {Exception}", ex.Message);
return [];
}
}
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, IReadOnlyList<TorrentContent> Metadata)
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, IReadOnlyList<TorrentContent> Metadata,
ILogger<WriteQbitMetadataConsumer> logger)
{
try
{
@@ -70,8 +79,9 @@ public static class QbitMetaToTorrentMeta
return files;
}
catch (Exception)
catch (Exception ex)
{
logger.LogWarning("Failed to map metadata to subtitles collection: {Exception}", ex.Message);
return [];
}
}

View File

@@ -53,6 +53,12 @@ public class QbitMetadataSagaStateMachine : MassTransitStateMachine<QbitMetadata
.Then(
context =>
{
if (!context.Message.WithFiles)
{
logger.LogInformation("No files written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
return;
}
logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
})
.TransitionTo(Completed)

View File

@@ -1,22 +1,24 @@
namespace QBitCollector.Features.Worker;
[EntityName("perform-metadata-request")]
[EntityName("perform-metadata-request-qbit-collector")]
public record PerformQbitMetadataRequest(Guid CorrelationId, string InfoHash) : CorrelatedBy<Guid>;
[EntityName("torrent-metadata-response")]
[EntityName("torrent-metadata-response-qbit-collector")]
public record GotQbitMetadata(QBitMetadataResponse Metadata) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}
[EntityName("write-metadata")]
[EntityName("write-metadata-qbit-collector")]
public record WriteQbitMetadata(Torrent Torrent, QBitMetadataResponse Metadata, string ImdbId) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}
[EntityName("metadata-written")]
public record QbitMetadataWritten(QBitMetadataResponse Metadata) : CorrelatedBy<Guid>
[EntityName("metadata-written-qbit-collector")]
public record QbitMetadataWritten(QBitMetadataResponse Metadata, bool WithFiles) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
public QBitMetadataResponse Metadata { get; init; } = Metadata;
}

View File

@@ -1,25 +1,36 @@
namespace QBitCollector.Features.Worker;
public class WriteQbitMetadataConsumer(IParseTorrentTitle parseTorrentTitle, IDataStorage dataStorage) : IConsumer<WriteQbitMetadata>
public class WriteQbitMetadataConsumer(IRankTorrentName rankTorrentName, IDataStorage dataStorage, ILogger<WriteQbitMetadataConsumer> logger) : IConsumer<WriteQbitMetadata>
{
public async Task Consume(ConsumeContext<WriteQbitMetadata> context)
{
var request = context.Message;
var torrentFiles = QbitMetaToTorrentMeta.MapMetadataToFilesCollection(parseTorrentTitle, request.Torrent, request.ImdbId, request.Metadata.Metadata);
if (torrentFiles.Any())
if (request.Metadata.Metadata.Count == 0)
{
await dataStorage.InsertFiles(torrentFiles);
var subtitles = await QbitMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata);
if (subtitles.Any())
{
await dataStorage.InsertSubtitles(subtitles);
}
await context.Publish(new QbitMetadataWritten(request.Metadata, false));
return;
}
await context.Publish(new QbitMetadataWritten(request.Metadata));
var torrentFiles = QbitMetaToTorrentMeta.MapMetadataToFilesCollection(
rankTorrentName, request.Torrent, request.ImdbId, request.Metadata.Metadata, logger);
if (!torrentFiles.Any())
{
await context.Publish(new QbitMetadataWritten(request.Metadata, false));
return;
}
await dataStorage.InsertFiles(torrentFiles);
var subtitles = await QbitMetaToTorrentMeta.MapMetadataToSubtitlesCollection(
dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata, logger);
if (subtitles.Any())
{
await dataStorage.InsertSubtitles(subtitles);
}
await context.Publish(new QbitMetadataWritten(request.Metadata, true));
}
}

View File

@@ -1,17 +1,11 @@
// Global using directives
global using System.Text.Json;
global using System.Text.Json.Serialization;
global using System.Threading.Channels;
global using MassTransit;
global using MassTransit.Mediator;
global using Microsoft.AspNetCore.Builder;
global using Microsoft.Extensions.Caching.Distributed;
global using Microsoft.Extensions.Caching.Memory;
global using Microsoft.Extensions.DependencyInjection;
global using Polly;
global using Polly.Extensions.Http;
global using PromKnight.ParseTorrentTitle;
global using QBitCollector.Extensions;
global using QBitCollector.Features.Qbit;
global using QBitCollector.Features.Trackers;
@@ -21,4 +15,6 @@ global using SharedContracts.Configuration;
global using SharedContracts.Dapper;
global using SharedContracts.Extensions;
global using SharedContracts.Models;
global using SharedContracts.Python;
global using SharedContracts.Python.RTN;
global using SharedContracts.Requests;

View File

@@ -18,7 +18,6 @@
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
<PackageReference Include="Polly" Version="8.3.1" />
<PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
<PackageReference Include="QBittorrent.Client" Version="1.9.23349.1" />
<PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
@@ -31,10 +30,30 @@
<None Include="Configuration\logging.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<Content Remove="eng\**" />
<None Remove="eng\**" />
<None Update="requirements.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\shared\SharedContracts.csproj" />
</ItemGroup>
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
<Content Remove="python\**" />
<None Include="python\**">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<Compile Remove="eng\**" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Remove="eng\**" />
</ItemGroup>
</Project>

View File

@@ -6,6 +6,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "QBitCollector", "QBitCollector.csproj", "{1EF124BE-6EBE-4D9E-846C-FFF814999F3B}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{2F2EA33A-1303-405D-939B-E9394D262BC9}"
ProjectSection(SolutionItems) = preProject
eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
eng\install-python-reqs.sh = eng\install-python-reqs.sh
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU

View File

@@ -0,0 +1,3 @@
Remove-Item -Recurse -Force ../python -ErrorAction SilentlyContinue
mkdir -p ../python
python -m pip install -r ../requirements.txt -t ../python/

View File

@@ -0,0 +1,5 @@
#!/bin/bash
rm -rf ../python
mkdir -p ../python
python3 -m pip install -r ../requirements.txt -t ../python/

View File

@@ -0,0 +1 @@
rank-torrent-name==0.2.13

View File

@@ -8,12 +8,14 @@ public class PostgresConfiguration
private const string PasswordVariable = "PASSWORD";
private const string DatabaseVariable = "DB";
private const string PortVariable = "PORT";
private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};";
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
}

View File

@@ -3,7 +3,12 @@ namespace SharedContracts.Configuration;
public class RedisConfiguration
{
private const string Prefix = "REDIS";
private const string ConnectionStringVariable = "CONNECTION_STRING";
public string? ConnectionString { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(ConnectionStringVariable) + ",abortConnect=false,allowAdmin=true";
private const string HostVariable = "HOST";
private const string PortVariable = "PORT";
private const string ExtraVariable = "EXTRA";
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 6379);
private string EXTRA { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(ExtraVariable, "abortConnect=false,allowAdmin=true");
public string ConnectionString => $"{Host}:{PORT},{EXTRA}";
}
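
Equivalent composition with raw environment variables (this assumes the helper methods resolve names as PREFIX_NAME, e.g. REDIS_HOST; the helpers themselves are repository-specific):

// Hedged sketch of the composed value; defaults mirror the initialisers above.
var host = Environment.GetEnvironmentVariable("REDIS_HOST")
           ?? throw new InvalidOperationException("REDIS_HOST is required");
var port = int.TryParse(Environment.GetEnvironmentVariable("REDIS_PORT"), out var p) ? p : 6379;
var extra = Environment.GetEnvironmentVariable("REDIS_EXTRA")
            ?? "abortConnect=false,allowAdmin=true";

var connectionString = $"{host}:{port},{extra}";
// e.g. REDIS_HOST=redis  =>  "redis:6379,abortConnect=false,allowAdmin=true"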

View File

@@ -9,9 +9,9 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
const string query =
"""
INSERT INTO ingested_torrents
("name", "source", "category", "info_hash", "size", "seeders", "leechers", "imdb", "processed", "createdAt", "updatedAt")
("name", "source", "category", "info_hash", "size", "seeders", "leechers", "imdb", "processed", "createdAt", "updatedAt", "rtn_response")
VALUES
(@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt)
(@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt, @RtnResponse::jsonb)
ON CONFLICT (source, info_hash) DO NOTHING
""";
@@ -110,21 +110,21 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
public async Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
const string query = @"SELECT imdb_id AS ImdbId, title as Title, category as Category, year as Year, adult as Adult FROM imdb_metadata WHERE CAST(NULLIF(Year, '\N') AS INTEGER) <= @Year AND imdb_id > @LastProcessedImdbId ORDER BY ImdbId LIMIT @BatchSize";
const string query = @"SELECT imdb_id AS ImdbId, title as Title, category as Category, year as Year, adult as Adult FROM imdb_metadata WHERE Year <= @Year AND imdb_id > @LastProcessedImdbId ORDER BY ImdbId LIMIT @BatchSize";
var result = await connection.QueryAsync<ImdbEntry>(query, new { Year = year, LastProcessedImdbId = stateLastProcessedImdbId, BatchSize = batchSize });
return result.ToList();
}, "Error getting imdb metadata.", cancellationToken);
public async Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, TorrentType torrentType, string? year, CancellationToken cancellationToken = default) =>
public async Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string torrentType, int? year, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\" from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{(torrentType == TorrentType.Movie ? "movie" : "tvSeries")}'";
query += year is not null ? $", '{year}'" : ", NULL";
query += ", 15)";
var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\", \"score\" as Score, \"category\" as Category from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{torrentType}'";
query += year is not null ? $", {year}" : ", NULL";
query += ", 1)";
var result = await connection.QueryAsync<ImdbEntry>(query);
return result.ToList();
var results = result.ToList();
return results.FirstOrDefault();
}, "Error finding imdb metadata.", cancellationToken);
public Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default) =>
@@ -134,9 +134,9 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
const string query =
"""
INSERT INTO "torrents"
("infoHash", "provider", "torrentId", "title", "size", "type", "uploadDate", "seeders", "trackers", "languages", "resolution", "reviewed", "opened", "createdAt", "updatedAt")
("infoHash", "ingestedTorrentId", "provider", "title", "size", "type", "uploadDate", "seeders", "languages", "resolution", "reviewed", "opened", "createdAt", "updatedAt")
VALUES
(@InfoHash, @Provider, @TorrentId, @Title, 0, @Type, NOW(), @Seeders, NULL, NULL, NULL, false, false, NOW(), NOW())
(@InfoHash, @IngestedTorrentId, @Provider, @Title, 0, @Type, NOW(), @Seeders, NULL, NULL, false, false, NOW(), NOW())
ON CONFLICT ("infoHash") DO NOTHING
""";
@@ -152,7 +152,8 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
INSERT INTO files
("infoHash", "fileIndex", title, "size", "imdbId", "imdbSeason", "imdbEpisode", "kitsuId", "kitsuEpisode", "createdAt", "updatedAt")
VALUES
(@InfoHash, @FileIndex, @Title, @Size, @ImdbId, @ImdbSeason, @ImdbEpisode, @KitsuId, @KitsuEpisode, Now(), Now());
(@InfoHash, @FileIndex, @Title, @Size, @ImdbId, @ImdbSeason, @ImdbEpisode, @KitsuId, @KitsuEpisode, Now(), Now())
ON CONFLICT ("infoHash", "fileIndex") DO NOTHING;
""";
await connection.ExecuteAsync(query, files);
@@ -168,11 +169,7 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
("infoHash", "fileIndex", "fileId", "title")
VALUES
(@InfoHash, @FileIndex, @FileId, @Title)
ON CONFLICT
("infoHash", "fileIndex")
DO UPDATE SET
"fileId" = COALESCE(subtitles."fileId", EXCLUDED."fileId"),
"title" = COALESCE(subtitles."title", EXCLUDED."title");
ON CONFLICT ("infoHash", "fileIndex") DO NOTHING;
""";
await connection.ExecuteAsync(query, subtitles);

View File

@@ -9,7 +9,7 @@ public interface IDataStorage
Task<DapperResult<PageIngestedResult, PageIngestedResult>> MarkPageAsIngested(string pageId, CancellationToken cancellationToken = default);
Task<DapperResult<int, int>> GetRowCountImdbMetadata(CancellationToken cancellationToken = default);
Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default);
Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, TorrentType parsedTorrentTorrentType, string? parsedTorrentYear, CancellationToken cancellationToken = default);
Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string parsedTorrentTorrentType, int? parsedTorrentYear, CancellationToken cancellationToken = default);
Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default);
Task InsertFiles(IEnumerable<TorrentFile> files, CancellationToken cancellationToken = default);
Task InsertSubtitles(IEnumerable<SubtitleFile> subtitles, CancellationToken cancellationToken = default);

View File

@@ -1,4 +1,3 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
namespace SharedContracts.Extensions;

View File

@@ -0,0 +1,19 @@
namespace SharedContracts.Extensions;
public static class DictionaryExtensions
{
public static ConcurrentDictionary<TKey, TValue> ToConcurrentDictionary<TSource, TKey, TValue>(
this IEnumerable<TSource> source,
Func<TSource, TKey> keySelector,
Func<TSource, TValue> valueSelector) where TKey : notnull
{
var concurrentDictionary = new ConcurrentDictionary<TKey, TValue>();
foreach (var element in source)
{
concurrentDictionary.TryAdd(keySelector(element), valueSelector(element));
}
return concurrentDictionary;
}
}
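
Unlike LINQ's ToDictionary, the TryAdd here keeps the first value for a duplicate key instead of throwing. A usage sketch with illustrative data:

var entries = new[]
{
    (InfoHash: "aaa111", Filename: "Example.Movie.2024.1080p.mkv"),
    (InfoHash: "bbb222", Filename: "Example.Show.S01E01.mkv"),
    (InfoHash: "aaa111", Filename: "Example.Movie.2024.1080p.PROPER.mkv"), // duplicate key
};

var byHash = entries.ToConcurrentDictionary(e => e.InfoHash, e => e.Filename);

// TryAdd returns false for the duplicate rather than throwing, so the first
// filename wins: byHash.Count == 2 and byHash["aaa111"] is the non-PROPER entry.
Console.WriteLine(byHash.Count);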

Some files were not shown because too many files have changed in this diff.