Compare commits
52 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
594320ed63 | ||
|
|
a7d5944d25 | ||
|
|
c053a5f8da | ||
|
|
5611d3776f | ||
|
|
833ac11a96 | ||
|
|
16d8707c48 | ||
|
|
6dfbaa4739 | ||
|
|
03b5617312 | ||
|
|
19cb42af77 | ||
|
|
9344531b34 | ||
|
|
723aa6b6a0 | ||
|
|
e17b476801 | ||
|
|
2a414d8bc0 | ||
|
|
9b5f454e6e | ||
|
|
ad9549c695 | ||
|
|
1e85cb00ff | ||
|
|
da640a4071 | ||
|
|
e6a63fd72e | ||
|
|
02101ac50a | ||
|
|
3c8ffd5082 | ||
|
|
79e0a0f102 | ||
|
|
6181207513 | ||
|
|
684dbba2f0 | ||
|
|
c75ecd2707 | ||
|
|
c493ef3376 | ||
|
|
655a39e35c | ||
|
|
cfeee62f6b | ||
|
|
c6d4c06d70 | ||
|
|
08639a3254 | ||
|
|
d430850749 | ||
|
|
82c0ea459b | ||
|
|
1e83b4c5d8 | ||
|
|
66609c2a46 | ||
|
|
2d78dc2735 | ||
|
|
527d6cdf15 | ||
|
|
bb260d78d6 | ||
|
|
baec0450bf | ||
|
|
4308a0ee71 | ||
|
|
cc15a69517 | ||
|
|
a6d3a4a066 | ||
|
|
9430704205 | ||
|
|
6cc857bdc3 | ||
|
|
cc2adbfca5 | ||
|
|
9f928f9b66 | ||
|
|
a50b5071b3 | ||
|
|
72db18f0ad | ||
|
|
d70cef1b86 | ||
|
|
e1e718cd22 | ||
|
|
c3e58e4234 | ||
|
|
d584102d60 | ||
|
|
fe4bb59502 | ||
|
|
472b3342d5 |
4
.github/ISSUE_TEMPLATE/bug_report.md
vendored
4
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -12,6 +12,9 @@ A clear and concise description of what the bug is.
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior:
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
@@ -23,6 +26,7 @@ If the logs are short, make sure to triple backtick them, or use https://pastebi
|
||||
**Hardware:**
|
||||
- OS and distro: [e.g. Raspberry Pi OS, Ubuntu, Rocky]
|
||||
- Server: [e.g. VM, Baremetal, Pi]
|
||||
- Knightcrawler Version: [2.0.xx]
|
||||
|
||||
**Additional context**
|
||||
Add any other context about the problem here.
|
||||
|
||||
15
.github/workflows/build_torrent_ingester.yaml
vendored
Normal file
15
.github/workflows/build_torrent_ingester.yaml
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
name: Build and Push Torrent Ingestor Service
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'src/torrent-ingestor/**'
|
||||
|
||||
jobs:
|
||||
process:
|
||||
uses: ./.github/workflows/base_image_workflow.yaml
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/torrent-ingestor
|
||||
DOCKERFILE: ./src/torrent-ingestor/Dockerfile
|
||||
IMAGE_NAME: knightcrawler-torrent-ingestor
|
||||
39
.github/workflows/git_cliff.yml
vendored
Normal file
39
.github/workflows/git_cliff.yml
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
changelog:
|
||||
name: Generate changelog
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Generate a changelog
|
||||
uses: orhun/git-cliff-action@v3
|
||||
with:
|
||||
config: cliff.toml
|
||||
args: --verbose
|
||||
env:
|
||||
OUTPUT: CHANGELOG.md
|
||||
GITHUB_REPO: ${{ github.repository }}
|
||||
|
||||
- name: Commit
|
||||
run: |
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email 'github-actions[bot]@users.noreply.github.com'
|
||||
set +e
|
||||
git checkout -b feat/changelog_$(date +"%d_%m")
|
||||
git add CHANGELOG.md
|
||||
git commit -m "[skip ci] Update changelog"
|
||||
git push https://${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git feat/changelog_$(date +"%d_%m")
|
||||
|
||||
- name: create pull request
|
||||
run: gh pr create -B main -H feat/changelog_$(date +"%d_%m") --title '[skip ci] Update changelog' --body 'Changelog update by git-cliff'
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -610,4 +610,9 @@ fabric.properties
|
||||
**/caddy/logs/**
|
||||
|
||||
# Mac directory indexes
|
||||
.DS_Store
|
||||
.DS_Store
|
||||
deployment/docker/stack.env
|
||||
|
||||
src/producer/src/python/
|
||||
src/debrid-collector/python/
|
||||
src/qbit-collector/python/
|
||||
|
||||
22
CHANGELOG.md
Normal file
22
CHANGELOG.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [1.0.0] - 2024-03-25
|
||||
### Details
|
||||
#### Changed
|
||||
- Change POSTGRES_USERNAME to POSTGRES_USER. Oops by @purple-emily
|
||||
- Change POSTGRES_DATABASE to POSTGRES_DB by @purple-emily
|
||||
- Two movie commands instead of movie and tv by @purple-emily
|
||||
- Cleanup RabbitMQ env vars, and Github Pat by @iPromKnight
|
||||
|
||||
#### Fixed
|
||||
- HRD -> HDR by @mplewis
|
||||
|
||||
## New Contributors
|
||||
* @mplewis made their first contribution
|
||||
|
||||
<!-- generated by git-cliff -->
|
||||
34
CONTRIBUTING.md
Normal file
34
CONTRIBUTING.md
Normal file
@@ -0,0 +1,34 @@
|
||||
We use [Meaningful commit messages](https://reflectoring.io/meaningful-commit-messages/)
|
||||
|
||||
Tl;dr:
|
||||
1. It should answer the question: “What happens if the changes are applied?".
|
||||
2. Use the imperative, present tense. It is easier to read and scan quickly:
|
||||
```
|
||||
Right: Add feature to alert admin for new user registration
|
||||
Wrong: Added feature ... (past tense)
|
||||
```
|
||||
3. The summary should always be able to complete the following sentence:
|
||||
`If applied, this commit will… `
|
||||
|
||||
We use [git-cliff] for our changelog.
|
||||
|
||||
The breaking flag is set to true when the commit has an exclamation mark after the commit type and scope, e.g.:
|
||||
`feat(scope)!: this is a breaking change`
|
||||
|
||||
Keywords (Commit messages should start with these):
|
||||
```
|
||||
# Added
|
||||
add
|
||||
support
|
||||
# Removed
|
||||
remove
|
||||
delete
|
||||
# Fixed
|
||||
test
|
||||
fix
|
||||
```
|
||||
|
||||
Any other commits will fall under the `Changed` category
|
||||
|
||||
|
||||
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html)
|
||||
33
README.md
33
README.md
@@ -51,11 +51,11 @@ Download and install [Docker Compose](https://docs.docker.com/compose/install/),
|
||||
|
||||
### Environment Setup
|
||||
|
||||
Before running the project, you need to set up the environment variables. Copy the `.env.example` file to `.env`:
|
||||
Before running the project, you need to set up the environment variables. Edit the values in `stack.env`:
|
||||
|
||||
```sh
|
||||
cd deployment/docker
|
||||
cp .env.example .env
|
||||
code stack.env
|
||||
```
|
||||
|
||||
Then set any of the values you wouldd like to customize.
|
||||
@@ -67,33 +67,6 @@ Then set any of the values you wouldd like to customize.
|
||||
|
||||
By default, Knight Crawler is configured to be *relatively* conservative in its resource usage. If running on a decent machine (16GB RAM, i5+ or equivalent), you can increase some settings to increase consumer throughput. This is especially helpful if you have a large backlog from [importing databases](#importing-external-dumps).
|
||||
|
||||
In your `.env` file, under the `# Consumer` section increase `CONSUMER_REPLICAS` from `3` to `15`.
|
||||
You can also increase `JOB_CONCURRENCY` from `5` to `10`.
|
||||
|
||||
### DebridMediaManager setup (optional)
|
||||
|
||||
There are some optional steps you should take to maximise the number of movies/tv shows we can find.
|
||||
|
||||
We can search DebridMediaManager hash lists which are hosted on GitHub. This allows us to add hundreds of thousands of movies and tv shows, but it requires a Personal Access Token to be generated. The software only needs read access and only for public repositories. To generate one, please follow these steps:
|
||||
|
||||
1. Navigate to GitHub settings -> Developer Settings -> Personal access tokens -> Fine-grained tokens (click [here](https://github.com/settings/tokens?type=beta) for a direct link)
|
||||
2. Press `Generate new token`
|
||||
3. Fill out the form (example data below):
|
||||
```
|
||||
Token name:
|
||||
KnightCrawler
|
||||
Expiration:
|
||||
90 days
|
||||
Description:
|
||||
<blank>
|
||||
Repository access
|
||||
(checked) Public Repositories (read-only)
|
||||
```
|
||||
4. Click `Generate token`
|
||||
5. Take the new token and add it to the bottom of the [.env](deployment/docker/.env) file
|
||||
```
|
||||
GithubSettings__PAT=<YOUR TOKEN HERE>
|
||||
```
|
||||
### Configure external access
|
||||
|
||||
Please choose which applies to you:
|
||||
@@ -143,7 +116,7 @@ Remove or comment out the port for the addon, and connect it to Caddy:
|
||||
addon:
|
||||
<<: *knightcrawler-app
|
||||
env_file:
|
||||
- .env
|
||||
- stack.env
|
||||
hostname: knightcrawler-addon
|
||||
image: gabisonfire/knightcrawler-addon:latest
|
||||
labels:
|
||||
|
||||
112
cliff.toml
Normal file
112
cliff.toml
Normal file
@@ -0,0 +1,112 @@
|
||||
# git-cliff ~ configuration file
|
||||
# https://git-cliff.org/docs/configuration
|
||||
|
||||
[changelog]
|
||||
# changelog header
|
||||
header = """
|
||||
# Changelog\n
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n
|
||||
"""
|
||||
# template for the changelog body
|
||||
# https://keats.github.io/tera/docs/#introduction
|
||||
body = """
|
||||
{%- macro remote_url() -%}
|
||||
https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}
|
||||
{%- endmacro -%}
|
||||
|
||||
{% if version -%}
|
||||
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
|
||||
{% else -%}
|
||||
## [Unreleased]
|
||||
{% endif -%}
|
||||
|
||||
### Details\
|
||||
|
||||
{% for group, commits in commits | group_by(attribute="group") %}
|
||||
#### {{ group | upper_first }}
|
||||
{%- for commit in commits %}
|
||||
- {{ commit.message | upper_first | trim }}\
|
||||
{% if commit.github.username %} by @{{ commit.github.username }}{%- endif -%}
|
||||
{% if commit.github.pr_number %} in \
|
||||
[#{{ commit.github.pr_number }}]({{ self::remote_url() }}/pull/{{ commit.github.pr_number }}) \
|
||||
{%- endif -%}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
{%- if github.contributors | filter(attribute="is_first_time", value=true) | length != 0 %}
|
||||
## New Contributors
|
||||
{%- endif -%}
|
||||
|
||||
{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %}
|
||||
* @{{ contributor.username }} made their first contribution
|
||||
{%- if contributor.pr_number %} in \
|
||||
[#{{ contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \
|
||||
{%- endif %}
|
||||
{%- endfor %}\n
|
||||
"""
|
||||
# template for the changelog footer
|
||||
footer = """
|
||||
{%- macro remote_url() -%}
|
||||
https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}
|
||||
{%- endmacro -%}
|
||||
|
||||
{% for release in releases -%}
|
||||
{% if release.version -%}
|
||||
{% if release.previous.version -%}
|
||||
[{{ release.version | trim_start_matches(pat="v") }}]: \
|
||||
{{ self::remote_url() }}/compare/{{ release.previous.version }}..{{ release.version }}
|
||||
{% endif -%}
|
||||
{% else -%}
|
||||
[unreleased]: {{ self::remote_url() }}/compare/{{ release.previous.version }}..HEAD
|
||||
{% endif -%}
|
||||
{% endfor %}
|
||||
<!-- generated by git-cliff -->
|
||||
"""
|
||||
# remove the leading and trailing whitespace from the templates
|
||||
trim = true
|
||||
|
||||
[git]
|
||||
# parse the commits based on https://www.conventionalcommits.org
|
||||
conventional_commits = true
|
||||
# filter out the commits that are not conventional
|
||||
filter_unconventional = true
|
||||
# process each line of a commit as an individual commit
|
||||
split_commits = false
|
||||
# regex for preprocessing the commit messages
|
||||
commit_preprocessors = [
|
||||
# remove issue numbers from commits
|
||||
{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "" },
|
||||
]
|
||||
# regex for parsing and grouping commits
|
||||
commit_parsers = [
|
||||
{ message = "^.*: add", group = "Added" },
|
||||
{ message = "^add", group = "Added" },
|
||||
{ message = "^.*: support", group = "Added" },
|
||||
{ message = "^support", group = "Added" },
|
||||
{ message = "^.*: remove", group = "Removed" },
|
||||
{ message = "^remove", group = "Removed" },
|
||||
{ message = "^.*: delete", group = "Removed" },
|
||||
{ message = "^delete", group = "Removed" },
|
||||
{ message = "^.*: test", group = "Fixed" },
|
||||
{ message = "^test", group = "Fixed" },
|
||||
{ message = "^.*: fix", group = "Fixed" },
|
||||
{ message = "^fix", group = "Fixed" },
|
||||
{ message = "^.*", group = "Changed" },
|
||||
]
|
||||
# protect breaking changes from being skipped due to matching a skipping commit_parser
|
||||
protect_breaking_commits = false
|
||||
# filter out the commits that are not matched by commit parsers
|
||||
filter_commits = true
|
||||
# regex for matching git tags
|
||||
tag_pattern = "v[0-9].*"
|
||||
# regex for skipping tags
|
||||
skip_tags = "v0.1.0-beta.1"
|
||||
# regex for ignoring tags
|
||||
ignore_tags = ""
|
||||
# sort the tags topologically
|
||||
topo_order = false
|
||||
# sort the commits inside sections by oldest/newest order
|
||||
sort_commits = "oldest"
|
||||
@@ -1,7 +0,0 @@
|
||||
version: "3.9"
|
||||
name: "knightcrawler"
|
||||
include:
|
||||
- components/network.yaml
|
||||
- components/volumes.yaml
|
||||
- components/infrastructure.yaml
|
||||
- components/knightcrawler.yaml
|
||||
10
deployment/docker/components/config/qbit/qbittorrent.conf → deployment/docker/config/qbit/qbittorrent.conf
Normal file → Executable file
10
deployment/docker/components/config/qbit/qbittorrent.conf → deployment/docker/config/qbit/qbittorrent.conf
Normal file → Executable file
@@ -12,11 +12,16 @@ enabled=false
|
||||
program=
|
||||
|
||||
[BitTorrent]
|
||||
Session\AnonymousModeEnabled=true
|
||||
Session\BTProtocol=TCP
|
||||
Session\ConnectionSpeed=150
|
||||
Session\DefaultSavePath=/downloads/
|
||||
Session\ExcludedFileNames=
|
||||
Session\MaxActiveDownloads=10
|
||||
Session\MaxActiveCheckingTorrents=20
|
||||
Session\MaxActiveDownloads=20
|
||||
Session\MaxActiveTorrents=50
|
||||
Session\MaxActiveUploads=50
|
||||
Session\MaxConcurrentHTTPAnnounces=1000
|
||||
Session\MaxConnections=2000
|
||||
Session\Port=6881
|
||||
Session\QueueingSystemEnabled=true
|
||||
@@ -50,9 +55,10 @@ MailNotification\req_auth=true
|
||||
WebUI\Address=*
|
||||
WebUI\AuthSubnetWhitelist=0.0.0.0/0
|
||||
WebUI\AuthSubnetWhitelistEnabled=true
|
||||
WebUI\HostHeaderValidation=false
|
||||
WebUI\LocalHostAuth=false
|
||||
WebUI\ServerDomains=*
|
||||
|
||||
[RSS]
|
||||
AutoDownloader\DownloadRepacks=true
|
||||
AutoDownloader\SmartEpisodeFilter=s(\\d+)e(\\d+), (\\d+)x(\\d+), "(\\d{4}[.\\-]\\d{1,2}[.\\-]\\d{1,2})", "(\\d{1,2}[.\\-]\\d{1,2}[.\\-]\\d{4})"
|
||||
AutoDownloader\SmartEpisodeFilter=s(\\d+)e(\\d+), (\\d+)x(\\d+), "(\\d{4}[.\\-]\\d{1,2}[.\\-]\\d{1,2})", "(\\d{1,2}[.\\-]\\d{1,2}[.\\-]\\d{4})"
|
||||
244
deployment/docker/docker-compose.yaml
Normal file
244
deployment/docker/docker-compose.yaml
Normal file
@@ -0,0 +1,244 @@
|
||||
version: "3.9"
|
||||
|
||||
name: knightcrawler
|
||||
|
||||
networks:
|
||||
knightcrawler-network:
|
||||
name: knightcrawler-network
|
||||
driver: bridge
|
||||
|
||||
volumes:
|
||||
postgres:
|
||||
lavinmq:
|
||||
redis:
|
||||
|
||||
services:
|
||||
## Postgres is the database that is used by the services.
|
||||
## All downloaded metadata is stored in this database.
|
||||
postgres:
|
||||
env_file: stack.env
|
||||
healthcheck:
|
||||
test: [ "CMD", "sh", "-c", "pg_isready -h localhost -U $$POSTGRES_USER" ]
|
||||
timeout: 10s
|
||||
interval: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
image: postgres:latest
|
||||
# # If you need the database to be accessible from outside, please open the below port.
|
||||
# # Furthermore, please, please, please, change the username and password in the stack.env file.
|
||||
# # If you want to enhance your security even more, create a new user for the database with a strong password.
|
||||
# ports:
|
||||
# - "5432:5432"
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- postgres:/var/lib/postgresql/data
|
||||
|
||||
## Redis is used as a cache for the services.
|
||||
## It is used to store the infohashes that are currently being processed in sagas, as well as intrim data.
|
||||
redis:
|
||||
env_file: stack.env
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "redis-cli ping"]
|
||||
timeout: 10s
|
||||
interval: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
image: redis/redis-stack:latest
|
||||
# # If you need redis to be accessible from outside, please open the below port.
|
||||
# ports:
|
||||
# - "6379:6379"
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redis:/data
|
||||
|
||||
## LavinMQ is used as a message broker for the services.
|
||||
## It is a high performance drop in replacement for RabbitMQ.
|
||||
## It is used to communicate between the services.
|
||||
lavinmq:
|
||||
env_file: stack.env
|
||||
# # If you need the database to be accessible from outside, please open the below port.
|
||||
# # Furthermore, please, please, please, look at the documentation for lavinmq / rabbitmq on how to secure the service.
|
||||
# ports:
|
||||
# - "5672:5672"
|
||||
# - "15672:15672"
|
||||
# - "15692:15692"
|
||||
image: cloudamqp/lavinmq:latest
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "lavinmqctl status"]
|
||||
timeout: 10s
|
||||
interval: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
volumes:
|
||||
- lavinmq:/var/lib/lavinmq/
|
||||
|
||||
## The addon. This is what is used in stremio
|
||||
addon:
|
||||
depends_on:
|
||||
metadata:
|
||||
condition: service_completed_successfully
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
hostname: knightcrawler-addon
|
||||
image: gabisonfire/knightcrawler-addon:2.0.26
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
ports:
|
||||
- "7000:7000"
|
||||
restart: unless-stopped
|
||||
|
||||
## The consumer is responsible for consuming infohashes and orchestrating download of metadata.
|
||||
consumer:
|
||||
depends_on:
|
||||
metadata:
|
||||
condition: service_completed_successfully
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-consumer:2.0.26
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
|
||||
## The debrid collector is responsible for downloading metadata from debrid services. (Currently only RealDebrid is supported)
|
||||
debridcollector:
|
||||
depends_on:
|
||||
metadata:
|
||||
condition: service_completed_successfully
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-debrid-collector:2.0.26
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
|
||||
## The metadata service is responsible for downloading imdb publically available datasets.
|
||||
## This is used to enrich the metadata during production of ingested infohashes.
|
||||
metadata:
|
||||
depends_on:
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-metadata:2.0.26
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: "no"
|
||||
|
||||
## The migrator is responsible for migrating the database schema.
|
||||
migrator:
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-migrator:2.0.26
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: "no"
|
||||
|
||||
## The producer is responsible for producing infohashes by acquiring for various sites, including DMM.
|
||||
producer:
|
||||
depends_on:
|
||||
metadata:
|
||||
condition: service_completed_successfully
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-producer:2.0.26
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
|
||||
## QBit collector utilizes QBitTorrent to download metadata.
|
||||
qbitcollector:
|
||||
depends_on:
|
||||
metadata:
|
||||
condition: service_completed_successfully
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
qbittorrent:
|
||||
condition: service_healthy
|
||||
deploy:
|
||||
replicas: ${QBIT_REPLICAS:-0}
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-qbit-collector:2.0.26
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
|
||||
## QBitTorrent is a torrent client that can be used to download torrents. In this case its used to download metadata.
|
||||
## The QBit collector requires this.
|
||||
qbittorrent:
|
||||
deploy:
|
||||
replicas: ${QBIT_REPLICAS:-0}
|
||||
env_file: stack.env
|
||||
environment:
|
||||
PGID: "1000"
|
||||
PUID: "1000"
|
||||
TORRENTING_PORT: "6881"
|
||||
WEBUI_PORT: "8080"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl --fail http://localhost:8080"]
|
||||
timeout: 10s
|
||||
interval: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
image: lscr.io/linuxserver/qbittorrent:latest
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
ports:
|
||||
- "6881:6881/tcp"
|
||||
- "6881:6881/udp"
|
||||
# if you want to expose the webui, uncomment the following line
|
||||
# - "8001:8080"
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- ./config/qbit/qbittorrent.conf:/config/qBittorrent/qBittorrent.conf
|
||||
@@ -16,7 +16,7 @@ rule_files:
|
||||
scrape_configs:
|
||||
- job_name: "rabbitmq"
|
||||
static_configs:
|
||||
- targets: ["rabbitmq:15692"]
|
||||
- targets: ["lavinmq:15692"]
|
||||
- job_name: "postgres-exporter"
|
||||
static_configs:
|
||||
- targets: ["postgres-exporter:9187"]
|
||||
|
||||
@@ -4,8 +4,8 @@ x-basehealth: &base-health
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
x-rabbithealth: &rabbitmq-health
|
||||
test: rabbitmq-diagnostics -q ping
|
||||
x-lavinhealth: &lavinmq-health
|
||||
test: [ "CMD-SHELL", "lavinmqctl status" ]
|
||||
<<: *base-health
|
||||
|
||||
x-redishealth: &redis-health
|
||||
@@ -13,7 +13,7 @@ x-redishealth: &redis-health
|
||||
<<: *base-health
|
||||
|
||||
x-postgreshealth: &postgresdb-health
|
||||
test: pg_isready
|
||||
test: [ "CMD", "sh", "-c", "pg_isready -h localhost -U $$POSTGRES_USER" ]
|
||||
<<: *base-health
|
||||
|
||||
x-qbit: &qbit-health
|
||||
@@ -35,7 +35,7 @@ services:
|
||||
- postgres:/var/lib/postgresql/data
|
||||
healthcheck: *postgresdb-health
|
||||
restart: unless-stopped
|
||||
env_file: ../.env
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
|
||||
@@ -48,25 +48,23 @@ services:
|
||||
- redis:/data
|
||||
restart: unless-stopped
|
||||
healthcheck: *redis-health
|
||||
env_file: ../.env
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
|
||||
rabbitmq:
|
||||
image: rabbitmq:3-management
|
||||
lavinmq:
|
||||
env_file: stack.env
|
||||
# # If you need the database to be accessible from outside, please open the below port.
|
||||
# # Furthermore, please, please, please, look at the documentation for rabbit on how to secure the service.
|
||||
# # Furthermore, please, please, please, look at the documentation for lavinmq / rabbitmq on how to secure the service.
|
||||
# ports:
|
||||
# - "5672:5672"
|
||||
# - "15672:15672"
|
||||
# - "15692:15692"
|
||||
volumes:
|
||||
- rabbitmq:/var/lib/rabbitmq
|
||||
image: cloudamqp/lavinmq:latest
|
||||
healthcheck: *lavinmq-health
|
||||
restart: unless-stopped
|
||||
healthcheck: *rabbitmq-health
|
||||
env_file: ../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
volumes:
|
||||
- lavinmq:/var/lib/lavinmq/
|
||||
|
||||
## QBitTorrent is a torrent client that can be used to download torrents. In this case its used to download metadata.
|
||||
## The QBit collector requires this.
|
||||
@@ -80,10 +78,10 @@ services:
|
||||
ports:
|
||||
- 6881:6881
|
||||
- 6881:6881/udp
|
||||
env_file: ../.env
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
healthcheck: *qbit-health
|
||||
volumes:
|
||||
- ./config/qbit/qbittorrent.conf:/config/qBittorrent/qBittorrent.conf
|
||||
- ../../config/qbit/qbittorrent.conf:/config/qBittorrent/qBittorrent.conf
|
||||
@@ -1,7 +1,7 @@
|
||||
x-apps: &knightcrawler-app
|
||||
labels:
|
||||
logging: "promtail"
|
||||
env_file: ../.env
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
|
||||
@@ -11,7 +11,7 @@ x-depends: &knightcrawler-app-depends
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
rabbitmq:
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
@@ -20,8 +20,8 @@ x-depends: &knightcrawler-app-depends
|
||||
|
||||
services:
|
||||
metadata:
|
||||
image: gabisonfire/knightcrawler-metadata:2.0.0
|
||||
env_file: ../.env
|
||||
image: gabisonfire/knightcrawler-metadata:2.0.26
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: no
|
||||
@@ -30,8 +30,8 @@ services:
|
||||
condition: service_completed_successfully
|
||||
|
||||
migrator:
|
||||
image: gabisonfire/knightcrawler-migrator:2.0.0
|
||||
env_file: ../.env
|
||||
image: gabisonfire/knightcrawler-migrator:2.0.26
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: no
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
condition: service_healthy
|
||||
|
||||
addon:
|
||||
image: gabisonfire/knightcrawler-addon:2.0.0
|
||||
image: gabisonfire/knightcrawler-addon:2.0.26
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
hostname: knightcrawler-addon
|
||||
@@ -48,22 +48,22 @@ services:
|
||||
- "7000:7000"
|
||||
|
||||
consumer:
|
||||
image: gabisonfire/knightcrawler-consumer:2.0.0
|
||||
image: gabisonfire/knightcrawler-consumer:2.0.26
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
|
||||
debridcollector:
|
||||
image: gabisonfire/knightcrawler-debridcollector:2.0.0
|
||||
image: gabisonfire/knightcrawler-debrid-collector:2.0.26
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
|
||||
producer:
|
||||
image: gabisonfire/knightcrawler-producer:2.0.0
|
||||
image: gabisonfire/knightcrawler-producer:2.0.26
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
|
||||
qbitcollector:
|
||||
image: gabisonfire/knightcrawler-qbitcollector:2.0.0
|
||||
image: gabisonfire/knightcrawler-qbit-collector:2.0.26
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
@@ -1,4 +1,4 @@
|
||||
volumes:
|
||||
postgres:
|
||||
redis:
|
||||
rabbitmq:
|
||||
lavinmq:
|
||||
7
deployment/docker/src/compose.yaml
Normal file
7
deployment/docker/src/compose.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
version: "3.9"
|
||||
name: "knightcrawler"
|
||||
include:
|
||||
- ./components/network.yaml
|
||||
- ./components/volumes.yaml
|
||||
- ./components/infrastructure.yaml
|
||||
- ./components/knightcrawler.yaml
|
||||
@@ -9,10 +9,12 @@ POSTGRES_PASSWORD=postgres
|
||||
POSTGRES_DB=knightcrawler
|
||||
|
||||
# Redis
|
||||
REDIS_CONNECTION_STRING=redis:6379
|
||||
REDIS_HOST=redis
|
||||
REDIS_PORT=6379
|
||||
REDIS_EXTRA=abortConnect=false,allowAdmin=true
|
||||
|
||||
# RabbitMQ
|
||||
RABBITMQ_HOST=rabbitmq
|
||||
# AMQP
|
||||
RABBITMQ_HOST=lavinmq
|
||||
RABBITMQ_USER=guest
|
||||
RABBITMQ_PASSWORD=guest
|
||||
RABBITMQ_CONSUMER_QUEUE_NAME=ingested
|
||||
@@ -28,9 +30,12 @@ METADATA_INSERT_BATCH_SIZE=50000
|
||||
COLLECTOR_QBIT_ENABLED=false
|
||||
COLLECTOR_DEBRID_ENABLED=true
|
||||
COLLECTOR_REAL_DEBRID_API_KEY=
|
||||
QBIT_HOST=http://qbittorrent:8080
|
||||
QBIT_TRACKERS_URL=https://raw.githubusercontent.com/ngosang/trackerslist/master/trackers_all_http.txt
|
||||
QBIT_CONCURRENCY=8
|
||||
|
||||
# Number of replicas for the qBittorrent collector and qBitTorrent client. Should be 0 or 1.
|
||||
QBIT_REPLICAS=0
|
||||
|
||||
# Addon
|
||||
DEBUG_MODE=false
|
||||
|
||||
# Producer
|
||||
GITHUB_PAT=
|
||||
@@ -25,7 +25,9 @@ export const cinemetaConfig = {
|
||||
}
|
||||
|
||||
export const cacheConfig = {
|
||||
REDIS_CONNECTION_STRING: process.env.REDIS_CONNECTION_STRING || 'redis://localhost:6379/0',
|
||||
REDIS_HOST: process.env.REDIS_HOST || 'redis',
|
||||
REDIS_PORT: process.env.REDIS_PORT || '6379',
|
||||
REDIS_EXTRA: process.env.REDIS_EXTRA || '',
|
||||
NO_CACHE: parseBool(process.env.NO_CACHE, false),
|
||||
IMDB_TTL: parseInt(process.env.IMDB_TTL || 60 * 60 * 4), // 4 Hours
|
||||
STREAM_TTL: parseInt(process.env.STREAM_TTL || 60 * 60 * 4), // 1 Hour
|
||||
@@ -40,3 +42,5 @@ export const cacheConfig = {
|
||||
STALE_ERROR_AGE: parseInt(process.env.STALE_ERROR_AGE) || 7 * 24 * 60 * 60, // 7 days
|
||||
GLOBAL_KEY_PREFIX: process.env.GLOBAL_KEY_PREFIX || 'jackettio-addon',
|
||||
}
|
||||
|
||||
cacheConfig.REDIS_CONNECTION_STRING = 'redis://' + cacheConfig.REDIS_HOST + ':' + cacheConfig.REDIS_PORT + '?' + cacheConfig.REDIS_EXTRA;
|
||||
|
||||
2
src/addon/package-lock.json
generated
2
src/addon/package-lock.json
generated
@@ -12,7 +12,7 @@
|
||||
"@redis/client": "^1.5.14",
|
||||
"@redis/json": "^1.0.6",
|
||||
"@redis/search": "^1.1.6",
|
||||
"all-debrid-api": "^1.1.0",
|
||||
"all-debrid-api": "^1.2.0",
|
||||
"axios": "^1.6.1",
|
||||
"bottleneck": "^2.19.5",
|
||||
"cache-manager": "^3.4.4",
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@putdotio/api-client": "^8.42.0",
|
||||
"all-debrid-api": "^1.1.0",
|
||||
"all-debrid-api": "^1.2.0",
|
||||
"axios": "^1.6.1",
|
||||
"bottleneck": "^2.19.5",
|
||||
"cache-manager": "^3.4.4",
|
||||
|
||||
@@ -3,6 +3,7 @@ import { addonBuilder } from 'stremio-addon-sdk';
|
||||
import { cacheWrapStream } from './lib/cache.js';
|
||||
import { dummyManifest } from './lib/manifest.js';
|
||||
import * as repository from './lib/repository.js';
|
||||
import applyFilters from "./lib/filter.js";
|
||||
import applySorting from './lib/sort.js';
|
||||
import { toStreamInfo, applyStaticInfo } from './lib/streamInfo.js';
|
||||
import { Type } from './lib/types.js';
|
||||
@@ -32,6 +33,7 @@ builder.defineStreamHandler((args) => {
|
||||
.then(records => records
|
||||
.sort((a, b) => b.torrent.seeders - a.torrent.seeders || b.torrent.uploadDate - a.torrent.uploadDate)
|
||||
.map(record => toStreamInfo(record)))))
|
||||
.then(streams => applyFilters(streams, args.extra))
|
||||
.then(streams => applySorting(streams, args.extra))
|
||||
.then(streams => applyStaticInfo(streams))
|
||||
.then(streams => applyMochs(streams, args.extra))
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
export const cacheConfig = {
|
||||
REDIS_CONNECTION_STRING: process.env.REDIS_CONNECTION_STRING || 'redis://localhost:6379/0',
|
||||
REDIS_HOST: process.env.REDIS_HOST || 'redis',
|
||||
REDIS_PORT: process.env.REDIS_PORT || '6379',
|
||||
REDIS_EXTRA: process.env.REDIS_EXTRA || '',
|
||||
NO_CACHE: parseBool(process.env.NO_CACHE, false),
|
||||
}
|
||||
|
||||
cacheConfig.REDIS_CONNECTION_STRING = 'redis://' + cacheConfig.REDIS_HOST + ':' + cacheConfig.REDIS_PORT + '?' + cacheConfig.REDIS_EXTRA;
|
||||
|
||||
export const databaseConfig = {
|
||||
POSTGRES_HOST: process.env.POSTGRES_HOST || 'postgres',
|
||||
POSTGRES_PORT: process.env.POSTGRES_PORT || '5432',
|
||||
|
||||
@@ -14,13 +14,12 @@ const Torrent = database.define('torrent',
|
||||
{
|
||||
infoHash: { type: Sequelize.STRING(64), primaryKey: true },
|
||||
provider: { type: Sequelize.STRING(32), allowNull: false },
|
||||
torrentId: { type: Sequelize.STRING(128) },
|
||||
ingestedTorrentId: { type: Sequelize.BIGINT, allowNull: false },
|
||||
title: { type: Sequelize.STRING(256), allowNull: false },
|
||||
size: { type: Sequelize.BIGINT },
|
||||
type: { type: Sequelize.STRING(16), allowNull: false },
|
||||
uploadDate: { type: Sequelize.DATE, allowNull: false },
|
||||
seeders: { type: Sequelize.SMALLINT },
|
||||
trackers: { type: Sequelize.STRING(4096) },
|
||||
languages: { type: Sequelize.STRING(4096) },
|
||||
resolution: { type: Sequelize.STRING(16) }
|
||||
}
|
||||
@@ -85,7 +84,7 @@ export function getImdbIdMovieEntries(imdbId) {
|
||||
where: {
|
||||
imdbId: { [Op.eq]: imdbId }
|
||||
},
|
||||
include: [Torrent],
|
||||
include: { model: Torrent, required: true },
|
||||
limit: 500,
|
||||
order: [
|
||||
[Torrent, 'size', 'DESC']
|
||||
@@ -100,7 +99,7 @@ export function getImdbIdSeriesEntries(imdbId, season, episode) {
|
||||
imdbSeason: { [Op.eq]: season },
|
||||
imdbEpisode: { [Op.eq]: episode }
|
||||
},
|
||||
include: [Torrent],
|
||||
include: { model: Torrent, required: true },
|
||||
limit: 500,
|
||||
order: [
|
||||
[Torrent, 'size', 'DESC']
|
||||
@@ -113,7 +112,7 @@ export function getKitsuIdMovieEntries(kitsuId) {
|
||||
where: {
|
||||
kitsuId: { [Op.eq]: kitsuId }
|
||||
},
|
||||
include: [Torrent],
|
||||
include: { model: Torrent, required: true },
|
||||
limit: 500,
|
||||
order: [
|
||||
[Torrent, 'size', 'DESC']
|
||||
@@ -127,7 +126,7 @@ export function getKitsuIdSeriesEntries(kitsuId, episode) {
|
||||
kitsuId: { [Op.eq]: kitsuId },
|
||||
kitsuEpisode: { [Op.eq]: episode }
|
||||
},
|
||||
include: [Torrent],
|
||||
include: { model: Torrent, required: true },
|
||||
limit: 500,
|
||||
order: [
|
||||
[Torrent, 'size', 'DESC']
|
||||
|
||||
@@ -20,7 +20,7 @@ export function toStreamInfo(record) {
|
||||
const title = joinDetailParts(
|
||||
[
|
||||
joinDetailParts([record.torrent.title.replace(/[, ]+/g, ' ')]),
|
||||
joinDetailParts([!sameInfo && record.title || undefined]),
|
||||
joinDetailParts([record.title || undefined]),
|
||||
joinDetailParts([
|
||||
joinDetailParts([formatSize(record.size)], '💾 ')
|
||||
]),
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
|
||||
<PackageReference Include="Polly" Version="8.3.1" />
|
||||
<PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
|
||||
<PackageReference Include="Serilog" Version="3.1.1" />
|
||||
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
|
||||
@@ -29,10 +28,30 @@
|
||||
<None Include="Configuration\logging.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="requirements.txt">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<Content Remove="eng\**" />
|
||||
<None Remove="eng\**" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
|
||||
<Content Remove="python\**" />
|
||||
<None Include="python\**">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\shared\SharedContracts.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Compile Remove="eng\**" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<EmbeddedResource Remove="eng\**" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -6,6 +6,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharedContracts", "..\share
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F53-28E6-404F-9EFE-DADFBEF8338B}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{72A042C3-B4F3-45C5-AC20-041FE8F41EFC}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
|
||||
eng\install-python-reqs.sh = eng\install-python-reqs.sh
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
|
||||
@@ -9,12 +9,23 @@ RUN dotnet restore -a $TARGETARCH
|
||||
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH
|
||||
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
|
||||
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
|
||||
|
||||
COPY --from=build /src/out .
|
||||
|
||||
RUN rm -rf /app/python && mkdir -p /app/python
|
||||
|
||||
RUN pip3 install -r /app/requirements.txt -t /app/python
|
||||
|
||||
RUN addgroup -S debrid && adduser -S -G debrid debrid
|
||||
USER debrid
|
||||
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
|
||||
CMD pgrep -f dotnet || exit 1
|
||||
ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0
|
||||
ENTRYPOINT ["dotnet", "DebridCollector.dll"]
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
using DebridCollector.Features.Configuration;
|
||||
|
||||
namespace DebridCollector.Extensions;
|
||||
|
||||
public static class ServiceCollectionExtensions
|
||||
@@ -17,7 +15,8 @@ public static class ServiceCollectionExtensions
|
||||
var serviceConfiguration = services.LoadConfigurationFromEnv<DebridCollectorConfiguration>();
|
||||
|
||||
services.AddRealDebridClient(serviceConfiguration);
|
||||
services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>();
|
||||
services.RegisterPythonEngine();
|
||||
services.AddSingleton<IRankTorrentName, RankTorrentName>();
|
||||
services.AddHostedService<DebridRequestProcessor>();
|
||||
|
||||
return services;
|
||||
@@ -62,7 +61,10 @@ public static class ServiceCollectionExtensions
|
||||
cfg.UseMessageRetry(r => r.Intervals(1000,2000,5000));
|
||||
cfg.UseInMemoryOutbox();
|
||||
})
|
||||
.RedisRepository(redisConfiguration.ConnectionString)
|
||||
.RedisRepository(redisConfiguration.ConnectionString, options =>
|
||||
{
|
||||
options.KeyPrefix = "debrid-collector:";
|
||||
})
|
||||
.Endpoint(
|
||||
e =>
|
||||
{
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
using DebridCollector.Features.Configuration;
|
||||
|
||||
namespace DebridCollector.Features.Debrid;
|
||||
namespace DebridCollector.Features.Debrid;
|
||||
|
||||
public static class ServiceCollectionExtensions
|
||||
{
|
||||
|
||||
@@ -3,10 +3,11 @@ namespace DebridCollector.Features.Worker;
|
||||
public static class DebridMetaToTorrentMeta
|
||||
{
|
||||
public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection(
|
||||
IParseTorrentTitle torrentTitle,
|
||||
IRankTorrentName rankTorrentName,
|
||||
Torrent torrent,
|
||||
string ImdbId,
|
||||
FileDataDictionary Metadata)
|
||||
FileDataDictionary Metadata,
|
||||
ILogger<WriteMetadataConsumer> logger)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -15,34 +16,42 @@ public static class DebridMetaToTorrentMeta
|
||||
foreach (var metadataEntry in Metadata.Where(m => Filetypes.VideoFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext))))
|
||||
{
|
||||
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
|
||||
var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
|
||||
|
||||
var file = new TorrentFile
|
||||
{
|
||||
ImdbId = ImdbId,
|
||||
KitsuId = 0,
|
||||
InfoHash = torrent.InfoHash,
|
||||
FileIndex = validFileIndex ? fileIndex : 0,
|
||||
FileIndex = validFileIndex ? fileIndexMinusOne : 0,
|
||||
Title = metadataEntry.Value.Filename,
|
||||
Size = metadataEntry.Value.Filesize.GetValueOrDefault(),
|
||||
};
|
||||
|
||||
var parsedTitle = torrentTitle.Parse(file.Title);
|
||||
var parsedTitle = rankTorrentName.Parse(file.Title, false);
|
||||
|
||||
file.ImdbSeason = parsedTitle.Seasons.FirstOrDefault();
|
||||
file.ImdbEpisode = parsedTitle.Episodes.FirstOrDefault();
|
||||
if (!parsedTitle.Success)
|
||||
{
|
||||
logger.LogWarning("Failed to parse title {Title} for metadata mapping", file.Title);
|
||||
continue;
|
||||
}
|
||||
|
||||
file.ImdbSeason = parsedTitle.Response?.Season?.FirstOrDefault() ?? 0;
|
||||
file.ImdbEpisode = parsedTitle.Response?.Episode?.FirstOrDefault() ?? 0;
|
||||
|
||||
files.Add(file);
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
catch (Exception)
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning("Failed to map metadata to files collection: {Exception}", ex.Message);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, FileDataDictionary Metadata)
|
||||
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, FileDataDictionary Metadata, ILogger<WriteMetadataConsumer> logger)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -58,13 +67,14 @@ public static class DebridMetaToTorrentMeta
|
||||
foreach (var metadataEntry in Metadata.Where(m => Filetypes.SubtitleFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext))))
|
||||
{
|
||||
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
|
||||
var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
|
||||
var fileId = torrentFiles.FirstOrDefault(
|
||||
t => Path.GetFileNameWithoutExtension(t.Title) == Path.GetFileNameWithoutExtension(metadataEntry.Value.Filename))?.Id ?? 0;
|
||||
|
||||
var file = new SubtitleFile
|
||||
{
|
||||
InfoHash = InfoHash,
|
||||
FileIndex = validFileIndex ? fileIndex : 0,
|
||||
FileIndex = validFileIndex ? fileIndexMinusOne : 0,
|
||||
FileId = fileId,
|
||||
Title = metadataEntry.Value.Filename,
|
||||
};
|
||||
@@ -74,8 +84,9 @@ public static class DebridMetaToTorrentMeta
|
||||
|
||||
return files;
|
||||
}
|
||||
catch (Exception)
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning("Failed to map metadata to subtitles collection: {Exception}", ex.Message);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,6 +53,12 @@ public class InfohashMetadataSagaStateMachine : MassTransitStateMachine<Infohash
|
||||
.Then(
|
||||
context =>
|
||||
{
|
||||
if (!context.Message.WithFiles)
|
||||
{
|
||||
logger.LogInformation("No files written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
|
||||
return;
|
||||
}
|
||||
|
||||
logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
|
||||
})
|
||||
.TransitionTo(Completed)
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
namespace DebridCollector.Features.Worker;
|
||||
|
||||
[EntityName("perform-metadata-request")]
|
||||
[EntityName("perform-metadata-request-debrid-collector")]
|
||||
public record PerformMetadataRequest(Guid CorrelationId, string InfoHash) : CorrelatedBy<Guid>;
|
||||
|
||||
[EntityName("torrent-metadata-response")]
|
||||
[EntityName("torrent-metadata-response-debrid-collector")]
|
||||
public record GotMetadata(TorrentMetadataResponse Metadata) : CorrelatedBy<Guid>
|
||||
{
|
||||
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
|
||||
}
|
||||
|
||||
[EntityName("write-metadata")]
|
||||
[EntityName("write-metadata-debrid-collector")]
|
||||
public record WriteMetadata(Torrent Torrent, TorrentMetadataResponse Metadata, string ImdbId) : CorrelatedBy<Guid>
|
||||
{
|
||||
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
|
||||
}
|
||||
|
||||
[EntityName("metadata-written")]
|
||||
public record MetadataWritten(TorrentMetadataResponse Metadata) : CorrelatedBy<Guid>
|
||||
[EntityName("metadata-written-debrid-colloctor")]
|
||||
public record MetadataWritten(TorrentMetadataResponse Metadata, bool WithFiles) : CorrelatedBy<Guid>
|
||||
{
|
||||
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
|
||||
}
|
||||
@@ -1,25 +1,28 @@
|
||||
namespace DebridCollector.Features.Worker;
|
||||
|
||||
public class WriteMetadataConsumer(IParseTorrentTitle parseTorrentTitle, IDataStorage dataStorage) : IConsumer<WriteMetadata>
|
||||
public class WriteMetadataConsumer(IRankTorrentName rankTorrentName, IDataStorage dataStorage, ILogger<WriteMetadataConsumer> logger) : IConsumer<WriteMetadata>
|
||||
{
|
||||
public async Task Consume(ConsumeContext<WriteMetadata> context)
|
||||
{
|
||||
var request = context.Message;
|
||||
|
||||
var torrentFiles = DebridMetaToTorrentMeta.MapMetadataToFilesCollection(parseTorrentTitle, request.Torrent, request.ImdbId, request.Metadata.Metadata);
|
||||
var torrentFiles = DebridMetaToTorrentMeta.MapMetadataToFilesCollection(rankTorrentName, request.Torrent, request.ImdbId, request.Metadata.Metadata, logger);
|
||||
|
||||
if (torrentFiles.Any())
|
||||
if (!torrentFiles.Any())
|
||||
{
|
||||
await dataStorage.InsertFiles(torrentFiles);
|
||||
|
||||
var subtitles = await DebridMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata);
|
||||
|
||||
if (subtitles.Any())
|
||||
{
|
||||
await dataStorage.InsertSubtitles(subtitles);
|
||||
}
|
||||
await context.Publish(new MetadataWritten(request.Metadata, false));
|
||||
return;
|
||||
}
|
||||
|
||||
await context.Publish(new MetadataWritten(request.Metadata));
|
||||
await dataStorage.InsertFiles(torrentFiles);
|
||||
|
||||
var subtitles = await DebridMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata, logger);
|
||||
|
||||
if (subtitles.Any())
|
||||
{
|
||||
await dataStorage.InsertSubtitles(subtitles);
|
||||
}
|
||||
|
||||
await context.Publish(new MetadataWritten(request.Metadata, true));
|
||||
}
|
||||
}
|
||||
@@ -4,17 +4,18 @@ global using System.Text.Json;
|
||||
global using System.Text.Json.Serialization;
|
||||
global using System.Threading.Channels;
|
||||
global using DebridCollector.Extensions;
|
||||
global using DebridCollector.Features.Configuration;
|
||||
global using DebridCollector.Features.Debrid;
|
||||
global using DebridCollector.Features.Worker;
|
||||
global using MassTransit;
|
||||
global using MassTransit.Mediator;
|
||||
global using Microsoft.AspNetCore.Builder;
|
||||
global using Microsoft.Extensions.DependencyInjection;
|
||||
global using Polly;
|
||||
global using Polly.Extensions.Http;
|
||||
global using PromKnight.ParseTorrentTitle;
|
||||
global using SharedContracts.Configuration;
|
||||
global using SharedContracts.Dapper;
|
||||
global using SharedContracts.Extensions;
|
||||
global using SharedContracts.Models;
|
||||
global using SharedContracts.Python;
|
||||
global using SharedContracts.Python.RTN;
|
||||
global using SharedContracts.Requests;
|
||||
2
src/debrid-collector/eng/install-python-reqs.ps1
Normal file
2
src/debrid-collector/eng/install-python-reqs.ps1
Normal file
@@ -0,0 +1,2 @@
|
||||
mkdir -p ../python
|
||||
python -m pip install -r ../requirements.txt -t ../python/
|
||||
5
src/debrid-collector/eng/install-python-reqs.sh
Normal file
5
src/debrid-collector/eng/install-python-reqs.sh
Normal file
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
rm -rf ../python
|
||||
mkdir -p ../python
|
||||
python3 -m pip install -r ../requirements.txt -t ../python/
|
||||
1
src/debrid-collector/requirements.txt
Normal file
1
src/debrid-collector/requirements.txt
Normal file
@@ -0,0 +1 @@
|
||||
rank-torrent-name==0.2.13
|
||||
@@ -8,12 +8,14 @@ public class PostgresConfiguration
|
||||
private const string PasswordVariable = "PASSWORD";
|
||||
private const string DatabaseVariable = "DB";
|
||||
private const string PortVariable = "PORT";
|
||||
private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds
|
||||
|
||||
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
|
||||
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
|
||||
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
|
||||
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
|
||||
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
|
||||
private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);
|
||||
|
||||
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};";
|
||||
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
|
||||
}
|
||||
|
||||
@@ -72,7 +72,7 @@ public class BasicsFile(ILogger<BasicsFile> logger, ImdbDbService dbService): IF
|
||||
Category = csv.GetField(1),
|
||||
Title = csv.GetField(2),
|
||||
Adult = isAdultSet && adult == 1,
|
||||
Year = csv.GetField(5),
|
||||
Year = csv.GetField(5) == @"\N" ? 0 : int.Parse(csv.GetField(5)),
|
||||
};
|
||||
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
|
||||
@@ -6,5 +6,5 @@ public class ImdbBasicEntry
|
||||
public string? Category { get; set; }
|
||||
public string? Title { get; set; }
|
||||
public bool Adult { get; set; }
|
||||
public string? Year { get; set; }
|
||||
public int Year { get; set; }
|
||||
}
|
||||
@@ -17,7 +17,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
|
||||
await writer.WriteAsync(entry.ImdbId, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Category, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Title, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Year, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Year, NpgsqlDbType.Integer);
|
||||
await writer.WriteAsync(entry.Adult, NpgsqlDbType.Boolean);
|
||||
}
|
||||
catch (Npgsql.PostgresException e)
|
||||
@@ -116,7 +116,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
|
||||
ExecuteCommandAsync(
|
||||
async connection =>
|
||||
{
|
||||
await using var command = new NpgsqlCommand($"CREATE INDEX title_gist ON {TableNames.MetadataTable} USING gist(title gist_trgm_ops)", connection);
|
||||
await using var command = new NpgsqlCommand($"CREATE INDEX title_gin ON {TableNames.MetadataTable} USING gin(title gin_trgm_ops)", connection);
|
||||
await command.ExecuteNonQueryAsync();
|
||||
}, "Error while creating index on imdb_metadata table");
|
||||
|
||||
@@ -125,7 +125,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
|
||||
async connection =>
|
||||
{
|
||||
logger.LogInformation("Dropping Trigrams index if it exists already");
|
||||
await using var dropCommand = new NpgsqlCommand("DROP INDEX if exists title_gist", connection);
|
||||
await using var dropCommand = new NpgsqlCommand("DROP INDEX if exists title_gin", connection);
|
||||
await dropCommand.ExecuteNonQueryAsync();
|
||||
}, $"Error while dropping index on {TableNames.MetadataTable} table");
|
||||
|
||||
@@ -134,7 +134,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
|
||||
{
|
||||
try
|
||||
{
|
||||
await using var connection = CreateNpgsqlConnection();
|
||||
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
|
||||
await connection.OpenAsync();
|
||||
|
||||
await operation(connection);
|
||||
@@ -145,16 +145,6 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
|
||||
}
|
||||
}
|
||||
|
||||
private NpgsqlConnection CreateNpgsqlConnection()
|
||||
{
|
||||
var connectionStringBuilder = new NpgsqlConnectionStringBuilder(configuration.StorageConnectionString)
|
||||
{
|
||||
CommandTimeout = 3000,
|
||||
};
|
||||
|
||||
return new(connectionStringBuilder.ConnectionString);
|
||||
}
|
||||
|
||||
private async Task ExecuteCommandWithTransactionAsync(Func<NpgsqlConnection, NpgsqlTransaction, Task> operation, NpgsqlTransaction transaction, string errorMessage)
|
||||
{
|
||||
try
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
<PackageReference Include="Dapper" Version="2.1.35" />
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||
<PackageReference Include="Npgsql" Version="8.0.2" />
|
||||
<PackageReference Include="Npgsql" Version="8.0.3" />
|
||||
<PackageReference Include="Serilog" Version="3.1.1" />
|
||||
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
|
||||
|
||||
35
src/migrator/migrations/009_imdb_year_column_int.sql
Normal file
35
src/migrator/migrations/009_imdb_year_column_int.sql
Normal file
@@ -0,0 +1,35 @@
|
||||
-- Purpose: Change the year column to integer and add a search function that allows for searching by year.
|
||||
ALTER TABLE imdb_metadata
|
||||
ALTER COLUMN year TYPE integer USING (CASE WHEN year = '\N' THEN 0 ELSE year::integer END);
|
||||
|
||||
-- Remove the old search function
|
||||
DROP FUNCTION IF EXISTS search_imdb_meta(TEXT, TEXT, TEXT, INT);
|
||||
|
||||
-- Add the new search function that allows for searching by year with a plus/minus one year range
|
||||
CREATE OR REPLACE FUNCTION search_imdb_meta(search_term TEXT, category_param TEXT DEFAULT NULL, year_param INT DEFAULT NULL, limit_param INT DEFAULT 10)
|
||||
RETURNS TABLE(imdb_id character varying(16), title character varying(1000),category character varying(50),year INT, score REAL) AS $$
|
||||
BEGIN
|
||||
SET pg_trgm.similarity_threshold = 0.9;
|
||||
RETURN QUERY
|
||||
SELECT imdb_metadata.imdb_id, imdb_metadata.title, imdb_metadata.category, imdb_metadata.year, similarity(imdb_metadata.title, search_term) as score
|
||||
FROM imdb_metadata
|
||||
WHERE (imdb_metadata.title % search_term)
|
||||
AND (imdb_metadata.adult = FALSE)
|
||||
AND (category_param IS NULL OR imdb_metadata.category = category_param)
|
||||
AND (year_param IS NULL OR imdb_metadata.year BETWEEN year_param - 1 AND year_param + 1)
|
||||
ORDER BY score DESC
|
||||
LIMIT limit_param;
|
||||
END; $$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
-- Drop the old indexes
|
||||
DROP INDEX IF EXISTS idx_imdb_metadata_adult;
|
||||
DROP INDEX IF EXISTS idx_imdb_metadata_category;
|
||||
DROP INDEX IF EXISTS idx_imdb_metadata_year;
|
||||
DROP INDEX IF EXISTS title_gist;
|
||||
|
||||
-- Add indexes for the new columns
|
||||
CREATE INDEX idx_imdb_metadata_adult ON imdb_metadata(adult);
|
||||
CREATE INDEX idx_imdb_metadata_category ON imdb_metadata(category);
|
||||
CREATE INDEX idx_imdb_metadata_year ON imdb_metadata(year);
|
||||
CREATE INDEX title_gin ON imdb_metadata USING gin(title gin_trgm_ops);
|
||||
@@ -0,0 +1,40 @@
-- Purpose: Add the jsonb column to the ingested_torrents table to store the response from RTN
ALTER TABLE ingested_torrents
    ADD COLUMN IF NOT EXISTS rtn_response jsonb;

-- Purpose: Drop torrentId column from torrents table
ALTER TABLE torrents
    DROP COLUMN IF EXISTS "torrentId";

-- Purpose: Drop Trackers column from torrents table
ALTER TABLE torrents
    DROP COLUMN IF EXISTS "trackers";

-- Purpose: Create a foreign key relationship if it does not already exist between torrents and the source table ingested_torrents, but do not cascade on delete.
ALTER TABLE torrents
    ADD COLUMN IF NOT EXISTS "ingestedTorrentId" bigint;

DO $$
BEGIN
    IF EXISTS (
        SELECT 1
        FROM information_schema.table_constraints
        WHERE constraint_name = 'fk_torrents_info_hash'
    )
    THEN
        ALTER TABLE torrents
            DROP CONSTRAINT fk_torrents_info_hash;
    END IF;
END $$;

ALTER TABLE torrents
    ADD CONSTRAINT fk_torrents_info_hash
    FOREIGN KEY ("ingestedTorrentId")
    REFERENCES ingested_torrents("id")
    ON DELETE NO ACTION;

UPDATE torrents
SET "ingestedTorrentId" = ingested_torrents."id"
FROM ingested_torrents
WHERE torrents."infoHash" = ingested_torrents."info_hash"
  AND torrents."provider" = ingested_torrents."source";
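The backfill at the end of this migration links existing torrents to their source rows by matching `infoHash`/`provider` against `info_hash`/`source`. A quick, hedged way to see how many rows the join could not match (purely a sanity check, not part of the migration):

```csharp
using Dapper;
using Npgsql;

public static class MigrationSanityCheck
{
    // Counts torrents left without an ingestedTorrentId after the 010 backfill.
    // The connection string comes from the environment in the real services; here it is just a parameter.
    public static async Task<long> CountUnmatchedTorrentsAsync(string connectionString)
    {
        await using var connection = new NpgsqlConnection(connectionString);

        return await connection.ExecuteScalarAsync<long>(
            """SELECT COUNT(*) FROM torrents WHERE "ingestedTorrentId" IS NULL;""");
    }
}
```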
@@ -0,0 +1,55 @@
DROP FUNCTION IF EXISTS kc_maintenance_reconcile_dmm_imdb_ids();
CREATE OR REPLACE FUNCTION kc_maintenance_reconcile_dmm_imdb_ids()
RETURNS INTEGER AS $$
DECLARE
    rec RECORD;
    imdb_rec RECORD;
    rows_affected INTEGER := 0;
BEGIN
    RAISE NOTICE 'Starting Reconciliation of DMM IMDB Ids...';
    FOR rec IN
        SELECT
            it."id" as "ingestion_id",
            t."infoHash",
            it."category" as "ingestion_category",
            f."id" as "file_Id",
            f."title" as "file_Title",
            (rtn_response->>'raw_title')::text as "raw_title",
            (rtn_response->>'parsed_title')::text as "parsed_title",
            (rtn_response->>'year')::int as "year"
        FROM torrents t
        JOIN ingested_torrents it ON t."ingestedTorrentId" = it."id"
        JOIN files f ON t."infoHash" = f."infoHash"
        WHERE t."provider" = 'DMM'
    LOOP
        RAISE NOTICE 'Processing record with file_Id: %', rec."file_Id";
        FOR imdb_rec IN
            SELECT * FROM search_imdb_meta(
                rec."parsed_title",
                CASE
                    WHEN rec."ingestion_category" = 'tv' THEN 'tvSeries'
                    WHEN rec."ingestion_category" = 'movies' THEN 'movie'
                END,
                CASE
                    WHEN rec."year" = 0 THEN NULL
                    ELSE rec."year" END,
                1)
        LOOP
            IF imdb_rec IS NOT NULL THEN
                RAISE NOTICE 'Updating file_Id: % with imdbId: %, parsed title: %, imdb title: %', rec."file_Id", imdb_rec."imdb_id", rec."parsed_title", imdb_rec."title";
                UPDATE "files"
                SET "imdbId" = imdb_rec."imdb_id"
                WHERE "id" = rec."file_Id";
                rows_affected := rows_affected + 1;
            ELSE
                RAISE NOTICE 'No IMDB ID found for file_Id: %, parsed title: %, imdb title: %, setting imdbId to NULL', rec."file_Id", rec."parsed_title", imdb_rec."title";
                UPDATE "files"
                SET "imdbId" = NULL
                WHERE "id" = rec."file_Id";
            END IF;
        END LOOP;
    END LOOP;
    RAISE NOTICE 'Finished reconciliation. Total rows affected: %', rows_affected;
    RETURN rows_affected;
END;
$$ LANGUAGE plpgsql;
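Because the function iterates every DMM torrent and calls `search_imdb_meta` per row, running it ad hoc is best done with a generous command timeout. A minimal sketch of invoking it from C# (the timeout value is an assumption):

```csharp
using Dapper;
using Npgsql;

public static class DmmReconciliation
{
    // Runs the maintenance function added in migration 011 and returns the number of files it updated.
    public static async Task<int> RunAsync(string connectionString)
    {
        await using var connection = new NpgsqlConnection(connectionString);

        return await connection.ExecuteScalarAsync<int>(
            "SELECT kc_maintenance_reconcile_dmm_imdb_ids();",
            commandTimeout: 3600); // one hour; large libraries can take a while
    }
}
```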
19  src/migrator/migrations/012_imdb_change_ratio.sql  Normal file
@@ -0,0 +1,19 @@
-- Remove the old search function
DROP FUNCTION IF EXISTS search_imdb_meta(TEXT, TEXT, INT, INT);

-- Add the new search function that allows for searching by year with a plus/minus one year range
CREATE OR REPLACE FUNCTION search_imdb_meta(search_term TEXT, category_param TEXT DEFAULT NULL, year_param INT DEFAULT NULL, limit_param INT DEFAULT 10, similarity_threshold REAL DEFAULT 0.95)
RETURNS TABLE(imdb_id character varying(16), title character varying(1000), category character varying(50), year INT, score REAL) AS $$
BEGIN
    SET pg_trgm.similarity_threshold = similarity_threshold;
    RETURN QUERY
    SELECT imdb_metadata.imdb_id, imdb_metadata.title, imdb_metadata.category, imdb_metadata.year, similarity(imdb_metadata.title, search_term) as score
    FROM imdb_metadata
    WHERE (imdb_metadata.title % search_term)
      AND (imdb_metadata.adult = FALSE)
      AND (category_param IS NULL OR imdb_metadata.category = category_param)
      AND (year_param IS NULL OR imdb_metadata.year BETWEEN year_param - 1 AND year_param + 1)
    ORDER BY score DESC
    LIMIT limit_param;
END; $$
LANGUAGE plpgsql;
||||
19
src/migrator/migrations/013_imdb_change_ratio_fix.sql
Normal file
19
src/migrator/migrations/013_imdb_change_ratio_fix.sql
Normal file
@@ -0,0 +1,19 @@
|
||||
-- Remove the old search function
|
||||
DROP FUNCTION IF EXISTS search_imdb_meta(TEXT, TEXT, INT, INT);
|
||||
|
||||
-- Add the new search function that allows for searching by year with a plus/minus one year range
|
||||
CREATE OR REPLACE FUNCTION search_imdb_meta(search_term TEXT, category_param TEXT DEFAULT NULL, year_param INT DEFAULT NULL, limit_param INT DEFAULT 10, similarity_threshold REAL DEFAULT 0.95)
|
||||
RETURNS TABLE(imdb_id character varying(16), title character varying(1000),category character varying(50),year INT, score REAL) AS $$
|
||||
BEGIN
|
||||
EXECUTE format('SET pg_trgm.similarity_threshold = %L', similarity_threshold);
|
||||
RETURN QUERY
|
||||
SELECT imdb_metadata.imdb_id, imdb_metadata.title, imdb_metadata.category, imdb_metadata.year, similarity(imdb_metadata.title, search_term) as score
|
||||
FROM imdb_metadata
|
||||
WHERE (imdb_metadata.title % search_term)
|
||||
AND (imdb_metadata.adult = FALSE)
|
||||
AND (category_param IS NULL OR imdb_metadata.category = category_param)
|
||||
AND (year_param IS NULL OR imdb_metadata.year BETWEEN year_param - 1 AND year_param + 1)
|
||||
ORDER BY score DESC
|
||||
LIMIT limit_param;
|
||||
END; $$
|
||||
LANGUAGE plpgsql;
|
||||
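Migration 013 exists because the plain `SET pg_trgm.similarity_threshold = similarity_threshold;` in 012 does not pick up the PL/pgSQL variable; rebuilding the statement with `EXECUTE format(... %L ...)` is what actually applies the caller's threshold. A hedged sketch of calling the updated signature, where a lower threshold trades precision for recall (names and values are illustrative, mirroring the earlier sketch):

```csharp
using Dapper;
using Npgsql;

public sealed record ImdbSearchRow(string imdb_id, string title, string category, int year, float score);

public static class ImdbSearchWithThresholdExample
{
    public static async Task<IReadOnlyList<ImdbSearchRow>> SearchAsync(
        string connectionString, string title, string? category, int? year, float threshold = 0.85f)
    {
        await using var connection = new NpgsqlConnection(connectionString);

        var rows = await connection.QueryAsync<ImdbSearchRow>(
            "SELECT * FROM search_imdb_meta(@search_term, @category_param, @year_param, @limit_param, @similarity_threshold)",
            new
            {
                search_term = title,
                category_param = category,
                year_param = year,
                limit_param = 10,
                similarity_threshold = threshold,
            });

        return rows.AsList();
    }
}
```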
@@ -0,0 +1,43 @@
-- Drop Duplicate Files in Files Table
DELETE FROM public.files
WHERE id NOT IN (
    SELECT MAX(id)
    FROM public.files
    GROUP BY "infoHash", "fileIndex"
);

-- Add Index to files table
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'files_unique_infohash_fileindex'
    ) THEN
        ALTER TABLE public.files
            ADD CONSTRAINT files_unique_infohash_fileindex UNIQUE ("infoHash", "fileIndex");
    END IF;
END $$;


-- Drop Duplicate subtitles in Subtitles Table
DELETE FROM public.subtitles
WHERE id NOT IN (
    SELECT MAX(id)
    FROM public.subtitles
    GROUP BY "infoHash", "fileIndex"
);

-- Add Index to subtitles table
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'subtitles_unique_infohash_fileindex'
    ) THEN
        ALTER TABLE public.subtitles
            ADD CONSTRAINT subtitles_unique_infohash_fileindex UNIQUE ("infoHash", "fileIndex");
    END IF;
END $$;
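With the two unique constraints in place, writers can rely on idempotent upserts instead of best-effort inserts. A hedged sketch of what that looks like for `files` (the non-key column names here are assumptions for illustration):

```csharp
using Dapper;
using Npgsql;

public static class FileUpsertExample
{
    // The unique constraint from migration 014 makes ("infoHash", "fileIndex") a safe conflict target.
    public static Task<int> UpsertFileAsync(NpgsqlConnection connection, string infoHash, int fileIndex, string title, long size) =>
        connection.ExecuteAsync(
            """
            INSERT INTO public.files ("infoHash", "fileIndex", "title", "size")
            VALUES (@infoHash, @fileIndex, @title, @size)
            ON CONFLICT ("infoHash", "fileIndex")
            DO UPDATE SET "title" = EXCLUDED."title", "size" = EXCLUDED."size";
            """,
            new { infoHash, fileIndex, title, size });
}
```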
2  src/producer/.dockerignore  Normal file
@@ -0,0 +1,2 @@
**/python/
.idea/
@@ -6,6 +6,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharedContracts", "..\share
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{FF5CA857-51E8-4446-8840-2A1D24ED3952}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{1AE7F597-24C4-4575-B59F-67A625D95C1E}"
	ProjectSection(SolutionItems) = preProject
		eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
		eng\install-python-reqs.sh = eng\install-python-reqs.sh
	EndProjectSection
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
3  src/producer/eng/install-python-reqs.ps1  Normal file
@@ -0,0 +1,3 @@
remove-item -recurse -force ../src/python
mkdir -p ../src/python
pip install -r ../src/requirements.txt -t ../src/python/
5  src/producer/eng/install-python-reqs.sh  Normal file
@@ -0,0 +1,5 @@
#!/bin/bash

rm -rf ../src/python
mkdir -p ../src/python
python3 -m pip install -r ../src/requirements.txt -t ../src/python/
2  src/producer/src/.dockerignore  Normal file
@@ -0,0 +1,2 @@
**/python/
.idea/
@@ -4,31 +4,38 @@
    {
      "Name": "SyncEzTvJob",
      "IntervalSeconds": 60,
      "Enabled": true
      "Enabled": true,
      "Url": "https://eztvx.to/ezrss.xml",
      "XmlNamespace": "http://xmlns.ezrss.it/0.1/"
    },
    {
      "Name": "SyncNyaaJob",
      "IntervalSeconds": 60,
      "Enabled": true
      "Enabled": true,
      "Url": "https://nyaa.si/?page=rss&c=1_2&f=0",
      "XmlNamespace": "https://nyaa.si/xmlns/nyaa"
    },
    {
      "Name": "SyncTpbJob",
      "IntervalSeconds": 60,
      "Enabled": true
      "Enabled": true,
      "Url": "https://apibay.org/precompiled/data_top100_recent.json"
    },
    {
      "Name": "SyncYtsJob",
      "IntervalSeconds": 60,
      "Enabled": true
      "Enabled": true,
      "Url": "https://yts.am/rss"
    },
    {
      "Name": "SyncTgxJob",
      "IntervalSeconds": 60,
      "Enabled": true
      "Enabled": true,
      "Url": "https://tgx.rs/rss"
    },
    {
      "Name": "SyncDmmJob",
      "IntervalSeconds": 1800,
      "IntervalSeconds": 10800,
      "Enabled": true
    },
    {
@@ -8,13 +8,27 @@ WORKDIR /src/producer/src
RUN dotnet restore -a $TARGETARCH
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH


FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19

WORKDIR /app

ENV PYTHONUNBUFFERED=1

RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python

COPY --from=build /src/out .

RUN rm -rf /app/python && mkdir -p /app/python

RUN pip3 install -r /app/requirements.txt -t /app/python

RUN addgroup -S producer && adduser -S -G producer producer

USER producer

HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD pgrep -f dotnet || exit 1

ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0

ENTRYPOINT ["dotnet", "Producer.dll"]
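The runtime image now carries a system CPython 3.11, vendors the `requirements.txt` packages into `/app/python`, and points pythonnet at the shared library via `PYTHONNET_PYDLL`. Roughly, the bootstrap on the .NET side looks like the sketch below; the repo's own `RegisterPythonEngine` in SharedContracts may differ in details, so treat names and paths as assumptions mirroring this Dockerfile.

```csharp
using System;
using Python.Runtime;

public static class PythonBootstrapSketch
{
    public static void Initialize()
    {
        // Tell pythonnet which libpython to load; the Dockerfile exports this variable.
        Runtime.PythonDLL = Environment.GetEnvironmentVariable("PYTHONNET_PYDLL")
                            ?? "/usr/lib/libpython3.11.so.1.0";

        PythonEngine.Initialize();

        using (Py.GIL())
        {
            // Make the packages installed with "pip3 install -t /app/python" importable.
            dynamic sys = Py.Import("sys");
            sys.path.insert(0, "/app/python");
        }
    }
}
```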
@@ -6,6 +6,12 @@ public abstract class BaseJsonCrawler(IHttpClientFactory httpClientFactory, ILog

    protected virtual async Task Execute(string collectionName)
    {
        if (string.IsNullOrWhiteSpace(Url))
        {
            logger.LogWarning("No URL provided for {Source} crawl", Source);
            return;
        }

        logger.LogInformation("Starting {Source} crawl", Source);

        using var client = httpClientFactory.CreateClient("Scraper");
@@ -4,6 +4,12 @@ public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogg
{
    public override async Task Execute()
    {
        if (string.IsNullOrWhiteSpace(Url))
        {
            logger.LogWarning("No URL provided for {Source} crawl", Source);
            return;
        }

        logger.LogInformation("Starting {Source} crawl", Source);

        using var client = httpClientFactory.CreateClient(Literals.CrawlerClient);
@@ -7,4 +7,8 @@ public class Scraper
    public int IntervalSeconds { get; set; } = 60;

    public bool Enabled { get; set; } = true;

    public string? Url { get; set; }

    public string? XmlNamespace { get; set; }
}
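With `Url` and `XmlNamespace` on the `Scraper` entry, each job's endpoint now comes from `scrapers.json` instead of a hard-coded constant (see the crawler diffs further down). The repo binds this through its own `LoadConfigurationFromConfig` helper; as a rough equivalent with the stock binder, where the section name is an assumption:

```csharp
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.Extensions.Configuration;

// Mirrors the configuration classes above; redeclared here only to keep the sketch self-contained.
public class Scraper
{
    public string Name { get; set; } = string.Empty;
    public int IntervalSeconds { get; set; } = 60;
    public bool Enabled { get; set; } = true;
    public string? Url { get; set; }
    public string? XmlNamespace { get; set; }
}

public class ScrapeConfiguration
{
    public List<Scraper> Scrapers { get; set; } = [];
}

public static class ScrapeConfigurationExample
{
    public static string? ResolveUrl(IConfiguration configuration, string jobName)
    {
        // "ScrapeConfiguration" is a guessed section name; the real one lives in ScrapeConfiguration.SectionName.
        var scrape = configuration.GetSection("ScrapeConfiguration").Get<ScrapeConfiguration>() ?? new();

        return scrape.Scrapers
            .FirstOrDefault(x => x.Name.Equals(jobName, StringComparison.OrdinalIgnoreCase))
            ?.Url;
    }
}
```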
70
src/producer/src/Features/Crawlers/Dmm/DMMFileDownloader.cs
Normal file
70
src/producer/src/Features/Crawlers/Dmm/DMMFileDownloader.cs
Normal file
@@ -0,0 +1,70 @@
|
||||
namespace Producer.Features.Crawlers.Dmm;
|
||||
|
||||
public class DMMFileDownloader(HttpClient client, ILogger<DMMFileDownloader> logger) : IDMMFileDownloader
|
||||
{
|
||||
private const string Filename = "main.zip";
|
||||
private readonly IReadOnlyCollection<string> _filesToIgnore = [
|
||||
"index.html",
|
||||
"404.html",
|
||||
"dedupe.sh",
|
||||
"CNAME",
|
||||
];
|
||||
|
||||
public const string ClientName = "DmmFileDownloader";
|
||||
|
||||
public async Task<string> DownloadFileToTempPath(CancellationToken cancellationToken)
|
||||
{
|
||||
logger.LogInformation("Downloading DMM Hashlists");
|
||||
|
||||
var response = await client.GetAsync(Filename, cancellationToken);
|
||||
|
||||
var tempDirectory = Path.Combine(Path.GetTempPath(), "DMMHashlists");
|
||||
|
||||
EnsureDirectoryIsClean(tempDirectory);
|
||||
|
||||
response.EnsureSuccessStatusCode();
|
||||
|
||||
await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
|
||||
using var archive = new ZipArchive(stream);
|
||||
|
||||
logger.LogInformation("Extracting DMM Hashlists to {TempDirectory}", tempDirectory);
|
||||
|
||||
foreach (var entry in archive.Entries)
|
||||
{
|
||||
var entryPath = Path.Combine(tempDirectory, Path.GetFileName(entry.FullName));
|
||||
if (!entry.FullName.EndsWith('/')) // It's a file
|
||||
{
|
||||
entry.ExtractToFile(entryPath, true);
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var file in _filesToIgnore)
|
||||
{
|
||||
CleanRepoExtras(tempDirectory, file);
|
||||
}
|
||||
|
||||
logger.LogInformation("Downloaded and extracted Repository to {TempDirectory}", tempDirectory);
|
||||
|
||||
return tempDirectory;
|
||||
}
|
||||
|
||||
private static void CleanRepoExtras(string tempDirectory, string fileName)
|
||||
{
|
||||
var repoIndex = Path.Combine(tempDirectory, fileName);
|
||||
|
||||
if (File.Exists(repoIndex))
|
||||
{
|
||||
File.Delete(repoIndex);
|
||||
}
|
||||
}
|
||||
|
||||
private static void EnsureDirectoryIsClean(string tempDirectory)
|
||||
{
|
||||
if (Directory.Exists(tempDirectory))
|
||||
{
|
||||
Directory.Delete(tempDirectory, true);
|
||||
}
|
||||
|
||||
Directory.CreateDirectory(tempDirectory);
|
||||
}
|
||||
}
|
||||
6  src/producer/src/Features/Crawlers/Dmm/DMMHttpClient.cs  Normal file
@@ -0,0 +1,6 @@
namespace Producer.Features.Crawlers.Dmm;

public class DMMHttpClient
{

}
@@ -1,64 +1,99 @@
|
||||
namespace Producer.Features.Crawlers.Dmm;
|
||||
|
||||
public partial class DebridMediaManagerCrawler(
|
||||
IHttpClientFactory httpClientFactory,
|
||||
IDMMFileDownloader dmmFileDownloader,
|
||||
ILogger<DebridMediaManagerCrawler> logger,
|
||||
IDataStorage storage,
|
||||
GithubConfiguration githubConfiguration,
|
||||
IParseTorrentTitle parseTorrentTitle,
|
||||
IRankTorrentName rankTorrentName,
|
||||
IDistributedCache cache) : BaseCrawler(logger, storage)
|
||||
{
|
||||
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
|
||||
private static partial Regex HashCollectionMatcher();
|
||||
|
||||
private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
|
||||
protected override string Url => "";
|
||||
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
|
||||
protected override string Url => "https://api.github.com/repos/debridmediamanager/hashlists/git/trees/main?recursive=1";
|
||||
protected override string Source => "DMM";
|
||||
|
||||
|
||||
private const int ParallelismCount = 4;
|
||||
|
||||
public override async Task Execute()
|
||||
{
|
||||
var client = httpClientFactory.CreateClient("Scraper");
|
||||
client.DefaultRequestHeaders.Authorization = new("Bearer", githubConfiguration.PAT);
|
||||
client.DefaultRequestHeaders.UserAgent.ParseAdd("curl");
|
||||
var tempDirectory = await dmmFileDownloader.DownloadFileToTempPath(CancellationToken.None);
|
||||
|
||||
var jsonBody = await client.GetStringAsync(Url);
|
||||
var files = Directory.GetFiles(tempDirectory, "*.html", SearchOption.AllDirectories);
|
||||
|
||||
var json = JsonDocument.Parse(jsonBody);
|
||||
logger.LogInformation("Found {Files} files to parse", files.Length);
|
||||
|
||||
var entriesArray = json.RootElement.GetProperty("tree");
|
||||
var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
|
||||
|
||||
logger.LogInformation("Found {Entries} total DMM pages", entriesArray.GetArrayLength());
|
||||
|
||||
foreach (var entry in entriesArray.EnumerateArray())
|
||||
await Parallel.ForEachAsync(files, options, async (file, token) =>
|
||||
{
|
||||
await ParsePage(entry, client);
|
||||
}
|
||||
var fileName = Path.GetFileName(file);
|
||||
var torrentDictionary = await ExtractPageContents(file, fileName);
|
||||
|
||||
if (torrentDictionary == null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await ParseTitlesWithRtn(fileName, torrentDictionary);
|
||||
var results = await ParseTorrents(torrentDictionary);
|
||||
|
||||
if (results.Count <= 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await InsertTorrents(results);
|
||||
await Storage.MarkPageAsIngested(fileName, token);
|
||||
});
|
||||
}
|
||||
|
||||
private async Task ParsePage(JsonElement entry, HttpClient client)
|
||||
private async Task ParseTitlesWithRtn(string fileName, IDictionary<string, DmmContent> page)
|
||||
{
|
||||
var (pageIngested, name) = await IsAlreadyIngested(entry);
|
||||
logger.LogInformation("Parsing titles for {Page}", fileName);
|
||||
|
||||
if (string.IsNullOrEmpty(name) || pageIngested)
|
||||
var batchProcessables = page.Select(value => new RtnBatchProcessable(value.Key, value.Value.Filename)).ToList();
|
||||
var parsedResponses = rankTorrentName.BatchParse(
|
||||
batchProcessables.Select<RtnBatchProcessable, string>(bp => bp.Filename).ToList(), trashGarbage: false);
|
||||
|
||||
// Filter out unsuccessful responses and match RawTitle to requesting title
|
||||
var successfulResponses = parsedResponses
|
||||
.Where(response => response != null && response.Success)
|
||||
.GroupBy(response => response.Response.RawTitle!)
|
||||
.ToDictionary(group => group.Key, group => group.First());
|
||||
|
||||
var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
|
||||
|
||||
await Parallel.ForEachAsync(batchProcessables.Select(t => t.InfoHash), options, (infoHash, _) =>
|
||||
{
|
||||
return;
|
||||
}
|
||||
if (page.TryGetValue(infoHash, out var dmmContent) &&
|
||||
successfulResponses.TryGetValue(dmmContent.Filename, out var parsedResponse))
|
||||
{
|
||||
page[infoHash] = dmmContent with { ParseResponse = parsedResponse };
|
||||
}
|
||||
|
||||
var pageSource = await client.GetStringAsync($"{DownloadBaseUrl}/{name}");
|
||||
|
||||
await ExtractPageContents(pageSource, name);
|
||||
return ValueTask.CompletedTask;
|
||||
});
|
||||
}
|
||||
|
||||
private async Task ExtractPageContents(string pageSource, string name)
|
||||
private async Task<ConcurrentDictionary<string, DmmContent>?> ExtractPageContents(string filePath, string filenameOnly)
|
||||
{
|
||||
var (pageIngested, name) = await IsAlreadyIngested(filenameOnly);
|
||||
|
||||
if (pageIngested)
|
||||
{
|
||||
return [];
|
||||
}
|
||||
|
||||
var pageSource = await File.ReadAllTextAsync(filePath);
|
||||
|
||||
var match = HashCollectionMatcher().Match(pageSource);
|
||||
|
||||
if (!match.Success)
|
||||
{
|
||||
logger.LogWarning("Failed to match hash collection for {Name}", name);
|
||||
await Storage.MarkPageAsIngested(name);
|
||||
return;
|
||||
await Storage.MarkPageAsIngested(filenameOnly);
|
||||
return [];
|
||||
}
|
||||
|
||||
var encodedJson = match.Groups.Values.ElementAtOrDefault(1);
|
||||
@@ -66,34 +101,165 @@ public partial class DebridMediaManagerCrawler(
|
||||
if (string.IsNullOrEmpty(encodedJson?.Value))
|
||||
{
|
||||
logger.LogWarning("Failed to extract encoded json for {Name}", name);
|
||||
return;
|
||||
return [];
|
||||
}
|
||||
|
||||
await ProcessExtractedContentsAsTorrentCollection(encodedJson.Value, name);
|
||||
}
|
||||
var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson.Value);
|
||||
|
||||
private async Task ProcessExtractedContentsAsTorrentCollection(string encodedJson, string name)
|
||||
{
|
||||
var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson);
|
||||
|
||||
var json = JsonDocument.Parse(decodedJson);
|
||||
|
||||
await InsertTorrentsForPage(json);
|
||||
|
||||
var result = await Storage.MarkPageAsIngested(name);
|
||||
|
||||
if (!result.IsSuccess)
|
||||
JsonElement arrayToProcess;
|
||||
try
|
||||
{
|
||||
logger.LogWarning("Failed to mark page as ingested: [{Error}]", result.Failure.ErrorMessage);
|
||||
return;
|
||||
var json = JsonDocument.Parse(decodedJson);
|
||||
|
||||
if (json.RootElement.ValueKind == JsonValueKind.Object &&
|
||||
json.RootElement.TryGetProperty("torrents", out var torrentsProperty) &&
|
||||
torrentsProperty.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
arrayToProcess = torrentsProperty;
|
||||
}
|
||||
else if (json.RootElement.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
arrayToProcess = json.RootElement;
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.LogWarning("Unexpected JSON format in {Name}", name);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogError("Failed to parse JSON {decodedJson} for {Name}: {Exception}", decodedJson, name, ex);
|
||||
return [];
|
||||
}
|
||||
|
||||
logger.LogInformation("Successfully marked page as ingested");
|
||||
var torrents = await arrayToProcess.EnumerateArray()
|
||||
.ToAsyncEnumerable()
|
||||
.Select(ParsePageContent)
|
||||
.Where(t => t is not null)
|
||||
.ToListAsync();
|
||||
|
||||
if (torrents.Count == 0)
|
||||
{
|
||||
logger.LogWarning("No torrents found in {Name}", name);
|
||||
await Storage.MarkPageAsIngested(filenameOnly);
|
||||
return [];
|
||||
}
|
||||
|
||||
var torrentDictionary = torrents
|
||||
.Where(x => x is not null)
|
||||
.GroupBy(x => x.InfoHash)
|
||||
.ToConcurrentDictionary(g => g.Key, g => new DmmContent(g.First().Filename, g.First().Bytes, null));
|
||||
|
||||
logger.LogInformation("Parsed {Torrents} torrents for {Name}", torrentDictionary.Count, name);
|
||||
|
||||
return torrentDictionary;
|
||||
}
|
||||
|
||||
private async Task<IngestedTorrent?> ParseTorrent(JsonElement item)
|
||||
private async Task<List<IngestedTorrent>> ParseTorrents(IDictionary<string, DmmContent> page)
|
||||
{
|
||||
var ingestedTorrents = new List<IngestedTorrent>();
|
||||
|
||||
var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
|
||||
|
||||
await Parallel.ForEachAsync(page, options, async (kvp, ct) =>
|
||||
{
|
||||
var (infoHash, dmmContent) = kvp;
|
||||
var parsedTorrent = dmmContent.ParseResponse;
|
||||
if (parsedTorrent is not { Success: true })
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var torrentType = parsedTorrent.Response.IsMovie ? "movie" : "tvSeries";
|
||||
var cacheKey = GetCacheKey(torrentType, parsedTorrent.Response.ParsedTitle, parsedTorrent.Response.Year);
|
||||
var (cached, cachedResult) = await CheckIfInCacheAndReturn(cacheKey);
|
||||
|
||||
if (cached)
|
||||
{
|
||||
logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Response.ParsedTitle);
|
||||
lock (ingestedTorrents)
|
||||
{
|
||||
ingestedTorrents.Add(MapToTorrent(cachedResult, dmmContent.Bytes, infoHash, parsedTorrent));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int? year = parsedTorrent.Response.Year != 0 ? parsedTorrent.Response.Year : null;
|
||||
var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Response.ParsedTitle, torrentType, year, ct);
|
||||
|
||||
if (imdbEntry is null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await AddToCache(cacheKey, imdbEntry);
|
||||
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", imdbEntry.ImdbId, parsedTorrent.Response.ParsedTitle, imdbEntry.Title, imdbEntry.Score);
|
||||
lock (ingestedTorrents)
|
||||
{
|
||||
ingestedTorrents.Add(MapToTorrent(imdbEntry, dmmContent.Bytes, infoHash, parsedTorrent));
|
||||
}
|
||||
});
|
||||
|
||||
return ingestedTorrents;
|
||||
}
|
||||
|
||||
private IngestedTorrent MapToTorrent(ImdbEntry result, long size, string infoHash, ParseTorrentTitleResponse parsedTorrent) =>
|
||||
new()
|
||||
{
|
||||
Source = Source,
|
||||
Name = result.Title,
|
||||
Imdb = result.ImdbId,
|
||||
Size = size.ToString(),
|
||||
InfoHash = infoHash,
|
||||
Seeders = 0,
|
||||
Leechers = 0,
|
||||
Category = AssignCategory(result),
|
||||
RtnResponse = parsedTorrent.Response.ToJson(),
|
||||
};
|
||||
|
||||
|
||||
private Task AddToCache(string cacheKey, ImdbEntry best)
|
||||
{
|
||||
var cacheOptions = new DistributedCacheEntryOptions
|
||||
{
|
||||
AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1),
|
||||
};
|
||||
|
||||
return cache.SetStringAsync(cacheKey, JsonSerializer.Serialize(best), cacheOptions);
|
||||
}
|
||||
|
||||
private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string cacheKey)
|
||||
{
|
||||
var cachedImdbId = await cache.GetStringAsync(cacheKey);
|
||||
|
||||
if (!string.IsNullOrEmpty(cachedImdbId))
|
||||
{
|
||||
return (true, JsonSerializer.Deserialize<ImdbEntry>(cachedImdbId));
|
||||
}
|
||||
|
||||
return (false, null);
|
||||
}
|
||||
|
||||
private async Task<(bool Success, string? Name)> IsAlreadyIngested(string filename)
|
||||
{
|
||||
var pageIngested = await Storage.PageIngested(filename);
|
||||
|
||||
return (pageIngested, filename);
|
||||
}
|
||||
|
||||
private static string AssignCategory(ImdbEntry entry) =>
|
||||
entry.Category.ToLower() switch
|
||||
{
|
||||
var category when string.Equals(category, "movie", StringComparison.OrdinalIgnoreCase) => "movies",
|
||||
var category when string.Equals(category, "tvSeries", StringComparison.OrdinalIgnoreCase) => "tv",
|
||||
_ => "unknown",
|
||||
};
|
||||
|
||||
private static string GetCacheKey(string category, string title, int year) => $"{category.ToLowerInvariant()}:{year}:{title.ToLowerInvariant()}";
|
||||
|
||||
private static ExtractedDMMContent? ParsePageContent(JsonElement item)
|
||||
{
|
||||
if (!item.TryGetProperty("filename", out var filenameElement) ||
|
||||
!item.TryGetProperty("bytes", out var bytesElement) ||
|
||||
!item.TryGetProperty("hash", out var hashElement))
|
||||
@@ -101,143 +267,10 @@ public partial class DebridMediaManagerCrawler(
|
||||
return null;
|
||||
}
|
||||
|
||||
var torrentTitle = filenameElement.GetString();
|
||||
|
||||
if (torrentTitle.IsNullOrEmpty())
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var parsedTorrent = parseTorrentTitle.Parse(torrentTitle.CleanTorrentTitleForImdb());
|
||||
|
||||
var (cached, cachedResult) = await CheckIfInCacheAndReturn(parsedTorrent.Title);
|
||||
|
||||
if (cached)
|
||||
{
|
||||
logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Title);
|
||||
return new()
|
||||
{
|
||||
Source = Source,
|
||||
Name = cachedResult.Title,
|
||||
Imdb = cachedResult.ImdbId,
|
||||
Size = bytesElement.GetInt64().ToString(),
|
||||
InfoHash = hashElement.ToString(),
|
||||
Seeders = 0,
|
||||
Leechers = 0,
|
||||
Category = parsedTorrent.TorrentType switch
|
||||
{
|
||||
TorrentType.Movie => "movies",
|
||||
TorrentType.Tv => "tv",
|
||||
_ => "unknown",
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Title, parsedTorrent.TorrentType, parsedTorrent.Year);
|
||||
|
||||
if (imdbEntry.Count == 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var scoredTitles = await ScoreTitles(parsedTorrent, imdbEntry);
|
||||
|
||||
if (!scoredTitles.Success)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", scoredTitles.BestMatch.Value.ImdbId, parsedTorrent.Title, scoredTitles.BestMatch.Value.Title, scoredTitles.BestMatch.Score);
|
||||
|
||||
var torrent = new IngestedTorrent
|
||||
{
|
||||
Source = Source,
|
||||
Name = scoredTitles.BestMatch.Value.Title,
|
||||
Imdb = scoredTitles.BestMatch.Value.ImdbId,
|
||||
Size = bytesElement.GetInt64().ToString(),
|
||||
InfoHash = hashElement.ToString(),
|
||||
Seeders = 0,
|
||||
Leechers = 0,
|
||||
Category = parsedTorrent.TorrentType switch
|
||||
{
|
||||
TorrentType.Movie => "movies",
|
||||
TorrentType.Tv => "tv",
|
||||
_ => "unknown",
|
||||
},
|
||||
};
|
||||
|
||||
return torrent;
|
||||
return new(filenameElement.GetString(), bytesElement.GetInt64(), hashElement.GetString());
|
||||
}
|
||||
|
||||
private async Task<(bool Success, ExtractedResult<ImdbEntry>? BestMatch)> ScoreTitles(TorrentMetadata parsedTorrent, List<ImdbEntry> imdbEntries)
|
||||
{
|
||||
var lowerCaseTitle = parsedTorrent.Title.ToLowerInvariant();
|
||||
|
||||
// Scoring directly operates on the List<ImdbEntry>, no need for lookup table.
|
||||
var scoredResults = Process.ExtractAll(new(){Title = lowerCaseTitle}, imdbEntries, x => x.Title?.ToLowerInvariant(), scorer: new DefaultRatioScorer(), cutoff: 90);
|
||||
|
||||
var best = scoredResults.MaxBy(x => x.Score);
|
||||
|
||||
if (best is null)
|
||||
{
|
||||
return (false, null);
|
||||
}
|
||||
|
||||
await AddToCache(lowerCaseTitle, best);
|
||||
|
||||
return (true, best);
|
||||
}
|
||||
|
||||
private Task AddToCache(string lowerCaseTitle, ExtractedResult<ImdbEntry> best)
|
||||
{
|
||||
var cacheOptions = new DistributedCacheEntryOptions
|
||||
{
|
||||
AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(15),
|
||||
};
|
||||
|
||||
return cache.SetStringAsync(lowerCaseTitle, JsonSerializer.Serialize(best.Value), cacheOptions);
|
||||
}
|
||||
|
||||
private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string title)
|
||||
{
|
||||
var cachedImdbId = await cache.GetStringAsync(title.ToLowerInvariant());
|
||||
|
||||
if (!string.IsNullOrEmpty(cachedImdbId))
|
||||
{
|
||||
return (true, JsonSerializer.Deserialize<ImdbEntry>(cachedImdbId));
|
||||
}
|
||||
|
||||
return (false, null);
|
||||
}
|
||||
|
||||
private async Task InsertTorrentsForPage(JsonDocument json)
|
||||
{
|
||||
var torrents = await json.RootElement.EnumerateArray()
|
||||
.ToAsyncEnumerable()
|
||||
.SelectAwait(async x => await ParseTorrent(x))
|
||||
.Where(t => t is not null)
|
||||
.ToListAsync();
|
||||
|
||||
if (torrents.Count == 0)
|
||||
{
|
||||
logger.LogWarning("No torrents found in {Source} response", Source);
|
||||
return;
|
||||
}
|
||||
|
||||
await InsertTorrents(torrents!);
|
||||
}
|
||||
|
||||
private async Task<(bool Success, string? Name)> IsAlreadyIngested(JsonElement entry)
|
||||
{
|
||||
var name = entry.GetProperty("path").GetString();
|
||||
|
||||
if (string.IsNullOrEmpty(name))
|
||||
{
|
||||
return (false, null);
|
||||
}
|
||||
|
||||
var pageIngested = await Storage.PageIngested(name);
|
||||
|
||||
return (pageIngested, name);
|
||||
}
|
||||
private record DmmContent(string Filename, long Bytes, ParseTorrentTitleResponse? ParseResponse);
|
||||
private record ExtractedDMMContent(string Filename, long Bytes, string InfoHash);
|
||||
private record RtnBatchProcessable(string InfoHash, string Filename);
|
||||
}
|
||||
|
||||
@@ -1,9 +0,0 @@
namespace Producer.Features.Crawlers.Dmm;

public class GithubConfiguration
{
    private const string Prefix = "GITHUB";
    private const string PatVariable = "PAT";

    public string? PAT { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(PatVariable);
}
@@ -0,0 +1,6 @@
namespace Producer.Features.Crawlers.Dmm;

public interface IDMMFileDownloader
{
    Task<string> DownloadFileToTempPath(CancellationToken cancellationToken);
}
@@ -0,0 +1,17 @@
namespace Producer.Features.Crawlers.Dmm;

public static class ServiceCollectionExtensions
{
    public static IServiceCollection AddDmmSupport(this IServiceCollection services)
    {
        services.AddHttpClient<IDMMFileDownloader, DMMFileDownloader>(DMMFileDownloader.ClientName, client =>
        {
            client.BaseAddress = new("https://github.com/debridmediamanager/hashlists/zipball/main/");
            client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip");
            client.DefaultRequestHeaders.UserAgent.ParseAdd("curl");
            client.Timeout = TimeSpan.FromMinutes(10); // 10 minute timeout, #217
        });

        return services;
    }
}
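One detail worth keeping in mind with this registration: the trailing slash on `BaseAddress` matters, because `DMMFileDownloader` requests the relative name `main.zip` and .NET resolves relative URIs against the last path segment. A two-line illustration of the difference:

```csharp
var withSlash    = new Uri(new Uri("https://github.com/debridmediamanager/hashlists/zipball/main/"), "main.zip");
var withoutSlash = new Uri(new Uri("https://github.com/debridmediamanager/hashlists/zipball/main"), "main.zip");

Console.WriteLine(withSlash);    // .../hashlists/zipball/main/main.zip
Console.WriteLine(withoutSlash); // .../hashlists/zipball/main.zip (the "main" segment is replaced, not appended)
```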
@@ -1,11 +1,10 @@
namespace Producer.Features.Crawlers.EzTv;

public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
    protected override string Url => "https://eztv1.xyz/ezrss.xml";
    protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncEzTvJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
    protected override string Source => "EZTV";

    private static readonly XNamespace XmlNamespace = "http://xmlns.ezrss.it/0.1/";
    private XNamespace XmlNamespace => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncEzTvJob", StringComparison.OrdinalIgnoreCase))?.XmlNamespace ?? string.Empty;

    protected override IReadOnlyDictionary<string, string> Mappings =>
        new Dictionary<string, string>
@@ -1,11 +1,10 @@
namespace Producer.Features.Crawlers.Nyaa;

public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
    protected override string Url => "https://nyaa.si/?page=rss&c=1_2&f=0";
    protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncNyaaJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
    protected override string Source => "Nyaa";

    private static readonly XNamespace XmlNamespace = "https://nyaa.si/xmlns/nyaa";
    private XNamespace XmlNamespace => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncNyaaJob", StringComparison.OrdinalIgnoreCase))?.XmlNamespace ?? string.Empty;

    protected override IReadOnlyDictionary<string, string> Mappings =>
        new Dictionary<string, string>
@@ -1,13 +1,13 @@
namespace Producer.Features.Crawlers.Tgx;

public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
    [GeneratedRegex(@"Size:\s+(.+?)\s+Added")]
    private static partial Regex SizeStringExtractor();
    [GeneratedRegex(@"(?i)\b(\d+(\.\d+)?)\s*([KMGT]?B)\b", RegexOptions.None, "en-GB")]
    private static partial Regex SizeStringParser();

    protected override string Url => "https://tgx.rs/rss";
    protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncTgxJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;

    protected override string Source => "TorrentGalaxy";
    protected override IReadOnlyDictionary<string, string> Mappings
@@ -1,8 +1,8 @@
namespace Producer.Features.Crawlers.Tpb;

public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage) : BaseJsonCrawler(httpClientFactory, logger, storage)
public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseJsonCrawler(httpClientFactory, logger, storage)
{
    protected override string Url => "https://apibay.org/precompiled/data_top100_recent.json";
    protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncTpbJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;

    protected override string Source => "TPB";
@@ -1,9 +1,8 @@
namespace Producer.Features.Crawlers.Yts;

public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
    protected override string Url => "https://yts.am/rss";

    protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncYtsJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
    protected override string Source => "YTS";
    protected override IReadOnlyDictionary<string, string> Mappings
        => new Dictionary<string, string>
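Each of the five crawlers above now repeats the same `FirstOrDefault` lookup against `ScrapeConfiguration`. Purely as an illustration (not part of this change set), that lookup could be centralised in a small extension, with the behaviour on a missing entry kept identical:

```csharp
using System;
using System.Linq;

namespace Producer.Features.CrawlerSupport;

// Hypothetical helper; Scraper and ScrapeConfiguration are the types used by the crawlers above.
public static class ScrapeConfigurationExtensions
{
    public static Scraper? GetScraper(this ScrapeConfiguration configuration, string jobName) =>
        configuration.Scrapers.FirstOrDefault(x => x.Name.Equals(jobName, StringComparison.OrdinalIgnoreCase));

    public static string GetUrl(this ScrapeConfiguration configuration, string jobName) =>
        configuration.GetScraper(jobName)?.Url ?? string.Empty;
}
```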
@@ -0,0 +1,24 @@
namespace Producer.Features.DataProcessing
{
    public class LengthAwareRatioScorer : IRatioScorer
    {
        private readonly IRatioScorer _defaultScorer = new DefaultRatioScorer();

        public int Score(string input1, string input2)
        {
            var score = _defaultScorer.Score(input1, input2);
            var lengthRatio = (double)Math.Min(input1.Length, input2.Length) / Math.Max(input1.Length, input2.Length);
            var result = (int)(score * lengthRatio);
            return result > 100 ? 100 : result;
        }

        public int Score(string input1, string input2, PreprocessMode preprocessMode)
        {
            var score = _defaultScorer.Score(input1, input2, preprocessMode);
            var lengthRatio = (double)Math.Min(input1.Length, input2.Length) / Math.Max(input1.Length, input2.Length);
            var result = (int)(score * lengthRatio);

            return result > 100 ? 100 : result;
        }
    }
}
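The scorer above multiplies the default fuzzy ratio by `min(length) / max(length)`, so a short query that happens to sit inside a much longer title no longer scores as a near-perfect match. A small usage sketch:

```csharp
using Producer.Features.DataProcessing;

var scorer = new LengthAwareRatioScorer();

// Identical strings keep their full score (length ratio is 1.0)...
var exact = scorer.Score("the matrix", "the matrix");

// ...while a hit inside a much longer title is scaled down by the length ratio.
var padded = scorer.Score("the matrix", "the matrix reloaded extended cut");

Console.WriteLine($"exact: {exact}, padded: {padded}"); // expect 100 vs a markedly lower value
```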
@@ -9,7 +9,8 @@ internal static class ServiceCollectionExtensions
|
||||
|
||||
services.AddTransient<IDataStorage, DapperDataStorage>();
|
||||
services.AddTransient<IMessagePublisher, TorrentPublisher>();
|
||||
services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>();
|
||||
services.RegisterPythonEngine();
|
||||
services.AddSingleton<IRankTorrentName, RankTorrentName>();
|
||||
services.AddStackExchangeRedisCache(options =>
|
||||
{
|
||||
options.Configuration = redisConfiguration.ConnectionString;
|
||||
|
||||
@@ -5,7 +5,6 @@ internal static class ServiceCollectionExtensions
|
||||
internal static IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration)
|
||||
{
|
||||
var scrapeConfiguration = services.LoadConfigurationFromConfig<ScrapeConfiguration>(configuration, ScrapeConfiguration.SectionName);
|
||||
var githubConfiguration = services.LoadConfigurationFromEnv<GithubConfiguration>();
|
||||
var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();
|
||||
|
||||
var jobTypes = Assembly.GetAssembly(typeof(BaseJob))
|
||||
@@ -19,18 +18,13 @@ internal static class ServiceCollectionExtensions
|
||||
services.AddTransient(type);
|
||||
}
|
||||
|
||||
if (!string.IsNullOrEmpty(githubConfiguration.PAT))
|
||||
{
|
||||
services.AddTransient<SyncDmmJob>();
|
||||
}
|
||||
|
||||
var openMethod = typeof(ServiceCollectionExtensions).GetMethod(nameof(AddJobWithTrigger), BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance);
|
||||
|
||||
services.AddQuartz(
|
||||
quartz =>
|
||||
{
|
||||
RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
|
||||
RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration);
|
||||
RegisterDmmJob(quartz, scrapeConfiguration);
|
||||
RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
|
||||
RegisterPublisher(quartz, rabbitConfiguration);
|
||||
});
|
||||
@@ -64,13 +58,8 @@ internal static class ServiceCollectionExtensions
|
||||
}
|
||||
}
|
||||
|
||||
private static void RegisterDmmJob(GithubConfiguration githubConfiguration, IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration)
|
||||
{
|
||||
if (!string.IsNullOrEmpty(githubConfiguration.PAT))
|
||||
{
|
||||
AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
|
||||
}
|
||||
}
|
||||
private static void RegisterDmmJob(IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration) =>
|
||||
AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
|
||||
|
||||
private static void RegisterTorrentioJob(
|
||||
IServiceCollection services,
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
// Global using directives
|
||||
|
||||
global using System.Collections.Concurrent;
|
||||
global using System.IO.Compression;
|
||||
global using System.Reflection;
|
||||
global using System.Text;
|
||||
global using System.Text.Json;
|
||||
global using System.Text.RegularExpressions;
|
||||
global using System.Xml.Linq;
|
||||
global using FuzzySharp;
|
||||
global using FuzzySharp.Extractor;
|
||||
global using FuzzySharp.PreProcess;
|
||||
global using FuzzySharp.SimilarityRatio.Scorer;
|
||||
global using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive;
|
||||
global using LZStringCSharp;
|
||||
global using MassTransit;
|
||||
@@ -23,11 +25,10 @@ global using Producer.Features.Crawlers.Torrentio;
|
||||
global using Producer.Features.CrawlerSupport;
|
||||
global using Producer.Features.DataProcessing;
|
||||
global using Producer.Features.JobSupport;
|
||||
global using PromKnight.ParseTorrentTitle;
|
||||
global using Serilog;
|
||||
global using SharedContracts.Configuration;
|
||||
global using SharedContracts.Dapper;
|
||||
global using SharedContracts.Extensions;
|
||||
global using SharedContracts.Models;
|
||||
global using SharedContracts.Requests;
|
||||
global using StackExchange.Redis;
|
||||
global using SharedContracts.Python;
|
||||
global using SharedContracts.Python.RTN;
|
||||
global using SharedContracts.Requests;
|
||||
@@ -19,6 +19,7 @@
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||
<PackageReference Include="Polly" Version="8.3.0" />
|
||||
<PackageReference Include="pythonnet" Version="3.0.3" />
|
||||
<PackageReference Include="Quartz.Extensions.DependencyInjection" Version="3.8.0" />
|
||||
<PackageReference Include="Quartz.Extensions.Hosting" Version="3.8.0" />
|
||||
<PackageReference Include="Serilog" Version="3.1.1" />
|
||||
@@ -32,11 +33,14 @@
|
||||
<None Include="Configuration\*.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="requirements.txt">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Content Remove="Data\**" />
|
||||
<None Include="Data\**">
|
||||
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
|
||||
<Content Remove="python\**" />
|
||||
<None Include="python\**">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
@@ -12,7 +12,8 @@ builder.Services
|
||||
.RegisterMassTransit()
|
||||
.AddDataStorage()
|
||||
.AddCrawlers()
|
||||
.AddDmmSupport()
|
||||
.AddQuartz(builder.Configuration);
|
||||
|
||||
var app = builder.Build();
|
||||
app.Run();
|
||||
app.Run();
|
||||
1
src/producer/src/requirements.txt
Normal file
1
src/producer/src/requirements.txt
Normal file
@@ -0,0 +1 @@
|
||||
rank-torrent-name==0.2.13
|
||||
@@ -9,12 +9,23 @@ RUN dotnet restore -a $TARGETARCH
|
||||
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH
|
||||
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
|
||||
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
|
||||
|
||||
COPY --from=build /src/out .
|
||||
|
||||
RUN rm -rf /app/python && mkdir -p /app/python
|
||||
|
||||
RUN pip3 install -r /app/requirements.txt -t /app/python
|
||||
|
||||
RUN addgroup -S qbit && adduser -S -G qbit qbit
|
||||
USER qbit
|
||||
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
|
||||
CMD pgrep -f dotnet || exit 1
|
||||
ENTRYPOINT ["dotnet", "QbitCollector.dll"]
|
||||
ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0
|
||||
ENTRYPOINT ["dotnet", "QBitCollector.dll"]
|
||||
|
||||
@@ -13,11 +13,13 @@ public static class ServiceCollectionExtensions
|
||||
internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services)
|
||||
{
|
||||
services.AddQBitTorrentClient();
|
||||
services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>();
|
||||
services.RegisterPythonEngine();
|
||||
services.AddSingleton<IRankTorrentName, RankTorrentName>();
|
||||
services.AddSingleton<QbitRequestProcessor>();
|
||||
services.AddHttpClient();
|
||||
services.AddSingleton<ITrackersService, TrackersService>();
|
||||
services.AddHostedService<TrackersBackgroundService>();
|
||||
services.AddHostedService<HousekeepingBackgroundService>();
|
||||
|
||||
return services;
|
||||
}
|
||||
@@ -42,6 +44,7 @@ public static class ServiceCollectionExtensions
|
||||
{
|
||||
var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();
|
||||
var redisConfiguration = services.LoadConfigurationFromEnv<RedisConfiguration>();
|
||||
var qbitConfiguration = services.LoadConfigurationFromEnv<QbitConfiguration>();
|
||||
|
||||
services.AddStackExchangeRedisCache(
|
||||
option =>
|
||||
@@ -78,8 +81,8 @@ public static class ServiceCollectionExtensions
|
||||
e.ConfigureConsumer<WriteQbitMetadataConsumer>(context);
|
||||
e.ConfigureConsumer<PerformQbitMetadataRequestConsumer>(context);
|
||||
e.ConfigureSaga<QbitMetadataSagaState>(context);
|
||||
e.ConcurrentMessageLimit = 5;
|
||||
e.PrefetchCount = 5;
|
||||
e.ConcurrentMessageLimit = qbitConfiguration.Concurrency;
|
||||
e.PrefetchCount = qbitConfiguration.Concurrency;
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -96,16 +99,19 @@ public static class ServiceCollectionExtensions
|
||||
cfg.UseTimeout(
|
||||
timeout =>
|
||||
{
|
||||
timeout.Timeout = TimeSpan.FromMinutes(1);
|
||||
timeout.Timeout = TimeSpan.FromMinutes(3);
|
||||
});
|
||||
})
|
||||
.RedisRepository(redisConfiguration.ConnectionString);
|
||||
.RedisRepository(redisConfiguration.ConnectionString, options =>
|
||||
{
|
||||
options.KeyPrefix = "qbit-collector:";
|
||||
});
|
||||
|
||||
private static void AddQBitTorrentClient(this IServiceCollection services)
|
||||
{
|
||||
var qbitConfiguration = services.LoadConfigurationFromEnv<QbitConfiguration>();
|
||||
var client = new QBittorrentClient(new(qbitConfiguration.Host));
|
||||
client.Timeout = TimeSpan.FromSeconds(10);
|
||||
client.Timeout = TimeSpan.FromSeconds(20);
|
||||
|
||||
services.AddSingleton<IQBittorrentClient>(client);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
namespace QBitCollector.Features.Qbit;
|
||||
|
||||
public class HousekeepingBackgroundService(IQBittorrentClient client, ILogger<HousekeepingBackgroundService> logger) : BackgroundService
|
||||
{
|
||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
logger.LogInformation("Service is Running.");
|
||||
|
||||
await DoWork();
|
||||
|
||||
using PeriodicTimer timer = new(TimeSpan.FromMinutes(2));
|
||||
|
||||
try
|
||||
{
|
||||
while (await timer.WaitForNextTickAsync(stoppingToken))
|
||||
{
|
||||
await DoWork();
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
logger.LogInformation("Service stopping.");
|
||||
}
|
||||
}
|
||||
|
||||
private async Task DoWork()
|
||||
{
|
||||
try
|
||||
{
|
||||
logger.LogInformation("Cleaning Stale Entries in Qbit...");
|
||||
|
||||
var torrents = await client.GetTorrentListAsync();
|
||||
|
||||
foreach (var torrentInfo in torrents)
|
||||
{
|
||||
if (!(torrentInfo.AddedOn < DateTimeOffset.UtcNow.AddMinutes(-1)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
logger.LogInformation("Torrent [{InfoHash}] Identified as stale because was added at {AddedOn}", torrentInfo.Hash, torrentInfo.AddedOn);
|
||||
|
||||
await client.DeleteAsync(new[] {torrentInfo.Hash}, deleteDownloadedData: true);
|
||||
logger.LogInformation("Cleaned up stale torrent: [{InfoHash}]", torrentInfo.Hash);
|
||||
}
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
logger.LogError(e, "Error cleaning up stale torrents this interval.");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
namespace QBitCollector.Features.Qbit;
|
||||
|
||||
public class QbitRequestProcessor(IQBittorrentClient client, ITrackersService trackersService, ILogger<QbitRequestProcessor> logger)
|
||||
public class QbitRequestProcessor(IQBittorrentClient client, ITrackersService trackersService, ILogger<QbitRequestProcessor> logger, QbitConfiguration configuration)
|
||||
{
|
||||
public async Task<IReadOnlyList<TorrentContent>?> ProcessAsync(string infoHash, CancellationToken cancellationToken = default)
|
||||
{
|
||||
@@ -14,7 +14,7 @@ public class QbitRequestProcessor(IQBittorrentClient client, ITrackersService tr
|
||||
|
||||
using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||
|
||||
timeoutCts.CancelAfter(TimeSpan.FromSeconds(30));
|
||||
timeoutCts.CancelAfter(TimeSpan.FromSeconds(60));
|
||||
|
||||
try
|
||||
{
|
||||
@@ -30,7 +30,7 @@ public class QbitRequestProcessor(IQBittorrentClient client, ITrackersService tr
|
||||
break;
|
||||
}
|
||||
|
||||
await Task.Delay(TimeSpan.FromSeconds(1), timeoutCts.Token);
|
||||
await Task.Delay(TimeSpan.FromMilliseconds(200), timeoutCts.Token);
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
|
||||
|
||||
@@ -3,7 +3,12 @@ namespace QBitCollector.Features.Qbit;
public class QbitConfiguration
{
    private const string Prefix = "QBIT";
    private const string ConnectionStringVariable = "HOST";
    private const string HOST_VARIABLE = "HOST";
    private const string TRACKERS_URL_VARIABLE = "TRACKERS_URL";
    private const string CONCURRENCY_VARIABLE = "CONCURRENCY";

    public string? Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(ConnectionStringVariable);
    public string? Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HOST_VARIABLE);
    public string? TrackersUrl { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(TRACKERS_URL_VARIABLE);

    public int Concurrency { get; init; } = Prefix.GetEnvironmentVariableAsInt(CONCURRENCY_VARIABLE, 8);
}
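The collector now reads all three settings from `QBIT`-prefixed environment variables through the shared helper extensions. As a rough, hedged equivalent in plain BCL calls (assuming the helpers join the prefix and name with an underscore, which is not shown in this diff):

```csharp
using System;

public static class QbitEnvironmentSketch
{
    public static (string? Host, string? TrackersUrl, int Concurrency) Read()
    {
        var host = Environment.GetEnvironmentVariable("QBIT_HOST");
        var trackersUrl = Environment.GetEnvironmentVariable("QBIT_TRACKERS_URL");

        // Falls back to 8, matching the default passed to GetEnvironmentVariableAsInt above.
        var concurrency = int.TryParse(Environment.GetEnvironmentVariable("QBIT_CONCURRENCY"), out var parsed)
            ? parsed
            : 8;

        return (host, trackersUrl, concurrency);
    }
}
```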
@@ -1,8 +1,7 @@
|
||||
namespace QBitCollector.Features.Trackers;
|
||||
|
||||
public class TrackersService(IDistributedCache cache, HttpClient client, IMemoryCache memoryCache) : ITrackersService
|
||||
public class TrackersService(IDistributedCache cache, HttpClient client, IMemoryCache memoryCache, QbitConfiguration configuration) : ITrackersService
|
||||
{
|
||||
private const string TrackersListUrl = "https://ngosang.github.io/trackerslist/trackers_all.txt";
|
||||
private const string CacheKey = "trackers";
|
||||
|
||||
public async Task<List<string>> GetTrackers()
|
||||
@@ -42,7 +41,7 @@ public class TrackersService(IDistributedCache cache, HttpClient client, IMemory
|
||||
|
||||
private async Task<List<string>> GetTrackersAsync()
|
||||
{
|
||||
var response = await client.GetStringAsync(TrackersListUrl);
|
||||
var response = await client.GetStringAsync(configuration.TrackersUrl);
|
||||
|
||||
var lines = response.Split(["\r\n", "\r", "\n"], StringSplitOptions.None);
|
||||
|
||||
|
||||
@@ -3,10 +3,11 @@ namespace QBitCollector.Features.Worker;
|
||||
public static class QbitMetaToTorrentMeta
|
||||
{
|
||||
public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection(
|
||||
IParseTorrentTitle torrentTitle,
|
||||
IRankTorrentName rankTorrentName,
|
||||
Torrent torrent,
|
||||
string ImdbId,
|
||||
IReadOnlyList<TorrentContent> Metadata)
|
||||
IReadOnlyList<TorrentContent> Metadata,
|
||||
ILogger<WriteQbitMetadataConsumer> logger)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -24,23 +25,31 @@ public static class QbitMetaToTorrentMeta
|
||||
Size = metadataEntry.Size,
|
||||
};
|
||||
|
||||
var parsedTitle = torrentTitle.Parse(file.Title);
|
||||
var parsedTitle = rankTorrentName.Parse(file.Title, false);
|
||||
|
||||
if (!parsedTitle.Success)
|
||||
{
|
||||
logger.LogWarning("Failed to parse title {Title} for metadata mapping", file.Title);
|
||||
continue;
|
||||
}
|
||||
|
||||
file.ImdbSeason = parsedTitle.Seasons.FirstOrDefault();
|
||||
file.ImdbEpisode = parsedTitle.Episodes.FirstOrDefault();
|
||||
file.ImdbSeason = parsedTitle.Response?.Season?.FirstOrDefault() ?? 0;
|
||||
file.ImdbEpisode = parsedTitle.Response?.Episode?.FirstOrDefault() ?? 0;
|
||||
|
||||
files.Add(file);
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
catch (Exception)
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning("Failed to map metadata to files collection: {Exception}", ex.Message);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, IReadOnlyList<TorrentContent> Metadata)
|
||||
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, IReadOnlyList<TorrentContent> Metadata,
|
||||
ILogger<WriteQbitMetadataConsumer> logger)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -70,8 +79,9 @@ public static class QbitMetaToTorrentMeta
|
||||
|
||||
return files;
|
||||
}
|
||||
catch (Exception)
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning("Failed to map metadata to subtitles collection: {Exception}", ex.Message);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,6 +53,12 @@ public class QbitMetadataSagaStateMachine : MassTransitStateMachine<QbitMetadata
|
||||
.Then(
|
||||
context =>
|
||||
{
|
||||
if (!context.Message.WithFiles)
|
||||
{
|
||||
logger.LogInformation("No files written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
|
||||
return;
|
||||
}
|
||||
|
||||
logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
|
||||
})
|
||||
.TransitionTo(Completed)
|
||||
|
||||
@@ -1,22 +1,24 @@
namespace QBitCollector.Features.Worker;

- [EntityName("perform-metadata-request")]
+ [EntityName("perform-metadata-request-qbit-collector")]
public record PerformQbitMetadataRequest(Guid CorrelationId, string InfoHash) : CorrelatedBy<Guid>;

- [EntityName("torrent-metadata-response")]
+ [EntityName("torrent-metadata-response-qbit-collector")]
public record GotQbitMetadata(QBitMetadataResponse Metadata) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}

- [EntityName("write-metadata")]
+ [EntityName("write-metadata-qbit-collector")]
public record WriteQbitMetadata(Torrent Torrent, QBitMetadataResponse Metadata, string ImdbId) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}

- [EntityName("metadata-written")]
- public record QbitMetadataWritten(QBitMetadataResponse Metadata) : CorrelatedBy<Guid>
+ [EntityName("metadata-written-qbit-collector")]
+ public record QbitMetadataWritten(QBitMetadataResponse Metadata, bool WithFiles) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;

+ public QBitMetadataResponse Metadata { get; init; } = Metadata;
}
@@ -1,25 +1,36 @@
namespace QBitCollector.Features.Worker;

- public class WriteQbitMetadataConsumer(IParseTorrentTitle parseTorrentTitle, IDataStorage dataStorage) : IConsumer<WriteQbitMetadata>
+ public class WriteQbitMetadataConsumer(IRankTorrentName rankTorrentName, IDataStorage dataStorage, ILogger<WriteQbitMetadataConsumer> logger) : IConsumer<WriteQbitMetadata>
{
public async Task Consume(ConsumeContext<WriteQbitMetadata> context)
{
var request = context.Message;

- var torrentFiles = QbitMetaToTorrentMeta.MapMetadataToFilesCollection(parseTorrentTitle, request.Torrent, request.ImdbId, request.Metadata.Metadata);

- if (torrentFiles.Any())
+ if (request.Metadata.Metadata.Count == 0)
{
- await dataStorage.InsertFiles(torrentFiles);

- var subtitles = await QbitMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata);

- if (subtitles.Any())
- {
- await dataStorage.InsertSubtitles(subtitles);
- }
+ await context.Publish(new QbitMetadataWritten(request.Metadata, false));
+ return;
}

- await context.Publish(new QbitMetadataWritten(request.Metadata));

+ var torrentFiles = QbitMetaToTorrentMeta.MapMetadataToFilesCollection(
+ rankTorrentName, request.Torrent, request.ImdbId, request.Metadata.Metadata, logger);

+ if (!torrentFiles.Any())
+ {
+ await context.Publish(new QbitMetadataWritten(request.Metadata, false));
+ return;
+ }

+ await dataStorage.InsertFiles(torrentFiles);

+ var subtitles = await QbitMetaToTorrentMeta.MapMetadataToSubtitlesCollection(
+ dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata, logger);

+ if (subtitles.Any())
+ {
+ await dataStorage.InsertSubtitles(subtitles);
+ }

+ await context.Publish(new QbitMetadataWritten(request.Metadata, true));
}
}
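For context only, not part of this diff: the consumer's new constructor dependencies (IRankTorrentName, IDataStorage, ILogger<WriteQbitMetadataConsumer>) are resolved by the DI container when MassTransit creates the consumer. The registration lives elsewhere in the repository; a minimal sketch of typical MassTransit 8 wiring, assuming a services IServiceCollection, might look like this.

// Illustrative wiring only; the repo's actual bus configuration is not shown in this diff.
services.AddMassTransit(x =>
{
    // Registers the consumer so ConfigureEndpoints can create a receive endpoint for it.
    x.AddConsumer<WriteQbitMetadataConsumer>();

    x.UsingRabbitMq((context, cfg) =>
    {
        // Message contracts carry [EntityName] attributes (see the records above),
        // which control the exchange names used on the broker.
        cfg.ConfigureEndpoints(context);
    });
});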
@@ -1,17 +1,11 @@
// Global using directives

global using System.Text.Json;
global using System.Text.Json.Serialization;
global using System.Threading.Channels;
global using MassTransit;
global using MassTransit.Mediator;
global using Microsoft.AspNetCore.Builder;
global using Microsoft.Extensions.Caching.Distributed;
global using Microsoft.Extensions.Caching.Memory;
global using Microsoft.Extensions.DependencyInjection;
global using Polly;
global using Polly.Extensions.Http;
global using PromKnight.ParseTorrentTitle;
global using QBitCollector.Extensions;
global using QBitCollector.Features.Qbit;
global using QBitCollector.Features.Trackers;
@@ -21,4 +15,6 @@ global using SharedContracts.Configuration;
global using SharedContracts.Dapper;
global using SharedContracts.Extensions;
global using SharedContracts.Models;
+ global using SharedContracts.Python;
+ global using SharedContracts.Python.RTN;
global using SharedContracts.Requests;
@@ -18,7 +18,6 @@
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
<PackageReference Include="Polly" Version="8.3.1" />
- <PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
<PackageReference Include="QBittorrent.Client" Version="1.9.23349.1" />
<PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
@@ -31,10 +30,30 @@
<None Include="Configuration\logging.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
+ <Content Remove="eng\**" />
+ <None Remove="eng\**" />
+ <None Update="requirements.txt">
+ <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+ </None>
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\shared\SharedContracts.csproj" />
</ItemGroup>

+ <ItemGroup Condition="'$(Configuration)' == 'Debug'">
+ <Content Remove="python\**" />
+ <None Include="python\**">
+ <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+ </None>
+ </ItemGroup>

+ <ItemGroup>
+ <Compile Remove="eng\**" />
+ </ItemGroup>

+ <ItemGroup>
+ <EmbeddedResource Remove="eng\**" />
+ </ItemGroup>

</Project>
@@ -6,6 +6,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "QBitCollector", "QBitCollector.csproj", "{1EF124BE-6EBE-4D9E-846C-FFF814999F3B}"
EndProject
+ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{2F2EA33A-1303-405D-939B-E9394D262BC9}"
+ ProjectSection(SolutionItems) = preProject
+ eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
+ eng\install-python-reqs.sh = eng\install-python-reqs.sh
+ EndProjectSection
+ EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
src/qbit-collector/eng/install-python-reqs.ps1 (Normal file, 3 lines)
@@ -0,0 +1,3 @@
Remove-Item -Recurse -Force ../python
mkdir -p ../python
python -m pip install -r ../requirements.txt -t ../python/
src/qbit-collector/eng/install-python-reqs.sh (Normal file, 5 lines)
@@ -0,0 +1,5 @@
#!/bin/bash

rm -rf ../python
mkdir -p ../python
python3 -m pip install -r ../requirements.txt -t ../python/
src/qbit-collector/requirements.txt (Normal file, 1 line)
@@ -0,0 +1 @@
rank-torrent-name==0.2.13
@@ -8,12 +8,14 @@ public class PostgresConfiguration
private const string PasswordVariable = "PASSWORD";
private const string DatabaseVariable = "DB";
private const string PortVariable = "PORT";
+ private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds

private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
+ private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);

- public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};";
+ public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
}
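For illustration only (not part of the diff): the new COMMAND_TIMEOUT_SEC setting flows straight into the Npgsql connection string, defaulting to 300 seconds when unset. The prefix and the host/user variable names are defined above this hunk and are assumed here to join as POSTGRES_HOST, POSTGRES_COMMAND_TIMEOUT_SEC, and so on.

// Assuming POSTGRES_HOST=postgres, POSTGRES_PORT=5432, POSTGRES_DB=knightcrawler and
// POSTGRES_COMMAND_TIMEOUT_SEC=600 (plus username/password), the property would now yield roughly:
// Host=postgres;Port=5432;Username=...;Password=...;Database=knightcrawler;CommandTimeout=600
var storageConnectionString = new PostgresConfiguration().StorageConnectionString;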
@@ -3,7 +3,12 @@ namespace SharedContracts.Configuration;
public class RedisConfiguration
{
private const string Prefix = "REDIS";
- private const string ConnectionStringVariable = "CONNECTION_STRING";

- public string? ConnectionString { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(ConnectionStringVariable) + ",abortConnect=false,allowAdmin=true";
+ private const string HostVariable = "HOST";
+ private const string PortVariable = "PORT";
+ private const string ExtraVariable = "EXTRA";

+ private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
+ private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 6379);
+ private string EXTRA { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(ExtraVariable, "abortConnect=false,allowAdmin=true");
+ public string ConnectionString => $"{Host}:{PORT},{EXTRA}";
}
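Again for illustration only: the Redis connection string is now assembled from three variables instead of a single CONNECTION_STRING. Assuming the prefix/name join yields REDIS_HOST, REDIS_PORT and REDIS_EXTRA (the join convention lives in the shared extensions, not in this hunk):

// With REDIS_HOST=redis and neither REDIS_PORT nor REDIS_EXTRA set, the defaults apply
// and the resulting string is: redis:6379,abortConnect=false,allowAdmin=true
var redisConnectionString = new RedisConfiguration().ConnectionString;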
@@ -9,9 +9,9 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
const string query =
"""
INSERT INTO ingested_torrents
- ("name", "source", "category", "info_hash", "size", "seeders", "leechers", "imdb", "processed", "createdAt", "updatedAt")
+ ("name", "source", "category", "info_hash", "size", "seeders", "leechers", "imdb", "processed", "createdAt", "updatedAt", "rtn_response")
VALUES
- (@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt)
+ (@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt, @RtnResponse::jsonb)
ON CONFLICT (source, info_hash) DO NOTHING
""";
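A hedged sketch of how the extra column might be supplied from the caller's side; the actual parameter object is not shown in this diff, so the property names on the torrent model and the RtnResponse serialization step are assumptions inferred from the @RtnResponse::jsonb placeholder above.

// Hypothetical call site: the RTN parse result is serialized to a JSON string and
// Postgres casts it to jsonb through the ::jsonb placeholder in the query above.
var parameters = new
{
    torrent.Name, torrent.Source, torrent.Category, torrent.InfoHash, torrent.Size,
    torrent.Seeders, torrent.Leechers, torrent.Imdb, torrent.Processed,
    torrent.CreatedAt, torrent.UpdatedAt,
    RtnResponse = JsonSerializer.Serialize(rtnResult.Response),
};

await connection.ExecuteAsync(query, parameters);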
@@ -110,21 +110,21 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
public async Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
- const string query = @"SELECT imdb_id AS ImdbId, title as Title, category as Category, year as Year, adult as Adult FROM imdb_metadata WHERE CAST(NULLIF(Year, '\N') AS INTEGER) <= @Year AND imdb_id > @LastProcessedImdbId ORDER BY ImdbId LIMIT @BatchSize";
+ const string query = @"SELECT imdb_id AS ImdbId, title as Title, category as Category, year as Year, adult as Adult FROM imdb_metadata WHERE Year <= @Year AND imdb_id > @LastProcessedImdbId ORDER BY ImdbId LIMIT @BatchSize";
var result = await connection.QueryAsync<ImdbEntry>(query, new { Year = year, LastProcessedImdbId = stateLastProcessedImdbId, BatchSize = batchSize });
return result.ToList();
}, "Error getting imdb metadata.", cancellationToken);

- public async Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, TorrentType torrentType, string? year, CancellationToken cancellationToken = default) =>
+ public async Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string torrentType, int? year, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
- var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\" from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{(torrentType == TorrentType.Movie ? "movie" : "tvSeries")}'";
- query += year is not null ? $", '{year}'" : ", NULL";
- query += ", 15)";
+ var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\", \"score\" as Score, \"category\" as Category from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{torrentType}'";
+ query += year is not null ? $", {year}" : ", NULL";
+ query += ", 1)";

var result = await connection.QueryAsync<ImdbEntry>(query);

- return result.ToList();
+ var results = result.ToList();
+ return results.FirstOrDefault();
}, "Error finding imdb metadata.", cancellationToken);

public Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default) =>
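The reworked lookup now asks search_imdb_meta for a single best match (the final argument drops from 15 to 1) and returns it, or null, instead of a candidate list. A small usage sketch against the new signature; the title, type and year values are invented for illustration.

// Returns the highest-scoring match, or null when the search function finds nothing.
ImdbEntry? match = await dataStorage.FindImdbMetadata("The Matrix", "movie", 1999, cancellationToken);

if (match is null)
{
    // caller-specific fallback when no IMDb entry could be matched
}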
@@ -134,9 +134,9 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
const string query =
"""
INSERT INTO "torrents"
- ("infoHash", "provider", "torrentId", "title", "size", "type", "uploadDate", "seeders", "trackers", "languages", "resolution", "reviewed", "opened", "createdAt", "updatedAt")
+ ("infoHash", "ingestedTorrentId", "provider", "title", "size", "type", "uploadDate", "seeders", "languages", "resolution", "reviewed", "opened", "createdAt", "updatedAt")
VALUES
- (@InfoHash, @Provider, @TorrentId, @Title, 0, @Type, NOW(), @Seeders, NULL, NULL, NULL, false, false, NOW(), NOW())
+ (@InfoHash, @IngestedTorrentId, @Provider, @Title, 0, @Type, NOW(), @Seeders, NULL, NULL, false, false, NOW(), NOW())
ON CONFLICT ("infoHash") DO NOTHING
""";
@@ -152,7 +152,8 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
INSERT INTO files
("infoHash", "fileIndex", title, "size", "imdbId", "imdbSeason", "imdbEpisode", "kitsuId", "kitsuEpisode", "createdAt", "updatedAt")
VALUES
- (@InfoHash, @FileIndex, @Title, @Size, @ImdbId, @ImdbSeason, @ImdbEpisode, @KitsuId, @KitsuEpisode, Now(), Now());
+ (@InfoHash, @FileIndex, @Title, @Size, @ImdbId, @ImdbSeason, @ImdbEpisode, @KitsuId, @KitsuEpisode, Now(), Now())
+ ON CONFLICT ("infoHash", "fileIndex") DO NOTHING;
""";

await connection.ExecuteAsync(query, files);
@@ -168,11 +169,7 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
("infoHash", "fileIndex", "fileId", "title")
VALUES
(@InfoHash, @FileIndex, @FileId, @Title)
- ON CONFLICT
- ("infoHash", "fileIndex")
- DO UPDATE SET
- "fileId" = COALESCE(subtitles."fileId", EXCLUDED."fileId"),
- "title" = COALESCE(subtitles."title", EXCLUDED."title");
+ ON CONFLICT ("infoHash", "fileIndex") DO NOTHING;
""";

await connection.ExecuteAsync(query, subtitles);
@@ -9,7 +9,7 @@ public interface IDataStorage
Task<DapperResult<PageIngestedResult, PageIngestedResult>> MarkPageAsIngested(string pageId, CancellationToken cancellationToken = default);
Task<DapperResult<int, int>> GetRowCountImdbMetadata(CancellationToken cancellationToken = default);
Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default);
- Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, TorrentType parsedTorrentTorrentType, string? parsedTorrentYear, CancellationToken cancellationToken = default);
+ Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string parsedTorrentTorrentType, int? parsedTorrentYear, CancellationToken cancellationToken = default);
Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default);
Task InsertFiles(IEnumerable<TorrentFile> files, CancellationToken cancellationToken = default);
Task InsertSubtitles(IEnumerable<SubtitleFile> subtitles, CancellationToken cancellationToken = default);
@@ -1,4 +1,3 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;

namespace SharedContracts.Extensions;
src/shared/Extensions/DictionaryExtensions.cs (Normal file, 19 lines)
@@ -0,0 +1,19 @@
namespace SharedContracts.Extensions;

public static class DictionaryExtensions
{
public static ConcurrentDictionary<TKey, TValue> ToConcurrentDictionary<TSource, TKey, TValue>(
this IEnumerable<TSource> source,
Func<TSource, TKey> keySelector,
Func<TSource, TValue> valueSelector) where TKey : notnull
{
var concurrentDictionary = new ConcurrentDictionary<TKey, TValue>();

foreach (var element in source)
{
concurrentDictionary.TryAdd(keySelector(element), valueSelector(element));
}

return concurrentDictionary;
}
}
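A brief usage example for the new extension, with an invented torrents collection (assumed IEnumerable<Torrent>):

// Builds a thread-safe lookup keyed by info hash. Because the extension uses TryAdd,
// duplicate keys are silently skipped rather than throwing, which is the main
// behavioural difference from Enumerable.ToDictionary.
var byInfoHash = torrents.ToConcurrentDictionary(t => t.InfoHash, t => t);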
Some files were not shown because too many files have changed in this diff.