18 Commits

Author SHA1 Message Date
purple_emily
10d6544673 Re-enable master only 2024-03-10 13:18:02 +00:00
purple_emily
2134f2f51d Missing forward slash 2024-03-10 13:17:08 +00:00
purple_emily
64149c55a8 WIP: External access 2024-03-10 13:15:05 +00:00
purple_emily
5584143eeb Rename for consistency 2024-03-10 13:11:31 +00:00
purple_emily
d84a186132 Pre-commit run 2024-03-10 11:43:35 +00:00
purple_emily
7e338975ed WIP: External access 2024-03-10 11:40:59 +00:00
purple_emily
123b8aad96 WIP: Getting-started 2024-03-10 11:40:59 +00:00
purple_emily
c947b013a2 Only run when the docs folder is changed 2024-03-10 11:40:58 +00:00
purple_emily
f000ae6c12 WIP: Run Knight Crawler 2024-03-10 11:40:58 +00:00
purple_emily
108a4a9066 WIP: Run Knight Crawler 2024-03-10 11:40:58 +00:00
purple_emily
ad286a984f Add Getting started and WIP: Run Knight Crawler 2024-03-10 11:40:58 +00:00
purple_emily
71ec7bdf2b Add some line breaks for readability 2024-03-10 11:40:58 +00:00
purple_emily
ffeeb56610 Fix instance ID's 2024-03-10 11:40:58 +00:00
purple_emily
cfc2b8f601 Ew, we are still using master as the main branch... 2024-03-10 11:40:57 +00:00
purple_emily
66d37a8c05 Change images web-path as per documentation (THIS COULD BE WRONG) 2024-03-10 11:40:57 +00:00
purple_emily
b19d1f0bf4 Add KC logo to header 2024-03-10 11:40:57 +00:00
purple_emily
cbf5fda723 GitHub action to build documentation 2024-03-10 11:40:57 +00:00
purple_emily
5def66858f Overview page 2024-03-10 11:40:57 +00:00
487 changed files with 44612 additions and 264471 deletions

View File

@@ -12,9 +12,6 @@ A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1.
2.
3.
**Expected behavior**
A clear and concise description of what you expected to happen.
@@ -26,7 +23,6 @@ If the logs are short, make sure to triple backtick them, or use https://pastebi
**Hardware:**
- OS and distro: [e.g. Raspberry Pi OS, Ubuntu, Rocky]
- Server: [e.g. VM, Baremetal, Pi]
- Knightcrawler Version: [2.0.xx]
**Additional context**
Add any other context about the problem here.

View File

@@ -6,16 +6,12 @@ on:
CONTEXT:
required: true
type: string
DOCKERFILE:
required: true
type: string
IMAGE_NAME:
required: true
type: string
env:
CONTEXT: ${{ inputs.CONTEXT }}
DOCKERFILE: ${{ inputs.DOCKERFILE }}
IMAGE_NAME: ${{ inputs.IMAGE_NAME }}
PLATFORMS: linux/amd64,linux/arm64
@@ -25,13 +21,11 @@ jobs:
steps:
- name: Setting variables
run: |
echo "CONTEXT=${{ env.CONTEXT }}"
echo "DOCKERFILE=${{ env.DOCKERFILE }}"
echo "IMAGE_NAME=${{ env.IMAGE_NAME }}"
echo "CONTEXT=${{ env.CONTEXT }}"
echo "IMAGE_NAME=${{ env.IMAGE_NAME }}"
echo "PLATFORMS=${{ env.PLATFORMS }}"
outputs:
CONTEXT: ${{ env.CONTEXT }}
DOCKERFILE: ${{ env.DOCKERFILE }}
IMAGE_NAME: ${{ env.IMAGE_NAME }}
PLATFORMS: ${{ env.PLATFORMS }}
@@ -76,17 +70,14 @@ jobs:
flavor: |
latest=auto
tags: |
type=ref,event=tag
type=ref,event=pr
type=edge,branch=master,commit=${{ github.sha }}
type=sha,commit=${{ github.sha }}
type=semver,pattern={{version}}
type=raw,value=latest,enable={{is_default_branch}}
- name: Build image for scanning
uses: docker/build-push-action@v5
with:
context: ${{ needs.set-vars.outputs.CONTEXT }}
file: ${{ needs.set-vars.outputs.DOCKERFILE }}
push: true
provenance: false
tags: localhost:5000/dockle-examine-image:test
@@ -139,11 +130,10 @@ jobs:
sarif_file: 'trivy-results-os.sarif'
- name: Push Service Image to repo
# if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master'
if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master'
uses: docker/build-push-action@v5
with:
context: ${{ needs.set-vars.outputs.CONTEXT }}
file: ${{ needs.set-vars.outputs.DOCKERFILE }}
push: true
provenance: false
tags: ${{ steps.docker-metadata.outputs.tags }}

View File

@@ -2,17 +2,13 @@ name: Build and Push Addon Service
on:
push:
tags:
- '**'
paths:
- 'src/addon/**'
workflow_dispatch:
- 'src/node/addon/**'
jobs:
process:
uses: ./.github/workflows/base_image_workflow.yaml
secrets: inherit
with:
CONTEXT: ./src/addon/
DOCKERFILE: ./src/addon/Dockerfile
CONTEXT: ./src/node/addon/
IMAGE_NAME: knightcrawler-addon

View File

@@ -2,17 +2,13 @@ name: Build and Push Consumer Service
on:
push:
tags:
- '**'
paths:
- 'src/torrent-consumer/**'
workflow_dispatch:
- 'src/node/consumer/**'
jobs:
process:
uses: ./.github/workflows/base_image_workflow.yaml
secrets: inherit
with:
CONTEXT: ./src/
DOCKERFILE: ./src/torrent-consumer/Dockerfile
CONTEXT: ./src/node/consumer/
IMAGE_NAME: knightcrawler-consumer

View File

@@ -1,18 +0,0 @@
name: Build and Push Debrid Collector Service
on:
push:
tags:
- '**'
paths:
- 'src/debrid-collector/**'
workflow_dispatch:
jobs:
process:
uses: ./.github/workflows/base_image_workflow.yaml
secrets: inherit
with:
CONTEXT: ./src/
DOCKERFILE: ./src/debrid-collector/Dockerfile
IMAGE_NAME: knightcrawler-debrid-collector

.github/workflows/build_docs.yaml (new file, 86 lines)
View File

@@ -0,0 +1,86 @@
name: Build documentation
# Runs only when files under ./docs change
on:
push:
branches: ["master"]
paths:
- 'docs/**'
# Specify to run a workflow manually from the Actions tab on GitHub
workflow_dispatch:
permissions:
id-token: write
pages: write
env:
INSTANCE: Writerside/kc
ARTIFACT: webHelpKC2-all.zip
DOCS_FOLDER: ./docs
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Build Writerside docs using Docker
uses: JetBrains/writerside-github-action@v4
with:
instance: ${{ env.INSTANCE }}
artifact: ${{ env.ARTIFACT }}
location: ${{ env.DOCS_FOLDER }}
- name: Upload artifact
uses: actions/upload-artifact@v3
with:
name: docs
path: |
artifacts/${{ env.ARTIFACT }}
artifacts/report.json
retention-days: 7
test:
needs: build
runs-on: ubuntu-latest
steps:
- name: Download artifacts
uses: actions/download-artifact@v3
with:
name: docs
path: artifacts
- name: Test documentation
uses: JetBrains/writerside-checker-action@v1
with:
instance: ${{ env.INSTANCE }}
deploy:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
needs: [build, test]
runs-on: ubuntu-latest
steps:
- name: Download artifacts
uses: actions/download-artifact@v3
with:
name: docs
- name: Unzip artifact
run: unzip -O UTF-8 -qq '${{ env.ARTIFACT }}' -d dir
- name: Setup Pages
uses: actions/configure-pages@v4
- name: Package and upload Pages artifact
uses: actions/upload-pages-artifact@v3
with:
path: dir
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4

View File

@@ -2,17 +2,13 @@ name: Build and Push Jackett Addon Service
on:
push:
tags:
- '**'
paths:
- 'src/addon-jackett/**'
workflow_dispatch:
- 'src/node/addon-jackett/**'
jobs:
process:
uses: ./.github/workflows/base_image_workflow.yaml
secrets: inherit
with:
CONTEXT: ./src/addon-jackett/
DOCKERFILE: ./src/addon-jackett/Dockerfile
CONTEXT: ./src/node/addon-jackett/
IMAGE_NAME: knightcrawler-addon-jackett

View File

@@ -2,11 +2,8 @@ name: Build and Push Metadata Service
on:
push:
tags:
- '**'
paths:
- 'src/metadata/**'
workflow_dispatch:
jobs:
process:
@@ -14,5 +11,4 @@ jobs:
secrets: inherit
with:
CONTEXT: ./src/metadata/
DOCKERFILE: ./src/metadata/Dockerfile
IMAGE_NAME: knightcrawler-metadata

View File

@@ -2,11 +2,8 @@ name: Build and Push Migrator Service
on:
push:
tags:
- '**'
paths:
- 'src/migrator/**'
workflow_dispatch:
jobs:
process:
@@ -14,5 +11,4 @@ jobs:
secrets: inherit
with:
CONTEXT: ./src/migrator/
DOCKERFILE: ./src/migrator/Dockerfile
IMAGE_NAME: knightcrawler-migrator

View File

@@ -2,17 +2,13 @@ name: Build and Push Producer Service
on:
push:
tags:
- '**'
paths:
- 'src/producer/**'
workflow_dispatch:
jobs:
process:
uses: ./.github/workflows/base_image_workflow.yaml
secrets: inherit
with:
CONTEXT: ./src/
DOCKERFILE: ./src/producer/src/Dockerfile
CONTEXT: ./src/producer/
IMAGE_NAME: knightcrawler-producer

View File

@@ -1,18 +0,0 @@
name: Build and Push Qbit Collector Service
on:
push:
tags:
- '**'
paths:
- 'src/qbit-collector/**'
workflow_dispatch:
jobs:
process:
uses: ./.github/workflows/base_image_workflow.yaml
secrets: inherit
with:
CONTEXT: ./src/
DOCKERFILE: ./src/qbit-collector/Dockerfile
IMAGE_NAME: knightcrawler-qbit-collector

View File

@@ -1,18 +0,0 @@
name: Build and Push Tissue Service
on:
push:
tags:
- '**'
paths:
- 'src/tissue/**'
workflow_dispatch:
jobs:
process:
uses: ./.github/workflows/base_image_workflow.yaml
secrets: inherit
with:
CONTEXT: ./src/tissue/
DOCKERFILE: ./src/tissue/Dockerfile
IMAGE_NAME: knightcrawler-tissue

View File

@@ -1,15 +0,0 @@
name: Build and Push Torrent Ingestor Service
on:
push:
paths:
- 'src/torrent-ingestor/**'
jobs:
process:
uses: ./.github/workflows/base_image_workflow.yaml
secrets: inherit
with:
CONTEXT: ./src/torrent-ingestor
DOCKERFILE: ./src/torrent-ingestor/Dockerfile
IMAGE_NAME: knightcrawler-torrent-ingestor

View File

@@ -1,39 +0,0 @@
on:
push:
branches:
- main
workflow_dispatch:
jobs:
changelog:
name: Generate changelog
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generate a changelog
uses: orhun/git-cliff-action@v3
with:
config: cliff.toml
args: --verbose
env:
OUTPUT: CHANGELOG.md
GITHUB_REPO: ${{ github.repository }}
- name: Commit
run: |
git config user.name 'github-actions[bot]'
git config user.email 'github-actions[bot]@users.noreply.github.com'
set +e
git checkout -b feat/changelog_$(date +"%d_%m")
git add CHANGELOG.md
git commit -m "[skip ci] Update changelog"
git push https://${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git feat/changelog_$(date +"%d_%m")
- name: create pull request
run: gh pr create -B main -H feat/changelog_$(date +"%d_%m") --title '[skip ci] Update changelog' --body 'Changelog update by git-cliff'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.gitignore (13 lines changed)
View File

@@ -355,9 +355,6 @@ MigrationBackup/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# JetBrains IDEs' run profiles (could contain sensitive information)
**/.run/
# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json
@@ -395,6 +392,8 @@ dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
@@ -608,11 +607,3 @@ fabric.properties
# Caddy logs
!**/caddy/logs/.gitkeep
**/caddy/logs/**
# Mac directory indexes
.DS_Store
deployment/docker/stack.env
src/producer/src/python/
src/debrid-collector/python/
src/qbit-collector/python/

View File

@@ -3,7 +3,6 @@ repos:
rev: v4.5.0
hooks:
- id: check-added-large-files
args: ['--maxkb=2500']
- id: check-json
- id: check-toml
- id: check-xml
@@ -16,6 +15,5 @@ repos:
rev: v2.2.6
hooks:
- id: codespell
exclude: |
(?x)^(src/node/consumer/test/.*|src/producer/Data/.*|src/tissue/Data/.*)$
exclude: ^src/node/consumer/test/
args: ["-L", "strem,chage"]
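For local use, these hooks can be run by hand (a convenience, assuming the `pre-commit` tool is installed):

```Bash
pip install pre-commit
pre-commit run --all-files
```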

View File

@@ -1,22 +0,0 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.0.0] - 2024-03-25
### Details
#### Changed
- Change POSTGRES_USERNAME to POSTGRES_USER. Oops by @purple-emily
- Change POSTGRES_DATABASE to POSTGRES_DB by @purple-emily
- Two movie commands instead of movie and tv by @purple-emily
- Cleanup RabbitMQ env vars, and Github Pat by @iPromKnight
#### Fixed
- HRD -> HDR by @mplewis
## New Contributors
* @mplewis made their first contribution
<!-- generated by git-cliff -->

View File

@@ -1,34 +0,0 @@
We use [Meaningful commit messages](https://reflectoring.io/meaningful-commit-messages/)
Tl;dr:
1. It should answer the question: “What happens if the changes are applied?".
2. Use the imperative, present tense. It is easier to read and scan quickly:
```
Right: Add feature to alert admin for new user registration
Wrong: Added feature ... (past tense)
```
3. The summary should always be able to complete the following sentence:
`If applied, this commit will… `
We use [git-cliff] for our changelog.
The breaking flag is set to true when the commit has an exclamation mark after the commit type and scope, e.g.:
`feat(scope)!: this is a breaking change`
Keywords (Commit messages should start with these):
```
# Added
add
support
# Removed
remove
delete
# Fixed
test
fix
```
Any other commits will fall under the `Changed` category
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html)

View File

@@ -7,6 +7,9 @@
## Contents
> [!CAUTION]
> Until we reach `v1.0.0`, please consider releases as alpha.
> [!IMPORTANT]
> The latest change renames the project and requires a [small migration](#selfhostio-to-knightcrawler-migration).
- [Contents](#contents)
@@ -51,11 +54,11 @@ Download and install [Docker Compose](https://docs.docker.com/compose/install/),
### Environment Setup
Before running the project, you need to set up the environment variables. Edit the values in `stack.env`:
Before running the project, you need to set up the environment variables. Copy the `.env.example` file to `.env`:
```sh
cd deployment/docker
code stack.env
cp .env.example .env
```
Then set any of the values you would like to customize.
@@ -67,6 +70,33 @@ Then set any of the values you would like to customize.
By default, Knight Crawler is configured to be *relatively* conservative in its resource usage. If running on a decent machine (16GB RAM, i5+ or equivalent), you can increase some settings to increase consumer throughput. This is especially helpful if you have a large backlog from [importing databases](#importing-external-dumps).
In your `.env` file, under the `# Consumer` section increase `CONSUMER_REPLICAS` from `3` to `15`.
You can also increase `JOB_CONCURRENCY` from `5` to `10`.
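For reference, a sketch of the relevant `# Consumer` lines in `.env` after this tuning (values taken from the paragraph above):

```sh
# Consumer
CONSUMER_REPLICAS=15
JOB_CONCURRENCY=10
```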
### DebridMediaManager setup (optional)
There are some optional steps you should take to maximise the number of movies/tv shows we can find.
We can search DebridMediaManager hash lists which are hosted on GitHub. This allows us to add hundreds of thousands of movies and tv shows, but it requires a Personal Access Token to be generated. The software only needs read access and only for public repositories. To generate one, please follow these steps:
1. Navigate to GitHub settings -> Developer Settings -> Personal access tokens -> Fine-grained tokens (click [here](https://github.com/settings/tokens?type=beta) for a direct link)
2. Press `Generate new token`
3. Fill out the form (example data below):
```
Token name:
KnightCrawler
Expiration:
90 days
Description:
<blank>
Repository access
(checked) Public Repositories (read-only)
```
4. Click `Generate token`
5. Take the new token and add it to the bottom of the [.env](deployment/docker/.env) file
```
GithubSettings__PAT=<YOUR TOKEN HERE>
```
### Configure external access
Please choose which applies to you:
@@ -116,7 +146,7 @@ Remove or comment out the port for the addon, and connect it to Caddy:
addon:
<<: *knightcrawler-app
env_file:
- stack.env
- .env
hostname: knightcrawler-addon
image: gabisonfire/knightcrawler-addon:latest
labels:

View File

@@ -1,112 +0,0 @@
# git-cliff ~ configuration file
# https://git-cliff.org/docs/configuration
[changelog]
# changelog header
header = """
# Changelog\n
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n
"""
# template for the changelog body
# https://keats.github.io/tera/docs/#introduction
body = """
{%- macro remote_url() -%}
https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}
{%- endmacro -%}
{% if version -%}
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else -%}
## [Unreleased]
{% endif -%}
### Details\
{% for group, commits in commits | group_by(attribute="group") %}
#### {{ group | upper_first }}
{%- for commit in commits %}
- {{ commit.message | upper_first | trim }}\
{% if commit.github.username %} by @{{ commit.github.username }}{%- endif -%}
{% if commit.github.pr_number %} in \
[#{{ commit.github.pr_number }}]({{ self::remote_url() }}/pull/{{ commit.github.pr_number }}) \
{%- endif -%}
{% endfor %}
{% endfor %}
{%- if github.contributors | filter(attribute="is_first_time", value=true) | length != 0 %}
## New Contributors
{%- endif -%}
{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %}
* @{{ contributor.username }} made their first contribution
{%- if contributor.pr_number %} in \
[#{{ contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \
{%- endif %}
{%- endfor %}\n
"""
# template for the changelog footer
footer = """
{%- macro remote_url() -%}
https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}
{%- endmacro -%}
{% for release in releases -%}
{% if release.version -%}
{% if release.previous.version -%}
[{{ release.version | trim_start_matches(pat="v") }}]: \
{{ self::remote_url() }}/compare/{{ release.previous.version }}..{{ release.version }}
{% endif -%}
{% else -%}
[unreleased]: {{ self::remote_url() }}/compare/{{ release.previous.version }}..HEAD
{% endif -%}
{% endfor %}
<!-- generated by git-cliff -->
"""
# remove the leading and trailing whitespace from the templates
trim = true
[git]
# parse the commits based on https://www.conventionalcommits.org
conventional_commits = true
# filter out the commits that are not conventional
filter_unconventional = true
# process each line of a commit as an individual commit
split_commits = false
# regex for preprocessing the commit messages
commit_preprocessors = [
# remove issue numbers from commits
{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "" },
]
# regex for parsing and grouping commits
commit_parsers = [
{ message = "^.*: add", group = "Added" },
{ message = "^add", group = "Added" },
{ message = "^.*: support", group = "Added" },
{ message = "^support", group = "Added" },
{ message = "^.*: remove", group = "Removed" },
{ message = "^remove", group = "Removed" },
{ message = "^.*: delete", group = "Removed" },
{ message = "^delete", group = "Removed" },
{ message = "^.*: test", group = "Fixed" },
{ message = "^test", group = "Fixed" },
{ message = "^.*: fix", group = "Fixed" },
{ message = "^fix", group = "Fixed" },
{ message = "^.*", group = "Changed" },
]
# protect breaking changes from being skipped due to matching a skipping commit_parser
protect_breaking_commits = false
# filter out the commits that are not matched by commit parsers
filter_commits = true
# regex for matching git tags
tag_pattern = "v[0-9].*"
# regex for skipping tags
skip_tags = "v0.1.0-beta.1"
# regex for ignoring tags
ignore_tags = ""
# sort the tags topologically
topo_order = false
# sort the commits inside sections by oldest/newest order
sort_commits = "oldest"

View File

@@ -0,0 +1,55 @@
# General environment variables
TZ=Europe/London
# PostgreSQL
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DB=knightcrawler
# MongoDB
MONGODB_HOST=mongodb
MONGODB_PORT=27017
MONGODB_DB=knightcrawler
MONGODB_USER=mongo
MONGODB_PASSWORD=mongo
# RabbitMQ
RABBITMQ_HOST=rabbitmq
RABBITMQ_USER=guest
RABBITMQ_PASSWORD=guest
RABBITMQ_QUEUE_NAME=ingested
RABBITMQ_DURABLE=true
RABBITMQ_MAX_QUEUE_SIZE=0
RABBITMQ_MAX_PUBLISH_BATCH_SIZE=500
RABBITMQ_PUBLISH_INTERVAL_IN_SECONDS=10
# Metadata
## Only used if METADATA_DOWNLOAD_IMDB_DATA_ONCE is set to false. If true, the schedule is ignored
METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE="0 0 1 * *"
## If true, the metadata will be downloaded once and then the schedule will be ignored
METADATA_DOWNLOAD_IMDB_DATA_ONCE=true
## Controls the number of records processed in memory at any given time during import; higher values consume more memory
METADATA_INSERT_BATCH_SIZE=25000
# Addon
DEBUG_MODE=false
# Consumer
JOB_CONCURRENCY=5
JOBS_ENABLED=true
## can be debug for extra verbosity (a lot more verbosity - useful for development)
LOG_LEVEL=info
MAX_CONNECTIONS_PER_TORRENT=10
MAX_CONNECTIONS_OVERALL=100
TORRENT_TIMEOUT=30000
UDP_TRACKERS_ENABLED=true
CONSUMER_REPLICAS=3
## Fix for #66 - toggle on for development
AUTO_CREATE_AND_APPLY_MIGRATIONS=false
## Allows control of the threshold for matching titles to the IMDB dataset. The closer to 0, the more strict the matching.
TITLE_MATCH_THRESHOLD=0.25
# Producer
GITHUB_PAT=

View File

@@ -1,64 +0,0 @@
[Application]
FileLogger\Age=1
FileLogger\AgeType=1
FileLogger\Backup=true
FileLogger\DeleteOld=true
FileLogger\Enabled=true
FileLogger\MaxSizeBytes=66560
FileLogger\Path=/config/qBittorrent/logs
[AutoRun]
enabled=false
program=
[BitTorrent]
Session\AnonymousModeEnabled=true
Session\BTProtocol=TCP
Session\ConnectionSpeed=150
Session\DefaultSavePath=/downloads/
Session\ExcludedFileNames=
Session\MaxActiveCheckingTorrents=20
Session\MaxActiveDownloads=20
Session\MaxActiveTorrents=50
Session\MaxActiveUploads=50
Session\MaxConcurrentHTTPAnnounces=1000
Session\MaxConnections=2000
Session\Port=6881
Session\QueueingSystemEnabled=true
Session\TempPath=/downloads/incomplete/
Session\TorrentStopCondition=MetadataReceived
[Core]
AutoDeleteAddedTorrentFile=Never
[LegalNotice]
Accepted=true
[Meta]
MigrationVersion=6
[Network]
PortForwardingEnabled=true
Proxy\HostnameLookupEnabled=false
Proxy\Profiles\BitTorrent=true
Proxy\Profiles\Misc=true
Proxy\Profiles\RSS=true
[Preferences]
Connection\PortRangeMin=6881
Connection\ResolvePeerCountries=false
Connection\UPnP=false
Downloads\SavePath=/downloads/
Downloads\TempPath=/downloads/incomplete/
General\Locale=en
MailNotification\req_auth=true
WebUI\Address=*
WebUI\AuthSubnetWhitelist=0.0.0.0/0
WebUI\AuthSubnetWhitelistEnabled=true
WebUI\HostHeaderValidation=false
WebUI\LocalHostAuth=false
WebUI\ServerDomains=*
[RSS]
AutoDownloader\DownloadRepacks=true
AutoDownloader\SmartEpisodeFilter=s(\\d+)e(\\d+), (\\d+)x(\\d+), "(\\d{4}[.\\-]\\d{1,2}[.\\-]\\d{1,2})", "(\\d{1,2}[.\\-]\\d{1,2}[.\\-]\\d{4})"

View File

@@ -1,244 +1,139 @@
version: "3.9"
name: knightcrawler
networks:
knightcrawler-network:
name: knightcrawler-network
driver: bridge
x-restart: &restart-policy "unless-stopped"
volumes:
postgres:
lavinmq:
redis:
x-basehealth: &base-health
interval: 10s
timeout: 10s
retries: 3
start_period: 10s
x-rabbithealth: &rabbitmq-health
test: rabbitmq-diagnostics -q ping
<<: *base-health
x-mongohealth: &mongodb-health
test: ["CMD", "mongosh", "--eval", "db.adminCommand('ping')"]
<<: *base-health
x-postgreshealth: &postgresdb-health
test: pg_isready
<<: *base-health
x-apps: &knightcrawler-app
depends_on:
mongodb:
condition: service_healthy
postgres:
condition: service_healthy
rabbitmq:
condition: service_healthy
restart: *restart-policy
services:
## Postgres is the database that is used by the services.
## All downloaded metadata is stored in this database.
postgres:
env_file: stack.env
healthcheck:
test: [ "CMD", "sh", "-c", "pg_isready -h localhost -U $$POSTGRES_USER" ]
timeout: 10s
interval: 10s
retries: 3
start_period: 10s
image: postgres:latest
env_file: .env
environment:
PGUSER: postgres # needed for healthcheck.
# # If you need the database to be accessible from outside, please open the below port.
# # Furthermore, please, please, please, change the username and password in the stack.env file.
# # Furthermore, please, please, please, change the username and password in the .env file.
# # If you want to enhance your security even more, create a new user for the database with a strong password.
# ports:
# - "5432:5432"
networks:
- knightcrawler-network
restart: unless-stopped
volumes:
- postgres:/var/lib/postgresql/data
## Redis is used as a cache for the services.
## It is used to store the infohashes that are currently being processed in sagas, as well as interim data.
redis:
env_file: stack.env
healthcheck:
test: ["CMD-SHELL", "redis-cli ping"]
timeout: 10s
interval: 10s
retries: 3
start_period: 10s
image: redis/redis-stack:latest
# # If you need redis to be accessible from outside, please open the below port.
# ports:
# - "6379:6379"
healthcheck:
test: ["CMD-SHELL", "redis-cli ping"]
<<: *base-health
restart: *restart-policy
networks:
- knightcrawler-network
restart: unless-stopped
volumes:
- redis:/data
## LavinMQ is used as a message broker for the services.
## It is a high-performance drop-in replacement for RabbitMQ.
## It is used to communicate between the services.
lavinmq:
env_file: stack.env
mongodb:
image: mongo:latest
env_file: .env
environment:
MONGO_INITDB_ROOT_USERNAME: ${MONGODB_USER:?Variable MONGODB_USER not set}
MONGO_INITDB_ROOT_PASSWORD: ${MONGODB_PASSWORD:?Variable MONGODB_PASSWORD not set}
# # If you need the database to be accessible from outside, please open the below port.
# # Furthermore, please, please, please, look at the documentation for lavinmq / rabbitmq on how to secure the service.
# # Furthermore, please, please, please, change the username and password in the .env file.
# ports:
# - "27017:27017"
volumes:
- mongo:/data/db
restart: *restart-policy
healthcheck: *mongodb-health
networks:
- knightcrawler-network
rabbitmq:
image: rabbitmq:3-management
# # If you need the database to be accessible from outside, please open the below port.
# # Furthermore, please, please, please, look at the documentation for rabbit on how to secure the service.
# ports:
# - "5672:5672"
# - "15672:15672"
# - "15692:15692"
image: cloudamqp/lavinmq:latest
healthcheck:
test: ["CMD-SHELL", "lavinmqctl status"]
timeout: 10s
interval: 10s
retries: 3
start_period: 10s
restart: unless-stopped
networks:
- knightcrawler-network
volumes:
- lavinmq:/var/lib/lavinmq/
## The addon. This is what is used in stremio
addon:
depends_on:
metadata:
condition: service_completed_successfully
migrator:
condition: service_completed_successfully
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
hostname: knightcrawler-addon
image: gabisonfire/knightcrawler-addon:2.0.26
labels:
logging: promtail
- rabbitmq:/var/lib/rabbitmq
hostname: ${RABBITMQ_HOST}
restart: *restart-policy
healthcheck: *rabbitmq-health
networks:
- knightcrawler-network
producer:
image: gabisonfire/knightcrawler-producer:latest
labels:
logging: "promtail"
env_file: .env
<<: *knightcrawler-app
networks:
- knightcrawler-network
consumer:
image: gabisonfire/knightcrawler-consumer:latest
env_file: .env
labels:
logging: "promtail"
deploy:
replicas: ${CONSUMER_REPLICAS}
<<: *knightcrawler-app
networks:
- knightcrawler-network
metadata:
image: gabisonfire/knightcrawler-metadata:latest
env_file: .env
labels:
logging: "promtail"
restart: no
networks:
- knightcrawler-network
addon:
<<: *knightcrawler-app
env_file: .env
hostname: knightcrawler-addon
image: gabisonfire/knightcrawler-addon:latest
labels:
logging: "promtail"
networks:
- knightcrawler-network
# - caddy
ports:
- "7000:7000"
restart: unless-stopped
## The consumer is responsible for consuming infohashes and orchestrating the download of metadata.
consumer:
depends_on:
metadata:
condition: service_completed_successfully
migrator:
condition: service_completed_successfully
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-consumer:2.0.26
labels:
logging: promtail
networks:
- knightcrawler-network
restart: unless-stopped
## The debrid collector is responsible for downloading metadata from debrid services. (Currently only RealDebrid is supported)
debridcollector:
depends_on:
metadata:
condition: service_completed_successfully
migrator:
condition: service_completed_successfully
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-debrid-collector:2.0.26
labels:
logging: promtail
networks:
- knightcrawler-network
restart: unless-stopped
networks:
knightcrawler-network:
driver: bridge
name: knightcrawler-network
## The metadata service is responsible for downloading publicly available IMDb datasets.
## This is used to enrich the metadata during production of ingested infohashes.
metadata:
depends_on:
migrator:
condition: service_completed_successfully
env_file: stack.env
image: gabisonfire/knightcrawler-metadata:2.0.26
networks:
- knightcrawler-network
restart: "no"
# caddy:
# name: caddy
# external: true
## The migrator is responsible for migrating the database schema.
migrator:
depends_on:
postgres:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-migrator:2.0.26
networks:
- knightcrawler-network
restart: "no"
## The producer is responsible for producing infohashes by acquiring them from various sites, including DMM.
producer:
depends_on:
metadata:
condition: service_completed_successfully
migrator:
condition: service_completed_successfully
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-producer:2.0.26
labels:
logging: promtail
networks:
- knightcrawler-network
restart: unless-stopped
## QBit collector utilizes QBitTorrent to download metadata.
qbitcollector:
depends_on:
metadata:
condition: service_completed_successfully
migrator:
condition: service_completed_successfully
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
qbittorrent:
condition: service_healthy
deploy:
replicas: ${QBIT_REPLICAS:-0}
env_file: stack.env
image: gabisonfire/knightcrawler-qbit-collector:2.0.26
labels:
logging: promtail
networks:
- knightcrawler-network
restart: unless-stopped
## QBitTorrent is a torrent client that can be used to download torrents. In this case it's used to download metadata.
## The QBit collector requires this.
qbittorrent:
deploy:
replicas: ${QBIT_REPLICAS:-0}
env_file: stack.env
environment:
PGID: "1000"
PUID: "1000"
TORRENTING_PORT: "6881"
WEBUI_PORT: "8080"
healthcheck:
test: ["CMD-SHELL", "curl --fail http://localhost:8080"]
timeout: 10s
interval: 10s
retries: 3
start_period: 10s
image: lscr.io/linuxserver/qbittorrent:latest
networks:
- knightcrawler-network
ports:
- "6881:6881/tcp"
- "6881:6881/udp"
# if you want to expose the webui, uncomment the following line
# - "8001:8080"
restart: unless-stopped
volumes:
- ./config/qbit/qbittorrent.conf:/config/qBittorrent/qBittorrent.conf
volumes:
postgres:
mongo:
rabbitmq:

View File

@@ -4,7 +4,7 @@
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Once you have confirmed Caddy works you should comment out
## the below line:
acme_ca https://acme-staging-v02.api.letsencrypt.org/directory
acme_ca https://acme-staging-v02.api.letsencrypt.org/directory
}
(security-headers) {

View File

@@ -16,7 +16,7 @@ rule_files:
scrape_configs:
- job_name: "rabbitmq"
static_configs:
- targets: ["lavinmq:15692"]
- targets: ["rabbitmq:15692"]
- job_name: "postgres-exporter"
static_configs:
- targets: ["postgres-exporter:9187"]

View File

@@ -1,87 +0,0 @@
x-basehealth: &base-health
interval: 10s
timeout: 10s
retries: 3
start_period: 10s
x-lavinhealth: &lavinmq-health
test: [ "CMD-SHELL", "lavinmqctl status" ]
<<: *base-health
x-redishealth: &redis-health
test: redis-cli ping
<<: *base-health
x-postgreshealth: &postgresdb-health
test: [ "CMD", "sh", "-c", "pg_isready -h localhost -U $$POSTGRES_USER" ]
<<: *base-health
x-qbit: &qbit-health
test: "curl --fail http://localhost:8080"
<<: *base-health
services:
postgres:
image: postgres:latest
environment:
PGUSER: postgres # needed for healthcheck.
# # If you need the database to be accessible from outside, please open the below port.
# # Furthermore, please, please, please, change the username and password in the .env file.
# # If you want to enhance your security even more, create a new user for the database with a strong password.
# ports:
# - "5432:5432"
volumes:
- postgres:/var/lib/postgresql/data
healthcheck: *postgresdb-health
restart: unless-stopped
env_file: ../../.env
networks:
- knightcrawler-network
redis:
image: redis/redis-stack:latest
# # If you need redis to be accessible from outside, please open the below port.
# ports:
# - "6379:6379"
volumes:
- redis:/data
restart: unless-stopped
healthcheck: *redis-health
env_file: ../../.env
networks:
- knightcrawler-network
lavinmq:
env_file: stack.env
# # If you need the database to be accessible from outside, please open the below port.
# # Furthermore, please, please, please, look at the documentation for lavinmq / rabbitmq on how to secure the service.
# ports:
# - "5672:5672"
# - "15672:15672"
# - "15692:15692"
image: cloudamqp/lavinmq:latest
healthcheck: *lavinmq-health
restart: unless-stopped
volumes:
- lavinmq:/var/lib/lavinmq/
## QBitTorrent is a torrent client that can be used to download torrents. In this case it's used to download metadata.
## The QBit collector requires this.
qbittorrent:
image: lscr.io/linuxserver/qbittorrent:latest
environment:
- PUID=1000
- PGID=1000
- WEBUI_PORT=8080
- TORRENTING_PORT=6881
ports:
- 6881:6881
- 6881:6881/udp
env_file: ../../.env
networks:
- knightcrawler-network
restart: unless-stopped
healthcheck: *qbit-health
volumes:
- ../../config/qbit/qbittorrent.conf:/config/qBittorrent/qBittorrent.conf

View File

@@ -1,71 +0,0 @@
x-apps: &knightcrawler-app
labels:
logging: "promtail"
env_file: ../../.env
networks:
- knightcrawler-network
x-depends: &knightcrawler-app-depends
depends_on:
redis:
condition: service_healthy
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
migrator:
condition: service_completed_successfully
metadata:
condition: service_completed_successfully
services:
metadata:
image: gabisonfire/knightcrawler-metadata:2.0.26
env_file: ../../.env
networks:
- knightcrawler-network
restart: no
depends_on:
migrator:
condition: service_completed_successfully
migrator:
image: gabisonfire/knightcrawler-migrator:2.0.26
env_file: ../../.env
networks:
- knightcrawler-network
restart: no
depends_on:
postgres:
condition: service_healthy
addon:
image: gabisonfire/knightcrawler-addon:2.0.26
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
hostname: knightcrawler-addon
ports:
- "7000:7000"
consumer:
image: gabisonfire/knightcrawler-consumer:2.0.26
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
debridcollector:
image: gabisonfire/knightcrawler-debrid-collector:2.0.26
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
producer:
image: gabisonfire/knightcrawler-producer:2.0.26
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
qbitcollector:
image: gabisonfire/knightcrawler-qbit-collector:2.0.26
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
depends_on:
qbittorrent:
condition: service_healthy

View File

@@ -1,4 +0,0 @@
networks:
knightcrawler-network:
driver: bridge
name: knightcrawler-network

View File

@@ -1,4 +0,0 @@
volumes:
postgres:
redis:
lavinmq:

View File

@@ -1,7 +0,0 @@
services:
qbittorrent:
deploy:
replicas: 0
qbitcollector:
deploy:
replicas: 0

View File

@@ -1,7 +0,0 @@
version: "3.9"
name: "knightcrawler"
include:
- ./components/network.yaml
- ./components/volumes.yaml
- ./components/infrastructure.yaml
- ./components/knightcrawler.yaml

View File

@@ -1,41 +0,0 @@
# General environment variables
TZ=Europe/London
# PostgreSQL
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DB=knightcrawler
# Redis
REDIS_HOST=redis
REDIS_PORT=6379
REDIS_EXTRA=abortConnect=false,allowAdmin=true
# AMQP
RABBITMQ_HOST=lavinmq
RABBITMQ_USER=guest
RABBITMQ_PASSWORD=guest
RABBITMQ_CONSUMER_QUEUE_NAME=ingested
RABBITMQ_DURABLE=true
RABBITMQ_MAX_QUEUE_SIZE=0
RABBITMQ_MAX_PUBLISH_BATCH_SIZE=500
RABBITMQ_PUBLISH_INTERVAL_IN_SECONDS=10
# Metadata
METADATA_INSERT_BATCH_SIZE=50000
# Collectors
COLLECTOR_QBIT_ENABLED=false
COLLECTOR_DEBRID_ENABLED=true
COLLECTOR_REAL_DEBRID_API_KEY=
QBIT_HOST=http://qbittorrent:8080
QBIT_TRACKERS_URL=https://raw.githubusercontent.com/ngosang/trackerslist/master/trackers_all_http.txt
QBIT_CONCURRENCY=8
# Number of replicas for the qBittorrent collector and qBitTorrent client. Should be 0 or 1.
QBIT_REPLICAS=0
# Addon
DEBUG_MODE=false

View File

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<buildprofiles xsi:noNamespaceSchemaLocation="https://resources.jetbrains.com/writerside/1.0/build-profiles.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<variables>
<header-logo>knight-crawler-logo.png</header-logo>
</variables>
<build-profile instance="kc">
<variables>
<noindex-content>true</noindex-content>
</variables>
</build-profile>
</buildprofiles>

Binary file not shown (image added, 568 KiB).

docs/Writerside/kc.tree (new file, 13 lines)
View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE instance-profile
SYSTEM "https://resources.jetbrains.com/writerside/1.0/product-profile.dtd">
<instance-profile id="kc" name="Knight Crawler"
start-page="Overview.md">
<toc-element topic="Overview.md"/>
<toc-element topic="Getting-started.md">
</toc-element>
<toc-element topic="External-access.md"/>
<toc-element topic="Supported-Debrid-services.md"/>
</instance-profile>

View File

@@ -0,0 +1,57 @@
# External access
This guide outlines how to use Knight Crawler on other devices, such as your TV. Out of the box it is only reachable from the machine it is installed on, but with some extra effort we can make it accessible elsewhere.
This limitation is set by Stremio, as [explained here](https://github.com/Stremio/stremio-features/issues/687#issuecomment-1890546094).
## What to keep in mind
Before we make Knight Crawler available outside your home network, we've got to talk about safety. No software is
perfect, including ours. Knight Crawler is built on lots of different parts, some made by other people. So, if we keep
it just for your home network, it's a bit safer. But if you want to use it over the internet, just know that keeping
your devices secure is up to you. We won't be responsible for any problems or lost data if you use Knight Crawler that way.
## Initial setup
To enable external access for Knight Crawler, whether it's within your home network or over the internet, you'll
need to follow these initial setup steps:
- Set up Caddy, a powerful and easy-to-use web server.
- Disable the open port in the Knight Crawler <path>docker-compose.yaml</path> file.
### Caddy
A basic Caddy configuration is included with Knight Crawler in the deployment directory.
<path>deployment/docker/optional-services/caddy</path>
```Generic
deployment/
└── docker/
└── optional-services/
└── caddy/
├── config/
│ ├── snippets/
│ │ └── cloudflare-replace-X-Forwarded-For
│ └── Caddyfile
├── logs/
└── docker-compose.yaml
```
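As a rough sketch (the domain below is a placeholder; the `security-headers` snippet and the `knightcrawler-addon` hostname come from the bundled configuration), a site block in the <path>Caddyfile</path> could look like this:

```Generic
knightcrawler.example.com {
    import security-headers
    reverse_proxy knightcrawler-addon:7000
}
```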
To disable the open port, find the exposed port mapping in your <path>docker-compose.yaml</path> and remove or comment it out:
```yaml
ports:
  - "8080:8080"
```
By disabling the default port, Knight Crawler will only be accessible internally within your network, ensuring added security.
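As a sketch of what this looks like in <path>docker-compose.yaml</path> (mirroring the addon example from the project README; the external `caddy` network is an assumption and must already exist):

```yaml
addon:
  <<: *knightcrawler-app
  env_file:
    - .env
  hostname: knightcrawler-addon
  image: gabisonfire/knightcrawler-addon:latest
  networks:
    - knightcrawler-network
    - caddy
  # ports:
  #   - "7000:7000"
```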
## Home network access
## Internet access
### Through a VPN
### On the public web
## Troubleshooting?
## Additional Resources?

View File

@@ -0,0 +1,192 @@
# Getting started
Knight Crawler is provided as an all-in-one solution. This means we include all the necessary software you need to get started
out of the box.
## Before you start
Make sure that you have:
- A place to host Knight Crawler
- [Docker](https://docs.docker.com/get-docker/) and [Compose](https://docs.docker.com/compose/install/) installed
- A [GitHub](https://github.com/) account _(optional)_
## Download the files
Installing Knight Crawler is as simple as downloading a copy of the [deployment directory](https://github.com/Gabisonfire/knightcrawler/tree/master/deployment/docker).
A basic installation requires only two files:
- <path>deployment/docker/.env.example</path>
- <path>deployment/docker/docker-compose.yaml</path>.
For this guide I will be placing them in a directory in my home folder, <path>~/knightcrawler</path>.
Rename the <path>.env.example</path> file to <path>.env</path>:
```
~/
└── knightcrawler/
├── .env
└── docker-compose.yaml
```
## Initial configuration
Below are a few recommended configuration changes.
Open the <path>.env</path> file in your favourite editor.
> If you are using an external database, configure it in the <path>.env</path> file. Don't forget to disable the ones
> included in the <path>docker-compose.yaml</path>.
### Database credentials
It is strongly recommended that you change the credentials for the databases included with Knight Crawler. This is best done before running Knight Crawler for the first time, as it is much harder to change the passwords once the services have been started.
```Bash
POSTGRES_PASSWORD=postgres
...
MONGODB_PASSWORD=mongo
...
RABBITMQ_PASSWORD=guest
```
Here are a few options for generating a secure password:
```Bash
# Linux
tr -cd '[:alnum:]' < /dev/urandom | fold -w 64 | head -n 1
# Or you could use openssl
openssl rand -hex 32
```
```Python
# Python
import secrets
print(secrets.token_hex(32))
```
### Your time zone
```Bash
TZ=Europe/London
```
A list of time zones can be found on [Wikipedia](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
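On Linux you can usually read your machine's current zone directly (a convenience, assuming `timedatectl` is available):

```Bash
timedatectl show --property=Timezone --value
# e.g. Europe/London
```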
### Consumers
```Bash
JOB_CONCURRENCY=5
...
MAX_CONNECTIONS_PER_TORRENT=10
...
CONSUMER_REPLICAS=3
```
These are totally subjective to your machine and network capacity. The defaults above are fairly minimal and will work on most machines.
`JOB_CONCURRENCY` is how many films and tv shows each consumer should process at once. Because this affects every consumer, raising it multiplies the strain on your system. It's probably best to leave this at 5, but you can experiment with it if you wish.
`MAX_CONNECTIONS_PER_TORRENT` is how many peers a consumer will attempt to connect to while collecting metadata. Increasing this value can speed up processing, but you will eventually reach a point where more connections are being made than your router can handle, causing a cascading failure where your internet stops working. If you are going to increase this value, try raising it by 10 at a time.
> Increasing this value increases the max connections for every parallel job, for every consumer. For example
> with the default values above this means that Knight Crawler will be on average making `(5 x 3) x 10 = 150`
> connections at any one time.
>
{style="warning"}
`CONSUMER_REPLICAS` is how many consumers should be started initially. You can increase or decrease the number of consumers while the service is running with the command `docker compose up -d --scale consumer=<number>`.
### GitHub personal access token
This step is optional but strongly recommended. [Debrid Media Manager](https://debridmediamanager.com/start) is a media library manager for Debrid services. When a user of this service chooses to export/share their library publicly, it is saved to a public GitHub repository. This is, essentially, a repository containing a vast number of ready-to-go films and tv shows. Knight Crawler comes with the ability to read these exported lists, but it requires a GitHub account to make it work.
Knight Crawler needs a personal access token with read-only access to public repositories. This means we cannot access any private repositories you have.
1. Navigate to GitHub's token settings ([direct link](https://github.com/settings/tokens?type=beta)):
   - Open `GitHub settings`.
   - Click on `Developer Settings`.
   - Select `Personal access tokens`.
   - Choose `Fine-grained tokens`.
2. Press `Generate new token`.
3. Fill out the form with the following information:
```Generic
Token name:
KnightCrawler
Expiration:
90 days
Description:
<blank>
Repository access:
(checked) Public Repositories (read-only)
```
4. Click `Generate token`.
5. Take the new token and add it to the bottom of the <path>.env</path> file:
```Bash
# Producer
GITHUB_PAT=<YOUR TOKEN HERE>
```
## Start Knight Crawler
To start Knight Crawler use the following command:
```Bash
docker compose up -d
```
Then we can follow the logs to watch it start:
```Bash
docker compose logs -f --since 1m
```
> Knight Crawler will only be accessible on the machine you run it on. To make it accessible from other machines, see [External access](External-access.md).
>
{style="note"}
To stop following the logs press <shortcut>Ctrl+C</shortcut> at any time.
The Knight Crawler configuration page should now be accessible in your web browser at [http://localhost:7000](http://localhost:7000).
## Start more consumers
If you wish to speed up the processing of the films and tv shows that Knight Crawler finds, you'll likely want to increase the number of consumers.
The command below can be used to either increase or decrease the number of running consumers. Gradually increase the number until you encounter issues, then decrease until stable.
```Bash
docker compose up -d --scale consumer=<number>
```
## Stop Knight Crawler
Knight Crawler can be stopped with the following command:
```Bash
docker compose down
```

View File

@@ -0,0 +1,30 @@
# Overview
<img alt="The image shows a Knight in silvery armour looking forwards." src="knight-crawler-logo.png" title="Knight Crawler logo" width="100"/>
Knight Crawler is a self-hosted [Stremio](https://www.stremio.com/) addon for streaming torrents via
a [Debrid](Supported-Debrid-services.md "Click for a list of Debrid services we support") service.
We are active on [Discord](https://discord.gg/8fQdxay9z2) for both support and casual conversation.
> Knight Crawler is currently alpha software.
>
> Users are responsible for ensuring their data is backed up regularly.
>
> Please read the changelogs before updating to the latest version.
>
{style="warning"}
## What does Knight Crawler do?
Knight Crawler is an addon for [Stremio](https://www.stremio.com/). It began as a fork of the very popular
[Torrentio](https://github.com/TheBeastLT/torrentio-scraper) addon. Knight Crawler essentially does the following:
1. It searches the internet for available films and tv shows.
2. It collects as much information as it can about each film and tv show it finds.
3. It then stores this information in a database for easy access.
When you choose a film or tv show to watch in Stremio, a request is sent to your installation of Knight Crawler.
Knight Crawler queries the database and returns a list of all the copies it has stored as Debrid links.
This enables playback to begin immediately for your chosen media.
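For the curious, Stremio addons are queried over plain HTTP, so you can observe this flow by hand. A hypothetical lookup against a local installation (the IMDb id is just an example, and your addon URL may include a configuration prefix):

```Bash
curl http://localhost:7000/stream/movie/tt0111161.json
# Responds with a JSON object whose "streams" array lists the stored Debrid links
```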

View File

@@ -0,0 +1,3 @@
# Supported Debrid services
Start typing here...

View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE ihp SYSTEM "https://resources.jetbrains.com/writerside/1.0/ihp.dtd">
<ihp version="2.0">
<topics dir="topics" web-path="topics"/>
<images dir="images" web-path="knightcrawler"/>
<instance src="kc.tree"/>
</ihp>

View File

@@ -1,36 +0,0 @@
{
"Serilog": {
"Using": [ "Serilog.Sinks.Console" ],
"MinimumLevel": {
"Default": "Information",
"Override": {
"Microsoft": "Warning",
"System": "Warning",
"Npgsql.Command": "Warning",
"Marten.IDocumentStore": "Warning",
"Wolverine.Runtime.WolverineRuntime": "Warning",
"Wolverine.Runtime.Agents.NodeAgentController": "Warning",
"Oakton.Resources.ResourceSetupHostService": "Warning",
"System.Net.Http.HttpClient.Scraper.LogicalHandler": "Warning",
"System.Net.Http.HttpClient.Scraper.ClientHandler": "Warning",
"Quartz.Impl.StdSchedulerFactory": "Warning",
"Quartz.Core.QuartzScheduler": "Warning",
"Quartz.Simpl.RAMJobStore": "Warning",
"Quartz.Core.JobRunShell": "Warning",
"Quartz.Core.SchedulerSignalerImpl": "Warning"
}
},
"WriteTo": [
{
"Name": "Console",
"Args": {
"outputTemplate": "{Timestamp:HH:mm:ss} [{Level}] [{SourceContext}] {Message}{NewLine}{Exception}"
}
}
],
"Enrich": [ "FromLogContext", "WithMachineName", "WithThreadId" ],
"Properties": {
"Application": "Metadata"
}
}
}

View File

@@ -1,57 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk.Worker">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<UserSecretsId>54cad2ee-57df-4bb2-a192-d5d501448e0a</UserSecretsId>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Dapper" Version="2.1.35" />
<PackageReference Include="MassTransit" Version="8.2.0" />
<PackageReference Include="MassTransit.RabbitMQ" Version="8.2.0" />
<PackageReference Include="MassTransit.Redis" Version="8.2.0" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
<PackageReference Include="Polly" Version="8.3.1" />
<PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
<PackageReference Include="System.Interactive.Async" Version="6.0.1" />
</ItemGroup>
<ItemGroup>
<Content Remove="Configuration\logging.json" />
<None Include="Configuration\logging.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="requirements.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<Content Remove="eng\**" />
<None Remove="eng\**" />
</ItemGroup>
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
<Content Remove="python\**" />
<None Include="python\**">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\shared\SharedContracts.csproj" />
</ItemGroup>
<ItemGroup>
<Compile Remove="eng\**" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Remove="eng\**" />
</ItemGroup>
</Project>

View File

@@ -1,33 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 12.00
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DebridCollector", "DebridCollector.csproj", "{64C3253C-0638-4825-AC82-7D5600D1F9C9}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharedContracts", "..\shared\SharedContracts.csproj", "{C9BE500C-CE04-480B-874F-A85D33CAA821}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F53-28E6-404F-9EFE-DADFBEF8338B}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{72A042C3-B4F3-45C5-AC20-041FE8F41EFC}"
ProjectSection(SolutionItems) = preProject
eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
eng\install-python-reqs.sh = eng\install-python-reqs.sh
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{64C3253C-0638-4825-AC82-7D5600D1F9C9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{64C3253C-0638-4825-AC82-7D5600D1F9C9}.Debug|Any CPU.Build.0 = Debug|Any CPU
{64C3253C-0638-4825-AC82-7D5600D1F9C9}.Release|Any CPU.ActiveCfg = Release|Any CPU
{64C3253C-0638-4825-AC82-7D5600D1F9C9}.Release|Any CPU.Build.0 = Release|Any CPU
{C9BE500C-CE04-480B-874F-A85D33CAA821}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C9BE500C-CE04-480B-874F-A85D33CAA821}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C9BE500C-CE04-480B-874F-A85D33CAA821}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C9BE500C-CE04-480B-874F-A85D33CAA821}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{C9BE500C-CE04-480B-874F-A85D33CAA821} = {2C0A0F53-28E6-404F-9EFE-DADFBEF8338B}
EndGlobalSection
EndGlobal

View File

@@ -1,31 +0,0 @@
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0 AS build
ARG TARGETARCH
WORKDIR /src
COPY shared/ shared/
COPY debrid-collector/ debrid-collector/
WORKDIR /src/debrid-collector/
RUN dotnet restore -a $TARGETARCH
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
WORKDIR /app
ENV PYTHONUNBUFFERED=1
RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
COPY --from=build /src/out .
RUN rm -rf /app/python && mkdir -p /app/python
RUN pip3 install -r /app/requirements.txt -t /app/python
RUN addgroup -S debrid && adduser -S -G debrid debrid
USER debrid
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD pgrep -f dotnet || exit 1
ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0
ENTRYPOINT ["dotnet", "DebridCollector.dll"]

View File

@@ -1,75 +0,0 @@
namespace DebridCollector.Extensions;
public static class ServiceCollectionExtensions
{
internal static IServiceCollection AddDatabase(this IServiceCollection services)
{
services.LoadConfigurationFromEnv<PostgresConfiguration>();
services.AddTransient<IDataStorage, DapperDataStorage>();
return services;
}
internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services)
{
var serviceConfiguration = services.LoadConfigurationFromEnv<DebridCollectorConfiguration>();
services.AddRealDebridClient(serviceConfiguration);
services.RegisterPythonEngine();
services.AddSingleton<IRankTorrentName, RankTorrentName>();
services.AddHostedService<DebridRequestProcessor>();
return services;
}
internal static IServiceCollection RegisterMassTransit(this IServiceCollection services)
{
var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();
var redisConfiguration = services.LoadConfigurationFromEnv<RedisConfiguration>();
services.AddMassTransit(x =>
{
x.SetKebabCaseEndpointNameFormatter();
x.UsingRabbitMq((context, cfg) =>
{
cfg.AutoStart = true;
cfg.Host(
rabbitConfiguration.Host, h =>
{
h.Username(rabbitConfiguration.Username);
h.Password(rabbitConfiguration.Password);
});
cfg.Message<CollectMetadata>(e => e.SetEntityName(rabbitConfiguration.DebridCollectorQueueName));
cfg.ConfigureEndpoints(context);
});
x.AddConsumer<PerformMetadataRequestConsumer>();
x.AddConsumer<WriteMetadataConsumer>();
x.RegisterMetadataIngestionSaga(redisConfiguration, rabbitConfiguration);
});
return services;
}
private static void RegisterMetadataIngestionSaga(this IBusRegistrationConfigurator x, RedisConfiguration redisConfiguration, RabbitMqConfiguration rabbitMqConfiguration) =>
x.AddSagaStateMachine<InfohashMetadataSagaStateMachine, InfohashMetadataSagaState>(
cfg =>
{
cfg.UseMessageRetry(r => r.Intervals(1000,2000,5000));
cfg.UseInMemoryOutbox();
})
.RedisRepository(redisConfiguration.ConnectionString, options =>
{
options.KeyPrefix = "debrid-collector:";
})
.Endpoint(
e =>
{
e.Name = rabbitMqConfiguration.DebridCollectorQueueName;
e.ConcurrentMessageLimit = 50;
e.PrefetchCount = 50;
});
}

View File

@@ -1,8 +0,0 @@
namespace DebridCollector.Features.Configuration;
public class DebridCollectorConfiguration
{
private const string Prefix = "COLLECTOR";
private const string RealDebridApiKeyVariable = "REAL_DEBRID_API_KEY";
public string RealDebridApiKey { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(RealDebridApiKeyVariable);
}

View File

@@ -1,64 +0,0 @@
namespace DebridCollector.Features.Debrid;
public class DebridRequestProcessor(IDebridHttpClient debridHttpClient, ILogger<DebridRequestProcessor> logger, IBus messageBus) : BackgroundService
{
private const int BatchDelay = 3000;
public const int MaxBatchSize = 100;
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
var requests = new List<PerformMetadataRequest>(MaxBatchSize);
var delay = TimeSpan.FromMilliseconds(BatchDelay);
while (!stoppingToken.IsCancellationRequested)
{
while (ProcessorChannel.Queue.Reader.TryRead(out var request))
{
if (requests.Count >= MaxBatchSize)
{
break;
}
if (requests.All(x => x.InfoHash != request.InfoHash))
{
requests.Add(request);
}
}
if (requests.Any())
{
await ProcessRequests(requests, stoppingToken);
requests.Clear();
}
await Task.Delay(delay, stoppingToken);
}
// After the loop ends, there may be remaining requests which were not processed. Let's process them:
if (requests.Count != 0)
{
await ProcessRequests(requests, stoppingToken);
requests.Clear();
}
}
private async Task ProcessRequests(IReadOnlyCollection<PerformMetadataRequest> requests, CancellationToken stoppingToken = default)
{
try
{
var results = await debridHttpClient.GetMetadataAsync(requests, stoppingToken);
await ProcessResponses(results);
logger.LogInformation("Processed: {Count} infoHashes", requests.Count);
}
catch (Exception e)
{
logger.LogError(e, "Failed to process infoHashes");
}
}
private async Task ProcessResponses(IEnumerable<TorrentMetadataResponse> results)
{
var messages = results.Select(response => new GotMetadata(response)).ToList();
await messageBus.PublishBatch(messages);
}
}

View File

@@ -1,6 +0,0 @@
namespace DebridCollector.Features.Debrid;
public interface IDebridHttpClient
{
public Task<IReadOnlyList<TorrentMetadataResponse>> GetMetadataAsync(IReadOnlyCollection<PerformMetadataRequest> infoHashes, CancellationToken cancellationToken = default);
}

View File

@@ -1,12 +0,0 @@
namespace DebridCollector.Features.Debrid;
public static class ProcessorChannel
{
public static readonly Channel<PerformMetadataRequest> Queue = Channel.CreateUnbounded<PerformMetadataRequest>(new()
{
SingleReader = true,
// The consumer can run concurrently, so there may be multiple writers.
SingleWriter = false,
});
public static bool AddToQueue(PerformMetadataRequest infoHash) => Queue.Writer.TryWrite(infoHash);
}

View File

@@ -1,65 +0,0 @@
namespace DebridCollector.Features.Debrid;
public class RealDebridClient(HttpClient client) : IDebridHttpClient
{
private const string TorrentsInstantAvailability = "torrents/instantAvailability/";
public async Task<IReadOnlyList<TorrentMetadataResponse>> GetMetadataAsync(IReadOnlyCollection<PerformMetadataRequest> requests, CancellationToken cancellationToken = default)
{
var responseAsString = await client.GetStringAsync($"{TorrentsInstantAvailability}{string.Join("/", requests.Select(x => x.InfoHash.ToLowerInvariant()))}", cancellationToken);
var document = JsonDocument.Parse(responseAsString);
var torrentMetadataResponses = new List<TorrentMetadataResponse>();
foreach (var request in requests)
{
if (document.RootElement.TryGetProperty(request.InfoHash.ToLowerInvariant(), out var dataElement) &&
dataElement.ValueKind == JsonValueKind.Object &&
dataElement.TryGetProperty("rd", out var rdDataElement) &&
rdDataElement.ValueKind == JsonValueKind.Array &&
rdDataElement.GetArrayLength() > 0)
{
MapResponseToMetadata(rdDataElement, torrentMetadataResponses, request);
continue;
}
torrentMetadataResponses.Add(new(request.CorrelationId, new()));
}
return torrentMetadataResponses;
}
private static void MapResponseToMetadata(JsonElement rdDataElement, List<TorrentMetadataResponse> torrentMetadataResponses, PerformMetadataRequest request)
{
var metaData = new FileDataDictionary();
foreach (var item in rdDataElement.EnumerateArray())
{
if (item.ValueKind == JsonValueKind.Object)
{
foreach (var property in item.EnumerateObject())
{
if (property.Value.ValueKind == JsonValueKind.Object)
{
var fileData = new FileData();
if (property.Value.TryGetProperty("filename", out var filenameElement) && filenameElement.ValueKind == JsonValueKind.String)
{
fileData.Filename = filenameElement.GetString();
}
if (property.Value.TryGetProperty("filesize", out var filesizeElement) && filesizeElement.ValueKind == JsonValueKind.Number)
{
fileData.Filesize = filesizeElement.GetInt64();
}
metaData[property.Name] = fileData;
}
}
}
}
torrentMetadataResponses.Add(new(request.CorrelationId, metaData));
}
}
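Reconstructed from the property reads above, the parser expects a payload of roughly this shape (values illustrative; the live API may carry additional fields):

{
  "<infohash>": {
    "rd": [
      { "1": { "filename": "Example.S01E01.mkv", "filesize": 1234567890 } }
    ]
  }
}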

View File

@@ -1,24 +0,0 @@
namespace DebridCollector.Features.Debrid;
public class RealDebridResponse : Dictionary<string, RdData?>
{
}
public class RdData
{
[JsonPropertyName("rd")]
public List<FileDataDictionary>? Rd { get; set; }
}
public class FileDataDictionary : Dictionary<string, FileData>
{
}
public class FileData
{
[JsonPropertyName("filename")]
public string? Filename { get; set; }
[JsonPropertyName("filesize")]
public long? Filesize { get; set; }
}

View File

@@ -1,30 +0,0 @@
namespace DebridCollector.Features.Debrid;
public static class ServiceCollectionExtensions
{
public static IServiceCollection AddRealDebridClient(this IServiceCollection services, DebridCollectorConfiguration serviceConfiguration)
{
services.AddHttpClient<IDebridHttpClient, RealDebridClient>(
client =>
{
client.BaseAddress = new("https://api.real-debrid.com/rest/1.0/");
client.DefaultRequestHeaders.Add("Authorization", $"Bearer {serviceConfiguration.RealDebridApiKey}");
})
.AddPolicyHandler(GetRetryPolicy())
.AddPolicyHandler(GetCircuitBreakerPolicy());
return services;
}
private static AsyncPolicy<HttpResponseMessage> GetRetryPolicy(int maxRetryCount = 5, int maxJitterTime = 1000) =>
HttpPolicyExtensions
.HandleTransientHttpError()
.WaitAndRetryAsync(maxRetryCount, retryAttempt =>
TimeSpan.FromSeconds(Math.Pow(2, retryAttempt)) +
TimeSpan.FromMilliseconds(Random.Shared.Next(0, maxJitterTime)));
private static AsyncPolicy<HttpResponseMessage> GetCircuitBreakerPolicy() =>
HttpPolicyExtensions
.HandleTransientHttpError()
.CircuitBreakerAsync(handledEventsAllowedBeforeBreaking: 5, TimeSpan.FromSeconds(30));
}
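With these defaults, successive retries wait 2, 4, 8, 16 and 32 seconds plus up to one second of random jitter; after five consecutive transient failures the circuit opens and calls fail fast for 30 seconds.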

View File

@@ -1,93 +0,0 @@
namespace DebridCollector.Features.Worker;
public static class DebridMetaToTorrentMeta
{
public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection(
IRankTorrentName rankTorrentName,
Torrent torrent,
string imdbId,
FileDataDictionary metadata,
ILogger<WriteMetadataConsumer> logger)
{
try
{
var files = new List<TorrentFile>();
foreach (var metadataEntry in metadata.Where(m => Filetypes.VideoFileExtensions.Any(ext => m.Value.Filename?.EndsWith(ext) == true)))
{
// The metadata keys appear to be 1-based file ids, so shift to the 0-based index stored locally (clamped at zero).
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
var file = new TorrentFile
{
ImdbId = imdbId,
KitsuId = 0,
InfoHash = torrent.InfoHash,
FileIndex = validFileIndex ? fileIndexMinusOne : 0,
Title = metadataEntry.Value.Filename,
Size = metadataEntry.Value.Filesize.GetValueOrDefault(),
};
var parsedTitle = rankTorrentName.Parse(file.Title, false);
if (!parsedTitle.Success)
{
logger.LogWarning("Failed to parse title {Title} for metadata mapping", file.Title);
continue;
}
file.ImdbSeason = parsedTitle.Response?.Season?.FirstOrDefault() ?? 0;
file.ImdbEpisode = parsedTitle.Response?.Episode?.FirstOrDefault() ?? 0;
files.Add(file);
}
return files;
}
catch (Exception ex)
{
logger.LogWarning("Failed to map metadata to files collection: {Exception}", ex.Message);
return [];
}
}
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string infoHash, FileDataDictionary metadata, ILogger<WriteMetadataConsumer> logger)
{
try
{
var files = new List<SubtitleFile>();
var torrentFiles = await storage.GetTorrentFiles(infoHash.ToLowerInvariant());
if (torrentFiles.Count == 0)
{
return files;
}
foreach (var metadataEntry in metadata.Where(m => Filetypes.SubtitleFileExtensions.Any(ext => m.Value.Filename?.EndsWith(ext) == true)))
{
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
var fileId = torrentFiles.FirstOrDefault(
t => Path.GetFileNameWithoutExtension(t.Title) == Path.GetFileNameWithoutExtension(metadataEntry.Value.Filename))?.Id ?? 0;
var file = new SubtitleFile
{
InfoHash = infoHash,
FileIndex = validFileIndex ? fileIndexMinusOne : 0,
FileId = fileId,
Title = metadataEntry.Value.Filename,
};
files.Add(file);
}
return files;
}
catch (Exception ex)
{
logger.LogWarning("Failed to map metadata to subtitles collection: {Exception}", ex.Message);
return [];
}
}
}

View File

@@ -1,104 +0,0 @@
namespace DebridCollector.Features.Worker;
public static class Filetypes
{
public static readonly IReadOnlyList<string> VideoFileExtensions =
[
".3g2",
".3gp",
".3gp2",
".3gpp",
".60d",
".ajp",
".asf",
".asx",
".avchd",
".avi",
".bik",
".bix",
".box",
".cam",
".dat",
".divx",
".dmf",
".dv",
".dvr-ms",
".evo",
".flc",
".fli",
".flic",
".flv",
".flx",
".gvi",
".gvp",
".h264",
".m1v",
".m2p",
".m2ts",
".m2v",
".m4e",
".m4v",
".mjp",
".mjpeg",
".mjpg",
".mkv",
".moov",
".mov",
".movhd",
".movie",
".movx",
".mp4",
".mpe",
".mpeg",
".mpg",
".mpv",
".mpv2",
".mxf",
".nsv",
".nut",
".ogg",
".ogm",
".omf",
".ps",
".qt",
".ram",
".rm",
".rmvb",
".swf",
".ts",
".vfw",
".vid",
".video",
".viv",
".vivo",
".vob",
".vro",
".wm",
".wmv",
".wmx",
".wrap",
".wvx",
".wx",
".x264",
".xvid",
];
public static readonly IReadOnlyList<string> SubtitleFileExtensions =
[
".a",
".srt",
".ass",
".ssa",
".stl",
".scc",
".ttml",
".sbv",
".dks",
".qtx",
".jss",
".vtt",
".smi",
".usf",
".idx"
];
}

View File

@@ -1,14 +0,0 @@
namespace DebridCollector.Features.Worker;
public class InfohashMetadataSagaState : SagaStateMachineInstance, ISagaVersion
{
public Torrent? Torrent { get; set; }
public string? Title { get; set; }
public string? ImdbId { get; set; }
public TorrentMetadataResponse? Metadata { get; set; }
public int RetriesAllowed { get; set; } = 2;
public Guid CorrelationId { get; set; }
public int Version { get; set; }
public int CurrentState { get; set; }
}

View File

@@ -1,69 +0,0 @@
namespace DebridCollector.Features.Worker;
public class InfohashMetadataSagaStateMachine : MassTransitStateMachine<InfohashMetadataSagaState>
{
public State Ingesting { get; private set; } = null!;
public State Writing { get; private set; } = null!;
public State Completed { get; private set; } = null!;
public Event<CollectMetadata> CollectMetadata { get; private set; } = null!;
public Event<GotMetadata> GotMetadata { get; private set; } = null!;
public Event<MetadataWritten> MetadataWritten { get; private set; } = null!;
public InfohashMetadataSagaStateMachine(ILogger<InfohashMetadataSagaStateMachine> logger)
{
InstanceState(x => x.CurrentState);
Event(() => CollectMetadata, x => x.CorrelateById(context => context.Message.CorrelationId));
Event(() => GotMetadata, x => x.CorrelateById(context => context.Message.CorrelationId));
Event(() => MetadataWritten, x => x.CorrelateById(context => context.Message.CorrelationId));
Initially(
When(CollectMetadata)
.ThenAsync(
async context =>
{
context.Saga.CorrelationId = context.Message.CorrelationId;
context.Saga.Torrent = context.Message.Torrent;
context.Saga.ImdbId = context.Message.ImdbId;
await context.Publish(new PerformMetadataRequest(context.Saga.CorrelationId, context.Saga.Torrent.InfoHash));
logger.LogInformation("Collecting Metadata for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
})
.TransitionTo(Ingesting));
During(
Ingesting,
When(GotMetadata)
.ThenAsync(
async context =>
{
context.Saga.Metadata = context.Message.Metadata;
await context.Publish(new WriteMetadata(context.Saga.Torrent, context.Saga.Metadata, context.Saga.ImdbId));
logger.LogInformation("Got Metadata for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
})
.TransitionTo(Writing));
During(
Writing,
When(MetadataWritten)
.Then(
context =>
{
if (!context.Message.WithFiles)
{
logger.LogInformation("No files written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
return;
}
logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
})
.TransitionTo(Completed)
.Finalize());
SetCompletedWhenFinalized();
}
}
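In short: CollectMetadata moves a new instance to Ingesting and requests the metadata, GotMetadata moves it to Writing and requests the write, and MetadataWritten completes and finalizes it, at which point SetCompletedWhenFinalized lets the Redis repository discard the saga state.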

View File

@@ -1,10 +0,0 @@
namespace DebridCollector.Features.Worker;
public class PerformMetadataRequestConsumer : IConsumer<PerformMetadataRequest>
{
public Task Consume(ConsumeContext<PerformMetadataRequest> context)
{
ProcessorChannel.AddToQueue(context.Message);
return Task.CompletedTask;
}
}

View File

@@ -1,22 +0,0 @@
namespace DebridCollector.Features.Worker;
[EntityName("perform-metadata-request-debrid-collector")]
public record PerformMetadataRequest(Guid CorrelationId, string InfoHash) : CorrelatedBy<Guid>;
[EntityName("torrent-metadata-response-debrid-collector")]
public record GotMetadata(TorrentMetadataResponse Metadata) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}
[EntityName("write-metadata-debrid-collector")]
public record WriteMetadata(Torrent Torrent, TorrentMetadataResponse Metadata, string ImdbId) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}
[EntityName("metadata-written-debrid-colloctor")]
public record MetadataWritten(TorrentMetadataResponse Metadata, bool WithFiles) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}

View File

@@ -1,4 +0,0 @@
namespace DebridCollector.Features.Worker;
[EntityName("torrent-metadata-response")]
public record TorrentMetadataResponse(Guid CorrelationId, FileDataDictionary Metadata) : CorrelatedBy<Guid>;

View File

@@ -1,28 +0,0 @@
namespace DebridCollector.Features.Worker;
public class WriteMetadataConsumer(IRankTorrentName rankTorrentName, IDataStorage dataStorage, ILogger<WriteMetadataConsumer> logger) : IConsumer<WriteMetadata>
{
public async Task Consume(ConsumeContext<WriteMetadata> context)
{
var request = context.Message;
var torrentFiles = DebridMetaToTorrentMeta.MapMetadataToFilesCollection(rankTorrentName, request.Torrent, request.ImdbId, request.Metadata.Metadata, logger);
if (!torrentFiles.Any())
{
await context.Publish(new MetadataWritten(request.Metadata, false));
return;
}
await dataStorage.InsertFiles(torrentFiles);
var subtitles = await DebridMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata, logger);
if (subtitles.Any())
{
await dataStorage.InsertSubtitles(subtitles);
}
await context.Publish(new MetadataWritten(request.Metadata, true));
}
}
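Ordering matters here: video files are inserted before subtitles are mapped, because MapMetadataToSubtitlesCollection reads the freshly written torrent files back and pairs each subtitle with its video by filename stem.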

View File

@@ -1,21 +0,0 @@
// Global using directives
global using System.Text.Json;
global using System.Text.Json.Serialization;
global using System.Threading.Channels;
global using DebridCollector.Extensions;
global using DebridCollector.Features.Configuration;
global using DebridCollector.Features.Debrid;
global using DebridCollector.Features.Worker;
global using MassTransit;
global using Microsoft.AspNetCore.Builder;
global using Microsoft.Extensions.DependencyInjection;
global using Polly;
global using Polly.Extensions.Http;
global using SharedContracts.Configuration;
global using SharedContracts.Dapper;
global using SharedContracts.Extensions;
global using SharedContracts.Models;
global using SharedContracts.Python;
global using SharedContracts.Python.RTN;
global using SharedContracts.Requests;

View File

@@ -1,17 +0,0 @@
var builder = WebApplication.CreateBuilder();
builder.DisableIpPortBinding();
builder.Configuration
.AddServiceConfiguration();
builder.Host
.SetupSerilog(builder.Configuration);
builder.Services
.AddServiceConfiguration()
.AddDatabase()
.RegisterMassTransit();
var app = builder.Build();
app.Run();

View File

@@ -1,2 +0,0 @@
#!/bin/bash
mkdir -p ../python
python -m pip install -r ../requirements.txt -t ../python/

View File

@@ -1,5 +0,0 @@
#!/bin/bash
rm -rf ../python
mkdir -p ../python
python3 -m pip install -r ../requirements.txt -t ../python/

View File

@@ -1 +0,0 @@
rank-torrent-name==0.2.13

View File

@@ -9,29 +9,22 @@ public static class ServiceCollectionExtensions
return services;
}
- internal static IServiceCollection AddDatabase(this IServiceCollection services)
+ internal static IServiceCollection AddMongoDb(this IServiceCollection services)
{
- services.LoadConfigurationFromEnv<PostgresConfiguration>();
- services.AddScoped<ImdbDbService>();
+ services.LoadConfigurationFromEnv<MongoConfiguration>();
+ services.AddTransient<ImdbMongoDbService>();
return services;
}
- internal static IServiceCollection AddImporters(this IServiceCollection services)
+ internal static IServiceCollection AddJobSupport(this IServiceCollection services)
{
- services.AddScoped<IFileImport<ImdbBasicEntry>, BasicsFile>();
- services.AddScoped<IFileImport<ImdbAkaEntry>, AkasFile>();
- services.AddScoped<IFileImport<ImdbEpisodeEntry>, EpisodesFile>();
- return services;
- }
- internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services)
- {
- services.LoadConfigurationFromEnv<ServiceConfiguration>();
- services.AddScoped<IImdbFileDownloader, ImdbFileDownloader>();
- services.AddHostedService<DownloadImdbDataJob>();
+ services.LoadConfigurationFromEnv<JobConfiguration>();
+ services.AddScheduler()
+ .AddTransient<DownloadImdbDataJob>()
+ .AddHostedService<JobScheduler>();
return services;
}
}

View File

@@ -12,8 +12,6 @@ internal static class WebApplicationBuilderExtensions
options =>
{
options.DefaultExecutionTimeout = 6.Hours();
- options.CodeGeneration.TypeLoadMode = TypeLoadMode.Static;
- options.Services.AssertAllExpectedPreBuiltTypesExistOnStartUp();
});
return builder;

View File

@@ -1,3 +0,0 @@
namespace Metadata.Features.ClearExistingImdbData;
public record ClearExistingImdbDataRequest(string TitleBasicsFilePath, string TitleAkasFilePath, string EpisodesFilePath);

View File

@@ -1,16 +0,0 @@
namespace Metadata.Features.ClearExistingImdbData;
public class ClearExistingImdbDataRequestHandler(ILogger<ClearExistingImdbDataRequestHandler> logger, ImdbDbService dbService)
{
public async Task<ImportImdbDataRequest> Handle(ClearExistingImdbDataRequest request, CancellationToken _)
{
logger.LogInformation("Clearing existing IMDB data from database");
await dbService.DropPgtrmIndex();
await dbService.TruncateTable(TableNames.EpisodesTable);
await dbService.TruncateTable(TableNames.AkasTable);
await dbService.TruncateTable(TableNames.MetadataTable, cascade: true);
logger.LogInformation("Existing IMDB data cleared from database");
return new(request.TitleBasicsFilePath, request.TitleAkasFilePath, request.EpisodesFilePath);
}
}

View File

@@ -0,0 +1,13 @@
namespace Metadata.Features.Configuration;
public class JobConfiguration
{
private const string Prefix = "METADATA";
private const string DownloadImdbDataVariable = "DOWNLOAD_IMDB_DATA_SCHEDULE";
private const string DownloadImdbDataOnceVariable = "DOWNLOAD_IMDB_DATA_ONCE";
private const string InsertBatchSizeVariable = "INSERT_BATCH_SIZE";
public int InsertBatchSize { get; init; } = Prefix.GetEnvironmentVariableAsInt(InsertBatchSizeVariable, 25_000);
public string DownloadImdbCronSchedule { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(DownloadImdbDataVariable, CronExpressions.EveryHour);
public bool DownloadImdbOnce { get; init; } = Prefix.GetEnvironmentVariableAsBool(DownloadImdbDataOnceVariable);
}
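Assuming the configuration helpers join the prefix and variable name with an underscore (their implementation is not shown in this diff), the job would be tuned with environment variables along these lines:

METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE="0 * * * *"
METADATA_DOWNLOAD_IMDB_DATA_ONCE=true
METADATA_INSERT_BATCH_SIZE=25000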

View File

@@ -0,0 +1,20 @@
namespace Metadata.Features.Configuration;
public class MongoConfiguration
{
private const string Prefix = "MONGODB";
private const string HostVariable = "HOST";
private const string PortVariable = "PORT";
private const string DbVariable = "DB";
private const string UsernameVariable = "USER";
private const string PasswordVariable = "PASSWORD";
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 27017);
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
public string DbName { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DbVariable);
public string ConnectionString => $"mongodb://{Uri.EscapeDataString(Username)}:{Uri.EscapeDataString(Password)}@{Host}:{Port}/{DbName}?tls=false&directConnection=true&authSource=admin";
}
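Under the same underscore-joining assumption, MONGODB_HOST=mongo, MONGODB_USER=kc, MONGODB_PASSWORD=secret and MONGODB_DB=knightcrawler with the default port (all values illustrative) resolve to mongodb://kc:secret@mongo:27017/knightcrawler?tls=false&directConnection=true&authSource=admin.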

View File

@@ -1,21 +0,0 @@
namespace Metadata.Features.Configuration;
public class PostgresConfiguration
{
private const string Prefix = "POSTGRES";
private const string HostVariable = "HOST";
private const string UsernameVariable = "USER";
private const string PasswordVariable = "PASSWORD";
private const string DatabaseVariable = "DB";
private const string PortVariable = "PORT";
private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);
public string StorageConnectionString => $"Host={Host};Port={Port};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
}

View File

@@ -1,9 +0,0 @@
namespace Metadata.Features.Configuration;
public class ServiceConfiguration
{
private const string Prefix = "METADATA";
private const string InsertBatchSizeVariable = "INSERT_BATCH_SIZE";
public int InsertBatchSize { get; init; } = Prefix.GetEnvironmentVariableAsInt(InsertBatchSizeVariable, 25_000);
}

View File

@@ -1,3 +1,3 @@
namespace Metadata.Features.DeleteDownloadedImdbData;
- public record DeleteDownloadedImdbDataRequest(string TitleBasicsFilePath, string TitleAkasFilePath, string EpisodesFilePath);
+ public record DeleteDownloadedImdbDataRequest(string FilePath);

View File

@@ -1,23 +1,21 @@
namespace Metadata.Features.DeleteDownloadedImdbData;
- public class DeleteDownloadedImdbDataRequestHandler(ILogger<DeleteDownloadedImdbDataRequestHandler> logger)
+ public class DeleteDownloadedImdbDataRequestHandler(ILogger<DeleteDownloadedImdbDataRequestHandler> logger, JobConfiguration configuration)
{
public Task Handle(DeleteDownloadedImdbDataRequest request, CancellationToken _)
{
- DeleteFile(request.TitleBasicsFilePath);
- DeleteFile(request.TitleAkasFilePath);
- DeleteFile(request.EpisodesFilePath);
- logger.LogInformation("Processing Completed");
- Environment.Exit(0);
+ logger.LogInformation("Deleting file {FilePath}", request.FilePath);
+ File.Delete(request.FilePath);
+ logger.LogInformation("File Deleted");
+ if (configuration.DownloadImdbOnce)
+ {
+ logger.LogInformation("Processing Completed: Exiting application as DownloadImdbOnce is set to true");
+ Environment.Exit(0);
+ }
return Task.CompletedTask;
}
- private void DeleteFile(string file)
- {
- logger.LogInformation("Deleting file {FilePath}", file);
- File.Delete(file);
- logger.LogInformation("File Deleted");
- }
}

View File

@@ -0,0 +1,8 @@
namespace Metadata.Features.DownloadImdbData;
public class DownloadImdbDataJob(IMessageBus messageBus, JobConfiguration configuration) : BaseJob
{
public override bool IsSchedulable => !configuration.DownloadImdbOnce && !string.IsNullOrEmpty(configuration.DownloadImdbCronSchedule);
public override string JobName => nameof(DownloadImdbDataJob);
public override async Task Invoke() => await messageBus.SendAsync(new GetImdbDataRequest());
}

View File

@@ -1,20 +1,30 @@
namespace Metadata.Features.DownloadImdbData;
- public class GetImdbDataRequestHandler(IHttpClientFactory clientFactory, IImdbFileDownloader downloader, ILogger<GetImdbDataRequestHandler> logger)
+ public class GetImdbDataRequestHandler(IHttpClientFactory clientFactory, ILogger<GetImdbDataRequestHandler> logger)
{
private const string TitleBasicsFileName = "title.basics.tsv";
- private const string TitleAkasFileName = "title.akas.tsv";
- private const string EpisodesFileName = "title.episode.tsv";
- public async Task<ClearExistingImdbDataRequest> Handle(GetImdbDataRequest _, CancellationToken cancellationToken)
+ public async Task<ImportImdbDataRequest> Handle(GetImdbDataRequest _, CancellationToken cancellationToken)
{
logger.LogInformation("Downloading IMDB data");
var client = clientFactory.CreateClient("imdb-data");
- var tempBasicsFile = await downloader.DownloadFileToTempPath(client, TitleBasicsFileName, cancellationToken);
- var tempAkasFile = await downloader.DownloadFileToTempPath(client, TitleAkasFileName, cancellationToken);
- var tempEpisodesFile = await downloader.DownloadFileToTempPath(client, EpisodesFileName, cancellationToken);
- return new(tempBasicsFile, tempAkasFile, tempEpisodesFile);
+ var response = await client.GetAsync($"{TitleBasicsFileName}.gz", cancellationToken);
+ var tempFile = Path.Combine(Path.GetTempPath(), TitleBasicsFileName);
+ response.EnsureSuccessStatusCode();
+ await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
+ await using var gzipStream = new GZipStream(stream, CompressionMode.Decompress);
+ await using var fileStream = File.Create(tempFile);
+ await gzipStream.CopyToAsync(fileStream, cancellationToken);
+ logger.LogInformation("Downloaded IMDB data to {TempFile}", tempFile);
+ fileStream.Close();
+ return new(tempFile);
}
}
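Note that the gzipped basics dump is streamed through GZipStream straight into a temp file, so the decompressed TSV is never buffered in memory.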

View File

@@ -1,6 +0,0 @@
namespace Metadata.Features.DownloadImdbData;
public interface IImdbFileDownloader
{
Task<string> DownloadFileToTempPath(HttpClient client, string fileName, CancellationToken cancellationToken);
}

View File

@@ -1,24 +0,0 @@
namespace Metadata.Features.DownloadImdbData;
public class ImdbFileDownloader(ILogger<ImdbFileDownloader> logger) : IImdbFileDownloader
{
public async Task<string> DownloadFileToTempPath(HttpClient client, string fileName, CancellationToken cancellationToken)
{
var response = await client.GetAsync($"{fileName}.gz", cancellationToken);
var tempFile = Path.Combine(Path.GetTempPath(), fileName);
response.EnsureSuccessStatusCode();
await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
await using var gzipStream = new GZipStream(stream, CompressionMode.Decompress);
await using var fileStream = File.Create(tempFile);
await gzipStream.CopyToAsync(fileStream, cancellationToken);
logger.LogInformation("Downloaded IMDB data '{Filename}' to {TempFile}", fileName, tempFile);
fileStream.Close();
return tempFile;
}
}

View File

@@ -1,89 +0,0 @@
namespace Metadata.Features.Files;
public class AkasFile(ILogger<AkasFile> logger, ImdbDbService dbService) : IFileImport<ImdbAkaEntry>
{
public async Task Import(string fileName, int batchSize, CancellationToken cancellationToken)
{
logger.LogInformation("Importing Downloaded IMDB AKAs data from {FilePath}", fileName);
var csvConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
{
Delimiter = "\t",
BadDataFound = null, // Skip Bad Data from Imdb
MissingFieldFound = null, // Skip Missing Fields from Imdb
};
using var reader = new StreamReader(fileName);
using var csv = new CsvReader(reader, csvConfig);
var channel = Channel.CreateBounded<ImdbAkaEntry>(new BoundedChannelOptions(batchSize)
{
FullMode = BoundedChannelFullMode.Wait,
});
await csv.ReadAsync();
var batchInsertTask = CreateBatchOfAkaEntries(channel, batchSize, cancellationToken);
await ReadAkaEntries(csv, channel, cancellationToken);
channel.Writer.Complete();
await batchInsertTask;
}
private Task CreateBatchOfAkaEntries(Channel<ImdbAkaEntry, ImdbAkaEntry> channel, int batchSize, CancellationToken cancellationToken) =>
Task.Run(async () =>
{
await foreach (var movieData in channel.Reader.ReadAllAsync(cancellationToken))
{
if (cancellationToken.IsCancellationRequested)
{
return;
}
var batch = new List<ImdbAkaEntry>
{
movieData,
};
while (batch.Count < batchSize && channel.Reader.TryRead(out var nextMovieData))
{
batch.Add(nextMovieData);
}
if (batch.Count > 0)
{
await dbService.InsertImdbAkaEntries(batch);
logger.LogInformation("Imported batch of {BatchSize} aka entries starting with ImdbId {FirstImdbId}", batch.Count, batch.First().ImdbId);
}
}
}, cancellationToken);
private static async Task ReadAkaEntries(CsvReader csv, Channel<ImdbAkaEntry, ImdbAkaEntry> channel, CancellationToken cancellationToken)
{
while (await csv.ReadAsync())
{
var data = new ImdbAkaEntry
{
ImdbId = csv.GetField(0),
Ordering = csv.GetField<int>(1),
LocalizedTitle = csv.GetField(2),
Region = csv.GetField(3),
Language = csv.GetField(4),
Types = csv.GetField(5),
Attributes = csv.GetField(6),
};
var isOriginalTitle = int.TryParse(csv.GetField(7), out var original);
data.IsOriginalTitle = isOriginalTitle && original == 1;
if (cancellationToken.IsCancellationRequested)
{
return;
}
await channel.Writer.WriteAsync(data, cancellationToken);
}
}
}

View File

@@ -1,86 +0,0 @@
namespace Metadata.Features.Files;
public class BasicsFile(ILogger<BasicsFile> logger, ImdbDbService dbService) : IFileImport<ImdbBasicEntry>
{
public async Task Import(string fileName, int batchSize, CancellationToken cancellationToken)
{
logger.LogInformation("Importing Downloaded IMDB Basics data from {FilePath}", fileName);
var csvConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
{
Delimiter = "\t",
BadDataFound = null, // Skip Bad Data from Imdb
MissingFieldFound = null, // Skip Missing Fields from Imdb
};
using var reader = new StreamReader(fileName);
using var csv = new CsvReader(reader, csvConfig);
var channel = Channel.CreateBounded<ImdbBasicEntry>(new BoundedChannelOptions(batchSize)
{
FullMode = BoundedChannelFullMode.Wait,
});
await csv.ReadAsync();
var batchInsertTask = CreateBatchOfBasicEntries(channel, batchSize, cancellationToken);
await ReadBasicEntries(csv, channel, cancellationToken);
channel.Writer.Complete();
await batchInsertTask;
}
private Task CreateBatchOfBasicEntries(Channel<ImdbBasicEntry, ImdbBasicEntry> channel, int batchSize, CancellationToken cancellationToken) =>
Task.Run(async () =>
{
await foreach (var movieData in channel.Reader.ReadAllAsync(cancellationToken))
{
if (cancellationToken.IsCancellationRequested)
{
return;
}
var batch = new List<ImdbBasicEntry>
{
movieData,
};
while (batch.Count < batchSize && channel.Reader.TryRead(out var nextMovieData))
{
batch.Add(nextMovieData);
}
if (batch.Count > 0)
{
await dbService.InsertImdbEntries(batch);
logger.LogInformation("Imported batch of {BatchSize} basics starting with ImdbId {FirstImdbId}", batch.Count, batch.First().ImdbId);
}
}
}, cancellationToken);
private static async Task ReadBasicEntries(CsvReader csv, Channel<ImdbBasicEntry, ImdbBasicEntry> channel, CancellationToken cancellationToken)
{
while (await csv.ReadAsync())
{
var isAdultSet = int.TryParse(csv.GetField(4), out var adult);
var movieData = new ImdbBasicEntry
{
ImdbId = csv.GetField(0),
Category = csv.GetField(1),
Title = csv.GetField(2),
Adult = isAdultSet && adult == 1,
Year = csv.GetField(5) == @"\N" ? 0 : int.Parse(csv.GetField(5)),
};
if (cancellationToken.IsCancellationRequested)
{
return;
}
await channel.Writer.WriteAsync(movieData, cancellationToken);
}
}
}

View File

@@ -1,83 +0,0 @@
namespace Metadata.Features.Files;
public class EpisodesFile(ILogger<EpisodesFile> logger, ImdbDbService dbService) : IFileImport<ImdbEpisodeEntry>
{
public async Task Import(string fileName, int batchSize, CancellationToken cancellationToken)
{
logger.LogInformation("Importing Downloaded IMDB Episodes data from {FilePath}", fileName);
var csvConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
{
Delimiter = "\t",
BadDataFound = null, // Skip Bad Data from Imdb
MissingFieldFound = null, // Skip Missing Fields from Imdb
};
using var reader = new StreamReader(fileName);
using var csv = new CsvReader(reader, csvConfig);
var channel = Channel.CreateBounded<ImdbEpisodeEntry>(new BoundedChannelOptions(batchSize)
{
FullMode = BoundedChannelFullMode.Wait,
});
await csv.ReadAsync();
var batchInsertTask = CreateBatchOfEpisodeEntries(channel, batchSize, cancellationToken);
await ReadEpisodeEntries(csv, channel, cancellationToken);
channel.Writer.Complete();
await batchInsertTask;
}
private Task CreateBatchOfEpisodeEntries(Channel<ImdbEpisodeEntry, ImdbEpisodeEntry> channel, int batchSize, CancellationToken cancellationToken) =>
Task.Run(async () =>
{
await foreach (var movieData in channel.Reader.ReadAllAsync(cancellationToken))
{
if (cancellationToken.IsCancellationRequested)
{
return;
}
var batch = new List<ImdbEpisodeEntry>
{
movieData,
};
while (batch.Count < batchSize && channel.Reader.TryRead(out var nextMovieData))
{
batch.Add(nextMovieData);
}
if (batch.Count > 0)
{
await dbService.InsertImdbEpisodeEntries(batch);
logger.LogInformation("Imported batch of {BatchSize} episodes starting with ImdbId {FirstImdbId}", batch.Count, batch.First().EpisodeImdbId);
}
}
}, cancellationToken);
private static async Task ReadEpisodeEntries(CsvReader csv, Channel<ImdbEpisodeEntry, ImdbEpisodeEntry> channel, CancellationToken cancellationToken)
{
while (await csv.ReadAsync())
{
var data = new ImdbEpisodeEntry
{
EpisodeImdbId = csv.GetField(0),
ParentImdbId = csv.GetField(1),
SeasonNumber = csv.GetField(2),
EpisodeNumber = csv.GetField(3),
};
if (cancellationToken.IsCancellationRequested)
{
return;
}
await channel.Writer.WriteAsync(data, cancellationToken);
}
}
}

View File

@@ -1,6 +0,0 @@
namespace Metadata.Features.Files;
public interface IFileImport<TImportType>
{
Task Import(string fileName, int batchSize, CancellationToken cancellationToken);
}

View File

@@ -1,13 +0,0 @@
namespace Metadata.Features.Files;
public class ImdbAkaEntry
{
public string ImdbId { get; set; } = default!;
public int Ordering { get; set; }
public string? LocalizedTitle { get; set; }
public string? Region { get; set; }
public string? Language { get; set; }
public string? Types { get; set; }
public string? Attributes { get; set; }
public bool IsOriginalTitle { get; set; }
}

View File

@@ -1,10 +0,0 @@
namespace Metadata.Features.Files;
public class ImdbBasicEntry
{
public string ImdbId { get; set; } = default!;
public string? Category { get; set; }
public string? Title { get; set; }
public bool Adult { get; set; }
public int Year { get; set; }
}

View File

@@ -1,9 +0,0 @@
namespace Metadata.Features.Files;
public class ImdbEpisodeEntry
{
public string EpisodeImdbId { get; set; } = default!;
public string? ParentImdbId { get; set; }
public string? SeasonNumber { get; set; }
public string? EpisodeNumber { get; set; }
}

View File

@@ -1,164 +0,0 @@
namespace Metadata.Features.ImportImdbData;
public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbService> logger)
{
public Task InsertImdbEntries(IEnumerable<ImdbBasicEntry> entries) =>
ExecuteCommandAsync(
async connection =>
{
await using var writer = await connection.BeginBinaryImportAsync(
$"COPY {TableNames.MetadataTable} (\"imdb_id\", \"category\", \"title\", \"year\", \"adult\") FROM STDIN (FORMAT BINARY)");
foreach (var entry in entries)
{
try
{
await writer.StartRowAsync();
await writer.WriteAsync(entry.ImdbId, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Category, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Title, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Year, NpgsqlDbType.Integer);
await writer.WriteAsync(entry.Adult, NpgsqlDbType.Boolean);
}
catch (Npgsql.PostgresException e)
{
if (e.Message.Contains("duplicate key value violates unique constraint", StringComparison.OrdinalIgnoreCase))
{
continue;
}
throw;
}
}
await writer.CompleteAsync();
}, "Error while inserting imdb entries into database");
public Task InsertImdbAkaEntries(IEnumerable<ImdbAkaEntry> entries) =>
ExecuteCommandAsync(
async connection =>
{
await using var writer = await connection.BeginBinaryImportAsync(
$"COPY {TableNames.AkasTable} (\"imdb_id\", \"ordering\", \"localized_title\", \"region\", \"language\", \"types\", \"attributes\", \"is_original_title\") FROM STDIN (FORMAT BINARY)");
foreach (var entry in entries.Where(x => x.LocalizedTitle?.Length <= 8000))
{
try
{
await writer.StartRowAsync();
await writer.WriteAsync(entry.ImdbId, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Ordering, NpgsqlDbType.Integer);
await writer.WriteAsync(entry.LocalizedTitle, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Region, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Language, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Types, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Attributes, NpgsqlDbType.Text);
await writer.WriteAsync(entry.IsOriginalTitle, NpgsqlDbType.Boolean);
}
catch (PostgresException e)
{
if (e.Message.Contains("value too long for type character", StringComparison.OrdinalIgnoreCase))
{
continue;
}
throw;
}
}
await writer.CompleteAsync();
}, "Error while inserting imdb entries into database");
public Task InsertImdbEpisodeEntries(IEnumerable<ImdbEpisodeEntry> entries) =>
ExecuteCommandAsync(
async connection =>
{
await using var writer = await connection.BeginBinaryImportAsync(
$"COPY {TableNames.EpisodesTable} (\"episode_id\", \"parent_id\", \"season\", \"episode\") FROM STDIN (FORMAT BINARY)");
foreach (var entry in entries)
{
try
{
await writer.StartRowAsync();
await writer.WriteAsync(entry.EpisodeImdbId, NpgsqlDbType.Text);
await writer.WriteAsync(entry.ParentImdbId, NpgsqlDbType.Text);
await writer.WriteAsync(entry.SeasonNumber, NpgsqlDbType.Text);
await writer.WriteAsync(entry.EpisodeNumber, NpgsqlDbType.Text);
}
catch (PostgresException e)
{
if (e.Message.Contains("value too long for type character", StringComparison.OrdinalIgnoreCase))
{
continue;
}
throw;
}
}
await writer.CompleteAsync();
}, "Error while inserting imdb entries into database");
public Task TruncateTable(string table, bool cascade = false) =>
ExecuteCommandAsync(
async connection =>
{
var cascadeOption = cascade ? "CASCADE" : string.Empty;
logger.LogInformation("Truncating '{Table}' table", table);
await using var command = new NpgsqlCommand($"TRUNCATE TABLE {table} {cascadeOption}", connection);
await command.ExecuteNonQueryAsync();
}, $"Error while clearing '{table}' table");
public Task CreatePgtrmIndex() =>
ExecuteCommandAsync(
async connection =>
{
await using var command = new NpgsqlCommand($"CREATE INDEX title_gin ON {TableNames.MetadataTable} USING gin(title gin_trgm_ops)", connection);
await command.ExecuteNonQueryAsync();
}, "Error while creating index on imdb_metadata table");
public Task DropPgtrmIndex() =>
ExecuteCommandAsync(
async connection =>
{
logger.LogInformation("Dropping Trigrams index if it exists already");
await using var dropCommand = new NpgsqlCommand("DROP INDEX if exists title_gin", connection);
await dropCommand.ExecuteNonQueryAsync();
}, $"Error while dropping index on {TableNames.MetadataTable} table");
private async Task ExecuteCommandAsync(Func<NpgsqlConnection, Task> operation, string errorMessage)
{
try
{
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync();
await operation(connection);
}
catch (Exception e)
{
logger.LogError(e, errorMessage);
}
}
private async Task ExecuteCommandWithTransactionAsync(Func<NpgsqlConnection, NpgsqlTransaction, Task> operation, NpgsqlTransaction transaction, string errorMessage)
{
try
{
await operation(transaction.Connection, transaction);
}
catch (PostgresException)
{
await transaction.RollbackAsync();
throw;
}
catch (Exception e)
{
logger.LogError(e, errorMessage);
}
}
}

View File

@@ -0,0 +1,15 @@
namespace Metadata.Features.ImportImdbData;
public class ImdbEntry
{
[BsonId]
public string ImdbId { get; set; } = default!;
public string? TitleType { get; set; }
public string? PrimaryTitle { get; set; }
public string? OriginalTitle { get; set; }
public string? IsAdult { get; set; }
public string? StartYear { get; set; }
public string? EndYear { get; set; }
public string? RuntimeMinutes { get; set; }
public string? Genres { get; set; }
}

View File

@@ -0,0 +1,64 @@
namespace Metadata.Features.ImportImdbData;
public class ImdbMongoDbService
{
private readonly ILogger<ImdbMongoDbService> _logger;
private readonly IMongoCollection<ImdbEntry> _imdbCollection;
public ImdbMongoDbService(MongoConfiguration configuration, ILogger<ImdbMongoDbService> logger)
{
_logger = logger;
var client = new MongoClient(configuration.ConnectionString);
var database = client.GetDatabase(configuration.DbName);
_imdbCollection = database.GetCollection<ImdbEntry>("imdb-entries");
}
public async Task InsertImdbEntries(IEnumerable<ImdbEntry> entries)
{
var operations = new List<WriteModel<ImdbEntry>>();
foreach (var entry in entries)
{
var filter = Builders<ImdbEntry>.Filter.Eq(e => e.ImdbId, entry.ImdbId);
var update = Builders<ImdbEntry>.Update
.SetOnInsert(e => e.TitleType, entry.TitleType)
.SetOnInsert(e => e.PrimaryTitle, entry.PrimaryTitle)
.SetOnInsert(e => e.OriginalTitle, entry.OriginalTitle)
.SetOnInsert(e => e.IsAdult, entry.IsAdult)
.SetOnInsert(e => e.StartYear, entry.StartYear)
.SetOnInsert(e => e.EndYear, entry.EndYear)
.SetOnInsert(e => e.RuntimeMinutes, entry.RuntimeMinutes)
.SetOnInsert(e => e.Genres, entry.Genres);
operations.Add(new UpdateOneModel<ImdbEntry>(filter, update) { IsUpsert = true });
}
await _imdbCollection.BulkWriteAsync(operations);
}
public bool IsDatabaseInitialized()
{
try
{
// Create compound index for PrimaryTitle, TitleType, and StartYear
var indexKeysDefinition = Builders<ImdbEntry>.IndexKeys
.Text(e => e.PrimaryTitle)
.Ascending(e => e.TitleType)
.Ascending(e => e.StartYear);
var createIndexOptions = new CreateIndexOptions { Background = true };
var indexModel = new CreateIndexModel<ImdbEntry>(indexKeysDefinition, createIndexOptions);
_imdbCollection.Indexes.CreateOne(indexModel);
return true;
}
catch (Exception e)
{
_logger.LogError(e, "Error initializing database");
return false;
}
}
}
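A hedged sketch of how the compound index created above might be queried; FindMoviesAsync and its parameters are illustrative, not part of this diff:

public static Task<List<ImdbEntry>> FindMoviesAsync(IMongoCollection<ImdbEntry> collection, string title, string year)
{
    // The text clause uses the PrimaryTitle text key; the equality clauses
    // line up with the ascending TitleType/StartYear keys of the same index.
    var filter = Builders<ImdbEntry>.Filter.Text(title)
                 & Builders<ImdbEntry>.Filter.Eq(e => e.TitleType, "movie")
                 & Builders<ImdbEntry>.Filter.Eq(e => e.StartYear, year);
    return collection.Find(filter).Limit(10).ToListAsync();
}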

View File

@@ -1,3 +1,3 @@
namespace Metadata.Features.ImportImdbData;
- public record ImportImdbDataRequest(string TitleBasicsFilePath, string TitleAkasFilePath, string EpisodesFilePath);
+ public record ImportImdbDataRequest(string FilePath);

View File

@@ -1,17 +1,92 @@
namespace Metadata.Features.ImportImdbData;
- public class ImportImdbDataRequestHandler(
- ServiceConfiguration configuration,
- IFileImport<ImdbBasicEntry> basicsFile,
- IFileImport<ImdbAkaEntry> akasFile,
- IFileImport<ImdbEpisodeEntry> episodesFile)
+ public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService, JobConfiguration configuration)
{
- public async Task<IndexImdbDataRequest> Handle(ImportImdbDataRequest request, CancellationToken cancellationToken)
+ public async Task<DeleteDownloadedImdbDataRequest> Handle(ImportImdbDataRequest request, CancellationToken cancellationToken)
{
- await basicsFile.Import(request.TitleBasicsFilePath, configuration.InsertBatchSize, cancellationToken);
- await akasFile.Import(request.TitleAkasFilePath, configuration.InsertBatchSize, cancellationToken);
- await episodesFile.Import(request.EpisodesFilePath, configuration.InsertBatchSize, cancellationToken);
- return new(request.TitleBasicsFilePath, request.TitleAkasFilePath, request.EpisodesFilePath);
+ logger.LogInformation("Importing Downloaded IMDB data from {FilePath}", request.FilePath);
+ var config = new CsvConfiguration(CultureInfo.InvariantCulture)
+ {
+ Delimiter = "\t",
+ BadDataFound = null, // Skip Bad Data from Imdb
+ MissingFieldFound = null, // Skip Missing Fields from Imdb
+ };
+ using var reader = new StreamReader(request.FilePath);
+ using var csv = new CsvReader(reader, config);
+ var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(configuration.InsertBatchSize)
+ {
+ FullMode = BoundedChannelFullMode.Wait,
+ });
+ // Skip the header row
+ await csv.ReadAsync();
+ var batchInsertTask = CreateBatchOfEntries(channel, cancellationToken);
+ await ReadEntries(csv, channel, cancellationToken);
+ channel.Writer.Complete();
+ await batchInsertTask;
+ return new(request.FilePath);
}
+ private Task CreateBatchOfEntries(Channel<ImdbEntry, ImdbEntry> channel, CancellationToken cancellationToken) =>
+ Task.Run(async () =>
+ {
+ await foreach (var movieData in channel.Reader.ReadAllAsync(cancellationToken))
+ {
+ if (cancellationToken.IsCancellationRequested)
+ {
+ return;
+ }
+ var batch = new List<ImdbEntry>
+ {
+ movieData,
+ };
+ while (batch.Count < configuration.InsertBatchSize && channel.Reader.TryRead(out var nextMovieData))
+ {
+ batch.Add(nextMovieData);
+ }
+ if (batch.Count > 0)
+ {
+ await mongoDbService.InsertImdbEntries(batch);
+ logger.LogInformation("Imported batch of {BatchSize} starting with ImdbId {FirstImdbId}", batch.Count, batch.First().ImdbId);
+ }
+ }
+ }, cancellationToken);
+ private static async Task ReadEntries(CsvReader csv, Channel<ImdbEntry, ImdbEntry> channel, CancellationToken cancellationToken)
+ {
+ while (await csv.ReadAsync())
+ {
+ var movieData = new ImdbEntry
+ {
+ ImdbId = csv.GetField(0),
+ TitleType = csv.GetField(1),
+ PrimaryTitle = csv.GetField(2),
+ OriginalTitle = csv.GetField(3),
+ IsAdult = csv.GetField(4),
+ StartYear = csv.GetField(5),
+ EndYear = csv.GetField(6),
+ RuntimeMinutes = csv.GetField(7),
+ Genres = csv.GetField(8),
+ };
+ if (cancellationToken.IsCancellationRequested)
+ {
+ return;
+ }
+ await channel.Writer.WriteAsync(movieData, cancellationToken);
+ }
+ }
}
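The bounded channel gives the import natural backpressure: the CSV reader blocks once InsertBatchSize rows are buffered, while the batch task drains them into Mongo bulk upserts of up to the same size.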

View File

@@ -1,8 +0,0 @@
namespace Metadata.Features.ImportImdbData;
public static class TableNames
{
public const string MetadataTable = "imdb_metadata";
public const string EpisodesTable = "imdb_metadata_episodes";
public const string AkasTable = "imdb_metadata_akas";
}

View File

@@ -1,3 +0,0 @@
namespace Metadata.Features.IndexImdbData;
public record IndexImdbDataRequest(string TitleBasicsFilePath, string TitleAkasFilePath, string EpisodesFilePath);

View File

@@ -1,13 +0,0 @@
namespace Metadata.Features.IndexImdbData;
public class IndexImdbDataRequestHandler(ILogger<IndexImdbDataRequestHandler> logger, ImdbDbService dbService)
{
public async Task<DeleteDownloadedImdbDataRequest> Handle(IndexImdbDataRequest request, CancellationToken _)
{
logger.LogInformation("Creating Trigram Indexes for IMDB data");
await dbService.CreatePgtrmIndex();
return new(request.TitleBasicsFilePath, request.TitleAkasFilePath, request.EpisodesFilePath);
}
}

View File

@@ -0,0 +1,10 @@
namespace Metadata.Features.Jobs;
public abstract class BaseJob : IMetadataJob
{
public abstract bool IsSchedulable { get; }
public abstract string JobName { get; }
public abstract Task Invoke();
}

View File

@@ -1,9 +0,0 @@
namespace Metadata.Features.Jobs;
public class DownloadImdbDataJob(IMessageBus messageBus) : IHostedService
{
public async Task StartAsync(CancellationToken cancellationToken) =>
await messageBus.SendAsync(new GetImdbDataRequest());
public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
}

View File

@@ -0,0 +1,7 @@
namespace Metadata.Features.Jobs;
public interface IMetadataJob : IInvocable
{
bool IsSchedulable { get; }
string JobName { get; }
}

View File

@@ -0,0 +1,34 @@
namespace Metadata.Features.Jobs;
public class JobScheduler(IServiceProvider serviceProvider) : IHostedService
{
public async Task StartAsync(CancellationToken cancellationToken)
{
await using var scope = serviceProvider.CreateAsyncScope();
var mongoDbService = scope.ServiceProvider.GetRequiredService<ImdbMongoDbService>();
if (!mongoDbService.IsDatabaseInitialized())
{
throw new InvalidOperationException("MongoDb is not initialized");
}
var jobConfigurations = scope.ServiceProvider.GetRequiredService<JobConfiguration>();
var downloadJob = scope.ServiceProvider.GetRequiredService<DownloadImdbDataJob>();
if (!downloadJob.IsSchedulable)
{
// Await the one-off run so the scope is not disposed while the job is still using it.
await downloadJob.Invoke();
return;
}
var scheduler = scope.ServiceProvider.GetRequiredService<IScheduler>();
scheduler.Schedule<DownloadImdbDataJob>()
.Cron(jobConfigurations.DownloadImdbCronSchedule)
.PreventOverlapping(downloadJob.JobName);
}
public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
}

View File

@@ -4,26 +4,23 @@ global using System.Globalization;
global using System.IO.Compression;
global using System.Text.Json;
global using System.Threading.Channels;
+ global using Coravel;
+ global using Coravel.Invocable;
+ global using Coravel.Scheduling.Schedule.Interfaces;
global using CsvHelper;
global using CsvHelper.Configuration;
- global using JasperFx.CodeGeneration;
- global using JasperFx.CodeGeneration.Commands;
global using JasperFx.Core;
global using Metadata.Extensions;
- global using Metadata.Features.ClearExistingImdbData;
global using Metadata.Features.Configuration;
global using Metadata.Features.DeleteDownloadedImdbData;
global using Metadata.Features.DownloadImdbData;
- global using Metadata.Features.Files;
global using Metadata.Features.ImportImdbData;
- global using Metadata.Features.IndexImdbData;
+ global using Metadata.Features.Jobs;
global using Metadata.Features.Literals;
global using Microsoft.AspNetCore.Builder;
global using Microsoft.Extensions.DependencyInjection;
- global using Microsoft.Extensions.DependencyInjection.Extensions;
- global using Npgsql;
- global using NpgsqlTypes;
- global using Oakton;
+ global using MongoDB.Bson.Serialization.Attributes;
+ global using MongoDB.Driver;
global using Serilog;
global using Wolverine;

View File

@@ -1,47 +0,0 @@
// <auto-generated/>
#pragma warning disable
using Metadata.Features.Configuration;
using Microsoft.Extensions.Logging;
namespace Internal.Generated.WolverineHandlers
{
// START: ClearExistingImdbDataRequestHandler2085209125
public class ClearExistingImdbDataRequestHandler2085209125 : Wolverine.Runtime.Handlers.MessageHandler
{
private readonly Microsoft.Extensions.Logging.ILogger<Metadata.Features.ClearExistingImdbData.ClearExistingImdbDataRequestHandler> _logger1;
private readonly Microsoft.Extensions.Logging.ILogger<Metadata.Features.ImportImdbData.ImdbDbService> _logger2;
private readonly Metadata.Features.Configuration.PostgresConfiguration _postgresConfiguration;
public ClearExistingImdbDataRequestHandler2085209125(Microsoft.Extensions.Logging.ILogger<Metadata.Features.ClearExistingImdbData.ClearExistingImdbDataRequestHandler> __logger1, Microsoft.Extensions.Logging.ILogger<Metadata.Features.ImportImdbData.ImdbDbService> __logger2, Metadata.Features.Configuration.PostgresConfiguration postgresConfiguration)
{
_logger1 = __logger1;
_logger2 = __logger2;
_postgresConfiguration = postgresConfiguration;
}
public override async System.Threading.Tasks.Task HandleAsync(Wolverine.Runtime.MessageContext context, System.Threading.CancellationToken cancellation)
{
var imdbDbService = new Metadata.Features.ImportImdbData.ImdbDbService(_postgresConfiguration, _logger2);
var clearExistingImdbDataRequestHandler = new Metadata.Features.ClearExistingImdbData.ClearExistingImdbDataRequestHandler(_logger1, imdbDbService);
// The actual message body
var clearExistingImdbDataRequest = (Metadata.Features.ClearExistingImdbData.ClearExistingImdbDataRequest)context.Envelope.Message;
// The actual message execution
var outgoing1 = await clearExistingImdbDataRequestHandler.Handle(clearExistingImdbDataRequest, cancellation).ConfigureAwait(false);
// Outgoing, cascaded message
await context.EnqueueCascadingAsync(outgoing1).ConfigureAwait(false);
}
}
// END: ClearExistingImdbDataRequestHandler2085209125
}

Some files were not shown because too many files have changed in this diff.