Compare commits
18 Commits
main
...
create-doc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
10d6544673 | ||
|
|
2134f2f51d | ||
|
|
64149c55a8 | ||
|
|
5584143eeb | ||
|
|
d84a186132 | ||
|
|
7e338975ed | ||
|
|
123b8aad96 | ||
|
|
c947b013a2 | ||
|
|
f000ae6c12 | ||
|
|
108a4a9066 | ||
|
|
ad286a984f | ||
|
|
71ec7bdf2b | ||
|
|
ffeeb56610 | ||
|
|
cfc2b8f601 | ||
|
|
66d37a8c05 | ||
|
|
b19d1f0bf4 | ||
|
|
cbf5fda723 | ||
|
|
5def66858f |
4
.github/ISSUE_TEMPLATE/bug_report.md
vendored
4
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -12,9 +12,6 @@ A clear and concise description of what the bug is.
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior:
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
@@ -26,7 +23,6 @@ If the logs are short, make sure to triple backtick them, or use https://pastebi
|
||||
**Hardware:**
|
||||
- OS and distro: [e.g. Raspberry Pi OS, Ubuntu, Rocky]
|
||||
- Server: [e.g. VM, Baremetal, Pi]
|
||||
- Knightcrawler Version: [2.0.xx]
|
||||
|
||||
**Additional context**
|
||||
Add any other context about the problem here.
|
||||
|
||||
18
.github/workflows/base_image_workflow.yaml
vendored
18
.github/workflows/base_image_workflow.yaml
vendored
@@ -6,16 +6,12 @@ on:
|
||||
CONTEXT:
|
||||
required: true
|
||||
type: string
|
||||
DOCKERFILE:
|
||||
required: true
|
||||
type: string
|
||||
IMAGE_NAME:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
CONTEXT: ${{ inputs.CONTEXT }}
|
||||
DOCKERFILE: ${{ inputs.DOCKERFILE }}
|
||||
IMAGE_NAME: ${{ inputs.IMAGE_NAME }}
|
||||
PLATFORMS: linux/amd64,linux/arm64
|
||||
|
||||
@@ -25,13 +21,11 @@ jobs:
|
||||
steps:
|
||||
- name: Setting variables
|
||||
run: |
|
||||
echo "CONTEXT=${{ env.CONTEXT }}"
|
||||
echo "DOCKERFILE=${{ env.DOCKERFILE }}"
|
||||
echo "IMAGE_NAME=${{ env.IMAGE_NAME }}"
|
||||
echo "CONTEXT=${{ env.CONTEXT }}
|
||||
echo "IMAGE_NAME=${{ env.IMAGE_NAME }}
|
||||
echo "PLATFORMS=${{ env.PLATFORMS }}"
|
||||
outputs:
|
||||
CONTEXT: ${{ env.CONTEXT }}
|
||||
DOCKERFILE: ${{ env.DOCKERFILE }}
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
PLATFORMS: ${{ env.PLATFORMS }}
|
||||
|
||||
@@ -76,17 +70,14 @@ jobs:
|
||||
flavor: |
|
||||
latest=auto
|
||||
tags: |
|
||||
type=ref,event=tag
|
||||
type=ref,event=pr
|
||||
type=edge,branch=master,commit=${{ github.sha }}
|
||||
type=sha,commit=${{ github.sha }}
|
||||
type=semver,pattern={{version}}
|
||||
type=raw,value=latest,enable={{is_default_branch}}
|
||||
|
||||
- name: Build image for scanning
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ${{ needs.set-vars.outputs.CONTEXT }}
|
||||
file: ${{ needs.set-vars.outputs.DOCKERFILE }}
|
||||
push: true
|
||||
provenance: false
|
||||
tags: localhost:5000/dockle-examine-image:test
|
||||
@@ -139,11 +130,10 @@ jobs:
|
||||
sarif_file: 'trivy-results-os.sarif'
|
||||
|
||||
- name: Push Service Image to repo
|
||||
# if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master'
|
||||
if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ${{ needs.set-vars.outputs.CONTEXT }}
|
||||
file: ${{ needs.set-vars.outputs.DOCKERFILE }}
|
||||
push: true
|
||||
provenance: false
|
||||
tags: ${{ steps.docker-metadata.outputs.tags }}
|
||||
|
||||
8
.github/workflows/build_addon.yaml
vendored
8
.github/workflows/build_addon.yaml
vendored
@@ -2,17 +2,13 @@ name: Build and Push Addon Service
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
paths:
|
||||
- 'src/addon/**'
|
||||
workflow_dispatch:
|
||||
- 'src/node/addon/**'
|
||||
|
||||
jobs:
|
||||
process:
|
||||
uses: ./.github/workflows/base_image_workflow.yaml
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/addon/
|
||||
DOCKERFILE: ./src/addon/Dockerfile
|
||||
CONTEXT: ./src/node/addon/
|
||||
IMAGE_NAME: knightcrawler-addon
|
||||
|
||||
8
.github/workflows/build_consumer.yaml
vendored
8
.github/workflows/build_consumer.yaml
vendored
@@ -2,17 +2,13 @@ name: Build and Push Consumer Service
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
paths:
|
||||
- 'src/torrent-consumer/**'
|
||||
workflow_dispatch:
|
||||
- 'src/node/consumer/**'
|
||||
|
||||
jobs:
|
||||
process:
|
||||
uses: ./.github/workflows/base_image_workflow.yaml
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/
|
||||
DOCKERFILE: ./src/torrent-consumer/Dockerfile
|
||||
CONTEXT: ./src/node/consumer/
|
||||
IMAGE_NAME: knightcrawler-consumer
|
||||
|
||||
18
.github/workflows/build_debrid_collector.yaml
vendored
18
.github/workflows/build_debrid_collector.yaml
vendored
@@ -1,18 +0,0 @@
|
||||
name: Build and Push Debrid Collector Service
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
paths:
|
||||
- 'src/debrid-collector/**'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
process:
|
||||
uses: ./.github/workflows/base_image_workflow.yaml
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/
|
||||
DOCKERFILE: ./src/debrid-collector/Dockerfile
|
||||
IMAGE_NAME: knightcrawler-debrid-collector
|
||||
86
.github/workflows/build_docs.yaml
vendored
Normal file
86
.github/workflows/build_docs.yaml
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
name: Build documentation
|
||||
|
||||
# TODO: Only run on ./docs folder change
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["master"]
|
||||
paths:
|
||||
- 'docs/**'
|
||||
# Specify to run a workflow manually from the Actions tab on GitHub
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
id-token: write
|
||||
pages: write
|
||||
|
||||
env:
|
||||
INSTANCE: Writerside/kc
|
||||
ARTIFACT: webHelpKC2-all.zip
|
||||
DOCS_FOLDER: ./docs
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build Writerside docs using Docker
|
||||
uses: JetBrains/writerside-github-action@v4
|
||||
with:
|
||||
instance: ${{ env.INSTANCE }}
|
||||
artifact: ${{ env.ARTIFACT }}
|
||||
location: ${{ env.DOCS_FOLDER }}
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: docs
|
||||
path: |
|
||||
artifacts/${{ env.ARTIFACT }}
|
||||
artifacts/report.json
|
||||
retention-days: 7
|
||||
|
||||
test:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Download artifacts
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: docs
|
||||
path: artifacts
|
||||
|
||||
- name: Test documentation
|
||||
uses: JetBrains/writerside-checker-action@v1
|
||||
with:
|
||||
instance: ${{ env.INSTANCE }}
|
||||
|
||||
deploy:
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deployment.outputs.page_url }}
|
||||
needs: [build, test]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Download artifacts
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: docs
|
||||
|
||||
- name: Unzip artifact
|
||||
run: unzip -O UTF-8 -qq '${{ env.ARTIFACT }}' -d dir
|
||||
|
||||
- name: Setup Pages
|
||||
uses: actions/configure-pages@v4
|
||||
|
||||
- name: Package and upload Pages artifact
|
||||
uses: actions/upload-pages-artifact@v3
|
||||
with:
|
||||
path: dir
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
id: deployment
|
||||
uses: actions/deploy-pages@v4
|
||||
8
.github/workflows/build_jackett-addon.yaml
vendored
8
.github/workflows/build_jackett-addon.yaml
vendored
@@ -2,17 +2,13 @@ name: Build and Push Jackett Addon Service
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
paths:
|
||||
- 'src/addon-jackett/**'
|
||||
workflow_dispatch:
|
||||
- 'src/node/addon-jackett/**'
|
||||
|
||||
jobs:
|
||||
process:
|
||||
uses: ./.github/workflows/base_image_workflow.yaml
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/addon-jackett/
|
||||
DOCKERFILE: ./src/addon-jackett/Dockerfile
|
||||
CONTEXT: ./src/node/addon-jackett/
|
||||
IMAGE_NAME: knightcrawler-addon-jackett
|
||||
|
||||
4
.github/workflows/build_metadata.yaml
vendored
4
.github/workflows/build_metadata.yaml
vendored
@@ -2,11 +2,8 @@ name: Build and Push Metadata Service
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
paths:
|
||||
- 'src/metadata/**'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
process:
|
||||
@@ -14,5 +11,4 @@ jobs:
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/metadata/
|
||||
DOCKERFILE: ./src/metadata/Dockerfile
|
||||
IMAGE_NAME: knightcrawler-metadata
|
||||
|
||||
4
.github/workflows/build_migrator.yaml
vendored
4
.github/workflows/build_migrator.yaml
vendored
@@ -2,11 +2,8 @@ name: Build and Push Migrator Service
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
paths:
|
||||
- 'src/migrator/**'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
process:
|
||||
@@ -14,5 +11,4 @@ jobs:
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/migrator/
|
||||
DOCKERFILE: ./src/migrator/Dockerfile
|
||||
IMAGE_NAME: knightcrawler-migrator
|
||||
|
||||
6
.github/workflows/build_producer.yaml
vendored
6
.github/workflows/build_producer.yaml
vendored
@@ -2,17 +2,13 @@ name: Build and Push Producer Service
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
paths:
|
||||
- 'src/producer/**'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
process:
|
||||
uses: ./.github/workflows/base_image_workflow.yaml
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/
|
||||
DOCKERFILE: ./src/producer/src/Dockerfile
|
||||
CONTEXT: ./src/producer/
|
||||
IMAGE_NAME: knightcrawler-producer
|
||||
|
||||
18
.github/workflows/build_qbit_collector.yaml
vendored
18
.github/workflows/build_qbit_collector.yaml
vendored
@@ -1,18 +0,0 @@
|
||||
name: Build and Push Qbit Collector Service
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
paths:
|
||||
- 'src/qbit-collector/**'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
process:
|
||||
uses: ./.github/workflows/base_image_workflow.yaml
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/
|
||||
DOCKERFILE: ./src/qbit-collector/Dockerfile
|
||||
IMAGE_NAME: knightcrawler-qbit-collector
|
||||
18
.github/workflows/build_tissue.yaml
vendored
18
.github/workflows/build_tissue.yaml
vendored
@@ -1,18 +0,0 @@
|
||||
name: Build and Push Tissue Service
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
paths:
|
||||
- 'src/tissue/**'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
process:
|
||||
uses: ./.github/workflows/base_image_workflow.yaml
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/tissue/
|
||||
DOCKERFILE: ./src/tissue/Dockerfile
|
||||
IMAGE_NAME: knightcrawler-tissue
|
||||
15
.github/workflows/build_torrent_ingester.yaml
vendored
15
.github/workflows/build_torrent_ingester.yaml
vendored
@@ -1,15 +0,0 @@
|
||||
name: Build and Push Torrent Ingestor Service
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'src/torrent-ingestor/**'
|
||||
|
||||
jobs:
|
||||
process:
|
||||
uses: ./.github/workflows/base_image_workflow.yaml
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/torrent-ingestor
|
||||
DOCKERFILE: ./src/torrent-ingestor/Dockerfile
|
||||
IMAGE_NAME: knightcrawler-torrent-ingestor
|
||||
39
.github/workflows/git_cliff.yml
vendored
39
.github/workflows/git_cliff.yml
vendored
@@ -1,39 +0,0 @@
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
changelog:
|
||||
name: Generate changelog
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Generate a changelog
|
||||
uses: orhun/git-cliff-action@v3
|
||||
with:
|
||||
config: cliff.toml
|
||||
args: --verbose
|
||||
env:
|
||||
OUTPUT: CHANGELOG.md
|
||||
GITHUB_REPO: ${{ github.repository }}
|
||||
|
||||
- name: Commit
|
||||
run: |
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email 'github-actions[bot]@users.noreply.github.com'
|
||||
set +e
|
||||
git checkout -b feat/changelog_$(date +"%d_%m")
|
||||
git add CHANGELOG.md
|
||||
git commit -m "[skip ci] Update changelog"
|
||||
git push https://${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git feat/changelog_$(date +"%d_%m")
|
||||
|
||||
- name: create pull request
|
||||
run: gh pr create -B main -H feat/changelog_$(date +"%d_%m") --title '[skip ci] Update changelog' --body 'Changelog update by git-cliff'
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
13
.gitignore
vendored
13
.gitignore
vendored
@@ -355,9 +355,6 @@ MigrationBackup/
|
||||
# Fody - auto-generated XML schema
|
||||
FodyWeavers.xsd
|
||||
|
||||
# Jetbrains ide's run profiles (Could contain sensative information)
|
||||
**/.run/
|
||||
|
||||
# VS Code files for those working on multiple tools
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
@@ -395,6 +392,8 @@ dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
@@ -608,11 +607,3 @@ fabric.properties
|
||||
# Caddy logs
|
||||
!**/caddy/logs/.gitkeep
|
||||
**/caddy/logs/**
|
||||
|
||||
# Mac directory indexes
|
||||
.DS_Store
|
||||
deployment/docker/stack.env
|
||||
|
||||
src/producer/src/python/
|
||||
src/debrid-collector/python/
|
||||
src/qbit-collector/python/
|
||||
|
||||
@@ -3,7 +3,6 @@ repos:
|
||||
rev: v4.5.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
args: ['--maxkb=2500']
|
||||
- id: check-json
|
||||
- id: check-toml
|
||||
- id: check-xml
|
||||
@@ -16,6 +15,5 @@ repos:
|
||||
rev: v2.2.6
|
||||
hooks:
|
||||
- id: codespell
|
||||
exclude: |
|
||||
(?x)^(src/node/consumer/test/.*|src/producer/Data/.*|src/tissue/Data/.*)$
|
||||
exclude: ^src/node/consumer/test/
|
||||
args: ["-L", "strem,chage"]
|
||||
|
||||
22
CHANGELOG.md
22
CHANGELOG.md
@@ -1,22 +0,0 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [1.0.0] - 2024-03-25
|
||||
### Details
|
||||
#### Changed
|
||||
- Change POSTGRES_USERNAME to POSTGRES_USER. Oops by @purple-emily
|
||||
- Change POSTGRES_DATABASE to POSTGRES_DB by @purple-emily
|
||||
- Two movie commands instead of movie and tv by @purple-emily
|
||||
- Cleanup RabbitMQ env vars, and Github Pat by @iPromKnight
|
||||
|
||||
#### Fixed
|
||||
- HRD -> HDR by @mplewis
|
||||
|
||||
## New Contributors
|
||||
* @mplewis made their first contribution
|
||||
|
||||
<!-- generated by git-cliff -->
|
||||
@@ -1,34 +0,0 @@
|
||||
We use [Meaningful commit messages](https://reflectoring.io/meaningful-commit-messages/)
|
||||
|
||||
Tl;dr:
|
||||
1. It should answer the question: “What happens if the changes are applied?".
|
||||
2. Use the imperative, present tense. It is easier to read and scan quickly:
|
||||
```
|
||||
Right: Add feature to alert admin for new user registration
|
||||
Wrong: Added feature ... (past tense)
|
||||
```
|
||||
3. The summary should always be able to complete the following sentence:
|
||||
`If applied, this commit will… `
|
||||
|
||||
We use [git-cliff] for our changelog.
|
||||
|
||||
The breaking flag is set to true when the commit has an exclamation mark after the commit type and scope, e.g.:
|
||||
`feat(scope)!: this is a breaking change`
|
||||
|
||||
Keywords (Commit messages should start with these):
|
||||
```
|
||||
# Added
|
||||
add
|
||||
support
|
||||
# Removed
|
||||
remove
|
||||
delete
|
||||
# Fixed
|
||||
test
|
||||
fix
|
||||
```
|
||||
|
||||
Any other commits will fall under the `Changed` category
|
||||
|
||||
|
||||
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html)
|
||||
36
README.md
36
README.md
@@ -7,6 +7,9 @@
|
||||
|
||||
## Contents
|
||||
|
||||
> [!CAUTION]
|
||||
> Until we reach `v1.0.0`, please consider releases as alpha.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> The latest change renames the project and requires a [small migration](#selfhostio-to-knightcrawler-migration).
|
||||
- [Contents](#contents)
|
||||
@@ -51,11 +54,11 @@ Download and install [Docker Compose](https://docs.docker.com/compose/install/),
|
||||
|
||||
### Environment Setup
|
||||
|
||||
Before running the project, you need to set up the environment variables. Edit the values in `stack.env`:
|
||||
Before running the project, you need to set up the environment variables. Copy the `.env.example` file to `.env`:
|
||||
|
||||
```sh
|
||||
cd deployment/docker
|
||||
code stack.env
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
Then set any of the values you wouldd like to customize.
|
||||
@@ -67,6 +70,33 @@ Then set any of the values you wouldd like to customize.
|
||||
|
||||
By default, Knight Crawler is configured to be *relatively* conservative in its resource usage. If running on a decent machine (16GB RAM, i5+ or equivalent), you can increase some settings to increase consumer throughput. This is especially helpful if you have a large backlog from [importing databases](#importing-external-dumps).
|
||||
|
||||
In your `.env` file, under the `# Consumer` section increase `CONSUMER_REPLICAS` from `3` to `15`.
|
||||
You can also increase `JOB_CONCURRENCY` from `5` to `10`.
|
||||
|
||||
### DebridMediaManager setup (optional)
|
||||
|
||||
There are some optional steps you should take to maximise the number of movies/tv shows we can find.
|
||||
|
||||
We can search DebridMediaManager hash lists which are hosted on GitHub. This allows us to add hundreds of thousands of movies and tv shows, but it requires a Personal Access Token to be generated. The software only needs read access and only for public repositories. To generate one, please follow these steps:
|
||||
|
||||
1. Navigate to GitHub settings -> Developer Settings -> Personal access tokens -> Fine-grained tokens (click [here](https://github.com/settings/tokens?type=beta) for a direct link)
|
||||
2. Press `Generate new token`
|
||||
3. Fill out the form (example data below):
|
||||
```
|
||||
Token name:
|
||||
KnightCrawler
|
||||
Expiration:
|
||||
90 days
|
||||
Description:
|
||||
<blank>
|
||||
Repository access
|
||||
(checked) Public Repositories (read-only)
|
||||
```
|
||||
4. Click `Generate token`
|
||||
5. Take the new token and add it to the bottom of the [.env](deployment/docker/.env) file
|
||||
```
|
||||
GithubSettings__PAT=<YOUR TOKEN HERE>
|
||||
```
|
||||
### Configure external access
|
||||
|
||||
Please choose which applies to you:
|
||||
@@ -116,7 +146,7 @@ Remove or comment out the port for the addon, and connect it to Caddy:
|
||||
addon:
|
||||
<<: *knightcrawler-app
|
||||
env_file:
|
||||
- stack.env
|
||||
- .env
|
||||
hostname: knightcrawler-addon
|
||||
image: gabisonfire/knightcrawler-addon:latest
|
||||
labels:
|
||||
|
||||
112
cliff.toml
112
cliff.toml
@@ -1,112 +0,0 @@
|
||||
# git-cliff ~ configuration file
|
||||
# https://git-cliff.org/docs/configuration
|
||||
|
||||
[changelog]
|
||||
# changelog header
|
||||
header = """
|
||||
# Changelog\n
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n
|
||||
"""
|
||||
# template for the changelog body
|
||||
# https://keats.github.io/tera/docs/#introduction
|
||||
body = """
|
||||
{%- macro remote_url() -%}
|
||||
https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}
|
||||
{%- endmacro -%}
|
||||
|
||||
{% if version -%}
|
||||
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
|
||||
{% else -%}
|
||||
## [Unreleased]
|
||||
{% endif -%}
|
||||
|
||||
### Details\
|
||||
|
||||
{% for group, commits in commits | group_by(attribute="group") %}
|
||||
#### {{ group | upper_first }}
|
||||
{%- for commit in commits %}
|
||||
- {{ commit.message | upper_first | trim }}\
|
||||
{% if commit.github.username %} by @{{ commit.github.username }}{%- endif -%}
|
||||
{% if commit.github.pr_number %} in \
|
||||
[#{{ commit.github.pr_number }}]({{ self::remote_url() }}/pull/{{ commit.github.pr_number }}) \
|
||||
{%- endif -%}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
{%- if github.contributors | filter(attribute="is_first_time", value=true) | length != 0 %}
|
||||
## New Contributors
|
||||
{%- endif -%}
|
||||
|
||||
{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %}
|
||||
* @{{ contributor.username }} made their first contribution
|
||||
{%- if contributor.pr_number %} in \
|
||||
[#{{ contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \
|
||||
{%- endif %}
|
||||
{%- endfor %}\n
|
||||
"""
|
||||
# template for the changelog footer
|
||||
footer = """
|
||||
{%- macro remote_url() -%}
|
||||
https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}
|
||||
{%- endmacro -%}
|
||||
|
||||
{% for release in releases -%}
|
||||
{% if release.version -%}
|
||||
{% if release.previous.version -%}
|
||||
[{{ release.version | trim_start_matches(pat="v") }}]: \
|
||||
{{ self::remote_url() }}/compare/{{ release.previous.version }}..{{ release.version }}
|
||||
{% endif -%}
|
||||
{% else -%}
|
||||
[unreleased]: {{ self::remote_url() }}/compare/{{ release.previous.version }}..HEAD
|
||||
{% endif -%}
|
||||
{% endfor %}
|
||||
<!-- generated by git-cliff -->
|
||||
"""
|
||||
# remove the leading and trailing whitespace from the templates
|
||||
trim = true
|
||||
|
||||
[git]
|
||||
# parse the commits based on https://www.conventionalcommits.org
|
||||
conventional_commits = true
|
||||
# filter out the commits that are not conventional
|
||||
filter_unconventional = true
|
||||
# process each line of a commit as an individual commit
|
||||
split_commits = false
|
||||
# regex for preprocessing the commit messages
|
||||
commit_preprocessors = [
|
||||
# remove issue numbers from commits
|
||||
{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "" },
|
||||
]
|
||||
# regex for parsing and grouping commits
|
||||
commit_parsers = [
|
||||
{ message = "^.*: add", group = "Added" },
|
||||
{ message = "^add", group = "Added" },
|
||||
{ message = "^.*: support", group = "Added" },
|
||||
{ message = "^support", group = "Added" },
|
||||
{ message = "^.*: remove", group = "Removed" },
|
||||
{ message = "^remove", group = "Removed" },
|
||||
{ message = "^.*: delete", group = "Removed" },
|
||||
{ message = "^delete", group = "Removed" },
|
||||
{ message = "^.*: test", group = "Fixed" },
|
||||
{ message = "^test", group = "Fixed" },
|
||||
{ message = "^.*: fix", group = "Fixed" },
|
||||
{ message = "^fix", group = "Fixed" },
|
||||
{ message = "^.*", group = "Changed" },
|
||||
]
|
||||
# protect breaking changes from being skipped due to matching a skipping commit_parser
|
||||
protect_breaking_commits = false
|
||||
# filter out the commits that are not matched by commit parsers
|
||||
filter_commits = true
|
||||
# regex for matching git tags
|
||||
tag_pattern = "v[0-9].*"
|
||||
# regex for skipping tags
|
||||
skip_tags = "v0.1.0-beta.1"
|
||||
# regex for ignoring tags
|
||||
ignore_tags = ""
|
||||
# sort the tags topologically
|
||||
topo_order = false
|
||||
# sort the commits inside sections by oldest/newest order
|
||||
sort_commits = "oldest"
|
||||
55
deployment/docker/.env.example
Normal file
55
deployment/docker/.env.example
Normal file
@@ -0,0 +1,55 @@
|
||||
# General environment variables
|
||||
TZ=London/Europe
|
||||
|
||||
# PostgreSQL
|
||||
POSTGRES_HOST=postgres
|
||||
POSTGRES_PORT=5432
|
||||
POSTGRES_USER=postgres
|
||||
POSTGRES_PASSWORD=postgres
|
||||
POSTGRES_DB=knightcrawler
|
||||
|
||||
# MongoDB
|
||||
MONGODB_HOST=mongodb
|
||||
MONGODB_PORT=27017
|
||||
MONGODB_DB=knightcrawler
|
||||
MONGODB_USER=mongo
|
||||
MONGODB_PASSWORD=mongo
|
||||
|
||||
# RabbitMQ
|
||||
RABBITMQ_HOST=rabbitmq
|
||||
RABBITMQ_USER=guest
|
||||
RABBITMQ_PASSWORD=guest
|
||||
RABBITMQ_QUEUE_NAME=ingested
|
||||
RABBITMQ_DURABLE=true
|
||||
RABBITMQ_MAX_QUEUE_SIZE=0
|
||||
RABBITMQ_MAX_PUBLISH_BATCH_SIZE=500
|
||||
RABBITMQ_PUBLISH_INTERVAL_IN_SECONDS=10
|
||||
|
||||
# Metadata
|
||||
## Only used if DATA_ONCE is set to false. If true, the schedule is ignored
|
||||
METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE="0 0 1 * *"
|
||||
## If true, the metadata will be downloaded once and then the schedule will be ignored
|
||||
METADATA_DOWNLOAD_IMDB_DATA_ONCE=true
|
||||
## Controls the amount of records processed in memory at any given time during import, higher values will consume more memory
|
||||
METADATA_INSERT_BATCH_SIZE=25000
|
||||
|
||||
# Addon
|
||||
DEBUG_MODE=false
|
||||
|
||||
# Consumer
|
||||
JOB_CONCURRENCY=5
|
||||
JOBS_ENABLED=true
|
||||
## can be debug for extra verbosity (a lot more verbosity - useful for development)
|
||||
LOG_LEVEL=info
|
||||
MAX_CONNECTIONS_PER_TORRENT=10
|
||||
MAX_CONNECTIONS_OVERALL=100
|
||||
TORRENT_TIMEOUT=30000
|
||||
UDP_TRACKERS_ENABLED=true
|
||||
CONSUMER_REPLICAS=3
|
||||
## Fix for #66 - toggle on for development
|
||||
AUTO_CREATE_AND_APPLY_MIGRATIONS=false
|
||||
## Allows control of the threshold for matching titles to the IMDB dataset. The closer to 0, the more strict the matching.
|
||||
TITLE_MATCH_THRESHOLD=0.25
|
||||
|
||||
# Producer
|
||||
GITHUB_PAT=
|
||||
@@ -1,64 +0,0 @@
|
||||
[Application]
|
||||
FileLogger\Age=1
|
||||
FileLogger\AgeType=1
|
||||
FileLogger\Backup=true
|
||||
FileLogger\DeleteOld=true
|
||||
FileLogger\Enabled=true
|
||||
FileLogger\MaxSizeBytes=66560
|
||||
FileLogger\Path=/config/qBittorrent/logs
|
||||
|
||||
[AutoRun]
|
||||
enabled=false
|
||||
program=
|
||||
|
||||
[BitTorrent]
|
||||
Session\AnonymousModeEnabled=true
|
||||
Session\BTProtocol=TCP
|
||||
Session\ConnectionSpeed=150
|
||||
Session\DefaultSavePath=/downloads/
|
||||
Session\ExcludedFileNames=
|
||||
Session\MaxActiveCheckingTorrents=20
|
||||
Session\MaxActiveDownloads=20
|
||||
Session\MaxActiveTorrents=50
|
||||
Session\MaxActiveUploads=50
|
||||
Session\MaxConcurrentHTTPAnnounces=1000
|
||||
Session\MaxConnections=2000
|
||||
Session\Port=6881
|
||||
Session\QueueingSystemEnabled=true
|
||||
Session\TempPath=/downloads/incomplete/
|
||||
Session\TorrentStopCondition=MetadataReceived
|
||||
|
||||
[Core]
|
||||
AutoDeleteAddedTorrentFile=Never
|
||||
|
||||
[LegalNotice]
|
||||
Accepted=true
|
||||
|
||||
[Meta]
|
||||
MigrationVersion=6
|
||||
|
||||
[Network]
|
||||
PortForwardingEnabled=true
|
||||
Proxy\HostnameLookupEnabled=false
|
||||
Proxy\Profiles\BitTorrent=true
|
||||
Proxy\Profiles\Misc=true
|
||||
Proxy\Profiles\RSS=true
|
||||
|
||||
[Preferences]
|
||||
Connection\PortRangeMin=6881
|
||||
Connection\ResolvePeerCountries=false
|
||||
Connection\UPnP=false
|
||||
Downloads\SavePath=/downloads/
|
||||
Downloads\TempPath=/downloads/incomplete/
|
||||
General\Locale=en
|
||||
MailNotification\req_auth=true
|
||||
WebUI\Address=*
|
||||
WebUI\AuthSubnetWhitelist=0.0.0.0/0
|
||||
WebUI\AuthSubnetWhitelistEnabled=true
|
||||
WebUI\HostHeaderValidation=false
|
||||
WebUI\LocalHostAuth=false
|
||||
WebUI\ServerDomains=*
|
||||
|
||||
[RSS]
|
||||
AutoDownloader\DownloadRepacks=true
|
||||
AutoDownloader\SmartEpisodeFilter=s(\\d+)e(\\d+), (\\d+)x(\\d+), "(\\d{4}[.\\-]\\d{1,2}[.\\-]\\d{1,2})", "(\\d{1,2}[.\\-]\\d{1,2}[.\\-]\\d{4})"
|
||||
@@ -1,244 +1,139 @@
|
||||
version: "3.9"
|
||||
|
||||
name: knightcrawler
|
||||
|
||||
networks:
|
||||
knightcrawler-network:
|
||||
name: knightcrawler-network
|
||||
driver: bridge
|
||||
x-restart: &restart-policy "unless-stopped"
|
||||
|
||||
volumes:
|
||||
postgres:
|
||||
lavinmq:
|
||||
redis:
|
||||
x-basehealth: &base-health
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
x-rabbithealth: &rabbitmq-health
|
||||
test: rabbitmq-diagnostics -q ping
|
||||
<<: *base-health
|
||||
|
||||
x-mongohealth: &mongodb-health
|
||||
test: ["CMD", "mongosh", "--eval", "db.adminCommand('ping')"]
|
||||
<<: *base-health
|
||||
|
||||
x-postgreshealth: &postgresdb-health
|
||||
test: pg_isready
|
||||
<<: *base-health
|
||||
|
||||
x-apps: &knightcrawler-app
|
||||
depends_on:
|
||||
mongodb:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
rabbitmq:
|
||||
condition: service_healthy
|
||||
restart: *restart-policy
|
||||
|
||||
services:
|
||||
## Postgres is the database that is used by the services.
|
||||
## All downloaded metadata is stored in this database.
|
||||
postgres:
|
||||
env_file: stack.env
|
||||
healthcheck:
|
||||
test: [ "CMD", "sh", "-c", "pg_isready -h localhost -U $$POSTGRES_USER" ]
|
||||
timeout: 10s
|
||||
interval: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
image: postgres:latest
|
||||
env_file: .env
|
||||
environment:
|
||||
PGUSER: postgres # needed for healthcheck.
|
||||
# # If you need the database to be accessible from outside, please open the below port.
|
||||
# # Furthermore, please, please, please, change the username and password in the stack.env file.
|
||||
# # Furthermore, please, please, please, change the username and password in the .env file.
|
||||
# # If you want to enhance your security even more, create a new user for the database with a strong password.
|
||||
# ports:
|
||||
# - "5432:5432"
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- postgres:/var/lib/postgresql/data
|
||||
|
||||
## Redis is used as a cache for the services.
|
||||
## It is used to store the infohashes that are currently being processed in sagas, as well as intrim data.
|
||||
redis:
|
||||
env_file: stack.env
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "redis-cli ping"]
|
||||
timeout: 10s
|
||||
interval: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
image: redis/redis-stack:latest
|
||||
# # If you need redis to be accessible from outside, please open the below port.
|
||||
# ports:
|
||||
# - "6379:6379"
|
||||
healthcheck: *postgresdb-health
|
||||
restart: *restart-policy
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redis:/data
|
||||
|
||||
## LavinMQ is used as a message broker for the services.
|
||||
## It is a high performance drop in replacement for RabbitMQ.
|
||||
## It is used to communicate between the services.
|
||||
lavinmq:
|
||||
env_file: stack.env
|
||||
mongodb:
|
||||
image: mongo:latest
|
||||
env_file: .env
|
||||
environment:
|
||||
MONGO_INITDB_ROOT_USERNAME: ${MONGODB_USER:?Variable MONGODB_USER not set}
|
||||
MONGO_INITDB_ROOT_PASSWORD: ${MONGODB_PASSWORD:?Variable MONGODB_PASSWORD not set}
|
||||
# # If you need the database to be accessible from outside, please open the below port.
|
||||
# # Furthermore, please, please, please, look at the documentation for lavinmq / rabbitmq on how to secure the service.
|
||||
# # Furthermore, please, please, please, change the username and password in the .env file.
|
||||
# ports:
|
||||
# - "27017:27017"
|
||||
volumes:
|
||||
- mongo:/data/db
|
||||
restart: *restart-policy
|
||||
healthcheck: *mongodb-health
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
|
||||
rabbitmq:
|
||||
image: rabbitmq:3-management
|
||||
# # If you need the database to be accessible from outside, please open the below port.
|
||||
# # Furthermore, please, please, please, look at the documentation for rabbit on how to secure the service.
|
||||
# ports:
|
||||
# - "5672:5672"
|
||||
# - "15672:15672"
|
||||
# - "15692:15692"
|
||||
image: cloudamqp/lavinmq:latest
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "lavinmqctl status"]
|
||||
timeout: 10s
|
||||
interval: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
volumes:
|
||||
- lavinmq:/var/lib/lavinmq/
|
||||
|
||||
## The addon. This is what is used in stremio
|
||||
addon:
|
||||
depends_on:
|
||||
metadata:
|
||||
condition: service_completed_successfully
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
hostname: knightcrawler-addon
|
||||
image: gabisonfire/knightcrawler-addon:2.0.26
|
||||
labels:
|
||||
logging: promtail
|
||||
- rabbitmq:/var/lib/rabbitmq
|
||||
hostname: ${RABBITMQ_HOST}
|
||||
restart: *restart-policy
|
||||
healthcheck: *rabbitmq-health
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
|
||||
producer:
|
||||
image: gabisonfire/knightcrawler-producer:latest
|
||||
labels:
|
||||
logging: "promtail"
|
||||
env_file: .env
|
||||
<<: *knightcrawler-app
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
|
||||
consumer:
|
||||
image: gabisonfire/knightcrawler-consumer:latest
|
||||
env_file: .env
|
||||
labels:
|
||||
logging: "promtail"
|
||||
deploy:
|
||||
replicas: ${CONSUMER_REPLICAS}
|
||||
<<: *knightcrawler-app
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
|
||||
metadata:
|
||||
image: gabisonfire/knightcrawler-metadata:latest
|
||||
env_file: .env
|
||||
labels:
|
||||
logging: "promtail"
|
||||
restart: no
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
|
||||
addon:
|
||||
<<: *knightcrawler-app
|
||||
env_file: .env
|
||||
hostname: knightcrawler-addon
|
||||
image: gabisonfire/knightcrawler-addon:latest
|
||||
labels:
|
||||
logging: "promtail"
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
# - caddy
|
||||
ports:
|
||||
- "7000:7000"
|
||||
restart: unless-stopped
|
||||
|
||||
## The consumer is responsible for consuming infohashes and orchestrating download of metadata.
|
||||
consumer:
|
||||
depends_on:
|
||||
metadata:
|
||||
condition: service_completed_successfully
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-consumer:2.0.26
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
|
||||
## The debrid collector is responsible for downloading metadata from debrid services. (Currently only RealDebrid is supported)
|
||||
debridcollector:
|
||||
depends_on:
|
||||
metadata:
|
||||
condition: service_completed_successfully
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-debrid-collector:2.0.26
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
knightcrawler-network:
|
||||
driver: bridge
|
||||
name: knightcrawler-network
|
||||
|
||||
## The metadata service is responsible for downloading imdb publically available datasets.
|
||||
## This is used to enrich the metadata during production of ingested infohashes.
|
||||
metadata:
|
||||
depends_on:
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-metadata:2.0.26
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: "no"
|
||||
# caddy:
|
||||
# name: caddy
|
||||
# external: true
|
||||
|
||||
## The migrator is responsible for migrating the database schema.
|
||||
migrator:
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-migrator:2.0.26
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: "no"
|
||||
|
||||
## The producer is responsible for producing infohashes by acquiring for various sites, including DMM.
|
||||
producer:
|
||||
depends_on:
|
||||
metadata:
|
||||
condition: service_completed_successfully
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-producer:2.0.26
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
|
||||
## QBit collector utilizes QBitTorrent to download metadata.
|
||||
qbitcollector:
|
||||
depends_on:
|
||||
metadata:
|
||||
condition: service_completed_successfully
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
qbittorrent:
|
||||
condition: service_healthy
|
||||
deploy:
|
||||
replicas: ${QBIT_REPLICAS:-0}
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-qbit-collector:2.0.26
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
|
||||
## QBitTorrent is a torrent client that can be used to download torrents. In this case its used to download metadata.
|
||||
## The QBit collector requires this.
|
||||
qbittorrent:
|
||||
deploy:
|
||||
replicas: ${QBIT_REPLICAS:-0}
|
||||
env_file: stack.env
|
||||
environment:
|
||||
PGID: "1000"
|
||||
PUID: "1000"
|
||||
TORRENTING_PORT: "6881"
|
||||
WEBUI_PORT: "8080"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl --fail http://localhost:8080"]
|
||||
timeout: 10s
|
||||
interval: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
image: lscr.io/linuxserver/qbittorrent:latest
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
ports:
|
||||
- "6881:6881/tcp"
|
||||
- "6881:6881/udp"
|
||||
# if you want to expose the webui, uncomment the following line
|
||||
# - "8001:8080"
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- ./config/qbit/qbittorrent.conf:/config/qBittorrent/qBittorrent.conf
|
||||
volumes:
|
||||
postgres:
|
||||
mongo:
|
||||
rabbitmq:
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
## Once you have confirmed Caddy works you should comment out
|
||||
## the below line:
|
||||
acme_ca https://acme-staging-v02.api.letsencrypt.org/directory
|
||||
acme_ca https://acme-staging-v02.api.letsencrypt.org/director
|
||||
}
|
||||
|
||||
(security-headers) {
|
||||
|
||||
@@ -16,7 +16,7 @@ rule_files:
|
||||
scrape_configs:
|
||||
- job_name: "rabbitmq"
|
||||
static_configs:
|
||||
- targets: ["lavinmq:15692"]
|
||||
- targets: ["rabbitmq:15692"]
|
||||
- job_name: "postgres-exporter"
|
||||
static_configs:
|
||||
- targets: ["postgres-exporter:9187"]
|
||||
|
||||
@@ -1,87 +0,0 @@
|
||||
x-basehealth: &base-health
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
x-lavinhealth: &lavinmq-health
|
||||
test: [ "CMD-SHELL", "lavinmqctl status" ]
|
||||
<<: *base-health
|
||||
|
||||
x-redishealth: &redis-health
|
||||
test: redis-cli ping
|
||||
<<: *base-health
|
||||
|
||||
x-postgreshealth: &postgresdb-health
|
||||
test: [ "CMD", "sh", "-c", "pg_isready -h localhost -U $$POSTGRES_USER" ]
|
||||
<<: *base-health
|
||||
|
||||
x-qbit: &qbit-health
|
||||
test: "curl --fail http://localhost:8080"
|
||||
<<: *base-health
|
||||
|
||||
services:
|
||||
|
||||
postgres:
|
||||
image: postgres:latest
|
||||
environment:
|
||||
PGUSER: postgres # needed for healthcheck.
|
||||
# # If you need the database to be accessible from outside, please open the below port.
|
||||
# # Furthermore, please, please, please, change the username and password in the .env file.
|
||||
# # If you want to enhance your security even more, create a new user for the database with a strong password.
|
||||
# ports:
|
||||
# - "5432:5432"
|
||||
volumes:
|
||||
- postgres:/var/lib/postgresql/data
|
||||
healthcheck: *postgresdb-health
|
||||
restart: unless-stopped
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
|
||||
redis:
|
||||
image: redis/redis-stack:latest
|
||||
# # If you need redis to be accessible from outside, please open the below port.
|
||||
# ports:
|
||||
# - "6379:6379"
|
||||
volumes:
|
||||
- redis:/data
|
||||
restart: unless-stopped
|
||||
healthcheck: *redis-health
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
|
||||
lavinmq:
|
||||
env_file: stack.env
|
||||
# # If you need the database to be accessible from outside, please open the below port.
|
||||
# # Furthermore, please, please, please, look at the documentation for lavinmq / rabbitmq on how to secure the service.
|
||||
# ports:
|
||||
# - "5672:5672"
|
||||
# - "15672:15672"
|
||||
# - "15692:15692"
|
||||
image: cloudamqp/lavinmq:latest
|
||||
healthcheck: *lavinmq-health
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- lavinmq:/var/lib/lavinmq/
|
||||
|
||||
## QBitTorrent is a torrent client that can be used to download torrents. In this case its used to download metadata.
|
||||
## The QBit collector requires this.
|
||||
qbittorrent:
|
||||
image: lscr.io/linuxserver/qbittorrent:latest
|
||||
environment:
|
||||
- PUID=1000
|
||||
- PGID=1000
|
||||
- WEBUI_PORT=8080
|
||||
- TORRENTING_PORT=6881
|
||||
ports:
|
||||
- 6881:6881
|
||||
- 6881:6881/udp
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: unless-stopped
|
||||
healthcheck: *qbit-health
|
||||
volumes:
|
||||
- ../../config/qbit/qbittorrent.conf:/config/qBittorrent/qBittorrent.conf
|
||||
@@ -1,71 +0,0 @@
|
||||
x-apps: &knightcrawler-app
|
||||
labels:
|
||||
logging: "promtail"
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
|
||||
x-depends: &knightcrawler-app-depends
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
lavinmq:
|
||||
condition: service_healthy
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
metadata:
|
||||
condition: service_completed_successfully
|
||||
|
||||
services:
|
||||
metadata:
|
||||
image: gabisonfire/knightcrawler-metadata:2.0.26
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: no
|
||||
depends_on:
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
|
||||
migrator:
|
||||
image: gabisonfire/knightcrawler-migrator:2.0.26
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: no
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
|
||||
addon:
|
||||
image: gabisonfire/knightcrawler-addon:2.0.26
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
hostname: knightcrawler-addon
|
||||
ports:
|
||||
- "7000:7000"
|
||||
|
||||
consumer:
|
||||
image: gabisonfire/knightcrawler-consumer:2.0.26
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
|
||||
debridcollector:
|
||||
image: gabisonfire/knightcrawler-debrid-collector:2.0.26
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
|
||||
producer:
|
||||
image: gabisonfire/knightcrawler-producer:2.0.26
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
|
||||
qbitcollector:
|
||||
image: gabisonfire/knightcrawler-qbit-collector:2.0.26
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
qbittorrent:
|
||||
condition: service_healthy
|
||||
@@ -1,4 +0,0 @@
|
||||
networks:
|
||||
knightcrawler-network:
|
||||
driver: bridge
|
||||
name: knightcrawler-network
|
||||
@@ -1,4 +0,0 @@
|
||||
volumes:
|
||||
postgres:
|
||||
redis:
|
||||
lavinmq:
|
||||
@@ -1,7 +0,0 @@
|
||||
services:
|
||||
qbittorrent:
|
||||
deploy:
|
||||
replicas: 0
|
||||
qbitcollector:
|
||||
deploy:
|
||||
replicas: 0
|
||||
@@ -1,7 +0,0 @@
|
||||
version: "3.9"
|
||||
name: "knightcrawler"
|
||||
include:
|
||||
- ./components/network.yaml
|
||||
- ./components/volumes.yaml
|
||||
- ./components/infrastructure.yaml
|
||||
- ./components/knightcrawler.yaml
|
||||
@@ -1,41 +0,0 @@
|
||||
# General environment variables
|
||||
TZ=London/Europe
|
||||
|
||||
# PostgreSQL
|
||||
POSTGRES_HOST=postgres
|
||||
POSTGRES_PORT=5432
|
||||
POSTGRES_USER=postgres
|
||||
POSTGRES_PASSWORD=postgres
|
||||
POSTGRES_DB=knightcrawler
|
||||
|
||||
# Redis
|
||||
REDIS_HOST=redis
|
||||
REDIS_PORT=6379
|
||||
REDIS_EXTRA=abortConnect=false,allowAdmin=true
|
||||
|
||||
# AMQP
|
||||
RABBITMQ_HOST=lavinmq
|
||||
RABBITMQ_USER=guest
|
||||
RABBITMQ_PASSWORD=guest
|
||||
RABBITMQ_CONSUMER_QUEUE_NAME=ingested
|
||||
RABBITMQ_DURABLE=true
|
||||
RABBITMQ_MAX_QUEUE_SIZE=0
|
||||
RABBITMQ_MAX_PUBLISH_BATCH_SIZE=500
|
||||
RABBITMQ_PUBLISH_INTERVAL_IN_SECONDS=10
|
||||
|
||||
# Metadata
|
||||
METADATA_INSERT_BATCH_SIZE=50000
|
||||
|
||||
# Collectors
|
||||
COLLECTOR_QBIT_ENABLED=false
|
||||
COLLECTOR_DEBRID_ENABLED=true
|
||||
COLLECTOR_REAL_DEBRID_API_KEY=
|
||||
QBIT_HOST=http://qbittorrent:8080
|
||||
QBIT_TRACKERS_URL=https://raw.githubusercontent.com/ngosang/trackerslist/master/trackers_all_http.txt
|
||||
QBIT_CONCURRENCY=8
|
||||
|
||||
# Number of replicas for the qBittorrent collector and qBitTorrent client. Should be 0 or 1.
|
||||
QBIT_REPLICAS=0
|
||||
|
||||
# Addon
|
||||
DEBUG_MODE=false
|
||||
14
docs/Writerside/cfg/buildprofiles.xml
Normal file
14
docs/Writerside/cfg/buildprofiles.xml
Normal file
@@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<buildprofiles xsi:noNamespaceSchemaLocation="https://resources.jetbrains.com/writerside/1.0/build-profiles.xsd"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
|
||||
<variables>
|
||||
<header-logo>knight-crawler-logo.png</header-logo>
|
||||
</variables>
|
||||
<build-profile instance="kc">
|
||||
<variables>
|
||||
<noindex-content>true</noindex-content>
|
||||
</variables>
|
||||
</build-profile>
|
||||
|
||||
</buildprofiles>
|
||||
BIN
docs/Writerside/images/knight-crawler-logo.png
Normal file
BIN
docs/Writerside/images/knight-crawler-logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 568 KiB |
13
docs/Writerside/kc.tree
Normal file
13
docs/Writerside/kc.tree
Normal file
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE instance-profile
|
||||
SYSTEM "https://resources.jetbrains.com/writerside/1.0/product-profile.dtd">
|
||||
|
||||
<instance-profile id="kc" name="Knight Crawler"
|
||||
start-page="Overview.md">
|
||||
|
||||
<toc-element topic="Overview.md"/>
|
||||
<toc-element topic="Getting-started.md">
|
||||
</toc-element>
|
||||
<toc-element topic="External-access.md"/>
|
||||
<toc-element topic="Supported-Debrid-services.md"/>
|
||||
</instance-profile>
|
||||
57
docs/Writerside/topics/External-access.md
Normal file
57
docs/Writerside/topics/External-access.md
Normal file
@@ -0,0 +1,57 @@
|
||||
# External access
|
||||
|
||||
This guide outlines how to use Knight Crawler on devices like your TV. While it's currently limited to the device of
|
||||
installation, we can change that. With some extra effort, we'll show you how to make it accessible on other devices.
|
||||
This limitation is set by Stremio, as [explained here](https://github.com/Stremio/stremio-features/issues/687#issuecomment-1890546094).
|
||||
|
||||
## What to keep in mind
|
||||
|
||||
Before we make Knight Crawler available outside your home network, we've got to talk about safety. No software is
|
||||
perfect, including ours. Knight Crawler is built on lots of different parts, some made by other people. So, if we keep
|
||||
it just for your home network, it's a bit safer. But if you want to use it over the internet, just know that keeping
|
||||
your devices secure is up to you. We won't be responsible for any problems or lost data if you use Knight Crawler that way.
|
||||
|
||||
## Initial setup
|
||||
|
||||
To enable external access for Knight Crawler, whether it's within your home network or over the internet, you'll
|
||||
need to follow these initial setup steps:
|
||||
|
||||
- Set up Caddy, a powerful and easy-to-use web server.
|
||||
- Disable the open port in the Knight Crawler <path>docker-compose.yaml</path> file.
|
||||
|
||||
|
||||
### Caddy
|
||||
|
||||
A basic Caddy configuration is included with Knight Crawler in the deployment directory.
|
||||
|
||||
<path>deployment/docker/optional-services/caddy</path>
|
||||
|
||||
```Generic
|
||||
deployment/
|
||||
└── docker/
|
||||
└── optional-services/
|
||||
└── caddy/
|
||||
├── config/
|
||||
│ ├── snippets/
|
||||
│ │ └── cloudflare-replace-X-Forwarded-For
|
||||
│ └── Caddyfile
|
||||
├── logs/
|
||||
└── docker-compose.yaml
|
||||
```
|
||||
|
||||
ports:
|
||||
- "8080:8080"
|
||||
|
||||
By disabling the default port, Knight Crawler will only be accessible internally within your network, ensuring added security.
|
||||
|
||||
## Home network access
|
||||
|
||||
## Internet access
|
||||
|
||||
### Through a VPN
|
||||
|
||||
### On the public web
|
||||
|
||||
## Troubleshooting?
|
||||
|
||||
## Additional Resources?
|
||||
192
docs/Writerside/topics/Getting-started.md
Normal file
192
docs/Writerside/topics/Getting-started.md
Normal file
@@ -0,0 +1,192 @@
|
||||
# Getting started
|
||||
|
||||
Knight Crawler is provided as an all-in-one solution. This means we include all the necessary software you need to get started
|
||||
out of the box.
|
||||
|
||||
## Before you start
|
||||
|
||||
Make sure that you have:
|
||||
|
||||
- A place to host Knight Crawler
|
||||
- [Docker](https://docs.docker.com/get-docker/) and [Compose](https://docs.docker.com/compose/install/) installed
|
||||
- A [GitHub](https://github.com/) account _(optional)_
|
||||
|
||||
|
||||
## Download the files
|
||||
|
||||
Installing Knight Crawler is as simple as downloading a copy of the [deployment directory](https://github.com/Gabisonfire/knightcrawler/tree/master/deployment/docker).
|
||||
|
||||
A basic installation requires only two files:
|
||||
- <path>deployment/docker/.env.example</path>
|
||||
- <path>deployment/docker/docker-compose.yaml</path>.
|
||||
|
||||
For this guide I will be placing them in a directory on my home drive <path>~/knightcrawler</path>.
|
||||
|
||||
Rename the <path>.env.example</path> file to be <path>.env</path>
|
||||
|
||||
```
|
||||
~/
|
||||
└── knightcrawler/
|
||||
├── .env
|
||||
└── docker-compose.yaml
|
||||
```
|
||||
|
||||
## Initial configuration
|
||||
|
||||
Below are a few recommended configuration changes.
|
||||
|
||||
Open the <path>.env</path> file in your favourite editor.
|
||||
|
||||
> If you are using an external database, configure it in the <path>.env</path> file. Don't forget to disable the ones
|
||||
> included in the <path>docker-compose.yaml</path>.
|
||||
|
||||
### Database credentials
|
||||
|
||||
It is strongly recommended that you change the credentials for the databases included with Knight Crawler. This is best done
|
||||
before running Knight Crawler for the first time. It is much harder to change the passwords once the services have been started
|
||||
for the first time.
|
||||
|
||||
```Bash
|
||||
POSTGRES_PASSWORD=postgres
|
||||
...
|
||||
MONGODB_PASSWORD=mongo
|
||||
...
|
||||
RABBITMQ_PASSWORD=guest
|
||||
```
|
||||
|
||||
Here's a few options on generating a secure password:
|
||||
|
||||
```Bash
|
||||
# Linux
|
||||
tr -cd '[:alnum:]' < /dev/urandom | fold -w 64 | head -n 1
|
||||
# Or you could use openssl
|
||||
openssl rand -hex 32
|
||||
```
|
||||
```Python
|
||||
# Python
|
||||
import secrets
|
||||
|
||||
print(secrets.token_hex(32))
|
||||
```
|
||||
|
||||
### Your time zone
|
||||
|
||||
```Bash
|
||||
TZ=London/Europe
|
||||
```
|
||||
|
||||
A list of time zones can be found on [Wikipedia](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
|
||||
|
||||
### Consumers
|
||||
|
||||
```Bash
|
||||
JOB_CONCURRENCY=5
|
||||
...
|
||||
MAX_CONNECTIONS_PER_TORRENT=10
|
||||
...
|
||||
CONSUMER_REPLICAS=3
|
||||
```
|
||||
|
||||
These are totally subjective to your machine and network capacity. The above default is pretty minimal and will work on
|
||||
most machines.
|
||||
|
||||
`JOB_CONCURRENCY` is how many films and tv shows the consumers should process at once. As this affects every consumer
|
||||
this will likely cause exponential
|
||||
strain on your system. It's probably best to leave this at 5, but you can try experimenting with it if you wish.
|
||||
|
||||
`MAX_CONNECTIONS_PER_TORRENT` is how many peers the consumer will attempt to connect to when it is trying to collect
|
||||
metadata.
|
||||
Increasing this value can speed up processing, but you will eventually reach a point where more connections are being
|
||||
made than
|
||||
your router can handle. This will then cause a cascading fail where your internet stops working. If you are going to
|
||||
increase this value
|
||||
then try increasing it by 10 at a time.
|
||||
|
||||
> Increasing this value increases the max connections for every parallel job, for every consumer. For example
|
||||
> with the default values above this means that Knight Crawler will be on average making `(5 x 3) x 10 = 150`
|
||||
> connections at any one time.
|
||||
>
|
||||
{style="warning"}
|
||||
|
||||
`CONSUMER_REPLICAS` is how many consumers should be initially started. You can increase or decrease the number of consumers whilst the
|
||||
service is running by running the command `docker compose up -d --scale consumer=<number>`.
|
||||
|
||||
### GitHub personal access token
|
||||
|
||||
This step is optional but strongly recommended. [Debrid Media Manager](https://debridmediamanager.com/start) is a media library manager
|
||||
for Debrid services. When a user of this service chooses to export/share their library publicly it is saved to a public GitHub repository.
|
||||
This is, essentially, a repository containing a vast amount of ready to go films and tv shows. Knight Crawler comes with the ability to
|
||||
read these exported lists, but it requires a GitHub account to make it work.
|
||||
|
||||
Knight Crawler needs a personal access token with read-only access to public repositories. This means we can not access any private
|
||||
repositories you have.
|
||||
|
||||
1. Navigate to GitHub settings ([GitHub token settings](https://github.com/settings/tokens?type=beta)):
|
||||
- Navigate to `GitHub settings`.
|
||||
- Click on `Developer Settings`.
|
||||
- Select `Personal access tokens`.
|
||||
- Choose `Fine-grained tokens`.
|
||||
|
||||
2. Press `Generate new token`.
|
||||
|
||||
3. Fill out the form with the following information:
|
||||
```Generic
|
||||
Token name:
|
||||
KnightCrawler
|
||||
Expiration:
|
||||
90 days
|
||||
Description:
|
||||
<blank>
|
||||
Repository access:
|
||||
(checked) Public Repositories (read-only)
|
||||
```
|
||||
|
||||
4. Click `Generate token`.
|
||||
|
||||
5. Take the new token and add it to the bottom of the <path>.env</path> file:
|
||||
```Bash
|
||||
# Producer
|
||||
GITHUB_PAT=<YOUR TOKEN HERE>
|
||||
```
|
||||
|
||||
## Start Knight Crawler
|
||||
|
||||
To start Knight Crawler use the following command:
|
||||
|
||||
```Bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Then we can follow the logs to watch it start:
|
||||
|
||||
```Bash
|
||||
docker compose logs -f --since 1m
|
||||
```
|
||||
|
||||
> Knight Crawler will only be accessible on the machine you run it on, to make it accessible from other machines navigate to [External access](External-access.md).
|
||||
>
|
||||
{style="note"}
|
||||
|
||||
To stop following the logs press <shortcut>Ctrl+C</shortcut> at any time.
|
||||
|
||||
The Knight Crawler configuration page should now be accessible in your web browser at [http://localhost:7000](http://localhost:7000)
|
||||
|
||||
## Start more consumers
|
||||
|
||||
If you wish to speed up the processing of the films and tv shows that Knight Crawler finds, then you'll likely want to
|
||||
increase the number of consumers.
|
||||
|
||||
The below command can be used to both increase or decrease the number of running consumers. Gradually increase the number
|
||||
until you encounter any issues and then decrease until stable.
|
||||
|
||||
```Bash
|
||||
docker compose up -d --scale consumer=<number>
|
||||
```
|
||||
|
||||
## Stop Knight Crawler
|
||||
|
||||
Knight Crawler can be stopped with the following command:
|
||||
|
||||
```Bash
|
||||
docker compose down
|
||||
```
|
||||
30
docs/Writerside/topics/Overview.md
Normal file
30
docs/Writerside/topics/Overview.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# Overview
|
||||
|
||||
<img alt="The image shows a Knight in silvery armour looking forwards." src="knight-crawler-logo.png" title="Knight Crawler logo" width="100"/>
|
||||
|
||||
Knight Crawler is a self-hosted [Stremio](https://www.stremio.com/) addon for streaming torrents via
|
||||
a [Debrid](Supported-Debrid-services.md "Click for a list of Debrid services we support") service.
|
||||
|
||||
We are active on [Discord](https://discord.gg/8fQdxay9z2) for both support and casual conversation.
|
||||
|
||||
> Knight Crawler is currently alpha software.
|
||||
>
|
||||
> Users are responsible for ensuring their data is backed up regularly.
|
||||
>
|
||||
> Please read the changelogs before updating to the latest version.
|
||||
>
|
||||
{style="warning"}
|
||||
|
||||
## What does Knight Crawler do?
|
||||
|
||||
Knight Crawler is an addon for [Stremio](https://www.stremio.com/). It began as a fork of the very popular
|
||||
[Torrentio](https://github.com/TheBeastLT/torrentio-scraper) addon. Knight crawler essentially does the following:
|
||||
|
||||
1. It searches the internet for available films and tv shows.
|
||||
2. It collects as much information as it can about each film and tv show it finds.
|
||||
3. It then stores this information to a database for easy access.
|
||||
|
||||
When you choose on a film or tv show to watch on Stremio, a request will be sent to your installation of Knight Crawler.
|
||||
Knight Crawler will query the database and return a list of all the copies it has stored in the database as Debrid
|
||||
links.
|
||||
This enables playback to begin immediately for your chosen media.
|
||||
3
docs/Writerside/topics/Supported-Debrid-services.md
Normal file
3
docs/Writerside/topics/Supported-Debrid-services.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Supported Debrid services
|
||||
|
||||
Start typing here...
|
||||
8
docs/Writerside/writerside.cfg
Normal file
8
docs/Writerside/writerside.cfg
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE ihp SYSTEM "https://resources.jetbrains.com/writerside/1.0/ihp.dtd">
|
||||
|
||||
<ihp version="2.0">
|
||||
<topics dir="topics" web-path="topics"/>
|
||||
<images dir="images" web-path="knightcrawler"/>
|
||||
<instance src="kc.tree"/>
|
||||
</ihp>
|
||||
@@ -1,36 +0,0 @@
|
||||
{
|
||||
"Serilog": {
|
||||
"Using": [ "Serilog.Sinks.Console" ],
|
||||
"MinimumLevel": {
|
||||
"Default": "Information",
|
||||
"Override": {
|
||||
"Microsoft": "Warning",
|
||||
"System": "Warning",
|
||||
"Npgsql.Command": "Warning",
|
||||
"Marten.IDocumentStore": "Warning",
|
||||
"Wolverine.Runtime.WolverineRuntime": "Warning",
|
||||
"Wolverine.Runtime.Agents.NodeAgentController": "Warning",
|
||||
"Oakton.Resources.ResourceSetupHostService": "Warning",
|
||||
"System.Net.Http.HttpClient.Scraper.LogicalHandler": "Warning",
|
||||
"System.Net.Http.HttpClient.Scraper.ClientHandler": "Warning",
|
||||
"Quartz.Impl.StdSchedulerFactory": "Warning",
|
||||
"Quartz.Core.QuartzScheduler": "Warning",
|
||||
"Quartz.Simpl.RAMJobStore": "Warning",
|
||||
"Quartz.Core.JobRunShell": "Warning",
|
||||
"Quartz.Core.SchedulerSignalerImpl": "Warning"
|
||||
}
|
||||
},
|
||||
"WriteTo": [
|
||||
{
|
||||
"Name": "Console",
|
||||
"Args": {
|
||||
"outputTemplate": "{Timestamp:HH:mm:ss} [{Level}] [{SourceContext}] {Message}{NewLine}{Exception}"
|
||||
}
|
||||
}
|
||||
],
|
||||
"Enrich": [ "FromLogContext", "WithMachineName", "WithThreadId" ],
|
||||
"Properties": {
|
||||
"Application": "Metadata"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,57 +0,0 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk.Worker">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net8.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<UserSecretsId>54cad2ee-57df-4bb2-a192-d5d501448e0a</UserSecretsId>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Dapper" Version="2.1.35" />
|
||||
<PackageReference Include="MassTransit" Version="8.2.0" />
|
||||
<PackageReference Include="MassTransit.RabbitMQ" Version="8.2.0" />
|
||||
<PackageReference Include="MassTransit.Redis" Version="8.2.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
|
||||
<PackageReference Include="Polly" Version="8.3.1" />
|
||||
<PackageReference Include="Serilog" Version="3.1.1" />
|
||||
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
|
||||
<PackageReference Include="System.Interactive.Async" Version="6.0.1" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Content Remove="Configuration\logging.json" />
|
||||
<None Include="Configuration\logging.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="requirements.txt">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<Content Remove="eng\**" />
|
||||
<None Remove="eng\**" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
|
||||
<Content Remove="python\**" />
|
||||
<None Include="python\**">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\shared\SharedContracts.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Compile Remove="eng\**" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<EmbeddedResource Remove="eng\**" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -1,33 +0,0 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DebridCollector", "DebridCollector.csproj", "{64C3253C-0638-4825-AC82-7D5600D1F9C9}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharedContracts", "..\shared\SharedContracts.csproj", "{C9BE500C-CE04-480B-874F-A85D33CAA821}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F53-28E6-404F-9EFE-DADFBEF8338B}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{72A042C3-B4F3-45C5-AC20-041FE8F41EFC}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
|
||||
eng\install-python-reqs.sh = eng\install-python-reqs.sh
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{64C3253C-0638-4825-AC82-7D5600D1F9C9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{64C3253C-0638-4825-AC82-7D5600D1F9C9}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{64C3253C-0638-4825-AC82-7D5600D1F9C9}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{64C3253C-0638-4825-AC82-7D5600D1F9C9}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{C9BE500C-CE04-480B-874F-A85D33CAA821}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{C9BE500C-CE04-480B-874F-A85D33CAA821}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{C9BE500C-CE04-480B-874F-A85D33CAA821}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{C9BE500C-CE04-480B-874F-A85D33CAA821}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(NestedProjects) = preSolution
|
||||
{C9BE500C-CE04-480B-874F-A85D33CAA821} = {2C0A0F53-28E6-404F-9EFE-DADFBEF8338B}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
@@ -1,31 +0,0 @@
|
||||
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0 AS build
|
||||
|
||||
ARG TARGETARCH
|
||||
WORKDIR /src
|
||||
COPY shared/ shared/
|
||||
COPY debrid-collector/ debrid-collector/
|
||||
WORKDIR /src/debrid-collector/
|
||||
RUN dotnet restore -a $TARGETARCH
|
||||
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH
|
||||
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
|
||||
|
||||
COPY --from=build /src/out .
|
||||
|
||||
RUN rm -rf /app/python && mkdir -p /app/python
|
||||
|
||||
RUN pip3 install -r /app/requirements.txt -t /app/python
|
||||
|
||||
RUN addgroup -S debrid && adduser -S -G debrid debrid
|
||||
USER debrid
|
||||
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
|
||||
CMD pgrep -f dotnet || exit 1
|
||||
ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0
|
||||
ENTRYPOINT ["dotnet", "DebridCollector.dll"]
|
||||
@@ -1,75 +0,0 @@
|
||||
namespace DebridCollector.Extensions;
|
||||
|
||||
public static class ServiceCollectionExtensions
|
||||
{
|
||||
internal static IServiceCollection AddDatabase(this IServiceCollection services)
|
||||
{
|
||||
services.LoadConfigurationFromEnv<PostgresConfiguration>();
|
||||
services.AddTransient<IDataStorage, DapperDataStorage>();
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services)
|
||||
{
|
||||
var serviceConfiguration = services.LoadConfigurationFromEnv<DebridCollectorConfiguration>();
|
||||
|
||||
services.AddRealDebridClient(serviceConfiguration);
|
||||
services.RegisterPythonEngine();
|
||||
services.AddSingleton<IRankTorrentName, RankTorrentName>();
|
||||
services.AddHostedService<DebridRequestProcessor>();
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
internal static IServiceCollection RegisterMassTransit(this IServiceCollection services)
|
||||
{
|
||||
var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();
|
||||
var redisConfiguration = services.LoadConfigurationFromEnv<RedisConfiguration>();
|
||||
|
||||
services.AddMassTransit(x =>
|
||||
{
|
||||
x.SetKebabCaseEndpointNameFormatter();
|
||||
|
||||
x.UsingRabbitMq((context, cfg) =>
|
||||
{
|
||||
cfg.AutoStart = true;
|
||||
|
||||
cfg.Host(
|
||||
rabbitConfiguration.Host, h =>
|
||||
{
|
||||
h.Username(rabbitConfiguration.Username);
|
||||
h.Password(rabbitConfiguration.Password);
|
||||
});
|
||||
|
||||
cfg.Message<CollectMetadata>(e => e.SetEntityName(rabbitConfiguration.DebridCollectorQueueName));
|
||||
cfg.ConfigureEndpoints(context);
|
||||
});
|
||||
|
||||
x.AddConsumer<PerformMetadataRequestConsumer>();
|
||||
x.AddConsumer<WriteMetadataConsumer>();
|
||||
x.RegisterMetadataIngestionSaga(redisConfiguration, rabbitConfiguration);
|
||||
});
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
private static void RegisterMetadataIngestionSaga(this IBusRegistrationConfigurator x, RedisConfiguration redisConfiguration, RabbitMqConfiguration rabbitMqConfiguration) =>
|
||||
x.AddSagaStateMachine<InfohashMetadataSagaStateMachine, InfohashMetadataSagaState>(
|
||||
cfg =>
|
||||
{
|
||||
cfg.UseMessageRetry(r => r.Intervals(1000,2000,5000));
|
||||
cfg.UseInMemoryOutbox();
|
||||
})
|
||||
.RedisRepository(redisConfiguration.ConnectionString, options =>
|
||||
{
|
||||
options.KeyPrefix = "debrid-collector:";
|
||||
})
|
||||
.Endpoint(
|
||||
e =>
|
||||
{
|
||||
e.Name = rabbitMqConfiguration.DebridCollectorQueueName;
|
||||
e.ConcurrentMessageLimit = 50;
|
||||
e.PrefetchCount = 50;
|
||||
});
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
namespace DebridCollector.Features.Configuration;
|
||||
|
||||
public class DebridCollectorConfiguration
|
||||
{
|
||||
private const string Prefix = "COLLECTOR";
|
||||
private const string RealDebridApiKeyVariable = "REAL_DEBRID_API_KEY";
|
||||
public string RealDebridApiKey { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(RealDebridApiKeyVariable);
|
||||
}
|
||||
@@ -1,64 +0,0 @@
|
||||
namespace DebridCollector.Features.Debrid;
|
||||
|
||||
public class DebridRequestProcessor(IDebridHttpClient debridHttpClient, ILogger<DebridRequestProcessor> logger, IBus messageBus) : BackgroundService
|
||||
{
|
||||
private const int BatchDelay = 3000;
|
||||
public const int MaxBatchSize = 100;
|
||||
|
||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
var requests = new List<PerformMetadataRequest>(MaxBatchSize);
|
||||
var delay = TimeSpan.FromMilliseconds(BatchDelay);
|
||||
while (!stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
while (ProcessorChannel.Queue.Reader.TryRead(out var request))
|
||||
{
|
||||
if (requests.Count >= MaxBatchSize)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (requests.All(x => x.InfoHash != request.InfoHash))
|
||||
{
|
||||
requests.Add(request);
|
||||
}
|
||||
}
|
||||
|
||||
if (requests.Any())
|
||||
{
|
||||
await ProcessRequests(requests, stoppingToken);
|
||||
requests.Clear();
|
||||
}
|
||||
|
||||
await Task.Delay(delay, stoppingToken);
|
||||
}
|
||||
|
||||
// After the loop ends, there may be remaining requests which were not processed. Let's process them:
|
||||
if (requests.Count != 0)
|
||||
{
|
||||
await ProcessRequests(requests, stoppingToken);
|
||||
requests.Clear();
|
||||
}
|
||||
}
|
||||
|
||||
private async Task ProcessRequests(IReadOnlyCollection<PerformMetadataRequest> requests, CancellationToken stoppingToken = default)
|
||||
{
|
||||
try
|
||||
{
|
||||
var results = await debridHttpClient.GetMetadataAsync(requests, stoppingToken);
|
||||
await ProcessResponses(results);
|
||||
logger.LogInformation("Processed: {Count} infoHashes", requests.Count);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
logger.LogError(e, "Failed to process infoHashes");
|
||||
}
|
||||
}
|
||||
|
||||
private async Task ProcessResponses(IEnumerable<TorrentMetadataResponse> results)
|
||||
{
|
||||
var messages = results.Select(response => new GotMetadata(response)).ToList();
|
||||
|
||||
await messageBus.PublishBatch(messages);
|
||||
}
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
namespace DebridCollector.Features.Debrid;
|
||||
|
||||
public interface IDebridHttpClient
|
||||
{
|
||||
public Task<IReadOnlyList<TorrentMetadataResponse>> GetMetadataAsync(IReadOnlyCollection<PerformMetadataRequest> infoHashes, CancellationToken cancellationToken = default);
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
namespace DebridCollector.Features.Debrid;
|
||||
|
||||
public static class ProcessorChannel
|
||||
{
|
||||
public static Channel<PerformMetadataRequest> Queue = Channel.CreateUnbounded<PerformMetadataRequest>(new()
|
||||
{
|
||||
SingleReader = true,
|
||||
SingleWriter = true,
|
||||
});
|
||||
|
||||
public static bool AddToQueue(PerformMetadataRequest infoHash) => Queue.Writer.TryWrite(infoHash);
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
namespace DebridCollector.Features.Debrid;
|
||||
|
||||
public class RealDebridClient(HttpClient client) : IDebridHttpClient
|
||||
{
|
||||
private const string TorrentsInstantAvailability = "torrents/instantAvailability/";
|
||||
|
||||
public async Task<IReadOnlyList<TorrentMetadataResponse>> GetMetadataAsync(IReadOnlyCollection<PerformMetadataRequest> requests, CancellationToken cancellationToken = default)
|
||||
{
|
||||
var responseAsString = await client.GetStringAsync($"{TorrentsInstantAvailability}{string.Join("/", requests.Select(x => x.InfoHash.ToLowerInvariant()))}", cancellationToken);
|
||||
|
||||
var document = JsonDocument.Parse(responseAsString);
|
||||
|
||||
var torrentMetadataResponses = new List<TorrentMetadataResponse>();
|
||||
|
||||
foreach (var request in requests)
|
||||
{
|
||||
if (document.RootElement.TryGetProperty(request.InfoHash.ToLowerInvariant(), out var dataElement) &&
|
||||
dataElement.ValueKind == JsonValueKind.Object &&
|
||||
dataElement.TryGetProperty("rd", out var rdDataElement) &&
|
||||
rdDataElement.ValueKind == JsonValueKind.Array &&
|
||||
rdDataElement.GetArrayLength() > 0)
|
||||
{
|
||||
MapResponseToMetadata(rdDataElement, torrentMetadataResponses, request);
|
||||
continue;
|
||||
}
|
||||
|
||||
torrentMetadataResponses.Add(new(request.CorrelationId, new()));
|
||||
}
|
||||
|
||||
return torrentMetadataResponses;
|
||||
}
|
||||
|
||||
private static void MapResponseToMetadata(JsonElement rdDataElement, List<TorrentMetadataResponse> torrentMetadataResponses, PerformMetadataRequest request)
|
||||
{
|
||||
var metaData = new FileDataDictionary();
|
||||
|
||||
foreach (var item in rdDataElement.EnumerateArray())
|
||||
{
|
||||
if (item.ValueKind == JsonValueKind.Object)
|
||||
{
|
||||
foreach (var property in item.EnumerateObject())
|
||||
{
|
||||
if (property.Value.ValueKind == JsonValueKind.Object)
|
||||
{
|
||||
var fileData = new FileData();
|
||||
|
||||
if (property.Value.TryGetProperty("filename", out var filenameElement) && filenameElement.ValueKind == JsonValueKind.String)
|
||||
{
|
||||
fileData.Filename = filenameElement.GetString();
|
||||
}
|
||||
|
||||
if (property.Value.TryGetProperty("filesize", out var filesizeElement) && filesizeElement.ValueKind == JsonValueKind.Number)
|
||||
{
|
||||
fileData.Filesize = filesizeElement.GetInt64();
|
||||
}
|
||||
|
||||
metaData[property.Name] = fileData;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
torrentMetadataResponses.Add(new(request.CorrelationId, metaData));
|
||||
}
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
namespace DebridCollector.Features.Debrid;
|
||||
|
||||
public class RealDebridResponse : Dictionary<string, RdData?>
|
||||
{
|
||||
}
|
||||
|
||||
public class RdData
|
||||
{
|
||||
[JsonPropertyName("rd")]
|
||||
public List<FileDataDictionary>? Rd { get; set; }
|
||||
}
|
||||
|
||||
public class FileDataDictionary : Dictionary<string, FileData>
|
||||
{
|
||||
}
|
||||
|
||||
public class FileData
|
||||
{
|
||||
[JsonPropertyName("filename")]
|
||||
public string? Filename { get; set; }
|
||||
|
||||
[JsonPropertyName("filesize")]
|
||||
public long? Filesize { get; set; }
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
namespace DebridCollector.Features.Debrid;
|
||||
|
||||
public static class ServiceCollectionExtensions
|
||||
{
|
||||
public static IServiceCollection AddRealDebridClient(this IServiceCollection services, DebridCollectorConfiguration serviceConfiguration)
|
||||
{
|
||||
services.AddHttpClient<IDebridHttpClient, RealDebridClient>(
|
||||
client =>
|
||||
{
|
||||
client.BaseAddress = new("https://api.real-debrid.com/rest/1.0/");
|
||||
client.DefaultRequestHeaders.Add("Authorization", $"Bearer {serviceConfiguration.RealDebridApiKey}");
|
||||
})
|
||||
.AddPolicyHandler(GetRetryPolicy())
|
||||
.AddPolicyHandler(GetCircuitBreakerPolicy());
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
private static AsyncPolicy<HttpResponseMessage> GetRetryPolicy(int MaxRetryCount = 5, int MaxJitterTime = 1000) =>
|
||||
HttpPolicyExtensions
|
||||
.HandleTransientHttpError()
|
||||
.WaitAndRetryAsync(MaxRetryCount, RetryAttempt =>
|
||||
TimeSpan.FromSeconds(Math.Pow(2, RetryAttempt)) +
|
||||
TimeSpan.FromMilliseconds(Random.Shared.Next(0, MaxJitterTime)));
|
||||
|
||||
private static AsyncPolicy<HttpResponseMessage> GetCircuitBreakerPolicy() =>
|
||||
HttpPolicyExtensions
|
||||
.HandleTransientHttpError()
|
||||
.CircuitBreakerAsync(handledEventsAllowedBeforeBreaking: 5, TimeSpan.FromSeconds(30));
|
||||
}
|
||||
@@ -1,93 +0,0 @@
|
||||
namespace DebridCollector.Features.Worker;
|
||||
|
||||
public static class DebridMetaToTorrentMeta
|
||||
{
|
||||
public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection(
|
||||
IRankTorrentName rankTorrentName,
|
||||
Torrent torrent,
|
||||
string ImdbId,
|
||||
FileDataDictionary Metadata,
|
||||
ILogger<WriteMetadataConsumer> logger)
|
||||
{
|
||||
try
|
||||
{
|
||||
var files = new List<TorrentFile>();
|
||||
|
||||
foreach (var metadataEntry in Metadata.Where(m => Filetypes.VideoFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext))))
|
||||
{
|
||||
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
|
||||
var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
|
||||
|
||||
var file = new TorrentFile
|
||||
{
|
||||
ImdbId = ImdbId,
|
||||
KitsuId = 0,
|
||||
InfoHash = torrent.InfoHash,
|
||||
FileIndex = validFileIndex ? fileIndexMinusOne : 0,
|
||||
Title = metadataEntry.Value.Filename,
|
||||
Size = metadataEntry.Value.Filesize.GetValueOrDefault(),
|
||||
};
|
||||
|
||||
var parsedTitle = rankTorrentName.Parse(file.Title, false);
|
||||
|
||||
if (!parsedTitle.Success)
|
||||
{
|
||||
logger.LogWarning("Failed to parse title {Title} for metadata mapping", file.Title);
|
||||
continue;
|
||||
}
|
||||
|
||||
file.ImdbSeason = parsedTitle.Response?.Season?.FirstOrDefault() ?? 0;
|
||||
file.ImdbEpisode = parsedTitle.Response?.Episode?.FirstOrDefault() ?? 0;
|
||||
|
||||
files.Add(file);
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning("Failed to map metadata to files collection: {Exception}", ex.Message);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, FileDataDictionary Metadata, ILogger<WriteMetadataConsumer> logger)
|
||||
{
|
||||
try
|
||||
{
|
||||
var files = new List<SubtitleFile>();
|
||||
|
||||
var torrentFiles = await storage.GetTorrentFiles(InfoHash.ToLowerInvariant());
|
||||
|
||||
if (torrentFiles.Count == 0)
|
||||
{
|
||||
return files;
|
||||
}
|
||||
|
||||
foreach (var metadataEntry in Metadata.Where(m => Filetypes.SubtitleFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext))))
|
||||
{
|
||||
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
|
||||
var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
|
||||
var fileId = torrentFiles.FirstOrDefault(
|
||||
t => Path.GetFileNameWithoutExtension(t.Title) == Path.GetFileNameWithoutExtension(metadataEntry.Value.Filename))?.Id ?? 0;
|
||||
|
||||
var file = new SubtitleFile
|
||||
{
|
||||
InfoHash = InfoHash,
|
||||
FileIndex = validFileIndex ? fileIndexMinusOne : 0,
|
||||
FileId = fileId,
|
||||
Title = metadataEntry.Value.Filename,
|
||||
};
|
||||
|
||||
files.Add(file);
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning("Failed to map metadata to subtitles collection: {Exception}", ex.Message);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,104 +0,0 @@
|
||||
namespace DebridCollector.Features.Worker;
|
||||
|
||||
public static class Filetypes
|
||||
{
|
||||
public static IReadOnlyList<string> VideoFileExtensions =
|
||||
[
|
||||
".3g2",
|
||||
".3gp",
|
||||
".3gp2",
|
||||
".3gpp",
|
||||
".60d",
|
||||
".ajp",
|
||||
".asf",
|
||||
".asx",
|
||||
".avchd",
|
||||
".avi",
|
||||
".bik",
|
||||
".bix",
|
||||
".box",
|
||||
".cam",
|
||||
".dat",
|
||||
".divx",
|
||||
".dmf",
|
||||
".dv",
|
||||
".dvr-ms",
|
||||
".evo",
|
||||
".flc",
|
||||
".fli",
|
||||
".flic",
|
||||
".flv",
|
||||
".flx",
|
||||
".gvi",
|
||||
".gvp",
|
||||
".h264",
|
||||
".m1v",
|
||||
".m2p",
|
||||
".m2ts",
|
||||
".m2v",
|
||||
".m4e",
|
||||
".m4v",
|
||||
".mjp",
|
||||
".mjpeg",
|
||||
".mjpg",
|
||||
".mkv",
|
||||
".moov",
|
||||
".mov",
|
||||
".movhd",
|
||||
".movie",
|
||||
".movx",
|
||||
".mp4",
|
||||
".mpe",
|
||||
".mpeg",
|
||||
".mpg",
|
||||
".mpv",
|
||||
".mpv2",
|
||||
".mxf",
|
||||
".nsv",
|
||||
".nut",
|
||||
".ogg",
|
||||
".ogm",
|
||||
".omf",
|
||||
".ps",
|
||||
".qt",
|
||||
".ram",
|
||||
".rm",
|
||||
".rmvb",
|
||||
".swf",
|
||||
".ts",
|
||||
".vfw",
|
||||
".vid",
|
||||
".video",
|
||||
".viv",
|
||||
".vivo",
|
||||
".vob",
|
||||
".vro",
|
||||
".wm",
|
||||
".wmv",
|
||||
".wmx",
|
||||
".wrap",
|
||||
".wvx",
|
||||
".wx",
|
||||
".x264",
|
||||
".xvid",
|
||||
];
|
||||
|
||||
public static IReadOnlyList<string> SubtitleFileExtensions =
|
||||
[
|
||||
".a",
|
||||
".srt",
|
||||
".ass",
|
||||
".ssa",
|
||||
".stl",
|
||||
".scc",
|
||||
".ttml",
|
||||
".sbv",
|
||||
".dks",
|
||||
".qtx",
|
||||
".jss",
|
||||
".vtt",
|
||||
".smi",
|
||||
".usf",
|
||||
".idx"
|
||||
];
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
namespace DebridCollector.Features.Worker;
|
||||
|
||||
public class InfohashMetadataSagaState : SagaStateMachineInstance, ISagaVersion
|
||||
{
|
||||
public Torrent? Torrent { get; set; }
|
||||
public string? Title { get; set; }
|
||||
public string? ImdbId { get; set; }
|
||||
public TorrentMetadataResponse? Metadata { get; set; }
|
||||
public int RetriesAllowed { get; set; } = 2;
|
||||
|
||||
public Guid CorrelationId { get; set; }
|
||||
public int Version { get; set; }
|
||||
public int CurrentState { get; set; }
|
||||
}
|
||||
@@ -1,69 +0,0 @@
|
||||
namespace DebridCollector.Features.Worker;
|
||||
|
||||
public class InfohashMetadataSagaStateMachine : MassTransitStateMachine<InfohashMetadataSagaState>
|
||||
{
|
||||
public State Ingesting { get; private set; } = null!;
|
||||
public State Writing { get; private set; } = null!;
|
||||
public State Completed { get; private set; } = null!;
|
||||
|
||||
public Event<CollectMetadata> CollectMetadata { get; private set; } = null!;
|
||||
public Event<GotMetadata> GotMetadata { get; private set; } = null!;
|
||||
public Event<MetadataWritten> MetadataWritten { get; private set; } = null!;
|
||||
|
||||
public InfohashMetadataSagaStateMachine(ILogger<InfohashMetadataSagaStateMachine> logger)
|
||||
{
|
||||
InstanceState(x => x.CurrentState);
|
||||
|
||||
Event(() => CollectMetadata, x => x.CorrelateById(context => context.Message.CorrelationId));
|
||||
Event(() => GotMetadata, x => x.CorrelateById(context => context.Message.CorrelationId));
|
||||
Event(() => MetadataWritten, x => x.CorrelateById(context => context.Message.CorrelationId));
|
||||
|
||||
Initially(
|
||||
When(CollectMetadata)
|
||||
.ThenAsync(
|
||||
async context =>
|
||||
{
|
||||
context.Saga.CorrelationId = context.Data.CorrelationId;
|
||||
context.Saga.Torrent = context.Data.Torrent;
|
||||
context.Saga.ImdbId = context.Data.ImdbId;
|
||||
|
||||
await context.Publish(new PerformMetadataRequest(context.Saga.CorrelationId, context.Saga.Torrent.InfoHash));
|
||||
|
||||
logger.LogInformation("Collecting Metadata for torrent {InfoHash} in Saga {SagaId}", context.Instance.Torrent.InfoHash, context.Instance.CorrelationId);
|
||||
})
|
||||
.TransitionTo(Ingesting));
|
||||
|
||||
During(
|
||||
Ingesting,
|
||||
When(GotMetadata)
|
||||
.ThenAsync(
|
||||
async context =>
|
||||
{
|
||||
context.Saga.Metadata = context.Data.Metadata;
|
||||
|
||||
await context.Publish(new WriteMetadata(context.Saga.Torrent, context.Saga.Metadata, context.Saga.ImdbId));
|
||||
|
||||
logger.LogInformation("Got Metadata for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
|
||||
})
|
||||
.TransitionTo(Writing));
|
||||
|
||||
During(
|
||||
Writing,
|
||||
When(MetadataWritten)
|
||||
.Then(
|
||||
context =>
|
||||
{
|
||||
if (!context.Message.WithFiles)
|
||||
{
|
||||
logger.LogInformation("No files written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
|
||||
return;
|
||||
}
|
||||
|
||||
logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
|
||||
})
|
||||
.TransitionTo(Completed)
|
||||
.Finalize());
|
||||
|
||||
SetCompletedWhenFinalized();
|
||||
}
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
namespace DebridCollector.Features.Worker;
|
||||
|
||||
public class PerformMetadataRequestConsumer : IConsumer<PerformMetadataRequest>
|
||||
{
|
||||
public Task Consume(ConsumeContext<PerformMetadataRequest> context)
|
||||
{
|
||||
ProcessorChannel.AddToQueue(context.Message);
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
namespace DebridCollector.Features.Worker;
|
||||
|
||||
[EntityName("perform-metadata-request-debrid-collector")]
|
||||
public record PerformMetadataRequest(Guid CorrelationId, string InfoHash) : CorrelatedBy<Guid>;
|
||||
|
||||
[EntityName("torrent-metadata-response-debrid-collector")]
|
||||
public record GotMetadata(TorrentMetadataResponse Metadata) : CorrelatedBy<Guid>
|
||||
{
|
||||
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
|
||||
}
|
||||
|
||||
[EntityName("write-metadata-debrid-collector")]
|
||||
public record WriteMetadata(Torrent Torrent, TorrentMetadataResponse Metadata, string ImdbId) : CorrelatedBy<Guid>
|
||||
{
|
||||
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
|
||||
}
|
||||
|
||||
[EntityName("metadata-written-debrid-colloctor")]
|
||||
public record MetadataWritten(TorrentMetadataResponse Metadata, bool WithFiles) : CorrelatedBy<Guid>
|
||||
{
|
||||
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
namespace DebridCollector.Features.Worker;
|
||||
|
||||
[EntityName("torrent-metadata-response")]
|
||||
public record TorrentMetadataResponse(Guid CorrelationId, FileDataDictionary Metadata) : CorrelatedBy<Guid>;
|
||||
@@ -1,28 +0,0 @@
|
||||
namespace DebridCollector.Features.Worker;
|
||||
|
||||
public class WriteMetadataConsumer(IRankTorrentName rankTorrentName, IDataStorage dataStorage, ILogger<WriteMetadataConsumer> logger) : IConsumer<WriteMetadata>
|
||||
{
|
||||
public async Task Consume(ConsumeContext<WriteMetadata> context)
|
||||
{
|
||||
var request = context.Message;
|
||||
|
||||
var torrentFiles = DebridMetaToTorrentMeta.MapMetadataToFilesCollection(rankTorrentName, request.Torrent, request.ImdbId, request.Metadata.Metadata, logger);
|
||||
|
||||
if (!torrentFiles.Any())
|
||||
{
|
||||
await context.Publish(new MetadataWritten(request.Metadata, false));
|
||||
return;
|
||||
}
|
||||
|
||||
await dataStorage.InsertFiles(torrentFiles);
|
||||
|
||||
var subtitles = await DebridMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata, logger);
|
||||
|
||||
if (subtitles.Any())
|
||||
{
|
||||
await dataStorage.InsertSubtitles(subtitles);
|
||||
}
|
||||
|
||||
await context.Publish(new MetadataWritten(request.Metadata, true));
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
// Global using directives
|
||||
|
||||
global using System.Text.Json;
|
||||
global using System.Text.Json.Serialization;
|
||||
global using System.Threading.Channels;
|
||||
global using DebridCollector.Extensions;
|
||||
global using DebridCollector.Features.Configuration;
|
||||
global using DebridCollector.Features.Debrid;
|
||||
global using DebridCollector.Features.Worker;
|
||||
global using MassTransit;
|
||||
global using Microsoft.AspNetCore.Builder;
|
||||
global using Microsoft.Extensions.DependencyInjection;
|
||||
global using Polly;
|
||||
global using Polly.Extensions.Http;
|
||||
global using SharedContracts.Configuration;
|
||||
global using SharedContracts.Dapper;
|
||||
global using SharedContracts.Extensions;
|
||||
global using SharedContracts.Models;
|
||||
global using SharedContracts.Python;
|
||||
global using SharedContracts.Python.RTN;
|
||||
global using SharedContracts.Requests;
|
||||
@@ -1,17 +0,0 @@
|
||||
var builder = WebApplication.CreateBuilder();
|
||||
|
||||
builder.DisableIpPortBinding();
|
||||
|
||||
builder.Configuration
|
||||
.AddServiceConfiguration();
|
||||
|
||||
builder.Host
|
||||
.SetupSerilog(builder.Configuration);
|
||||
|
||||
builder.Services
|
||||
.AddServiceConfiguration()
|
||||
.AddDatabase()
|
||||
.RegisterMassTransit();
|
||||
|
||||
var app = builder.Build();
|
||||
app.Run();
|
||||
@@ -1,2 +0,0 @@
|
||||
mkdir -p ../python
|
||||
python -m pip install -r ../requirements.txt -t ../python/
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
rm -rf ../python
|
||||
mkdir -p ../python
|
||||
python3 -m pip install -r ../requirements.txt -t ../python/
|
||||
@@ -1 +0,0 @@
|
||||
rank-torrent-name==0.2.13
|
||||
@@ -9,29 +9,22 @@ public static class ServiceCollectionExtensions
|
||||
return services;
|
||||
}
|
||||
|
||||
internal static IServiceCollection AddDatabase(this IServiceCollection services)
|
||||
internal static IServiceCollection AddMongoDb(this IServiceCollection services)
|
||||
{
|
||||
services.LoadConfigurationFromEnv<PostgresConfiguration>();
|
||||
services.AddScoped<ImdbDbService>();
|
||||
|
||||
services.LoadConfigurationFromEnv<MongoConfiguration>();
|
||||
services.AddTransient<ImdbMongoDbService>();
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
internal static IServiceCollection AddImporters(this IServiceCollection services)
|
||||
|
||||
internal static IServiceCollection AddJobSupport(this IServiceCollection services)
|
||||
{
|
||||
services.AddScoped<IFileImport<ImdbBasicEntry>, BasicsFile>();
|
||||
services.AddScoped<IFileImport<ImdbAkaEntry>, AkasFile>();
|
||||
services.AddScoped<IFileImport<ImdbEpisodeEntry>, EpisodesFile>();
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services)
|
||||
{
|
||||
services.LoadConfigurationFromEnv<ServiceConfiguration>();
|
||||
services.AddScoped<IImdbFileDownloader, ImdbFileDownloader>();
|
||||
services.AddHostedService<DownloadImdbDataJob>();
|
||||
|
||||
services.LoadConfigurationFromEnv<JobConfiguration>();
|
||||
|
||||
services.AddScheduler()
|
||||
.AddTransient<DownloadImdbDataJob>()
|
||||
.AddHostedService<JobScheduler>();
|
||||
|
||||
return services;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,8 +12,6 @@ internal static class WebApplicationBuilderExtensions
|
||||
options =>
|
||||
{
|
||||
options.DefaultExecutionTimeout = 6.Hours();
|
||||
options.CodeGeneration.TypeLoadMode = TypeLoadMode.Static;
|
||||
options.Services.AssertAllExpectedPreBuiltTypesExistOnStartUp();
|
||||
});
|
||||
|
||||
return builder;
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
namespace Metadata.Features.ClearExistingImdbData;
|
||||
|
||||
public record ClearExistingImdbDataRequest(string TitleBasicsFilePath, string TitleAkasFilePath, string EpisodesFilePath);
|
||||
@@ -1,16 +0,0 @@
|
||||
namespace Metadata.Features.ClearExistingImdbData;
|
||||
|
||||
public class ClearExistingImdbDataRequestHandler(ILogger<ClearExistingImdbDataRequestHandler> logger, ImdbDbService dbService)
|
||||
{
|
||||
public async Task<ImportImdbDataRequest> Handle(ClearExistingImdbDataRequest request, CancellationToken _)
|
||||
{
|
||||
logger.LogInformation("Clearing existing IMDB data from database");
|
||||
await dbService.DropPgtrmIndex();
|
||||
await dbService.TruncateTable(TableNames.EpisodesTable);
|
||||
await dbService.TruncateTable(TableNames.AkasTable);
|
||||
await dbService.TruncateTable(TableNames.MetadataTable, cascade: true);
|
||||
logger.LogInformation("Existing IMDB data cleared from database");
|
||||
|
||||
return new(request.TitleBasicsFilePath, request.TitleAkasFilePath, request.EpisodesFilePath);
|
||||
}
|
||||
}
|
||||
13
src/metadata/Features/Configuration/JobConfiguration.cs
Normal file
13
src/metadata/Features/Configuration/JobConfiguration.cs
Normal file
@@ -0,0 +1,13 @@
|
||||
namespace Metadata.Features.Configuration;
|
||||
|
||||
public class JobConfiguration
|
||||
{
|
||||
private const string Prefix = "METADATA";
|
||||
private const string DownloadImdbDataVariable = "DOWNLOAD_IMDB_DATA_SCHEDULE";
|
||||
private const string DownloadImdbDataOnceVariable = "DOWNLOAD_IMDB_DATA_ONCE";
|
||||
private const string InsertBatchSizeVariable = "INSERT_BATCH_SIZE";
|
||||
|
||||
public int InsertBatchSize { get; init; } = Prefix.GetEnvironmentVariableAsInt(InsertBatchSizeVariable, 25_000);
|
||||
public string DownloadImdbCronSchedule { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(DownloadImdbDataVariable, CronExpressions.EveryHour);
|
||||
public bool DownloadImdbOnce { get; init; } = Prefix.GetEnvironmentVariableAsBool(DownloadImdbDataOnceVariable);
|
||||
}
|
||||
20
src/metadata/Features/Configuration/MongoConfiguration.cs
Normal file
20
src/metadata/Features/Configuration/MongoConfiguration.cs
Normal file
@@ -0,0 +1,20 @@
|
||||
namespace Metadata.Features.Configuration;
|
||||
|
||||
public class MongoConfiguration
|
||||
{
|
||||
private const string Prefix = "MONGODB";
|
||||
private const string HostVariable = "HOST";
|
||||
private const string PortVariable = "PORT";
|
||||
private const string DbVariable = "DB";
|
||||
private const string UsernameVariable = "USER";
|
||||
private const string PasswordVariable = "PASSWORD";
|
||||
|
||||
|
||||
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
|
||||
private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 27017);
|
||||
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
|
||||
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
|
||||
public string DbName { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DbVariable);
|
||||
|
||||
public string ConnectionString => $"mongodb://{Username}:{Password}@{Host}:{Port}/{DbName}?tls=false&directConnection=true&authSource=admin";
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
namespace Metadata.Features.Configuration;
|
||||
|
||||
public class PostgresConfiguration
|
||||
{
|
||||
private const string Prefix = "POSTGRES";
|
||||
private const string HostVariable = "HOST";
|
||||
private const string UsernameVariable = "USER";
|
||||
private const string PasswordVariable = "PASSWORD";
|
||||
private const string DatabaseVariable = "DB";
|
||||
private const string PortVariable = "PORT";
|
||||
private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds
|
||||
|
||||
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
|
||||
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
|
||||
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
|
||||
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
|
||||
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
|
||||
private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);
|
||||
|
||||
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
namespace Metadata.Features.Configuration;
|
||||
|
||||
public class ServiceConfiguration
|
||||
{
|
||||
private const string Prefix = "METADATA";
|
||||
private const string InsertBatchSizeVariable = "INSERT_BATCH_SIZE";
|
||||
|
||||
public int InsertBatchSize { get; init; } = Prefix.GetEnvironmentVariableAsInt(InsertBatchSizeVariable, 25_000);
|
||||
}
|
||||
@@ -1,3 +1,3 @@
|
||||
namespace Metadata.Features.DeleteDownloadedImdbData;
|
||||
|
||||
public record DeleteDownloadedImdbDataRequest(string TitleBasicsFilePath, string TitleAkasFilePath, string EpisodesFilePath);
|
||||
public record DeleteDownloadedImdbDataRequest(string FilePath);
|
||||
|
||||
@@ -1,23 +1,21 @@
|
||||
namespace Metadata.Features.DeleteDownloadedImdbData;
|
||||
|
||||
public class DeleteDownloadedImdbDataRequestHandler(ILogger<DeleteDownloadedImdbDataRequestHandler> logger)
|
||||
public class DeleteDownloadedImdbDataRequestHandler(ILogger<DeleteDownloadedImdbDataRequestHandler> logger, JobConfiguration configuration)
|
||||
{
|
||||
public Task Handle(DeleteDownloadedImdbDataRequest request, CancellationToken _)
|
||||
{
|
||||
DeleteFile(request.TitleBasicsFilePath);
|
||||
DeleteFile(request.TitleAkasFilePath);
|
||||
DeleteFile(request.EpisodesFilePath);
|
||||
logger.LogInformation("Processing Completed");
|
||||
logger.LogInformation("Deleting file {FilePath}", request.FilePath);
|
||||
|
||||
Environment.Exit(0);
|
||||
File.Delete(request.FilePath);
|
||||
|
||||
logger.LogInformation("File Deleted");
|
||||
|
||||
if (configuration.DownloadImdbOnce)
|
||||
{
|
||||
logger.LogInformation("Processing Completed: Exiting application as DownloadImdbOnce is set to true");
|
||||
Environment.Exit(0);
|
||||
}
|
||||
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
private void DeleteFile(string file)
|
||||
{
|
||||
logger.LogInformation("Deleting file {FilePath}", file);
|
||||
File.Delete(file);
|
||||
logger.LogInformation("File Deleted");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
namespace Metadata.Features.DownloadImdbData;
|
||||
|
||||
public class DownloadImdbDataJob(IMessageBus messageBus, JobConfiguration configuration) : BaseJob
|
||||
{
|
||||
public override bool IsScheduelable => !configuration.DownloadImdbOnce && !string.IsNullOrEmpty(configuration.DownloadImdbCronSchedule);
|
||||
public override string JobName => nameof(DownloadImdbDataJob);
|
||||
public override async Task Invoke() => await messageBus.SendAsync(new GetImdbDataRequest());
|
||||
}
|
||||
@@ -1,20 +1,30 @@
|
||||
namespace Metadata.Features.DownloadImdbData;
|
||||
|
||||
public class GetImdbDataRequestHandler(IHttpClientFactory clientFactory, IImdbFileDownloader downloader, ILogger<GetImdbDataRequestHandler> logger)
|
||||
public class GetImdbDataRequestHandler(IHttpClientFactory clientFactory, ILogger<GetImdbDataRequestHandler> logger)
|
||||
{
|
||||
private const string TitleBasicsFileName = "title.basics.tsv";
|
||||
private const string TitleAkasFileName = "title.akas.tsv";
|
||||
private const string EpisodesFileName = "title.episode.tsv";
|
||||
|
||||
public async Task<ClearExistingImdbDataRequest> Handle(GetImdbDataRequest _, CancellationToken cancellationToken)
|
||||
public async Task<ImportImdbDataRequest> Handle(GetImdbDataRequest _, CancellationToken cancellationToken)
|
||||
{
|
||||
logger.LogInformation("Downloading IMDB data");
|
||||
|
||||
var client = clientFactory.CreateClient("imdb-data");
|
||||
var tempBasicsFile = await downloader.DownloadFileToTempPath(client, TitleBasicsFileName, cancellationToken);
|
||||
var tempAkasFile = await downloader.DownloadFileToTempPath(client, TitleAkasFileName, cancellationToken);
|
||||
var tempEpisodesFile = await downloader.DownloadFileToTempPath(client, EpisodesFileName, cancellationToken);
|
||||
var response = await client.GetAsync($"{TitleBasicsFileName}.gz", cancellationToken);
|
||||
|
||||
return new(tempBasicsFile, tempAkasFile, tempEpisodesFile);
|
||||
var tempFile = Path.Combine(Path.GetTempPath(), TitleBasicsFileName);
|
||||
|
||||
response.EnsureSuccessStatusCode();
|
||||
|
||||
await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
|
||||
await using var gzipStream = new GZipStream(stream, CompressionMode.Decompress);
|
||||
await using var fileStream = File.Create(tempFile);
|
||||
|
||||
await gzipStream.CopyToAsync(fileStream, cancellationToken);
|
||||
|
||||
logger.LogInformation("Downloaded IMDB data to {TempFile}", tempFile);
|
||||
|
||||
fileStream.Close();
|
||||
|
||||
return new(tempFile);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
namespace Metadata.Features.DownloadImdbData;
|
||||
|
||||
public interface IImdbFileDownloader
|
||||
{
|
||||
Task<string> DownloadFileToTempPath(HttpClient client, string fileName, CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
namespace Metadata.Features.DownloadImdbData;
|
||||
|
||||
public class ImdbFileDownloader(ILogger<ImdbFileDownloader> logger) : IImdbFileDownloader
|
||||
{
|
||||
public async Task<string> DownloadFileToTempPath(HttpClient client, string fileName, CancellationToken cancellationToken)
|
||||
{
|
||||
var response = await client.GetAsync($"{fileName}.gz", cancellationToken);
|
||||
|
||||
var tempFile = Path.Combine(Path.GetTempPath(), fileName);
|
||||
|
||||
response.EnsureSuccessStatusCode();
|
||||
|
||||
await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
|
||||
await using var gzipStream = new GZipStream(stream, CompressionMode.Decompress);
|
||||
await using var fileStream = File.Create(tempFile);
|
||||
|
||||
await gzipStream.CopyToAsync(fileStream, cancellationToken);
|
||||
|
||||
logger.LogInformation("Downloaded IMDB data '{Filename}' to {TempFile}", fileName, tempFile);
|
||||
|
||||
fileStream.Close();
|
||||
return tempFile;
|
||||
}
|
||||
}
|
||||
@@ -1,89 +0,0 @@
|
||||
namespace Metadata.Features.Files;
|
||||
|
||||
public class AkasFile(ILogger<AkasFile> logger, ImdbDbService dbService) : IFileImport<ImdbAkaEntry>
|
||||
{
|
||||
public async Task Import(string fileName, int batchSize, CancellationToken cancellationToken)
|
||||
{
|
||||
logger.LogInformation("Importing Downloaded IMDB AKAs data from {FilePath}", fileName);
|
||||
|
||||
var csvConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
|
||||
{
|
||||
Delimiter = "\t",
|
||||
BadDataFound = null, // Skip Bad Data from Imdb
|
||||
MissingFieldFound = null, // Skip Missing Fields from Imdb
|
||||
};
|
||||
|
||||
using var reader = new StreamReader(fileName);
|
||||
using var csv = new CsvReader(reader, csvConfig);
|
||||
|
||||
var channel = Channel.CreateBounded<ImdbAkaEntry>(new BoundedChannelOptions(batchSize)
|
||||
{
|
||||
FullMode = BoundedChannelFullMode.Wait,
|
||||
});
|
||||
|
||||
await csv.ReadAsync();
|
||||
|
||||
var batchInsertTask = CreateBatchOfAkaEntries(channel, batchSize, cancellationToken);
|
||||
|
||||
await ReadAkaEntries(csv, channel, cancellationToken);
|
||||
|
||||
channel.Writer.Complete();
|
||||
|
||||
await batchInsertTask;
|
||||
}
|
||||
|
||||
private Task CreateBatchOfAkaEntries(Channel<ImdbAkaEntry, ImdbAkaEntry> channel, int batchSize, CancellationToken cancellationToken) =>
|
||||
Task.Run(async () =>
|
||||
{
|
||||
await foreach (var movieData in channel.Reader.ReadAllAsync(cancellationToken))
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var batch = new List<ImdbAkaEntry>
|
||||
{
|
||||
movieData,
|
||||
};
|
||||
|
||||
while (batch.Count < batchSize && channel.Reader.TryRead(out var nextMovieData))
|
||||
{
|
||||
batch.Add(nextMovieData);
|
||||
}
|
||||
|
||||
if (batch.Count > 0)
|
||||
{
|
||||
await dbService.InsertImdbAkaEntries(batch);
|
||||
logger.LogInformation("Imported batch of {BatchSize} aka entries starting with ImdbId {FirstImdbId}", batch.Count, batch.First().ImdbId);
|
||||
}
|
||||
}
|
||||
}, cancellationToken);
|
||||
|
||||
private static async Task ReadAkaEntries(CsvReader csv, Channel<ImdbAkaEntry, ImdbAkaEntry> channel, CancellationToken cancellationToken)
|
||||
{
|
||||
while (await csv.ReadAsync())
|
||||
{
|
||||
var data = new ImdbAkaEntry
|
||||
{
|
||||
ImdbId = csv.GetField(0),
|
||||
Ordering = csv.GetField<int>(1),
|
||||
LocalizedTitle = csv.GetField(2),
|
||||
Region = csv.GetField(3),
|
||||
Language = csv.GetField(4),
|
||||
Types = csv.GetField(5),
|
||||
Attributes = csv.GetField(6),
|
||||
};
|
||||
|
||||
var isOriginalTitle = int.TryParse(csv.GetField(7), out var original);
|
||||
data.IsOriginalTitle = isOriginalTitle && original == 1;
|
||||
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await channel.Writer.WriteAsync(data, cancellationToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,86 +0,0 @@
|
||||
namespace Metadata.Features.Files;
|
||||
|
||||
public class BasicsFile(ILogger<BasicsFile> logger, ImdbDbService dbService): IFileImport<ImdbBasicEntry>
|
||||
{
|
||||
public async Task Import(string fileName, int batchSize, CancellationToken cancellationToken)
|
||||
{
|
||||
logger.LogInformation("Importing Downloaded IMDB Basics data from {FilePath}", fileName);
|
||||
|
||||
var csvConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
|
||||
{
|
||||
Delimiter = "\t",
|
||||
BadDataFound = null, // Skip Bad Data from Imdb
|
||||
MissingFieldFound = null, // Skip Missing Fields from Imdb
|
||||
};
|
||||
|
||||
using var reader = new StreamReader(fileName);
|
||||
using var csv = new CsvReader(reader, csvConfig);
|
||||
|
||||
var channel = Channel.CreateBounded<ImdbBasicEntry>(new BoundedChannelOptions(batchSize)
|
||||
{
|
||||
FullMode = BoundedChannelFullMode.Wait,
|
||||
});
|
||||
|
||||
await csv.ReadAsync();
|
||||
|
||||
var batchInsertTask = CreateBatchOfBasicEntries(channel, batchSize, cancellationToken);
|
||||
|
||||
await ReadBasicEntries(csv, channel, cancellationToken);
|
||||
|
||||
channel.Writer.Complete();
|
||||
|
||||
await batchInsertTask;
|
||||
}
|
||||
|
||||
private Task CreateBatchOfBasicEntries(Channel<ImdbBasicEntry, ImdbBasicEntry> channel, int batchSize, CancellationToken cancellationToken) =>
|
||||
Task.Run(async () =>
|
||||
{
|
||||
await foreach (var movieData in channel.Reader.ReadAllAsync(cancellationToken))
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var batch = new List<ImdbBasicEntry>
|
||||
{
|
||||
movieData,
|
||||
};
|
||||
|
||||
while (batch.Count < batchSize && channel.Reader.TryRead(out var nextMovieData))
|
||||
{
|
||||
batch.Add(nextMovieData);
|
||||
}
|
||||
|
||||
if (batch.Count > 0)
|
||||
{
|
||||
await dbService.InsertImdbEntries(batch);
|
||||
logger.LogInformation("Imported batch of {BatchSize} basics starting with ImdbId {FirstImdbId}", batch.Count, batch.First().ImdbId);
|
||||
}
|
||||
}
|
||||
}, cancellationToken);
|
||||
|
||||
private static async Task ReadBasicEntries(CsvReader csv, Channel<ImdbBasicEntry, ImdbBasicEntry> channel, CancellationToken cancellationToken)
|
||||
{
|
||||
while (await csv.ReadAsync())
|
||||
{
|
||||
var isAdultSet = int.TryParse(csv.GetField(4), out var adult);
|
||||
|
||||
var movieData = new ImdbBasicEntry
|
||||
{
|
||||
ImdbId = csv.GetField(0),
|
||||
Category = csv.GetField(1),
|
||||
Title = csv.GetField(2),
|
||||
Adult = isAdultSet && adult == 1,
|
||||
Year = csv.GetField(5) == @"\N" ? 0 : int.Parse(csv.GetField(5)),
|
||||
};
|
||||
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await channel.Writer.WriteAsync(movieData, cancellationToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,83 +0,0 @@
|
||||
namespace Metadata.Features.Files;
|
||||
|
||||
public class EpisodesFile(ILogger<EpisodesFile> logger, ImdbDbService dbService): IFileImport<ImdbEpisodeEntry>
|
||||
{
|
||||
public async Task Import(string fileName, int batchSize, CancellationToken cancellationToken)
|
||||
{
|
||||
logger.LogInformation("Importing Downloaded IMDB Episodes data from {FilePath}", fileName);
|
||||
|
||||
var csvConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
|
||||
{
|
||||
Delimiter = "\t",
|
||||
BadDataFound = null, // Skip Bad Data from Imdb
|
||||
MissingFieldFound = null, // Skip Missing Fields from Imdb
|
||||
};
|
||||
|
||||
using var reader = new StreamReader(fileName);
|
||||
using var csv = new CsvReader(reader, csvConfig);
|
||||
|
||||
var channel = Channel.CreateBounded<ImdbEpisodeEntry>(new BoundedChannelOptions(batchSize)
|
||||
{
|
||||
FullMode = BoundedChannelFullMode.Wait,
|
||||
});
|
||||
|
||||
await csv.ReadAsync();
|
||||
|
||||
var batchInsertTask = CreateBatchOfAkaEntries(channel, batchSize, cancellationToken);
|
||||
|
||||
await ReadAkaEntries(csv, channel, cancellationToken);
|
||||
|
||||
channel.Writer.Complete();
|
||||
|
||||
await batchInsertTask;
|
||||
}
|
||||
|
||||
private Task CreateBatchOfAkaEntries(Channel<ImdbEpisodeEntry, ImdbEpisodeEntry> channel, int batchSize, CancellationToken cancellationToken) =>
|
||||
Task.Run(async () =>
|
||||
{
|
||||
await foreach (var movieData in channel.Reader.ReadAllAsync(cancellationToken))
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var batch = new List<ImdbEpisodeEntry>
|
||||
{
|
||||
movieData,
|
||||
};
|
||||
|
||||
while (batch.Count < batchSize && channel.Reader.TryRead(out var nextMovieData))
|
||||
{
|
||||
batch.Add(nextMovieData);
|
||||
}
|
||||
|
||||
if (batch.Count > 0)
|
||||
{
|
||||
await dbService.InsertImdbEpisodeEntries(batch);
|
||||
logger.LogInformation("Imported batch of {BatchSize} episodes starting with ImdbId {FirstImdbId}", batch.Count, batch.First().EpisodeImdbId);
|
||||
}
|
||||
}
|
||||
}, cancellationToken);
|
||||
|
||||
private static async Task ReadAkaEntries(CsvReader csv, Channel<ImdbEpisodeEntry, ImdbEpisodeEntry> channel, CancellationToken cancellationToken)
|
||||
{
|
||||
while (await csv.ReadAsync())
|
||||
{
|
||||
var data = new ImdbEpisodeEntry
|
||||
{
|
||||
EpisodeImdbId = csv.GetField(0),
|
||||
ParentImdbId = csv.GetField(1),
|
||||
SeasonNumber = csv.GetField(2),
|
||||
EpisodeNumber = csv.GetField(3),
|
||||
};
|
||||
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await channel.Writer.WriteAsync(data, cancellationToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
namespace Metadata.Features.Files;
|
||||
|
||||
public interface IFileImport<TImportType>
|
||||
{
|
||||
Task Import(string fileName, int batchSize, CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
namespace Metadata.Features.Files;
|
||||
|
||||
public class ImdbAkaEntry
|
||||
{
|
||||
public string ImdbId { get; set; } = default!;
|
||||
public int Ordering { get; set; }
|
||||
public string? LocalizedTitle { get; set; }
|
||||
public string? Region { get; set; }
|
||||
public string? Language { get; set; }
|
||||
public string? Types { get; set; }
|
||||
public string? Attributes { get; set; }
|
||||
public bool IsOriginalTitle { get; set; }
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
namespace Metadata.Features.Files;
|
||||
|
||||
public class ImdbBasicEntry
|
||||
{
|
||||
public string ImdbId { get; set; } = default!;
|
||||
public string? Category { get; set; }
|
||||
public string? Title { get; set; }
|
||||
public bool Adult { get; set; }
|
||||
public int Year { get; set; }
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
namespace Metadata.Features.Files;
|
||||
|
||||
public class ImdbEpisodeEntry
|
||||
{
|
||||
public string EpisodeImdbId { get; set; } = default!;
|
||||
public string? ParentImdbId { get; set; }
|
||||
public string? SeasonNumber { get; set; }
|
||||
public string? EpisodeNumber { get; set; }
|
||||
}
|
||||
@@ -1,164 +0,0 @@
|
||||
namespace Metadata.Features.ImportImdbData;
|
||||
|
||||
public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbService> logger)
|
||||
{
|
||||
public Task InsertImdbEntries(IEnumerable<ImdbBasicEntry> entries) =>
|
||||
ExecuteCommandAsync(
|
||||
async connection =>
|
||||
{
|
||||
await using var writer = await connection.BeginBinaryImportAsync(
|
||||
$"COPY {TableNames.MetadataTable} (\"imdb_id\", \"category\", \"title\", \"year\", \"adult\") FROM STDIN (FORMAT BINARY)");
|
||||
|
||||
foreach (var entry in entries)
|
||||
{
|
||||
try
|
||||
{
|
||||
await writer.StartRowAsync();
|
||||
await writer.WriteAsync(entry.ImdbId, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Category, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Title, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Year, NpgsqlDbType.Integer);
|
||||
await writer.WriteAsync(entry.Adult, NpgsqlDbType.Boolean);
|
||||
}
|
||||
catch (Npgsql.PostgresException e)
|
||||
{
|
||||
if (e.Message.Contains("duplicate key value violates unique constraint", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
await writer.CompleteAsync();
|
||||
}, "Error while inserting imdb entries into database");
|
||||
|
||||
public Task InsertImdbAkaEntries(IEnumerable<ImdbAkaEntry> entries) =>
|
||||
ExecuteCommandAsync(
|
||||
async connection =>
|
||||
{
|
||||
await using var writer = await connection.BeginBinaryImportAsync(
|
||||
$"COPY {TableNames.AkasTable} (\"imdb_id\", \"ordering\", \"localized_title\", \"region\", \"language\", \"types\", \"attributes\", \"is_original_title\") FROM STDIN (FORMAT BINARY)");
|
||||
|
||||
foreach (var entry in entries.Where(x=>x.LocalizedTitle?.Length <= 8000))
|
||||
{
|
||||
try
|
||||
{
|
||||
await writer.StartRowAsync();
|
||||
await writer.WriteAsync(entry.ImdbId, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Ordering, NpgsqlDbType.Integer);
|
||||
await writer.WriteAsync(entry.LocalizedTitle, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Region, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Language, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Types, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Attributes, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.IsOriginalTitle, NpgsqlDbType.Boolean);
|
||||
|
||||
}
|
||||
catch (PostgresException e)
|
||||
{
|
||||
if (e.Message.Contains("value too long for type character", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
await writer.CompleteAsync();
|
||||
}, "Error while inserting imdb entries into database");
|
||||
|
||||
public Task InsertImdbEpisodeEntries(IEnumerable<ImdbEpisodeEntry> entries) =>
|
||||
ExecuteCommandAsync(
|
||||
async connection =>
|
||||
{
|
||||
await using var writer = await connection.BeginBinaryImportAsync(
|
||||
$"COPY {TableNames.EpisodesTable} (\"episode_id\", \"parent_id\", \"season\", \"episode\") FROM STDIN (FORMAT BINARY)");
|
||||
|
||||
foreach (var entry in entries)
|
||||
{
|
||||
try
|
||||
{
|
||||
await writer.StartRowAsync();
|
||||
await writer.WriteAsync(entry.EpisodeImdbId, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.ParentImdbId, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.SeasonNumber, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.EpisodeNumber, NpgsqlDbType.Text);
|
||||
|
||||
}
|
||||
catch (PostgresException e)
|
||||
{
|
||||
if (e.Message.Contains("value too long for type character", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
await writer.CompleteAsync();
|
||||
}, "Error while inserting imdb entries into database");
|
||||
|
||||
public Task TruncateTable(string table, bool cascade = false) =>
|
||||
ExecuteCommandAsync(
|
||||
async connection =>
|
||||
{
|
||||
var cascadeOption = cascade ? "CASCADE" : string.Empty;
|
||||
logger.LogInformation("Truncating '{Table}' table", table);
|
||||
await using var command = new NpgsqlCommand($"TRUNCATE TABLE {table} {cascadeOption}", connection);
|
||||
await command.ExecuteNonQueryAsync();
|
||||
}, $"Error while clearing '{table}' table");
|
||||
|
||||
public Task CreatePgtrmIndex() =>
|
||||
ExecuteCommandAsync(
|
||||
async connection =>
|
||||
{
|
||||
await using var command = new NpgsqlCommand($"CREATE INDEX title_gin ON {TableNames.MetadataTable} USING gin(title gin_trgm_ops)", connection);
|
||||
await command.ExecuteNonQueryAsync();
|
||||
}, "Error while creating index on imdb_metadata table");
|
||||
|
||||
public Task DropPgtrmIndex() =>
|
||||
ExecuteCommandAsync(
|
||||
async connection =>
|
||||
{
|
||||
logger.LogInformation("Dropping Trigrams index if it exists already");
|
||||
await using var dropCommand = new NpgsqlCommand("DROP INDEX if exists title_gin", connection);
|
||||
await dropCommand.ExecuteNonQueryAsync();
|
||||
}, $"Error while dropping index on {TableNames.MetadataTable} table");
|
||||
|
||||
|
||||
private async Task ExecuteCommandAsync(Func<NpgsqlConnection, Task> operation, string errorMessage)
|
||||
{
|
||||
try
|
||||
{
|
||||
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
|
||||
await connection.OpenAsync();
|
||||
|
||||
await operation(connection);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
logger.LogError(e, errorMessage);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task ExecuteCommandWithTransactionAsync(Func<NpgsqlConnection, NpgsqlTransaction, Task> operation, NpgsqlTransaction transaction, string errorMessage)
|
||||
{
|
||||
try
|
||||
{
|
||||
await operation(transaction.Connection, transaction);
|
||||
}
|
||||
catch (PostgresException)
|
||||
{
|
||||
await transaction.RollbackAsync();
|
||||
throw;
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
logger.LogError(e, errorMessage);
|
||||
}
|
||||
}
|
||||
}
|
||||
15
src/metadata/Features/ImportImdbData/ImdbEntry.cs
Normal file
15
src/metadata/Features/ImportImdbData/ImdbEntry.cs
Normal file
@@ -0,0 +1,15 @@
|
||||
namespace Metadata.Features.ImportImdbData;
|
||||
|
||||
public class ImdbEntry
|
||||
{
|
||||
[BsonId]
|
||||
public string ImdbId { get; set; } = default!;
|
||||
public string? TitleType { get; set; }
|
||||
public string? PrimaryTitle { get; set; }
|
||||
public string? OriginalTitle { get; set; }
|
||||
public string? IsAdult { get; set; }
|
||||
public string? StartYear { get; set; }
|
||||
public string? EndYear { get; set; }
|
||||
public string? RuntimeMinutes { get; set; }
|
||||
public string? Genres { get; set; }
|
||||
}
|
||||
64
src/metadata/Features/ImportImdbData/ImdbMongoDbService.cs
Normal file
64
src/metadata/Features/ImportImdbData/ImdbMongoDbService.cs
Normal file
@@ -0,0 +1,64 @@
|
||||
namespace Metadata.Features.ImportImdbData;
|
||||
|
||||
public class ImdbMongoDbService
|
||||
{
|
||||
private readonly ILogger<ImdbMongoDbService> _logger;
|
||||
private readonly IMongoCollection<ImdbEntry> _imdbCollection;
|
||||
|
||||
public ImdbMongoDbService(MongoConfiguration configuration, ILogger<ImdbMongoDbService> logger)
|
||||
{
|
||||
_logger = logger;
|
||||
|
||||
var client = new MongoClient(configuration.ConnectionString);
|
||||
var database = client.GetDatabase(configuration.DbName);
|
||||
|
||||
_imdbCollection = database.GetCollection<ImdbEntry>("imdb-entries");
|
||||
}
|
||||
|
||||
public async Task InsertImdbEntries(IEnumerable<ImdbEntry> entries)
|
||||
{
|
||||
var operations = new List<WriteModel<ImdbEntry>>();
|
||||
|
||||
foreach (var entry in entries)
|
||||
{
|
||||
var filter = Builders<ImdbEntry>.Filter.Eq(e => e.ImdbId, entry.ImdbId);
|
||||
var update = Builders<ImdbEntry>.Update
|
||||
.SetOnInsert(e => e.TitleType, entry.TitleType)
|
||||
.SetOnInsert(e => e.PrimaryTitle, entry.PrimaryTitle)
|
||||
.SetOnInsert(e => e.OriginalTitle, entry.OriginalTitle)
|
||||
.SetOnInsert(e => e.IsAdult, entry.IsAdult)
|
||||
.SetOnInsert(e => e.StartYear, entry.StartYear)
|
||||
.SetOnInsert(e => e.EndYear, entry.EndYear)
|
||||
.SetOnInsert(e => e.RuntimeMinutes, entry.RuntimeMinutes)
|
||||
.SetOnInsert(e => e.Genres, entry.Genres);
|
||||
|
||||
operations.Add(new UpdateOneModel<ImdbEntry>(filter, update) { IsUpsert = true });
|
||||
}
|
||||
|
||||
await _imdbCollection.BulkWriteAsync(operations);
|
||||
}
|
||||
|
||||
public bool IsDatabaseInitialized()
|
||||
{
|
||||
try
|
||||
{
|
||||
// Create compound index for PrimaryTitle, TitleType, and StartYear
|
||||
var indexKeysDefinition = Builders<ImdbEntry>.IndexKeys
|
||||
.Text(e => e.PrimaryTitle)
|
||||
.Ascending(e => e.TitleType)
|
||||
.Ascending(e => e.StartYear);
|
||||
|
||||
var createIndexOptions = new CreateIndexOptions { Background = true };
|
||||
var indexModel = new CreateIndexModel<ImdbEntry>(indexKeysDefinition, createIndexOptions);
|
||||
|
||||
_imdbCollection.Indexes.CreateOne(indexModel);
|
||||
|
||||
return true;
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
_logger.LogError(e, "Error initializing database");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,3 @@
|
||||
namespace Metadata.Features.ImportImdbData;
|
||||
|
||||
public record ImportImdbDataRequest(string TitleBasicsFilePath, string TitleAkasFilePath, string EpisodesFilePath);
|
||||
public record ImportImdbDataRequest(string FilePath);
|
||||
|
||||
@@ -1,17 +1,92 @@
|
||||
namespace Metadata.Features.ImportImdbData;
|
||||
|
||||
public class ImportImdbDataRequestHandler(
|
||||
ServiceConfiguration configuration,
|
||||
IFileImport<ImdbBasicEntry> basicsFile,
|
||||
IFileImport<ImdbAkaEntry> akasFile,
|
||||
IFileImport<ImdbEpisodeEntry> episodesFile)
|
||||
public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService, JobConfiguration configuration)
|
||||
{
|
||||
public async Task<IndexImdbDataRequest> Handle(ImportImdbDataRequest request, CancellationToken cancellationToken)
|
||||
public async Task<DeleteDownloadedImdbDataRequest> Handle(ImportImdbDataRequest request, CancellationToken cancellationToken)
|
||||
{
|
||||
await basicsFile.Import(request.TitleBasicsFilePath, configuration.InsertBatchSize, cancellationToken);
|
||||
await akasFile.Import(request.TitleAkasFilePath, configuration.InsertBatchSize, cancellationToken);
|
||||
await episodesFile.Import(request.EpisodesFilePath, configuration.InsertBatchSize, cancellationToken);
|
||||
|
||||
return new(request.TitleBasicsFilePath, request.TitleAkasFilePath, request.EpisodesFilePath);
|
||||
logger.LogInformation("Importing Downloaded IMDB data from {FilePath}", request.FilePath);
|
||||
|
||||
var config = new CsvConfiguration(CultureInfo.InvariantCulture)
|
||||
{
|
||||
Delimiter = "\t",
|
||||
BadDataFound = null, // Skip Bad Data from Imdb
|
||||
MissingFieldFound = null, // Skip Missing Fields from Imdb
|
||||
};
|
||||
|
||||
using var reader = new StreamReader(request.FilePath);
|
||||
using var csv = new CsvReader(reader, config);
|
||||
|
||||
var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(configuration.InsertBatchSize)
|
||||
{
|
||||
FullMode = BoundedChannelFullMode.Wait,
|
||||
});
|
||||
|
||||
|
||||
// Skip the header row
|
||||
await csv.ReadAsync();
|
||||
|
||||
var batchInsertTask = CreateBatchOfEntries(channel, cancellationToken);
|
||||
|
||||
await ReadEntries(csv, channel, cancellationToken);
|
||||
|
||||
channel.Writer.Complete();
|
||||
|
||||
await batchInsertTask;
|
||||
|
||||
return new(request.FilePath);
|
||||
}
|
||||
}
|
||||
|
||||
private Task CreateBatchOfEntries(Channel<ImdbEntry, ImdbEntry> channel, CancellationToken cancellationToken) =>
|
||||
Task.Run(async () =>
|
||||
{
|
||||
await foreach (var movieData in channel.Reader.ReadAllAsync(cancellationToken))
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var batch = new List<ImdbEntry>
|
||||
{
|
||||
movieData,
|
||||
};
|
||||
|
||||
while (batch.Count < configuration.InsertBatchSize && channel.Reader.TryRead(out var nextMovieData))
|
||||
{
|
||||
batch.Add(nextMovieData);
|
||||
}
|
||||
|
||||
if (batch.Count > 0)
|
||||
{
|
||||
await mongoDbService.InsertImdbEntries(batch);
|
||||
logger.LogInformation("Imported batch of {BatchSize} starting with ImdbId {FirstImdbId}", batch.Count, batch.First().ImdbId);
|
||||
}
|
||||
}
|
||||
}, cancellationToken);
|
||||
|
||||
private static async Task ReadEntries(CsvReader csv, Channel<ImdbEntry, ImdbEntry> channel, CancellationToken cancellationToken)
|
||||
{
|
||||
while (await csv.ReadAsync())
|
||||
{
|
||||
var movieData = new ImdbEntry
|
||||
{
|
||||
ImdbId = csv.GetField(0),
|
||||
TitleType = csv.GetField(1),
|
||||
PrimaryTitle = csv.GetField(2),
|
||||
OriginalTitle = csv.GetField(3),
|
||||
IsAdult = csv.GetField(4),
|
||||
StartYear = csv.GetField(5),
|
||||
EndYear = csv.GetField(6),
|
||||
RuntimeMinutes = csv.GetField(7),
|
||||
Genres = csv.GetField(8),
|
||||
};
|
||||
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
await channel.Writer.WriteAsync(movieData, cancellationToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
namespace Metadata.Features.ImportImdbData;
|
||||
|
||||
public static class TableNames
|
||||
{
|
||||
public const string MetadataTable = "imdb_metadata";
|
||||
public const string EpisodesTable = "imdb_metadata_episodes";
|
||||
public const string AkasTable = "imdb_metadata_akas";
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
namespace Metadata.Features.IndexImdbData;
|
||||
|
||||
public record IndexImdbDataRequest(string TitleBasicsFilePath, string TitleAkasFilePath, string EpisodesFilePath);
|
||||
@@ -1,13 +0,0 @@
|
||||
namespace Metadata.Features.IndexImdbData;
|
||||
|
||||
public class IndexImdbDataRequestHandler(ILogger<IndexImdbDataRequestHandler> logger, ImdbDbService dbService)
|
||||
{
|
||||
public async Task<DeleteDownloadedImdbDataRequest> Handle(IndexImdbDataRequest request, CancellationToken _)
|
||||
{
|
||||
logger.LogInformation("Creating Trigram Indexes for IMDB data");
|
||||
|
||||
await dbService.CreatePgtrmIndex();
|
||||
|
||||
return new(request.TitleBasicsFilePath, request.TitleAkasFilePath, request.EpisodesFilePath);
|
||||
}
|
||||
}
|
||||
10
src/metadata/Features/Jobs/BaseJob.cs
Normal file
10
src/metadata/Features/Jobs/BaseJob.cs
Normal file
@@ -0,0 +1,10 @@
|
||||
namespace Metadata.Features.Jobs;
|
||||
|
||||
public abstract class BaseJob : IMetadataJob
|
||||
{
|
||||
public abstract bool IsScheduelable { get; }
|
||||
|
||||
public abstract string JobName { get; }
|
||||
|
||||
public abstract Task Invoke();
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
namespace Metadata.Features.Jobs;
|
||||
|
||||
public class DownloadImdbDataJob(IMessageBus messageBus) : IHostedService
|
||||
{
|
||||
public async Task StartAsync(CancellationToken cancellationToken) =>
|
||||
await messageBus.SendAsync(new GetImdbDataRequest());
|
||||
|
||||
public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
|
||||
}
|
||||
7
src/metadata/Features/Jobs/IMetadataJob.cs
Normal file
7
src/metadata/Features/Jobs/IMetadataJob.cs
Normal file
@@ -0,0 +1,7 @@
|
||||
namespace Metadata.Features.Jobs;
|
||||
|
||||
public interface IMetadataJob : IInvocable
|
||||
{
|
||||
bool IsScheduelable { get; }
|
||||
string JobName { get; }
|
||||
}
|
||||
34
src/metadata/Features/Jobs/JobScheduler.cs
Normal file
34
src/metadata/Features/Jobs/JobScheduler.cs
Normal file
@@ -0,0 +1,34 @@
|
||||
namespace Metadata.Features.Jobs;
|
||||
|
||||
public class JobScheduler(IServiceProvider serviceProvider) : IHostedService
|
||||
{
|
||||
public Task StartAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
using var scope = serviceProvider.CreateAsyncScope();
|
||||
|
||||
var mongoDbService = scope.ServiceProvider.GetRequiredService<ImdbMongoDbService>();
|
||||
|
||||
if (!mongoDbService.IsDatabaseInitialized())
|
||||
{
|
||||
throw new InvalidOperationException("MongoDb is not initialized");
|
||||
}
|
||||
|
||||
var jobConfigurations = scope.ServiceProvider.GetRequiredService<JobConfiguration>();
|
||||
var downloadJob = scope.ServiceProvider.GetRequiredService<DownloadImdbDataJob>();
|
||||
|
||||
if (!downloadJob.IsScheduelable)
|
||||
{
|
||||
return downloadJob.Invoke();
|
||||
}
|
||||
|
||||
var scheduler = scope.ServiceProvider.GetRequiredService<IScheduler>();
|
||||
|
||||
scheduler.Schedule<DownloadImdbDataJob>()
|
||||
.Cron(jobConfigurations.DownloadImdbCronSchedule)
|
||||
.PreventOverlapping(nameof(downloadJob.JobName));
|
||||
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
|
||||
}
|
||||
@@ -4,26 +4,23 @@ global using System.Globalization;
|
||||
global using System.IO.Compression;
|
||||
global using System.Text.Json;
|
||||
global using System.Threading.Channels;
|
||||
global using Coravel;
|
||||
global using Coravel.Invocable;
|
||||
global using Coravel.Scheduling.Schedule.Interfaces;
|
||||
global using CsvHelper;
|
||||
global using CsvHelper.Configuration;
|
||||
global using JasperFx.CodeGeneration;
|
||||
global using JasperFx.CodeGeneration.Commands;
|
||||
global using JasperFx.Core;
|
||||
global using Metadata.Extensions;
|
||||
global using Metadata.Features.ClearExistingImdbData;
|
||||
global using Metadata.Features.Configuration;
|
||||
global using Metadata.Features.DeleteDownloadedImdbData;
|
||||
global using Metadata.Features.DownloadImdbData;
|
||||
global using Metadata.Features.Files;
|
||||
global using Metadata.Features.ImportImdbData;
|
||||
global using Metadata.Features.IndexImdbData;
|
||||
global using Metadata.Features.Jobs;
|
||||
global using Metadata.Features.Literals;
|
||||
global using Microsoft.AspNetCore.Builder;
|
||||
global using Microsoft.Extensions.DependencyInjection;
|
||||
global using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
global using Npgsql;
|
||||
global using NpgsqlTypes;
|
||||
global using Oakton;
|
||||
global using MongoDB.Bson.Serialization.Attributes;
|
||||
global using MongoDB.Driver;
|
||||
global using Serilog;
|
||||
global using Wolverine;
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
// <auto-generated/>
|
||||
#pragma warning disable
|
||||
using Metadata.Features.Configuration;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Internal.Generated.WolverineHandlers
|
||||
{
|
||||
// START: ClearExistingImdbDataRequestHandler2085209125
|
||||
public class ClearExistingImdbDataRequestHandler2085209125 : Wolverine.Runtime.Handlers.MessageHandler
|
||||
{
|
||||
private readonly Microsoft.Extensions.Logging.ILogger<Metadata.Features.ClearExistingImdbData.ClearExistingImdbDataRequestHandler> _logger1;
|
||||
private readonly Microsoft.Extensions.Logging.ILogger<Metadata.Features.ImportImdbData.ImdbDbService> _logger2;
|
||||
private readonly Metadata.Features.Configuration.PostgresConfiguration _postgresConfiguration;
|
||||
|
||||
public ClearExistingImdbDataRequestHandler2085209125(Microsoft.Extensions.Logging.ILogger<Metadata.Features.ClearExistingImdbData.ClearExistingImdbDataRequestHandler> __logger1, Microsoft.Extensions.Logging.ILogger<Metadata.Features.ImportImdbData.ImdbDbService> __logger2, Metadata.Features.Configuration.PostgresConfiguration postgresConfiguration)
|
||||
{
|
||||
_logger1 = __logger1;
|
||||
_logger2 = __logger2;
|
||||
_postgresConfiguration = postgresConfiguration;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public override async System.Threading.Tasks.Task HandleAsync(Wolverine.Runtime.MessageContext context, System.Threading.CancellationToken cancellation)
|
||||
{
|
||||
var imdbDbService = new Metadata.Features.ImportImdbData.ImdbDbService(_postgresConfiguration, _logger2);
|
||||
var clearExistingImdbDataRequestHandler = new Metadata.Features.ClearExistingImdbData.ClearExistingImdbDataRequestHandler(_logger1, imdbDbService);
|
||||
// The actual message body
|
||||
var clearExistingImdbDataRequest = (Metadata.Features.ClearExistingImdbData.ClearExistingImdbDataRequest)context.Envelope.Message;
|
||||
|
||||
|
||||
// The actual message execution
|
||||
var outgoing1 = await clearExistingImdbDataRequestHandler.Handle(clearExistingImdbDataRequest, cancellation).ConfigureAwait(false);
|
||||
|
||||
|
||||
// Outgoing, cascaded message
|
||||
await context.EnqueueCascadingAsync(outgoing1).ConfigureAwait(false);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// END: ClearExistingImdbDataRequestHandler2085209125
|
||||
|
||||
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user