4 Commits

Author SHA1 Message Date
iPromKnight
3c8ffd5082 Fix Duplicates (#199)
* Fix Duplicates

* Version
2024-04-02 20:31:22 +01:00
iPromKnight
79e0a0f102 DMM Offline (#198)
* Process DMM all locally

Single call to GitHub to download the repo archive.
Removes the need for a PAT.
Updates RTN to 0.2.13.
Switches to batch_parse for title parsing from RTN.

* Introduce a concurrent dictionary and parallelism (see the sketch after this entry)
2024-04-02 17:01:22 +01:00
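
The offline flow in this commit replaces per-page GitHub API calls with one archive download, a single batch parse over all extracted titles, and a bounded parallel pass that writes results back into a concurrent dictionary. Below is a minimal, self-contained C# sketch of that pattern; `ParseBatch`, `ParsedTitle`, and `DmmContent` are hypothetical stand-ins for RTN's batch_parse and the crawler's real types.

```csharp
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;

// Hypothetical stand-ins for RTN's parse result and the crawler's page entries.
record ParsedTitle(string RawTitle, bool Success);
record DmmContent(string Filename, ParsedTitle? Parsed);

static class DmmBatchSketch
{
    // Stub for RTN's batch_parse: one call for the whole batch instead of one per title.
    static List<ParsedTitle> ParseBatch(IEnumerable<string> titles) =>
        titles.Select(t => new ParsedTitle(t, Success: t.Length > 0)).ToList();

    public static async Task Main()
    {
        var pages = new ConcurrentDictionary<string, DmmContent>
        {
            ["hash1"] = new("Movie.2023.1080p.mkv", null),
            ["hash2"] = new("Show.S01E02.720p.mkv", null),
        };

        // One batch call, then index results by raw title for cheap lookups.
        var byTitle = ParseBatch(pages.Values.Select(c => c.Filename))
            .Where(p => p.Success)
            .GroupBy(p => p.RawTitle)
            .ToDictionary(g => g.Key, g => g.First());

        // Bounded parallelism; the concurrent dictionary makes the
        // in-place updates safe across workers.
        var options = new ParallelOptions { MaxDegreeOfParallelism = 4 };
        await Parallel.ForEachAsync(pages.Keys, options, (hash, _) =>
        {
            if (pages.TryGetValue(hash, out var content) &&
                byTitle.TryGetValue(content.Filename, out var parsed))
            {
                pages[hash] = content with { Parsed = parsed };
            }
            return ValueTask.CompletedTask;
        });

        Console.WriteLine($"Parsed {pages.Count} entries");
    }
}
```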
purple_emily
6181207513 Fix incorrect file index stored (#197)
* Fix incorrect file index stored (off-by-one; see the sketch after this entry)

* Update `rank-torrent-name` to latest version

* Knight Crawler version update
2024-04-01 23:08:32 +01:00
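
The stored-index fix amounts to normalizing the metadata key before persisting it. A one-function sketch, assuming (as the change implies) that the upstream metadata keys are 1-based while the stored file index is 0-based:

```csharp
// Sketch of the corrected mapping; the 1-based assumption is inferred, not confirmed.
static int ToStoredFileIndex(string metadataKey) =>
    int.TryParse(metadataKey, out var fileIndex)
        ? Math.Max(0, fileIndex - 1) // clamp so an unexpected "0" key cannot go negative
        : 0;                         // unparseable keys fall back to index 0
```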
iPromKnight
684dbba2f0 RTN-025 and title category parsing (#195)
* Update RTN to 025

* Implement movie/show type parsing

* Switch to RTN in collectors

* Ensure the env for pythonnet is loaded (see the sketch after this entry), and that requirements.txt is copied for the qbit collector

* Version bump
2024-03-31 22:01:09 +01:00
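
The pythonnet bullet ties into the Dockerfile changes later in this diff, which install CPython 3.11 and set PYTHONNET_PYDLL. A rough sketch of a startup guard under those assumptions; `EnsurePythonEngine` is a hypothetical helper, while `Runtime.PythonDLL` and `PythonEngine` are real pythonnet APIs:

```csharp
using System;
using Python.Runtime; // pythonnet

static class PythonBootstrap
{
    // Fail fast if the interpreter location isn't configured, then start the engine.
    public static void EnsurePythonEngine()
    {
        var pydll = Environment.GetEnvironmentVariable("PYTHONNET_PYDLL")
            ?? throw new InvalidOperationException(
                "PYTHONNET_PYDLL must point at the CPython shared library, " +
                "e.g. /usr/lib/libpython3.11.so.1.0");

        Runtime.PythonDLL = pydll;        // tell pythonnet which libpython to load
        PythonEngine.Initialize();        // start the embedded interpreter
        PythonEngine.BeginAllowThreads(); // release the GIL for other managed threads
    }
}
```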
72 changed files with 617 additions and 985 deletions

.gitignore vendored
View File

@@ -612,3 +612,7 @@ fabric.properties
# Mac directory indexes
.DS_Store
deployment/docker/stack.env
+src/producer/src/python/
+src/debrid-collector/python/
+src/qbit-collector/python/

View File

@@ -94,7 +94,7 @@ services:
condition: service_healthy
env_file: stack.env
hostname: knightcrawler-addon
-image: gabisonfire/knightcrawler-addon:2.0.17
+image: gabisonfire/knightcrawler-addon:2.0.21
labels:
logging: promtail
networks:
@@ -117,7 +117,7 @@ services:
redis:
condition: service_healthy
env_file: stack.env
-image: gabisonfire/knightcrawler-consumer:2.0.17
+image: gabisonfire/knightcrawler-consumer:2.0.21
labels:
logging: promtail
networks:
@@ -138,7 +138,7 @@ services:
redis:
condition: service_healthy
env_file: stack.env
-image: gabisonfire/knightcrawler-debrid-collector:2.0.17
+image: gabisonfire/knightcrawler-debrid-collector:2.0.21
labels:
logging: promtail
networks:
@@ -152,7 +152,7 @@ services:
migrator:
condition: service_completed_successfully
env_file: stack.env
-image: gabisonfire/knightcrawler-metadata:2.0.17
+image: gabisonfire/knightcrawler-metadata:2.0.21
networks:
- knightcrawler-network
restart: "no"
@@ -163,7 +163,7 @@ services:
postgres:
condition: service_healthy
env_file: stack.env
-image: gabisonfire/knightcrawler-migrator:2.0.17
+image: gabisonfire/knightcrawler-migrator:2.0.21
networks:
- knightcrawler-network
restart: "no"
@@ -182,7 +182,7 @@ services:
redis:
condition: service_healthy
env_file: stack.env
-image: gabisonfire/knightcrawler-producer:2.0.17
+image: gabisonfire/knightcrawler-producer:2.0.21
labels:
logging: promtail
networks:
@@ -207,7 +207,7 @@ services:
deploy:
replicas: ${QBIT_REPLICAS:-0}
env_file: stack.env
-image: gabisonfire/knightcrawler-qbit-collector:2.0.17
+image: gabisonfire/knightcrawler-qbit-collector:2.0.21
labels:
logging: promtail
networks:

View File

@@ -20,7 +20,7 @@ x-depends: &knightcrawler-app-depends
services:
metadata:
-image: gabisonfire/knightcrawler-metadata:2.0.17
+image: gabisonfire/knightcrawler-metadata:2.0.21
env_file: ../../.env
networks:
- knightcrawler-network
@@ -30,7 +30,7 @@ services:
condition: service_completed_successfully
migrator:
-image: gabisonfire/knightcrawler-migrator:2.0.17
+image: gabisonfire/knightcrawler-migrator:2.0.21
env_file: ../../.env
networks:
- knightcrawler-network
@@ -40,7 +40,7 @@ services:
condition: service_healthy
addon:
-image: gabisonfire/knightcrawler-addon:2.0.17
+image: gabisonfire/knightcrawler-addon:2.0.21
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
hostname: knightcrawler-addon
@@ -48,22 +48,22 @@ services:
- "7000:7000"
consumer:
-image: gabisonfire/knightcrawler-consumer:2.0.17
+image: gabisonfire/knightcrawler-consumer:2.0.21
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
debridcollector:
-image: gabisonfire/knightcrawler-debrid-collector:2.0.17
+image: gabisonfire/knightcrawler-debrid-collector:2.0.21
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
producer:
-image: gabisonfire/knightcrawler-producer:2.0.17
+image: gabisonfire/knightcrawler-producer:2.0.21
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
qbitcollector:
-image: gabisonfire/knightcrawler-qbit-collector:2.0.17
+image: gabisonfire/knightcrawler-qbit-collector:2.0.21
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
depends_on:

View File

@@ -38,6 +38,3 @@ QBIT_REPLICAS=0
# Addon
DEBUG_MODE=false
-# Producer
-GITHUB_PAT=

View File

@@ -1,6 +0,0 @@
apiVersion: v2
appVersion: 2.0.17
description: A helm chart for Knightcrawler
name: knightcrawler
type: application
version: 0.1.0

View File

@@ -1,6 +0,0 @@
Congratulations,
Knightcrawler is now deployed. This may take a while to be up and responding.

View File

@@ -1,27 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: '{{ .Release.Name }}-config'
labels:
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
data:
COLLECTOR_DEBRID_ENABLED: '{{ .Values.knightcrawler.debridcollector.enabled }}'
COLLECTOR_QBIT_ENABLED: '{{ .Values.knightcrawler.qbitcollector.enabled }}'
DEBUG_MODE: '{{ .Values.knightcrawler.debug }}'
METADATA_INSERT_BATCH_SIZE: '{{ .Values.environment.metadata.insertBatchSize }}'
POSTGRES_DB: '{{ .Values.environment.postgres.dbName }}'
POSTGRES_HOST: '{{ if .Values.environment.postgres.external }}{{ .Values.environment.postgres.host }}{{ else }}{{ .Release.Name }}-postgres{{ end }}'
POSTGRES_PORT: '{{ .Values.environment.postgres.port }}'
QBIT_HOST: '{{ .Values.environment.qbitcollector.qbitHost }}'
QBIT_TRACKERS_URL: '{{ .Values.environment.qbitcollector.trackersUrl }}'
RABBITMQ_CONSUMER_QUEUE_NAME: '{{ .Values.environment.producer.queueName }}'
RABBITMQ_DURABLE: '{{ .Values.environment.producer.durable }}'
RABBITMQ_HOST: '{{ if .Values.environment.lavinmq.external }}{{ .Values.environment.lavinmq.host }}{{ else }}{{ .Release.Name }}-lavinmq{{ end }}'
RABBITMQ_MAX_PUBLISH_BATCH_SIZE: '{{ .Values.environment.producer.maxPublishBatchSize }}'
RABBITMQ_MAX_QUEUE_SIZE: '{{ .Values.environment.producer.maxQueueSize }}'
RABBITMQ_PUBLISH_INTERVAL_IN_SECONDS: '{{ .Values.environment.producer.publishIntervalSeconds }}'
REDIS_EXTRA: '{{ .Values.environment.redis.extra }}'
REDIS_HOST: '{{ if .Values.environment.redis.external }}{{ .Values.environment.redis.host }}{{ else }}{{ .Release.Name }}-redis{{ end }}'
REDIS_PORT: '{{ .Values.environment.redis.port }}'
TZ: '{{ .Values.shared.timezone }}'

View File

@@ -1,15 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
name: '{{ .Release.Name }}-secrets'
labels:
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
type: Opaque
data:
GITHUB_PAT: '{{ .Values.environment.producer.githubPat | b64enc }}'
COLLECTOR_REAL_DEBRID_API_KEY: '{{ .Values.environment.debridcollector.realDebridApiKey | b64enc }}'
POSTGRES_USER: '{{ .Values.environment.postgres.user | b64enc }}'
POSTGRES_PASSWORD: '{{ .Values.environment.postgres.password | b64enc }}'
RABBITMQ_PASSWORD: '{{ .Values.environment.lavinmq.password | b64enc }}'
RABBITMQ_USER: '{{ .Values.environment.lavinmq.user | b64enc }}'

View File

@@ -1,25 +0,0 @@
{{ if .Values.infrastructure.lavinmq.enabled }}
apiVersion: v1
kind: Service
metadata:
name: '{{ .Release.Name }}-lavinmq'
labels:
component: lavinmq
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
spec:
selector:
component: lavinmq
release: '{{ .Release.Name }}'
type: ClusterIP
ports:
- protocol: TCP
port: 5672
targetPort: 5672
- protocol: TCP
port: 15672
targetPort: 15672
- protocol: TCP
port: 15692
targetPort: 15692
{{- end -}}

View File

@@ -1,60 +0,0 @@
{{ if .Values.infrastructure.lavinmq.enabled }}
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: '{{ .Release.Name }}-lavinmq'
labels:
component: lavinmq
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "0"
spec:
serviceName: '{{ .Release.Name }}-lavinmq'
replicas: 1
selector:
matchLabels:
component: lavinmq
release: '{{ .Release.Name }}'
template:
metadata:
labels:
component: lavinmq
release: '{{ .Release.Name }}'
spec:
containers:
- name: lavinmq
image: '{{ .Values.infrastructure.lavinmq.image }}:{{ .Values.infrastructure.lavinmq.tag }}'
ports:
- name: lavinmq
containerPort: 5672
- name: lavinmq-15672
containerPort: 15672
- name: lavinmq-15692
containerPort: 15692
envFrom:
- configMapRef:
name: '{{ .Release.Name }}-config'
- secretRef:
name: '{{ .Release.Name }}-secrets'
volumeMounts:
- mountPath: /var/lib/lavinmq
name: lavinmq
livenessProbe:
exec:
command:
- lavinmqctl status
periodSeconds: 10
initialDelaySeconds: 10
successThreshold: 1
failureThreshold: 3
volumeClaimTemplates:
- metadata:
name: lavinmq
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: '{{ .Values.persistence.lavinmq.capacity }}'
{{- end -}}

View File

@@ -1,19 +0,0 @@
{{ if .Values.infrastructure.postgres.enabled }}
apiVersion: v1
kind: Service
metadata:
name: '{{ .Release.Name }}-postgres'
labels:
component: postgres
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
spec:
selector:
component: postgres
release: '{{ .Release.Name }}'
type: ClusterIP
ports:
- protocol: TCP
port: 5432
targetPort: 5432
{{- end -}}

View File

@@ -1,58 +0,0 @@
{{ if .Values.infrastructure.postgres.enabled }}
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: '{{ .Release.Name }}-postgres'
labels:
component: postgres
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "0"
spec:
serviceName: '{{ .Release.Name }}-postgres'
replicas: 1
selector:
matchLabels:
component: postgres
release: '{{ .Release.Name }}'
template:
metadata:
labels:
component: postgres
release: '{{ .Release.Name }}'
spec:
containers:
- name: postgres
image: '{{ .Values.infrastructure.postgres.image }}:{{ .Values.infrastructure.postgres.tag }}'
ports:
- name: postgres
containerPort: 5432
envFrom:
- configMapRef:
name: '{{ .Release.Name }}-config'
- secretRef:
name: '{{ .Release.Name }}-secrets'
volumeMounts:
- mountPath: /var/lib/postgresql/data
name: postgres
livenessProbe:
exec:
command:
- sh
- -c
- pg_isready -h localhost -U $POSTGRES_USER
periodSeconds: 10
initialDelaySeconds: 10
successThreshold: 1
failureThreshold: 3
volumeClaimTemplates:
- metadata:
name: postgres
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: '{{ .Values.persistence.postgres.capacity }}'
{{- end -}}

View File

@@ -1,57 +0,0 @@
{{ if .Values.knightcrawler.qbitcollector.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: '{{ .Release.Name }}-qbittorrent'
labels:
component: qbittorrent
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "0"
spec:
replicas: 1
selector:
matchLabels:
component: qbittorrent
release: '{{ .Release.Name }}'
template:
metadata:
labels:
component: qbittorrent
release: '{{ .Release.Name }}'
spec:
containers:
- name: qbittorrent
image: '{{ .Values.infrastructure.qbittorrent.image }}:{{ .Values.infrastructure.qbittorrent.tag }}'
ports:
- name: qbittorrent
containerPort: 6881
- name: qbittorrent-6881
containerPort: 6881
- name: qbittorrent-8080
containerPort: 8080
env:
- name: PUID
value: '{{ .Values.environment.qbittorrent.puid }}'
- name: PGID
value: '{{ .Values.environment.qbittorrent.pgid }}'
- name: TORRENTING_PORT
value: '{{ .Values.environment.qbittorrent.torrentingPort }}'
- name: WEBUI_PORT
value: '{{ .Values.environment.qbittorrent.webuiPort }}'
envFrom:
- configMapRef:
name: '{{ .Release.Name }}-config'
- secretRef:
name: '{{ .Release.Name }}-secrets'
livenessProbe:
exec:
command:
- curl --fail http://localhost:8080
periodSeconds: 10
initialDelaySeconds: 10
successThreshold: 1
failureThreshold: 3
{{- end -}}

View File

@@ -1,25 +0,0 @@
{{ if .Values.knightcrawler.qbitcollector.enabled }}
apiVersion: v1
kind: Service
metadata:
name: '{{ .Release.Name }}-qbittorrent'
labels:
component: qbittorrent
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
spec:
selector:
component: qbittorrent
release: '{{ .Release.Name }}'
type: ClusterIP
ports:
- protocol: TCP
port: 6881
targetPort: 6881
- protocol: TCP
port: 6881
targetPort: 6881
- protocol: TCP
port: 8080
targetPort: 8080
{{- end -}}

View File

@@ -1,19 +0,0 @@
{{ if .Values.infrastructure.redis.enabled }}
apiVersion: v1
kind: Service
metadata:
name: '{{ .Release.Name }}-redis'
labels:
component: redis
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
spec:
selector:
component: redis
release: '{{ .Release.Name }}'
type: ClusterIP
ports:
- protocol: TCP
port: 6379
targetPort: 6379
{{- end -}}

View File

@@ -1,56 +0,0 @@
{{ if .Values.infrastructure.redis.enabled }}
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: '{{ .Release.Name }}-redis'
labels:
component: redis
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "0"
spec:
serviceName: '{{ .Release.Name }}-redis'
replicas: 1
selector:
matchLabels:
component: redis
release: '{{ .Release.Name }}'
template:
metadata:
labels:
component: redis
release: '{{ .Release.Name }}'
spec:
containers:
- name: redis
image: '{{ .Values.infrastructure.redis.image }}:{{ .Values.infrastructure.redis.tag }}'
ports:
- name: redis
containerPort: 6379
envFrom:
- configMapRef:
name: '{{ .Release.Name }}-config'
- secretRef:
name: '{{ .Release.Name }}-secrets'
volumeMounts:
- mountPath: /data
name: redis
livenessProbe:
exec:
command:
- redis-cli ping
periodSeconds: 10
initialDelaySeconds: 10
successThreshold: 1
failureThreshold: 3
volumeClaimTemplates:
- metadata:
name: redis
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: '{{ .Values.persistence.redis.capacity }}'
{{- end -}}

View File

@@ -1,28 +0,0 @@
apiVersion: batch/v1
kind: Job
metadata:
name: '{{ .Release.Name }}-metadata'
labels:
component: metadata
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "2"
"helm.sh/hook-delete-policy": hook-succeeded
spec:
template:
metadata:
labels:
component: metadata
release: '{{ .Release.Name }}'
spec:
restartPolicy: OnFailure
containers:
- name: metadata
image: '{{ .Values.knightcrawler.metadata.image }}{{ if ne .Values.knightcrawler.globalImageTagOverride "" }}:{{ .Values.knightcrawler.globalImageTagOverride }}{{else}}:{{ .Values.knightcrawler.metadata.tag}}{{ end }}'
envFrom:
- configMapRef:
name: '{{ .Release.Name }}-config'
- secretRef:
name: '{{ .Release.Name }}-secrets'

View File

@@ -1,28 +0,0 @@
apiVersion: batch/v1
kind: Job
metadata:
name: '{{ .Release.Name }}-migrator'
labels:
component: migrator
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "1"
"helm.sh/hook-delete-policy": hook-succeeded
spec:
template:
metadata:
labels:
component: migrator
release: '{{ .Release.Name }}'
spec:
restartPolicy: OnFailure
containers:
- name: migrator
image: '{{ .Values.knightcrawler.migrator.image }}{{ if ne .Values.knightcrawler.globalImageTagOverride "" }}:{{ .Values.knightcrawler.globalImageTagOverride }}{{else}}:{{ .Values.knightcrawler.migrator.tag}}{{ end }}'
envFrom:
- configMapRef:
name: '{{ .Release.Name }}-config'
- secretRef:
name: '{{ .Release.Name }}-secrets'

View File

@@ -1,35 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: '{{ .Release.Name }}-addon'
labels:
component: addon
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "4"
spec:
replicas: {{ .Values.knightcrawler.addon.replicas }}
selector:
matchLabels:
component: addon
release: '{{ .Release.Name }}'
template:
metadata:
labels:
component: addon
release: '{{ .Release.Name }}'
spec:
containers:
- name: addon
image: '{{ .Values.knightcrawler.addon.image }}{{ if ne .Values.knightcrawler.globalImageTagOverride "" }}:{{ .Values.knightcrawler.globalImageTagOverride }}{{else}}:{{ .Values.knightcrawler.addon.tag}}{{ end }}'
ports:
- name: addon
containerPort: 7000
envFrom:
- configMapRef:
name: '{{ .Release.Name }}-config'
- secretRef:
name: '{{ .Release.Name }}-secrets'

View File

@@ -1,32 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: '{{ .Release.Name }}-consumer'
labels:
component: consumer
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "4"
spec:
replicas: {{ .Values.knightcrawler.consumer.replicas }}
selector:
matchLabels:
component: consumer
release: '{{ .Release.Name }}'
template:
metadata:
labels:
component: consumer
release: '{{ .Release.Name }}'
spec:
containers:
- name: consumer
image: '{{ .Values.knightcrawler.consumer.image }}{{ if ne .Values.knightcrawler.globalImageTagOverride "" }}:{{ .Values.knightcrawler.globalImageTagOverride }}{{else}}:{{ .Values.knightcrawler.consumer.tag}}{{ end }}'
envFrom:
- configMapRef:
name: '{{ .Release.Name }}-config'
- secretRef:
name: '{{ .Release.Name }}-secrets'

View File

@@ -1,31 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: '{{ .Release.Name }}-debridcollector'
labels:
component: debridcollector
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "4"
spec:
replicas: {{ .Values.knightcrawler.debridcollector.replicas }}
selector:
matchLabels:
component: debridcollector
release: '{{ .Release.Name }}'
template:
metadata:
labels:
component: debridcollector
release: '{{ .Release.Name }}'
spec:
containers:
- name: debridcollector
image: '{{ .Values.knightcrawler.debridcollector.image }}{{ if ne .Values.knightcrawler.globalImageTagOverride "" }}:{{ .Values.knightcrawler.globalImageTagOverride }}{{else}}:{{ .Values.knightcrawler.debridcollector.tag}}{{ end }}'
envFrom:
- configMapRef:
name: '{{ .Release.Name }}-config'
- secretRef:
name: '{{ .Release.Name }}-secrets'

View File

@@ -1,31 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: '{{ .Release.Name }}-producer'
labels:
component: producer
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "4"
spec:
replicas: {{ .Values.knightcrawler.producer.replicas }}
selector:
matchLabels:
component: producer
release: '{{ .Release.Name }}'
template:
metadata:
labels:
component: producer
release: '{{ .Release.Name }}'
spec:
containers:
- name: producer
image: '{{ .Values.knightcrawler.producer.image }}{{ if ne .Values.knightcrawler.globalImageTagOverride "" }}:{{ .Values.knightcrawler.globalImageTagOverride }}{{else}}:{{ .Values.knightcrawler.producer.tag}}{{ end }}'
envFrom:
- configMapRef:
name: '{{ .Release.Name }}-config'
- secretRef:
name: '{{ .Release.Name }}-secrets'

View File

@@ -1,33 +0,0 @@
{{ if .Values.knightcrawler.qbitcollector.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: '{{ .Release.Name }}-qbitcollector'
labels:
component: qbitcollector
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "4"
spec:
replicas: {{ .Values.knightcrawler.qbitcollector.replicas }}
selector:
matchLabels:
component: qbitcollector
release: '{{ .Release.Name }}'
template:
metadata:
labels:
component: qbitcollector
release: '{{ .Release.Name }}'
spec:
containers:
- name: qbitcollector
image: '{{ .Values.knightcrawler.qbitcollector.image }}{{ if ne .Values.knightcrawler.globalImageTagOverride "" }}:{{ .Values.knightcrawler.globalImageTagOverride }}{{else}}:{{ .Values.knightcrawler.qbitcollector.tag}}{{ end }}'
envFrom:
- configMapRef:
name: '{{ .Release.Name }}-config'
- secretRef:
name: '{{ .Release.Name }}-secrets'
{{- end -}}

View File

@@ -1,17 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: '{{ .Release.Name }}-addon'
labels:
component: addon
project: '{{ .Chart.Name }}'
release: '{{ .Release.Name }}'
spec:
selector:
component: addon
release: '{{ .Release.Name }}'
type: ClusterIP
ports:
- protocol: TCP
port: 7000
targetPort: 7000

View File

@@ -1,100 +0,0 @@
knightcrawler:
debug: false
globalImageTagOverride: ""
addon:
image: "gabisonfire/knightcrawler-addon"
tag: "2.0.17"
replicas: 1
consumer:
image: "gabisonfire/knightcrawler-consumer"
tag: "2.0.17"
replicas: 1
metadata:
image: "gabisonfire/knightcrawler-metadata"
tag: "2.0.17"
replicas: 1
migrator:
image: "gabisonfire/knightcrawler-migrator"
tag: "2.0.17"
replicas: 1
debridcollector:
image: "gabisonfire/knightcrawler-debrid-collector"
tag: "2.0.17"
enabled: true
replicas: 1
qbitcollector:
image: "gabisonfire/knightcrawler-qbit-collector"
tag: "2.0.17"
enabled: false
replicas: 1
producer:
image: "gabisonfire/knightcrawler-producer"
tag: "2.0.17"
replicas: 1
infrastructure:
lavinmq:
image: "cloudamqp/lavinmq"
tag: "latest"
enabled: true
postgres:
image: "postgres"
tag: "latest"
enabled: true
redis:
image: "redis/redis-stack-server"
tag: "latest"
enabled: true
qbittorrent:
image: "lscr.io/linuxserver/qbittorrent"
tag: "latest"
environment:
redis:
external: false
host: ""
port: "6379"
extra: "abortConnect=false,allowAdmin=true"
postgres:
external: false
host: ""
port: "5432"
dbName: "knightcrawler"
user: "postgres"
password: "postgres"
lavinmq:
external: false
host: ""
user: "guest"
password: "guest"
qbitcollector:
qbitHost: "http://qbittorrent:8080"
trackersUrl: "https://raw.githubusercontent.com/ngosang/trackerslist/master/trackers_all_http.txt"
debridcollector:
realDebridApiKey: ""
producer:
githubPat: ""
queueName: "ingested"
durable: true
maxPublishBatchSize: 500
maxQueueSize: 0
publishIntervalSeconds: 10
metadata:
insertBatchSize: 50000
qbittorrent:
pgid: "1000"
puid: "1000"
torrentingPort: "6881"
webuiPort: "8080"
persistence:
storageClassName: ""
redis:
capacity: 1Gi
postgres:
capacity: 1Gi
lavinmq:
capacity: 1Gi
shared:
timezone: "London/Europe"

View File

@@ -17,7 +17,6 @@
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" /> <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" /> <PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
<PackageReference Include="Polly" Version="8.3.1" /> <PackageReference Include="Polly" Version="8.3.1" />
<PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
<PackageReference Include="Serilog" Version="3.1.1" /> <PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" /> <PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" /> <PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
@@ -29,10 +28,30 @@
<None Include="Configuration\logging.json"> <None Include="Configuration\logging.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory> <CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None> </None>
<None Update="requirements.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<Content Remove="eng\**" />
<None Remove="eng\**" />
</ItemGroup>
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
<Content Remove="python\**" />
<None Include="python\**">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\shared\SharedContracts.csproj" /> <ProjectReference Include="..\shared\SharedContracts.csproj" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<Compile Remove="eng\**" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Remove="eng\**" />
</ItemGroup>
</Project> </Project>

View File

@@ -6,6 +6,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharedContracts", "..\share
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F53-28E6-404F-9EFE-DADFBEF8338B}"
EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{72A042C3-B4F3-45C5-AC20-041FE8F41EFC}"
+ProjectSection(SolutionItems) = preProject
+eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
+eng\install-python-reqs.sh = eng\install-python-reqs.sh
+EndProjectSection
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU

View File

@@ -9,12 +9,23 @@ RUN dotnet restore -a $TARGETARCH
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH
-FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
+FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
WORKDIR /app
+ENV PYTHONUNBUFFERED=1
+RUN apk add --update --no-cache python3=~3.11.8-r0 py3-pip && ln -sf python3 /usr/bin/python
COPY --from=build /src/out .
+RUN rm -rf /app/python && mkdir -p /app/python
+RUN pip3 install -r /app/requirements.txt -t /app/python
RUN addgroup -S debrid && adduser -S -G debrid debrid
USER debrid
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD pgrep -f dotnet || exit 1
+ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0
ENTRYPOINT ["dotnet", "DebridCollector.dll"]

View File

@@ -1,5 +1,3 @@
-using DebridCollector.Features.Configuration;
namespace DebridCollector.Extensions;
public static class ServiceCollectionExtensions
@@ -17,7 +15,8 @@ public static class ServiceCollectionExtensions
var serviceConfiguration = services.LoadConfigurationFromEnv<DebridCollectorConfiguration>();
services.AddRealDebridClient(serviceConfiguration);
-services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>();
+services.RegisterPythonEngine();
+services.AddSingleton<IRankTorrentName, RankTorrentName>();
services.AddHostedService<DebridRequestProcessor>();
return services;

View File

@@ -1,6 +1,4 @@
-using DebridCollector.Features.Configuration;
namespace DebridCollector.Features.Debrid;
public static class ServiceCollectionExtensions
{

View File

@@ -3,10 +3,11 @@ namespace DebridCollector.Features.Worker;
public static class DebridMetaToTorrentMeta
{
public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection(
-IParseTorrentTitle torrentTitle,
+IRankTorrentName rankTorrentName,
Torrent torrent,
string ImdbId,
-FileDataDictionary Metadata)
+FileDataDictionary Metadata,
+ILogger<WriteMetadataConsumer> logger)
{
try
{
@@ -15,34 +16,42 @@ public static class DebridMetaToTorrentMeta
foreach (var metadataEntry in Metadata.Where(m => Filetypes.VideoFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext))))
{
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
+var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
var file = new TorrentFile
{
ImdbId = ImdbId,
KitsuId = 0,
InfoHash = torrent.InfoHash,
-FileIndex = validFileIndex ? fileIndex : 0,
+FileIndex = validFileIndex ? fileIndexMinusOne : 0,
Title = metadataEntry.Value.Filename,
Size = metadataEntry.Value.Filesize.GetValueOrDefault(),
};
-var parsedTitle = torrentTitle.Parse(file.Title);
-file.ImdbSeason = parsedTitle.Seasons.FirstOrDefault();
-file.ImdbEpisode = parsedTitle.Episodes.FirstOrDefault();
+var parsedTitle = rankTorrentName.Parse(file.Title, false);
+if (!parsedTitle.Success)
+{
+logger.LogWarning("Failed to parse title {Title} for metadata mapping", file.Title);
+continue;
+}
+file.ImdbSeason = parsedTitle.Response?.Season?.FirstOrDefault() ?? 0;
+file.ImdbEpisode = parsedTitle.Response?.Episode?.FirstOrDefault() ?? 0;
files.Add(file);
}
return files;
}
-catch (Exception)
+catch (Exception ex)
{
+logger.LogWarning("Failed to map metadata to files collection: {Exception}", ex.Message);
return [];
}
}
-public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, FileDataDictionary Metadata)
+public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, FileDataDictionary Metadata, ILogger<WriteMetadataConsumer> logger)
{
try
{
@@ -58,13 +67,14 @@ public static class DebridMetaToTorrentMeta
foreach (var metadataEntry in Metadata.Where(m => Filetypes.SubtitleFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext))))
{
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
+var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
var fileId = torrentFiles.FirstOrDefault(
t => Path.GetFileNameWithoutExtension(t.Title) == Path.GetFileNameWithoutExtension(metadataEntry.Value.Filename))?.Id ?? 0;
var file = new SubtitleFile
{
InfoHash = InfoHash,
-FileIndex = validFileIndex ? fileIndex : 0,
+FileIndex = validFileIndex ? fileIndexMinusOne : 0,
FileId = fileId,
Title = metadataEntry.Value.Filename,
};
@@ -74,8 +84,9 @@ public static class DebridMetaToTorrentMeta
return files;
}
-catch (Exception)
+catch (Exception ex)
{
+logger.LogWarning("Failed to map metadata to subtitles collection: {Exception}", ex.Message);
return [];
}
}

View File

@@ -53,6 +53,12 @@ public class InfohashMetadataSagaStateMachine : MassTransitStateMachine<Infohash
.Then(
context =>
{
+if (!context.Message.WithFiles)
+{
+logger.LogInformation("No files written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
+return;
+}
logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
})
.TransitionTo(Completed)

View File

@@ -16,7 +16,7 @@ public record WriteMetadata(Torrent Torrent, TorrentMetadataResponse Metadata, s
}
[EntityName("metadata-written-debrid-colloctor")]
-public record MetadataWritten(TorrentMetadataResponse Metadata) : CorrelatedBy<Guid>
+public record MetadataWritten(TorrentMetadataResponse Metadata, bool WithFiles) : CorrelatedBy<Guid>
{
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}

View File

@@ -1,25 +1,28 @@
namespace DebridCollector.Features.Worker;
-public class WriteMetadataConsumer(IParseTorrentTitle parseTorrentTitle, IDataStorage dataStorage) : IConsumer<WriteMetadata>
+public class WriteMetadataConsumer(IRankTorrentName rankTorrentName, IDataStorage dataStorage, ILogger<WriteMetadataConsumer> logger) : IConsumer<WriteMetadata>
{
public async Task Consume(ConsumeContext<WriteMetadata> context)
{
var request = context.Message;
-var torrentFiles = DebridMetaToTorrentMeta.MapMetadataToFilesCollection(parseTorrentTitle, request.Torrent, request.ImdbId, request.Metadata.Metadata);
+var torrentFiles = DebridMetaToTorrentMeta.MapMetadataToFilesCollection(rankTorrentName, request.Torrent, request.ImdbId, request.Metadata.Metadata, logger);
-if (torrentFiles.Any())
+if (!torrentFiles.Any())
{
-await dataStorage.InsertFiles(torrentFiles);
-var subtitles = await DebridMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata);
-if (subtitles.Any())
-{
-await dataStorage.InsertSubtitles(subtitles);
-}
+await context.Publish(new MetadataWritten(request.Metadata, false));
+return;
}
-await context.Publish(new MetadataWritten(request.Metadata));
+await dataStorage.InsertFiles(torrentFiles);
+var subtitles = await DebridMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata, logger);
+if (subtitles.Any())
+{
+await dataStorage.InsertSubtitles(subtitles);
+}
+await context.Publish(new MetadataWritten(request.Metadata, true));
}
}

View File

@@ -4,17 +4,18 @@ global using System.Text.Json;
global using System.Text.Json.Serialization;
global using System.Threading.Channels;
global using DebridCollector.Extensions;
+global using DebridCollector.Features.Configuration;
global using DebridCollector.Features.Debrid;
global using DebridCollector.Features.Worker;
global using MassTransit;
-global using MassTransit.Mediator;
global using Microsoft.AspNetCore.Builder;
global using Microsoft.Extensions.DependencyInjection;
global using Polly;
global using Polly.Extensions.Http;
-global using PromKnight.ParseTorrentTitle;
global using SharedContracts.Configuration;
global using SharedContracts.Dapper;
global using SharedContracts.Extensions;
global using SharedContracts.Models;
+global using SharedContracts.Python;
+global using SharedContracts.Python.RTN;
global using SharedContracts.Requests;

View File

@@ -0,0 +1,2 @@
mkdir -p ../python
python -m pip install -r ../requirements.txt -t ../python/

View File

@@ -0,0 +1,5 @@
#!/bin/bash
rm -rf ../python
mkdir -p ../python
python3 -m pip install -r ../requirements.txt -t ../python/

View File

@@ -0,0 +1 @@
rank-torrent-name==0.2.13

View File

@@ -0,0 +1,43 @@
-- Drop Duplicate Files in Files Table
DELETE FROM public.files
WHERE id NOT IN (
SELECT MAX(id)
FROM public.files
GROUP BY "infoHash", "fileIndex"
);
-- Add Index to files table
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1
FROM pg_constraint
WHERE conname = 'files_unique_infohash_fileindex'
) THEN
ALTER TABLE public.files
ADD CONSTRAINT files_unique_infohash_fileindex UNIQUE ("infoHash", "fileIndex");
END IF;
END $$;
-- Drop Duplicate subtitles in Subtitles Table
DELETE FROM public.subtitles
WHERE id NOT IN (
SELECT MAX(id)
FROM public.subtitles
GROUP BY "infoHash", "fileIndex"
);
-- Add Index to subtitles table
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1
FROM pg_constraint
WHERE conname = 'subtitles_unique_infohash_fileindex'
) THEN
ALTER TABLE public.subtitles
ADD CONSTRAINT subtitles_unique_infohash_fileindex UNIQUE ("infoHash", "fileIndex");
END IF;
END $$;

View File

@@ -28,7 +28,7 @@
},
{
"Name": "SyncDmmJob",
-"IntervalSeconds": 1800,
+"IntervalSeconds": 10800,
"Enabled": true
},
{

View File

@@ -0,0 +1,70 @@
namespace Producer.Features.Crawlers.Dmm;
public class DMMFileDownloader(HttpClient client, ILogger<DMMFileDownloader> logger) : IDMMFileDownloader
{
private const string Filename = "main.zip";
private readonly IReadOnlyCollection<string> _filesToIgnore = [
"index.html",
"404.html",
"dedupe.sh",
"CNAME",
];
public const string ClientName = "DmmFileDownloader";
public async Task<string> DownloadFileToTempPath(CancellationToken cancellationToken)
{
logger.LogInformation("Downloading DMM Hashlists");
var response = await client.GetAsync(Filename, cancellationToken);
var tempDirectory = Path.Combine(Path.GetTempPath(), "DMMHashlists");
EnsureDirectoryIsClean(tempDirectory);
response.EnsureSuccessStatusCode();
await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
using var archive = new ZipArchive(stream);
logger.LogInformation("Extracting DMM Hashlists to {TempDirectory}", tempDirectory);
foreach (var entry in archive.Entries)
{
var entryPath = Path.Combine(tempDirectory, Path.GetFileName(entry.FullName));
if (!entry.FullName.EndsWith('/')) // It's a file
{
entry.ExtractToFile(entryPath, true);
}
}
foreach (var file in _filesToIgnore)
{
CleanRepoExtras(tempDirectory, file);
}
logger.LogInformation("Downloaded and extracted Repository to {TempDirectory}", tempDirectory);
return tempDirectory;
}
private static void CleanRepoExtras(string tempDirectory, string fileName)
{
var repoIndex = Path.Combine(tempDirectory, fileName);
if (File.Exists(repoIndex))
{
File.Delete(repoIndex);
}
}
private static void EnsureDirectoryIsClean(string tempDirectory)
{
if (Directory.Exists(tempDirectory))
{
Directory.Delete(tempDirectory, true);
}
Directory.CreateDirectory(tempDirectory);
}
}

View File

@@ -0,0 +1,6 @@
namespace Producer.Features.Crawlers.Dmm;
public class DMMHttpClient
{
}

View File

@@ -1,64 +1,99 @@
namespace Producer.Features.Crawlers.Dmm;
public partial class DebridMediaManagerCrawler(
-IHttpClientFactory httpClientFactory,
+IDMMFileDownloader dmmFileDownloader,
ILogger<DebridMediaManagerCrawler> logger,
IDataStorage storage,
-GithubConfiguration githubConfiguration,
IRankTorrentName rankTorrentName,
IDistributedCache cache) : BaseCrawler(logger, storage)
{
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
private static partial Regex HashCollectionMatcher();
+protected override string Url => "";
-private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
-protected override string Url => "https://api.github.com/repos/debridmediamanager/hashlists/git/trees/main?recursive=1";
protected override string Source => "DMM";
+private const int ParallelismCount = 4;
public override async Task Execute()
{
-var client = httpClientFactory.CreateClient("Scraper");
-client.DefaultRequestHeaders.Authorization = new("Bearer", githubConfiguration.PAT);
-client.DefaultRequestHeaders.UserAgent.ParseAdd("curl");
-var jsonBody = await client.GetStringAsync(Url);
-var json = JsonDocument.Parse(jsonBody);
-var entriesArray = json.RootElement.GetProperty("tree");
-logger.LogInformation("Found {Entries} total DMM pages", entriesArray.GetArrayLength());
-foreach (var entry in entriesArray.EnumerateArray())
-{
-await ParsePage(entry, client);
-}
+var tempDirectory = await dmmFileDownloader.DownloadFileToTempPath(CancellationToken.None);
+var files = Directory.GetFiles(tempDirectory, "*.html", SearchOption.AllDirectories);
+logger.LogInformation("Found {Files} files to parse", files.Length);
+var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
+await Parallel.ForEachAsync(files, options, async (file, token) =>
+{
+var fileName = Path.GetFileName(file);
+var torrentDictionary = await ExtractPageContents(file, fileName);
+if (torrentDictionary == null)
+{
+return;
+}
+await ParseTitlesWithRtn(fileName, torrentDictionary);
+var results = await ParseTorrents(torrentDictionary);
+if (results.Count <= 0)
+{
+return;
+}
+await InsertTorrents(results);
+await Storage.MarkPageAsIngested(fileName, token);
+});
}
-private async Task ParsePage(JsonElement entry, HttpClient client)
+private async Task ParseTitlesWithRtn(string fileName, IDictionary<string, DmmContent> page)
{
-var (pageIngested, name) = await IsAlreadyIngested(entry);
-if (string.IsNullOrEmpty(name) || pageIngested)
-{
-return;
-}
-var pageSource = await client.GetStringAsync($"{DownloadBaseUrl}/{name}");
-await ExtractPageContents(pageSource, name);
+logger.LogInformation("Parsing titles for {Page}", fileName);
+var batchProcessables = page.Select(value => new RtnBatchProcessable(value.Key, value.Value.Filename)).ToList();
+var parsedResponses = rankTorrentName.BatchParse(
+batchProcessables.Select<RtnBatchProcessable, string>(bp => bp.Filename).ToList(), trashGarbage: false);
+// Filter out unsuccessful responses and match RawTitle to requesting title
+var successfulResponses = parsedResponses
+.Where(response => response != null && response.Success)
+.GroupBy(response => response.Response.RawTitle!)
+.ToDictionary(group => group.Key, group => group.First());
+var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
+await Parallel.ForEachAsync(batchProcessables.Select(t => t.InfoHash), options, (infoHash, _) =>
+{
+if (page.TryGetValue(infoHash, out var dmmContent) &&
+successfulResponses.TryGetValue(dmmContent.Filename, out var parsedResponse))
+{
+page[infoHash] = dmmContent with {ParseResponse = parsedResponse};
+}
+return ValueTask.CompletedTask;
+});
}
-private async Task ExtractPageContents(string pageSource, string name)
+private async Task<ConcurrentDictionary<string, DmmContent>?> ExtractPageContents(string filePath, string filenameOnly)
{
+var (pageIngested, name) = await IsAlreadyIngested(filenameOnly);
+if (pageIngested)
+{
+return [];
+}
+var pageSource = await File.ReadAllTextAsync(filePath);
var match = HashCollectionMatcher().Match(pageSource);
if (!match.Success)
{
logger.LogWarning("Failed to match hash collection for {Name}", name);
-await Storage.MarkPageAsIngested(name);
-return;
+await Storage.MarkPageAsIngested(filenameOnly);
+return [];
}
var encodedJson = match.Groups.Values.ElementAtOrDefault(1);
@@ -66,90 +101,92 @@ public partial class DebridMediaManagerCrawler(
if (string.IsNullOrEmpty(encodedJson?.Value))
{
logger.LogWarning("Failed to extract encoded json for {Name}", name);
-return;
+return [];
}
-await ProcessExtractedContentsAsTorrentCollection(encodedJson.Value, name);
-}
-private async Task ProcessExtractedContentsAsTorrentCollection(string encodedJson, string name)
-{
-var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson);
+var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson.Value);
var json = JsonDocument.Parse(decodedJson);
+var torrents = await json.RootElement.EnumerateArray()
+.ToAsyncEnumerable()
+.Select(ParsePageContent)
+.Where(t => t is not null)
+.ToListAsync();
-await InsertTorrentsForPage(json);
-var result = await Storage.MarkPageAsIngested(name);
-if (!result.IsSuccess)
+if (torrents.Count == 0)
{
-logger.LogWarning("Failed to mark page as ingested: [{Error}]", result.Failure.ErrorMessage);
-return;
+logger.LogWarning("No torrents found in {Name}", name);
+await Storage.MarkPageAsIngested(filenameOnly);
+return [];
}
+var torrentDictionary = torrents
+.Where(x => x is not null)
+.GroupBy(x => x.InfoHash)
+.ToConcurrentDictionary(g => g.Key, g => new DmmContent(g.First().Filename, g.First().Bytes, null));
-logger.LogInformation("Successfully marked page as ingested");
+logger.LogInformation("Parsed {Torrents} torrents for {Name}", torrentDictionary.Count, name);
+return torrentDictionary;
}
-private async Task<IngestedTorrent?> ParseTorrent(JsonElement item)
+private async Task<List<IngestedTorrent>> ParseTorrents(IDictionary<string, DmmContent> page)
{
+var ingestedTorrents = new List<IngestedTorrent>();
-if (!item.TryGetProperty("filename", out var filenameElement) ||
-!item.TryGetProperty("bytes", out var bytesElement) ||
-!item.TryGetProperty("hash", out var hashElement))
-{
-return null;
-}
-var torrentTitle = filenameElement.GetString();
-if (torrentTitle.IsNullOrEmpty())
-{
-return null;
-}
-var parsedTorrent = rankTorrentName.Parse(torrentTitle);
-if (!parsedTorrent.Success)
-{
-return null;
-}
-var torrentType = parsedTorrent.Response.IsMovie ? "movie" : "tvSeries";
-var cacheKey = GetCacheKey(torrentType, parsedTorrent.Response.ParsedTitle, parsedTorrent.Response.Year);
-var (cached, cachedResult) = await CheckIfInCacheAndReturn(cacheKey);
-if (cached)
-{
-logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Response.ParsedTitle);
-return MapToTorrent(cachedResult, bytesElement, hashElement, parsedTorrent);
-}
+var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
+await Parallel.ForEachAsync(page, options, async (kvp, ct) =>
+{
+var (infoHash, dmmContent) = kvp;
+var parsedTorrent = dmmContent.ParseResponse;
+if (parsedTorrent is not {Success: true})
+{
+return;
+}
+var torrentType = parsedTorrent.Response.IsMovie ? "movie" : "tvSeries";
+var cacheKey = GetCacheKey(torrentType, parsedTorrent.Response.ParsedTitle, parsedTorrent.Response.Year);
+var (cached, cachedResult) = await CheckIfInCacheAndReturn(cacheKey);
+if (cached)
+{
+logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Response.ParsedTitle);
+lock (ingestedTorrents)
+{
+ingestedTorrents.Add(MapToTorrent(cachedResult, dmmContent.Bytes, infoHash, parsedTorrent));
+}
+return;
+}
int? year = parsedTorrent.Response.Year != 0 ? parsedTorrent.Response.Year : null;
-var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Response.ParsedTitle, torrentType, year);
+var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Response.ParsedTitle, torrentType, year, ct);
if (imdbEntry is null)
{
-return null;
+return;
}
-await AddToCache(cacheKey, imdbEntry);
-logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", imdbEntry.ImdbId, parsedTorrent.Response.ParsedTitle, imdbEntry.Title, imdbEntry.Score);
-return MapToTorrent(imdbEntry, bytesElement, hashElement, parsedTorrent);
+await AddToCache(cacheKey, imdbEntry);
+logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", imdbEntry.ImdbId, parsedTorrent.Response.ParsedTitle, imdbEntry.Title, imdbEntry.Score);
+lock (ingestedTorrents)
+{
+ingestedTorrents.Add(MapToTorrent(imdbEntry, dmmContent.Bytes, infoHash, parsedTorrent));
+}
+});
+return ingestedTorrents;
}
-private IngestedTorrent MapToTorrent(ImdbEntry result, JsonElement bytesElement, JsonElement hashElement, ParseTorrentTitleResponse parsedTorrent) =>
+private IngestedTorrent MapToTorrent(ImdbEntry result, long size, string infoHash, ParseTorrentTitleResponse parsedTorrent) =>
new()
{
Source = Source,
Name = result.Title,
Imdb = result.ImdbId,
-Size = bytesElement.GetInt64().ToString(),
-InfoHash = hashElement.ToString(),
+Size = size.ToString(),
+InfoHash = infoHash,
Seeders = 0,
Leechers = 0,
Category = AssignCategory(result),
@@ -179,35 +216,11 @@ public partial class DebridMediaManagerCrawler(
return (false, null);
}
-private async Task InsertTorrentsForPage(JsonDocument json)
+private async Task<(bool Success, string? Name)> IsAlreadyIngested(string filename)
{
-var torrents = await json.RootElement.EnumerateArray()
-.ToAsyncEnumerable()
-.SelectAwait(async x => await ParseTorrent(x))
-.Where(t => t is not null)
-.ToListAsync();
-if (torrents.Count == 0)
-{
-logger.LogWarning("No torrents found in {Source} response", Source);
-return;
-}
-await InsertTorrents(torrents!);
-}
-private async Task<(bool Success, string? Name)> IsAlreadyIngested(JsonElement entry)
-{
-var name = entry.GetProperty("path").GetString();
-if (string.IsNullOrEmpty(name))
-{
-return (false, null);
-}
-var pageIngested = await Storage.PageIngested(name);
-return (pageIngested, name);
+var pageIngested = await Storage.PageIngested(filename);
+return (pageIngested, filename);
}
private static string AssignCategory(ImdbEntry entry) =>
@@ -219,4 +232,20 @@ public partial class DebridMediaManagerCrawler(
};
private static string GetCacheKey(string category, string title, int year) => $"{category.ToLowerInvariant()}:{year}:{title.ToLowerInvariant()}";
+private static ExtractedDMMContent? ParsePageContent(JsonElement item)
+{
+if (!item.TryGetProperty("filename", out var filenameElement) ||
+!item.TryGetProperty("bytes", out var bytesElement) ||
+!item.TryGetProperty("hash", out var hashElement))
+{
+return null;
+}
+return new(filenameElement.GetString(), bytesElement.GetInt64(), hashElement.GetString());
+}
+private record DmmContent(string Filename, long Bytes, ParseTorrentTitleResponse? ParseResponse);
+private record ExtractedDMMContent(string Filename, long Bytes, string InfoHash);
+private record RtnBatchProcessable(string InfoHash, string Filename);
}

View File

@@ -1,9 +0,0 @@
namespace Producer.Features.Crawlers.Dmm;
public class GithubConfiguration
{
private const string Prefix = "GITHUB";
private const string PatVariable = "PAT";
public string? PAT { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(PatVariable);
}

View File

@@ -0,0 +1,6 @@
namespace Producer.Features.Crawlers.Dmm;
public interface IDMMFileDownloader
{
Task<string> DownloadFileToTempPath(CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,16 @@
namespace Producer.Features.Crawlers.Dmm;
public static class ServiceCollectionExtensions
{
public static IServiceCollection AddDmmSupport(this IServiceCollection services)
{
services.AddHttpClient<IDMMFileDownloader, DMMFileDownloader>(DMMFileDownloader.ClientName, client =>
{
client.BaseAddress = new("https://github.com/debridmediamanager/hashlists/zipball/main/");
client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip");
client.DefaultRequestHeaders.UserAgent.ParseAdd("curl");
});
return services;
}
}

View File

@@ -5,7 +5,6 @@ internal static class ServiceCollectionExtensions
internal static IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration)
{
var scrapeConfiguration = services.LoadConfigurationFromConfig<ScrapeConfiguration>(configuration, ScrapeConfiguration.SectionName);
-var githubConfiguration = services.LoadConfigurationFromEnv<GithubConfiguration>();
var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();
var jobTypes = Assembly.GetAssembly(typeof(BaseJob))
@@ -19,18 +18,13 @@ internal static class ServiceCollectionExtensions
services.AddTransient(type);
}
-if (!string.IsNullOrEmpty(githubConfiguration.PAT))
-{
-services.AddTransient<SyncDmmJob>();
-}
var openMethod = typeof(ServiceCollectionExtensions).GetMethod(nameof(AddJobWithTrigger), BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance);
services.AddQuartz(
quartz =>
{
RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
-RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration);
+RegisterDmmJob(quartz, scrapeConfiguration);
RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
RegisterPublisher(quartz, rabbitConfiguration);
});
@@ -64,13 +58,8 @@ internal static class ServiceCollectionExtensions
}
}
-private static void RegisterDmmJob(GithubConfiguration githubConfiguration, IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration)
-{
-if (!string.IsNullOrEmpty(githubConfiguration.PAT))
-{
-AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
-}
-}
+private static void RegisterDmmJob(IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration) =>
+AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
private static void RegisterTorrentioJob(
IServiceCollection services,

View File

@@ -1,12 +1,12 @@
// Global using directives
+global using System.Collections.Concurrent;
+global using System.IO.Compression;
global using System.Reflection;
global using System.Text;
global using System.Text.Json;
global using System.Text.RegularExpressions;
global using System.Xml.Linq;
-global using FuzzySharp;
-global using FuzzySharp.Extractor;
global using FuzzySharp.PreProcess;
global using FuzzySharp.SimilarityRatio.Scorer;
global using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive;

View File

@@ -12,7 +12,8 @@ builder.Services
     .RegisterMassTransit()
     .AddDataStorage()
     .AddCrawlers()
+    .AddDmmSupport()
     .AddQuartz(builder.Configuration);

 var app = builder.Build();
 app.Run();

View File

@@ -1 +1 @@
-rank-torrent-name==0.1.8
+rank-torrent-name==0.2.13

View File

@@ -9,12 +9,23 @@ RUN dotnet restore -a $TARGETARCH
 RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH

-FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
+FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
 WORKDIR /app
+
+ENV PYTHONUNBUFFERED=1
+RUN apk add --update --no-cache python3=~3.11.8-r0 py3-pip && ln -sf python3 /usr/bin/python
+
 COPY --from=build /src/out .
+
+RUN rm -rf /app/python && mkdir -p /app/python
+RUN pip3 install -r /app/requirements.txt -t /app/python
+
 RUN addgroup -S qbit && adduser -S -G qbit qbit
 USER qbit

 HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
     CMD pgrep -f dotnet || exit 1

+ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0
+
 ENTRYPOINT ["dotnet", "QBitCollector.dll"]

View File

@@ -13,7 +13,8 @@ public static class ServiceCollectionExtensions
     internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services)
     {
         services.AddQBitTorrentClient();
-        services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>();
+        services.RegisterPythonEngine();
+        services.AddSingleton<IRankTorrentName, RankTorrentName>();
         services.AddSingleton<QbitRequestProcessor>();
         services.AddHttpClient();
         services.AddSingleton<ITrackersService, TrackersService>();

View File

@@ -3,10 +3,11 @@ namespace QBitCollector.Features.Worker;
 public static class QbitMetaToTorrentMeta
 {
     public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection(
-        IParseTorrentTitle torrentTitle,
+        IRankTorrentName rankTorrentName,
         Torrent torrent,
         string ImdbId,
-        IReadOnlyList<TorrentContent> Metadata)
+        IReadOnlyList<TorrentContent> Metadata,
+        ILogger<WriteQbitMetadataConsumer> logger)
     {
         try
         {
@@ -24,23 +25,31 @@ public static class QbitMetaToTorrentMeta
                     Size = metadataEntry.Size,
                 };

-                var parsedTitle = torrentTitle.Parse(file.Title);
+                var parsedTitle = rankTorrentName.Parse(file.Title, false);
+
+                if (!parsedTitle.Success)
+                {
+                    logger.LogWarning("Failed to parse title {Title} for metadata mapping", file.Title);
+                    continue;
+                }

-                file.ImdbSeason = parsedTitle.Seasons.FirstOrDefault();
-                file.ImdbEpisode = parsedTitle.Episodes.FirstOrDefault();
+                file.ImdbSeason = parsedTitle.Response?.Season?.FirstOrDefault() ?? 0;
+                file.ImdbEpisode = parsedTitle.Response?.Episode?.FirstOrDefault() ?? 0;

                 files.Add(file);
             }

             return files;
         }
-        catch (Exception)
+        catch (Exception ex)
         {
+            logger.LogWarning("Failed to map metadata to files collection: {Exception}", ex.Message);
             return [];
         }
     }

-    public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, IReadOnlyList<TorrentContent> Metadata)
+    public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, IReadOnlyList<TorrentContent> Metadata,
+        ILogger<WriteQbitMetadataConsumer> logger)
     {
         try
         {
@@ -70,8 +79,9 @@ public static class QbitMetaToTorrentMeta
             return files;
         }
-        catch (Exception)
+        catch (Exception ex)
         {
+            logger.LogWarning("Failed to map metadata to subtitles collection: {Exception}", ex.Message);
             return [];
         }
     }

View File

@@ -53,6 +53,12 @@ public class QbitMetadataSagaStateMachine : MassTransitStateMachine<QbitMetadata
                 .Then(
                     context =>
                     {
+                        if (!context.Message.WithFiles)
+                        {
+                            logger.LogInformation("No files written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
+                            return;
+                        }
+
                         logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
                     })
                 .TransitionTo(Completed)

View File

@@ -16,7 +16,7 @@ public record WriteQbitMetadata(Torrent Torrent, QBitMetadataResponse Metadata,
 }

 [EntityName("metadata-written-qbit-collector")]
-public record QbitMetadataWritten(QBitMetadataResponse Metadata) : CorrelatedBy<Guid>
+public record QbitMetadataWritten(QBitMetadataResponse Metadata, bool WithFiles) : CorrelatedBy<Guid>
 {
     public Guid CorrelationId { get; init; } = Metadata.CorrelationId;

View File

@@ -1,25 +1,30 @@
 namespace QBitCollector.Features.Worker;

-public class WriteQbitMetadataConsumer(IParseTorrentTitle parseTorrentTitle, IDataStorage dataStorage) : IConsumer<WriteQbitMetadata>
+public class WriteQbitMetadataConsumer(IRankTorrentName rankTorrentName, IDataStorage dataStorage, ILogger<WriteQbitMetadataConsumer> logger) : IConsumer<WriteQbitMetadata>
 {
     public async Task Consume(ConsumeContext<WriteQbitMetadata> context)
     {
         var request = context.Message;

-        var torrentFiles = QbitMetaToTorrentMeta.MapMetadataToFilesCollection(parseTorrentTitle, request.Torrent, request.ImdbId, request.Metadata.Metadata);
-
-        if (torrentFiles.Any())
-        {
-            await dataStorage.InsertFiles(torrentFiles);
-
-            var subtitles = await QbitMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata);
-
-            if (subtitles.Any())
-            {
-                await dataStorage.InsertSubtitles(subtitles);
-            }
-        }
-
-        await context.Publish(new QbitMetadataWritten(request.Metadata));
+        var torrentFiles = QbitMetaToTorrentMeta.MapMetadataToFilesCollection(
+            rankTorrentName, request.Torrent, request.ImdbId, request.Metadata.Metadata, logger);
+
+        if (!torrentFiles.Any())
+        {
+            await context.Publish(new QbitMetadataWritten(request.Metadata, false));
+            return;
+        }
+
+        await dataStorage.InsertFiles(torrentFiles);
+
+        var subtitles = await QbitMetaToTorrentMeta.MapMetadataToSubtitlesCollection(
+            dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata, logger);
+
+        if (subtitles.Any())
+        {
+            await dataStorage.InsertSubtitles(subtitles);
+        }
+
+        await context.Publish(new QbitMetadataWritten(request.Metadata, true));
     }
 }

View File

@@ -1,17 +1,11 @@
 // Global using directives

 global using System.Text.Json;
-global using System.Text.Json.Serialization;
-global using System.Threading.Channels;
 global using MassTransit;
-global using MassTransit.Mediator;
 global using Microsoft.AspNetCore.Builder;
 global using Microsoft.Extensions.Caching.Distributed;
 global using Microsoft.Extensions.Caching.Memory;
 global using Microsoft.Extensions.DependencyInjection;
-global using Polly;
-global using Polly.Extensions.Http;
-global using PromKnight.ParseTorrentTitle;
 global using QBitCollector.Extensions;
 global using QBitCollector.Features.Qbit;
 global using QBitCollector.Features.Trackers;
@@ -21,4 +15,6 @@ global using SharedContracts.Configuration;
 global using SharedContracts.Dapper;
 global using SharedContracts.Extensions;
 global using SharedContracts.Models;
+global using SharedContracts.Python;
+global using SharedContracts.Python.RTN;
 global using SharedContracts.Requests;

View File

@@ -18,7 +18,6 @@
     <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
     <PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
     <PackageReference Include="Polly" Version="8.3.1" />
-    <PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
     <PackageReference Include="QBittorrent.Client" Version="1.9.23349.1" />
     <PackageReference Include="Serilog" Version="3.1.1" />
     <PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
@@ -31,10 +30,30 @@
     <None Include="Configuration\logging.json">
       <CopyToOutputDirectory>Always</CopyToOutputDirectory>
     </None>
+    <Content Remove="eng\**" />
+    <None Remove="eng\**" />
+    <None Update="requirements.txt">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
   </ItemGroup>

   <ItemGroup>
     <ProjectReference Include="..\shared\SharedContracts.csproj" />
   </ItemGroup>
+
+  <ItemGroup Condition="'$(Configuration)' == 'Debug'">
+    <Content Remove="python\**" />
+    <None Include="python\**">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
+
+  <ItemGroup>
+    <Compile Remove="eng\**" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <EmbeddedResource Remove="eng\**" />
+  </ItemGroup>
 </Project>

View File

@@ -6,6 +6,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "QBitCollector", "QBitCollector.csproj", "{1EF124BE-6EBE-4D9E-846C-FFF814999F3B}"
 EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{2F2EA33A-1303-405D-939B-E9394D262BC9}"
+	ProjectSection(SolutionItems) = preProject
+		eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
+		eng\install-python-reqs.sh = eng\install-python-reqs.sh
+	EndProjectSection
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU

View File

@@ -0,0 +1,3 @@
+Remove-Item -Recurse -Force ../python
+mkdir -p ../python
+python -m pip install -r ../requirements.txt -t ../python/

View File

@@ -0,0 +1,5 @@
+#!/bin/bash
+
+rm -rf ../python
+mkdir -p ../python
+python3 -m pip install -r ../requirements.txt -t ../python/

View File

@@ -0,0 +1 @@
+rank-torrent-name==0.2.13

View File

@@ -152,7 +152,8 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
             INSERT INTO files
             ("infoHash", "fileIndex", title, "size", "imdbId", "imdbSeason", "imdbEpisode", "kitsuId", "kitsuEpisode", "createdAt", "updatedAt")
             VALUES
-            (@InfoHash, @FileIndex, @Title, @Size, @ImdbId, @ImdbSeason, @ImdbEpisode, @KitsuId, @KitsuEpisode, Now(), Now());
+            (@InfoHash, @FileIndex, @Title, @Size, @ImdbId, @ImdbSeason, @ImdbEpisode, @KitsuId, @KitsuEpisode, Now(), Now())
+            ON CONFLICT ("infoHash", "fileIndex") DO NOTHING;
             """;

         await connection.ExecuteAsync(query, files);
@@ -168,11 +169,7 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
             ("infoHash", "fileIndex", "fileId", "title")
             VALUES
             (@InfoHash, @FileIndex, @FileId, @Title)
-            ON CONFLICT
-            ("infoHash", "fileIndex")
-            DO UPDATE SET
-            "fileId" = COALESCE(subtitles."fileId", EXCLUDED."fileId"),
-            "title" = COALESCE(subtitles."title", EXCLUDED."title");
+            ON CONFLICT ("infoHash", "fileIndex") DO NOTHING;
             """;

         await connection.ExecuteAsync(query, subtitles);
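
Editor's note: the duplicate fix rests on Postgres conflict handling; `ON CONFLICT ... DO NOTHING` makes the insert idempotent, so replaying the same metadata cannot create duplicate file rows. A sketch of that behavior with Dapper follows, assuming the connection string and a trimmed column set; the real `InsertFiles` above uses the full column list.

using Dapper;
using Npgsql;

// Sketch only: replaying the same batch inserts zero new rows once the
// ("infoHash", "fileIndex") conflict target is in place.
public static async Task InsertFilesTwiceAsync(string connectionString, IEnumerable<TorrentFile> files)
{
    await using var connection = new NpgsqlConnection(connectionString);

    const string query =
        """
        INSERT INTO files ("infoHash", "fileIndex", title, "size")
        VALUES (@InfoHash, @FileIndex, @Title, @Size)
        ON CONFLICT ("infoHash", "fileIndex") DO NOTHING;
        """;

    // Dapper runs the statement once per element; the second call is a no-op.
    await connection.ExecuteAsync(query, files);
    await connection.ExecuteAsync(query, files);
}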

View File

@@ -0,0 +1,19 @@
+namespace SharedContracts.Extensions;
+
+public static class DictionaryExtensions
+{
+    public static ConcurrentDictionary<TKey, TValue> ToConcurrentDictionary<TSource, TKey, TValue>(
+        this IEnumerable<TSource> source,
+        Func<TSource, TKey> keySelector,
+        Func<TSource, TValue> valueSelector) where TKey : notnull
+    {
+        var concurrentDictionary = new ConcurrentDictionary<TKey, TValue>();
+
+        foreach (var element in source)
+        {
+            concurrentDictionary.TryAdd(keySelector(element), valueSelector(element));
+        }
+
+        return concurrentDictionary;
+    }
+}
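
Editor's note: a usage sketch for the new extension; the torrent tuples are illustrative. Because `TryAdd` is used rather than the indexer, the first value for a duplicate key wins and later ones are silently dropped, which suits deduplicating hash lists before parallel processing.

// Builds a thread-safe lookup in one pass over an in-memory sequence.
var torrents = new[] { ("abc123", "Movie.2024.1080p"), ("abc123", "Movie.2024.720p") };

var byInfoHash = torrents.ToConcurrentDictionary(t => t.Item1, t => t.Item2);

// byInfoHash["abc123"] == "Movie.2024.1080p"; the dictionary can now be
// read and written from Parallel.ForEach without extra locking.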

View File

@@ -1,5 +1,6 @@
 // Global using directives

+global using System.Collections.Concurrent;
 global using System.Text.Json;
 global using System.Text.Json.Serialization;
 global using System.Text.RegularExpressions;

View File

@@ -2,5 +2,6 @@ namespace SharedContracts.Python.RTN;

 public interface IRankTorrentName
 {
-    ParseTorrentTitleResponse Parse(string title);
+    ParseTorrentTitleResponse Parse(string title, bool trashGarbage = true, bool logErrors = false, bool throwOnErrors = false);
+    List<ParseTorrentTitleResponse?> BatchParse(IReadOnlyCollection<string> titles, int chunkSize = 500, int workers = 20, bool trashGarbage = true, bool logErrors = false, bool throwOnErrors = false);
 }
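
Editor's note: a usage sketch for the widened interface. The `rankTorrentName` instance is assumed to come from DI, and the titles are illustrative; the `trashGarbage: false` call mirrors the one in QbitMetaToTorrentMeta earlier in this commit.

// Single-title parse, tolerating "garbage" titles instead of discarding them.
var single = rankTorrentName.Parse("Show.S01E02.1080p.WEB.x264", trashGarbage: false);
if (single.Success)
{
    var season = single.Response?.Season?.FirstOrDefault() ?? 0;
}

// BatchParse pushes chunking and worker fan-out down into RTN's batch_parse,
// which is what lets the producer parse the whole DMM dump locally.
var allTitles = new[] { "Movie.2024.2160p.BluRay", "Show.S02.COMPLETE.720p" };
var results = rankTorrentName.BatchParse(allTitles, chunkSize: 500, workers: 20);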

View File

@@ -12,32 +12,102 @@ public class RankTorrentName : IRankTorrentName
         _pythonEngineService = pythonEngineService;
         InitModules();
     }

-    public ParseTorrentTitleResponse Parse(string title) =>
-        _pythonEngineService.ExecutePythonOperationWithDefault(
-            () =>
-            {
-                var result = _rtn?.parse(title);
-                return ParseResult(result);
-            }, new ParseTorrentTitleResponse(false, null), nameof(Parse), throwOnErrors: false, logErrors: false);
-
-    private static ParseTorrentTitleResponse ParseResult(dynamic result)
+    public ParseTorrentTitleResponse Parse(string title, bool trashGarbage = true, bool logErrors = false, bool throwOnErrors = false)
     {
-        if (result == null)
+        try
         {
+            using var gil = Py.GIL();
+            var result = _rtn?.parse(title, trashGarbage);
+            return ParseResult(result);
+        }
+        catch (Exception ex)
+        {
+            if (logErrors)
+            {
+                _pythonEngineService.Logger.LogError(ex, "Python Error: {Message} ({OperationName})", ex.Message, nameof(Parse));
+            }
+
+            if (throwOnErrors)
+            {
+                throw;
+            }
+
             return new(false, null);
         }
+    }

-        var json = result.model_dump_json()?.As<string?>();
-        if (json is null || string.IsNullOrEmpty(json))
+    public List<ParseTorrentTitleResponse?> BatchParse(IReadOnlyCollection<string> titles, int chunkSize = 500, int workers = 20, bool trashGarbage = true, bool logErrors = false, bool throwOnErrors = false)
+    {
+        var responses = new List<ParseTorrentTitleResponse?>();
+
+        try
         {
-            return new(false, null);
+            if (titles.Count == 0)
+            {
+                return responses;
+            }
+
+            using var gil = Py.GIL();
+            var pythonList = new PyList(titles.Select(x => new PyString(x).As<PyObject>()).ToArray());
+            PyList results = _rtn?.batch_parse(pythonList, trashGarbage, chunkSize, workers);
+
+            if (results == null)
+            {
+                return responses;
+            }
+
+            responses.AddRange(results.Select(ParseResult));
         }
+        catch (Exception ex)
+        {
+            if (logErrors)
+            {
+                _pythonEngineService.Logger.LogError(ex, "Python Error: {Message} ({OperationName})", ex.Message, nameof(Parse));
+            }
+
+            if (throwOnErrors)
+            {
+                throw;
+            }
+        }
+
+        return responses;
+    }

-        var response = JsonSerializer.Deserialize<RtnResponse>(json);
+    private static ParseTorrentTitleResponse? ParseResult(dynamic result)
+    {
+        try
+        {
+            if (result == null)
+            {
+                return new(false, null);
+            }

-        return new(true, response);
+            var json = result.model_dump_json()?.As<string?>();
+            if (json is null || string.IsNullOrEmpty(json))
+            {
+                return new(false, null);
+            }
+
+            var mediaType = result.GetAttr("type")?.As<string>();
+            if (string.IsNullOrEmpty(mediaType))
+            {
+                return new(false, null);
+            }
+
+            var response = JsonSerializer.Deserialize<RtnResponse>(json);
+            response.IsMovie = mediaType.Equals("movie", StringComparison.OrdinalIgnoreCase);
+
+            return new(true, response);
+        }
+        catch
+        {
+            return new(false, null);
+        }
     }

     private void InitModules() =>

View File

@@ -76,23 +76,8 @@ public class RtnResponse
     [JsonPropertyName("extended")]
     public bool Extended { get; set; }

-    // [JsonPropertyName("is_show")]
-    // public bool IsTvShow { get; set; }
-    //
-    // [JsonPropertyName("is_movie")]
-    // public bool IsMovie { get; set; }
+    public bool IsMovie { get; set; }

     public string ToJson() => this.AsJson();
-
-    public bool IsMovie => !TvRegexes.Any(regex => regex.IsMatch(RawTitle)) && Season?.Count == 0 && Episode?.Count == 0;
-
-    private static List<Regex> TvRegexes { get; set; } =
-    [
-        new(@"[se]\d\d", RegexOptions.IgnoreCase),
-        new(@"\b(tv|complete)\b", RegexOptions.IgnoreCase),
-        new(@"\b(saisons?|stages?|seasons?).?\d", RegexOptions.IgnoreCase),
-        new(@"[a-z]\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase),
-        new(@"\d{2,4}\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase),
-    ];
 }

View File

@@ -82,11 +82,4 @@ public static class ServiceCollectionExtensions
             x.AddConsumer<PerformIngestionConsumer>();
         }
-
-    internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services)
-    {
-        services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>();
-
-        return services;
-    }
 }

View File

@@ -5,7 +5,6 @@ global using MassTransit;
 global using MassTransit.Mediator;
 global using Microsoft.AspNetCore.Builder;
 global using Microsoft.Extensions.DependencyInjection;
-global using PromKnight.ParseTorrentTitle;
 global using SharedContracts.Configuration;
 global using SharedContracts.Dapper;
 global using SharedContracts.Extensions;

View File

@@ -10,7 +10,6 @@ builder.Host

 builder.Services
     .RegisterMassTransit()
-    .AddServiceConfiguration()
     .AddDatabase();

 var app = builder.Build();

View File

@@ -16,7 +16,6 @@
     <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
     <PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
     <PackageReference Include="Polly" Version="8.3.1" />
-    <PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
     <PackageReference Include="Serilog" Version="3.1.1" />
     <PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
     <PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />