Producer / Consumer / Collector rewrite (#160)

* Converted metadata service to Redis

* move to Postgres instead

* fix global usings

* [skip ci] optimize Wolverine by prebuilding static types

* [skip ci] Stop indexing Mac folder indexes

* [skip ci] producer, metadata and migrations

removed MongoDB
added Redis cache
IMDb meta in Postgres
Enable pg_trgm
Create trigram index (see the sketch below)
Add search-meta Postgres function
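For reference, the pg_trgm setup above boils down to DDL along these lines. This is a sketch only: the table, column, and index names (imdb_metadata, title, idx_imdb_metadata_title_trgm) are illustrative, not taken from this commit's migrations.

using Npgsql;

// Illustrative names only; the actual migration in this commit may differ.
var connectionString = Environment.GetEnvironmentVariable("POSTGRES_CONNECTION")
                       ?? "Host=localhost;Database=knightcrawler";

await using var connection = new NpgsqlConnection(connectionString);
await connection.OpenAsync();

// Enable trigram matching, then index titles for fast fuzzy lookups.
await using var command = new NpgsqlCommand(
    """
    CREATE EXTENSION IF NOT EXISTS pg_trgm;
    CREATE INDEX IF NOT EXISTS idx_imdb_metadata_title_trgm
        ON imdb_metadata USING gin (title gin_trgm_ops);
    """, connection);
await command.ExecuteNonQueryAsync();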

* [skip ci] get rid of node folder, replace Mongo with Redis in consumer

also wire up Postgres metadata searches

* [skip ci] change Mongo to Redis in the addon

* [skip ci] jackettio to Redis

* Rest of Mongo removed...

* Cleaner rerunning of metadata, without conflicts

* Add akas import as well as basic metadata

* Include episodes file too

* cascade truncate pre-import

* reverse order to avoid cascading

* separate out the clean step into its own handler

* Switch producer to use metadata matching, pre-processing DMM

* More work

* Still porting PTN

* PTN port, adding tests

* [skip ci] Codec tests

* [skip ci] Complete Collection handler tests

* [skip ci] container tests

* [skip ci] Convert handlers tests

* [skip ci] DateHandler tests

* [skip ci] Dual Audio matching tests

* [skip ci] episode code tests

* [skip ci] Extended handler tests

* [skip ci] group handler tests

* [skip ci] some broken stuff right now

* [skip ci] more ptn

* [skip ci] PTN is now in a separate NuGet package; rebased this on the Redis changes, as I need them.

* [skip ci] Wire up PTN port. Tired; will test tomorrow

* [skip ci] Needs a lot of work; too many titles are being missed now

* cleaner. done?

* Handle the date in the IMDb search

- add an integer function to confirm the value is a valid integer
- use the input date as a ±1 year range (see the sketch below)
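A minimal sketch of how a consumer might call such a search with Dapper. The function name search_imdb_meta and its parameters are assumptions for illustration; the commit's actual function is not shown in this excerpt.

using Dapper;
using Npgsql;
using System.Collections.Generic;
using System.Threading.Tasks;

public static class ImdbSearch
{
    // Hypothetical wrapper; search_imdb_meta is an illustrative name.
    public static async Task<IEnumerable<string>> SearchAsync(
        NpgsqlConnection connection, string title, int year)
    {
        const string sql = "SELECT imdb_id FROM search_imdb_meta(@title, @yearFrom, @yearTo)";

        return await connection.QueryAsync<string>(sql, new
        {
            title,
            yearFrom = year - 1, // the ±1 year window described above
            yearTo = year + 1
        });
    }
}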

* [skip ci] Start of collector service for RD

[skip ci] WIP

Implemented metadata saga, along with channels to process up to a maximum of 100 infohashes at a time.
The saga will retry each infohash by requeuing it up to three times before marking it as complete, meaning no data will be updated in the DB for that torrent. A rough sketch of that retry rule follows.
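Expressed as a MassTransit state machine (which this PR adopts later on), the requeue-then-give-up rule could look like this. All type and message names here are illustrative; the real saga's internals are not shown in this excerpt.

using MassTransit;
using System;

// Illustrative saga instance; only the retry counter matters for this sketch.
public class MetadataRetryState : SagaStateMachineInstance
{
    public Guid CorrelationId { get; set; }
    public string CurrentState { get; set; } = string.Empty;
    public string InfoHash { get; set; } = string.Empty;
    public int Attempts { get; set; }
}

public record MetadataRequested(Guid CorrelationId, string InfoHash) : CorrelatedBy<Guid>;
public record MetadataFailed(Guid CorrelationId) : CorrelatedBy<Guid>;

public class MetadataRetryStateMachine : MassTransitStateMachine<MetadataRetryState>
{
    public State Pending { get; private set; } = null!;
    public Event<MetadataRequested> Requested { get; private set; } = null!;
    public Event<MetadataFailed> Failed { get; private set; } = null!;

    public MetadataRetryStateMachine()
    {
        InstanceState(x => x.CurrentState);

        Initially(
            When(Requested)
                .Then(ctx => ctx.Saga.InfoHash = ctx.Message.InfoHash)
                .TransitionTo(Pending));

        During(Pending,
            When(Failed)
                .IfElse(
                    ctx => ++ctx.Saga.Attempts < 3,
                    // Requeue the infohash for another attempt.
                    retry => retry.Publish(ctx => new MetadataRequested(ctx.Saga.CorrelationId, ctx.Saga.InfoHash)),
                    // Third failure: mark complete, so nothing is written for this torrent.
                    done => done.Finalize()));

        SetCompletedWhenFinalized();
    }
}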

[skip ci] Ready to test with queue publishing

Will provision a fanout exchange if it doesn't exist, and create and bind a queue to it. Listens to the queue with a prefetch count of 50.
Still needs the PTN rewrite brought in, to parse the filename responses from Real-Debrid and extract season and episode numbers when the file is a TV show.
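In raw RabbitMQ.Client terms, that provisioning step amounts to the following. Exchange and queue names are placeholders; the collector's actual wiring appears further down this diff.

using RabbitMQ.Client;

// Placeholder names; declares are idempotent, so the exchange and queue are
// only created if they do not already exist.
var factory = new ConnectionFactory { HostName = "rabbitmq" };
using var connection = factory.CreateConnection();
using var channel = connection.CreateModel();

channel.ExchangeDeclare("ingested-torrents", ExchangeType.Fanout, durable: true);
channel.QueueDeclare("debrid-collector", durable: true, exclusive: false, autoDelete: false);
channel.QueueBind("debrid-collector", "ingested-torrents", routingKey: string.Empty);

// Deliver at most 50 unacknowledged messages to this consumer at a time.
channel.BasicQos(prefetchSize: 0, prefetchCount: 50, global: false);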

[skip ci] Add Debrid Collector Build Job

Debrid Collector ready for testing

New consumer, new collector; the producer has meta lookup and anti-porn measures

[skip ci] WIP - moving from Wolverine to MassTransit.

Not happy that Wolverine cannot effectively control saga concurrency; we really need that.
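MassTransit exposes that control directly on the saga endpoint, which is exactly the knob the registration later in this diff turns:

using MassTransit;
using Microsoft.Extensions.DependencyInjection;

// Mirrors the saga registration shown later in this diff.
var services = new ServiceCollection();
services.AddMassTransit(x =>
{
    x.AddSagaStateMachine<InfohashMetadataSagaStateMachine, InfohashMetadataSagaState>()
        .Endpoint(e =>
        {
            e.ConcurrentMessageLimit = 50; // cap in-flight saga messages
            e.PrefetchCount = 50;
        });
});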

[skip ci] Producer and new Consumer moved to MassTransit

Just the debrid collector to go now, then to write the optional qbit collector.

Collector now switched to MassTransit too

hide porn titles in logs, clean up cache name in Redis for IMDb titles

[skip ci] Allow control of queues

[skip ci] Update deployment

Remove old consumer, fix deployment files, fix Dockerfiles for shared project import

fix base deployment

* Add missing collector env var

* edits to kick off builds

* Add optional qbit deployment which qbit collector will use

* Qbit collector done

* reorder compose, and bring both qbit and qbitcollector into the compose, with 0 replicas as default

* Clean up compose file

* Ensure debrid collector errors if no debrid API key is set
iPromKnight
2024-03-25 23:32:28 +00:00
committed by GitHub
parent 9c6c1ac249
commit 9a831e92d0
443 changed files with 4154 additions and 476262 deletions

View File

@@ -14,7 +14,6 @@
     "axios": "^1.6.1",
     "bottleneck": "^2.19.5",
     "cache-manager": "^3.4.4",
-    "cache-manager-mongodb": "^0.3.0",
     "cors": "^2.8.5",
     "debrid-link-api": "^1.0.1",
     "express": "^4.18.2",
@@ -33,7 +32,11 @@
     "user-agents": "^1.0.1444",
     "video-name-parser": "^1.4.6",
     "xml-js": "^1.6.11",
-    "xml2js": "^0.6.2"
+    "xml2js": "^0.6.2",
+    "@redis/client": "^1.5.14",
+    "@redis/json": "^1.0.6",
+    "@redis/search": "^1.1.6",
+    "cache-manager-redis-store": "^2.0.0"
   },
   "devDependencies": {
     "@types/node": "^20.11.6",

View File

@@ -1,7 +1,7 @@
 import cacheManager from 'cache-manager';
-import mangodbStore from 'cache-manager-mongodb';
 import { isStaticUrl } from '../moch/static.js';
 import {cacheConfig} from "./settings.js";
+import redisStore from 'cache-manager-redis-store';
 
 const STREAM_KEY_PREFIX = `${cacheConfig.GLOBAL_KEY_PREFIX}|stream`;
 const IMDB_KEY_PREFIX = `${cacheConfig.GLOBAL_KEY_PREFIX}|imdb`;
@@ -12,28 +12,20 @@ const memoryCache = initiateMemoryCache();
 const remoteCache = initiateRemoteCache();
 
 function initiateRemoteCache() {
-  if (cacheConfig.NO_CACHE) {
-    return null;
-  } else if (cacheConfig.MONGODB_URI) {
-    return cacheManager.caching({
-      store: mangodbStore,
-      uri: cacheConfig.MONGODB_URI,
-      options: {
-        collection: 'jackettio_addon_collection',
-        socketTimeoutMS: 120000,
-        useNewUrlParser: true,
-        useUnifiedTopology: false,
-        ttl: cacheConfig.STREAM_EMPTY_TTL
-      },
-      ttl: cacheConfig.STREAM_EMPTY_TTL,
-      ignoreCacheErrors: true
-    });
-  } else {
-    return cacheManager.caching({
-      store: 'memory',
-      ttl: cacheConfig.STREAM_EMPTY_TTL
-    });
-  }
+  if (cacheConfig.NO_CACHE) {
+    return null;
+  } else if (cacheConfig.REDIS_CONNECTION_STRING) {
+    return cacheManager.caching({
+      store: redisStore,
+      ttl: cacheConfig.STREAM_EMPTY_TTL,
+      url: cacheConfig.REDIS_CONNECTION_STRING
+    });
+  } else {
+    return cacheManager.caching({
+      store: 'memory',
+      ttl: cacheConfig.STREAM_EMPTY_TTL
+    });
+  }
 }
 
 function initiateMemoryCache() {

View File

@@ -25,7 +25,7 @@ export const cinemetaConfig = {
 }
 
 export const cacheConfig = {
-  MONGODB_URI: process.env.MONGODB_URI,
+  REDIS_CONNECTION_STRING: process.env.REDIS_CONNECTION_STRING || 'redis://localhost:6379/0',
   NO_CACHE: parseBool(process.env.NO_CACHE, false),
   IMDB_TTL: parseInt(process.env.IMDB_TTL || 60 * 60 * 4), // 4 Hours
   STREAM_TTL: parseInt(process.env.STREAM_TTL || 60 * 60 * 4), // 1 Hour

File diff suppressed because it is too large

View File

@@ -14,7 +14,6 @@
     "axios": "^1.6.1",
     "bottleneck": "^2.19.5",
     "cache-manager": "^3.4.4",
-    "cache-manager-mongodb": "^0.3.0",
     "cors": "^2.8.5",
     "debrid-link-api": "^1.0.1",
     "express-rate-limit": "^6.7.0",
@@ -35,7 +34,11 @@
     "stremio-addon-sdk": "^1.6.10",
     "swagger-stats": "^0.99.7",
     "ua-parser-js": "^1.0.36",
-    "user-agents": "^1.0.1444"
+    "user-agents": "^1.0.1444",
+    "@redis/client": "^1.5.14",
+    "@redis/json": "^1.0.6",
+    "@redis/search": "^1.1.6",
+    "cache-manager-redis-store": "^2.0.0"
   },
   "devDependencies": {
     "@types/node": "^20.11.6",

View File

@@ -1,7 +1,7 @@
 import cacheManager from 'cache-manager';
-import mangodbStore from 'cache-manager-mongodb';
 import { cacheConfig } from './config.js';
 import { isStaticUrl } from '../moch/static.js';
+import redisStore from "cache-manager-redis-store";
 
 const GLOBAL_KEY_PREFIX = 'knightcrawler-addon';
 const STREAM_KEY_PREFIX = `${GLOBAL_KEY_PREFIX}|stream`;
@@ -21,19 +21,11 @@ const remoteCache = initiateRemoteCache();
 function initiateRemoteCache() {
   if (cacheConfig.NO_CACHE) {
     return null;
-  } else if (cacheConfig.MONGO_URI) {
+  } else if (cacheConfig.REDIS_CONNECTION_STRING) {
     return cacheManager.caching({
-      store: mangodbStore,
-      uri: cacheConfig.MONGO_URI,
-      options: {
-        collection: 'knightcrawler_addon_collection',
-        socketTimeoutMS: 120000,
-        useNewUrlParser: true,
-        useUnifiedTopology: false,
-        ttl: STREAM_EMPTY_TTL
-      },
+      store: redisStore,
       ttl: STREAM_EMPTY_TTL,
       ignoreCacheErrors: true
+      url: cacheConfig.REDIS_CONNECTION_STRING
     });
   } else {
     return cacheManager.caching({
View File

@@ -1,18 +1,8 @@
 export const cacheConfig = {
-  MONGODB_HOST: process.env.MONGODB_HOST || 'mongodb',
-  MONGODB_PORT: process.env.MONGODB_PORT || '27017',
-  MONGODB_DB: process.env.MONGODB_DB || 'knightcrawler',
-  MONGODB_USER: process.env.MONGODB_USER || 'mongo',
-  MONGODB_PASSWORD: process.env.MONGODB_PASSWORD || 'mongo',
-  COLLECTION_NAME: process.env.MONGODB_ADDON_COLLECTION || 'knightcrawler_addon_collection',
+  REDIS_CONNECTION_STRING: process.env.REDIS_CONNECTION_STRING || 'redis://localhost:6379/0',
   NO_CACHE: parseBool(process.env.NO_CACHE, false),
 }
 
-// Combine the environment variables into a connection string
-// The combined string will look something like:
-// 'mongodb://mongo:mongo@localhost:27017/knightcrawler?authSource=admin'
-cacheConfig.MONGO_URI = 'mongodb://' + cacheConfig.MONGODB_USER + ':' + cacheConfig.MONGODB_PASSWORD + '@' + cacheConfig.MONGODB_HOST + ':' + cacheConfig.MONGODB_PORT + '/' + cacheConfig.MONGODB_DB + '?authSource=admin';
-
 export const databaseConfig = {
   POSTGRES_HOST: process.env.POSTGRES_HOST || 'postgres',
   POSTGRES_PORT: process.env.POSTGRES_PORT || '5432',

View File

@@ -0,0 +1,36 @@
{
  "Serilog": {
    "Using": [ "Serilog.Sinks.Console" ],
    "MinimumLevel": {
      "Default": "Information",
      "Override": {
        "Microsoft": "Warning",
        "System": "Warning",
        "Npgsql.Command": "Warning",
        "Marten.IDocumentStore": "Warning",
        "Wolverine.Runtime.WolverineRuntime": "Warning",
        "Wolverine.Runtime.Agents.NodeAgentController": "Warning",
        "Oakton.Resources.ResourceSetupHostService": "Warning",
        "System.Net.Http.HttpClient.Scraper.LogicalHandler": "Warning",
        "System.Net.Http.HttpClient.Scraper.ClientHandler": "Warning",
        "Quartz.Impl.StdSchedulerFactory": "Warning",
        "Quartz.Core.QuartzScheduler": "Warning",
        "Quartz.Simpl.RAMJobStore": "Warning",
        "Quartz.Core.JobRunShell": "Warning",
        "Quartz.Core.SchedulerSignalerImpl": "Warning"
      }
    },
    "WriteTo": [
      {
        "Name": "Console",
        "Args": {
          "outputTemplate": "{Timestamp:HH:mm:ss} [{Level}] [{SourceContext}] {Message}{NewLine}{Exception}"
        }
      }
    ],
    "Enrich": [ "FromLogContext", "WithMachineName", "WithThreadId" ],
    "Properties": {
      "Application": "Metadata"
    }
  }
}
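For context, this file is typically fed to Serilog at startup along these lines; this bootstrap is a sketch, not the commit's actual Program.cs. (ReadFrom.Configuration comes from Serilog.Settings.Configuration, which Serilog.AspNetCore pulls in transitively.)

using Microsoft.Extensions.Configuration;
using Serilog;

var configuration = new ConfigurationBuilder()
    .AddJsonFile("Configuration/logging.json")
    .Build();

// Build the logger from the "Serilog" section shown above.
Log.Logger = new LoggerConfiguration()
    .ReadFrom.Configuration(configuration)
    .CreateLogger();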

View File

@@ -0,0 +1,38 @@
<Project Sdk="Microsoft.NET.Sdk.Worker">

    <PropertyGroup>
        <OutputType>Exe</OutputType>
        <TargetFramework>net8.0</TargetFramework>
        <ImplicitUsings>enable</ImplicitUsings>
        <Nullable>enable</Nullable>
        <UserSecretsId>54cad2ee-57df-4bb2-a192-d5d501448e0a</UserSecretsId>
    </PropertyGroup>

    <ItemGroup>
        <PackageReference Include="Dapper" Version="2.1.35" />
        <PackageReference Include="MassTransit" Version="8.2.0" />
        <PackageReference Include="MassTransit.RabbitMQ" Version="8.2.0" />
        <PackageReference Include="MassTransit.Redis" Version="8.2.0" />
        <PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
        <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
        <PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
        <PackageReference Include="Polly" Version="8.3.1" />
        <PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
        <PackageReference Include="Serilog" Version="3.1.1" />
        <PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
        <PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
        <PackageReference Include="System.Interactive.Async" Version="6.0.1" />
    </ItemGroup>

    <ItemGroup>
        <Content Remove="Configuration\logging.json" />
        <None Include="Configuration\logging.json">
            <CopyToOutputDirectory>Always</CopyToOutputDirectory>
        </None>
    </ItemGroup>

    <ItemGroup>
        <ProjectReference Include="..\shared\SharedContracts.csproj" />
    </ItemGroup>

</Project>

View File

@@ -0,0 +1,27 @@

Microsoft Visual Studio Solution File, Format Version 12.00
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DebridCollector", "DebridCollector.csproj", "{64C3253C-0638-4825-AC82-7D5600D1F9C9}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharedContracts", "..\shared\SharedContracts.csproj", "{C9BE500C-CE04-480B-874F-A85D33CAA821}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F53-28E6-404F-9EFE-DADFBEF8338B}"
EndProject
Global
    GlobalSection(SolutionConfigurationPlatforms) = preSolution
        Debug|Any CPU = Debug|Any CPU
        Release|Any CPU = Release|Any CPU
    EndGlobalSection
    GlobalSection(ProjectConfigurationPlatforms) = postSolution
        {64C3253C-0638-4825-AC82-7D5600D1F9C9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
        {64C3253C-0638-4825-AC82-7D5600D1F9C9}.Debug|Any CPU.Build.0 = Debug|Any CPU
        {64C3253C-0638-4825-AC82-7D5600D1F9C9}.Release|Any CPU.ActiveCfg = Release|Any CPU
        {64C3253C-0638-4825-AC82-7D5600D1F9C9}.Release|Any CPU.Build.0 = Release|Any CPU
        {C9BE500C-CE04-480B-874F-A85D33CAA821}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
        {C9BE500C-CE04-480B-874F-A85D33CAA821}.Debug|Any CPU.Build.0 = Debug|Any CPU
        {C9BE500C-CE04-480B-874F-A85D33CAA821}.Release|Any CPU.ActiveCfg = Release|Any CPU
        {C9BE500C-CE04-480B-874F-A85D33CAA821}.Release|Any CPU.Build.0 = Release|Any CPU
    EndGlobalSection
    GlobalSection(NestedProjects) = preSolution
        {C9BE500C-CE04-480B-874F-A85D33CAA821} = {2C0A0F53-28E6-404F-9EFE-DADFBEF8338B}
    EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,20 @@
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0 AS build
ARG TARGETARCH
WORKDIR /src
COPY shared/ shared/
COPY debrid-collector/ debrid-collector/
WORKDIR /src/debrid-collector/
RUN dotnet restore -a $TARGETARCH
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
WORKDIR /app
COPY --from=build /src/out .
RUN addgroup -S debrid && adduser -S -G debrid debrid
USER debrid
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD pgrep -f dotnet || exit 1
ENTRYPOINT ["dotnet", "DebridCollector.dll"]

View File

@@ -0,0 +1,73 @@
using DebridCollector.Features.Configuration;

namespace DebridCollector.Extensions;

public static class ServiceCollectionExtensions
{
    internal static IServiceCollection AddDatabase(this IServiceCollection services)
    {
        services.LoadConfigurationFromEnv<PostgresConfiguration>();
        services.AddTransient<IDataStorage, DapperDataStorage>();
        return services;
    }

    internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services)
    {
        var serviceConfiguration = services.LoadConfigurationFromEnv<DebridCollectorConfiguration>();
        services.AddRealDebridClient(serviceConfiguration);
        services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>();
        services.AddHostedService<DebridRequestProcessor>();
        return services;
    }

    internal static IServiceCollection RegisterMassTransit(this IServiceCollection services)
    {
        var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();
        var redisConfiguration = services.LoadConfigurationFromEnv<RedisConfiguration>();

        services.AddMassTransit(x =>
        {
            x.SetKebabCaseEndpointNameFormatter();

            x.UsingRabbitMq((context, cfg) =>
            {
                cfg.AutoStart = true;

                cfg.Host(
                    rabbitConfiguration.Host, h =>
                    {
                        h.Username(rabbitConfiguration.Username);
                        h.Password(rabbitConfiguration.Password);
                    });

                cfg.Message<CollectMetadata>(e => e.SetEntityName(rabbitConfiguration.DebridCollectorQueueName));

                cfg.ConfigureEndpoints(context);
            });

            x.AddConsumer<PerformMetadataRequestConsumer>();
            x.AddConsumer<WriteMetadataConsumer>();
            x.RegisterMetadataIngestionSaga(redisConfiguration, rabbitConfiguration);
        });

        return services;
    }

    private static void RegisterMetadataIngestionSaga(this IBusRegistrationConfigurator x, RedisConfiguration redisConfiguration, RabbitMqConfiguration rabbitMqConfiguration) =>
        x.AddSagaStateMachine<InfohashMetadataSagaStateMachine, InfohashMetadataSagaState>(
                cfg =>
                {
                    cfg.UseMessageRetry(r => r.Intervals(1000, 2000, 5000));
                    cfg.UseInMemoryOutbox();
                })
            .RedisRepository(redisConfiguration.ConnectionString)
            .Endpoint(
                e =>
                {
                    e.Name = rabbitMqConfiguration.DebridCollectorQueueName;
                    e.ConcurrentMessageLimit = 50;
                    e.PrefetchCount = 50;
                });
}
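For orientation, a composition root pulling these extensions together might look like the following sketch; the actual Program.cs is not shown in this excerpt.

using DebridCollector.Extensions;
using Microsoft.Extensions.Hosting;

var host = Host.CreateDefaultBuilder(args)
    .ConfigureServices(services => services
        .AddDatabase()
        .AddServiceConfiguration()
        .RegisterMassTransit())
    .Build();

await host.RunAsync();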

View File

@@ -0,0 +1,8 @@
namespace DebridCollector.Features.Configuration;

public class DebridCollectorConfiguration
{
    private const string Prefix = "COLLECTOR";
    private const string RealDebridApiKeyVariable = "REAL_DEBRID_API_KEY";

    public string RealDebridApiKey { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(RealDebridApiKeyVariable);
}
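GetRequiredEnvironmentVariableAsString lives in the shared project and is not shown in this excerpt; a plausible shape, assumed here, is a prefix-plus-name lookup that throws when unset, which is what would make the collector fail fast without a Real-Debrid API key (see the last commit bullet above).

using System;

// Assumed helper; the real one in the shared project may differ.
public static class EnvironmentExtensions
{
    public static string GetRequiredEnvironmentVariableAsString(this string prefix, string name)
    {
        var key = $"{prefix}_{name}";
        return Environment.GetEnvironmentVariable(key)
               ?? throw new InvalidOperationException($"Missing required environment variable: {key}");
    }
}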

View File

@@ -0,0 +1,64 @@
namespace DebridCollector.Features.Debrid;

public class DebridRequestProcessor(IDebridHttpClient debridHttpClient, ILogger<DebridRequestProcessor> logger, IBus messageBus) : BackgroundService
{
    private const int BatchDelay = 3000;
    public const int MaxBatchSize = 100;

    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        var requests = new List<PerformMetadataRequest>(MaxBatchSize);
        var delay = TimeSpan.FromMilliseconds(BatchDelay);

        while (!stoppingToken.IsCancellationRequested)
        {
            while (ProcessorChannel.Queue.Reader.TryRead(out var request))
            {
                if (requests.Count >= MaxBatchSize)
                {
                    break;
                }

                if (requests.All(x => x.InfoHash != request.InfoHash))
                {
                    requests.Add(request);
                }
            }

            if (requests.Any())
            {
                await ProcessRequests(requests, stoppingToken);
                requests.Clear();
            }

            await Task.Delay(delay, stoppingToken);
        }

        // After the loop ends, there may be remaining requests which were not processed. Let's process them:
        if (requests.Count != 0)
        {
            await ProcessRequests(requests, stoppingToken);
            requests.Clear();
        }
    }

    private async Task ProcessRequests(IReadOnlyCollection<PerformMetadataRequest> requests, CancellationToken stoppingToken = default)
    {
        try
        {
            var results = await debridHttpClient.GetMetadataAsync(requests, stoppingToken);
            await ProcessResponses(results);
            logger.LogInformation("Processed: {Count} infoHashes", requests.Count);
        }
        catch (Exception e)
        {
            logger.LogError(e, "Failed to process infoHashes");
        }
    }

    private async Task ProcessResponses(IEnumerable<TorrentMetadataResponse> results)
    {
        var messages = results.Select(response => new GotMetadata(response)).ToList();
        await messageBus.PublishBatch(messages);
    }
}
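ProcessorChannel is referenced above but not shown in this excerpt; a minimal shape consistent with that usage would be an unbounded channel wrapper like this (an assumption, not the actual file):

using System.Threading.Channels;

// Assumed shape; the real implementation in this commit may differ.
public static class ProcessorChannel
{
    public static Channel<PerformMetadataRequest> Queue { get; } =
        Channel.CreateUnbounded<PerformMetadataRequest>();
}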

Some files were not shown because too many files have changed in this diff.