Producer / Consumer / Collector rewrite (#160)

* Converted metadata service to redis

* move to postgres instead

* fix global usings

* [skip ci] optimize wolverine by prebuilding static types

* [skip ci] Stop indexing mac folder indexes

* [skip ci] producer, metadata and migrations

removed mongodb
added redis cache
imdb meta in postgres
Enable pg_trgm
Create trigrams index
Add search meta postgres function

* [skip ci] get rid of node folder, replace mongo with redis in consumer

also wire up postgres metadata searches

* [skip ci] change mongo to redis in the addon

* [skip ci] jackettio to redis

* Rest of mongo removed...

* Cleaner rerunning of metadata - without conflicts

* Add akas import as well as basic metadata

* Include episodes file too

* cascade truncate pre-import

* reverse order to avoid cascading

* separate out clean to separate handler

* Switch producer to use metadata matching pre-processing dmm

* More work

* Still porting PTN

* PTN port, adding tests

* [skip ci] Codec tests

* [skip ci] Complete Collection handler tests

* [skip ci] container tests

* [skip ci] Convert handlers tests

* [skip ci] DateHandler tests

* [skip ci] Dual Audio matching tests

* [skip ci] episode code tests

* [skip ci] Extended handler tests

* [skip ci] group handler tests

* [skip ci] some broken stuff right now

* [skip ci] more ptn

* [skip ci] PTN now in a separate nuget package, rebased this on the redis changes - i need them.

* [skip ci] Wire up PTN port. Tired - will test tomorrow

* [skip ci] Needs a lot of work - too many titles being missed now

* cleaner. done?

* Handle the date in the imdb search

- add integer function to confirm its a valid integer
- use the input date as a range of -+1 year

* [skip ci] Start of collector service for RD

[skip ci] WIP

Implemented metadata saga, along with channels to process up to a maximum of 100 infohashes each time
The saga will retry for each infohash by requeuing up to three times, before just marking as complete for that infoHash - meaning no data will be updated in the db for that torrent.

[skip ci] Ready to test with queue publishing

Will provision a fanout exchange if it doesn't exist, and create and bind a queue to it. Listens to the queue with 50 prefetch count.
Still needs PTN rewrite bringing in to parse the filename response from real debrid, and extract season and episode numbers if the file is a tvshow

[skip ci] Add Debrid Collector Build Job

Debrid Collector ready for testing

New consumer, new collector, producer has meta lookup and anti porn measures

[skip ci] WIP - moving from wolverine to MassTransit.

 not happy that wolverine cannot effectively control saga concurrency. we need to really.

[skip ci] Producer and new Consumer moved to MassTransit

Just the debrid collector to go now, then to write the optional qbit collector.

Collector now switched to mass transit too

hide porn titles in logs, clean up cache name in redis for imdb titles

[skip ci] Allow control of queues

[skip ci] Update deployment

Remove old consumer, fix deployment files, fix dockerfiles for shared project import

fix base deployment

* Add collector missing env var

* edits to kick off builds

* Add optional qbit deployment which qbit collector will use

* Qbit collector done

* reorder compose, and bring both qbit and qbitcollector into the compose, with 0 replicas as default

* Clean up compose file

* Ensure debrid collector errors if no debrid api key
This commit is contained in:
iPromKnight
2024-03-25 23:32:28 +00:00
committed by GitHub
parent 9c6c1ac249
commit 9a831e92d0
443 changed files with 4154 additions and 476262 deletions

View File

@@ -0,0 +1,8 @@
namespace DebridCollector.Features.Configuration;
/// <summary>
/// Settings for the debrid collector service, read from the process environment.
/// </summary>
public class DebridCollectorConfiguration
{
    // Both values are handed to GetRequiredEnvironmentVariableAsString, which is defined
    // outside this file. NOTE(review): presumably it combines prefix and name into a single
    // variable name and fails when the variable is missing - confirm against the helper.
    private const string EnvironmentPrefix = "COLLECTOR";
    private const string ApiKeyVariableName = "REAL_DEBRID_API_KEY";

    /// <summary>
    /// API key sent as the bearer token on Real-Debrid requests. The default value is
    /// resolved from the environment when the instance is constructed.
    /// </summary>
    public string RealDebridApiKey { get; init; } =
        EnvironmentPrefix.GetRequiredEnvironmentVariableAsString(ApiKeyVariableName);
}

View File

@@ -0,0 +1,64 @@
namespace DebridCollector.Features.Debrid;
/// <summary>
/// Background service that drains <see cref="ProcessorChannel.Queue"/> in batches, asks the
/// debrid client for metadata for each batch, and publishes one <see cref="GotMetadata"/>
/// message per response.
/// </summary>
public class DebridRequestProcessor(IDebridHttpClient debridHttpClient, ILogger<DebridRequestProcessor> logger, IBus messageBus) : BackgroundService
{
    /// <summary>Pause between batch iterations, in milliseconds.</summary>
    private const int BatchDelay = 3000;

    /// <summary>Maximum number of requests sent to the debrid client in one call.</summary>
    public const int MaxBatchSize = 100;

    /// <summary>
    /// Main loop: collect up to <see cref="MaxBatchSize"/> distinct info hashes from the channel,
    /// process them, then wait <see cref="BatchDelay"/> milliseconds before the next pass.
    /// </summary>
    /// <param name="stoppingToken">Signals host shutdown; also cancels the delay and the HTTP call.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        var requests = new List<PerformMetadataRequest>(MaxBatchSize);
        var delay = TimeSpan.FromMilliseconds(BatchDelay);

        while (!stoppingToken.IsCancellationRequested)
        {
            // Check capacity BEFORE reading: TryRead removes the item from the channel, so
            // reading first and then breaking on a full batch would silently discard that item.
            // Items left in the channel are picked up on the next iteration.
            while (requests.Count < MaxBatchSize && ProcessorChannel.Queue.Reader.TryRead(out var request))
            {
                // Only one request per info hash is kept within a batch.
                if (requests.All(x => x.InfoHash != request.InfoHash))
                {
                    requests.Add(request);
                }
            }

            if (requests.Count != 0)
            {
                await ProcessRequests(requests, stoppingToken);
                requests.Clear();
            }

            await Task.Delay(delay, stoppingToken);
        }

        // Defensive flush: the batch is cleared after every pass above, so this only fires if
        // the loop is ever changed to exit with requests still buffered.
        if (requests.Count != 0)
        {
            await ProcessRequests(requests, stoppingToken);
            requests.Clear();
        }
    }

    /// <summary>
    /// Fetches metadata for a batch and publishes the results. Any exception is logged and
    /// swallowed so a failing batch does not stop the service; the batch is not retried here.
    /// </summary>
    private async Task ProcessRequests(IReadOnlyCollection<PerformMetadataRequest> requests, CancellationToken stoppingToken = default)
    {
        try
        {
            var results = await debridHttpClient.GetMetadataAsync(requests, stoppingToken);
            await ProcessResponses(results);
            logger.LogInformation("Processed: {Count} infoHashes", requests.Count);
        }
        catch (Exception e)
        {
            logger.LogError(e, "Failed to process infoHashes");
        }
    }

    /// <summary>Wraps each response in a <see cref="GotMetadata"/> message and publishes them as one batch.</summary>
    private async Task ProcessResponses(IEnumerable<TorrentMetadataResponse> results)
    {
        var messages = results.Select(response => new GotMetadata(response)).ToList();
        await messageBus.PublishBatch(messages);
    }
}

View File

@@ -0,0 +1,6 @@
namespace DebridCollector.Features.Debrid;
/// <summary>
/// Abstraction over a debrid provider's HTTP API for looking up torrent file metadata.
/// </summary>
public interface IDebridHttpClient
{
    /// <summary>
    /// Retrieves file metadata for a batch of requests.
    /// </summary>
    /// <param name="infoHashes">The requests (correlation id plus info hash) to look up.</param>
    /// <param name="cancellationToken">Token used to cancel the lookup.</param>
    /// <returns>The metadata responses produced for the batch.</returns>
    Task<IReadOnlyList<TorrentMetadataResponse>> GetMetadataAsync(
        IReadOnlyCollection<PerformMetadataRequest> infoHashes,
        CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,12 @@
namespace DebridCollector.Features.Debrid;
/// <summary>
/// Process-wide, unbounded in-memory queue of pending metadata requests.
/// </summary>
public static class ProcessorChannel
{
    /// <summary>
    /// The shared channel. Declared <c>readonly</c> so the instance that writers and the reader
    /// share cannot be swapped out at runtime.
    /// </summary>
    public static readonly Channel<PerformMetadataRequest> Queue = Channel.CreateUnbounded<PerformMetadataRequest>(
        new UnboundedChannelOptions
        {
            SingleReader = true,
            // Writes come from a bus message consumer, which may be invoked concurrently
            // depending on the bus configuration, so a single writer is not promised.
            SingleWriter = false,
        });

    /// <summary>
    /// Enqueues a request for the background processor.
    /// </summary>
    /// <param name="infoHash">The request to enqueue.</param>
    /// <returns>The result of <c>TryWrite</c> on the channel writer.</returns>
    public static bool AddToQueue(PerformMetadataRequest infoHash) => Queue.Writer.TryWrite(infoHash);
}

View File

@@ -0,0 +1,65 @@
namespace DebridCollector.Features.Debrid;
/// <summary>
/// <see cref="IDebridHttpClient"/> implementation that queries the Real-Debrid
/// instant-availability endpoint and maps the JSON payload to <see cref="TorrentMetadataResponse"/>.
/// </summary>
public class RealDebridClient(HttpClient client) : IDebridHttpClient
{
    private const string TorrentsInstantAvailability = "torrents/instantAvailability/";

    /// <summary>
    /// Looks up all requested info hashes in a single HTTP call.
    /// </summary>
    /// <param name="requests">Requests to look up; each info hash is lower-cased for the URL and for the response lookup.</param>
    /// <param name="cancellationToken">Token used to cancel the HTTP request.</param>
    /// <returns>
    /// Exactly one response per request, in request order. A request whose hash has no usable
    /// "rd" array in the payload gets a response with an empty <see cref="FileDataDictionary"/>.
    /// </returns>
    public async Task<IReadOnlyList<TorrentMetadataResponse>> GetMetadataAsync(IReadOnlyCollection<PerformMetadataRequest> requests, CancellationToken cancellationToken = default)
    {
        var responseAsString = await client.GetStringAsync($"{TorrentsInstantAvailability}{string.Join("/", requests.Select(x => x.InfoHash.ToLowerInvariant()))}", cancellationToken);

        // JsonDocument rents pooled buffers; dispose it once all elements have been read.
        using var document = JsonDocument.Parse(responseAsString);
        var root = document.RootElement;

        // TryGetProperty throws on a non-object element, so treat a non-object root
        // the same as "no data for any hash" instead of failing the whole batch.
        var rootIsObject = root.ValueKind == JsonValueKind.Object;

        var torrentMetadataResponses = new List<TorrentMetadataResponse>();

        foreach (var request in requests)
        {
            if (rootIsObject &&
                root.TryGetProperty(request.InfoHash.ToLowerInvariant(), out var dataElement) &&
                dataElement.ValueKind == JsonValueKind.Object &&
                dataElement.TryGetProperty("rd", out var rdDataElement) &&
                rdDataElement.ValueKind == JsonValueKind.Array &&
                rdDataElement.GetArrayLength() > 0)
            {
                MapResponseToMetadata(rdDataElement, torrentMetadataResponses, request);
                continue;
            }

            torrentMetadataResponses.Add(new(request.CorrelationId, new()));
        }

        return torrentMetadataResponses;
    }

    /// <summary>
    /// Flattens every object in the "rd" array into one <see cref="FileDataDictionary"/> keyed by
    /// property name and appends the resulting response. When the same key appears in several
    /// array items, the last one wins.
    /// </summary>
    private static void MapResponseToMetadata(JsonElement rdDataElement, List<TorrentMetadataResponse> torrentMetadataResponses, PerformMetadataRequest request)
    {
        var metaData = new FileDataDictionary();

        foreach (var item in rdDataElement.EnumerateArray())
        {
            if (item.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            foreach (var property in item.EnumerateObject())
            {
                if (property.Value.ValueKind != JsonValueKind.Object)
                {
                    continue;
                }

                var fileData = new FileData();

                if (property.Value.TryGetProperty("filename", out var filenameElement) && filenameElement.ValueKind == JsonValueKind.String)
                {
                    fileData.Filename = filenameElement.GetString();
                }

                if (property.Value.TryGetProperty("filesize", out var filesizeElement) && filesizeElement.ValueKind == JsonValueKind.Number)
                {
                    fileData.Filesize = filesizeElement.GetInt64();
                }

                metaData[property.Name] = fileData;
            }
        }

        torrentMetadataResponses.Add(new(request.CorrelationId, metaData));
    }
}

View File

@@ -0,0 +1,24 @@
namespace DebridCollector.Features.Debrid;
/// <summary>
/// Dictionary from string keys to nullable <see cref="RdData"/> entries.
/// NOTE(review): presumably models the instant-availability payload keyed by info hash; the
/// client code visible alongside this type parses the JSON manually and does not reference
/// it - confirm whether it is still needed.
/// </summary>
public class RealDebridResponse : Dictionary<string, RdData?>
{
}
/// <summary>
/// Holder for the "rd" JSON property: an optional list of per-file dictionaries.
/// </summary>
public class RdData
{
// Serialized as "rd"; may be absent/null.
[JsonPropertyName("rd")]
public List<FileDataDictionary>? Rd { get; set; }
}
/// <summary>
/// Dictionary of <see cref="FileData"/> keyed by string. The Real-Debrid client fills it using
/// JSON property names as keys, and the metadata mapper attempts to parse each key as an
/// integer file index.
/// </summary>
public class FileDataDictionary : Dictionary<string, FileData>
{
}
/// <summary>
/// Name and size of a single file as reported in the debrid response. Both values are
/// optional and stay null when the payload does not provide them.
/// </summary>
public class FileData
{
// Serialized as "filename".
[JsonPropertyName("filename")]
public string? Filename { get; set; }
// Serialized as "filesize". NOTE(review): unit is presumably bytes - confirm against the API.
[JsonPropertyName("filesize")]
public long? Filesize { get; set; }
}

View File

@@ -0,0 +1,32 @@
using DebridCollector.Features.Configuration;
namespace DebridCollector.Features.Debrid;
/// <summary>
/// Dependency-injection registration helpers for the Real-Debrid HTTP client.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="RealDebridClient"/> as the typed <see cref="IDebridHttpClient"/>,
    /// pointing at the Real-Debrid REST API with a bearer token, and attaches a retry policy
    /// followed by a circuit-breaker policy.
    /// </summary>
    /// <param name="services">The service collection to add the client to.</param>
    /// <param name="serviceConfiguration">Supplies the Real-Debrid API key.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddRealDebridClient(this IServiceCollection services, DebridCollectorConfiguration serviceConfiguration)
    {
        services
            .AddHttpClient<IDebridHttpClient, RealDebridClient>(
                httpClient =>
                {
                    httpClient.BaseAddress = new Uri("https://api.real-debrid.com/rest/1.0/");
                    httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {serviceConfiguration.RealDebridApiKey}");
                })
            .AddPolicyHandler(GetRetryPolicy())
            .AddPolicyHandler(GetCircuitBreakerPolicy());

        return services;
    }

    /// <summary>
    /// Retry policy for transient HTTP errors: waits 2^attempt seconds plus a random jitter of
    /// up to <paramref name="maxJitterTime"/> milliseconds (exclusive) before each retry.
    /// </summary>
    private static AsyncPolicy<HttpResponseMessage> GetRetryPolicy(int maxRetryCount = 5, int maxJitterTime = 1000)
    {
        return HttpPolicyExtensions
            .HandleTransientHttpError()
            .WaitAndRetryAsync(
                maxRetryCount,
                attempt =>
                {
                    var backoff = TimeSpan.FromSeconds(Math.Pow(2, attempt));
                    var jitter = TimeSpan.FromMilliseconds(Random.Shared.Next(0, maxJitterTime));
                    return backoff + jitter;
                });
    }

    /// <summary>
    /// Circuit breaker for transient HTTP errors: opens for 30 seconds after 5 handled failures.
    /// </summary>
    private static AsyncPolicy<HttpResponseMessage> GetCircuitBreakerPolicy()
    {
        return HttpPolicyExtensions
            .HandleTransientHttpError()
            .CircuitBreakerAsync(handledEventsAllowedBeforeBreaking: 5, TimeSpan.FromSeconds(30));
    }
}

View File

@@ -0,0 +1,82 @@
namespace DebridCollector.Features.Worker;
/// <summary>
/// Maps debrid file metadata onto the storage models for torrent files and subtitles.
/// </summary>
public static class DebridMetaToTorrentMeta
{
    /// <summary>
    /// Builds a <see cref="TorrentFile"/> for every metadata entry whose filename has a known
    /// video extension, parsing season and episode from the filename.
    /// </summary>
    /// <param name="torrentTitle">Parser used to extract season/episode numbers from each filename.</param>
    /// <param name="torrent">Torrent the files belong to; supplies the info hash.</param>
    /// <param name="ImdbId">IMDb id stamped on every produced file.</param>
    /// <param name="Metadata">File metadata keyed by file index (as a string).</param>
    /// <returns>The mapped files; an empty list when nothing matches or when mapping throws.</returns>
    public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection(
        IParseTorrentTitle torrentTitle,
        Torrent torrent,
        string ImdbId,
        FileDataDictionary Metadata)
    {
        try
        {
            var files = new List<TorrentFile>();

            foreach (var metadataEntry in Metadata.Where(m => HasAnyExtension(m.Value.Filename, Filetypes.VideoFileExtensions)))
            {
                // A key that is not a valid integer falls back to file index 0.
                var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);

                var file = new TorrentFile
                {
                    ImdbId = ImdbId,
                    KitsuId = 0,
                    InfoHash = torrent.InfoHash,
                    FileIndex = validFileIndex ? fileIndex : 0,
                    Title = metadataEntry.Value.Filename,
                    Size = metadataEntry.Value.Filesize.GetValueOrDefault(),
                };

                var parsedTitle = torrentTitle.Parse(file.Title);
                file.ImdbSeason = parsedTitle.Seasons.FirstOrDefault();
                file.ImdbEpisode = parsedTitle.Episodes.FirstOrDefault();

                files.Add(file);
            }

            return files;
        }
        catch (Exception)
        {
            // Best-effort mapping: a failure yields no files rather than faulting the caller.
            return [];
        }
    }

    /// <summary>
    /// Builds a <see cref="SubtitleFile"/> for every metadata entry whose filename has a known
    /// subtitle extension, linking it to a stored torrent file with the same base name.
    /// </summary>
    /// <param name="storage">Storage used to load the torrent's already-saved files.</param>
    /// <param name="InfoHash">Info hash of the torrent; lower-cased for the storage lookup.</param>
    /// <param name="Metadata">File metadata keyed by file index (as a string).</param>
    /// <returns>
    /// The mapped subtitles; an empty list when the torrent has no stored files, nothing
    /// matches, or mapping throws.
    /// </returns>
    public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, FileDataDictionary Metadata)
    {
        try
        {
            var files = new List<SubtitleFile>();

            var torrentFiles = await storage.GetTorrentFiles(InfoHash.ToLowerInvariant());

            if (torrentFiles.Count == 0)
            {
                return files;
            }

            foreach (var metadataEntry in Metadata.Where(m => HasAnyExtension(m.Value.Filename, Filetypes.SubtitleFileExtensions)))
            {
                var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);

                // Match the subtitle to a stored file by name without extension; 0 when none matches.
                var fileId = torrentFiles.FirstOrDefault(
                    t => Path.GetFileNameWithoutExtension(t.Title) == Path.GetFileNameWithoutExtension(metadataEntry.Value.Filename))?.Id ?? 0;

                var file = new SubtitleFile
                {
                    InfoHash = InfoHash,
                    FileIndex = validFileIndex ? fileIndex : 0,
                    FileId = fileId,
                    Title = metadataEntry.Value.Filename,
                };

                files.Add(file);
            }

            return files;
        }
        catch (Exception)
        {
            // Best-effort mapping: a failure yields no subtitles rather than faulting the caller.
            return [];
        }
    }

    /// <summary>
    /// Returns true when <paramref name="filename"/> is non-empty and ends with one of
    /// <paramref name="extensions"/>. The comparison is ordinal and case-insensitive so that
    /// names such as "MOVIE.MKV" match, and entries without a filename are skipped instead of
    /// throwing and discarding the whole result.
    /// </summary>
    private static bool HasAnyExtension(string? filename, IReadOnlyList<string> extensions) =>
        !string.IsNullOrEmpty(filename) &&
        extensions.Any(ext => filename.EndsWith(ext, StringComparison.OrdinalIgnoreCase));
}

View File

@@ -0,0 +1,104 @@
namespace DebridCollector.Features.Worker;
/// <summary>
/// File extensions (lower-case, including the leading dot) used to classify files reported
/// by the debrid service as video or subtitle files.
/// </summary>
public static class Filetypes
{
    /// <summary>Extensions treated as video files. Read-only so the shared list cannot be replaced.</summary>
    public static readonly IReadOnlyList<string> VideoFileExtensions =
    [
        ".3g2",
        ".3gp",
        ".3gp2",
        ".3gpp",
        ".60d",
        ".ajp",
        ".asf",
        ".asx",
        ".avchd",
        ".avi",
        ".bik",
        ".bix",
        ".box",
        ".cam",
        ".dat",
        ".divx",
        ".dmf",
        ".dv",
        ".dvr-ms",
        ".evo",
        ".flc",
        ".fli",
        ".flic",
        ".flv",
        ".flx",
        ".gvi",
        ".gvp",
        ".h264",
        ".m1v",
        ".m2p",
        ".m2ts",
        ".m2v",
        ".m4e",
        ".m4v",
        ".mjp",
        ".mjpeg",
        ".mjpg",
        ".mkv",
        ".moov",
        ".mov",
        ".movhd",
        ".movie",
        ".movx",
        ".mp4",
        ".mpe",
        ".mpeg",
        ".mpg",
        ".mpv",
        ".mpv2",
        ".mxf",
        ".nsv",
        ".nut",
        ".ogg",
        ".ogm",
        ".omf",
        ".ps",
        ".qt",
        ".ram",
        ".rm",
        ".rmvb",
        ".swf",
        ".ts",
        ".vfw",
        ".vid",
        ".video",
        ".viv",
        ".vivo",
        ".vob",
        ".vro",
        ".wm",
        ".wmv",
        ".wmx",
        ".wrap",
        ".wvx",
        ".wx",
        ".x264",
        ".xvid",
    ];

    /// <summary>
    /// Extensions treated as subtitle files. Read-only so the shared list cannot be replaced.
    /// NOTE(review): ".a" is unusually short and matches any name ending in ".a" - confirm it is intended.
    /// </summary>
    public static readonly IReadOnlyList<string> SubtitleFileExtensions =
    [
        ".a",
        ".srt",
        ".ass",
        ".ssa",
        ".stl",
        ".scc",
        ".ttml",
        ".sbv",
        ".dks",
        ".qtx",
        ".jss",
        ".vtt",
        ".smi",
        ".usf",
        ".idx"
    ];
}

View File

@@ -0,0 +1,14 @@
namespace DebridCollector.Features.Worker;
/// <summary>
/// Per-torrent state held by the metadata saga while it moves from collecting metadata to
/// writing it.
/// </summary>
public class InfohashMetadataSagaState : SagaStateMachineInstance, ISagaVersion
{
// Torrent being processed; assigned by the state machine when the saga starts.
public Torrent? Torrent { get; set; }
// NOTE(review): not assigned by the state machine visible alongside this type - confirm usage.
public string? Title { get; set; }
// IMDb id carried from the initial message through to the write step.
public string? ImdbId { get; set; }
// Metadata response stored once the debrid lookup has returned.
public TorrentMetadataResponse? Metadata { get; set; }
// Defaults to 2. NOTE(review): not read by the state machine visible alongside this type - confirm usage.
public int RetriesAllowed { get; set; } = 2;
// Saga identity; events are correlated to the instance by this id.
public Guid CorrelationId { get; set; }
// Required by ISagaVersion.
public int Version { get; set; }
// Backing value for the state machine's current state (see InstanceState in the state machine).
public int CurrentState { get; set; }
}

View File

@@ -0,0 +1,63 @@
namespace DebridCollector.Features.Worker;
/// <summary>
/// State machine that drives one torrent through metadata collection:
/// <c>CollectMetadata</c> -> Ingesting -> <c>GotMetadata</c> -> Writing -> <c>MetadataWritten</c> -> finalized.
/// All events are correlated to the saga instance by <c>CorrelationId</c>.
/// </summary>
public class InfohashMetadataSagaStateMachine : MassTransitStateMachine<InfohashMetadataSagaState>
{
    /// <summary>Waiting for the debrid lookup to return.</summary>
    public State Ingesting { get; private set; } = null!;

    /// <summary>Waiting for the metadata to be written.</summary>
    public State Writing { get; private set; } = null!;

    /// <summary>Metadata has been written; the saga is finalized right after entering this state.</summary>
    public State Completed { get; private set; } = null!;

    public Event<CollectMetadata> CollectMetadata { get; private set; } = null!;
    public Event<GotMetadata> GotMetadata { get; private set; } = null!;
    public Event<MetadataWritten> MetadataWritten { get; private set; } = null!;

    /// <summary>
    /// Configures state storage, event correlation and the transitions between states.
    /// </summary>
    /// <param name="logger">Logger used to trace each step of the saga.</param>
    public InfohashMetadataSagaStateMachine(ILogger<InfohashMetadataSagaStateMachine> logger)
    {
        InstanceState(x => x.CurrentState);

        Event(() => CollectMetadata, x => x.CorrelateById(context => context.Message.CorrelationId));
        Event(() => GotMetadata, x => x.CorrelateById(context => context.Message.CorrelationId));
        Event(() => MetadataWritten, x => x.CorrelateById(context => context.Message.CorrelationId));

        // Start: remember the torrent, then request its metadata.
        // context.Saga / context.Message are used throughout instead of the legacy
        // context.Instance / context.Data aliases the original mixed in.
        Initially(
            When(CollectMetadata)
                .ThenAsync(
                    async context =>
                    {
                        context.Saga.CorrelationId = context.Message.CorrelationId;
                        context.Saga.Torrent = context.Message.Torrent;
                        context.Saga.ImdbId = context.Message.ImdbId;

                        await context.Publish(new PerformMetadataRequest(context.Saga.CorrelationId, context.Saga.Torrent.InfoHash));

                        logger.LogInformation("Collecting Metadata for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
                    })
                .TransitionTo(Ingesting));

        // Lookup finished: store the response and ask for it to be written.
        During(
            Ingesting,
            When(GotMetadata)
                .ThenAsync(
                    async context =>
                    {
                        context.Saga.Metadata = context.Message.Metadata;

                        await context.Publish(new WriteMetadata(context.Saga.Torrent, context.Saga.Metadata, context.Saga.ImdbId));

                        logger.LogInformation("Got Metadata for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
                    })
                .TransitionTo(Writing));

        // Write finished: log, mark completed and finalize the instance.
        During(
            Writing,
            When(MetadataWritten)
                .Then(
                    context =>
                    {
                        logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
                    })
                .TransitionTo(Completed)
                .Finalize());

        SetCompletedWhenFinalized();
    }
}

View File

@@ -0,0 +1,10 @@
namespace DebridCollector.Features.Worker;
/// <summary>
/// Bus consumer that forwards each <see cref="PerformMetadataRequest"/> to the in-memory
/// <see cref="ProcessorChannel"/> for batched processing.
/// </summary>
public class PerformMetadataRequestConsumer : IConsumer<PerformMetadataRequest>
{
    /// <summary>
    /// Enqueues the incoming message and completes synchronously. The enqueue result is
    /// intentionally not inspected.
    /// </summary>
    /// <param name="context">Consume context carrying the request message.</param>
    public Task Consume(ConsumeContext<PerformMetadataRequest> context)
    {
        var incoming = context.Message;
        _ = ProcessorChannel.AddToQueue(incoming);

        return Task.CompletedTask;
    }
}

View File

@@ -0,0 +1,22 @@
namespace DebridCollector.Features.Worker;
/// <summary>
/// Message asking for a metadata lookup of one info hash on behalf of the saga identified by
/// <c>CorrelationId</c>.
/// </summary>
[EntityName("perform-metadata-request")]
public record PerformMetadataRequest(Guid CorrelationId, string InfoHash) : CorrelatedBy<Guid>;
/// <summary>
/// Message carrying the metadata lookup result back to the saga.
/// NOTE(review): this entity name is the same string declared on TorrentMetadataResponse -
/// confirm that sharing it is intended.
/// </summary>
[EntityName("torrent-metadata-response")]
public record GotMetadata(TorrentMetadataResponse Metadata) : CorrelatedBy<Guid>
{
// Defaults to the correlation id of the wrapped response.
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}
/// <summary>
/// Message asking for the collected metadata of a torrent to be written, together with the
/// torrent and its IMDb id.
/// </summary>
[EntityName("write-metadata")]
public record WriteMetadata(Torrent Torrent, TorrentMetadataResponse Metadata, string ImdbId) : CorrelatedBy<Guid>
{
// Defaults to the correlation id of the wrapped response.
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}
/// <summary>
/// Message signalling that the write step for the given metadata has finished.
/// </summary>
[EntityName("metadata-written")]
public record MetadataWritten(TorrentMetadataResponse Metadata) : CorrelatedBy<Guid>
{
// Defaults to the correlation id of the wrapped response.
public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
}

View File

@@ -0,0 +1,4 @@
namespace DebridCollector.Features.Worker;
/// <summary>
/// Result of a metadata lookup: the correlation id of the originating request plus the file
/// metadata found for it (an empty dictionary when nothing was found).
/// </summary>
[EntityName("torrent-metadata-response")]
public record TorrentMetadataResponse(Guid CorrelationId, FileDataDictionary Metadata) : CorrelatedBy<Guid>;

View File

@@ -0,0 +1,25 @@
namespace DebridCollector.Features.Worker;
/// <summary>
/// Bus consumer that persists the files (and matching subtitles) described by a
/// <see cref="WriteMetadata"/> message and then announces <see cref="MetadataWritten"/>.
/// </summary>
public class WriteMetadataConsumer(IParseTorrentTitle parseTorrentTitle, IDataStorage dataStorage) : IConsumer<WriteMetadata>
{
    /// <summary>
    /// Maps the metadata to torrent files; when any are found, stores them and then any
    /// subtitles. <see cref="MetadataWritten"/> is published whether or not anything was stored.
    /// </summary>
    /// <param name="context">Consume context carrying the write request.</param>
    public async Task Consume(ConsumeContext<WriteMetadata> context)
    {
        var message = context.Message;

        var mappedFiles = DebridMetaToTorrentMeta.MapMetadataToFilesCollection(parseTorrentTitle, message.Torrent, message.ImdbId, message.Metadata.Metadata);

        if (mappedFiles.Count > 0)
        {
            await dataStorage.InsertFiles(mappedFiles);

            // Subtitles are resolved after the files are stored because they are linked to stored file ids.
            var mappedSubtitles = await DebridMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, message.Torrent.InfoHash, message.Metadata.Metadata);

            if (mappedSubtitles.Count > 0)
            {
                await dataStorage.InsertSubtitles(mappedSubtitles);
            }
        }

        await context.Publish(new MetadataWritten(message.Metadata));
    }
}