Producer / Consumer / Collector rewrite (#160)

* Converted metadata service to redis

* move to postgres instead

* fix global usings

* [skip ci] optimize wolverine by prebuilding static types

* [skip ci] Stop indexing macOS folder index files

* [skip ci] producer, metadata and migrations

removed mongodb
added redis cache
imdb meta in postgres
Enable pg_trgm
Create trigram index
Add search_imdb_meta postgres function (sketched below)
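
A rough sketch of what those three steps could look like as a migration, written here in the same raw-string style the Dapper code later in this diff uses. Only the imdb_metadata table, its columns, and the search_imdb_meta call shape (title, category, year, limit) come from this PR; the index name, parameter names, and function body are assumptions, and a later commit in this PR widens the exact year match to a ±1 range:

// Hypothetical migration SQL; every name not taken from this diff is invented.
const string EnableTrigramSearch =
    """
    CREATE EXTENSION IF NOT EXISTS pg_trgm;

    CREATE INDEX IF NOT EXISTS imdb_metadata_title_trgm_idx
        ON imdb_metadata USING gin (title gin_trgm_ops);

    CREATE OR REPLACE FUNCTION search_imdb_meta(
            p_title text, p_category text, p_year text, p_limit int)
        RETURNS TABLE (imdb_id text, title text, year text)
        LANGUAGE sql STABLE
    AS $func$
        SELECT m.imdb_id, m.title, m.year
        FROM imdb_metadata m
        WHERE m.category = p_category
          AND (p_year IS NULL OR m.year = p_year)
          AND m.title % p_title                  -- pg_trgm similarity operator
        ORDER BY similarity(m.title, p_title) DESC
        LIMIT p_limit;
    $func$;
    """;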

* [skip ci] get rid of node folder, replace mongo with redis in consumer

also wire up postgres metadata searches

* [skip ci] change mongo to redis in the addon

* [skip ci] jackettio to redis

* Rest of mongo removed...

* Cleaner rerunning of metadata - without conflicts

* Add akas import as well as basic metadata

* Include episodes file too

* cascade truncate pre-import

* reverse order to avoid cascading (see the sketch below)
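
In effect, the two commits above move between these truncation strategies; imdb_akas and imdb_episodes are assumed table names based on the akas and episodes imports mentioned earlier:

// Hypothetical sketch: cascade from the parent vs. child-first ordering.
const string TruncateWithCascade =
    "TRUNCATE TABLE imdb_metadata CASCADE;";

const string TruncateChildrenFirst =
    """
    TRUNCATE TABLE imdb_akas;
    TRUNCATE TABLE imdb_episodes;
    TRUNCATE TABLE imdb_metadata;
    """;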

* separate the clean-up step into its own handler

* Switch producer to use metadata matching when pre-processing DMM

* More work

* Still porting PTN

* PTN port, adding tests

* [skip ci] Codec tests

* [skip ci] Complete Collection handler tests

* [skip ci] container tests

* [skip ci] Convert handlers tests

* [skip ci] DateHandler tests

* [skip ci] Dual Audio matching tests

* [skip ci] episode code tests

* [skip ci] Extended handler tests

* [skip ci] group handler tests

* [skip ci] some broken stuff right now

* [skip ci] more ptn

* [skip ci] PTN is now in a separate NuGet package; rebased this on the redis changes - I need them.

* [skip ci] Wire up PTN port. Tired - will test tomorrow

* [skip ci] Needs a lot of work - too many titles being missed now

* cleaner. done?

* Handle the date in the imdb search (sketched below)

- add an integer-check function to confirm the value is a valid integer
- use the input date as a ±1 year range
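
A sketch of how those two pieces might fit together, assuming the year column is text with '\N' for missing values (as the CAST(NULLIF(...)) query later in this diff suggests); the is_valid_integer name is invented:

// Hypothetical SQL sketch: integer guard plus ±1 year range filter.
const string YearHandlingSketch =
    """
    CREATE OR REPLACE FUNCTION is_valid_integer(p_value text)
        RETURNS boolean
        LANGUAGE sql IMMUTABLE
    AS $func$
        SELECT p_value ~ '^[0-9]+$';
    $func$;

    -- Inside search_imdb_meta, the year filter then becomes:
    --   AND (p_year IS NULL
    --        OR (is_valid_integer(m.year)
    --            AND m.year::int BETWEEN p_year::int - 1 AND p_year::int + 1))
    """;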

* [skip ci] Start of collector service for RD

[skip ci] WIP

Implemented the metadata saga, along with channels to process up to a maximum of 100 infohashes at a time (sketched below).
The saga will retry each infohash by requeuing it up to three times before marking it as complete - meaning no data will be updated in the db for that torrent.
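
A minimal sketch of the bounded channel and the requeue-or-complete decision; every name here (infoHashes, MaxAttempts, the delegates) is illustrative, not the PR's actual saga:

using System.Threading.Channels;

// At most 100 infohashes are buffered per cycle; writers wait when full.
var infoHashes = Channel.CreateBounded<string>(new BoundedChannelOptions(100)
{
    FullMode = BoundedChannelFullMode.Wait,
});

// The requeue-or-complete decision: three attempts per infohash, then give up.
const int MaxAttempts = 3;

async Task ProcessAsync(string infoHash, int attempt,
    Func<string, Task<bool>> tryWriteMetadata, Func<string, Task> requeue)
{
    if (await tryWriteMetadata(infoHash))
    {
        return;                    // metadata persisted; saga completes
    }

    if (attempt < MaxAttempts)
    {
        await requeue(infoHash);   // put it back on the queue and try later
        return;
    }

    // Out of attempts: mark the saga complete for this infohash without
    // writing anything to the db for that torrent.
}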

[skip ci] Ready to test with queue publishing

Will provision a fanout exchange if it doesn't exist, then create and bind a queue to it. Listens to the queue with a prefetch count of 50 (sketched below).
Still needs the PTN rewrite brought in to parse the filename response from Real-Debrid and extract season and episode numbers if the file is a TV show.
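
That topology could be provisioned along these lines with the RabbitMQ.Client API; the host, exchange, and queue names are placeholders, not the PR's actual configuration:

using RabbitMQ.Client;

// Illustrative provisioning; names are assumptions.
var factory = new ConnectionFactory { HostName = "rabbitmq" };
using var connection = factory.CreateConnection();
using var channel = connection.CreateModel();

// Declares are idempotent: no-ops when the topology already exists.
channel.ExchangeDeclare("rd-collector-fanout", ExchangeType.Fanout, durable: true);
channel.QueueDeclare("rd-collector", durable: true, exclusive: false, autoDelete: false);
channel.QueueBind("rd-collector", "rd-collector-fanout", routingKey: string.Empty);

// Limit unacknowledged deliveries to 50, i.e. the prefetch count above.
channel.BasicQos(prefetchSize: 0, prefetchCount: 50, global: false);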

[skip ci] Add Debrid Collector Build Job

Debrid Collector ready for testing

New consumer, new collector; producer has meta lookup and anti-porn measures

[skip ci] WIP - moving from Wolverine to MassTransit.

Not happy that Wolverine cannot effectively control saga concurrency, and we really need that (see the sketch below).
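
For context, a hedged sketch of how MassTransit can bound saga concurrency per endpoint; the state machine, state type, repository, and queue name are all assumptions, and services is an IServiceCollection:

using MassTransit;

// Hypothetical registration; only the concurrency-capping idea is the point.
services.AddMassTransit(x =>
{
    x.AddSagaStateMachine<MetadataSagaStateMachine, MetadataSagaState>()
        .InMemoryRepository(); // placeholder; any saga repository works here

    x.UsingRabbitMq((context, cfg) =>
    {
        cfg.ReceiveEndpoint("metadata-saga", e =>
        {
            e.PrefetchCount = 50;            // matches the collector's prefetch
            e.ConcurrentMessageLimit = 8;    // hard cap on in-flight messages
            e.ConfigureSaga<MetadataSagaState>(context);
        });
    });
});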

[skip ci] Producer and new Consumer moved to MassTransit

Just the debrid collector to go now, then to write the optional qbit collector.

Collector now switched to MassTransit too

hide porn titles in logs, clean up cache name in redis for imdb titles

[skip ci] Allow control of queues

[skip ci] Update deployment

Remove old consumer, fix deployment files, fix dockerfiles for shared project import

fix base deployment

* Add collector missing env var

* edits to kick off builds

* Add optional qbit deployment which qbit collector will use

* Qbit collector done

* reorder compose, and bring both qbit and qbitcollector into the compose, with 0 replicas as default

* Clean up compose file

* Ensure debrid collector errors if no debrid api key
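
The guard described above is presumably a startup check along these lines; the configuration type and property name are assumptions:

// Hypothetical fail-fast check at collector startup.
if (string.IsNullOrWhiteSpace(configuration.RealDebridApiKey))
{
    throw new InvalidOperationException(
        "The debrid collector requires a debrid API key to be configured.");
}
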
Author: iPromKnight (committed by GitHub)
Date: 2024-03-25 23:32:28 +00:00
Parent: 9c6c1ac249
Commit: 9a831e92d0
443 changed files with 4154 additions and 476262 deletions

@@ -0,0 +1,53 @@
namespace SharedContracts.Dapper;
public abstract class BaseDapperStorage(ILogger<IDataStorage> logger, PostgresConfiguration configuration)
{
protected async Task ExecuteCommandAsync(Func<NpgsqlConnection, Task> operation, string errorMessage, CancellationToken cancellationToken = default)
{
try
{
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync(cancellationToken);
await operation(connection);
}
catch (Exception e)
{
logger.LogError(e, errorMessage);
}
}
protected async Task<TResult> ExecuteCommandAsync<TResult>(Func<NpgsqlConnection, Task<TResult>> operation, string errorMessage, CancellationToken cancellationToken = default)
{
try
{
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync(cancellationToken);
var result = await operation(connection);
return result;
}
catch (Exception e)
{
logger.LogError(e, errorMessage);
throw;
}
}
protected async Task<DapperResult<TResult, TFailure>> ExecuteCommandAsync<TResult, TFailure>(Func<NpgsqlConnection, Task<TResult>> operation, Func<Exception, TFailure> createFailureResult, CancellationToken cancellationToken = default)
{
try
{
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync(cancellationToken);
var result = await operation(connection);
return DapperResult<TResult, TFailure>.Ok(result);
}
catch (Exception e)
{
var failureResult = createFailureResult(e);
return DapperResult<TResult, TFailure>.Fail(failureResult);
}
}
}

@@ -0,0 +1,189 @@
namespace SharedContracts.Dapper;
public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConfiguration rabbitConfig, ILogger<DapperDataStorage> logger) :
BaseDapperStorage(logger, configuration), IDataStorage
{
public async Task<DapperResult<InsertTorrentResult, InsertTorrentResult>> InsertTorrents(IReadOnlyCollection<IngestedTorrent> torrents, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
const string query =
"""
INSERT INTO ingested_torrents
("name", "source", "category", "info_hash", "size", "seeders", "leechers", "imdb", "processed", "createdAt", "updatedAt")
VALUES
(@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt)
ON CONFLICT (source, info_hash) DO NOTHING
""";
var inserted = await connection.ExecuteAsync(query, torrents);
return new InsertTorrentResult(true, inserted);
}, _ => new InsertTorrentResult(false, 0, "Failed to insert torrents."), cancellationToken);
public async Task<DapperResult<List<IngestedTorrent>, List<IngestedTorrent>>> GetPublishableTorrents(CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
const string query =
"""
SELECT
"id" as "Id",
"name" as "Name",
"source" as "Source",
"category" as "Category",
"info_hash" as "InfoHash",
"size" as "Size",
"seeders" as "Seeders",
"leechers" as "Leechers",
"imdb" as "Imdb",
"processed" as "Processed",
"createdAt" as "CreatedAt",
"updatedAt" as "UpdatedAt"
FROM ingested_torrents
WHERE processed = false AND category != 'xxx'
""";
var torrents = await connection.QueryAsync<IngestedTorrent>(query);
return torrents.Take(rabbitConfig.MaxPublishBatchSize).ToList();
}, _ => new List<IngestedTorrent>(), cancellationToken);
public async Task<DapperResult<UpdatedTorrentResult, UpdatedTorrentResult>> SetTorrentsProcessed(IReadOnlyCollection<IngestedTorrent> torrents, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
foreach (var torrent in torrents)
{
torrent.UpdatedAt = DateTime.UtcNow;
}
const string query =
"""
UPDATE ingested_torrents
SET
processed = true,
"updatedAt" = @UpdatedAt
WHERE id = @Id
""";
var updated = await connection.ExecuteAsync(query, torrents);
return new UpdatedTorrentResult(true, updated);
}, _ => new UpdatedTorrentResult(false, 0, "Failed to mark torrents as processed"), cancellationToken);
public async Task<bool> PageIngested(string pageId, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
const string query = "SELECT EXISTS (SELECT 1 FROM ingested_pages WHERE url = @Url)";
return await connection.ExecuteScalarAsync<bool>(query, new { Url = pageId });
}, "Failed to check if page is ingested", cancellationToken);
public async Task<DapperResult<PageIngestedResult, PageIngestedResult>> MarkPageAsIngested(string pageId, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
var date = DateTime.UtcNow;
const string query =
"""
INSERT INTO ingested_pages
(url, "createdAt", "updatedAt")
VALUES
(@Url, @CreatedAt, @UpdatedAt)
""";
await connection.ExecuteAsync(query, new
{
Url = pageId,
CreatedAt = date,
UpdatedAt = date,
});
return new PageIngestedResult(true, "Page successfully marked as ingested");
}, _ => new PageIngestedResult(false, "Failed to mark page as ingested"), cancellationToken);
public async Task<DapperResult<int, int>> GetRowCountImdbMetadata(CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
const string query = "SELECT COUNT(*) FROM imdb_metadata";
var result = await connection.ExecuteScalarAsync<int>(query);
return result;
}, _ => 0, cancellationToken);
public async Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
const string query = @"SELECT imdb_id AS ImdbId, title as Title, category as Category, year as Year, adult as Adult FROM imdb_metadata WHERE CAST(NULLIF(Year, '\N') AS INTEGER) <= @Year AND imdb_id > @LastProcessedImdbId ORDER BY ImdbId LIMIT @BatchSize";
var result = await connection.QueryAsync<ImdbEntry>(query, new { Year = year, LastProcessedImdbId = stateLastProcessedImdbId, BatchSize = batchSize });
return result.ToList();
}, "Error getting imdb metadata.", cancellationToken);
public async Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, TorrentType torrentType, string? year, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
// Parameterised so torrent titles are never interpolated into the SQL.
const string query =
"""
SELECT "imdb_id" AS "ImdbId", "title" AS "Title", "year" AS "Year"
FROM search_imdb_meta(@Title, @Category, @Year, 15)
""";
var result = await connection.QueryAsync<ImdbEntry>(query, new
{
Title = parsedTorrentTitle,
Category = torrentType == TorrentType.Movie ? "movie" : "tvSeries",
Year = year,
});
return result.ToList();
}, "Error finding imdb metadata.", cancellationToken);
public Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default) =>
ExecuteCommandAsync(
async connection =>
{
const string query =
"""
INSERT INTO "torrents"
("infoHash", "provider", "torrentId", "title", "size", "type", "uploadDate", "seeders", "trackers", "languages", "resolution", "reviewed", "opened", "createdAt", "updatedAt")
VALUES
(@InfoHash, @Provider, @TorrentId, @Title, 0, @Type, NOW(), @Seeders, NULL, NULL, NULL, false, false, NOW(), NOW())
ON CONFLICT ("infoHash") DO NOTHING
""";
await connection.ExecuteAsync(query, torrent);
}, "Failed to insert torrent files into database", cancellationToken);
public Task InsertFiles(IEnumerable<TorrentFile> files, CancellationToken cancellationToken = default) =>
ExecuteCommandAsync(
async connection =>
{
const string query =
"""
INSERT INTO files
("infoHash", "fileIndex", title, "size", "imdbId", "imdbSeason", "imdbEpisode", "kitsuId", "kitsuEpisode", "createdAt", "updatedAt")
VALUES
(@InfoHash, @FileIndex, @Title, @Size, @ImdbId, @ImdbSeason, @ImdbEpisode, @KitsuId, @KitsuEpisode, NOW(), NOW());
""";
await connection.ExecuteAsync(query, files);
}, "Failed to insert torrent files into database", cancellationToken);
public Task InsertSubtitles(IEnumerable<SubtitleFile> subtitles, CancellationToken cancellationToken = default) =>
ExecuteCommandAsync(
async connection =>
{
const string query =
"""
INSERT INTO subtitles
("infoHash", "fileIndex", "fileId", "title")
VALUES
(@InfoHash, @FileIndex, @FileId, @Title)
ON CONFLICT
("infoHash", "fileIndex")
DO UPDATE SET
"fileId" = COALESCE(subtitles."fileId", EXCLUDED."fileId"),
"title" = COALESCE(subtitles."title", EXCLUDED."title");
""";
await connection.ExecuteAsync(query, subtitles);
}, "Failed to insert subtitles into database", cancellationToken);
public Task<List<TorrentFile>> GetTorrentFiles(string infoHash, CancellationToken cancellationToken = default) =>
ExecuteCommandAsync(
async connection =>
{
const string query = "SELECT * FROM files WHERE LOWER(\"infoHash\") = @InfoHash";
var files = await connection.QueryAsync<TorrentFile>(query, new { InfoHash = infoHash });
return files.ToList();
}, "Failed to insert subtitles into database", cancellationToken);
}

@@ -0,0 +1,21 @@
namespace SharedContracts.Dapper;
public class DapperResult<TSuccess, TFailure>
{
public TSuccess Success { get; }
public TFailure Failure { get; }
public bool IsSuccess { get; }
private DapperResult(TSuccess success, TFailure failure, bool isSuccess)
{
Success = success;
Failure = failure;
IsSuccess = isSuccess;
}
public static DapperResult<TSuccess, TFailure> Ok(TSuccess success) =>
new(success, default, true);
public static DapperResult<TSuccess, TFailure> Fail(TFailure failure) =>
new(default, failure, false);
}
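
For reference, a call site consumes this result along these lines; dataStorage, torrents, logger, and cancellationToken are assumed locals, not code from this diff:

// Hypothetical caller showing the Ok/Fail pattern in use.
var result = await dataStorage.InsertTorrents(torrents, cancellationToken);
if (result.IsSuccess)
{
    logger.LogInformation("Inserted {Count} torrents", result.Success.InsertedCount);
}
else
{
    logger.LogWarning("Torrent insert failed: {Error}", result.Failure.ErrorMessage);
}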

@@ -0,0 +1,17 @@
namespace SharedContracts.Dapper;
public interface IDataStorage
{
Task<DapperResult<InsertTorrentResult, InsertTorrentResult>> InsertTorrents(IReadOnlyCollection<IngestedTorrent> torrents, CancellationToken cancellationToken = default);
Task<DapperResult<List<IngestedTorrent>, List<IngestedTorrent>>> GetPublishableTorrents(CancellationToken cancellationToken = default);
Task<DapperResult<UpdatedTorrentResult, UpdatedTorrentResult>> SetTorrentsProcessed(IReadOnlyCollection<IngestedTorrent> torrents, CancellationToken cancellationToken = default);
Task<bool> PageIngested(string pageId, CancellationToken cancellationToken = default);
Task<DapperResult<PageIngestedResult, PageIngestedResult>> MarkPageAsIngested(string pageId, CancellationToken cancellationToken = default);
Task<DapperResult<int, int>> GetRowCountImdbMetadata(CancellationToken cancellationToken = default);
Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default);
Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, TorrentType parsedTorrentTorrentType, string? parsedTorrentYear, CancellationToken cancellationToken = default);
Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default);
Task InsertFiles(IEnumerable<TorrentFile> files, CancellationToken cancellationToken = default);
Task InsertSubtitles(IEnumerable<SubtitleFile> subtitles, CancellationToken cancellationToken = default);
Task<List<TorrentFile>> GetTorrentFiles(string infoHash, CancellationToken cancellationToken = default);
}

@@ -0,0 +1,5 @@
namespace SharedContracts.Dapper;
public record InsertTorrentResult(bool Success, int InsertedCount = 0, string? ErrorMessage = null);
public record UpdatedTorrentResult(bool Success, int UpdatedCount = 0, string? ErrorMessage = null);
public record PageIngestedResult(bool Success, string? ErrorMessage = null);