trigram performance increased and housekeeping (#184)
* Add new indexes, and change the year column to int
* Change GiST to GIN, and change year to int
* Producer changes for the new GIN query
* Fully map the RTN response using the JSON dump from Pydantic. Also updates RTN to 0.1.9.
* Add housekeeping script to reconcile IMDb ids
* Join Torrent onto the ingested torrent table. Ensures that a torrent can always find the details of where it came from, and how it was parsed.
* Version bump for release
* Fix missing quote on table name
This commit is contained in:
@@ -9,9 +9,9 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
|
||||
const string query =
|
||||
"""
|
||||
INSERT INTO ingested_torrents
|
||||
("name", "source", "category", "info_hash", "size", "seeders", "leechers", "imdb", "processed", "createdAt", "updatedAt")
|
||||
("name", "source", "category", "info_hash", "size", "seeders", "leechers", "imdb", "processed", "createdAt", "updatedAt", "rtn_response")
|
||||
VALUES
|
||||
(@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt)
|
||||
(@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt, @RtnResponse::jsonb)
|
||||
ON CONFLICT (source, info_hash) DO NOTHING
|
||||
""";
|
||||
|
||||
@@ -110,21 +110,21 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
|
||||
/// <summary>
/// Reads one batch of rows from <c>imdb_metadata</c>, ordered by IMDb id, whose year is at most
/// <paramref name="year"/> and whose id sorts after <paramref name="stateLastProcessedImdbId"/>.
/// </summary>
/// <param name="year">Upper bound (inclusive) for the <c>year</c> column.</param>
/// <param name="batchSize">Maximum number of rows to return (<c>LIMIT</c>).</param>
/// <param name="stateLastProcessedImdbId">
/// Exclusive lower bound for <c>imdb_id</c>. <c>null</c> is treated as "no cursor yet" and starts from the first id.
/// </param>
/// <param name="cancellationToken">Forwarded to <c>ExecuteCommandAsync</c>.</param>
/// <returns>The matching entries as a list.</returns>
public async Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default) =>
    await ExecuteCommandAsync(async connection =>
    {
        const string query = @"SELECT imdb_id AS ImdbId, title as Title, category as Category, year as Year, adult as Adult FROM imdb_metadata WHERE Year <= @Year AND imdb_id > @LastProcessedImdbId ORDER BY ImdbId LIMIT @BatchSize";

        var parameters = new
        {
            Year = year,
            // In SQL, "imdb_id > NULL" is NULL for every row, so a null cursor would silently
            // yield an empty batch. An empty string sorts before every non-empty id instead.
            LastProcessedImdbId = stateLastProcessedImdbId ?? string.Empty,
            BatchSize = batchSize,
        };

        var result = await connection.QueryAsync<ImdbEntry>(query, parameters);

        return result.ToList();
    }, "Error getting imdb metadata.", cancellationToken);
|
||||
|
||||
/// <summary>
/// Asks the <c>search_imdb_meta</c> database function for the single best IMDb match for a parsed torrent title.
/// </summary>
/// <param name="parsedTorrentTitle">Title to search for. When <c>null</c>, no query is issued and <c>null</c> is returned.</param>
/// <param name="torrentType">
/// <c>"movie"</c> (case-insensitive) searches the <c>movie</c> category; any other value searches <c>tvSeries</c>.
/// </param>
/// <param name="year">Optional year passed to the search function; <c>null</c> is sent as SQL <c>NULL</c>.</param>
/// <param name="cancellationToken">Forwarded to <c>ExecuteCommandAsync</c>.</param>
/// <returns>The first row returned by the search function, or <c>null</c> when there is none.</returns>
public async Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string torrentType, int? year, CancellationToken cancellationToken = default)
{
    // Nothing to search for; the previous implementation dereferenced the null title here.
    if (parsedTorrentTitle is null)
    {
        return null;
    }

    return await ExecuteCommandAsync(async connection =>
    {
        // All caller-supplied values travel as bound parameters instead of being spliced into
        // the SQL text, so a torrent title can never change the shape of the statement.
        const string query =
            """
            select "imdb_id" as "ImdbId", "title" as "Title", "year" as "Year", "score" as Score
            from search_imdb_meta(@Title, @Category, @Year, 1)
            """;

        var parameters = new
        {
            // Quote characters are still removed so the search term sent to the database is
            // exactly what the interpolated query used to send.
            Title = parsedTorrentTitle.Replace("'", "").Replace("\"", ""),
            Category = torrentType.Equals("movie", StringComparison.OrdinalIgnoreCase) ? "movie" : "tvSeries",
            Year = year,
        };

        var result = await connection.QueryAsync<ImdbEntry>(query, parameters);

        return result.FirstOrDefault();
    }, "Error finding imdb metadata.", cancellationToken);
}
|
||||
|
||||
public Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default) =>
|
||||
@@ -134,9 +134,9 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
|
||||
const string query =
|
||||
"""
|
||||
INSERT INTO "torrents"
|
||||
("infoHash", "provider", "torrentId", "title", "size", "type", "uploadDate", "seeders", "trackers", "languages", "resolution", "reviewed", "opened", "createdAt", "updatedAt")
|
||||
("infoHash", "ingestedTorrentId", "provider", "title", "size", "type", "uploadDate", "seeders", "languages", "resolution", "reviewed", "opened", "createdAt", "updatedAt")
|
||||
VALUES
|
||||
(@InfoHash, @Provider, @TorrentId, @Title, 0, @Type, NOW(), @Seeders, NULL, NULL, NULL, false, false, NOW(), NOW())
|
||||
(@InfoHash, @IngestedTorrentId, @Provider, @Title, 0, @Type, NOW(), @Seeders, NULL, NULL, false, false, NOW(), NOW())
|
||||
ON CONFLICT ("infoHash") DO NOTHING
|
||||
""";
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ public interface IDataStorage
|
||||
Task<DapperResult<PageIngestedResult, PageIngestedResult>> MarkPageAsIngested(string pageId, CancellationToken cancellationToken = default);
|
||||
Task<DapperResult<int, int>> GetRowCountImdbMetadata(CancellationToken cancellationToken = default);
|
||||
Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default);
|
||||
Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, string parsedTorrentTorrentType, string? parsedTorrentYear, CancellationToken cancellationToken = default);
|
||||
Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string parsedTorrentTorrentType, int? parsedTorrentYear, CancellationToken cancellationToken = default);
|
||||
Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default);
|
||||
Task InsertFiles(IEnumerable<TorrentFile> files, CancellationToken cancellationToken = default);
|
||||
Task InsertSubtitles(IEnumerable<SubtitleFile> subtitles, CancellationToken cancellationToken = default);
|
||||
|
||||
14
src/shared/Extensions/JsonExtensions.cs
Normal file
14
src/shared/Extensions/JsonExtensions.cs
Normal file
@@ -0,0 +1,14 @@
|
||||
namespace SharedContracts.Extensions;
|
||||
|
||||
/// <summary>
/// Extension helpers for turning objects into JSON text.
/// </summary>
public static class JsonExtensions
{
    // One shared options instance reused by every AsJson call.
    private static readonly JsonSerializerOptions SerializerSettings = BuildSerializerSettings();

    /// <summary>
    /// Serializes <paramref name="obj"/> to a JSON string using the shared serializer settings
    /// (camelCase property names, non-indented output).
    /// </summary>
    /// <typeparam name="T">Static type used for serialization.</typeparam>
    /// <param name="obj">The value to serialize.</param>
    /// <returns>The JSON text.</returns>
    public static string AsJson<T>(this T obj)
    {
        return JsonSerializer.Serialize(obj, SerializerSettings);
    }

    // Builds the serializer configuration used by AsJson.
    private static JsonSerializerOptions BuildSerializerSettings()
    {
        var settings = new JsonSerializerOptions();
        settings.PropertyNamingPolicy = JsonNamingPolicy.CamelCase;
        settings.WriteIndented = false;
        settings.ReferenceHandler = ReferenceHandler.IgnoreCycles;
        settings.NumberHandling = JsonNumberHandling.Strict;
        return settings;
    }
}
|
||||
@@ -1,9 +1,7 @@
|
||||
// Global using directives
|
||||
|
||||
global using System.Collections.Concurrent;
|
||||
global using System.Globalization;
|
||||
global using System.Text;
|
||||
global using System.Text.Json;
|
||||
global using System.Text.Json.Serialization;
|
||||
global using Dapper;
|
||||
global using MassTransit;
|
||||
global using Microsoft.AspNetCore.Builder;
|
||||
@@ -17,4 +15,4 @@ global using Python.Runtime;
|
||||
global using Serilog;
|
||||
global using SharedContracts.Configuration;
|
||||
global using SharedContracts.Extensions;
|
||||
global using SharedContracts.Models;
|
||||
global using SharedContracts.Models;
|
||||
|
||||
@@ -7,4 +7,5 @@ public class ImdbEntry
|
||||
public string? Category { get; set; }
|
||||
public string? Year { get; set; }
|
||||
public bool? Adult { get; set; }
|
||||
public decimal? Score { get; set; }
|
||||
}
|
||||
|
||||
@@ -12,7 +12,9 @@ public class IngestedTorrent
|
||||
public int Leechers { get; set; }
|
||||
public string? Imdb { get; set; }
|
||||
|
||||
public bool Processed { get; set; } = false;
|
||||
public bool Processed { get; set; }
|
||||
public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
|
||||
public DateTime UpdatedAt { get; set; } = DateTime.UtcNow;
|
||||
|
||||
public string? RtnResponse { get; set; }
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ namespace SharedContracts.Models;
|
||||
public class Torrent
|
||||
{
|
||||
public string? InfoHash { get; set; }
|
||||
public long? IngestedTorrentId { get; set; }
|
||||
public string? Provider { get; set; }
|
||||
public string? TorrentId { get; set; }
|
||||
public string? Title { get; set; }
|
||||
|
||||
@@ -7,7 +7,7 @@ public interface IPythonEngineService
|
||||
Task InitializePythonEngine(CancellationToken cancellationToken);
|
||||
T ExecuteCommandOrScript<T>(string command, PyModule module, bool throwOnErrors);
|
||||
T ExecutePythonOperation<T>(Func<T> operation, string operationName, bool throwOnErrors);
|
||||
T ExecutePythonOperationWithDefault<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors);
|
||||
T ExecutePythonOperationWithDefault<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors, bool logErrors);
|
||||
Task StopPythonEngine(CancellationToken cancellationToken);
|
||||
dynamic? Sys { get; }
|
||||
}
|
||||
@@ -53,10 +53,10 @@ public class PythonEngineService(ILogger<PythonEngineService> logger) : IPythonE
|
||||
}, nameof(ExecuteCommandOrScript), throwOnErrors);
|
||||
|
||||
/// <summary>
/// Runs <paramref name="operation"/> through <see cref="ExecutePythonOperationWithDefault{T}"/>,
/// passing <c>default</c> as the default value and with error logging enabled.
/// </summary>
/// <param name="operation">The work to execute.</param>
/// <param name="operationName">Name describing the operation.</param>
/// <param name="throwOnErrors">Forwarded unchanged.</param>
/// <returns>Whatever <see cref="ExecutePythonOperationWithDefault{T}"/> returns.</returns>
public T ExecutePythonOperation<T>(Func<T> operation, string operationName, bool throwOnErrors)
{
    return ExecutePythonOperationWithDefault(operation, default, operationName, throwOnErrors, logErrors: true);
}
|
||||
|
||||
/// <summary>
/// Public entry point that forwards every argument unchanged to <c>ExecutePythonOperationInternal</c>.
/// </summary>
/// <param name="operation">The work to execute.</param>
/// <param name="defaultValue">Fallback value handed to the internal executor (presumably returned when the operation fails — confirm against the internal method).</param>
/// <param name="operationName">Name describing the operation.</param>
/// <param name="throwOnErrors">Forwarded unchanged.</param>
/// <param name="logErrors">Forwarded unchanged; the internal executor only logs a caught exception when this is <c>true</c>.</param>
/// <returns>The result produced by the internal executor.</returns>
public T ExecutePythonOperationWithDefault<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors, bool logErrors)
{
    return ExecutePythonOperationInternal(operation, defaultValue, operationName, throwOnErrors, logErrors);
}
|
||||
|
||||
public void ExecuteOnGIL(Action act, bool throwOnErrors)
|
||||
{
|
||||
@@ -95,7 +95,7 @@ public class PythonEngineService(ILogger<PythonEngineService> logger) : IPythonE
|
||||
}
|
||||
|
||||
// ReSharper disable once EntityNameCapturedOnly.Local
|
||||
private T ExecutePythonOperationInternal<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors)
|
||||
private T ExecutePythonOperationInternal<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors, bool logErrors)
|
||||
{
|
||||
Sys ??= LoadSys();
|
||||
|
||||
@@ -108,7 +108,10 @@ public class PythonEngineService(ILogger<PythonEngineService> logger) : IPythonE
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.LogError(ex, "Python Error: {Message} ({OperationName})", ex.Message, nameof(operationName));
|
||||
if (logErrors)
|
||||
{
|
||||
Logger.LogError(ex, "Python Error: {Message} ({OperationName})", ex.Message, nameof(operationName));
|
||||
}
|
||||
|
||||
if (throwOnErrors)
|
||||
{
|
||||
|
||||
@@ -1,6 +1,3 @@
|
||||
namespace SharedContracts.Python.RTN;
|
||||
|
||||
/// <summary>
/// Outcome of parsing a torrent title.
/// </summary>
/// <param name="Success">Whether parsing produced a response.</param>
/// <param name="Response">The parsed RTN data; <c>null</c> when parsing did not succeed.</param>
public record ParseTorrentTitleResponse(
    bool Success,
    RtnResponse? Response);
|
||||
@@ -14,34 +14,31 @@ public class RankTorrentName : IRankTorrentName
|
||||
}
|
||||
|
||||
/// <summary>
/// Parses a torrent title with the RTN Python module, running the call through the Python engine service.
/// </summary>
/// <param name="title">The raw torrent title to parse.</param>
/// <returns>
/// The mapped parse result. A response with <c>Success = false</c> and no data is supplied to the
/// engine service as the default value; errors are neither thrown nor logged for this call.
/// </returns>
public ParseTorrentTitleResponse Parse(string title)
{
    return _pythonEngineService.ExecutePythonOperationWithDefault(
        () =>
        {
            var pythonResult = _rtn?.parse(title);
            return ParseResult(pythonResult);
        },
        new ParseTorrentTitleResponse(false, null),
        nameof(Parse),
        throwOnErrors: false,
        logErrors: false);
}
|
||||
|
||||
|
||||
/// <summary>
/// Converts the object returned by the RTN parser into a <see cref="ParseTorrentTitleResponse"/>
/// by dumping it to JSON and deserializing that JSON into an <see cref="RtnResponse"/>.
/// </summary>
/// <param name="result">The Python-side parse result; may be <c>null</c>.</param>
/// <returns>
/// A successful response carrying the deserialized data, or an unsuccessful response without data when
/// <paramref name="result"/> is <c>null</c>, its JSON dump is null or empty, or the JSON deserializes to <c>null</c>.
/// </returns>
private static ParseTorrentTitleResponse ParseResult(dynamic result)
{
    if (result == null)
    {
        return new(false, null);
    }

    // Pin the JSON down to a string so the calls below bind statically rather than through `dynamic`.
    string? json = result.model_dump_json()?.As<string?>();

    if (string.IsNullOrEmpty(json))
    {
        return new(false, null);
    }

    var response = JsonSerializer.Deserialize<RtnResponse>(json);

    // Deserialize yields null for the JSON literal `null`; never report success without data.
    if (response is null)
    {
        return new(false, null);
    }

    return new(true, response);
}
|
||||
|
||||
private void InitModules() =>
|
||||
|
||||
83
src/shared/Python/RTN/RtnResponse.cs
Normal file
83
src/shared/Python/RTN/RtnResponse.cs
Normal file
@@ -0,0 +1,83 @@
|
||||
namespace SharedContracts.Python.RTN;
|
||||
|
||||
/// <summary>
/// Strongly-typed shape of the JSON document describing a parsed torrent title.
/// Each property is bound to its snake_case JSON name through <see cref="JsonPropertyNameAttribute"/>.
/// </summary>
public class RtnResponse
{
    [JsonPropertyName("raw_title")]
    public string? RawTitle { get; set; }

    [JsonPropertyName("parsed_title")]
    public string? ParsedTitle { get; set; }

    [JsonPropertyName("fetch")]
    public bool Fetch { get; set; }

    [JsonPropertyName("is_4k")]
    public bool Is4K { get; set; }

    [JsonPropertyName("is_multi_audio")]
    public bool IsMultiAudio { get; set; }

    [JsonPropertyName("is_multi_subtitle")]
    public bool IsMultiSubtitle { get; set; }

    [JsonPropertyName("is_complete")]
    public bool IsComplete { get; set; }

    [JsonPropertyName("year")]
    public int Year { get; set; }

    [JsonPropertyName("resolution")]
    public List<string>? Resolution { get; set; }

    [JsonPropertyName("quality")]
    public List<string>? Quality { get; set; }

    [JsonPropertyName("season")]
    public List<int>? Season { get; set; }

    [JsonPropertyName("episode")]
    public List<int>? Episode { get; set; }

    [JsonPropertyName("codec")]
    public List<string>? Codec { get; set; }

    [JsonPropertyName("audio")]
    public List<string>? Audio { get; set; }

    [JsonPropertyName("subtitles")]
    public List<string>? Subtitles { get; set; }

    [JsonPropertyName("language")]
    public List<string>? Language { get; set; }

    [JsonPropertyName("bit_depth")]
    public List<int>? BitDepth { get; set; }

    [JsonPropertyName("hdr")]
    public string? Hdr { get; set; }

    [JsonPropertyName("proper")]
    public bool Proper { get; set; }

    [JsonPropertyName("repack")]
    public bool Repack { get; set; }

    [JsonPropertyName("remux")]
    public bool Remux { get; set; }

    [JsonPropertyName("upscaled")]
    public bool Upscaled { get; set; }

    [JsonPropertyName("remastered")]
    public bool Remastered { get; set; }

    [JsonPropertyName("directors_cut")]
    public bool DirectorsCut { get; set; }

    [JsonPropertyName("extended")]
    public bool Extended { get; set; }

    /// <summary>
    /// <c>true</c> when the title carries neither season nor episode numbers.
    /// An empty list is treated the same as a missing one: a JSON array such as <c>[]</c>
    /// deserializes to an empty list rather than <c>null</c>, and a null-only check would
    /// then never classify such a title as a movie.
    /// </summary>
    public bool IsMovie => Season is not { Count: > 0 } && Episode is not { Count: > 0 };

    /// <summary>Serializes this instance to JSON via the <c>AsJson</c> extension.</summary>
    public string ToJson() => this.AsJson();
}
|
||||
Reference in New Issue
Block a user