[wip] bridge python and c# and bring in rank torrent name (#177)

* [wip] bridge python and c# and bring in rank torrent name

* Container restores package now

Includes two dev scripts to install the python packages locally for debugging purposes.

* Introduce slightly tuned title matching scoring, by making it length aware

This should help with sequels, such as Terminator 2 vs. Terminator, etc.

* Version bump

Also fixes postgres healthcheck so that it utilises the user from the stack.env file
This commit is contained in:
iPromKnight
2024-03-28 10:13:50 +00:00
committed by GitHub
parent cc15a69517
commit 4308a0ee71
24 changed files with 318 additions and 112 deletions

View File

@@ -115,10 +115,10 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
return result.ToList();
}, "Error getting imdb metadata.", cancellationToken);
public async Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, TorrentType torrentType, string? year, CancellationToken cancellationToken = default) =>
public async Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, string torrentType, string? year, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\" from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{(torrentType == TorrentType.Movie ? "movie" : "tvSeries")}'";
var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\" from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{(torrentType.Equals("movie", StringComparison.OrdinalIgnoreCase) ? "movie" : "tvSeries")}'";
query += year is not null ? $", '{year}'" : ", NULL";
query += ", 15)";

View File

@@ -9,7 +9,7 @@ public interface IDataStorage
Task<DapperResult<PageIngestedResult, PageIngestedResult>> MarkPageAsIngested(string pageId, CancellationToken cancellationToken = default);
Task<DapperResult<int, int>> GetRowCountImdbMetadata(CancellationToken cancellationToken = default);
Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default);
Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, TorrentType parsedTorrentTorrentType, string? parsedTorrentYear, CancellationToken cancellationToken = default);
Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, string parsedTorrentTorrentType, string? parsedTorrentYear, CancellationToken cancellationToken = default);
Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default);
Task InsertFiles(IEnumerable<TorrentFile> files, CancellationToken cancellationToken = default);
Task InsertSubtitles(IEnumerable<SubtitleFile> subtitles, CancellationToken cancellationToken = default);

View File

@@ -1,4 +1,3 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
namespace SharedContracts.Extensions;

View File

@@ -6,10 +6,11 @@ global using MassTransit;
global using Microsoft.AspNetCore.Builder;
global using Microsoft.AspNetCore.Hosting;
global using Microsoft.Extensions.Configuration;
global using Microsoft.Extensions.DependencyInjection;
global using Microsoft.Extensions.Hosting;
global using Microsoft.Extensions.Logging;
global using Npgsql;
global using PromKnight.ParseTorrentTitle;
global using Python.Runtime;
global using Serilog;
global using SharedContracts.Configuration;
global using SharedContracts.Extensions;

View File

@@ -0,0 +1,49 @@
namespace SharedContracts.Python;
/// <summary>
/// Hosted service that owns the lifetime of the embedded Python runtime (pythonnet).
/// On start it points pythonnet at the interpreter library named by the
/// <c>PYTHONNET_PYDLL</c> environment variable and initializes the engine; on stop it
/// shuts the engine down again.
/// </summary>
/// <param name="logger">Logger used to report initialization progress and failures.</param>
public class PythonEngineService(ILogger<PythonEngineService> logger) : IHostedService
{
    // Thread state handle returned by PythonEngine.BeginAllowThreads(); it must be handed
    // back to PythonEngine.EndAllowThreads() before the engine is shut down.
    private IntPtr _mainThreadState;

    // True only after PythonEngine.Initialize() and BeginAllowThreads() both succeeded.
    private bool _isInitialized;

    /// <summary>
    /// Initializes the Python engine once. The process is terminated with exit code 1 when
    /// <c>PYTHONNET_PYDLL</c> is missing or initialization throws.
    /// </summary>
    /// <param name="cancellationToken">Not observed; initialization is synchronous.</param>
    /// <returns>A completed task.</returns>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        if (_isInitialized)
        {
            return Task.CompletedTask;
        }

        try
        {
            var pythonDllEnv = Environment.GetEnvironmentVariable("PYTHONNET_PYDLL");

            if (string.IsNullOrWhiteSpace(pythonDllEnv))
            {
                logger.LogWarning("PYTHONNET_PYDLL env is not set. Exiting Application");
                Environment.Exit(1);
                return Task.CompletedTask;
            }

            Runtime.PythonDLL = pythonDllEnv;
            PythonEngine.Initialize();

            // Keep the returned handle so StopAsync can pass it back to EndAllowThreads().
            _mainThreadState = PythonEngine.BeginAllowThreads();

            _isInitialized = true;
            logger.LogInformation("Python engine initialized");
        }
        catch (Exception e)
        {
            logger.LogWarning(e, "Failed to initialize Python engine");
            Environment.Exit(1);
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Shuts the Python engine down. Does nothing when the engine was never initialized,
    /// so a failed or skipped start cannot hand an unset thread state to the runtime.
    /// </summary>
    /// <param name="cancellationToken">Not observed; shutdown is synchronous.</param>
    /// <returns>A completed task.</returns>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        if (!_isInitialized)
        {
            // StartAsync never completed successfully: _mainThreadState is still IntPtr.Zero
            // and there is no engine to shut down.
            return Task.CompletedTask;
        }

        PythonEngine.EndAllowThreads(_mainThreadState);
        PythonEngine.Shutdown();

        // Make a repeated StopAsync call a no-op.
        _mainThreadState = IntPtr.Zero;
        _isInitialized = false;

        return Task.CompletedTask;
    }
}

View File

@@ -0,0 +1,8 @@
namespace SharedContracts.Python.RTN;
/// <summary>
/// Title parsing, trash detection and title matching operations for torrent names.
/// </summary>
public interface IRankTorrentName
{
/// <summary>Parses a torrent title into a <see cref="ParseTorrentTitleResponse"/>.</summary>
/// <param name="title">The raw torrent title to parse.</param>
/// <returns>The parse outcome; check <c>Success</c> before using the other fields.</returns>
ParseTorrentTitleResponse Parse(string title);
/// <summary>Checks whether a torrent title is classified as trash.</summary>
/// <param name="title">The raw torrent title to check.</param>
/// <returns><c>true</c> when the title is classified as trash; otherwise <c>false</c>.</returns>
bool IsTrash(string title);
/// <summary>Checks whether two titles are considered a match.</summary>
/// <param name="title">The first title.</param>
/// <param name="checkTitle">The title to compare against <paramref name="title"/>.</param>
/// <returns><c>true</c> when the titles are considered a match; otherwise <c>false</c>.</returns>
bool TitleMatch(string title, string checkTitle);
}

View File

@@ -0,0 +1,6 @@
namespace SharedContracts.Python.RTN;
/// <summary>
/// Result of parsing a torrent title.
/// </summary>
/// <param name="Success">Whether the title was parsed successfully.</param>
/// <param name="ParsedTitle">The title extracted from the torrent name.</param>
/// <param name="Year">The extracted year.</param>
/// <param name="Season">Season numbers found in the title, or <c>null</c> when none were supplied.</param>
/// <param name="Episode">Episode numbers found in the title, or <c>null</c> when none were supplied.</param>
public record ParseTorrentTitleResponse(bool Success, string ParsedTitle, int Year, int[]? Season = null, int[]? Episode = null)
{
    /// <summary>
    /// <c>true</c> when neither season nor episode information is present.
    /// </summary>
    public bool IsMovie => this is { Season: null, Episode: null };
}

View File

@@ -0,0 +1,118 @@
namespace SharedContracts.Python.RTN;
/// <summary>
/// <see cref="IRankTorrentName"/> implementation that forwards each call to the Python
/// <c>RTN</c> module through pythonnet. Every call into Python is made while holding the
/// GIL, and any exception raised during a call is logged and turned into a negative result.
/// </summary>
public class RankTorrentName : IRankTorrentName
{
    private const string SysModuleName = "sys";
    private const string RtnModuleName = "RTN";

    private readonly ILogger<RankTorrentName> _logger;

    // Imported Python modules; populated once by InitModules().
    private dynamic? _sys;
    private dynamic? _rtn;

    /// <summary>
    /// Creates the bridge and imports the required Python modules.
    /// </summary>
    /// <param name="logger">Logger used to report import and call failures.</param>
    public RankTorrentName(ILogger<RankTorrentName> logger)
    {
        _logger = logger;
        InitModules();
    }

    /// <summary>
    /// Parses a torrent title by calling <c>RTN.parse</c>.
    /// </summary>
    /// <param name="title">The raw torrent title.</param>
    /// <returns>
    /// The parsed result, or a response with <c>Success == false</c> when the module is
    /// unavailable, the call returns nothing, or an exception is thrown.
    /// </returns>
    public ParseTorrentTitleResponse Parse(string title)
    {
        try
        {
            using var py = Py.GIL();

            var result = _rtn?.parse(title);

            if (result == null)
            {
                return new(false, string.Empty, 0);
            }

            return ParseResult(result);
        }
        catch (Exception e)
        {
            _logger.LogError(e, "Failed to parse title");
            return new(false, string.Empty, 0);
        }
    }

    /// <summary>
    /// Checks whether a title is trash by calling <c>RTN.check_trash</c>.
    /// </summary>
    /// <param name="title">The raw torrent title.</param>
    /// <returns>
    /// The value returned by Python, or <c>false</c> when the module is unavailable, the
    /// call returns nothing, or an exception is thrown.
    /// </returns>
    public bool IsTrash(string title)
    {
        try
        {
            using var py = Py.GIL();

            var result = _rtn?.check_trash(title);

            if (result == null)
            {
                return false;
            }

            var response = result.As<bool>() ?? false;

            return response;
        }
        catch (Exception e)
        {
            // Previously logged "Failed to parse title", which pointed at the wrong operation.
            _logger.LogError(e, "Failed to check if title is trash");
            return false;
        }
    }

    /// <summary>
    /// Checks whether two titles match by calling <c>RTN.title_match</c>.
    /// </summary>
    /// <param name="title">The first title.</param>
    /// <param name="checkTitle">The title to compare against <paramref name="title"/>.</param>
    /// <returns>
    /// The value returned by Python, or <c>false</c> when the module is unavailable, the
    /// call returns nothing, or an exception is thrown.
    /// </returns>
    public bool TitleMatch(string title, string checkTitle)
    {
        try
        {
            using var py = Py.GIL();

            var result = _rtn?.title_match(title, checkTitle);

            if (result == null)
            {
                return false;
            }

            var response = result.As<bool>() ?? false;

            return response;
        }
        catch (Exception e)
        {
            // Previously logged "Failed to parse title", which pointed at the wrong operation.
            _logger.LogError(e, "Failed to match title");
            return false;
        }
    }

    /// <summary>
    /// Converts the Python parse result into a <see cref="ParseTorrentTitleResponse"/> by
    /// reading its <c>parsed_title</c>, <c>year</c>, <c>season</c> and <c>episode</c> attributes.
    /// Must be called while the GIL is held (the caller, <see cref="Parse"/>, holds it).
    /// </summary>
    /// <param name="result">The object returned by <c>RTN.parse</c>.</param>
    /// <returns>A successful response; empty season/episode lists are mapped to <c>null</c>.</returns>
    private static ParseTorrentTitleResponse ParseResult(dynamic result)
    {
        var parsedTitle = result.GetAttr("parsed_title")?.As<string>() ?? string.Empty;
        var year = result.GetAttr("year")?.As<int>() ?? 0;
        var seasonList = result.GetAttr("season")?.As<PyList>();
        var episodeList = result.GetAttr("episode")?.As<PyList>();

        // Empty lists become null so ParseTorrentTitleResponse.IsMovie treats them as absent.
        int[]? seasons = seasonList?.Length() > 0 ? seasonList.As<int[]>() : null;
        int[]? episodes = episodeList?.Length() > 0 ? episodeList.As<int[]>() : null;

        return new ParseTorrentTitleResponse(true, parsedTitle, year, seasons, episodes);
    }

    /// <summary>
    /// Imports <c>sys</c>, appends the application's <c>python</c> directory to
    /// <c>sys.path</c>, then imports <c>RTN</c>.
    /// </summary>
    private void InitModules()
    {
        using var py = Py.GIL();

        _sys = Py.Import(SysModuleName);

        if (_sys == null)
        {
            _logger.LogError("Failed to import Python module: {ModuleName}", SysModuleName);
            return;
        }

        // Packages are looked up in the "python" directory next to the application binaries.
        _sys.path.append(Path.Combine(AppContext.BaseDirectory, "python"));

        _rtn = Py.Import(RtnModuleName);

        if (_rtn == null)
        {
            _logger.LogError("Failed to import Python module: {ModuleName}", RtnModuleName);
        }
    }
}

View File

@@ -0,0 +1,13 @@
namespace SharedContracts.Python;
/// <summary>
/// Dependency-injection helpers for the embedded Python engine.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="PythonEngineService"/> as a singleton and exposes that same
    /// instance as a hosted service.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection RegisterPythonEngine(this IServiceCollection services) =>
        services
            .AddSingleton<PythonEngineService>()
            // Resolve the singleton instead of letting the host construct a second instance.
            .AddHostedService(provider => provider.GetRequiredService<PythonEngineService>());
}

View File

@@ -16,7 +16,7 @@
<PackageReference Include="MassTransit.Abstractions" Version="8.2.0" />
<PackageReference Include="MassTransit.RabbitMQ" Version="8.2.0" />
<PackageReference Include="Npgsql" Version="8.0.2" />
<PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
<PackageReference Include="pythonnet" Version="3.0.3" />
<PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Serilog.Settings.Configuration" Version="8.0.0" />