From 4308a0ee7110a5a694f96e8725a8f07dbcea4a37 Mon Sep 17 00:00:00 2001 From: iPromKnight <156901906+iPromKnight@users.noreply.github.com> Date: Thu, 28 Mar 2024 10:13:50 +0000 Subject: [PATCH] [wip] bridge python and c# and bring in rank torrent name (#177) * [wip] bridge python and c# and bring in rank torrent name * Container restores package now Includes two dev scripts to install the python packages locally for debugging purposes. * Introduce slightly tuned title matching scoring, by making it length aware this should help with sequels such as Terminator 2, vs Terminator etc * Version bump Also fixes postgres healthcheck so that it utilises the user from the stack.env file --- deployment/docker/docker-compose.yaml | 55 ++------ .../docker/src/components/infrastructure.yaml | 2 +- .../docker/src/components/knightcrawler.yaml | 14 +-- src/producer/.dockerignore | 2 + src/producer/Producer.sln | 6 + src/producer/eng/install-python-reqs.ps1 | 2 + src/producer/eng/install-python-reqs.sh | 4 + src/producer/src/.dockerignore | 2 + src/producer/src/Dockerfile | 12 +- .../Crawlers/Dmm/DebridMediaManagerCrawler.cs | 82 ++++++------ .../DataProcessing/LengthAwareRatioScorer.cs | 24 ++++ .../ServiceCollectionExtensions.cs | 3 +- src/producer/src/GlobalUsings.cs | 9 +- src/producer/src/Producer.csproj | 7 +- src/shared/Dapper/DapperDataStorage.cs | 4 +- src/shared/Dapper/IDataStorage.cs | 2 +- .../Extensions/ConfigurationExtensions.cs | 1 - src/shared/GlobalUsings.cs | 3 +- src/shared/Python/PythonEngineService.cs | 49 ++++++++ src/shared/Python/RTN/IRankTorrentName.cs | 8 ++ .../Python/RTN/ParseTorrentTitleResponse.cs | 6 + src/shared/Python/RTN/RankTorrentName.cs | 118 ++++++++++++++++++ .../Python/ServiceCollectionExtensions.cs | 13 ++ src/shared/SharedContracts.csproj | 2 +- 24 files changed, 318 insertions(+), 112 deletions(-) create mode 100644 src/producer/.dockerignore create mode 100644 src/producer/eng/install-python-reqs.ps1 create mode 100644 
src/producer/eng/install-python-reqs.sh create mode 100644 src/producer/src/.dockerignore create mode 100644 src/producer/src/Features/DataProcessing/LengthAwareRatioScorer.cs create mode 100644 src/shared/Python/PythonEngineService.cs create mode 100644 src/shared/Python/RTN/IRankTorrentName.cs create mode 100644 src/shared/Python/RTN/ParseTorrentTitleResponse.cs create mode 100644 src/shared/Python/RTN/RankTorrentName.cs create mode 100644 src/shared/Python/ServiceCollectionExtensions.cs diff --git a/deployment/docker/docker-compose.yaml b/deployment/docker/docker-compose.yaml index dc5e361..bba9e74 100644 --- a/deployment/docker/docker-compose.yaml +++ b/deployment/docker/docker-compose.yaml @@ -17,12 +17,8 @@ services: ## All downloaded metadata is stored in this database. postgres: env_file: stack.env - environment: - PGUSER: ${POSTGRES_USER} healthcheck: - test: - - CMD-SHELL - - pg_isready + test: [ "CMD", "sh", "-c", "pg_isready -h localhost -U $$POSTGRES_USER" ] timeout: 10s interval: 10s retries: 3 @@ -44,9 +40,7 @@ services: redis: env_file: stack.env healthcheck: - test: - - CMD-SHELL - - redis-cli ping + test: ["CMD-SHELL", "redis-cli ping"] timeout: 10s interval: 10s retries: 3 @@ -66,9 +60,7 @@ services: rabbitmq: env_file: stack.env healthcheck: - test: - - CMD-SHELL - - rabbitmq-diagnostics -q ping + test: ["CMD-SHELL", "rabbitmq-diagnostics -q ping"] timeout: 10s interval: 10s retries: 3 @@ -91,22 +83,17 @@ services: depends_on: metadata: condition: service_completed_successfully - required: true migrator: condition: service_completed_successfully - required: true postgres: condition: service_healthy - required: true rabbitmq: condition: service_healthy - required: true redis: condition: service_healthy - required: true env_file: stack.env hostname: knightcrawler-addon - image: gabisonfire/knightcrawler-addon:2.0.7 + image: gabisonfire/knightcrawler-addon:2.0.8 labels: logging: promtail networks: @@ -120,21 +107,16 @@ services: depends_on: 
metadata: condition: service_completed_successfully - required: true migrator: condition: service_completed_successfully - required: true postgres: condition: service_healthy - required: true rabbitmq: condition: service_healthy - required: true redis: condition: service_healthy - required: true env_file: stack.env - image: gabisonfire/knightcrawler-consumer:2.0.7 + image: gabisonfire/knightcrawler-consumer:2.0.8 labels: logging: promtail networks: @@ -146,21 +128,16 @@ services: depends_on: metadata: condition: service_completed_successfully - required: true migrator: condition: service_completed_successfully - required: true postgres: condition: service_healthy - required: true rabbitmq: condition: service_healthy - required: true redis: condition: service_healthy - required: true env_file: stack.env - image: gabisonfire/knightcrawler-debrid-collector:2.0.7 + image: gabisonfire/knightcrawler-debrid-collector:2.0.8 labels: logging: promtail networks: @@ -173,9 +150,8 @@ services: depends_on: migrator: condition: service_completed_successfully - required: true env_file: stack.env - image: gabisonfire/knightcrawler-metadata:2.0.7 + image: gabisonfire/knightcrawler-metadata:2.0.8 networks: - knightcrawler-network restart: "no" @@ -185,9 +161,8 @@ services: depends_on: postgres: condition: service_healthy - required: true env_file: stack.env - image: gabisonfire/knightcrawler-migrator:2.0.7 + image: gabisonfire/knightcrawler-migrator:2.0.8 networks: - knightcrawler-network restart: "no" @@ -197,21 +172,16 @@ services: depends_on: metadata: condition: service_completed_successfully - required: true migrator: condition: service_completed_successfully - required: true postgres: condition: service_healthy - required: true rabbitmq: condition: service_healthy - required: true redis: condition: service_healthy - required: true env_file: stack.env - image: gabisonfire/knightcrawler-producer:2.0.7 + image: gabisonfire/knightcrawler-producer:2.0.8 labels: logging: promtail 
networks: @@ -223,11 +193,10 @@ services: depends_on: qbittorrent: condition: service_healthy - required: true deploy: replicas: ${QBIT_REPLICAS:-0} env_file: stack.env - image: gabisonfire/knightcrawler-qbit-collector:2.0.7 + image: gabisonfire/knightcrawler-qbit-collector:2.0.8 labels: logging: promtail networks: @@ -246,9 +215,7 @@ services: TORRENTING_PORT: "6881" WEBUI_PORT: "8080" healthcheck: - test: - - CMD-SHELL - - curl --fail http://localhost:8080 + test: ["CMD-SHELL", "curl --fail http://localhost:8080"] timeout: 10s interval: 10s retries: 3 diff --git a/deployment/docker/src/components/infrastructure.yaml b/deployment/docker/src/components/infrastructure.yaml index 4de9400..e4043b3 100644 --- a/deployment/docker/src/components/infrastructure.yaml +++ b/deployment/docker/src/components/infrastructure.yaml @@ -13,7 +13,7 @@ x-redishealth: &redis-health <<: *base-health x-postgreshealth: &postgresdb-health - test: pg_isready + test: [ "CMD", "sh", "-c", "pg_isready -h localhost -U $$POSTGRES_USER" ] <<: *base-health x-qbit: &qbit-health diff --git a/deployment/docker/src/components/knightcrawler.yaml b/deployment/docker/src/components/knightcrawler.yaml index 3e677bb..6d4bcbb 100644 --- a/deployment/docker/src/components/knightcrawler.yaml +++ b/deployment/docker/src/components/knightcrawler.yaml @@ -20,7 +20,7 @@ x-depends: &knightcrawler-app-depends services: metadata: - image: gabisonfire/knightcrawler-metadata:2.0.7 + image: gabisonfire/knightcrawler-metadata:2.0.8 env_file: ../../.env networks: - knightcrawler-network @@ -30,7 +30,7 @@ services: condition: service_completed_successfully migrator: - image: gabisonfire/knightcrawler-migrator:2.0.7 + image: gabisonfire/knightcrawler-migrator:2.0.8 env_file: ../../.env networks: - knightcrawler-network @@ -40,7 +40,7 @@ services: condition: service_healthy addon: - image: gabisonfire/knightcrawler-addon:2.0.7 + image: gabisonfire/knightcrawler-addon:2.0.8 <<: [*knightcrawler-app, 
*knightcrawler-app-depends] restart: unless-stopped hostname: knightcrawler-addon @@ -48,22 +48,22 @@ services: - "7000:7000" consumer: - image: gabisonfire/knightcrawler-consumer:2.0.7 + image: gabisonfire/knightcrawler-consumer:2.0.8 <<: [*knightcrawler-app, *knightcrawler-app-depends] restart: unless-stopped debridcollector: - image: gabisonfire/knightcrawler-debrid-collector:2.0.7 + image: gabisonfire/knightcrawler-debrid-collector:2.0.8 <<: [*knightcrawler-app, *knightcrawler-app-depends] restart: unless-stopped producer: - image: gabisonfire/knightcrawler-producer:2.0.7 + image: gabisonfire/knightcrawler-producer:2.0.8 <<: [*knightcrawler-app, *knightcrawler-app-depends] restart: unless-stopped qbitcollector: - image: gabisonfire/knightcrawler-qbit-collector:2.0.7 + image: gabisonfire/knightcrawler-qbit-collector:2.0.8 <<: [*knightcrawler-app, *knightcrawler-app-depends] restart: unless-stopped depends_on: diff --git a/src/producer/.dockerignore b/src/producer/.dockerignore new file mode 100644 index 0000000..65d3c05 --- /dev/null +++ b/src/producer/.dockerignore @@ -0,0 +1,2 @@ +**/python/ +.idea/ \ No newline at end of file diff --git a/src/producer/Producer.sln b/src/producer/Producer.sln index ef1a909..c95c618 100644 --- a/src/producer/Producer.sln +++ b/src/producer/Producer.sln @@ -6,6 +6,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharedContracts", "..\share EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{FF5CA857-51E8-4446-8840-2A1D24ED3952}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{1AE7F597-24C4-4575-B59F-67A625D95C1E}" + ProjectSection(SolutionItems) = preProject + eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1 + eng\install-python-reqs.sh = eng\install-python-reqs.sh + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU diff --git a/src/producer/eng/install-python-reqs.ps1 
b/src/producer/eng/install-python-reqs.ps1 new file mode 100644 index 0000000..cb1275b --- /dev/null +++ b/src/producer/eng/install-python-reqs.ps1 @@ -0,0 +1,2 @@ +mkdir -p ../src/python +pip install --force-reinstall rank-torrent-name==0.1.6 -t ../src/python/ \ No newline at end of file diff --git a/src/producer/eng/install-python-reqs.sh b/src/producer/eng/install-python-reqs.sh new file mode 100644 index 0000000..16beb5c --- /dev/null +++ b/src/producer/eng/install-python-reqs.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +mkdir -p ../src/python +pip install --force-reinstall rank-torrent-name==0.1.6 -t ../src/python/ \ No newline at end of file diff --git a/src/producer/src/.dockerignore b/src/producer/src/.dockerignore new file mode 100644 index 0000000..65d3c05 --- /dev/null +++ b/src/producer/src/.dockerignore @@ -0,0 +1,2 @@ +**/python/ +.idea/ \ No newline at end of file diff --git a/src/producer/src/Dockerfile b/src/producer/src/Dockerfile index d6e115a..b8cfda4 100644 --- a/src/producer/src/Dockerfile +++ b/src/producer/src/Dockerfile @@ -8,13 +8,21 @@ WORKDIR /src/producer/src RUN dotnet restore -a $TARGETARCH RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH - -FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine +FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19 WORKDIR /app + +ENV PYTHONUNBUFFERED=1 +RUN apk add --update --no-cache python3=~3.11.8-r0 py3-pip && ln -sf python3 /usr/bin/python + COPY --from=build /src/out . 
+RUN rm -rf /app/python && mkdir -p /app/python +RUN pip3 install --force-reinstall rank-torrent-name==0.1.6 -t /app/python RUN addgroup -S producer && adduser -S -G producer producer USER producer HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ CMD pgrep -f dotnet || exit 1 + +ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0 + ENTRYPOINT ["dotnet", "Producer.dll"] diff --git a/src/producer/src/Features/Crawlers/Dmm/DebridMediaManagerCrawler.cs b/src/producer/src/Features/Crawlers/Dmm/DebridMediaManagerCrawler.cs index 673522d..675a6c3 100644 --- a/src/producer/src/Features/Crawlers/Dmm/DebridMediaManagerCrawler.cs +++ b/src/producer/src/Features/Crawlers/Dmm/DebridMediaManagerCrawler.cs @@ -1,3 +1,5 @@ +using Microsoft.VisualBasic; + namespace Producer.Features.Crawlers.Dmm; public partial class DebridMediaManagerCrawler( @@ -5,11 +7,12 @@ public partial class DebridMediaManagerCrawler( ILogger logger, IDataStorage storage, GithubConfiguration githubConfiguration, - IParseTorrentTitle parseTorrentTitle, + IRankTorrentName rankTorrentName, IDistributedCache cache) : BaseCrawler(logger, storage) { [GeneratedRegex("""""")] private static partial Regex HashCollectionMatcher(); + private LengthAwareRatioScorer _lengthAwareRatioScorer = new(); private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main"; protected override IReadOnlyDictionary Mappings => new Dictionary(); @@ -107,74 +110,65 @@ public partial class DebridMediaManagerCrawler( { return null; } - - var parsedTorrent = parseTorrentTitle.Parse(torrentTitle.CleanTorrentTitleForImdb()); - var (cached, cachedResult) = await CheckIfInCacheAndReturn(parsedTorrent.Title); + var parsedTorrent = rankTorrentName.Parse(torrentTitle.CleanTorrentTitleForImdb()); - if (cached) - { - logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Title); - return new() - { - Source = Source, - Name = 
cachedResult.Title, - Imdb = cachedResult.ImdbId, - Size = bytesElement.GetInt64().ToString(), - InfoHash = hashElement.ToString(), - Seeders = 0, - Leechers = 0, - Category = parsedTorrent.TorrentType switch - { - TorrentType.Movie => "movies", - TorrentType.Tv => "tv", - _ => "unknown", - }, - }; - } - - var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Title, parsedTorrent.TorrentType, parsedTorrent.Year); - - if (imdbEntry.Count == 0) + if (!parsedTorrent.Success) { return null; } - var scoredTitles = await ScoreTitles(parsedTorrent, imdbEntry); + var (cached, cachedResult) = await CheckIfInCacheAndReturn(parsedTorrent.ParsedTitle); + + if (cached) + { + logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.ParsedTitle); + return MapToTorrent(cachedResult, bytesElement, hashElement, parsedTorrent); + } + + var year = parsedTorrent.Year != 0 ? parsedTorrent.Year.ToString() : null; + var imdbEntries = await Storage.FindImdbMetadata(parsedTorrent.ParsedTitle, parsedTorrent.IsMovie ? 
"movies" : "tv", year); + + if (imdbEntries.Count == 0) + { + return null; + } + + var scoredTitles = await ScoreTitles(parsedTorrent, imdbEntries); if (!scoredTitles.Success) { return null; } - logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", scoredTitles.BestMatch.Value.ImdbId, parsedTorrent.Title, scoredTitles.BestMatch.Value.Title, scoredTitles.BestMatch.Score); + logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", scoredTitles.BestMatch.Value.ImdbId, parsedTorrent.ParsedTitle, scoredTitles.BestMatch.Value.Title, scoredTitles.BestMatch.Score); - var torrent = new IngestedTorrent + return MapToTorrent(scoredTitles.BestMatch.Value, bytesElement, hashElement, parsedTorrent); + } + + private IngestedTorrent MapToTorrent(ImdbEntry result, JsonElement bytesElement, JsonElement hashElement, ParseTorrentTitleResponse parsedTorrent) => + new() { Source = Source, - Name = scoredTitles.BestMatch.Value.Title, - Imdb = scoredTitles.BestMatch.Value.ImdbId, + Name = result.Title, + Imdb = result.ImdbId, Size = bytesElement.GetInt64().ToString(), InfoHash = hashElement.ToString(), Seeders = 0, Leechers = 0, - Category = parsedTorrent.TorrentType switch + Category = parsedTorrent.IsMovie switch { - TorrentType.Movie => "movies", - TorrentType.Tv => "tv", - _ => "unknown", + true => "movies", + false => "tv", }, }; - return torrent; - } - - private async Task<(bool Success, ExtractedResult? BestMatch)> ScoreTitles(TorrentMetadata parsedTorrent, List imdbEntries) + private async Task<(bool Success, ExtractedResult? BestMatch)> ScoreTitles(ParseTorrentTitleResponse parsedTorrent, List imdbEntries) { - var lowerCaseTitle = parsedTorrent.Title.ToLowerInvariant(); + var lowerCaseTitle = parsedTorrent.ParsedTitle.ToLowerInvariant(); // Scoring directly operates on the List, no need for lookup table. 
- var scoredResults = Process.ExtractAll(new(){Title = lowerCaseTitle}, imdbEntries, x => x.Title?.ToLowerInvariant(), scorer: new DefaultRatioScorer(), cutoff: 90); + var scoredResults = Process.ExtractAll(new(){Title = lowerCaseTitle}, imdbEntries, x => x.Title?.ToLowerInvariant(), scorer: _lengthAwareRatioScorer, cutoff: 90); var best = scoredResults.MaxBy(x => x.Score); @@ -192,7 +186,7 @@ public partial class DebridMediaManagerCrawler( { var cacheOptions = new DistributedCacheEntryOptions { - AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(15), + AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1), }; return cache.SetStringAsync(lowerCaseTitle, JsonSerializer.Serialize(best.Value), cacheOptions); diff --git a/src/producer/src/Features/DataProcessing/LengthAwareRatioScorer.cs b/src/producer/src/Features/DataProcessing/LengthAwareRatioScorer.cs new file mode 100644 index 0000000..abc0974 --- /dev/null +++ b/src/producer/src/Features/DataProcessing/LengthAwareRatioScorer.cs @@ -0,0 +1,24 @@ +namespace Producer.Features.DataProcessing +{ + public class LengthAwareRatioScorer : IRatioScorer + { + private readonly IRatioScorer _defaultScorer = new DefaultRatioScorer(); + + public int Score(string input1, string input2) + { + var score = _defaultScorer.Score(input1, input2); + var lengthRatio = (double)Math.Min(input1.Length, input2.Length) / Math.Max(input1.Length, input2.Length); + var result = (int)(score * lengthRatio); + return result > 100 ? 100 : result; + } + + public int Score(string input1, string input2, PreprocessMode preprocessMode) + { + var score = _defaultScorer.Score(input1, input2, preprocessMode); + var lengthRatio = (double)Math.Min(input1.Length, input2.Length) / Math.Max(input1.Length, input2.Length); + var result = (int)(score * lengthRatio); + + return result > 100 ? 
100 : result; + } + } +} \ No newline at end of file diff --git a/src/producer/src/Features/DataProcessing/ServiceCollectionExtensions.cs b/src/producer/src/Features/DataProcessing/ServiceCollectionExtensions.cs index 4f1655b..32fe0cd 100644 --- a/src/producer/src/Features/DataProcessing/ServiceCollectionExtensions.cs +++ b/src/producer/src/Features/DataProcessing/ServiceCollectionExtensions.cs @@ -9,7 +9,8 @@ internal static class ServiceCollectionExtensions services.AddTransient(); services.AddTransient(); - services.AddSingleton(); + services.RegisterPythonEngine(); + services.AddSingleton(); services.AddStackExchangeRedisCache(options => { options.Configuration = redisConfiguration.ConnectionString; diff --git a/src/producer/src/GlobalUsings.cs b/src/producer/src/GlobalUsings.cs index 37a699f..5dec1bf 100644 --- a/src/producer/src/GlobalUsings.cs +++ b/src/producer/src/GlobalUsings.cs @@ -7,6 +7,8 @@ global using System.Text.RegularExpressions; global using System.Xml.Linq; global using FuzzySharp; global using FuzzySharp.Extractor; +global using FuzzySharp.PreProcess; +global using FuzzySharp.SimilarityRatio.Scorer; global using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; global using LZStringCSharp; global using MassTransit; @@ -23,11 +25,10 @@ global using Producer.Features.Crawlers.Torrentio; global using Producer.Features.CrawlerSupport; global using Producer.Features.DataProcessing; global using Producer.Features.JobSupport; -global using PromKnight.ParseTorrentTitle; -global using Serilog; global using SharedContracts.Configuration; global using SharedContracts.Dapper; global using SharedContracts.Extensions; global using SharedContracts.Models; -global using SharedContracts.Requests; -global using StackExchange.Redis; \ No newline at end of file +global using SharedContracts.Python; +global using SharedContracts.Python.RTN; +global using SharedContracts.Requests; \ No newline at end of file diff --git a/src/producer/src/Producer.csproj 
b/src/producer/src/Producer.csproj index 374290f..49f7ccf 100644 --- a/src/producer/src/Producer.csproj +++ b/src/producer/src/Producer.csproj @@ -19,6 +19,7 @@ + @@ -34,9 +35,9 @@ - - - + + + Always diff --git a/src/shared/Dapper/DapperDataStorage.cs b/src/shared/Dapper/DapperDataStorage.cs index b4e2e74..28ad700 100644 --- a/src/shared/Dapper/DapperDataStorage.cs +++ b/src/shared/Dapper/DapperDataStorage.cs @@ -115,10 +115,10 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf return result.ToList(); }, "Error getting imdb metadata.", cancellationToken); - public async Task> FindImdbMetadata(string? parsedTorrentTitle, TorrentType torrentType, string? year, CancellationToken cancellationToken = default) => + public async Task> FindImdbMetadata(string? parsedTorrentTitle, string torrentType, string? year, CancellationToken cancellationToken = default) => await ExecuteCommandAsync(async connection => { - var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\" from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{(torrentType == TorrentType.Movie ? "movie" : "tvSeries")}'"; + var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\" from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{(torrentType.Equals("movie", StringComparison.OrdinalIgnoreCase) ? "movie" : "tvSeries")}'"; query += year is not null ? 
$", '{year}'" : ", NULL"; query += ", 15)"; diff --git a/src/shared/Dapper/IDataStorage.cs b/src/shared/Dapper/IDataStorage.cs index d460f03..d9b0dfd 100644 --- a/src/shared/Dapper/IDataStorage.cs +++ b/src/shared/Dapper/IDataStorage.cs @@ -9,7 +9,7 @@ public interface IDataStorage Task> MarkPageAsIngested(string pageId, CancellationToken cancellationToken = default); Task> GetRowCountImdbMetadata(CancellationToken cancellationToken = default); Task> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default); - Task> FindImdbMetadata(string? parsedTorrentTitle, TorrentType parsedTorrentTorrentType, string? parsedTorrentYear, CancellationToken cancellationToken = default); + Task> FindImdbMetadata(string? parsedTorrentTitle, string parsedTorrentTorrentType, string? parsedTorrentYear, CancellationToken cancellationToken = default); Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default); Task InsertFiles(IEnumerable files, CancellationToken cancellationToken = default); Task InsertSubtitles(IEnumerable subtitles, CancellationToken cancellationToken = default); diff --git a/src/shared/Extensions/ConfigurationExtensions.cs b/src/shared/Extensions/ConfigurationExtensions.cs index d2fcd02..537e6f2 100644 --- a/src/shared/Extensions/ConfigurationExtensions.cs +++ b/src/shared/Extensions/ConfigurationExtensions.cs @@ -1,4 +1,3 @@ -using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection.Extensions; namespace SharedContracts.Extensions; diff --git a/src/shared/GlobalUsings.cs b/src/shared/GlobalUsings.cs index 973c519..f57716e 100644 --- a/src/shared/GlobalUsings.cs +++ b/src/shared/GlobalUsings.cs @@ -6,10 +6,11 @@ global using MassTransit; global using Microsoft.AspNetCore.Builder; global using Microsoft.AspNetCore.Hosting; global using Microsoft.Extensions.Configuration; +global using Microsoft.Extensions.DependencyInjection; global 
using Microsoft.Extensions.Hosting; global using Microsoft.Extensions.Logging; global using Npgsql; -global using PromKnight.ParseTorrentTitle; +global using Python.Runtime; global using Serilog; global using SharedContracts.Configuration; global using SharedContracts.Extensions; diff --git a/src/shared/Python/PythonEngineService.cs b/src/shared/Python/PythonEngineService.cs new file mode 100644 index 0000000..ab80689 --- /dev/null +++ b/src/shared/Python/PythonEngineService.cs @@ -0,0 +1,49 @@ +namespace SharedContracts.Python; + +public class PythonEngineService(ILogger logger) : IHostedService +{ + private IntPtr _mainThreadState; + private bool _isInitialized; + + public Task StartAsync(CancellationToken cancellationToken) + { + if (_isInitialized) + { + return Task.CompletedTask; + } + + try + { + var pythonDllEnv = Environment.GetEnvironmentVariable("PYTHONNET_PYDLL"); + + if (string.IsNullOrWhiteSpace(pythonDllEnv)) + { + logger.LogWarning("PYTHONNET_PYDLL env is not set. Exiting Application"); + Environment.Exit(1); + return Task.CompletedTask; + } + + Runtime.PythonDLL = pythonDllEnv; + PythonEngine.Initialize(); + _mainThreadState = PythonEngine.BeginAllowThreads(); + + _isInitialized = true; + logger.LogInformation("Python engine initialized"); + } + catch (Exception e) + { + logger.LogWarning(e, "Failed to initialize Python engine"); + Environment.Exit(1); + } + + return Task.CompletedTask; + } + + public Task StopAsync(CancellationToken cancellationToken) + { + PythonEngine.EndAllowThreads(_mainThreadState); + PythonEngine.Shutdown(); + + return Task.CompletedTask; + } +} \ No newline at end of file diff --git a/src/shared/Python/RTN/IRankTorrentName.cs b/src/shared/Python/RTN/IRankTorrentName.cs new file mode 100644 index 0000000..9029534 --- /dev/null +++ b/src/shared/Python/RTN/IRankTorrentName.cs @@ -0,0 +1,8 @@ +namespace SharedContracts.Python.RTN; + +public interface IRankTorrentName +{ + ParseTorrentTitleResponse Parse(string title); + bool 
IsTrash(string title); + bool TitleMatch(string title, string checkTitle); +} \ No newline at end of file diff --git a/src/shared/Python/RTN/ParseTorrentTitleResponse.cs b/src/shared/Python/RTN/ParseTorrentTitleResponse.cs new file mode 100644 index 0000000..3c1d905 --- /dev/null +++ b/src/shared/Python/RTN/ParseTorrentTitleResponse.cs @@ -0,0 +1,6 @@ +namespace SharedContracts.Python.RTN; + +public record ParseTorrentTitleResponse(bool Success, string ParsedTitle, int Year, int[]? Season = null, int[]? Episode = null) +{ + public bool IsMovie => Season == null && Episode == null; +} \ No newline at end of file diff --git a/src/shared/Python/RTN/RankTorrentName.cs b/src/shared/Python/RTN/RankTorrentName.cs new file mode 100644 index 0000000..fd7c492 --- /dev/null +++ b/src/shared/Python/RTN/RankTorrentName.cs @@ -0,0 +1,118 @@ +namespace SharedContracts.Python.RTN; + +public class RankTorrentName : IRankTorrentName +{ + private const string SysModuleName = "sys"; + private const string RtnModuleName = "RTN"; + + private readonly ILogger _logger; + private dynamic? _sys; + private dynamic? _rtn; + + public RankTorrentName(ILogger logger) + { + _logger = logger; + InitModules(); + } + + + public ParseTorrentTitleResponse Parse(string title) + { + try + { + using var py = Py.GIL(); + var result = _rtn?.parse(title); + + if (result == null) + { + return new(false, string.Empty, 0); + } + + return ParseResult(result); + } + catch (Exception e) + { + _logger.LogError(e, "Failed to parse title"); + return new(false, string.Empty, 0); + } + } + + public bool IsTrash(string title) + { + try + { + using var py = Py.GIL(); + var result = _rtn?.check_trash(title); + + if (result == null) + { + return false; + } + + var response = result.As() ?? 
false; + + return response; + } + catch (Exception e) + { + _logger.LogError(e, "Failed to parse title"); + return false; + } + } + + public bool TitleMatch(string title, string checkTitle) + { + try + { + using var py = Py.GIL(); + var result = _rtn?.title_match(title, checkTitle); + + if (result == null) + { + return false; + } + + var response = result.As() ?? false; + + return response; + } + catch (Exception e) + { + _logger.LogError(e, "Failed to parse title"); + return false; + } + } + + + private static ParseTorrentTitleResponse ParseResult(dynamic result) + { + var parsedTitle = result.GetAttr("parsed_title")?.As() ?? string.Empty; + var year = result.GetAttr("year")?.As() ?? 0; + var seasonList = result.GetAttr("season")?.As(); + var episodeList = result.GetAttr("episode")?.As(); + int[]? seasons = seasonList?.Length() > 0 ? seasonList.As() : null; + int[]? episodes = episodeList?.Length() > 0 ? episodeList.As() : null; + + return new ParseTorrentTitleResponse(true, parsedTitle, year, seasons, episodes); + } + + private void InitModules() + { + using var py = Py.GIL(); + _sys = Py.Import(SysModuleName); + + if (_sys == null) + { + _logger.LogError($"Failed to import Python module: {SysModuleName}"); + return; + } + + _sys.path.append(Path.Combine(AppContext.BaseDirectory, "python")); + + _rtn = Py.Import(RtnModuleName); + if (_rtn == null) + { + _logger.LogError($"Failed to import Python module: {RtnModuleName}"); + } + } +} \ No newline at end of file diff --git a/src/shared/Python/ServiceCollectionExtensions.cs b/src/shared/Python/ServiceCollectionExtensions.cs new file mode 100644 index 0000000..0fe2503 --- /dev/null +++ b/src/shared/Python/ServiceCollectionExtensions.cs @@ -0,0 +1,13 @@ +namespace SharedContracts.Python; + +public static class ServiceCollectionExtensions +{ + public static IServiceCollection RegisterPythonEngine(this IServiceCollection services) + { + services.AddSingleton(); + + services.AddHostedService(p => p.GetRequiredService()); 
+ + return services; + } +} \ No newline at end of file diff --git a/src/shared/SharedContracts.csproj b/src/shared/SharedContracts.csproj index 4f97a35..ed1ef64 100644 --- a/src/shared/SharedContracts.csproj +++ b/src/shared/SharedContracts.csproj @@ -16,7 +16,7 @@ - +