5 Commits

Author SHA1 Message Date
iPromKnight
79e0a0f102 DMM Offline (#198)
* Process DMM all locally

single call to github to download the repo archive.
remove need for PAT
update RTN to 0.2.13
change to batch_parse for title parsing from RTN

* introduce concurrent dictionary, and parallelism
2024-04-02 17:01:22 +01:00
purple_emily
6181207513 Fix incorrect file index stored (#197)
* Fix incorrect file index stored

* Update `rank-torrent-name` to latest version

* Knight Crawler version update
2024-04-01 23:08:32 +01:00
iPromKnight
684dbba2f0 RTN-025 and title category parsing (#195)
* update rtn to 025

* Implement movie / show type parsing

* switch to RTN in collectors

* ensure env for pythonnet is loaded, and that requirements copy for qbit

* version bump
2024-03-31 22:01:09 +01:00
iPromKnight
c75ecd2707 add qbit housekeeping service to remove stale torrents (#193)
* Add housekeeping service to clean stale torrents

* version bump
2024-03-30 11:52:23 +00:00
iPromKnight
c493ef3376 Hotfix category, and roll back RTN to 0.1.8 (#192)
* Hotfix categories

Also roll back RTN to 0.1.8 as regression introduced in 0.2

* bump version
2024-03-30 04:47:36 +00:00
52 changed files with 635 additions and 274 deletions

4
.gitignore vendored
View File

@@ -612,3 +612,7 @@ fabric.properties
# Mac directory indexes # Mac directory indexes
.DS_Store .DS_Store
deployment/docker/stack.env deployment/docker/stack.env
src/producer/src/python/
src/debrid-collector/python/
src/qbit-collector/python/

View File

@@ -94,7 +94,7 @@ services:
condition: service_healthy condition: service_healthy
env_file: stack.env env_file: stack.env
hostname: knightcrawler-addon hostname: knightcrawler-addon
image: gabisonfire/knightcrawler-addon:2.0.15 image: gabisonfire/knightcrawler-addon:2.0.20
labels: labels:
logging: promtail logging: promtail
networks: networks:
@@ -117,7 +117,7 @@ services:
redis: redis:
condition: service_healthy condition: service_healthy
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-consumer:2.0.15 image: gabisonfire/knightcrawler-consumer:2.0.20
labels: labels:
logging: promtail logging: promtail
networks: networks:
@@ -138,7 +138,7 @@ services:
redis: redis:
condition: service_healthy condition: service_healthy
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-debrid-collector:2.0.15 image: gabisonfire/knightcrawler-debrid-collector:2.0.20
labels: labels:
logging: promtail logging: promtail
networks: networks:
@@ -152,7 +152,7 @@ services:
migrator: migrator:
condition: service_completed_successfully condition: service_completed_successfully
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-metadata:2.0.15 image: gabisonfire/knightcrawler-metadata:2.0.20
networks: networks:
- knightcrawler-network - knightcrawler-network
restart: "no" restart: "no"
@@ -163,7 +163,7 @@ services:
postgres: postgres:
condition: service_healthy condition: service_healthy
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-migrator:2.0.15 image: gabisonfire/knightcrawler-migrator:2.0.20
networks: networks:
- knightcrawler-network - knightcrawler-network
restart: "no" restart: "no"
@@ -182,7 +182,7 @@ services:
redis: redis:
condition: service_healthy condition: service_healthy
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-producer:2.0.15 image: gabisonfire/knightcrawler-producer:2.0.20
labels: labels:
logging: promtail logging: promtail
networks: networks:
@@ -207,7 +207,7 @@ services:
deploy: deploy:
replicas: ${QBIT_REPLICAS:-0} replicas: ${QBIT_REPLICAS:-0}
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-qbit-collector:2.0.15 image: gabisonfire/knightcrawler-qbit-collector:2.0.20
labels: labels:
logging: promtail logging: promtail
networks: networks:

View File

@@ -20,7 +20,7 @@ x-depends: &knightcrawler-app-depends
services: services:
metadata: metadata:
image: gabisonfire/knightcrawler-metadata:2.0.15 image: gabisonfire/knightcrawler-metadata:2.0.20
env_file: ../../.env env_file: ../../.env
networks: networks:
- knightcrawler-network - knightcrawler-network
@@ -30,7 +30,7 @@ services:
condition: service_completed_successfully condition: service_completed_successfully
migrator: migrator:
image: gabisonfire/knightcrawler-migrator:2.0.15 image: gabisonfire/knightcrawler-migrator:2.0.20
env_file: ../../.env env_file: ../../.env
networks: networks:
- knightcrawler-network - knightcrawler-network
@@ -40,7 +40,7 @@ services:
condition: service_healthy condition: service_healthy
addon: addon:
image: gabisonfire/knightcrawler-addon:2.0.15 image: gabisonfire/knightcrawler-addon:2.0.20
<<: [*knightcrawler-app, *knightcrawler-app-depends] <<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped restart: unless-stopped
hostname: knightcrawler-addon hostname: knightcrawler-addon
@@ -48,22 +48,22 @@ services:
- "7000:7000" - "7000:7000"
consumer: consumer:
image: gabisonfire/knightcrawler-consumer:2.0.15 image: gabisonfire/knightcrawler-consumer:2.0.20
<<: [*knightcrawler-app, *knightcrawler-app-depends] <<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped restart: unless-stopped
debridcollector: debridcollector:
image: gabisonfire/knightcrawler-debrid-collector:2.0.15 image: gabisonfire/knightcrawler-debrid-collector:2.0.20
<<: [*knightcrawler-app, *knightcrawler-app-depends] <<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped restart: unless-stopped
producer: producer:
image: gabisonfire/knightcrawler-producer:2.0.15 image: gabisonfire/knightcrawler-producer:2.0.20
<<: [*knightcrawler-app, *knightcrawler-app-depends] <<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped restart: unless-stopped
qbitcollector: qbitcollector:
image: gabisonfire/knightcrawler-qbit-collector:2.0.15 image: gabisonfire/knightcrawler-qbit-collector:2.0.20
<<: [*knightcrawler-app, *knightcrawler-app-depends] <<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped restart: unless-stopped
depends_on: depends_on:

View File

@@ -38,6 +38,3 @@ QBIT_REPLICAS=0
# Addon # Addon
DEBUG_MODE=false DEBUG_MODE=false
# Producer
GITHUB_PAT=

View File

@@ -17,7 +17,6 @@
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" /> <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" /> <PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
<PackageReference Include="Polly" Version="8.3.1" /> <PackageReference Include="Polly" Version="8.3.1" />
<PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
<PackageReference Include="Serilog" Version="3.1.1" /> <PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" /> <PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" /> <PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
@@ -29,10 +28,30 @@
<None Include="Configuration\logging.json"> <None Include="Configuration\logging.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory> <CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None> </None>
<None Update="requirements.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<Content Remove="eng\**" />
<None Remove="eng\**" />
</ItemGroup>
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
<Content Remove="python\**" />
<None Include="python\**">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\shared\SharedContracts.csproj" /> <ProjectReference Include="..\shared\SharedContracts.csproj" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<Compile Remove="eng\**" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Remove="eng\**" />
</ItemGroup>
</Project> </Project>

View File

@@ -6,6 +6,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharedContracts", "..\share
EndProject EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F53-28E6-404F-9EFE-DADFBEF8338B}" Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F53-28E6-404F-9EFE-DADFBEF8338B}"
EndProject EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{72A042C3-B4F3-45C5-AC20-041FE8F41EFC}"
ProjectSection(SolutionItems) = preProject
eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
eng\install-python-reqs.sh = eng\install-python-reqs.sh
EndProjectSection
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU Debug|Any CPU = Debug|Any CPU

View File

@@ -9,12 +9,23 @@ RUN dotnet restore -a $TARGETARCH
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
WORKDIR /app WORKDIR /app
ENV PYTHONUNBUFFERED=1
RUN apk add --update --no-cache python3=~3.11.8-r0 py3-pip && ln -sf python3 /usr/bin/python
COPY --from=build /src/out . COPY --from=build /src/out .
RUN rm -rf /app/python && mkdir -p /app/python
RUN pip3 install -r /app/requirements.txt -t /app/python
RUN addgroup -S debrid && adduser -S -G debrid debrid RUN addgroup -S debrid && adduser -S -G debrid debrid
USER debrid USER debrid
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD pgrep -f dotnet || exit 1 CMD pgrep -f dotnet || exit 1
ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0
ENTRYPOINT ["dotnet", "DebridCollector.dll"] ENTRYPOINT ["dotnet", "DebridCollector.dll"]

View File

@@ -1,5 +1,3 @@
using DebridCollector.Features.Configuration;
namespace DebridCollector.Extensions; namespace DebridCollector.Extensions;
public static class ServiceCollectionExtensions public static class ServiceCollectionExtensions
@@ -17,7 +15,8 @@ public static class ServiceCollectionExtensions
var serviceConfiguration = services.LoadConfigurationFromEnv<DebridCollectorConfiguration>(); var serviceConfiguration = services.LoadConfigurationFromEnv<DebridCollectorConfiguration>();
services.AddRealDebridClient(serviceConfiguration); services.AddRealDebridClient(serviceConfiguration);
services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>(); services.RegisterPythonEngine();
services.AddSingleton<IRankTorrentName, RankTorrentName>();
services.AddHostedService<DebridRequestProcessor>(); services.AddHostedService<DebridRequestProcessor>();
return services; return services;

View File

@@ -1,6 +1,4 @@
using DebridCollector.Features.Configuration; namespace DebridCollector.Features.Debrid;
namespace DebridCollector.Features.Debrid;
public static class ServiceCollectionExtensions public static class ServiceCollectionExtensions
{ {

View File

@@ -3,10 +3,11 @@ namespace DebridCollector.Features.Worker;
public static class DebridMetaToTorrentMeta public static class DebridMetaToTorrentMeta
{ {
public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection( public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection(
IParseTorrentTitle torrentTitle, IRankTorrentName rankTorrentName,
Torrent torrent, Torrent torrent,
string ImdbId, string ImdbId,
FileDataDictionary Metadata) FileDataDictionary Metadata,
ILogger<WriteMetadataConsumer> logger)
{ {
try try
{ {
@@ -15,34 +16,42 @@ public static class DebridMetaToTorrentMeta
foreach (var metadataEntry in Metadata.Where(m => Filetypes.VideoFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext)))) foreach (var metadataEntry in Metadata.Where(m => Filetypes.VideoFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext))))
{ {
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex); var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
var file = new TorrentFile var file = new TorrentFile
{ {
ImdbId = ImdbId, ImdbId = ImdbId,
KitsuId = 0, KitsuId = 0,
InfoHash = torrent.InfoHash, InfoHash = torrent.InfoHash,
FileIndex = validFileIndex ? fileIndex : 0, FileIndex = validFileIndex ? fileIndexMinusOne : 0,
Title = metadataEntry.Value.Filename, Title = metadataEntry.Value.Filename,
Size = metadataEntry.Value.Filesize.GetValueOrDefault(), Size = metadataEntry.Value.Filesize.GetValueOrDefault(),
}; };
var parsedTitle = torrentTitle.Parse(file.Title); var parsedTitle = rankTorrentName.Parse(file.Title, false);
file.ImdbSeason = parsedTitle.Seasons.FirstOrDefault(); if (!parsedTitle.Success)
file.ImdbEpisode = parsedTitle.Episodes.FirstOrDefault(); {
logger.LogWarning("Failed to parse title {Title} for metadata mapping", file.Title);
continue;
}
file.ImdbSeason = parsedTitle.Response?.Season?.FirstOrDefault() ?? 0;
file.ImdbEpisode = parsedTitle.Response?.Episode?.FirstOrDefault() ?? 0;
files.Add(file); files.Add(file);
} }
return files; return files;
} }
catch (Exception) catch (Exception ex)
{ {
logger.LogWarning("Failed to map metadata to files collection: {Exception}", ex.Message);
return []; return [];
} }
} }
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, FileDataDictionary Metadata) public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, FileDataDictionary Metadata, ILogger<WriteMetadataConsumer> logger)
{ {
try try
{ {
@@ -58,13 +67,14 @@ public static class DebridMetaToTorrentMeta
foreach (var metadataEntry in Metadata.Where(m => Filetypes.SubtitleFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext)))) foreach (var metadataEntry in Metadata.Where(m => Filetypes.SubtitleFileExtensions.Any(ext => m.Value.Filename.EndsWith(ext))))
{ {
var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex); var validFileIndex = int.TryParse(metadataEntry.Key, out var fileIndex);
var fileIndexMinusOne = Math.Max(0, fileIndex - 1);
var fileId = torrentFiles.FirstOrDefault( var fileId = torrentFiles.FirstOrDefault(
t => Path.GetFileNameWithoutExtension(t.Title) == Path.GetFileNameWithoutExtension(metadataEntry.Value.Filename))?.Id ?? 0; t => Path.GetFileNameWithoutExtension(t.Title) == Path.GetFileNameWithoutExtension(metadataEntry.Value.Filename))?.Id ?? 0;
var file = new SubtitleFile var file = new SubtitleFile
{ {
InfoHash = InfoHash, InfoHash = InfoHash,
FileIndex = validFileIndex ? fileIndex : 0, FileIndex = validFileIndex ? fileIndexMinusOne : 0,
FileId = fileId, FileId = fileId,
Title = metadataEntry.Value.Filename, Title = metadataEntry.Value.Filename,
}; };
@@ -74,8 +84,9 @@ public static class DebridMetaToTorrentMeta
return files; return files;
} }
catch (Exception) catch (Exception ex)
{ {
logger.LogWarning("Failed to map metadata to subtitles collection: {Exception}", ex.Message);
return []; return [];
} }
} }

View File

@@ -53,6 +53,12 @@ public class InfohashMetadataSagaStateMachine : MassTransitStateMachine<Infohash
.Then( .Then(
context => context =>
{ {
if (!context.Message.WithFiles)
{
logger.LogInformation("No files written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
return;
}
logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId); logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
}) })
.TransitionTo(Completed) .TransitionTo(Completed)

View File

@@ -16,7 +16,7 @@ public record WriteMetadata(Torrent Torrent, TorrentMetadataResponse Metadata, s
} }
[EntityName("metadata-written-debrid-colloctor")] [EntityName("metadata-written-debrid-colloctor")]
public record MetadataWritten(TorrentMetadataResponse Metadata) : CorrelatedBy<Guid> public record MetadataWritten(TorrentMetadataResponse Metadata, bool WithFiles) : CorrelatedBy<Guid>
{ {
public Guid CorrelationId { get; init; } = Metadata.CorrelationId; public Guid CorrelationId { get; init; } = Metadata.CorrelationId;
} }

View File

@@ -1,25 +1,28 @@
namespace DebridCollector.Features.Worker; namespace DebridCollector.Features.Worker;
public class WriteMetadataConsumer(IParseTorrentTitle parseTorrentTitle, IDataStorage dataStorage) : IConsumer<WriteMetadata> public class WriteMetadataConsumer(IRankTorrentName rankTorrentName, IDataStorage dataStorage, ILogger<WriteMetadataConsumer> logger) : IConsumer<WriteMetadata>
{ {
public async Task Consume(ConsumeContext<WriteMetadata> context) public async Task Consume(ConsumeContext<WriteMetadata> context)
{ {
var request = context.Message; var request = context.Message;
var torrentFiles = DebridMetaToTorrentMeta.MapMetadataToFilesCollection(parseTorrentTitle, request.Torrent, request.ImdbId, request.Metadata.Metadata); var torrentFiles = DebridMetaToTorrentMeta.MapMetadataToFilesCollection(rankTorrentName, request.Torrent, request.ImdbId, request.Metadata.Metadata, logger);
if (torrentFiles.Any()) if (!torrentFiles.Any())
{ {
await dataStorage.InsertFiles(torrentFiles); await context.Publish(new MetadataWritten(request.Metadata, false));
return;
var subtitles = await DebridMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata);
if (subtitles.Any())
{
await dataStorage.InsertSubtitles(subtitles);
}
} }
await context.Publish(new MetadataWritten(request.Metadata)); await dataStorage.InsertFiles(torrentFiles);
var subtitles = await DebridMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata, logger);
if (subtitles.Any())
{
await dataStorage.InsertSubtitles(subtitles);
}
await context.Publish(new MetadataWritten(request.Metadata, true));
} }
} }

View File

@@ -4,17 +4,18 @@ global using System.Text.Json;
global using System.Text.Json.Serialization; global using System.Text.Json.Serialization;
global using System.Threading.Channels; global using System.Threading.Channels;
global using DebridCollector.Extensions; global using DebridCollector.Extensions;
global using DebridCollector.Features.Configuration;
global using DebridCollector.Features.Debrid; global using DebridCollector.Features.Debrid;
global using DebridCollector.Features.Worker; global using DebridCollector.Features.Worker;
global using MassTransit; global using MassTransit;
global using MassTransit.Mediator;
global using Microsoft.AspNetCore.Builder; global using Microsoft.AspNetCore.Builder;
global using Microsoft.Extensions.DependencyInjection; global using Microsoft.Extensions.DependencyInjection;
global using Polly; global using Polly;
global using Polly.Extensions.Http; global using Polly.Extensions.Http;
global using PromKnight.ParseTorrentTitle;
global using SharedContracts.Configuration; global using SharedContracts.Configuration;
global using SharedContracts.Dapper; global using SharedContracts.Dapper;
global using SharedContracts.Extensions; global using SharedContracts.Extensions;
global using SharedContracts.Models; global using SharedContracts.Models;
global using SharedContracts.Python;
global using SharedContracts.Python.RTN;
global using SharedContracts.Requests; global using SharedContracts.Requests;

View File

@@ -0,0 +1,2 @@
# Installs the packages listed in ../requirements.txt into the ../python directory
# (pip's -t/--target option) instead of the interpreter's site-packages.
# NOTE(review): both paths are relative to the current working directory, so this
# presumably has to be launched from the folder containing the script - confirm.
mkdir -p ../python
python -m pip install -r ../requirements.txt -t ../python/

View File

@@ -0,0 +1,5 @@
#!/bin/bash
# Recreates the local "python" directory next to this script's parent folder and
# installs everything listed in requirements.txt into it (pip --target install).
#
# Fail fast: abort on the first failing command, on unset variables and on
# failures inside pipelines, so a broken install is never reported as success.
set -euo pipefail

# Resolve every path from the script's own location instead of the caller's
# working directory. With plain relative paths, running the script from another
# folder would make `rm -rf ../python` delete an unrelated directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
target_dir="${script_dir}/../python"

# Start from a clean target so packages from a previous install do not linger.
rm -rf "${target_dir}"
mkdir -p "${target_dir}"
python3 -m pip install -r "${script_dir}/../requirements.txt" -t "${target_dir}/"

View File

@@ -0,0 +1 @@
rank-torrent-name==0.2.13

View File

@@ -28,7 +28,7 @@
}, },
{ {
"Name": "SyncDmmJob", "Name": "SyncDmmJob",
"IntervalSeconds": 1800, "IntervalSeconds": 10800,
"Enabled": true "Enabled": true
}, },
{ {

View File

@@ -0,0 +1,70 @@
namespace Producer.Features.Crawlers.Dmm;
/// <summary>
/// Downloads the DMM hashlists archive through the injected <see cref="HttpClient"/> and
/// extracts its files into a single, flat temporary directory.
/// </summary>
public class DMMFileDownloader(HttpClient client, ILogger<DMMFileDownloader> logger) : IDMMFileDownloader
{
    // Relative request URI: the injected client must therefore have a BaseAddress configured.
    private const string Filename = "main.zip";

    // Files that are removed again after extraction so they are not handed on with the hashlists.
    private static readonly IReadOnlyCollection<string> _filesToIgnore = [
        "index.html",
        "404.html",
        "dedupe.sh",
        "CNAME",
    ];

    /// <summary>Name under which the HTTP client for this downloader is identified.</summary>
    public const string ClientName = "DmmFileDownloader";

    /// <summary>
    /// Downloads the archive and extracts every file entry into
    /// <c>{system temp path}/DMMHashlists</c>, discarding the archive's directory structure.
    /// </summary>
    /// <param name="cancellationToken">Token observed by the HTTP request and the content stream read.</param>
    /// <returns>The full path of the directory containing the extracted files.</returns>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    public async Task<string> DownloadFileToTempPath(CancellationToken cancellationToken)
    {
        logger.LogInformation("Downloading DMM Hashlists");

        // Dispose the response so the underlying connection/content is released deterministically.
        using var response = await client.GetAsync(Filename, cancellationToken);

        // Validate the response BEFORE touching the disk: a failed download must not wipe
        // the result of a previous successful extraction.
        response.EnsureSuccessStatusCode();

        var tempDirectory = Path.Combine(Path.GetTempPath(), "DMMHashlists");
        EnsureDirectoryIsClean(tempDirectory);

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
        using var archive = new ZipArchive(stream);

        logger.LogInformation("Extracting DMM Hashlists to {TempDirectory}", tempDirectory);

        foreach (var entry in archive.Entries)
        {
            // Only the file-name part is used, which both flattens the archive and keeps every
            // extracted file inside tempDirectory regardless of the path stored in the entry.
            var entryFileName = Path.GetFileName(entry.FullName);

            // Directory entries end in a separator and therefore have no file name; skip them.
            if (string.IsNullOrEmpty(entryFileName))
            {
                continue;
            }

            entry.ExtractToFile(Path.Combine(tempDirectory, entryFileName), true);
        }

        foreach (var file in _filesToIgnore)
        {
            CleanRepoExtras(tempDirectory, file);
        }

        logger.LogInformation("Downloaded and extracted Repository to {TempDirectory}", tempDirectory);

        return tempDirectory;
    }

    /// <summary>Deletes <paramref name="fileName"/> from <paramref name="tempDirectory"/> if it exists.</summary>
    private static void CleanRepoExtras(string tempDirectory, string fileName)
    {
        var repoIndex = Path.Combine(tempDirectory, fileName);

        if (File.Exists(repoIndex))
        {
            File.Delete(repoIndex);
        }
    }

    /// <summary>Removes any existing directory (recursively) and recreates it empty.</summary>
    private static void EnsureDirectoryIsClean(string tempDirectory)
    {
        if (Directory.Exists(tempDirectory))
        {
            Directory.Delete(tempDirectory, true);
        }

        Directory.CreateDirectory(tempDirectory);
    }
}

View File

@@ -0,0 +1,6 @@
namespace Producer.Features.Crawlers.Dmm;
/// <summary>
/// Empty type: it declares no members, base class or interfaces here.
/// NOTE(review): looks like a placeholder or leftover for the DMM HTTP client setup - confirm
/// whether it is still needed.
/// </summary>
public class DMMHttpClient
{
}

View File

@@ -1,64 +1,99 @@
namespace Producer.Features.Crawlers.Dmm; namespace Producer.Features.Crawlers.Dmm;
public partial class DebridMediaManagerCrawler( public partial class DebridMediaManagerCrawler(
IHttpClientFactory httpClientFactory, IDMMFileDownloader dmmFileDownloader,
ILogger<DebridMediaManagerCrawler> logger, ILogger<DebridMediaManagerCrawler> logger,
IDataStorage storage, IDataStorage storage,
GithubConfiguration githubConfiguration,
IRankTorrentName rankTorrentName, IRankTorrentName rankTorrentName,
IDistributedCache cache) : BaseCrawler(logger, storage) IDistributedCache cache) : BaseCrawler(logger, storage)
{ {
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")] [GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
private static partial Regex HashCollectionMatcher(); private static partial Regex HashCollectionMatcher();
protected override string Url => "";
private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>(); protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
protected override string Url => "https://api.github.com/repos/debridmediamanager/hashlists/git/trees/main?recursive=1";
protected override string Source => "DMM"; protected override string Source => "DMM";
private const int ParallelismCount = 4;
public override async Task Execute() public override async Task Execute()
{ {
var client = httpClientFactory.CreateClient("Scraper"); var tempDirectory = await dmmFileDownloader.DownloadFileToTempPath(CancellationToken.None);
client.DefaultRequestHeaders.Authorization = new("Bearer", githubConfiguration.PAT);
client.DefaultRequestHeaders.UserAgent.ParseAdd("curl");
var jsonBody = await client.GetStringAsync(Url); var files = Directory.GetFiles(tempDirectory, "*.html", SearchOption.AllDirectories);
var json = JsonDocument.Parse(jsonBody); logger.LogInformation("Found {Files} files to parse", files.Length);
var entriesArray = json.RootElement.GetProperty("tree"); var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
logger.LogInformation("Found {Entries} total DMM pages", entriesArray.GetArrayLength()); await Parallel.ForEachAsync(files, options, async (file, token) =>
foreach (var entry in entriesArray.EnumerateArray())
{ {
await ParsePage(entry, client); var fileName = Path.GetFileName(file);
} var torrentDictionary = await ExtractPageContents(file, fileName);
if (torrentDictionary == null)
{
return;
}
await ParseTitlesWithRtn(fileName, torrentDictionary);
var results = await ParseTorrents(torrentDictionary);
if (results.Count <= 0)
{
return;
}
await InsertTorrents(results);
await Storage.MarkPageAsIngested(fileName, token);
});
} }
private async Task ParsePage(JsonElement entry, HttpClient client) private async Task ParseTitlesWithRtn(string fileName, IDictionary<string, DmmContent> page)
{ {
var (pageIngested, name) = await IsAlreadyIngested(entry); logger.LogInformation("Parsing titles for {Page}", fileName);
if (string.IsNullOrEmpty(name) || pageIngested) var batchProcessables = page.Select(value => new RtnBatchProcessable(value.Key, value.Value.Filename)).ToList();
var parsedResponses = rankTorrentName.BatchParse(
batchProcessables.Select<RtnBatchProcessable, string>(bp => bp.Filename).ToList(), trashGarbage: false);
// Filter out unsuccessful responses and match RawTitle to requesting title
var successfulResponses = parsedResponses
.Where(response => response != null && response.Success)
.GroupBy(response => response.Response.RawTitle!)
.ToDictionary(group => group.Key, group => group.First());
var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
await Parallel.ForEachAsync(batchProcessables.Select(t => t.InfoHash), options, (infoHash, _) =>
{ {
return; if (page.TryGetValue(infoHash, out var dmmContent) &&
} successfulResponses.TryGetValue(dmmContent.Filename, out var parsedResponse))
{
var pageSource = await client.GetStringAsync($"{DownloadBaseUrl}/{name}"); page[infoHash] = dmmContent with {ParseResponse = parsedResponse};
}
await ExtractPageContents(pageSource, name);
return ValueTask.CompletedTask;
});
} }
private async Task ExtractPageContents(string pageSource, string name) private async Task<ConcurrentDictionary<string, DmmContent>?> ExtractPageContents(string filePath, string filenameOnly)
{ {
var (pageIngested, name) = await IsAlreadyIngested(filenameOnly);
if (pageIngested)
{
return [];
}
var pageSource = await File.ReadAllTextAsync(filePath);
var match = HashCollectionMatcher().Match(pageSource); var match = HashCollectionMatcher().Match(pageSource);
if (!match.Success) if (!match.Success)
{ {
logger.LogWarning("Failed to match hash collection for {Name}", name); logger.LogWarning("Failed to match hash collection for {Name}", name);
await Storage.MarkPageAsIngested(name); await Storage.MarkPageAsIngested(filenameOnly);
return; return [];
} }
var encodedJson = match.Groups.Values.ElementAtOrDefault(1); var encodedJson = match.Groups.Values.ElementAtOrDefault(1);
@@ -66,86 +101,92 @@ public partial class DebridMediaManagerCrawler(
if (string.IsNullOrEmpty(encodedJson?.Value)) if (string.IsNullOrEmpty(encodedJson?.Value))
{ {
logger.LogWarning("Failed to extract encoded json for {Name}", name); logger.LogWarning("Failed to extract encoded json for {Name}", name);
return; return [];
} }
await ProcessExtractedContentsAsTorrentCollection(encodedJson.Value, name); var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson.Value);
}
private async Task ProcessExtractedContentsAsTorrentCollection(string encodedJson, string name)
{
var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson);
var json = JsonDocument.Parse(decodedJson); var json = JsonDocument.Parse(decodedJson);
var torrents = await json.RootElement.EnumerateArray()
.ToAsyncEnumerable()
.Select(ParsePageContent)
.Where(t => t is not null)
.ToListAsync();
await InsertTorrentsForPage(json); if (torrents.Count == 0)
var result = await Storage.MarkPageAsIngested(name);
if (!result.IsSuccess)
{ {
logger.LogWarning("Failed to mark page as ingested: [{Error}]", result.Failure.ErrorMessage); logger.LogWarning("No torrents found in {Name}", name);
return; await Storage.MarkPageAsIngested(filenameOnly);
return [];
} }
var torrentDictionary = torrents
.Where(x => x is not null)
.GroupBy(x => x.InfoHash)
.ToConcurrentDictionary(g => g.Key, g => new DmmContent(g.First().Filename, g.First().Bytes, null));
logger.LogInformation("Successfully marked page as ingested"); logger.LogInformation("Parsed {Torrents} torrents for {Name}", torrentDictionary.Count, name);
return torrentDictionary;
} }
private async Task<IngestedTorrent?> ParseTorrent(JsonElement item) private async Task<List<IngestedTorrent>> ParseTorrents(IDictionary<string, DmmContent> page)
{ {
var ingestedTorrents = new List<IngestedTorrent>();
if (!item.TryGetProperty("filename", out var filenameElement) || var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
!item.TryGetProperty("bytes", out var bytesElement) ||
!item.TryGetProperty("hash", out var hashElement)) await Parallel.ForEachAsync(page, options, async (kvp, ct) =>
{ {
return null; var (infoHash, dmmContent) = kvp;
} var parsedTorrent = dmmContent.ParseResponse;
if (parsedTorrent is not {Success: true})
{
return;
}
var torrentTitle = filenameElement.GetString(); var torrentType = parsedTorrent.Response.IsMovie ? "movie" : "tvSeries";
var cacheKey = GetCacheKey(torrentType, parsedTorrent.Response.ParsedTitle, parsedTorrent.Response.Year);
var (cached, cachedResult) = await CheckIfInCacheAndReturn(cacheKey);
if (torrentTitle.IsNullOrEmpty()) if (cached)
{ {
return null; logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Response.ParsedTitle);
} lock (ingestedTorrents)
{
var parsedTorrent = rankTorrentName.Parse(torrentTitle.CleanTorrentTitleForImdb()); ingestedTorrents.Add(MapToTorrent(cachedResult, dmmContent.Bytes, infoHash, parsedTorrent));
}
if (!parsedTorrent.Success) return;
{ }
return null;
}
var (cached, cachedResult) = await CheckIfInCacheAndReturn(parsedTorrent.Response.ParsedTitle);
if (cached)
{
logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Response.ParsedTitle);
return MapToTorrent(cachedResult, bytesElement, hashElement, parsedTorrent);
}
int? year = parsedTorrent.Response.Year != 0 ? parsedTorrent.Response.Year : null; int? year = parsedTorrent.Response.Year != 0 ? parsedTorrent.Response.Year : null;
var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Response.ParsedTitle, parsedTorrent.Response.IsMovie ? "movies" : "tv", year); var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Response.ParsedTitle, torrentType, year, ct);
if (imdbEntry is null) if (imdbEntry is null)
{ {
return null; return;
} }
await AddToCache(parsedTorrent.Response.ParsedTitle.ToLowerInvariant(), imdbEntry);
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", imdbEntry.ImdbId, parsedTorrent.Response.ParsedTitle, imdbEntry.Title, imdbEntry.Score);
return MapToTorrent(imdbEntry, bytesElement, hashElement, parsedTorrent); await AddToCache(cacheKey, imdbEntry);
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", imdbEntry.ImdbId, parsedTorrent.Response.ParsedTitle, imdbEntry.Title, imdbEntry.Score);
lock (ingestedTorrents)
{
ingestedTorrents.Add(MapToTorrent(imdbEntry, dmmContent.Bytes, infoHash, parsedTorrent));
}
});
return ingestedTorrents;
} }
private IngestedTorrent MapToTorrent(ImdbEntry result, JsonElement bytesElement, JsonElement hashElement, ParseTorrentTitleResponse parsedTorrent) => private IngestedTorrent MapToTorrent(ImdbEntry result, long size, string infoHash, ParseTorrentTitleResponse parsedTorrent) =>
new() new()
{ {
Source = Source, Source = Source,
Name = result.Title, Name = result.Title,
Imdb = result.ImdbId, Imdb = result.ImdbId,
Size = bytesElement.GetInt64().ToString(), Size = size.ToString(),
InfoHash = hashElement.ToString(), InfoHash = infoHash,
Seeders = 0, Seeders = 0,
Leechers = 0, Leechers = 0,
Category = AssignCategory(result), Category = AssignCategory(result),
@@ -153,19 +194,19 @@ public partial class DebridMediaManagerCrawler(
}; };
private Task AddToCache(string lowerCaseTitle, ImdbEntry best) private Task AddToCache(string cacheKey, ImdbEntry best)
{ {
var cacheOptions = new DistributedCacheEntryOptions var cacheOptions = new DistributedCacheEntryOptions
{ {
AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1), AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1),
}; };
return cache.SetStringAsync(lowerCaseTitle, JsonSerializer.Serialize(best), cacheOptions); return cache.SetStringAsync(cacheKey, JsonSerializer.Serialize(best), cacheOptions);
} }
private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string title) private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string cacheKey)
{ {
var cachedImdbId = await cache.GetStringAsync(title.ToLowerInvariant()); var cachedImdbId = await cache.GetStringAsync(cacheKey);
if (!string.IsNullOrEmpty(cachedImdbId)) if (!string.IsNullOrEmpty(cachedImdbId))
{ {
@@ -175,48 +216,36 @@ public partial class DebridMediaManagerCrawler(
return (false, null); return (false, null);
} }
private async Task InsertTorrentsForPage(JsonDocument json) private async Task<(bool Success, string? Name)> IsAlreadyIngested(string filename)
{ {
var torrents = await json.RootElement.EnumerateArray() var pageIngested = await Storage.PageIngested(filename);
.ToAsyncEnumerable()
.SelectAwait(async x => await ParseTorrent(x))
.Where(t => t is not null)
.ToListAsync();
if (torrents.Count == 0) return (pageIngested, filename);
{
logger.LogWarning("No torrents found in {Source} response", Source);
return;
}
await InsertTorrents(torrents!);
}
private async Task<(bool Success, string? Name)> IsAlreadyIngested(JsonElement entry)
{
var name = entry.GetProperty("path").GetString();
if (string.IsNullOrEmpty(name))
{
return (false, null);
}
var pageIngested = await Storage.PageIngested(name);
return (pageIngested, name);
} }
private static string AssignCategory(ImdbEntry entry) => private static string AssignCategory(ImdbEntry entry) =>
entry.Category switch entry.Category.ToLower() switch
{ {
"movie" => "movies", var category when string.Equals(category, "movie", StringComparison.OrdinalIgnoreCase) => "movies",
"tvMovie" => "movies", var category when string.Equals(category, "tvSeries", StringComparison.OrdinalIgnoreCase) => "tv",
"tvSeries" => "tv",
"tvEpisode" => "tv",
"tvSpecial" => "tv",
"tvMiniSeries" => "tv",
"tv" => "tv",
"short" => "tv",
_ => "unknown", _ => "unknown",
}; };
private static string GetCacheKey(string category, string title, int year) => $"{category.ToLowerInvariant()}:{year}:{title.ToLowerInvariant()}";
private static ExtractedDMMContent? ParsePageContent(JsonElement item)
{
if (!item.TryGetProperty("filename", out var filenameElement) ||
!item.TryGetProperty("bytes", out var bytesElement) ||
!item.TryGetProperty("hash", out var hashElement))
{
return null;
}
return new(filenameElement.GetString(), bytesElement.GetInt64(), hashElement.GetString());
}
private record DmmContent(string Filename, long Bytes, ParseTorrentTitleResponse? ParseResponse);
private record ExtractedDMMContent(string Filename, long Bytes, string InfoHash);
private record RtnBatchProcessable(string InfoHash, string Filename);
} }

View File

@@ -1,9 +0,0 @@
namespace Producer.Features.Crawlers.Dmm;
/// <summary>
/// Configuration object exposing an optional GitHub credential read from the environment.
/// </summary>
public class GithubConfiguration
{
// Prefix the environment-variable lookup is invoked on.
private const string Prefix = "GITHUB";
// Variable name passed to the lookup helper.
// NOTE(review): presumably combined with Prefix into something like GITHUB_PAT — confirm against GetOptionalEnvironmentVariableAsString.
private const string PatVariable = "PAT";
/// <summary>
/// Value returned by the optional environment lookup; nullable, so callers must handle it being absent.
/// Presumably a GitHub personal access token — confirm against the consumers of this setting.
/// </summary>
public string? PAT { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(PatVariable);
}

View File

@@ -0,0 +1,6 @@
namespace Producer.Features.Crawlers.Dmm;
/// <summary>
/// Abstraction over the component that downloads the DMM file to a temporary path.
/// </summary>
public interface IDMMFileDownloader
{
/// <summary>
/// Downloads the file to a temporary path.
/// </summary>
/// <param name="cancellationToken">Token used to cancel the download.</param>
/// <returns>
/// A string produced by the download.
/// NOTE(review): presumably the temporary path the content was written to — confirm against the implementation.
/// </returns>
Task<string> DownloadFileToTempPath(CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,16 @@
namespace Producer.Features.Crawlers.Dmm;
/// <summary>
/// Dependency-injection registration helpers for the DMM crawler feature.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="DMMFileDownloader"/> as the typed HTTP client behind
    /// <see cref="IDMMFileDownloader"/>, using the client name declared on the downloader.
    /// </summary>
    /// <param name="services">The service collection to add the registration to.</param>
    /// <returns>The same <paramref name="services"/> instance, to allow call chaining.</returns>
    public static IServiceCollection AddDmmSupport(this IServiceCollection services)
    {
        const string hashlistArchiveUrl = "https://github.com/debridmediamanager/hashlists/zipball/main/";

        services.AddHttpClient<IDMMFileDownloader, DMMFileDownloader>(DMMFileDownloader.ClientName, httpClient =>
        {
            var defaultHeaders = httpClient.DefaultRequestHeaders;

            defaultHeaders.UserAgent.ParseAdd("curl");

            // NOTE(review): gzip is requested here, but nothing in this block enables automatic
            // decompression on the handler — confirm the downloader copes with a gzip-encoded body.
            defaultHeaders.Add("Accept-Encoding", "gzip");

            httpClient.BaseAddress = new(hashlistArchiveUrl);
        });

        return services;
    }
}

View File

@@ -5,7 +5,6 @@ internal static class ServiceCollectionExtensions
internal static IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration) internal static IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration)
{ {
var scrapeConfiguration = services.LoadConfigurationFromConfig<ScrapeConfiguration>(configuration, ScrapeConfiguration.SectionName); var scrapeConfiguration = services.LoadConfigurationFromConfig<ScrapeConfiguration>(configuration, ScrapeConfiguration.SectionName);
var githubConfiguration = services.LoadConfigurationFromEnv<GithubConfiguration>();
var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>(); var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();
var jobTypes = Assembly.GetAssembly(typeof(BaseJob)) var jobTypes = Assembly.GetAssembly(typeof(BaseJob))
@@ -19,18 +18,13 @@ internal static class ServiceCollectionExtensions
services.AddTransient(type); services.AddTransient(type);
} }
if (!string.IsNullOrEmpty(githubConfiguration.PAT))
{
services.AddTransient<SyncDmmJob>();
}
var openMethod = typeof(ServiceCollectionExtensions).GetMethod(nameof(AddJobWithTrigger), BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance); var openMethod = typeof(ServiceCollectionExtensions).GetMethod(nameof(AddJobWithTrigger), BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance);
services.AddQuartz( services.AddQuartz(
quartz => quartz =>
{ {
RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration); RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration); RegisterDmmJob(quartz, scrapeConfiguration);
RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration); RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
RegisterPublisher(quartz, rabbitConfiguration); RegisterPublisher(quartz, rabbitConfiguration);
}); });
@@ -64,13 +58,8 @@ internal static class ServiceCollectionExtensions
} }
} }
private static void RegisterDmmJob(GithubConfiguration githubConfiguration, IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration) private static void RegisterDmmJob(IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration) =>
{ AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
if (!string.IsNullOrEmpty(githubConfiguration.PAT))
{
AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
}
}
private static void RegisterTorrentioJob( private static void RegisterTorrentioJob(
IServiceCollection services, IServiceCollection services,

View File

@@ -1,12 +1,12 @@
// Global using directives // Global using directives
global using System.Collections.Concurrent;
global using System.IO.Compression;
global using System.Reflection; global using System.Reflection;
global using System.Text; global using System.Text;
global using System.Text.Json; global using System.Text.Json;
global using System.Text.RegularExpressions; global using System.Text.RegularExpressions;
global using System.Xml.Linq; global using System.Xml.Linq;
global using FuzzySharp;
global using FuzzySharp.Extractor;
global using FuzzySharp.PreProcess; global using FuzzySharp.PreProcess;
global using FuzzySharp.SimilarityRatio.Scorer; global using FuzzySharp.SimilarityRatio.Scorer;
global using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; global using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive;

View File

@@ -12,7 +12,8 @@ builder.Services
.RegisterMassTransit() .RegisterMassTransit()
.AddDataStorage() .AddDataStorage()
.AddCrawlers() .AddCrawlers()
.AddDmmSupport()
.AddQuartz(builder.Configuration); .AddQuartz(builder.Configuration);
var app = builder.Build(); var app = builder.Build();
app.Run(); app.Run();

View File

@@ -1 +1 @@
rank-torrent-name==0.1.9 rank-torrent-name==0.2.13

View File

@@ -9,12 +9,23 @@ RUN dotnet restore -a $TARGETARCH
RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH RUN dotnet publish -c Release --no-restore -o /src/out -a $TARGETARCH
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
WORKDIR /app WORKDIR /app
ENV PYTHONUNBUFFERED=1
RUN apk add --update --no-cache python3=~3.11.8-r0 py3-pip && ln -sf python3 /usr/bin/python
COPY --from=build /src/out . COPY --from=build /src/out .
RUN rm -rf /app/python && mkdir -p /app/python
RUN pip3 install -r /app/requirements.txt -t /app/python
RUN addgroup -S qbit && adduser -S -G qbit qbit RUN addgroup -S qbit && adduser -S -G qbit qbit
USER qbit USER qbit
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD pgrep -f dotnet || exit 1 CMD pgrep -f dotnet || exit 1
ENV PYTHONNET_PYDLL=/usr/lib/libpython3.11.so.1.0
ENTRYPOINT ["dotnet", "QBitCollector.dll"] ENTRYPOINT ["dotnet", "QBitCollector.dll"]

View File

@@ -13,11 +13,13 @@ public static class ServiceCollectionExtensions
internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services) internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services)
{ {
services.AddQBitTorrentClient(); services.AddQBitTorrentClient();
services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>(); services.RegisterPythonEngine();
services.AddSingleton<IRankTorrentName, RankTorrentName>();
services.AddSingleton<QbitRequestProcessor>(); services.AddSingleton<QbitRequestProcessor>();
services.AddHttpClient(); services.AddHttpClient();
services.AddSingleton<ITrackersService, TrackersService>(); services.AddSingleton<ITrackersService, TrackersService>();
services.AddHostedService<TrackersBackgroundService>(); services.AddHostedService<TrackersBackgroundService>();
services.AddHostedService<HousekeepingBackgroundService>();
return services; return services;
} }

View File

@@ -0,0 +1,52 @@
namespace QBitCollector.Features.Qbit;
/// <summary>
/// Hosted background service that periodically asks qBittorrent for its torrent list and
/// deletes (including downloaded data) every torrent that was added longer ago than
/// <see cref="StaleAfter"/>.
/// </summary>
public class HousekeepingBackgroundService(IQBittorrentClient client, ILogger<HousekeepingBackgroundService> logger) : BackgroundService
{
    // How often a clean-up sweep runs after the initial one.
    private static readonly TimeSpan SweepInterval = TimeSpan.FromMinutes(2);

    // A torrent whose AddedOn is older than this is treated as stale and removed.
    private static readonly TimeSpan StaleAfter = TimeSpan.FromMinutes(1);

    /// <summary>
    /// Runs one sweep immediately, then one per <see cref="SweepInterval"/> until the host stops.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host when the service should shut down.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        logger.LogInformation("Service is Running.");

        try
        {
            // The initial sweep is inside the try so that a shutdown during it is
            // reported as a normal stop rather than escaping as an unhandled cancellation.
            await DoWork(stoppingToken);

            using PeriodicTimer timer = new(SweepInterval);

            while (await timer.WaitForNextTickAsync(stoppingToken))
            {
                await DoWork(stoppingToken);
            }
        }
        catch (OperationCanceledException)
        {
            logger.LogInformation("Service stopping.");
        }
    }

    /// <summary>
    /// Performs a single clean-up sweep. Failures are logged and swallowed so one bad
    /// interval does not stop the service; cancellation caused by shutdown is re-thrown.
    /// </summary>
    /// <param name="stoppingToken">Forwarded to the qBittorrent calls so shutdown can interrupt them.</param>
    private async Task DoWork(CancellationToken stoppingToken)
    {
        try
        {
            logger.LogInformation("Cleaning Stale Entries in Qbit...");

            // Computed once per sweep so every torrent is judged against the same instant.
            var cutoff = DateTimeOffset.UtcNow - StaleAfter;

            var torrents = await client.GetTorrentListAsync(null, stoppingToken);

            foreach (var torrentInfo in torrents)
            {
                // Kept in negated form on purpose: anything that does not compare as
                // "older than the cutoff" (including a missing AddedOn, if the property
                // is nullable) is left alone.
                if (!(torrentInfo.AddedOn < cutoff))
                {
                    continue;
                }

                logger.LogInformation("Torrent [{InfoHash}] Identified as stale because was added at {AddedOn}", torrentInfo.Hash, torrentInfo.AddedOn);

                await client.DeleteAsync(new[] {torrentInfo.Hash}, true, stoppingToken);
                logger.LogInformation("Cleaned up stale torrent: [{InfoHash}]", torrentInfo.Hash);
            }
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Shutdown, not a failure: let ExecuteAsync log the stop instead of an error.
            throw;
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error cleaning up stale torrents this interval.");
        }
    }
}

View File

@@ -3,10 +3,11 @@ namespace QBitCollector.Features.Worker;
public static class QbitMetaToTorrentMeta public static class QbitMetaToTorrentMeta
{ {
public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection( public static IReadOnlyList<TorrentFile> MapMetadataToFilesCollection(
IParseTorrentTitle torrentTitle, IRankTorrentName rankTorrentName,
Torrent torrent, Torrent torrent,
string ImdbId, string ImdbId,
IReadOnlyList<TorrentContent> Metadata) IReadOnlyList<TorrentContent> Metadata,
ILogger<WriteQbitMetadataConsumer> logger)
{ {
try try
{ {
@@ -24,23 +25,31 @@ public static class QbitMetaToTorrentMeta
Size = metadataEntry.Size, Size = metadataEntry.Size,
}; };
var parsedTitle = torrentTitle.Parse(file.Title); var parsedTitle = rankTorrentName.Parse(file.Title, false);
if (!parsedTitle.Success)
{
logger.LogWarning("Failed to parse title {Title} for metadata mapping", file.Title);
continue;
}
file.ImdbSeason = parsedTitle.Seasons.FirstOrDefault(); file.ImdbSeason = parsedTitle.Response?.Season?.FirstOrDefault() ?? 0;
file.ImdbEpisode = parsedTitle.Episodes.FirstOrDefault(); file.ImdbEpisode = parsedTitle.Response?.Episode?.FirstOrDefault() ?? 0;
files.Add(file); files.Add(file);
} }
return files; return files;
} }
catch (Exception) catch (Exception ex)
{ {
logger.LogWarning("Failed to map metadata to files collection: {Exception}", ex.Message);
return []; return [];
} }
} }
public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, IReadOnlyList<TorrentContent> Metadata) public static async Task<IReadOnlyList<SubtitleFile>> MapMetadataToSubtitlesCollection(IDataStorage storage, string InfoHash, IReadOnlyList<TorrentContent> Metadata,
ILogger<WriteQbitMetadataConsumer> logger)
{ {
try try
{ {
@@ -70,8 +79,9 @@ public static class QbitMetaToTorrentMeta
return files; return files;
} }
catch (Exception) catch (Exception ex)
{ {
logger.LogWarning("Failed to map metadata to subtitles collection: {Exception}", ex.Message);
return []; return [];
} }
} }

View File

@@ -53,6 +53,12 @@ public class QbitMetadataSagaStateMachine : MassTransitStateMachine<QbitMetadata
.Then( .Then(
context => context =>
{ {
if (!context.Message.WithFiles)
{
logger.LogInformation("No files written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
return;
}
logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId); logger.LogInformation("Metadata Written for torrent {InfoHash} in Saga {SagaId}", context.Saga.Torrent.InfoHash, context.Saga.CorrelationId);
}) })
.TransitionTo(Completed) .TransitionTo(Completed)

View File

@@ -16,7 +16,7 @@ public record WriteQbitMetadata(Torrent Torrent, QBitMetadataResponse Metadata,
} }
[EntityName("metadata-written-qbit-collector")] [EntityName("metadata-written-qbit-collector")]
public record QbitMetadataWritten(QBitMetadataResponse Metadata) : CorrelatedBy<Guid> public record QbitMetadataWritten(QBitMetadataResponse Metadata, bool WithFiles) : CorrelatedBy<Guid>
{ {
public Guid CorrelationId { get; init; } = Metadata.CorrelationId; public Guid CorrelationId { get; init; } = Metadata.CorrelationId;

View File

@@ -1,25 +1,30 @@
namespace QBitCollector.Features.Worker; namespace QBitCollector.Features.Worker;
public class WriteQbitMetadataConsumer(IParseTorrentTitle parseTorrentTitle, IDataStorage dataStorage) : IConsumer<WriteQbitMetadata> public class WriteQbitMetadataConsumer(IRankTorrentName rankTorrentName, IDataStorage dataStorage, ILogger<WriteQbitMetadataConsumer> logger) : IConsumer<WriteQbitMetadata>
{ {
public async Task Consume(ConsumeContext<WriteQbitMetadata> context) public async Task Consume(ConsumeContext<WriteQbitMetadata> context)
{ {
var request = context.Message; var request = context.Message;
var torrentFiles = QbitMetaToTorrentMeta.MapMetadataToFilesCollection(parseTorrentTitle, request.Torrent, request.ImdbId, request.Metadata.Metadata);
if (torrentFiles.Any()) var torrentFiles = QbitMetaToTorrentMeta.MapMetadataToFilesCollection(
rankTorrentName, request.Torrent, request.ImdbId, request.Metadata.Metadata, logger);
if (!torrentFiles.Any())
{ {
await dataStorage.InsertFiles(torrentFiles); await context.Publish(new QbitMetadataWritten(request.Metadata, false));
return;
var subtitles = await QbitMetaToTorrentMeta.MapMetadataToSubtitlesCollection(dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata);
if (subtitles.Any())
{
await dataStorage.InsertSubtitles(subtitles);
}
} }
await context.Publish(new QbitMetadataWritten(request.Metadata)); await dataStorage.InsertFiles(torrentFiles);
var subtitles = await QbitMetaToTorrentMeta.MapMetadataToSubtitlesCollection(
dataStorage, request.Torrent.InfoHash, request.Metadata.Metadata, logger);
if (subtitles.Any())
{
await dataStorage.InsertSubtitles(subtitles);
}
await context.Publish(new QbitMetadataWritten(request.Metadata, true));
} }
} }

View File

@@ -1,17 +1,11 @@
// Global using directives // Global using directives
global using System.Text.Json; global using System.Text.Json;
global using System.Text.Json.Serialization;
global using System.Threading.Channels;
global using MassTransit; global using MassTransit;
global using MassTransit.Mediator;
global using Microsoft.AspNetCore.Builder; global using Microsoft.AspNetCore.Builder;
global using Microsoft.Extensions.Caching.Distributed; global using Microsoft.Extensions.Caching.Distributed;
global using Microsoft.Extensions.Caching.Memory; global using Microsoft.Extensions.Caching.Memory;
global using Microsoft.Extensions.DependencyInjection; global using Microsoft.Extensions.DependencyInjection;
global using Polly;
global using Polly.Extensions.Http;
global using PromKnight.ParseTorrentTitle;
global using QBitCollector.Extensions; global using QBitCollector.Extensions;
global using QBitCollector.Features.Qbit; global using QBitCollector.Features.Qbit;
global using QBitCollector.Features.Trackers; global using QBitCollector.Features.Trackers;
@@ -21,4 +15,6 @@ global using SharedContracts.Configuration;
global using SharedContracts.Dapper; global using SharedContracts.Dapper;
global using SharedContracts.Extensions; global using SharedContracts.Extensions;
global using SharedContracts.Models; global using SharedContracts.Models;
global using SharedContracts.Python;
global using SharedContracts.Python.RTN;
global using SharedContracts.Requests; global using SharedContracts.Requests;

View File

@@ -18,7 +18,6 @@
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" /> <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" /> <PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
<PackageReference Include="Polly" Version="8.3.1" /> <PackageReference Include="Polly" Version="8.3.1" />
<PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
<PackageReference Include="QBittorrent.Client" Version="1.9.23349.1" /> <PackageReference Include="QBittorrent.Client" Version="1.9.23349.1" />
<PackageReference Include="Serilog" Version="3.1.1" /> <PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" /> <PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
@@ -31,10 +30,30 @@
<None Include="Configuration\logging.json"> <None Include="Configuration\logging.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory> <CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None> </None>
<Content Remove="eng\**" />
<None Remove="eng\**" />
<None Update="requirements.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\shared\SharedContracts.csproj" /> <ProjectReference Include="..\shared\SharedContracts.csproj" />
</ItemGroup> </ItemGroup>
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
<Content Remove="python\**" />
<None Include="python\**">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<Compile Remove="eng\**" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Remove="eng\**" />
</ItemGroup>
</Project> </Project>

View File

@@ -6,6 +6,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shared", "shared", "{2C0A0F
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "QBitCollector", "QBitCollector.csproj", "{1EF124BE-6EBE-4D9E-846C-FFF814999F3B}" Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "QBitCollector", "QBitCollector.csproj", "{1EF124BE-6EBE-4D9E-846C-FFF814999F3B}"
EndProject EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{2F2EA33A-1303-405D-939B-E9394D262BC9}"
ProjectSection(SolutionItems) = preProject
eng\install-python-reqs.ps1 = eng\install-python-reqs.ps1
eng\install-python-reqs.sh = eng\install-python-reqs.sh
EndProjectSection
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU Debug|Any CPU = Debug|Any CPU

View File

@@ -0,0 +1,3 @@
# Recreates the project's local "python" folder (sibling of this script's directory)
# and installs the packages listed in requirements.txt into it.
# Paths are resolved from the script's own location, so it can be run from any directory.
$ErrorActionPreference = 'Stop'

$projectDir = Split-Path -Parent $PSScriptRoot
$targetDir = Join-Path $projectDir 'python'
$requirements = Join-Path $projectDir 'requirements.txt'

# Only remove the folder when it exists; on a first run there is nothing to delete.
if (Test-Path $targetDir) {
    Remove-Item -Recurse -Force $targetDir
}

New-Item -ItemType Directory -Force -Path $targetDir | Out-Null

python -m pip install -r $requirements -t $targetDir

# pip is a native command, so its failure must be surfaced explicitly.
if ($LASTEXITCODE -ne 0) {
    exit $LASTEXITCODE
}

View File

@@ -0,0 +1,5 @@
#!/bin/bash
# Recreates the project's local "python" folder (sibling of this script's directory)
# and installs the packages listed in requirements.txt into it.

# Abort on the first failing step instead of carrying on with a half-built folder.
set -euo pipefail

# Resolve paths from the script's own location so it works from any working directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
project_dir="$(dirname "$script_dir")"
target_dir="$project_dir/python"

rm -rf "$target_dir"
mkdir -p "$target_dir"
python3 -m pip install -r "$project_dir/requirements.txt" -t "$target_dir"

View File

@@ -0,0 +1 @@
rank-torrent-name==0.2.13

View File

@@ -118,7 +118,7 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
public async Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string torrentType, int? year, CancellationToken cancellationToken = default) => public async Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string torrentType, int? year, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection => await ExecuteCommandAsync(async connection =>
{ {
var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\", \"score\" as Score from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{(torrentType.Equals("movie", StringComparison.OrdinalIgnoreCase) ? "movie" : "tvSeries")}'"; var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\", \"score\" as Score, \"category\" as Category from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{torrentType}'";
query += year is not null ? $", {year}" : ", NULL"; query += year is not null ? $", {year}" : ", NULL";
query += ", 1)"; query += ", 1)";
@@ -167,12 +167,7 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
INSERT INTO subtitles INSERT INTO subtitles
("infoHash", "fileIndex", "fileId", "title") ("infoHash", "fileIndex", "fileId", "title")
VALUES VALUES
(@InfoHash, @FileIndex, @FileId, @Title) (@InfoHash, @FileIndex, @FileId, @Title);
ON CONFLICT
("infoHash", "fileIndex")
DO UPDATE SET
"fileId" = COALESCE(subtitles."fileId", EXCLUDED."fileId"),
"title" = COALESCE(subtitles."title", EXCLUDED."title");
"""; """;
await connection.ExecuteAsync(query, subtitles); await connection.ExecuteAsync(query, subtitles);

View File

@@ -0,0 +1,19 @@
namespace SharedContracts.Extensions;
/// <summary>
/// Extension methods for building dictionaries from sequences.
/// </summary>
public static class DictionaryExtensions
{
    /// <summary>
    /// Projects each element of <paramref name="source"/> to a key/value pair and collects
    /// the pairs into a new <see cref="ConcurrentDictionary{TKey, TValue}"/>.
    /// </summary>
    /// <param name="source">Sequence to read elements from.</param>
    /// <param name="keySelector">Produces the key for an element.</param>
    /// <param name="valueSelector">Produces the value for an element.</param>
    /// <returns>
    /// A dictionary holding one entry per distinct key. When several elements map to the
    /// same key, the first one encountered is kept and later ones are ignored.
    /// </returns>
    public static ConcurrentDictionary<TKey, TValue> ToConcurrentDictionary<TSource, TKey, TValue>(
        this IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector,
        Func<TSource, TValue> valueSelector) where TKey : notnull
    {
        var result = new ConcurrentDictionary<TKey, TValue>();

        using var items = source.GetEnumerator();
        while (items.MoveNext())
        {
            var item = items.Current;
            var key = keySelector(item);
            var value = valueSelector(item);

            // TryAdd leaves an existing entry untouched, so duplicates never throw.
            _ = result.TryAdd(key, value);
        }

        return result;
    }
}

View File

@@ -1,5 +1,3 @@
using System.Text.RegularExpressions;
namespace SharedContracts.Extensions; namespace SharedContracts.Extensions;
public static partial class StringExtensions public static partial class StringExtensions

View File

@@ -1,7 +1,9 @@
// Global using directives // Global using directives
global using System.Collections.Concurrent;
global using System.Text.Json; global using System.Text.Json;
global using System.Text.Json.Serialization; global using System.Text.Json.Serialization;
global using System.Text.RegularExpressions;
global using Dapper; global using Dapper;
global using MassTransit; global using MassTransit;
global using Microsoft.AspNetCore.Builder; global using Microsoft.AspNetCore.Builder;

View File

@@ -2,5 +2,6 @@ namespace SharedContracts.Python.RTN;
public interface IRankTorrentName public interface IRankTorrentName
{ {
ParseTorrentTitleResponse Parse(string title); ParseTorrentTitleResponse Parse(string title, bool trashGarbage = true, bool logErrors = false, bool throwOnErrors = false);
List<ParseTorrentTitleResponse?> BatchParse(IReadOnlyCollection<string> titles, int chunkSize = 500, int workers = 20, bool trashGarbage = true, bool logErrors = false, bool throwOnErrors = false);
} }

View File

@@ -12,33 +12,102 @@ public class RankTorrentName : IRankTorrentName
_pythonEngineService = pythonEngineService; _pythonEngineService = pythonEngineService;
InitModules(); InitModules();
} }
public ParseTorrentTitleResponse Parse(string title) =>
_pythonEngineService.ExecutePythonOperationWithDefault(
() =>
{
var result = _rtn?.parse(title);
return ParseResult(result);
}, new ParseTorrentTitleResponse(false, null), nameof(Parse), throwOnErrors: false, logErrors: false);
public ParseTorrentTitleResponse Parse(string title, bool trashGarbage = true, bool logErrors = false, bool throwOnErrors = false)
private static ParseTorrentTitleResponse ParseResult(dynamic result)
{ {
if (result == null) try
{ {
using var gil = Py.GIL();
var result = _rtn?.parse(title, trashGarbage);
return ParseResult(result);
}
catch (Exception ex)
{
if (logErrors)
{
_pythonEngineService.Logger.LogError(ex, "Python Error: {Message} ({OperationName})", ex.Message, nameof(Parse));
}
if (throwOnErrors)
{
throw;
}
return new(false, null); return new(false, null);
} }
}
public List<ParseTorrentTitleResponse?> BatchParse(IReadOnlyCollection<string> titles, int chunkSize = 500, int workers = 20, bool trashGarbage = true, bool logErrors = false, bool throwOnErrors = false)
{
var responses = new List<ParseTorrentTitleResponse?>();
var json = result.model_dump_json()?.As<string?>(); try
if (json is null || string.IsNullOrEmpty(json))
{ {
return new(false, null); if (titles.Count == 0)
{
return responses;
}
using var gil = Py.GIL();
var pythonList = new PyList(titles.Select(x => new PyString(x).As<PyObject>()).ToArray());
PyList results = _rtn?.batch_parse(pythonList, trashGarbage, chunkSize, workers);
if (results == null)
{
return responses;
}
responses.AddRange(results.Select(ParseResult));
} }
catch (Exception ex)
{
if (logErrors)
{
_pythonEngineService.Logger.LogError(ex, "Python Error: {Message} ({OperationName})", ex.Message, nameof(Parse));
}
if (throwOnErrors)
{
throw;
}
}
return responses;
}
private static ParseTorrentTitleResponse? ParseResult(dynamic result)
{
try
{
if (result == null)
{
return new(false, null);
}
var json = result.model_dump_json()?.As<string?>();
if (json is null || string.IsNullOrEmpty(json))
{
return new(false, null);
}
var mediaType = result.GetAttr("type")?.As<string>();
var response = JsonSerializer.Deserialize<RtnResponse>(json); if (string.IsNullOrEmpty(mediaType))
{
return new(true, response); return new(false, null);
}
var response = JsonSerializer.Deserialize<RtnResponse>(json);
response.IsMovie = mediaType.Equals("movie", StringComparison.OrdinalIgnoreCase);
return new(true, response);
}
catch
{
return new(false, null);
}
} }
private void InitModules() => private void InitModules() =>

View File

@@ -76,8 +76,8 @@ public class RtnResponse
[JsonPropertyName("extended")] [JsonPropertyName("extended")]
public bool Extended { get; set; } public bool Extended { get; set; }
public bool IsMovie => (Season == null && Episode == null) || (Season?.Count == 0 && Episode?.Count == 0); public bool IsMovie { get; set; }
public string ToJson() => this.AsJson(); public string ToJson() => this.AsJson();
} }

View File

@@ -82,11 +82,4 @@ public static class ServiceCollectionExtensions
x.AddConsumer<PerformIngestionConsumer>(); x.AddConsumer<PerformIngestionConsumer>();
} }
internal static IServiceCollection AddServiceConfiguration(this IServiceCollection services)
{
services.AddSingleton<IParseTorrentTitle, ParseTorrentTitle>();
return services;
}
} }

View File

@@ -5,7 +5,6 @@ global using MassTransit;
global using MassTransit.Mediator; global using MassTransit.Mediator;
global using Microsoft.AspNetCore.Builder; global using Microsoft.AspNetCore.Builder;
global using Microsoft.Extensions.DependencyInjection; global using Microsoft.Extensions.DependencyInjection;
global using PromKnight.ParseTorrentTitle;
global using SharedContracts.Configuration; global using SharedContracts.Configuration;
global using SharedContracts.Dapper; global using SharedContracts.Dapper;
global using SharedContracts.Extensions; global using SharedContracts.Extensions;

View File

@@ -10,7 +10,6 @@ builder.Host
builder.Services builder.Services
.RegisterMassTransit() .RegisterMassTransit()
.AddServiceConfiguration()
.AddDatabase(); .AddDatabase();
var app = builder.Build(); var app = builder.Build();

View File

@@ -16,7 +16,6 @@
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" /> <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" /> <PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.3" />
<PackageReference Include="Polly" Version="8.3.1" /> <PackageReference Include="Polly" Version="8.3.1" />
<PackageReference Include="PromKnight.ParseTorrentTitle" Version="1.0.4" />
<PackageReference Include="Serilog" Version="3.1.1" /> <PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" /> <PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" /> <PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />