Fix DMM parser and a couple of other minor issues. (#226)

This commit is contained in:
Giorgi
2024-11-19 11:00:06 -08:00
committed by GitHub
parent a7d5944d25
commit 594320ed63
11 changed files with 60 additions and 39 deletions

View File

@@ -15,7 +15,7 @@ WORKDIR /app
ENV PYTHONUNBUFFERED=1 ENV PYTHONUNBUFFERED=1
RUN apk add --update --no-cache python3=~3.11.9-r0 py3-pip && ln -sf python3 /usr/bin/python RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
COPY --from=build /src/out . COPY --from=build /src/out .

View File

@@ -8,12 +8,14 @@ public class PostgresConfiguration
private const string PasswordVariable = "PASSWORD"; private const string PasswordVariable = "PASSWORD";
private const string DatabaseVariable = "DB"; private const string DatabaseVariable = "DB";
private const string PortVariable = "PORT"; private const string PortVariable = "PORT";
private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable); private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable); private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable); private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable); private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432); private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};"; public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
} }

View File

@@ -134,7 +134,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
{ {
try try
{ {
await using var connection = CreateNpgsqlConnection(); await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync(); await connection.OpenAsync();
await operation(connection); await operation(connection);
@@ -145,16 +145,6 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
} }
} }
private NpgsqlConnection CreateNpgsqlConnection()
{
var connectionStringBuilder = new NpgsqlConnectionStringBuilder(configuration.StorageConnectionString)
{
CommandTimeout = 3000,
};
return new(connectionStringBuilder.ConnectionString);
}
private async Task ExecuteCommandWithTransactionAsync(Func<NpgsqlConnection, NpgsqlTransaction, Task> operation, NpgsqlTransaction transaction, string errorMessage) private async Task ExecuteCommandWithTransactionAsync(Func<NpgsqlConnection, NpgsqlTransaction, Task> operation, NpgsqlTransaction transaction, string errorMessage)
{ {
try try

View File

@@ -13,7 +13,7 @@
<PackageReference Include="Dapper" Version="2.1.35" /> <PackageReference Include="Dapper" Version="2.1.35" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" /> <PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" /> <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Npgsql" Version="8.0.2" /> <PackageReference Include="Npgsql" Version="8.0.3" />
<PackageReference Include="Serilog" Version="3.1.1" /> <PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" /> <PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" /> <PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />

View File

@@ -14,7 +14,7 @@ WORKDIR /app
ENV PYTHONUNBUFFERED=1 ENV PYTHONUNBUFFERED=1
RUN apk add --update --no-cache python3=~3.11.9-r0 py3-pip && ln -sf python3 /usr/bin/python RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
COPY --from=build /src/out . COPY --from=build /src/out .

View File

@@ -14,7 +14,7 @@ public partial class DebridMediaManagerCrawler(
protected override string Source => "DMM"; protected override string Source => "DMM";
private const int ParallelismCount = 4; private const int ParallelismCount = 4;
public override async Task Execute() public override async Task Execute()
{ {
var tempDirectory = await dmmFileDownloader.DownloadFileToTempPath(CancellationToken.None); var tempDirectory = await dmmFileDownloader.DownloadFileToTempPath(CancellationToken.None);
@@ -24,7 +24,7 @@ public partial class DebridMediaManagerCrawler(
logger.LogInformation("Found {Files} files to parse", files.Length); logger.LogInformation("Found {Files} files to parse", files.Length);
var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount }; var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
await Parallel.ForEachAsync(files, options, async (file, token) => await Parallel.ForEachAsync(files, options, async (file, token) =>
{ {
var fileName = Path.GetFileName(file); var fileName = Path.GetFileName(file);
@@ -69,9 +69,9 @@ public partial class DebridMediaManagerCrawler(
if (page.TryGetValue(infoHash, out var dmmContent) && if (page.TryGetValue(infoHash, out var dmmContent) &&
successfulResponses.TryGetValue(dmmContent.Filename, out var parsedResponse)) successfulResponses.TryGetValue(dmmContent.Filename, out var parsedResponse))
{ {
page[infoHash] = dmmContent with {ParseResponse = parsedResponse}; page[infoHash] = dmmContent with { ParseResponse = parsedResponse };
} }
return ValueTask.CompletedTask; return ValueTask.CompletedTask;
}); });
} }
@@ -86,7 +86,7 @@ public partial class DebridMediaManagerCrawler(
} }
var pageSource = await File.ReadAllTextAsync(filePath); var pageSource = await File.ReadAllTextAsync(filePath);
var match = HashCollectionMatcher().Match(pageSource); var match = HashCollectionMatcher().Match(pageSource);
if (!match.Success) if (!match.Success)
@@ -106,9 +106,34 @@ public partial class DebridMediaManagerCrawler(
var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson.Value); var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson.Value);
var json = JsonDocument.Parse(decodedJson); JsonElement arrayToProcess;
try
var torrents = await json.RootElement.EnumerateArray() {
var json = JsonDocument.Parse(decodedJson);
if (json.RootElement.ValueKind == JsonValueKind.Object &&
json.RootElement.TryGetProperty("torrents", out var torrentsProperty) &&
torrentsProperty.ValueKind == JsonValueKind.Array)
{
arrayToProcess = torrentsProperty;
}
else if (json.RootElement.ValueKind == JsonValueKind.Array)
{
arrayToProcess = json.RootElement;
}
else
{
logger.LogWarning("Unexpected JSON format in {Name}", name);
return [];
}
}
catch (Exception ex)
{
logger.LogError("Failed to parse JSON {decodedJson} for {Name}: {Exception}", decodedJson, name, ex);
return [];
}
var torrents = await arrayToProcess.EnumerateArray()
.ToAsyncEnumerable() .ToAsyncEnumerable()
.Select(ParsePageContent) .Select(ParsePageContent)
.Where(t => t is not null) .Where(t => t is not null)
@@ -120,7 +145,7 @@ public partial class DebridMediaManagerCrawler(
await Storage.MarkPageAsIngested(filenameOnly); await Storage.MarkPageAsIngested(filenameOnly);
return []; return [];
} }
var torrentDictionary = torrents var torrentDictionary = torrents
.Where(x => x is not null) .Where(x => x is not null)
.GroupBy(x => x.InfoHash) .GroupBy(x => x.InfoHash)
@@ -141,7 +166,7 @@ public partial class DebridMediaManagerCrawler(
{ {
var (infoHash, dmmContent) = kvp; var (infoHash, dmmContent) = kvp;
var parsedTorrent = dmmContent.ParseResponse; var parsedTorrent = dmmContent.ParseResponse;
if (parsedTorrent is not {Success: true}) if (parsedTorrent is not { Success: true })
{ {
return; return;
} }
@@ -192,7 +217,7 @@ public partial class DebridMediaManagerCrawler(
Category = AssignCategory(result), Category = AssignCategory(result),
RtnResponse = parsedTorrent.Response.ToJson(), RtnResponse = parsedTorrent.Response.ToJson(),
}; };
private Task AddToCache(string cacheKey, ImdbEntry best) private Task AddToCache(string cacheKey, ImdbEntry best)
{ {
@@ -200,19 +225,19 @@ public partial class DebridMediaManagerCrawler(
{ {
AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1), AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1),
}; };
return cache.SetStringAsync(cacheKey, JsonSerializer.Serialize(best), cacheOptions); return cache.SetStringAsync(cacheKey, JsonSerializer.Serialize(best), cacheOptions);
} }
private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string cacheKey) private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string cacheKey)
{ {
var cachedImdbId = await cache.GetStringAsync(cacheKey); var cachedImdbId = await cache.GetStringAsync(cacheKey);
if (!string.IsNullOrEmpty(cachedImdbId)) if (!string.IsNullOrEmpty(cachedImdbId))
{ {
return (true, JsonSerializer.Deserialize<ImdbEntry>(cachedImdbId)); return (true, JsonSerializer.Deserialize<ImdbEntry>(cachedImdbId));
} }
return (false, null); return (false, null);
} }
@@ -222,7 +247,7 @@ public partial class DebridMediaManagerCrawler(
return (pageIngested, filename); return (pageIngested, filename);
} }
private static string AssignCategory(ImdbEntry entry) => private static string AssignCategory(ImdbEntry entry) =>
entry.Category.ToLower() switch entry.Category.ToLower() switch
{ {
@@ -230,9 +255,9 @@ public partial class DebridMediaManagerCrawler(
var category when string.Equals(category, "tvSeries", StringComparison.OrdinalIgnoreCase) => "tv", var category when string.Equals(category, "tvSeries", StringComparison.OrdinalIgnoreCase) => "tv",
_ => "unknown", _ => "unknown",
}; };
private static string GetCacheKey(string category, string title, int year) => $"{category.ToLowerInvariant()}:{year}:{title.ToLowerInvariant()}"; private static string GetCacheKey(string category, string title, int year) => $"{category.ToLowerInvariant()}:{year}:{title.ToLowerInvariant()}";
private static ExtractedDMMContent? ParsePageContent(JsonElement item) private static ExtractedDMMContent? ParsePageContent(JsonElement item)
{ {
if (!item.TryGetProperty("filename", out var filenameElement) || if (!item.TryGetProperty("filename", out var filenameElement) ||
@@ -241,10 +266,10 @@ public partial class DebridMediaManagerCrawler(
{ {
return null; return null;
} }
return new(filenameElement.GetString(), bytesElement.GetInt64(), hashElement.GetString()); return new(filenameElement.GetString(), bytesElement.GetInt64(), hashElement.GetString());
} }
private record DmmContent(string Filename, long Bytes, ParseTorrentTitleResponse? ParseResponse); private record DmmContent(string Filename, long Bytes, ParseTorrentTitleResponse? ParseResponse);
private record ExtractedDMMContent(string Filename, long Bytes, string InfoHash); private record ExtractedDMMContent(string Filename, long Bytes, string InfoHash);
private record RtnBatchProcessable(string InfoHash, string Filename); private record RtnBatchProcessable(string InfoHash, string Filename);

View File

@@ -15,7 +15,7 @@ WORKDIR /app
ENV PYTHONUNBUFFERED=1 ENV PYTHONUNBUFFERED=1
RUN apk add --update --no-cache python3=~3.11.9-r0 py3-pip && ln -sf python3 /usr/bin/python RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
COPY --from=build /src/out . COPY --from=build /src/out .

View File

@@ -8,12 +8,14 @@ public class PostgresConfiguration
private const string PasswordVariable = "PASSWORD"; private const string PasswordVariable = "PASSWORD";
private const string DatabaseVariable = "DB"; private const string DatabaseVariable = "DB";
private const string PortVariable = "PORT"; private const string PortVariable = "PORT";
private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable); private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable); private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable); private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable); private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432); private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};"; public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
} }

View File

@@ -15,7 +15,7 @@
<PackageReference Include="Dapper" Version="2.1.35" /> <PackageReference Include="Dapper" Version="2.1.35" />
<PackageReference Include="MassTransit.Abstractions" Version="8.2.0" /> <PackageReference Include="MassTransit.Abstractions" Version="8.2.0" />
<PackageReference Include="MassTransit.RabbitMQ" Version="8.2.0" /> <PackageReference Include="MassTransit.RabbitMQ" Version="8.2.0" />
<PackageReference Include="Npgsql" Version="8.0.2" /> <PackageReference Include="Npgsql" Version="8.0.3" />
<PackageReference Include="pythonnet" Version="3.0.3" /> <PackageReference Include="pythonnet" Version="3.0.3" />
<PackageReference Include="Serilog" Version="3.1.1" /> <PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" /> <PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" />

View File

@@ -8,12 +8,14 @@ public class PostgresConfiguration
private const string PasswordVariable = "PASSWORD"; private const string PasswordVariable = "PASSWORD";
private const string DatabaseVariable = "DB"; private const string DatabaseVariable = "DB";
private const string PortVariable = "PORT"; private const string PortVariable = "PORT";
private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable); private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable); private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable); private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable); private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432); private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};"; public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
} }

View File

@@ -12,7 +12,7 @@
<PackageReference Include="Dapper" Version="2.1.28" /> <PackageReference Include="Dapper" Version="2.1.28" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" /> <PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" /> <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Npgsql" Version="8.0.1" /> <PackageReference Include="Npgsql" Version="8.0.3" />
<PackageReference Include="Serilog" Version="3.1.1" /> <PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" /> <PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" /> <PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />