Fix DMM parser and a couple of other minor issues. (#226)
This commit is contained in:
@@ -15,7 +15,7 @@ WORKDIR /app
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
RUN apk add --update --no-cache python3=~3.11.9-r0 py3-pip && ln -sf python3 /usr/bin/python
|
||||
RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
|
||||
|
||||
COPY --from=build /src/out .
|
||||
|
||||
|
||||
@@ -8,12 +8,14 @@ public class PostgresConfiguration
|
||||
private const string PasswordVariable = "PASSWORD";
|
||||
private const string DatabaseVariable = "DB";
|
||||
private const string PortVariable = "PORT";
|
||||
private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds
|
||||
|
||||
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
|
||||
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
|
||||
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
|
||||
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
|
||||
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
|
||||
private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);
|
||||
|
||||
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};";
|
||||
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
|
||||
}
|
||||
|
||||
@@ -134,7 +134,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
|
||||
{
|
||||
try
|
||||
{
|
||||
await using var connection = CreateNpgsqlConnection();
|
||||
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
|
||||
await connection.OpenAsync();
|
||||
|
||||
await operation(connection);
|
||||
@@ -145,16 +145,6 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
|
||||
}
|
||||
}
|
||||
|
||||
private NpgsqlConnection CreateNpgsqlConnection()
|
||||
{
|
||||
var connectionStringBuilder = new NpgsqlConnectionStringBuilder(configuration.StorageConnectionString)
|
||||
{
|
||||
CommandTimeout = 3000,
|
||||
};
|
||||
|
||||
return new(connectionStringBuilder.ConnectionString);
|
||||
}
|
||||
|
||||
private async Task ExecuteCommandWithTransactionAsync(Func<NpgsqlConnection, NpgsqlTransaction, Task> operation, NpgsqlTransaction transaction, string errorMessage)
|
||||
{
|
||||
try
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
<PackageReference Include="Dapper" Version="2.1.35" />
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||
<PackageReference Include="Npgsql" Version="8.0.2" />
|
||||
<PackageReference Include="Npgsql" Version="8.0.3" />
|
||||
<PackageReference Include="Serilog" Version="3.1.1" />
|
||||
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
|
||||
|
||||
@@ -14,7 +14,7 @@ WORKDIR /app
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
RUN apk add --update --no-cache python3=~3.11.9-r0 py3-pip && ln -sf python3 /usr/bin/python
|
||||
RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
|
||||
|
||||
COPY --from=build /src/out .
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ public partial class DebridMediaManagerCrawler(
|
||||
protected override string Source => "DMM";
|
||||
|
||||
private const int ParallelismCount = 4;
|
||||
|
||||
|
||||
public override async Task Execute()
|
||||
{
|
||||
var tempDirectory = await dmmFileDownloader.DownloadFileToTempPath(CancellationToken.None);
|
||||
@@ -24,7 +24,7 @@ public partial class DebridMediaManagerCrawler(
|
||||
logger.LogInformation("Found {Files} files to parse", files.Length);
|
||||
|
||||
var options = new ParallelOptions { MaxDegreeOfParallelism = ParallelismCount };
|
||||
|
||||
|
||||
await Parallel.ForEachAsync(files, options, async (file, token) =>
|
||||
{
|
||||
var fileName = Path.GetFileName(file);
|
||||
@@ -69,9 +69,9 @@ public partial class DebridMediaManagerCrawler(
|
||||
if (page.TryGetValue(infoHash, out var dmmContent) &&
|
||||
successfulResponses.TryGetValue(dmmContent.Filename, out var parsedResponse))
|
||||
{
|
||||
page[infoHash] = dmmContent with {ParseResponse = parsedResponse};
|
||||
page[infoHash] = dmmContent with { ParseResponse = parsedResponse };
|
||||
}
|
||||
|
||||
|
||||
return ValueTask.CompletedTask;
|
||||
});
|
||||
}
|
||||
@@ -86,7 +86,7 @@ public partial class DebridMediaManagerCrawler(
|
||||
}
|
||||
|
||||
var pageSource = await File.ReadAllTextAsync(filePath);
|
||||
|
||||
|
||||
var match = HashCollectionMatcher().Match(pageSource);
|
||||
|
||||
if (!match.Success)
|
||||
@@ -106,9 +106,34 @@ public partial class DebridMediaManagerCrawler(
|
||||
|
||||
var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson.Value);
|
||||
|
||||
var json = JsonDocument.Parse(decodedJson);
|
||||
|
||||
var torrents = await json.RootElement.EnumerateArray()
|
||||
JsonElement arrayToProcess;
|
||||
try
|
||||
{
|
||||
var json = JsonDocument.Parse(decodedJson);
|
||||
|
||||
if (json.RootElement.ValueKind == JsonValueKind.Object &&
|
||||
json.RootElement.TryGetProperty("torrents", out var torrentsProperty) &&
|
||||
torrentsProperty.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
arrayToProcess = torrentsProperty;
|
||||
}
|
||||
else if (json.RootElement.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
arrayToProcess = json.RootElement;
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.LogWarning("Unexpected JSON format in {Name}", name);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogError("Failed to parse JSON {decodedJson} for {Name}: {Exception}", decodedJson, name, ex);
|
||||
return [];
|
||||
}
|
||||
|
||||
var torrents = await arrayToProcess.EnumerateArray()
|
||||
.ToAsyncEnumerable()
|
||||
.Select(ParsePageContent)
|
||||
.Where(t => t is not null)
|
||||
@@ -120,7 +145,7 @@ public partial class DebridMediaManagerCrawler(
|
||||
await Storage.MarkPageAsIngested(filenameOnly);
|
||||
return [];
|
||||
}
|
||||
|
||||
|
||||
var torrentDictionary = torrents
|
||||
.Where(x => x is not null)
|
||||
.GroupBy(x => x.InfoHash)
|
||||
@@ -141,7 +166,7 @@ public partial class DebridMediaManagerCrawler(
|
||||
{
|
||||
var (infoHash, dmmContent) = kvp;
|
||||
var parsedTorrent = dmmContent.ParseResponse;
|
||||
if (parsedTorrent is not {Success: true})
|
||||
if (parsedTorrent is not { Success: true })
|
||||
{
|
||||
return;
|
||||
}
|
||||
@@ -192,7 +217,7 @@ public partial class DebridMediaManagerCrawler(
|
||||
Category = AssignCategory(result),
|
||||
RtnResponse = parsedTorrent.Response.ToJson(),
|
||||
};
|
||||
|
||||
|
||||
|
||||
private Task AddToCache(string cacheKey, ImdbEntry best)
|
||||
{
|
||||
@@ -200,19 +225,19 @@ public partial class DebridMediaManagerCrawler(
|
||||
{
|
||||
AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1),
|
||||
};
|
||||
|
||||
|
||||
return cache.SetStringAsync(cacheKey, JsonSerializer.Serialize(best), cacheOptions);
|
||||
}
|
||||
|
||||
private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string cacheKey)
|
||||
{
|
||||
var cachedImdbId = await cache.GetStringAsync(cacheKey);
|
||||
|
||||
|
||||
if (!string.IsNullOrEmpty(cachedImdbId))
|
||||
{
|
||||
return (true, JsonSerializer.Deserialize<ImdbEntry>(cachedImdbId));
|
||||
}
|
||||
|
||||
|
||||
return (false, null);
|
||||
}
|
||||
|
||||
@@ -222,7 +247,7 @@ public partial class DebridMediaManagerCrawler(
|
||||
|
||||
return (pageIngested, filename);
|
||||
}
|
||||
|
||||
|
||||
private static string AssignCategory(ImdbEntry entry) =>
|
||||
entry.Category.ToLower() switch
|
||||
{
|
||||
@@ -230,9 +255,9 @@ public partial class DebridMediaManagerCrawler(
|
||||
var category when string.Equals(category, "tvSeries", StringComparison.OrdinalIgnoreCase) => "tv",
|
||||
_ => "unknown",
|
||||
};
|
||||
|
||||
|
||||
private static string GetCacheKey(string category, string title, int year) => $"{category.ToLowerInvariant()}:{year}:{title.ToLowerInvariant()}";
|
||||
|
||||
|
||||
private static ExtractedDMMContent? ParsePageContent(JsonElement item)
|
||||
{
|
||||
if (!item.TryGetProperty("filename", out var filenameElement) ||
|
||||
@@ -241,10 +266,10 @@ public partial class DebridMediaManagerCrawler(
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
return new(filenameElement.GetString(), bytesElement.GetInt64(), hashElement.GetString());
|
||||
}
|
||||
|
||||
|
||||
private record DmmContent(string Filename, long Bytes, ParseTorrentTitleResponse? ParseResponse);
|
||||
private record ExtractedDMMContent(string Filename, long Bytes, string InfoHash);
|
||||
private record RtnBatchProcessable(string InfoHash, string Filename);
|
||||
|
||||
@@ -15,7 +15,7 @@ WORKDIR /app
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
RUN apk add --update --no-cache python3=~3.11.9-r0 py3-pip && ln -sf python3 /usr/bin/python
|
||||
RUN apk add --update --no-cache python3=~3.11 py3-pip && ln -sf python3 /usr/bin/python
|
||||
|
||||
COPY --from=build /src/out .
|
||||
|
||||
|
||||
@@ -8,12 +8,14 @@ public class PostgresConfiguration
|
||||
private const string PasswordVariable = "PASSWORD";
|
||||
private const string DatabaseVariable = "DB";
|
||||
private const string PortVariable = "PORT";
|
||||
private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds
|
||||
|
||||
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
|
||||
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
|
||||
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
|
||||
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
|
||||
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
|
||||
private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);
|
||||
|
||||
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};";
|
||||
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
<PackageReference Include="Dapper" Version="2.1.35" />
|
||||
<PackageReference Include="MassTransit.Abstractions" Version="8.2.0" />
|
||||
<PackageReference Include="MassTransit.RabbitMQ" Version="8.2.0" />
|
||||
<PackageReference Include="Npgsql" Version="8.0.2" />
|
||||
<PackageReference Include="Npgsql" Version="8.0.3" />
|
||||
<PackageReference Include="pythonnet" Version="3.0.3" />
|
||||
<PackageReference Include="Serilog" Version="3.1.1" />
|
||||
<PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" />
|
||||
|
||||
@@ -8,12 +8,14 @@ public class PostgresConfiguration
|
||||
private const string PasswordVariable = "PASSWORD";
|
||||
private const string DatabaseVariable = "DB";
|
||||
private const string PortVariable = "PORT";
|
||||
private const string CommandTimeoutVariable = "COMMAND_TIMEOUT_SEC"; // Seconds
|
||||
|
||||
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
|
||||
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
|
||||
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
|
||||
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
|
||||
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
|
||||
private int CommandTimeout { get; init; } = Prefix.GetEnvironmentVariableAsInt(CommandTimeoutVariable, 300);
|
||||
|
||||
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};";
|
||||
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};CommandTimeout={CommandTimeout}";
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
<PackageReference Include="Dapper" Version="2.1.28" />
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||
<PackageReference Include="Npgsql" Version="8.0.1" />
|
||||
<PackageReference Include="Npgsql" Version="8.0.3" />
|
||||
<PackageReference Include="Serilog" Version="3.1.1" />
|
||||
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
|
||||
|
||||
Reference in New Issue
Block a user