From 49a6283f26a7e023fd2b5c2540036349c86c4838 Mon Sep 17 00:00:00 2001 From: iPromKnight Date: Tue, 27 Feb 2024 13:51:21 +0000 Subject: [PATCH] Fix DMM so that all pages are enumerated Fixes #95 by switching to git trees instead of the content api. --- .../Crawlers/Sites/DebridMediaManagerCrawler.cs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/producer/Crawlers/Sites/DebridMediaManagerCrawler.cs b/src/producer/Crawlers/Sites/DebridMediaManagerCrawler.cs index 8daf3ae..1c5e49e 100644 --- a/src/producer/Crawlers/Sites/DebridMediaManagerCrawler.cs +++ b/src/producer/Crawlers/Sites/DebridMediaManagerCrawler.cs @@ -13,9 +13,11 @@ public partial class DebridMediaManagerCrawler( [GeneratedRegex(@"[sS]([0-9]{1,2})|seasons?[\s-]?([0-9]{1,2})", RegexOptions.IgnoreCase, "en-GB")] private static partial Regex SeasonMatcher(); + + private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main"; protected override IReadOnlyDictionary Mappings => new Dictionary(); - protected override string Url => "https://api.github.com/repos/debridmediamanager/hashlists/contents"; + protected override string Url => "https://api.github.com/repos/debridmediamanager/hashlists/git/trees/main?recursive=1"; protected override string Source => "DMM"; public override async Task Execute() @@ -28,7 +30,11 @@ public partial class DebridMediaManagerCrawler( var json = JsonDocument.Parse(jsonBody); - foreach (var entry in json.RootElement.EnumerateArray()) + var entriesArray = json.RootElement.GetProperty("tree"); + + logger.LogInformation("Found {Entries} total DMM pages", entriesArray.GetArrayLength()); + + foreach (var entry in entriesArray.EnumerateArray()) { await ParsePage(entry, client); } @@ -43,9 +49,7 @@ public partial class DebridMediaManagerCrawler( return; } - var url = entry.GetProperty("download_url").GetString(); - - var pageSource = await client.GetStringAsync(url); + var pageSource = await client.GetStringAsync($"{DownloadBaseUrl}/{name}"); await ExtractPageContents(pageSource, name); } @@ -130,7 +134,7 @@ public partial class DebridMediaManagerCrawler( private async Task<(bool Success, string? Name)> IsAlreadyIngested(JsonElement entry) { - var name = entry.GetProperty("name").GetString(); + var name = entry.GetProperty("path").GetString(); if (string.IsNullOrEmpty(name)) {