Run pre-commit

purple_emily
2024-03-08 14:34:53 +00:00
parent 31e16df720
commit 79409915cf
221 changed files with 525 additions and 526 deletions

View File

@@ -8,12 +8,12 @@ public partial class DebridMediaManagerCrawler(
{
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
private static partial Regex HashCollectionMatcher();
[GeneratedRegex(@"[sS]([0-9]{1,2})|seasons?[\s-]?([0-9]{1,2})", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SeasonMatcher();
private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
protected override string Url => "https://api.github.com/repos/debridmediamanager/hashlists/git/trees/main?recursive=1";
protected override string Source => "DMM";
@@ -23,13 +23,13 @@ public partial class DebridMediaManagerCrawler(
var client = httpClientFactory.CreateClient("Scraper");
client.DefaultRequestHeaders.Authorization = new("Bearer", githubConfiguration.PAT);
client.DefaultRequestHeaders.UserAgent.ParseAdd("curl");
var jsonBody = await client.GetStringAsync(Url);
var json = JsonDocument.Parse(jsonBody);
var entriesArray = json.RootElement.GetProperty("tree");
logger.LogInformation("Found {Entries} total DMM pages", entriesArray.GetArrayLength());
foreach (var entry in entriesArray.EnumerateArray())
@@ -41,21 +41,21 @@ public partial class DebridMediaManagerCrawler(
private async Task ParsePage(JsonElement entry, HttpClient client)
{
var (pageIngested, name) = await IsAlreadyIngested(entry);
if (string.IsNullOrEmpty(name) || pageIngested)
{
return;
}
var pageSource = await client.GetStringAsync($"{DownloadBaseUrl}/{name}");
await ExtractPageContents(pageSource, name);
}
private async Task ExtractPageContents(string pageSource, string name)
{
var match = HashCollectionMatcher().Match(pageSource);
if (!match.Success)
{
logger.LogWarning("Failed to match hash collection for {Name}", name);
@@ -64,32 +64,32 @@ public partial class DebridMediaManagerCrawler(
}
var encodedJson = match.Groups.Values.ElementAtOrDefault(1);
if (string.IsNullOrEmpty(encodedJson?.Value))
{
logger.LogWarning("Failed to extract encoded json for {Name}", name);
return;
}
await ProcessExtractedContentsAsTorrentCollection(encodedJson.Value, name);
}
private async Task ProcessExtractedContentsAsTorrentCollection(string encodedJson, string name)
{
var decodedJson = LZString.DecompressFromEncodedURIComponent(encodedJson);
var json = JsonDocument.Parse(decodedJson);
await InsertTorrentsForPage(json);
var result = await Storage.MarkPageAsIngested(name);
if (!result.Success)
{
logger.LogWarning("Failed to mark page as ingested: [{Error}]", result.ErrorMessage);
return;
}
logger.LogInformation("Successfully marked page as ingested");
}
@@ -109,7 +109,7 @@ public partial class DebridMediaManagerCrawler(
{
return null;
}
torrent.Category = SeasonMatcher().IsMatch(torrent.Name) ? "tv" : "movies";
return torrent;
@@ -120,16 +120,16 @@ public partial class DebridMediaManagerCrawler(
var torrents = json.RootElement.EnumerateArray()
.Select(ParseTorrent)
.ToList();
if (torrents.Count == 0)
{
logger.LogWarning("No torrents found in {Source} response", Source);
return;
}
await InsertTorrents(torrents!);
}
private async Task<(bool Success, string? Name)> IsAlreadyIngested(JsonElement entry)
{
var name = entry.GetProperty("path").GetString();
@@ -138,9 +138,9 @@ public partial class DebridMediaManagerCrawler(
{
return (false, null);
}
var pageIngested = await Storage.PageIngested(name);
return (pageIngested, name);
}
}
}
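
For context on the extraction flow above: the crawler pulls an lz-string-compressed payload out of the hashlist iframe's URL fragment and inflates it back into a JSON array. A minimal standalone sketch of that decode step, assuming the LZStringCSharp package (which provides the LZString.DecompressFromEncodedURIComponent call seen above) and a hypothetical local input file:

using System.Text.Json;
using System.Text.RegularExpressions;
using LZStringCSharp;

// Minimal sketch: decode a DMM hashlist payload the same way the crawler does.
// The regex and the LZString call mirror the code above; "hashlist.html" is a
// hypothetical local copy of a fetched page.
var pageSource = File.ReadAllText("hashlist.html");

var match = Regex.Match(
    pageSource,
    """<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""");

if (match.Success)
{
    // Group 1 carries the lz-string payload from the URL fragment.
    var decodedJson = LZString.DecompressFromEncodedURIComponent(match.Groups[1].Value);
    using var json = JsonDocument.Parse(decodedJson);
    Console.WriteLine($"Entries on page: {json.RootElement.GetArrayLength()}");
}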

View File

@@ -4,6 +4,6 @@ public class GithubConfiguration
{
private const string Prefix = "GITHUB";
private const string PatVariable = "PAT";
public string? PAT { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(PatVariable);
}
}
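
GetOptionalEnvironmentVariableAsString comes from elsewhere in the repository and is not shown in this diff. A plausible reconstruction, assuming the PREFIX_NAME environment-variable convention implied by Prefix = "GITHUB" and PatVariable = "PAT":

public static class EnvironmentVariableExtensions
{
    // Hypothetical reconstruction: reads e.g. GITHUB_PAT and returns null when unset.
    public static string? GetOptionalEnvironmentVariableAsString(this string prefix, string name) =>
        Environment.GetEnvironmentVariable($"{prefix}_{name}");
}

Under that assumption, GithubConfiguration.PAT reads GITHUB_PAT and stays null when the variable is absent.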

View File

@@ -10,4 +10,4 @@ public class SyncDmmJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvi
public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
protected override string Crawler => nameof(DebridMediaManagerCrawler);
}
}

View File

@@ -4,8 +4,8 @@ public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawl
{
protected override string Url => "https://eztv1.xyz/ezrss.xml";
protected override string Source => "EZTV";
private static readonly XNamespace XmlNamespace = "http://xmlns.ezrss.it/0.1/";
protected override IReadOnlyDictionary<string, string> Mappings =>
new Dictionary<string, string>
@@ -29,4 +29,4 @@ public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawl
InfoHash = itemNode.Element(XmlNamespace + Mappings[nameof(Torrent.InfoHash)])?.Value,
Category = itemNode.Element(Mappings[nameof(Torrent.Category)])?.Value.ToLowerInvariant(),
};
}
}
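
For reference, the ezrss-namespaced elements above are resolved by combining the XNamespace with a local name. A minimal sketch using a hypothetical item snippet:

using System.Xml.Linq;

// Minimal sketch: resolve an ezrss-namespaced element from a feed item.
// The XML snippet is hypothetical; the XNamespace + localName lookup matches
// the crawler above.
XNamespace ns = "http://xmlns.ezrss.it/0.1/";
var item = XElement.Parse(
    """
    <item xmlns:torrent="http://xmlns.ezrss.it/0.1/">
      <title>Example.S01E01.1080p</title>
      <torrent:infoHash>ABCDEF0123456789ABCDEF0123456789ABCDEF01</torrent:infoHash>
    </item>
    """);

Console.WriteLine(item.Element(ns + "infoHash")?.Value);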

View File

@@ -9,4 +9,4 @@ public class SyncEzTvJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProv
public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
protected override string Crawler => nameof(EzTvCrawler);
}
}

View File

@@ -4,8 +4,8 @@ public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawl
{
protected override string Url => "https://nyaa.si/?page=rss&c=1_2&f=0";
protected override string Source => "Nyaa";
private static readonly XNamespace XmlNamespace = "https://nyaa.si/xmlns/nyaa";
protected override IReadOnlyDictionary<string, string> Mappings =>
new Dictionary<string, string>
@@ -29,4 +29,4 @@ public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawl
InfoHash = itemNode.Element(XmlNamespace + Mappings[nameof(Torrent.InfoHash)])?.Value,
Category = itemNode.Element(Mappings[nameof(Torrent.Category)])?.Value.ToLowerInvariant(),
};
}
}

View File

@@ -9,4 +9,4 @@ public class SyncNyaaJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProv
public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
protected override string Crawler => nameof(NyaaCrawler);
}
}

View File

@@ -9,4 +9,4 @@ public class SyncTgxJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvi
public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
protected override string Crawler => nameof(TgxCrawler);
}
}

View File

@@ -6,7 +6,7 @@ public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<Tg
private static partial Regex SizeStringExtractor();
[GeneratedRegex(@"(?i)\b(\d+(\.\d+)?)\s*([KMGT]?B)\b", RegexOptions.None, "en-GB")]
private static partial Regex SizeStringParser();
protected override string Url => "https://tgx.rs/rss";
protected override string Source => "TorrentGalaxy";
@@ -18,8 +18,8 @@ public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<Tg
[nameof(Torrent.InfoHash)] = "guid",
[nameof(Torrent.Category)] = "category",
};
private static readonly HashSet<string> AllowedCategories =
[
"movies",
"tv",
@@ -28,18 +28,18 @@ public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<Tg
protected override Torrent? ParseTorrent(XElement itemNode)
{
var category = itemNode.Element(Mappings["Category"])?.Value.ToLowerInvariant();
if (category is null)
{
return null;
}
if (!IsAllowedCategory(category))
{
return null;
}
var torrent = new Torrent
{
Source = Source,
@@ -49,11 +49,11 @@ public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<Tg
Seeders = 0,
Leechers = 0,
};
HandleSize(itemNode, torrent, "Size");
torrent.Category = SetCategory(category);
return torrent;
}
@@ -88,12 +88,12 @@ public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<Tg
private long? ExtractSizeFromDescription(string input)
{
var sizeMatch = SizeStringExtractor().Match(input);
if (!sizeMatch.Success)
{
throw new FormatException("Unable to parse size from the input.");
}
var sizeString = sizeMatch.Groups[1].Value;
var units = new Dictionary<string, long>
@@ -106,7 +106,7 @@ public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<Tg
};
var match = SizeStringParser().Match(sizeString);
if (match.Success)
{
var val = double.Parse(match.Groups[1].Value);
@@ -137,7 +137,7 @@ public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<Tg
private static bool IsAllowedCategory(string category)
{
var parsedCategory = category.Split(':').ElementAtOrDefault(0)?.Trim().ToLowerInvariant();
return parsedCategory is not null && AllowedCategories.Contains(parsedCategory);
}
}
}
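
The two generated regexes above split size handling into extraction (pull a human-readable size out of the RSS description) and parsing (convert it to bytes). A standalone sketch of the parsing step; the 1024-based multipliers are an assumption, since the units dictionary body is elided in this diff:

using System.Text.RegularExpressions;

// Standalone sketch of the size-string-to-bytes step. The pattern matches
// SizeStringParser above; the 1024-based multipliers are an assumption, since
// the units dictionary body is elided in this diff.
static long? ParseSize(string sizeString)
{
    var units = new Dictionary<string, long>
    {
        ["B"] = 1L,
        ["KB"] = 1024L,
        ["MB"] = 1024L * 1024,
        ["GB"] = 1024L * 1024 * 1024,
        ["TB"] = 1024L * 1024 * 1024 * 1024,
    };

    var match = Regex.Match(sizeString, @"(?i)\b(\d+(\.\d+)?)\s*([KMGT]?B)\b");
    if (!match.Success)
    {
        return null;
    }

    var value = double.Parse(match.Groups[1].Value);
    return (long)(value * units[match.Groups[3].Value.ToUpperInvariant()]);
}

Console.WriteLine(ParseSize("1.4 GB")); // 1503238553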

View File

@@ -10,4 +10,4 @@ public class SyncTorrentioJob(ICrawlerProvider crawlerProvider) : BaseJob(crawle
public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
protected override string Crawler => nameof(TorrentioCrawler);
}
}

View File

@@ -4,6 +4,6 @@ public class TorrentioConfiguration
{
public const string SectionName = "TorrentioConfiguration";
public const string Filename = "torrentio.json";
public List<TorrentioInstance> Instances { get; set; } = [];
}
}

View File

@@ -12,7 +12,7 @@ public partial class TorrentioCrawler(
[GeneratedRegex(@"(\d+(\.\d+)?) (GB|MB)")]
private static partial Regex SizeMatcher();
private const int MaximumEmptyItemsCount = 5;
private const string MovieSlug = "movie/{0}.json";
protected override string Url => "sort=size%7Cqualityfilter=other,scr,cam,unknown/stream/{0}";
protected override IReadOnlyDictionary<string, string> Mappings { get; } = new Dictionary<string, string>();
@@ -33,21 +33,21 @@ public partial class TorrentioCrawler(
async () =>
{
var emptyMongoDbItemsCount = 0;
var state = instance.EnsureStateExists(_instanceStates);
SetupResiliencyPolicyForInstance(instance, state);
while (state.TotalProcessed < totalRecordCount)
{
logger.LogInformation("Processing {TorrentioInstance}", instance.Name);
logger.LogInformation("Current processed requests: {ProcessedRequests}", state.TotalProcessed);
var items = await imdbDataService.GetImdbEntriesForRequests(
DateTime.UtcNow.Year.ToString(),
instance.RateLimit.MongoBatchSize,
state.LastProcessedImdbId);
if (items.Count == 0)
{
emptyMongoDbItemsCount++;
@@ -58,10 +58,10 @@ public partial class TorrentioCrawler(
logger.LogInformation("Maximum empty document count reached. Cancelling {TorrentioInstance}", instance.Name);
break;
}
continue;
}
var newTorrents = new List<Torrent>();
var processedItemsCount = 0;
@@ -70,7 +70,7 @@ public partial class TorrentioCrawler(
try
{
var currentCount = processedItemsCount;
await state.ResiliencyPolicy.ExecuteAsync(
async () =>
{
@@ -97,7 +97,7 @@ public partial class TorrentioCrawler(
newTorrents.AddRange(torrentInfo.Where(x => x != null).Select(x => x!));
}
});
processedItemsCount++;
}
catch (Exception)
@@ -127,7 +127,7 @@ public partial class TorrentioCrawler(
{
logger.LogWarning("Retry {RetryCount} encountered an exception: {Message}. Pausing for {Timespan} seconds instance {TorrentioInstance}", retryCount, exception.Message, timeSpan.Seconds, instance.Name);
});
var circuitBreakerPolicy = Policy
.Handle<Exception>()
.CircuitBreakerAsync(
@@ -139,9 +139,9 @@ public partial class TorrentioCrawler(
},
onReset: () => logger.LogInformation("Circuit closed for {TorrentioInstance}, calls will flow again", instance.Name),
onHalfOpen: () => logger.LogInformation("Circuit is half-open for {TorrentioInstance}, next call is a trial if it should close or break again", instance.Name));
var policyWrap = Policy.WrapAsync(retryPolicy, circuitBreakerPolicy);
state.ResiliencyPolicy = policyWrap;
}
@@ -162,24 +162,24 @@ public partial class TorrentioCrawler(
{
throw new("Failed to fetch " + requestUrl);
}
var json = JsonDocument.Parse(await response.Content.ReadAsStringAsync());
var streams = json.RootElement.GetProperty("streams").EnumerateArray();
return streams.Select(x => ParseTorrent(instance, x, imdbId)).Where(x => x != null).ToList();
}
private Torrent? ParseTorrent(TorrentioInstance instance, JsonElement item, string imdbId)
{
var title = item.GetProperty("title").GetString();
var infoHash = item.GetProperty("infoHash").GetString();
if (string.IsNullOrEmpty(title) || string.IsNullOrEmpty(infoHash))
{
return null;
}
var torrent = ParseTorrentDetails(title, instance, infoHash, imdbId);
return string.IsNullOrEmpty(torrent.Name) ? null : torrent;
}
@@ -218,4 +218,4 @@ public partial class TorrentioCrawler(
return torrent;
}
}
}
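
SetupResiliencyPolicyForInstance composes a Polly wait-and-retry policy around a circuit breaker via Policy.WrapAsync. A trimmed sketch of the same shape against Polly's v7 API; the retry count, delays, break duration, and URL are hypothetical stand-ins for the per-instance RateLimit configuration:

using Polly;

// Trimmed sketch of the retry + circuit-breaker composition above. The limits
// and URL here are hypothetical stand-ins for the configured RateLimit values.
var retryPolicy = Policy
    .Handle<HttpRequestException>()
    .WaitAndRetryAsync(
        retryCount: 3,
        sleepDurationProvider: attempt => TimeSpan.FromSeconds(Math.Pow(2, attempt)),
        onRetry: (exception, timeSpan, retryCount, _) =>
            Console.WriteLine($"Retry {retryCount} after {timeSpan.TotalSeconds}s: {exception.Message}"));

var circuitBreakerPolicy = Policy
    .Handle<HttpRequestException>()
    .CircuitBreakerAsync(
        exceptionsAllowedBeforeBreaking: 5,
        durationOfBreak: TimeSpan.FromMinutes(1),
        onBreak: (exception, duration) =>
            Console.WriteLine($"Circuit open for {duration.TotalSeconds}s: {exception.Message}"),
        onReset: () => Console.WriteLine("Circuit closed, calls will flow again"),
        onHalfOpen: () => Console.WriteLine("Circuit half-open, next call is a trial"));

// WrapAsync lists the outermost policy first, so each retry attempt passes
// through the circuit breaker, matching the order used above.
var policyWrap = Policy.WrapAsync(retryPolicy, circuitBreakerPolicy);

await policyWrap.ExecuteAsync(async () =>
{
    using var client = new HttpClient();
    Console.WriteLine(await client.GetStringAsync("https://example.com/stream/tt0111161.json"));
});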

View File

@@ -7,4 +7,4 @@ public class TorrentioInstance
public string Url { get; init; } = default!;
public TorrentioRateLimit RateLimit { get; init; } = default!;
}
}

View File

@@ -21,7 +21,7 @@ public static class TorrentioInstancesExtensions
return remaining > TimeSpan.Zero ? remaining : TimeSpan.Zero;
}
public static void SetPossiblyRateLimited(this TorrentioInstance instance, TorrentioScrapeInstance state, int minutesToWait = 5)
{
// Backdate the window start so that the next rate-limit check reports a wait of minutesToWait minutes
@@ -33,12 +33,12 @@ public static class TorrentioInstancesExtensions
state.RequestCount = requestCount;
}
public static long TotalProcessedRequests(this TorrentioInstance instance, Dictionary<string, TorrentioScrapeInstance> scraperState) =>
!scraperState.TryGetValue(instance.Name, out var state) ? 0 : state.TotalProcessed;
public static string? LastProcessedImdbId(this TorrentioInstance instance, Dictionary<string, TorrentioScrapeInstance> scraperState) =>
!scraperState.TryGetValue(instance.Name, out var state) ? null : state.LastProcessedImdbId;
public static TorrentioScrapeInstance EnsureStateExists(this TorrentioInstance instance, Dictionary<string, TorrentioScrapeInstance> scraperState)
{
if (!scraperState.TryGetValue(instance.Name, out var state))
@@ -49,4 +49,4 @@ public static class TorrentioInstancesExtensions
return state;
}
}
}
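
These extensions share one shape: TryGetValue on the shared scraper-state dictionary with a fallback, while EnsureStateExists inserts a fresh state on first access. A usage sketch against the types above; the instance name and URL are hypothetical, and TorrentioInstance is assumed to expose an init-able Name, as its usage throughout the crawler implies:

// Usage sketch against the extensions above. The instance name and URL are
// hypothetical, and TorrentioInstance is assumed to expose an init-able Name.
var scraperState = new Dictionary<string, TorrentioScrapeInstance>();
var instance = new TorrentioInstance { Name = "torrentio-main", Url = "https://torrentio.example" };

Console.WriteLine(instance.TotalProcessedRequests(scraperState)); // 0: no state recorded yet

var state = instance.EnsureStateExists(scraperState);             // inserts a fresh state
state.TotalProcessed = 42;

Console.WriteLine(instance.TotalProcessedRequests(scraperState)); // 42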

View File

@@ -4,10 +4,10 @@ public class TorrentioRateLimit
{
public int RequestLimit { get; set; }
public int IntervalInSeconds { get; set; }
public int MongoBatchSize { get; set; }
public int ExceptionLimit { get; set; }
public int ExceptionIntervalInSeconds { get; set; }
}
}

View File

@@ -7,4 +7,4 @@ public class TorrentioScrapeInstance
public int TotalProcessed { get; set; }
public string? LastProcessedImdbId { get; set; }
public IAsyncPolicy? ResiliencyPolicy { get; set; }
}
}

View File

@@ -9,4 +9,4 @@ public class SyncTpbJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvi
public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
protected override string Crawler => nameof(TpbCrawler);
}
}

View File

@@ -5,7 +5,7 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
protected override string Url => "https://apibay.org/precompiled/data_top100_recent.json";
protected override string Source => "TPB";
// ReSharper disable once UnusedMember.Local
private readonly Dictionary<string, Dictionary<string, int>> TpbCategories = new()
{
@@ -33,12 +33,12 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
{"OTHER", 599},
}},
};
private static readonly HashSet<int> TvSeriesCategories = [ 205, 208 ];
private static readonly HashSet<int> MovieCategories = [ 201, 202, 207, 209 ];
private static readonly HashSet<int> PornCategories = [ 500, 501, 502, 505, 506 ];
private static readonly HashSet<int> AllowedCategories = [ ..MovieCategories, ..TvSeriesCategories ];
protected override IReadOnlyDictionary<string, string> Mappings
=> new Dictionary<string, string>
{
@@ -54,12 +54,12 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
protected override Torrent? ParseTorrent(JsonElement item)
{
var incomingCategory = item.GetProperty(Mappings["Category"]).GetInt32();
if (!AllowedCategories.Contains(incomingCategory))
{
return null;
}
var torrent = new Torrent
{
Source = Source,
@@ -69,11 +69,11 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
Leechers = item.GetProperty(Mappings["Leechers"]).GetInt32(),
Imdb = item.GetProperty(Mappings["Imdb"]).GetString(),
};
HandleInfoHash(item, torrent, "InfoHash");
torrent.Category = HandleCategory(incomingCategory);
return torrent;
}
@@ -81,7 +81,7 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
MovieCategories.Contains(category) switch
{
true => "movies",
_ => TvSeriesCategories.Contains(category) switch
_ => TvSeriesCategories.Contains(category) switch
{
true => "tv",
_ => "xxx",
@@ -89,4 +89,4 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
};
public override Task Execute() => Execute("items");
}
}
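
HandleCategory above maps apibay's numeric categories to a label through nested switch expressions. A standalone sketch of the same mapping, using the category sets from this file:

// Standalone sketch of the numeric-category mapping above, using the same
// category sets and nested switch expressions.
HashSet<int> tvSeriesCategories = [ 205, 208 ];
HashSet<int> movieCategories = [ 201, 202, 207, 209 ];

string HandleCategory(int category) =>
    movieCategories.Contains(category) switch
    {
        true => "movies",
        _ => tvSeriesCategories.Contains(category) switch
        {
            true => "tv",
            _ => "xxx",
        },
    };

Console.WriteLine(HandleCategory(207)); // movies
Console.WriteLine(HandleCategory(205)); // tv
Console.WriteLine(HandleCategory(501)); // xxx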

View File

@@ -9,4 +9,4 @@ public class SyncYtsJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvi
public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
protected override string Crawler => nameof(YtsCrawler);
}
}

View File

@@ -26,19 +26,19 @@ public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler
Seeders = 0,
Leechers = 0,
};
HandleInfoHash(itemNode, torrent, "InfoHash");
return torrent;
}
protected override void HandleInfoHash(XElement itemNode, Torrent torrent, string infoHashKey)
{
var infoHash = itemNode.Element(Mappings[infoHashKey])?.Attribute("url")?.Value.Split("/download/").ElementAtOrDefault(1);
if (infoHash is not null)
{
torrent.InfoHash = infoHash;
}
}
}
}
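
The YTS override extracts the info hash from the enclosure's download URL rather than from a dedicated element. A minimal sketch; the URL is hypothetical but follows the shape the Split("/download/") call above expects:

// Minimal sketch: the info hash is the path segment after "/download/".
// The URL is hypothetical but matches the shape the crawler's Split call expects.
var url = "https://yts.example/torrent/download/ABCDEF0123456789ABCDEF0123456789ABCDEF01";
Console.WriteLine(url.Split("/download/").ElementAtOrDefault(1));
// ABCDEF0123456789ABCDEF0123456789ABCDEF01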