Files
torrentio/src/producer/Features/Crawlers/Tgx/TgxCrawler.cs
purple_emily 79409915cf Run pre-commit
2024-03-08 14:34:53 +00:00

144 lines
4.0 KiB
C#

namespace Producer.Features.Crawlers.Tgx;
/// <summary>
/// XML crawler for the TorrentGalaxy RSS feed. Each feed item whose category starts with
/// "movies" or "tv" is converted into a <see cref="Torrent"/>; every other item is skipped.
/// </summary>
public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
    /// <summary>Captures the text between "Size:" and "Added" in an item description.</summary>
    [GeneratedRegex(@"Size:\s+(.+?)\s+Added")]
    private static partial Regex SizeStringExtractor();

    /// <summary>Splits a size string such as "1.4 GB" into its numeric part (group 1) and unit (group 3).</summary>
    [GeneratedRegex(@"(?i)\b(\d+(\.\d+)?)\s*([KMGT]?B)\b", RegexOptions.None, "en-GB")]
    private static partial Regex SizeStringParser();

    // Built once: the mapping never changes, so there is no reason to allocate a new
    // dictionary every time the Mappings property is read.
    private static readonly Dictionary<string, string> ElementMappings = new()
    {
        [nameof(Torrent.Name)] = "title",
        [nameof(Torrent.Size)] = "description",
        [nameof(Torrent.InfoHash)] = "guid",
        [nameof(Torrent.Category)] = "category",
    };

    // Byte multipliers per unit, using binary multiples (1 KB = 1024 B).
    private static readonly Dictionary<string, long> UnitMultipliers = new()
    {
        ["B"] = 1,
        ["KB"] = 1L << 10,
        ["MB"] = 1L << 20,
        ["GB"] = 1L << 30,
        ["TB"] = 1L << 40,
    };

    // Category prefixes (the text before the first ':') that this crawler ingests.
    private static readonly HashSet<string> AllowedCategories =
    [
        "movies",
        "tv",
    ];

    protected override string Url => "https://tgx.rs/rss";

    protected override string Source => "TorrentGalaxy";

    /// <summary>Maps <see cref="Torrent"/> property names to the feed element that carries each value.</summary>
    protected override IReadOnlyDictionary<string, string> Mappings => ElementMappings;

    /// <summary>
    /// Builds a <see cref="Torrent"/> from one feed item.
    /// </summary>
    /// <param name="itemNode">The feed item element to read.</param>
    /// <returns>
    /// The parsed torrent, or <c>null</c> when the item has no category element or its category
    /// is not one of the allowed ones.
    /// </returns>
    protected override Torrent? ParseTorrent(XElement itemNode)
    {
        var category = itemNode.Element(Mappings[nameof(Torrent.Category)])?.Value.ToLowerInvariant();

        if (category is null || !IsAllowedCategory(category))
        {
            return null;
        }

        var torrent = new Torrent
        {
            Source = Source,
            Name = itemNode.Element(Mappings[nameof(Torrent.Name)])?.Value,
            InfoHash = itemNode.Element(Mappings[nameof(Torrent.InfoHash)])?.Value,
            Size = "0", // default; replaced below when the description yields a size
            Seeders = 0,
            Leechers = 0,
        };

        HandleSize(itemNode, torrent, nameof(Torrent.Size));
        torrent.Category = SetCategory(category);

        return torrent;
    }

    /// <summary>
    /// Collapses a lower-cased feed category into "tv" or "movies".
    /// </summary>
    /// <remarks>
    /// The "xxx" fallback cannot be reached from <see cref="ParseTorrent"/>, because only
    /// categories whose prefix is "movies" or "tv" get this far.
    /// </remarks>
    private static string SetCategory(string category)
    {
        if (category.Contains("tv", StringComparison.Ordinal))
        {
            return "tv";
        }

        return category.Contains("movies", StringComparison.Ordinal) ? "movies" : "xxx";
    }

    /// <summary>
    /// Reads the element mapped to <paramref name="key"/> and, when a size can be extracted from
    /// it, stores the size in bytes on <paramref name="torrent"/>. Otherwise the torrent's
    /// existing size is left untouched.
    /// </summary>
    private void HandleSize(XContainer itemNode, Torrent torrent, string key)
    {
        var description = itemNode.Element(Mappings[key])?.Value;

        if (description is null)
        {
            return;
        }

        var size = ExtractSizeFromDescription(description);

        if (size is not null)
        {
            // Invariant formatting: the value is stored as data, not shown to a user.
            torrent.Size = size.Value.ToString(System.Globalization.CultureInfo.InvariantCulture);
        }
    }

    /// <summary>
    /// Extracts the "Size: ... Added" segment from a description and converts it to bytes.
    /// </summary>
    /// <param name="input">The raw description text of a feed item.</param>
    /// <returns>
    /// The size in bytes, or <c>null</c> (after logging a warning) when the description contains
    /// no size segment, the segment is not in a supported format, or the value overflows a
    /// <see cref="long"/>.
    /// </returns>
    private long? ExtractSizeFromDescription(string input)
    {
        var sizeMatch = SizeStringExtractor().Match(input);

        if (!sizeMatch.Success)
        {
            // A description without a size segment is handled like every other size failure:
            // warn and fall back to the default, rather than throwing out of item parsing.
            logger.LogWarning("Unable to parse size from the input.");
            return null;
        }

        var sizeString = sizeMatch.Groups[1].Value;
        var match = SizeStringParser().Match(sizeString);

        if (!match.Success)
        {
            logger.LogWarning("The size '{Size}' is not in a supported format.", sizeString);
            return null;
        }

        // The regex only admits '.' as the decimal separator, so parse with the invariant
        // culture; the host culture may use ',' and would misread or reject the value.
        var val = double.Parse(match.Groups[1].Value, System.Globalization.CultureInfo.InvariantCulture);
        var unit = match.Groups[3].Value.ToUpperInvariant();

        if (!UnitMultipliers.TryGetValue(unit, out var multiplier))
        {
            logger.LogWarning("The size unit '{Unit}' is not supported.", unit);
            return null;
        }

        try
        {
            return checked((long)(val * multiplier));
        }
        catch (OverflowException)
        {
            logger.LogWarning("The size '{Size}' is too large.", sizeString);
            return null;
        }
    }

    /// <summary>
    /// Returns whether the text before the first ':' in <paramref name="category"/> (trimmed,
    /// lower-cased) is one of the <see cref="AllowedCategories"/>.
    /// </summary>
    private static bool IsAllowedCategory(string category)
    {
        // Split always yields at least one element, so index 0 is safe.
        var parsedCategory = category.Split(':')[0].Trim().ToLowerInvariant();

        return AllowedCategories.Contains(parsedCategory);
    }
}