Big rewrite - scalable distributed consumers for ingestion/scraping - single producer written in C#.
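
A minimal in-process sketch of the single-producer / many-consumer shape, using System.Threading.Channels. The commit's actual consumers are separate distributed processes; the feed URLs and consumer count below are illustrative only.

using System;
using System.Linq;
using System.Threading.Channels;
using System.Threading.Tasks;

// Bounded channel: the producer enqueues work, consumers drain it in parallel.
var channel = Channel.CreateBounded<string>(capacity: 1_000);

// Single producer: push feed URLs (placeholders) into the channel.
var producer = Task.Run(async () =>
{
    foreach (var url in new[] { "https://example.org/feed-a.xml", "https://example.org/feed-b.xml" })
        await channel.Writer.WriteAsync(url);
    channel.Writer.Complete();
});

// Scalable consumers: each reads until the channel is completed and drained.
var consumers = Enumerable.Range(0, 4).Select(_ => Task.Run(async () =>
{
    await foreach (var url in channel.Reader.ReadAllAsync())
        Console.WriteLine($"scraping {url}");
}));

await Task.WhenAll(consumers.Prepend(producer));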

Changed from page scraping to RSS XML scraping.
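
For context, a hedged sketch of the RSS side: pulling <item> elements from a feed with XDocument. The feed URL and the RSS 2.0 element names are assumptions, not the commit's actual feeds.

using System;
using System.Net.Http;
using System.Xml.Linq;

using var http = new HttpClient();
var xml = await http.GetStringAsync("https://example.org/feed.xml"); // placeholder feed
var doc = XDocument.Parse(xml);

// Standard RSS 2.0 layout: <rss><channel><item>...</item></channel></rss>
foreach (var item in doc.Descendants("item"))
{
    var title = (string?)item.Element("title");
    var link = (string?)item.Element("link");
    Console.WriteLine($"{title} -> {link}");
}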
Includes RealDebridManager hashlist decoding (requires a GitHub read-only PAT, as requests must be authenticated). This allows ingestion of 200k+ entries in a few hours.
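
A sketch of authenticating a GitHub request with a read-only PAT via the Authorization header; the repository path is a placeholder, not the actual hashlist location.

using System;
using System.Net.Http;
using System.Net.Http.Headers;

using var http = new HttpClient();
http.DefaultRequestHeaders.Authorization =
    new AuthenticationHeaderValue("Bearer", Environment.GetEnvironmentVariable("GITHUB_PAT"));
http.DefaultRequestHeaders.UserAgent.ParseAdd("scraper/1.0"); // GitHub rejects requests without a User-Agent
http.DefaultRequestHeaders.Accept.ParseAdd("application/vnd.github.raw"); // return raw file content

var hashlist = await http.GetStringAsync(
    "https://api.github.com/repos/OWNER/REPO/contents/path/to/hashlist"); // placeholder path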
Simplifies much of torrentio to work with the new data.
iPromKnight
2024-02-01 16:38:45 +00:00
parent 6fb4ddcf23
commit ab17ef81be
255 changed files with 18489 additions and 69074 deletions


@@ -0,0 +1,25 @@
using Microsoft.Extensions.Logging;

namespace Scraper.Crawlers;

public abstract class BaseCrawler(ILogger<BaseCrawler> logger, IDataStorage storage) : ICrawler
{
    // Per-crawler configuration: feed-field mappings, feed URL, and source name.
    protected abstract IReadOnlyDictionary<string, string> Mappings { get; }
    protected abstract string Url { get; }
    protected abstract string Source { get; }

    protected IDataStorage Storage => storage;

    // No-op by default; concrete crawlers override this with their scrape loop.
    public virtual Task Execute() => Task.CompletedTask;

    // Writes a batch of torrents to storage, logging the outcome either way.
    protected async Task<InsertTorrentResult> InsertTorrents(IReadOnlyCollection<Torrent> torrents)
    {
        var result = await storage.InsertTorrents(torrents);
        if (!result.Success)
        {
            logger.LogWarning("Ingestion Failed: [{Error}]", result.ErrorMessage);
            return result;
        }

        logger.LogInformation("Ingestion Successful - Wrote {Count} new torrents", result.InsertedCount);
        return result;
    }
}
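
A hypothetical concrete crawler showing how BaseCrawler is meant to be subclassed; the URL, source name, mapping, and feed-parsing step are illustrative only.

public class ExampleRssCrawler(ILogger<ExampleRssCrawler> logger, IDataStorage storage)
    : BaseCrawler(logger, storage)
{
    protected override IReadOnlyDictionary<string, string> Mappings =>
        new Dictionary<string, string> { ["title"] = "Name" }; // hypothetical field mapping

    protected override string Url => "https://example.org/feed.xml"; // placeholder feed
    protected override string Source => "example";

    public override async Task Execute()
    {
        var torrents = new List<Torrent>(); // parse the feed at Url into Torrent records here
        await InsertTorrents(torrents);
    }
}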