mirror of https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
Big rewrite: distributed consumers for ingestion/scraping (scalable) and a single producer written in C#.
Changed from page scraping to RSS XML scraping. Includes RealDebridManager hashlist decoding (requires a GitHub read-only PAT, since requests must be authenticated); this allows ingestion of 200k+ entries in a few hours. Simplifies a lot of torrentio to deal with the new data.
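
The hashlist decoding mentioned above needs an authenticated GitHub request. A minimal sketch of how the producer might fetch such a file, assuming the hashlists live in a GitHub repository and the PAT is supplied via a GITHUB_PAT environment variable (the repo path, file name, and variable name are illustrative, not taken from this commit):

using System;
using System.Net.Http;
using System.Net.Http.Headers;

// Sketch only: owner/repo/path and the GITHUB_PAT variable are hypothetical.
var client = new HttpClient();
client.DefaultRequestHeaders.UserAgent.ParseAdd("knightcrawler-producer");
client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue(
    "Bearer", Environment.GetEnvironmentVariable("GITHUB_PAT"));
// Ask the GitHub contents API for the raw file body rather than base64 JSON.
client.DefaultRequestHeaders.Accept.ParseAdd("application/vnd.github.raw+json");

var hashlist = await client.GetStringAsync(
    "https://api.github.com/repos/owner/repo/contents/hashlists/example.json");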
25 src/producer/Crawlers/BaseCrawler.cs Normal file
@@ -0,0 +1,25 @@
namespace Scraper.Crawlers;

public abstract class BaseCrawler(ILogger<BaseCrawler> logger, IDataStorage storage) : ICrawler
{
    protected abstract IReadOnlyDictionary<string, string> Mappings { get; }
    protected abstract string Url { get; }
    protected abstract string Source { get; }
    protected IDataStorage Storage => storage;

    public virtual Task Execute() => Task.CompletedTask;

    protected async Task<InsertTorrentResult> InsertTorrents(IReadOnlyCollection<Torrent> torrent)
    {
        var result = await storage.InsertTorrents(torrent);

        if (!result.Success)
        {
            logger.LogWarning("Ingestion Failed: [{Error}]", result.ErrorMessage);
            return result;
        }

        logger.LogInformation("Ingestion Successful - Wrote {Count} new torrents", result.InsertedCount);
        return result;
    }
}
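
For context, a concrete crawler would derive from BaseCrawler, supply the feed metadata, and override Execute. A minimal sketch, assuming an RSS-style XML feed whose items carry title and infoHash elements and a Torrent type with matching Name, InfoHash, and Source properties (the feed URL, element names, and property names here are illustrative, not from this commit):

using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Threading.Tasks;
using System.Xml.Linq;
using Microsoft.Extensions.Logging;

namespace Scraper.Crawlers;

// Hypothetical subclass; the feed shape and Torrent properties are assumptions.
public class ExampleRssCrawler(ILogger<BaseCrawler> logger, IDataStorage storage)
    : BaseCrawler(logger, storage)
{
    private static readonly HttpClient Client = new();

    // Maps feed element names to Torrent property names (illustrative values).
    protected override IReadOnlyDictionary<string, string> Mappings { get; } =
        new Dictionary<string, string> { ["title"] = "Name", ["infoHash"] = "InfoHash" };

    protected override string Url => "https://example.com/feed.xml";
    protected override string Source => "ExampleTracker";

    public override async Task Execute()
    {
        var xml = XDocument.Parse(await Client.GetStringAsync(Url));

        var torrents = xml.Descendants("item")
            .Select(item => new Torrent
            {
                Name = item.Element("title")?.Value,
                InfoHash = item.Element("infoHash")?.Value,
                Source = Source,
            })
            .ToList();

        // The base class logs success/failure and returns the storage result.
        await InsertTorrents(torrents);
    }
}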