Woke up to see a discussion about torrentio scraping: powered by community

Was a little inspired. Now that we have a (self-populating) database of IMDb IDs, why shouldn't we have the ability to scrape any other instance of torrentio, or knightcrawler? Also restructured the producer to be vertically sliced to make it easier to work with. There was too much flicking back and forth between Jobs and Crawlers when configuring.
2024-12-20 03:29:51 +00:00 · 2024-03-02 18:41:57 +00:00
parent 98115e0cf7
commit 95fa48c851
59 changed files with 733 additions and 261 deletions
--- a/src/producer/Features/CrawlerSupport/BaseCrawler.cs
+++ b/src/producer/Features/CrawlerSupport/BaseCrawler.cs
@@ -0,0 +1,25 @@
+namespace Producer.Features.CrawlerSupport;
+
+/// <summary>
+/// Abstract base for crawlers. Holds the injected logger and storage, exposes the
+/// storage to subclasses, and provides a shared helper that inserts torrents and
+/// logs the outcome.
+/// </summary>
+public abstract class BaseCrawler(ILogger<BaseCrawler> logger, IDataStorage storage) : ICrawler
+{
+    /// <summary>Supplied by each concrete crawler; not read by this base class.</summary>
+    protected abstract IReadOnlyDictionary<string, string> Mappings { get; }
+
+    /// <summary>Supplied by each concrete crawler; not read by this base class.</summary>
+    protected abstract string Url { get; }
+
+    /// <summary>Supplied by each concrete crawler; not read by this base class.</summary>
+    protected abstract string Source { get; }
+
+    /// <summary>The storage instance passed to the constructor.</summary>
+    protected IDataStorage Storage
+    {
+        get { return storage; }
+    }
+
+    /// <summary>
+    /// Default implementation does nothing and returns an already-completed task.
+    /// </summary>
+    public virtual Task Execute()
+    {
+        return Task.CompletedTask;
+    }
+
+    /// <summary>
+    /// Hands the given torrents to <c>storage.InsertTorrents</c> and logs the outcome:
+    /// a warning carrying the error message when the result is not successful,
+    /// otherwise an informational message carrying the inserted count.
+    /// </summary>
+    /// <param name="torrent">The collection of torrents to insert.</param>
+    /// <returns>The result returned by the storage call, unchanged.</returns>
+    protected async Task<InsertTorrentResult> InsertTorrents(IReadOnlyCollection<Torrent> torrent)
+    {
+        var outcome = await storage.InsertTorrents(torrent);
+
+        if (outcome.Success)
+        {
+            logger.LogInformation("Ingestion Successful - Wrote {Count} new torrents", outcome.InsertedCount);
+        }
+        else
+        {
+            logger.LogWarning("Ingestion Failed: [{Error}]", outcome.ErrorMessage);
+        }
+
+        // Both branches hand back the same storage result.
+        return outcome;
+    }
+}