Big rewrite: scalable distributed consumers for ingestion/scraping, with a single producer written in C#.

Changed from page scraping to RSS XML scraping.
Includes RealDebridManager hashlist decoding (requires a read-only GitHub PAT, as requests must be authenticated); this allows ingestion of 200k+ entries in a few hours.
Simplifies much of torrentio to deal with the new data.
Author: iPromKnight
Date: 2024-02-01 16:38:45 +00:00
Parent: 6fb4ddcf23
Commit: ab17ef81be
255 changed files with 18,489 additions and 69,074 deletions
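
The PAT requirement called out in the commit message amounts to attaching an Authorization header to each GitHub API request. A minimal sketch, assuming the token lives in a GITHUB_PAT environment variable; the endpoint path is a placeholder, not the hashlist location this commit actually uses:

using System.Net.Http.Headers;

// Hypothetical example: authenticating a GitHub API request with a read-only PAT.
using var client = new HttpClient();
client.DefaultRequestHeaders.UserAgent.ParseAdd("scraper/1.0"); // GitHub rejects requests without a User-Agent
client.DefaultRequestHeaders.Authorization =
    new AuthenticationHeaderValue("Bearer", Environment.GetEnvironmentVariable("GITHUB_PAT"));

// Placeholder endpoint; the real hashlist repository is not shown in this commit.
var response = await client.GetAsync("https://api.github.com/repos/OWNER/REPO/contents/hashlists");
response.EnsureSuccessStatusCode();
var json = await response.Content.ReadAsStringAsync();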

@@ -0,0 +1,6 @@
namespace Scraper.Interfaces;

public interface ICrawler
{
    Task Execute();
}
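
A crawler that satisfies this contract only needs an Execute method. A minimal RSS-flavoured sketch; the class name, feed URL, and item handling are illustrative, not from this commit:

using System.Xml.Linq;

// Illustrative ICrawler implementation fetching and parsing one RSS feed.
public class ExampleRssCrawler : ICrawler
{
    private static readonly HttpClient Http = new();

    public async Task Execute()
    {
        var xml = await Http.GetStringAsync("https://example.com/feed.xml"); // placeholder URL
        var doc = XDocument.Parse(xml);

        // RSS 2.0 items live under /rss/channel/item.
        foreach (var item in doc.Descendants("item"))
        {
            var title = item.Element("title")?.Value;
            var link = item.Element("link")?.Value;
            // Real code would map these fields onto a Torrent and hand it to storage.
            Console.WriteLine($"{title} -> {link}");
        }
    }
}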

@@ -0,0 +1,8 @@
namespace Scraper.Interfaces;

public interface ICrawlerProvider
{
    IEnumerable<ICrawler> GetAll();

    ICrawler Get(string name);
}
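
One way to satisfy this interface is to lean on DI: register every ICrawler, inject them all, and resolve by name. A sketch, assuming crawlers are keyed by their type name (that convention is a guess, not taken from this commit):

// Illustrative provider backed by whatever ICrawler implementations DI registers.
public class CrawlerProvider : ICrawlerProvider
{
    private readonly IReadOnlyList<ICrawler> _crawlers;

    public CrawlerProvider(IEnumerable<ICrawler> crawlers) => _crawlers = crawlers.ToList();

    public IEnumerable<ICrawler> GetAll() => _crawlers;

    // Assumed convention: look crawlers up by their CLR type name.
    public ICrawler Get(string name) =>
        _crawlers.First(c => c.GetType().Name.Equals(name, StringComparison.OrdinalIgnoreCase));
}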

@@ -0,0 +1,10 @@
namespace Scraper.Interfaces;

public interface IDataStorage
{
    Task<InsertTorrentResult> InsertTorrents(IReadOnlyCollection<Torrent> torrents, CancellationToken cancellationToken = default);
    Task<IReadOnlyCollection<Torrent>> GetPublishableTorrents(CancellationToken cancellationToken = default);
    Task<UpdatedTorrentResult> SetTorrentsProcessed(IReadOnlyCollection<Torrent> torrents, CancellationToken cancellationToken = default);
    Task<bool> PageIngested(string pageId, CancellationToken cancellationToken = default);
    Task<PageIngestedResult> MarkPageAsIngested(string pageId, CancellationToken cancellationToken = default);
}
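
Taken together, these methods suggest an ingest-then-publish loop: skip already-seen pages, store new torrents, then hand publishable ones to the message publisher and mark them processed. A hypothetical sketch of that flow (the orchestration itself is not shown in these hunks, and "pageId" stands for whatever de-duplication key the crawler derives):

static async Task IngestPage(string pageId, IReadOnlyCollection<Torrent> torrents,
    IDataStorage storage, IMessagePublisher publisher, CancellationToken ct)
{
    // Skip pages that have already been ingested.
    if (await storage.PageIngested(pageId, ct))
        return;

    await storage.InsertTorrents(torrents, ct);
    await storage.MarkPageAsIngested(pageId, ct);

    // Publish anything ready for the distributed consumers,
    // then record that it has been handed off.
    var publishable = await storage.GetPublishableTorrents(ct);
    await publisher.PublishAsync(publishable, ct);
    await storage.SetTorrentsProcessed(publishable, ct);
}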

@@ -0,0 +1,6 @@
namespace Scraper.Interfaces;

public interface IIpService
{
    Task GetPublicIpAddress();
}
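
GetPublicIpAddress returns a bare Task, so an implementation presumably stores or logs the result rather than returning it. An illustrative sketch; the echo service URL and the cached property are assumptions:

// Hypothetical implementation that caches the looked-up address.
public class IpService : IIpService
{
    private static readonly HttpClient Http = new();

    public string? PublicIp { get; private set; }

    public async Task GetPublicIpAddress()
    {
        // api.ipify.org returns the caller's public IP as plain text.
        PublicIp = await Http.GetStringAsync("https://api.ipify.org");
    }
}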

@@ -0,0 +1,6 @@
namespace Scraper.Interfaces;

public interface IMessagePublisher
{
    Task PublishAsync(IEnumerable<Torrent> torrents, CancellationToken cancellationToken = default);
}
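
The commit does not show which broker sits behind IMessagePublisher, only that a single producer feeds distributed consumers. A hypothetical sketch using RabbitMQ.Client 6.x and System.Text.Json; the broker choice, queue name, and connection settings are all assumptions:

using System.Text.Json;
using RabbitMQ.Client;

// Hypothetical publisher; RabbitMQ and the "torrents" queue are not confirmed by this commit.
public class RabbitMqPublisher : IMessagePublisher, IDisposable
{
    private readonly IConnection _connection;
    private readonly IModel _channel;

    public RabbitMqPublisher()
    {
        var factory = new ConnectionFactory { HostName = "localhost" };
        _connection = factory.CreateConnection();
        _channel = _connection.CreateModel();
        _channel.QueueDeclare("torrents", durable: true, exclusive: false, autoDelete: false);
    }

    public Task PublishAsync(IEnumerable<Torrent> torrents, CancellationToken cancellationToken = default)
    {
        foreach (var torrent in torrents)
        {
            var body = JsonSerializer.SerializeToUtf8Bytes(torrent);
            _channel.BasicPublish(exchange: "", routingKey: "torrents", basicProperties: null, body: body);
        }
        return Task.CompletedTask;
    }

    public void Dispose()
    {
        _channel.Dispose();
        _connection.Dispose();
    }
}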