Big rewrite - distributed consumers for ingestion / scraping (scalable) - single producer written in C#.

Changed from page scraping to RSS XML scraping.
Includes RealDebridManager hashlist decoding (requires a GitHub read-only PAT, as requests must be authenticated) - this allows ingestion of 200k+ entries in a few hours.
Simplifies a lot of the Torrentio code to deal with the new data.
This commit is contained in:
iPromKnight
2024-02-01 16:38:45 +00:00
parent 6fb4ddcf23
commit ab17ef81be
255 changed files with 18489 additions and 69074 deletions

View File

@@ -0,0 +1,23 @@
namespace Scraper.Jobs;

/// <summary>
/// Base Quartz job that resolves a crawler by name from the injected
/// <see cref="ICrawlerProvider"/> and executes it, requesting an immediate
/// refire on failure until the retry cap is exceeded.
/// </summary>
public abstract class BaseJob(ICrawlerProvider crawlerProvider) : IJob
{
    /// <summary>
    /// Executes the crawler named by <see cref="Crawler"/>. After more than five
    /// refires the job gives up by throwing <see cref="InvalidOperationException"/>;
    /// any crawler failure is wrapped in a <see cref="JobExecutionException"/>
    /// with <c>refireImmediately: true</c> so Quartz retries it.
    /// </summary>
    public async Task Execute(IJobExecutionContext context)
    {
        if (context.RefireCount > 5)
        {
            // NOTE(review): Quartz wraps non-JobExecutionException throws; consider
            // a JobExecutionException with refireImmediately: false here instead.
            throw new InvalidOperationException("Job failed too many times");
        }

        try
        {
            await crawlerProvider.Get(Crawler).Execute();
        }
        catch (Exception ex)
        {
            // Fix: the original passed msg: "", discarding all diagnostic context.
            // Carry the crawler name and failure reason so the scheduler log is useful.
            throw new JobExecutionException(
                msg: $"Crawler '{Crawler}' failed: {ex.Message}",
                refireImmediately: true,
                cause: ex);
        }
    }

    /// <summary>Name of the crawler implementation to resolve, e.g. <c>nameof(EzTvCrawler)</c>.</summary>
    protected abstract string Crawler { get; }
}

View File

@@ -0,0 +1,7 @@
namespace Scraper.Jobs;

/// <summary>
/// A Quartz job associated with a specific crawler implementation.
/// </summary>
/// <typeparam name="TCrawler">
/// The concrete crawler this job runs; covariant (<c>out</c>) so an
/// <c>ICrawlerJob</c> of a derived crawler is usable where one of the base
/// <see cref="ICrawler"/> is expected.
/// </typeparam>
public interface ICrawlerJob<out TCrawler> : IJob
where TCrawler : ICrawler
{
// The crawler instance this job will execute.
// NOTE(review): appears unused by the jobs visible in this change — BaseJob
// resolves crawlers by string name instead; confirm against other callers.
TCrawler CrawlerType { get; }
}

View File

@@ -0,0 +1,14 @@
namespace Scraper.Jobs;

/// <summary>
/// Quartz job that refreshes the public IP address via <see cref="IIpService"/>.
/// Concurrent executions are disallowed.
/// </summary>
[DisallowConcurrentExecution]
public class IPJob(IIpService ipService) : IJob
{
    private const string JobName = nameof(IPJob);

    /// <summary>Identity of this job within the <c>Jobs</c> group.</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Jobs));

    /// <summary>Trigger identity paired with <see cref="Key"/>.</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Jobs));

    /// <summary>Starts the public-IP lookup and hands its task back to the scheduler.</summary>
    public Task Execute(IJobExecutionContext context) => ipService.GetPublicIpAddress();
}

View File

@@ -0,0 +1,31 @@
namespace Scraper.Jobs;

/// <summary>
/// Quartz job that drains publishable torrents from storage, publishes them via
/// <see cref="IMessagePublisher"/>, then marks them as processed.
/// Concurrent executions are disallowed.
/// </summary>
[DisallowConcurrentExecution]
public class PublisherJob(IMessagePublisher publisher, IDataStorage storage, ILogger<PublisherJob> logger) : IJob
{
    private const string JobName = nameof(PublisherJob);

    /// <summary>Identity of this job within the <c>Jobs</c> group.</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Jobs));

    /// <summary>Trigger identity paired with <see cref="Key"/>.</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Jobs));

    /// <summary>Publishes the pending batch and records the processed state.</summary>
    public async Task Execute(IJobExecutionContext context)
    {
        var token = context.CancellationToken;

        var batch = await storage.GetPublishableTorrents(token);
        if (batch.Count == 0)
        {
            // Nothing queued this cycle; skip publishing entirely.
            return;
        }

        await publisher.PublishAsync(batch, token);

        var outcome = await storage.SetTorrentsProcessed(batch, token);
        if (outcome.Success)
        {
            logger.LogInformation("Successfully set {Count} torrents as processed", outcome.UpdatedCount);
        }
        else
        {
            logger.LogWarning("Failed to set torrents as processed: [{Error}]", outcome.ErrorMessage);
        }
    }
}

View File

@@ -0,0 +1,10 @@
namespace Scraper.Jobs;

/// <summary>
/// Quartz job that runs the <see cref="DebridMediaManagerCrawler"/>.
/// Retry/refire behaviour comes from <see cref="BaseJob"/>; concurrent
/// executions are disallowed.
/// </summary>
[DisallowConcurrentExecution]
public class SyncDmmJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(DebridMediaManagerCrawler);

    /// <summary>Identity of this job within the <c>Crawlers</c> group.</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Crawlers));

    /// <summary>Trigger identity paired with <see cref="Key"/>.</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Crawlers));

    /// <summary>Crawler name resolved by the base job (same value as <see cref="JobName"/>).</summary>
    protected override string Crawler => JobName;
}

View File

@@ -0,0 +1,10 @@
namespace Scraper.Jobs;

/// <summary>
/// Quartz job that runs the <see cref="EzTvCrawler"/>.
/// Retry/refire behaviour comes from <see cref="BaseJob"/>; concurrent
/// executions are disallowed.
/// </summary>
[DisallowConcurrentExecution]
public class SyncEzTvJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(EzTvCrawler);

    /// <summary>Identity of this job within the <c>Crawlers</c> group.</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Crawlers));

    /// <summary>Trigger identity paired with <see cref="Key"/>.</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Crawlers));

    /// <summary>Crawler name resolved by the base job (same value as <see cref="JobName"/>).</summary>
    protected override string Crawler => JobName;
}

View File

@@ -0,0 +1,10 @@
namespace Scraper.Jobs;

/// <summary>
/// Quartz job that runs the <see cref="TgxCrawler"/>.
/// Retry/refire behaviour comes from <see cref="BaseJob"/>; concurrent
/// executions are disallowed.
/// </summary>
[DisallowConcurrentExecution]
public class SyncTgxJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(TgxCrawler);

    /// <summary>Identity of this job within the <c>Crawlers</c> group.</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Crawlers));

    /// <summary>Trigger identity paired with <see cref="Key"/>.</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Crawlers));

    /// <summary>Crawler name resolved by the base job (same value as <see cref="JobName"/>).</summary>
    protected override string Crawler => JobName;
}

View File

@@ -0,0 +1,10 @@
namespace Scraper.Jobs;

/// <summary>
/// Quartz job that runs the <see cref="TpbCrawler"/>.
/// Retry/refire behaviour comes from <see cref="BaseJob"/>; concurrent
/// executions are disallowed.
/// </summary>
[DisallowConcurrentExecution]
public class SyncTpbJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(TpbCrawler);

    /// <summary>Identity of this job within the <c>Crawlers</c> group.</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Crawlers));

    /// <summary>Trigger identity paired with <see cref="Key"/>.</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Crawlers));

    /// <summary>Crawler name resolved by the base job (same value as <see cref="JobName"/>).</summary>
    protected override string Crawler => JobName;
}

View File

@@ -0,0 +1,10 @@
namespace Scraper.Jobs;

/// <summary>
/// Quartz job that runs the <see cref="YtsCrawler"/>.
/// Retry/refire behaviour comes from <see cref="BaseJob"/>; concurrent
/// executions are disallowed.
/// </summary>
[DisallowConcurrentExecution]
public class SyncYtsJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(YtsCrawler);

    /// <summary>Identity of this job within the <c>Crawlers</c> group.</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Crawlers));

    /// <summary>Trigger identity paired with <see cref="Key"/>.</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Crawlers));

    /// <summary>Crawler name resolved by the base job (same value as <see cref="JobName"/>).</summary>
    protected override string Crawler => JobName;
}