Woke up to see a discussion about torrentio scraping: powered by community

Was a little inspired. Now that we have a (self-populating) database of IMDb IDs, why shouldn't we be able to scrape any other instance of Torrentio or KnightCrawler?

Also restructured the producer into vertical slices to make it easier to work with.
There was too much flicking back and forth between Jobs and Crawlers when configuring.
This commit is contained in:
iPromKnight
2024-03-02 18:41:57 +00:00
parent 98115e0cf7
commit 95fa48c851
59 changed files with 733 additions and 261 deletions

View File

@@ -0,0 +1,23 @@
namespace Producer.Features.JobSupport;
/// <summary>
/// Base class for Quartz jobs that run a single crawler looked up by name.
/// </summary>
/// <param name="crawlerProvider">Provider used to resolve the crawler named by <see cref="Crawler"/>.</param>
public abstract class BaseJob(ICrawlerProvider crawlerProvider) : IJob
{
    /// <summary>Largest refire count at which the job is still attempted.</summary>
    private const int MaxRefireCount = 5;

    /// <summary>Name handed to the crawler provider to select the crawler this job executes.</summary>
    protected abstract string Crawler { get; }

    /// <summary>
    /// Resolves the crawler named by <see cref="Crawler"/> and awaits its execution.
    /// </summary>
    /// <param name="context">Quartz execution context; its refire count bounds the retries.</param>
    /// <exception cref="InvalidOperationException">The refire count is greater than 5.</exception>
    /// <exception cref="JobExecutionException">
    /// The crawler threw; the original exception is attached as the cause and an immediate refire is requested.
    /// </exception>
    public async Task Execute(IJobExecutionContext context)
    {
        // Stop the immediate-refire loop once the job has been retried too often.
        if (context.RefireCount > MaxRefireCount)
        {
            throw new InvalidOperationException("Job failed too many times");
        }

        try
        {
            await crawlerProvider.Get(Crawler).Execute();
        }
        catch (Exception ex)
        {
            // Include the crawler name so the failure is identifiable in logs
            // (the message was previously empty).
            throw new JobExecutionException(
                msg: $"Crawler '{Crawler}' failed (refire count: {context.RefireCount})",
                refireImmediately: true,
                cause: ex);
        }
    }
}

View File

@@ -0,0 +1,7 @@
namespace Producer.Features.JobSupport;
/// <summary>
/// A job that exposes a crawler of a specific type.
/// </summary>
/// <typeparam name="TCrawler">Covariant crawler type; constrained to <c>ICrawler</c>.</typeparam>
public interface ICrawlerJob<out TCrawler> : IJob where TCrawler : ICrawler
{
    /// <summary>Gets the <typeparamref name="TCrawler"/> associated with this job.</summary>
    TCrawler CrawlerType { get; }
}

View File

@@ -0,0 +1,7 @@
namespace Producer.Features.JobSupport;
/// <summary>
/// String constants shared by the job-support feature.
/// NOTE(review): presumably used as Quartz job/trigger group names — confirm against the usages.
/// </summary>
public static class Literals
{
/// <summary>The literal <c>"CrawlersJobs"</c>.</summary>
public const string CrawlersJobs = "CrawlersJobs";
/// <summary>The literal <c>"PublishingJobs"</c>.</summary>
public const string PublishingJobs = "PublishingJobs";
}

View File

@@ -0,0 +1,6 @@
namespace Producer.Features.JobSupport;
/// <summary>
/// Marker attribute for job classes. The job-type scan in
/// <c>ServiceCollectionExtensions.AddQuartz</c> filters out every type that carries it.
/// </summary>
[AttributeUsage(AttributeTargets.Class)]
public class ManualJobRegistrationAttribute : Attribute;

View File

@@ -0,0 +1,9 @@
namespace Producer.Features.JobSupport;
/// <summary>
/// Settings object holding the list of configured scrapers.
/// </summary>
public class ScrapeConfiguration
{
    /// <summary>Name of the configuration section this type is loaded from.</summary>
    public const string SectionName = "ScrapeConfiguration";

    /// <summary>File name associated with the scraper settings.</summary>
    public const string Filename = "scrapers.json";

    /// <summary>Configured scrapers; starts out as an empty list.</summary>
    public List<Scraper> Scrapers { get; set; } = new();
}

View File

@@ -0,0 +1,131 @@
namespace Producer.Features.JobSupport;
/// <summary>
/// Dependency-injection helpers that register the producer's Quartz jobs and their triggers.
/// </summary>
internal static class ServiceCollectionExtensions
{
    /// <summary>Seconds between trigger registration and the first firing of every trigger.</summary>
    private const int InitialDelaySeconds = 20;

    /// <summary>
    /// Registers all job types with the container, configures Quartz with their triggers
    /// and adds the Quartz hosted service.
    /// </summary>
    /// <param name="services">Service collection to add the registrations to.</param>
    /// <param name="configuration">Configuration the scraper and Torrentio settings are read from.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="InvalidOperationException">
    /// The generic <c>AddJobWithTrigger</c> helper could not be located via reflection.
    /// </exception>
    internal static IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration)
    {
        var scrapeConfiguration = services.LoadConfigurationFromConfig<ScrapeConfiguration>(configuration, ScrapeConfiguration.SectionName);
        var githubConfiguration = services.LoadConfigurationFromEnv<GithubConfiguration>();
        var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();

        // Every concrete IJob in BaseJob's assembly that has not opted out via
        // [ManualJobRegistration]. Type.Assembly is never null, unlike Assembly.GetAssembly.
        var jobTypes = typeof(BaseJob).Assembly
            .GetTypes()
            .Where(t => t is {IsClass: true, IsAbstract: false} &&
                        typeof(IJob).IsAssignableFrom(t) &&
                        !Attribute.IsDefined(t, typeof(ManualJobRegistrationAttribute)))
            .ToList();

        foreach (var type in jobTypes)
        {
            services.AddTransient(type);
        }

        if (!string.IsNullOrEmpty(githubConfiguration.PAT))
        {
            services.AddTransient<SyncDmmJob>();
        }

        // Fail fast with a clear message instead of a NullReferenceException later on
        // if the helper is ever renamed or its accessibility changes.
        var openMethod = typeof(ServiceCollectionExtensions)
                             .GetMethod(nameof(AddJobWithTrigger), BindingFlags.NonPublic | BindingFlags.Static)
                         ?? throw new InvalidOperationException(
                             $"Unable to locate {nameof(AddJobWithTrigger)} on {nameof(ServiceCollectionExtensions)}");

        services.AddQuartz(
            quartz =>
            {
                RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
                RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration);
                RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
                RegisterPublisher(quartz, rabbitConfiguration);
            });

        services.AddQuartzHostedService(
            options =>
            {
                options.WaitForJobsToComplete = true;
            });

        return services;
    }

    /// <summary>
    /// Schedules every discovered job type by reading its public <c>Key</c> and <c>Trigger</c>
    /// fields and invoking the generic <c>AddJobWithTrigger</c> helper for that type.
    /// Types without usable fields are reported on the console and skipped.
    /// </summary>
    private static void RegisterAutomaticRegistrationJobs(
        List<Type> jobTypes,
        MethodInfo openMethod,
        IServiceCollectionQuartzConfigurator quartz,
        ScrapeConfiguration scrapeConfiguration)
    {
        foreach (var jobType in jobTypes)
        {
            // The fields are read without an instance, so they must be static.
            // Checking the runtime types here keeps a wrongly typed field from
            // surfacing as an ArgumentException inside the reflective call below.
            if (jobType.GetField("Key")?.GetValue(null) is not JobKey key ||
                jobType.GetField("Trigger")?.GetValue(null) is not TriggerKey trigger)
            {
                Console.WriteLine($"Job {jobType.Name} does not have a public static Key (JobKey) and Trigger (TriggerKey) field");
                continue;
            }

            openMethod
                .MakeGenericMethod(jobType)
                .Invoke(null, [quartz, key, trigger, scrapeConfiguration]);
        }
    }

    /// <summary>Schedules <c>SyncDmmJob</c>, but only when a GitHub PAT is configured.</summary>
    private static void RegisterDmmJob(GithubConfiguration githubConfiguration, IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration)
    {
        if (string.IsNullOrEmpty(githubConfiguration.PAT))
        {
            return;
        }

        AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
    }

    /// <summary>
    /// Loads the Torrentio configuration and schedules <c>SyncTorrentioJob</c> when at least
    /// one instance is configured.
    /// </summary>
    private static void RegisterTorrentioJob(
        IServiceCollection services,
        IServiceCollectionQuartzConfigurator quartz,
        IConfiguration configuration,
        ScrapeConfiguration scrapeConfiguration)
    {
        var torrentioConfiguration = services.LoadConfigurationFromConfig<TorrentioConfiguration>(configuration, TorrentioConfiguration.SectionName);

        if (torrentioConfiguration.Instances.Count == 0)
        {
            return;
        }

        AddJobWithTrigger<SyncTorrentioJob>(quartz, SyncTorrentioJob.Key, SyncTorrentioJob.Trigger, scrapeConfiguration);
    }

    /// <summary>Schedules <c>PublisherJob</c> using the publish interval from the RabbitMQ configuration.</summary>
    private static void RegisterPublisher(IServiceCollectionQuartzConfigurator quartz, RabbitMqConfiguration rabbitConfig) =>
        AddJobWithTriggerAndInterval<PublisherJob>(quartz, PublisherJob.Key, PublisherJob.Trigger, rabbitConfig.PublishIntervalInSeconds);

    /// <summary>
    /// Schedules <typeparamref name="TJobType"/> using the interval of the scraper entry whose
    /// name matches the job type name (case-insensitive). Does nothing when no such entry
    /// exists or the entry is disabled.
    /// </summary>
    /// <remarks>Also invoked via reflection, so its name and parameter list must stay in sync with that call.</remarks>
    private static void AddJobWithTrigger<TJobType>(
        IServiceCollectionQuartzConfigurator quartz,
        JobKey key,
        TriggerKey trigger,
        ScrapeConfiguration scrapeConfiguration) where TJobType : IJob
    {
        var jobName = typeof(TJobType).Name;
        var scraper = scrapeConfiguration.Scrapers
            .FirstOrDefault(x => string.Equals(x.Name, jobName, StringComparison.OrdinalIgnoreCase));

        if (scraper is null || !scraper.Enabled)
        {
            return;
        }

        ScheduleRepeatingJob<TJobType>(quartz, key, trigger, TimeSpan.FromSeconds(scraper.IntervalSeconds));
    }

    /// <summary>
    /// Schedules <typeparamref name="TJobType"/> to repeat forever every <paramref name="interval"/> seconds.
    /// </summary>
    private static void AddJobWithTriggerAndInterval<TJobType>(
        IServiceCollectionQuartzConfigurator quartz,
        JobKey key,
        TriggerKey trigger,
        int interval) where TJobType : IJob =>
        ScheduleRepeatingJob<TJobType>(quartz, key, trigger, TimeSpan.FromSeconds(interval));

    /// <summary>
    /// Adds a durable job plus a simple trigger that first fires after the initial delay
    /// and then repeats forever at <paramref name="interval"/>.
    /// </summary>
    private static void ScheduleRepeatingJob<TJobType>(
        IServiceCollectionQuartzConfigurator quartz,
        JobKey key,
        TriggerKey trigger,
        TimeSpan interval) where TJobType : IJob
    {
        quartz.AddJob<TJobType>(opts => opts.WithIdentity(key).StoreDurably());
        quartz.AddTrigger(
            opts => opts
                .ForJob(key)
                .WithIdentity(trigger)
                .StartAt(DateTimeOffset.UtcNow.AddSeconds(InitialDelaySeconds))
                .WithSimpleSchedule(x => x.WithInterval(interval).RepeatForever()));
    }
}