mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
Woke up to see a discussion about torrentio scraping: powered by community
Was a little inspired. Now that we have a self-populating database of IMDb IDs, why shouldn't we also have the ability to scrape any other instance of Torrentio or KnightCrawler? Also restructured the producer to be vertically sliced, to make it easier to work with — there was too much flicking back and forth between Jobs and Crawlers when configuring.
This commit is contained in:
23
src/producer/Features/JobSupport/BaseJob.cs
Normal file
23
src/producer/Features/JobSupport/BaseJob.cs
Normal file
@@ -0,0 +1,23 @@
|
||||
namespace Producer.Features.JobSupport;

/// <summary>
/// Base class for Quartz jobs that delegate their work to a named crawler
/// resolved from the injected <see cref="ICrawlerProvider"/>.
/// On failure the job requests an immediate re-fire, capped at
/// <see cref="MaxRefireCount"/> attempts.
/// </summary>
public abstract class BaseJob(ICrawlerProvider crawlerProvider) : IJob
{
    // Upper bound on Quartz re-fires before the job is treated as permanently failed.
    private const int MaxRefireCount = 5;

    /// <summary>
    /// Quartz entry point: resolves the crawler named by <see cref="Crawler"/> and executes it.
    /// </summary>
    /// <param name="context">Quartz execution context; <c>RefireCount</c> tracks retry attempts.</param>
    /// <exception cref="InvalidOperationException">Thrown once the re-fire limit is exceeded.</exception>
    /// <exception cref="JobExecutionException">Wraps any crawler failure and requests an immediate re-fire.</exception>
    public async Task Execute(IJobExecutionContext context)
    {
        if (context.RefireCount > MaxRefireCount)
        {
            throw new InvalidOperationException("Job failed too many times");
        }

        try
        {
            await crawlerProvider.Get(Crawler).Execute();
        }
        catch (Exception ex)
        {
            // Include the crawler name so failures are diagnosable in logs
            // (previously the message was empty).
            throw new JobExecutionException(msg: $"Crawler '{Crawler}' failed", refireImmediately: true, cause: ex);
        }
    }

    /// <summary>Name of the crawler this job resolves from the provider and executes.</summary>
    protected abstract string Crawler { get; }
}
|
||||
7
src/producer/Features/JobSupport/ICrawlerJob.cs
Normal file
7
src/producer/Features/JobSupport/ICrawlerJob.cs
Normal file
@@ -0,0 +1,7 @@
|
||||
namespace Producer.Features.JobSupport;

/// <summary>
/// A Quartz job bound to a specific crawler implementation.
/// </summary>
/// <typeparam name="TCrawler">The crawler type this job executes; covariant.</typeparam>
public interface ICrawlerJob<out TCrawler> : IJob
    where TCrawler : ICrawler
{
    // NOTE(review): named "CrawlerType" but typed as the crawler instance itself,
    // not a Type — consider renaming to Crawler at a later point.
    TCrawler CrawlerType { get; }
}
|
||||
7
src/producer/Features/JobSupport/Literals.cs
Normal file
7
src/producer/Features/JobSupport/Literals.cs
Normal file
@@ -0,0 +1,7 @@
|
||||
namespace Producer.Features.JobSupport;

/// <summary>
/// Shared string constants for job scheduling.
/// NOTE(review): these look like Quartz job/trigger group names — confirm at usage sites.
/// </summary>
public static class Literals
{
    // Group for crawler/scraper jobs.
    public const string CrawlersJobs = "CrawlersJobs";
    // Group for jobs that publish collected results downstream.
    public const string PublishingJobs = "PublishingJobs";
}
|
||||
@@ -0,0 +1,6 @@
|
||||
namespace Producer.Features.JobSupport;

/// <summary>
/// Marker attribute that excludes a job class from the reflection-based automatic
/// registration performed in <c>ServiceCollectionExtensions.AddQuartz</c>;
/// jobs carrying this attribute are registered manually and conditionally instead.
/// </summary>
[AttributeUsage(AttributeTargets.Class)]
public class ManualJobRegistrationAttribute : Attribute
{
}
|
||||
9
src/producer/Features/JobSupport/ScrapeConfiguration.cs
Normal file
9
src/producer/Features/JobSupport/ScrapeConfiguration.cs
Normal file
@@ -0,0 +1,9 @@
|
||||
namespace Producer.Features.JobSupport;

/// <summary>
/// Options type bound to the "ScrapeConfiguration" configuration section,
/// listing the scrapers available for scheduling.
/// </summary>
public class ScrapeConfiguration
{
    // Configuration section name this type binds to.
    public const string SectionName = "ScrapeConfiguration";
    // Configuration file the section is expected to live in.
    public const string Filename = "scrapers.json";

    // Configured scrapers; defaults to empty when none are configured.
    public List<Scraper> Scrapers { get; set; } = [];
}
|
||||
131
src/producer/Features/JobSupport/ServiceCollectionExtensions.cs
Normal file
131
src/producer/Features/JobSupport/ServiceCollectionExtensions.cs
Normal file
@@ -0,0 +1,131 @@
|
||||
namespace Producer.Features.JobSupport;

internal static class ServiceCollectionExtensions
{
    // Delay before the first firing of every trigger, giving the host time to finish starting up.
    private const int StartupDelaySeconds = 20;

    /// <summary>
    /// Registers all producer jobs with Quartz. Concrete <see cref="IJob"/> types in this
    /// assembly are discovered by reflection unless marked with
    /// <see cref="ManualJobRegistrationAttribute"/>; manually-registered jobs are added
    /// conditionally (DMM requires a GitHub PAT, Torrentio requires configured instances).
    /// </summary>
    /// <param name="services">Service collection to register into.</param>
    /// <param name="configuration">Application configuration root.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    internal static IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration)
    {
        var scrapeConfiguration = services.LoadConfigurationFromConfig<ScrapeConfiguration>(configuration, ScrapeConfiguration.SectionName);
        var githubConfiguration = services.LoadConfigurationFromEnv<GithubConfiguration>();
        var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();

        // Every concrete IJob in this assembly that has not opted out of automatic registration.
        // typeof(...).Assembly is never null, unlike Assembly.GetAssembly(...).
        var jobTypes = typeof(BaseJob).Assembly
            .GetTypes()
            .Where(t => t is {IsClass: true, IsAbstract: false} && typeof(IJob).IsAssignableFrom(t) &&
                        !Attribute.IsDefined(t, typeof(ManualJobRegistrationAttribute)))
            .ToList();

        foreach (var type in jobTypes)
        {
            services.AddTransient(type);
        }

        // SyncDmmJob is registered manually, and only when a GitHub PAT is available.
        if (!string.IsNullOrEmpty(githubConfiguration.PAT))
        {
            services.AddTransient<SyncDmmJob>();
        }

        // AddJobWithTrigger is private static, so restrict the binding flags accordingly
        // (the original also passed Instance, which never matches here), and fail fast
        // instead of dereferencing a null MethodInfo if the lookup ever breaks.
        var openMethod = typeof(ServiceCollectionExtensions).GetMethod(nameof(AddJobWithTrigger), BindingFlags.NonPublic | BindingFlags.Static)
                         ?? throw new InvalidOperationException($"Reflection lookup of {nameof(AddJobWithTrigger)} failed");

        services.AddQuartz(
            quartz =>
            {
                RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
                RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration);
                RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
                RegisterPublisher(quartz, rabbitConfiguration);
            });

        services.AddQuartzHostedService(
            options =>
            {
                // Let in-flight crawls finish on shutdown.
                options.WaitForJobsToComplete = true;
            });

        return services;
    }

    /// <summary>
    /// Schedules each discovered job by closing <paramref name="openMethod"/>
    /// (the generic <c>AddJobWithTrigger</c>) over the job type, reading the static
    /// <c>Key</c>/<c>Trigger</c> fields the job convention requires.
    /// </summary>
    private static void RegisterAutomaticRegistrationJobs(List<Type> jobTypes, MethodInfo openMethod, IServiceCollectionQuartzConfigurator quartz,
        ScrapeConfiguration scrapeConfiguration)
    {
        foreach (var jobType in jobTypes)
        {
            // These are static fields, so the instance argument to GetValue is
            // ignored; pass null for clarity rather than the Type object.
            var key = jobType.GetField("Key")?.GetValue(null);
            var trigger = jobType.GetField("Trigger")?.GetValue(null);

            if (key is null || trigger is null)
            {
                // Non-fatal: job simply isn't scheduled. NOTE(review): consider ILogger over Console.
                Console.WriteLine($"Job {jobType.Name} does not have a JobKey or TriggerKey property");
                continue;
            }

            var method = openMethod.MakeGenericMethod(jobType);
            method.Invoke(null, [quartz, key, trigger, scrapeConfiguration]);
        }
    }

    /// <summary>Schedules the DMM sync job when a GitHub PAT is configured.</summary>
    private static void RegisterDmmJob(GithubConfiguration githubConfiguration, IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration)
    {
        if (!string.IsNullOrEmpty(githubConfiguration.PAT))
        {
            AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
        }
    }

    /// <summary>Schedules the Torrentio sync job when at least one instance is configured.</summary>
    private static void RegisterTorrentioJob(
        IServiceCollection services,
        IServiceCollectionQuartzConfigurator quartz,
        IConfiguration configuration,
        ScrapeConfiguration scrapeConfiguration)
    {
        var torrentioConfiguration = services.LoadConfigurationFromConfig<TorrentioConfiguration>(configuration, TorrentioConfiguration.SectionName);

        if (torrentioConfiguration.Instances.Count != 0)
        {
            AddJobWithTrigger<SyncTorrentioJob>(quartz, SyncTorrentioJob.Key, SyncTorrentioJob.Trigger, scrapeConfiguration);
        }
    }

    /// <summary>Schedules the publisher job at the RabbitMQ-configured interval.</summary>
    private static void RegisterPublisher(IServiceCollectionQuartzConfigurator quartz, RabbitMqConfiguration rabbitConfig) =>
        AddJobWithTriggerAndInterval<PublisherJob>(quartz, PublisherJob.Key, PublisherJob.Trigger, rabbitConfig.PublishIntervalInSeconds);

    /// <summary>
    /// Schedules a scraper job using the interval from its matching <c>scrapers.json</c>
    /// entry (matched by type name, case-insensitive). Skips jobs that are not
    /// configured or are explicitly disabled. Name and signature must stay stable:
    /// this method is resolved by reflection in <see cref="AddQuartz"/>.
    /// </summary>
    private static void AddJobWithTrigger<TJobType>(
        IServiceCollectionQuartzConfigurator quartz,
        JobKey key,
        TriggerKey trigger,
        ScrapeConfiguration scrapeConfiguration) where TJobType : IJob
    {
        var scraper = scrapeConfiguration.Scrapers
            .FirstOrDefault(x => x.Name != null &&
                                 x.Name.Equals(typeof(TJobType).Name, StringComparison.OrdinalIgnoreCase));

        if (scraper is null || !scraper.Enabled)
        {
            return;
        }

        ScheduleJob<TJobType>(quartz, key, trigger, scraper.IntervalSeconds);
    }

    /// <summary>Schedules a job with an explicit interval, bypassing scraper configuration.</summary>
    private static void AddJobWithTriggerAndInterval<TJobType>(
        IServiceCollectionQuartzConfigurator quartz,
        JobKey key,
        TriggerKey trigger,
        int interval) where TJobType : IJob =>
        ScheduleJob<TJobType>(quartz, key, trigger, interval);

    /// <summary>
    /// Shared registration tail: adds a durable job and a repeat-forever simple trigger
    /// that first fires after the common startup delay.
    /// </summary>
    private static void ScheduleJob<TJobType>(
        IServiceCollectionQuartzConfigurator quartz,
        JobKey key,
        TriggerKey trigger,
        int intervalSeconds) where TJobType : IJob
    {
        quartz.AddJob<TJobType>(opts => opts.WithIdentity(key).StoreDurably());

        quartz.AddTrigger(
            opts => opts
                .ForJob(key)
                .WithIdentity(trigger)
                .StartAt(DateTimeOffset.Now.AddSeconds(StartupDelaySeconds))
                .WithSimpleSchedule(x => x.WithInterval(TimeSpan.FromSeconds(intervalSeconds)).RepeatForever()));
    }
}
|
||||
Reference in New Issue
Block a user