diff --git a/.gitignore b/.gitignore index c521616..9ca43c2 100644 --- a/.gitignore +++ b/.gitignore @@ -405,3 +405,5 @@ FodyWeavers.xsd dist/ deployment/docker/docker-compose-dev.yaml + +src/producer/.run/ diff --git a/src/producer/Configuration/scrapers.json b/src/producer/Configuration/scrapers.json index a1ca76b..6088d75 100644 --- a/src/producer/Configuration/scrapers.json +++ b/src/producer/Configuration/scrapers.json @@ -30,6 +30,11 @@ "Name": "SyncDmmJob", "IntervalSeconds": 1800, "Enabled": true + }, + { + "Name": "SyncTorrentioJob", + "IntervalSeconds": 604800, + "Enabled": true } ] } diff --git a/src/producer/Configuration/torrentio.json b/src/producer/Configuration/torrentio.json new file mode 100644 index 0000000..8209dd7 --- /dev/null +++ b/src/producer/Configuration/torrentio.json @@ -0,0 +1,14 @@ +{ + "TorrentioConfiguration": { + "Instances": [ + { + "Name": "Official", + "Url": "https://torrentio.strem.fun", + "RateLimit": { + "RequestLimit": 300, + "IntervalInSeconds": 3600 + } + } + ] + } +} \ No newline at end of file diff --git a/src/producer/Extensions/ConfigurationExtensions.cs b/src/producer/Extensions/ConfigurationExtensions.cs index a4d93b4..664fe08 100644 --- a/src/producer/Extensions/ConfigurationExtensions.cs +++ b/src/producer/Extensions/ConfigurationExtensions.cs @@ -11,6 +11,7 @@ public static class ConfigurationExtensions configuration.AddJsonFile(LoggingConfig, false, true); configuration.AddJsonFile(ScrapeConfiguration.Filename, false, true); + configuration.AddJsonFile(TorrentioConfiguration.Filename, false, true); configuration.AddEnvironmentVariables(); @@ -18,4 +19,28 @@ public static class ConfigurationExtensions return configuration; } + + public static TConfiguration LoadConfigurationFromConfig(this IServiceCollection services, IConfiguration configuration, string sectionName) + where TConfiguration : class + { + var instance = configuration.GetSection(sectionName).Get(); + + ArgumentNullException.ThrowIfNull(instance, 
nameof(instance)); + + services.TryAddSingleton(instance); + + return instance; + } + + public static TConfiguration LoadConfigurationFromEnv(this IServiceCollection services) + where TConfiguration : class + { + var instance = Activator.CreateInstance(); + + ArgumentNullException.ThrowIfNull(instance, nameof(instance)); + + services.TryAddSingleton(instance); + + return instance; + } } \ No newline at end of file diff --git a/src/producer/Extensions/ServiceCollectionExtensions.cs b/src/producer/Extensions/ServiceCollectionExtensions.cs deleted file mode 100644 index 01a7edb..0000000 --- a/src/producer/Extensions/ServiceCollectionExtensions.cs +++ /dev/null @@ -1,160 +0,0 @@ -namespace Producer.Extensions; - -public static class ServiceCollectionExtensions -{ - internal static IServiceCollection AddCrawlers(this IServiceCollection services) - { - services.AddHttpClient("Scraper"); - - services - .AddKeyedTransient(nameof(EzTvCrawler)) - .AddKeyedTransient(nameof(NyaaCrawler)) - .AddKeyedTransient(nameof(YtsCrawler)) - .AddKeyedTransient(nameof(TpbCrawler)) - .AddKeyedTransient(nameof(TgxCrawler)) - .AddKeyedTransient(nameof(DebridMediaManagerCrawler)) - .AddSingleton() - .AddTransient(); - - return services; - } - - internal static IServiceCollection AddDataStorage(this IServiceCollection services) - { - services.LoadConfigurationFromEnv(); - services.AddTransient(); - services.AddTransient(); - return services; - } - - internal static IServiceCollection RegisterMassTransit(this IServiceCollection services) - { - var rabbitConfig = services.LoadConfigurationFromEnv(); - - services.AddMassTransit(busConfigurator => - { - busConfigurator.SetKebabCaseEndpointNameFormatter(); - busConfigurator.UsingRabbitMq((_, busFactoryConfigurator) => - { - busFactoryConfigurator.Host(rabbitConfig.Host, hostConfigurator => - { - hostConfigurator.Username(rabbitConfig.Username); - hostConfigurator.Password(rabbitConfig.Password); - }); - }); - }); - - return services; - } - - 
internal static IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration) - { - var scrapeConfiguration = services.LoadConfigurationFromConfig(configuration, ScrapeConfiguration.SectionName); - var githubConfiguration = services.LoadConfigurationFromEnv(); - var rabbitConfig = services.LoadConfigurationFromEnv(); - - services - .AddTransient() - .AddTransient() - .AddTransient() - .AddTransient() - .AddTransient() - .AddTransient() - .AddTransient(); - - if (!string.IsNullOrEmpty(githubConfiguration.PAT)) - { - services.AddTransient(); - } - - services.AddQuartz( - quartz => - { - AddJobWithTrigger(quartz, SyncEzTvJob.Key, SyncEzTvJob.Trigger, scrapeConfiguration); - AddJobWithTrigger(quartz, SyncNyaaJob.Key, SyncNyaaJob.Trigger, scrapeConfiguration); - AddJobWithTrigger(quartz, SyncTpbJob.Key, SyncTpbJob.Trigger, scrapeConfiguration); - AddJobWithTrigger(quartz, SyncYtsJob.Key, SyncYtsJob.Trigger, scrapeConfiguration); - AddJobWithTrigger(quartz, SyncTgxJob.Key, SyncTgxJob.Trigger, scrapeConfiguration); - AddJobWithTrigger(quartz, IPJob.Key, IPJob.Trigger, 60 * 5); - AddJobWithTrigger(quartz, PublisherJob.Key, PublisherJob.Trigger, rabbitConfig.PublishIntervalInSeconds); - - if (!string.IsNullOrEmpty(githubConfiguration.PAT)) - { - AddJobWithTrigger(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration); - } - }); - - services.AddQuartzHostedService( - options => - { - options.WaitForJobsToComplete = true; - }); - - return services; - } - - private static TConfiguration LoadConfigurationFromConfig(this IServiceCollection services, IConfiguration configuration, string sectionName) - where TConfiguration : class - { - var instance = configuration.GetSection(sectionName).Get(); - - ArgumentNullException.ThrowIfNull(instance, nameof(instance)); - - services.TryAddSingleton(instance); - - return instance; - } - - private static TConfiguration LoadConfigurationFromEnv(this IServiceCollection services) - where TConfiguration : 
class - { - var instance = Activator.CreateInstance(); - - ArgumentNullException.ThrowIfNull(instance, nameof(instance)); - - services.TryAddSingleton(instance); - - return instance; - } - - private static void AddJobWithTrigger( - IServiceCollectionQuartzConfigurator quartz, - JobKey key, - TriggerKey trigger, - ScrapeConfiguration scrapeConfiguration) where TJobType : IJob - { - var scraper = scrapeConfiguration.Scrapers - .FirstOrDefault(x => x.Name != null && - x.Name.Equals(typeof(TJobType).Name, StringComparison.OrdinalIgnoreCase)); - - if (scraper is null || !scraper.Enabled) - { - return; - } - - quartz.AddJob(opts => opts.WithIdentity(key).StoreDurably()); - - quartz.AddTrigger( - opts => opts - .ForJob(key) - .WithIdentity(trigger) - .StartAt(DateTimeOffset.Now.AddSeconds(20)) - .WithSimpleSchedule(x => x.WithInterval(TimeSpan.FromSeconds(scraper.IntervalSeconds)).RepeatForever())); - } - - private static void AddJobWithTrigger( - IServiceCollectionQuartzConfigurator quartz, - JobKey key, - TriggerKey trigger, - int interval) where TJobType : IJob - { - quartz.AddJob(opts => opts.WithIdentity(key).StoreDurably()); - - quartz.AddTrigger( - opts => opts - .ForJob(key) - .WithIdentity(trigger) - .StartAt(DateTimeOffset.Now.AddSeconds(20)) - .WithSimpleSchedule(x => x.WithInterval(TimeSpan.FromSeconds(interval)).RepeatForever())); - } -} \ No newline at end of file diff --git a/src/producer/Interfaces/IMessagePublisher.cs b/src/producer/Features/Amqp/IMessagePublisher.cs similarity index 80% rename from src/producer/Interfaces/IMessagePublisher.cs rename to src/producer/Features/Amqp/IMessagePublisher.cs index 19c273c..bb862e0 100644 --- a/src/producer/Interfaces/IMessagePublisher.cs +++ b/src/producer/Features/Amqp/IMessagePublisher.cs @@ -1,4 +1,4 @@ -namespace Producer.Interfaces; +namespace Producer.Features.Amqp; public interface IMessagePublisher { diff --git a/src/producer/Jobs/PublisherJob.cs b/src/producer/Features/Amqp/PublisherJob.cs similarity 
index 81% rename from src/producer/Jobs/PublisherJob.cs rename to src/producer/Features/Amqp/PublisherJob.cs index 2e25294..755034d 100644 --- a/src/producer/Jobs/PublisherJob.cs +++ b/src/producer/Features/Amqp/PublisherJob.cs @@ -1,11 +1,14 @@ -namespace Producer.Jobs; +using Literals = Producer.Features.JobSupport.Literals; + +namespace Producer.Features.Amqp; [DisallowConcurrentExecution] +[ManualJobRegistration] public class PublisherJob(IMessagePublisher publisher, IDataStorage storage, ILogger logger) : IJob { private const string JobName = nameof(PublisherJob); - public static readonly JobKey Key = new(JobName, nameof(Jobs)); - public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Jobs)); + public static readonly JobKey Key = new(JobName, nameof(Literals.PublishingJobs)); + public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.PublishingJobs)); public async Task Execute(IJobExecutionContext context) { diff --git a/src/producer/Models/Configuration/RabbitMqConfiguration.cs b/src/producer/Features/Amqp/RabbitMqConfiguration.cs similarity index 98% rename from src/producer/Models/Configuration/RabbitMqConfiguration.cs rename to src/producer/Features/Amqp/RabbitMqConfiguration.cs index d4eacb1..0a52a98 100644 --- a/src/producer/Models/Configuration/RabbitMqConfiguration.cs +++ b/src/producer/Features/Amqp/RabbitMqConfiguration.cs @@ -1,4 +1,4 @@ -namespace Producer.Models.Configuration; +namespace Producer.Features.Amqp; public class RabbitMqConfiguration { diff --git a/src/producer/Features/Amqp/ServiceCollectionExtensions.cs b/src/producer/Features/Amqp/ServiceCollectionExtensions.cs new file mode 100644 index 0000000..57e3fe6 --- /dev/null +++ b/src/producer/Features/Amqp/ServiceCollectionExtensions.cs @@ -0,0 +1,24 @@ +namespace Producer.Features.Amqp; + +internal static class ServiceCollectionExtensions +{ + internal static IServiceCollection RegisterMassTransit(this IServiceCollection services) + { + 
var rabbitConfig = services.LoadConfigurationFromEnv(); + + services.AddMassTransit(busConfigurator => + { + busConfigurator.SetKebabCaseEndpointNameFormatter(); + busConfigurator.UsingRabbitMq((_, busFactoryConfigurator) => + { + busFactoryConfigurator.Host(rabbitConfig.Host, hostConfigurator => + { + hostConfigurator.Username(rabbitConfig.Username); + hostConfigurator.Password(rabbitConfig.Password); + }); + }); + }); + + return services; + } +} \ No newline at end of file diff --git a/src/producer/Services/TorrentPublisher.cs b/src/producer/Features/Amqp/TorrentPublisher.cs similarity index 98% rename from src/producer/Services/TorrentPublisher.cs rename to src/producer/Features/Amqp/TorrentPublisher.cs index ac39745..259007f 100644 --- a/src/producer/Services/TorrentPublisher.cs +++ b/src/producer/Features/Amqp/TorrentPublisher.cs @@ -1,4 +1,4 @@ -namespace Producer.Services; +namespace Producer.Features.Amqp; public class TorrentPublisher( ISendEndpointProvider sendEndpointProvider, diff --git a/src/producer/Crawlers/BaseCrawler.cs b/src/producer/Features/CrawlerSupport/BaseCrawler.cs similarity index 95% rename from src/producer/Crawlers/BaseCrawler.cs rename to src/producer/Features/CrawlerSupport/BaseCrawler.cs index 2184b16..c0d4646 100644 --- a/src/producer/Crawlers/BaseCrawler.cs +++ b/src/producer/Features/CrawlerSupport/BaseCrawler.cs @@ -1,4 +1,4 @@ -namespace Producer.Crawlers; +namespace Producer.Features.CrawlerSupport; public abstract class BaseCrawler(ILogger logger, IDataStorage storage) : ICrawler { diff --git a/src/producer/Crawlers/BaseJsonCrawler.cs b/src/producer/Features/CrawlerSupport/BaseJsonCrawler.cs similarity index 94% rename from src/producer/Crawlers/BaseJsonCrawler.cs rename to src/producer/Features/CrawlerSupport/BaseJsonCrawler.cs index 1af5267..49e4da3 100644 --- a/src/producer/Crawlers/BaseJsonCrawler.cs +++ b/src/producer/Features/CrawlerSupport/BaseJsonCrawler.cs @@ -1,8 +1,8 @@ -namespace Producer.Crawlers; +namespace 
Producer.Features.CrawlerSupport; public abstract class BaseJsonCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseCrawler(logger, storage) { - private readonly HttpClient _client = httpClientFactory.CreateClient("Scraper"); + private readonly HttpClient _client = httpClientFactory.CreateClient(Literals.CrawlerClient); protected virtual async Task Execute(string collectionName) { diff --git a/src/producer/Crawlers/BaseXmlCrawler.cs b/src/producer/Features/CrawlerSupport/BaseXmlCrawler.cs similarity index 90% rename from src/producer/Crawlers/BaseXmlCrawler.cs rename to src/producer/Features/CrawlerSupport/BaseXmlCrawler.cs index 2fd1c8c..b50aed7 100644 --- a/src/producer/Crawlers/BaseXmlCrawler.cs +++ b/src/producer/Features/CrawlerSupport/BaseXmlCrawler.cs @@ -1,4 +1,4 @@ -namespace Producer.Crawlers; +namespace Producer.Features.CrawlerSupport; public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseCrawler(logger, storage) { @@ -6,7 +6,7 @@ public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogg { logger.LogInformation("Starting {Source} crawl", Source); - using var client = httpClientFactory.CreateClient("Scraper"); + using var client = httpClientFactory.CreateClient(Literals.CrawlerClient); var xml = await client.GetStringAsync(Url); var xmlRoot = XElement.Parse(xml); diff --git a/src/producer/Crawlers/CrawlerProvider.cs b/src/producer/Features/CrawlerSupport/CrawlerProvider.cs similarity index 86% rename from src/producer/Crawlers/CrawlerProvider.cs rename to src/producer/Features/CrawlerSupport/CrawlerProvider.cs index af54a62..5d3c192 100644 --- a/src/producer/Crawlers/CrawlerProvider.cs +++ b/src/producer/Features/CrawlerSupport/CrawlerProvider.cs @@ -1,4 +1,4 @@ -namespace Producer.Crawlers; +namespace Producer.Features.CrawlerSupport; public class CrawlerProvider(IServiceProvider serviceProvider) : ICrawlerProvider { diff --git 
a/src/producer/Interfaces/ICrawler.cs b/src/producer/Features/CrawlerSupport/ICrawler.cs similarity index 53% rename from src/producer/Interfaces/ICrawler.cs rename to src/producer/Features/CrawlerSupport/ICrawler.cs index 225db34..e4459f2 100644 --- a/src/producer/Interfaces/ICrawler.cs +++ b/src/producer/Features/CrawlerSupport/ICrawler.cs @@ -1,4 +1,4 @@ -namespace Producer.Interfaces; +namespace Producer.Features.CrawlerSupport; public interface ICrawler { diff --git a/src/producer/Interfaces/ICrawlerProvider.cs b/src/producer/Features/CrawlerSupport/ICrawlerProvider.cs similarity index 71% rename from src/producer/Interfaces/ICrawlerProvider.cs rename to src/producer/Features/CrawlerSupport/ICrawlerProvider.cs index da111f2..17eb6da 100644 --- a/src/producer/Interfaces/ICrawlerProvider.cs +++ b/src/producer/Features/CrawlerSupport/ICrawlerProvider.cs @@ -1,4 +1,4 @@ -namespace Producer.Interfaces; +namespace Producer.Features.CrawlerSupport; public interface ICrawlerProvider { diff --git a/src/producer/Features/CrawlerSupport/Literals.cs b/src/producer/Features/CrawlerSupport/Literals.cs new file mode 100644 index 0000000..eff9eb7 --- /dev/null +++ b/src/producer/Features/CrawlerSupport/Literals.cs @@ -0,0 +1,6 @@ +namespace Producer.Features.CrawlerSupport; + +public static class Literals +{ + public const string CrawlerClient = "Scraper"; +} \ No newline at end of file diff --git a/src/producer/Models/Results.cs b/src/producer/Features/CrawlerSupport/Results.cs similarity index 86% rename from src/producer/Models/Results.cs rename to src/producer/Features/CrawlerSupport/Results.cs index 00944f7..11d6421 100644 --- a/src/producer/Models/Results.cs +++ b/src/producer/Features/CrawlerSupport/Results.cs @@ -1,4 +1,4 @@ -namespace Producer.Models; +namespace Producer.Features.CrawlerSupport; public record InsertTorrentResult(bool Success, int InsertedCount = 0, string? 
ErrorMessage = null); public record UpdatedTorrentResult(bool Success, int UpdatedCount = 0, string? ErrorMessage = null); diff --git a/src/producer/Models/Scraper.cs b/src/producer/Features/CrawlerSupport/Scraper.cs similarity index 78% rename from src/producer/Models/Scraper.cs rename to src/producer/Features/CrawlerSupport/Scraper.cs index f44bf1b..1c27279 100644 --- a/src/producer/Models/Scraper.cs +++ b/src/producer/Features/CrawlerSupport/Scraper.cs @@ -1,4 +1,4 @@ -namespace Producer.Models; +namespace Producer.Features.CrawlerSupport; public class Scraper { diff --git a/src/producer/Features/CrawlerSupport/ServiceCollectionExtensions.cs b/src/producer/Features/CrawlerSupport/ServiceCollectionExtensions.cs new file mode 100644 index 0000000..889278c --- /dev/null +++ b/src/producer/Features/CrawlerSupport/ServiceCollectionExtensions.cs @@ -0,0 +1,23 @@ +namespace Producer.Features.CrawlerSupport; + +internal static class ServiceCollectionExtensions +{ + internal static IServiceCollection AddCrawlers(this IServiceCollection services) + { + services.AddHttpClient(Literals.CrawlerClient); + + var crawlerTypes = Assembly.GetAssembly(typeof(ICrawler)) + .GetTypes() + .Where(t => t is {IsClass: true, IsAbstract: false} && typeof(ICrawler).IsAssignableFrom(t)); + + foreach (var type in crawlerTypes) + { + services.AddKeyedTransient(typeof(ICrawler), type.Name, type); + } + + services + .AddSingleton(); + + return services; + } +} \ No newline at end of file diff --git a/src/producer/Models/Torrent.cs b/src/producer/Features/CrawlerSupport/Torrent.cs similarity index 93% rename from src/producer/Models/Torrent.cs rename to src/producer/Features/CrawlerSupport/Torrent.cs index 74230f3..e5ed62b 100644 --- a/src/producer/Models/Torrent.cs +++ b/src/producer/Features/CrawlerSupport/Torrent.cs @@ -1,4 +1,4 @@ -namespace Producer.Models; +namespace Producer.Features.CrawlerSupport; // Torrent represents a crawled torrent from one of our // supported sources. 
diff --git a/src/producer/Crawlers/Sites/DebridMediaManagerCrawler.cs b/src/producer/Features/Crawlers/Dmm/DebridMediaManagerCrawler.cs similarity index 99% rename from src/producer/Crawlers/Sites/DebridMediaManagerCrawler.cs rename to src/producer/Features/Crawlers/Dmm/DebridMediaManagerCrawler.cs index e18ed09..6d7fdbc 100644 --- a/src/producer/Crawlers/Sites/DebridMediaManagerCrawler.cs +++ b/src/producer/Features/Crawlers/Dmm/DebridMediaManagerCrawler.cs @@ -1,4 +1,4 @@ -namespace Producer.Crawlers.Sites; +namespace Producer.Features.Crawlers.Dmm; public partial class DebridMediaManagerCrawler( IHttpClientFactory httpClientFactory, diff --git a/src/producer/Models/Configuration/GithubConfiguration.cs b/src/producer/Features/Crawlers/Dmm/GithubConfiguration.cs similarity index 83% rename from src/producer/Models/Configuration/GithubConfiguration.cs rename to src/producer/Features/Crawlers/Dmm/GithubConfiguration.cs index 277308c..66315f8 100644 --- a/src/producer/Models/Configuration/GithubConfiguration.cs +++ b/src/producer/Features/Crawlers/Dmm/GithubConfiguration.cs @@ -1,4 +1,4 @@ -namespace Producer.Models.Configuration; +namespace Producer.Features.Crawlers.Dmm; public class GithubConfiguration { diff --git a/src/producer/Jobs/SyncDmmJob.cs b/src/producer/Features/Crawlers/Dmm/SyncDmmJob.cs similarity index 56% rename from src/producer/Jobs/SyncDmmJob.cs rename to src/producer/Features/Crawlers/Dmm/SyncDmmJob.cs index 6b57a74..f9c635b 100644 --- a/src/producer/Jobs/SyncDmmJob.cs +++ b/src/producer/Features/Crawlers/Dmm/SyncDmmJob.cs @@ -1,12 +1,13 @@ -using DebridMediaManagerCrawler = Producer.Crawlers.Sites.DebridMediaManagerCrawler; +using Literals = Producer.Features.JobSupport.Literals; -namespace Producer.Jobs; +namespace Producer.Features.Crawlers.Dmm; [DisallowConcurrentExecution] +[ManualJobRegistration] public class SyncDmmJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider) { private const string JobName = 
nameof(DebridMediaManagerCrawler); - public static readonly JobKey Key = new(JobName, nameof(Crawlers)); - public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers)); + public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs)); + public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs)); protected override string Crawler => nameof(DebridMediaManagerCrawler); } \ No newline at end of file diff --git a/src/producer/Crawlers/Sites/EzTvCrawler.cs b/src/producer/Features/Crawlers/EzTv/EzTvCrawler.cs similarity index 97% rename from src/producer/Crawlers/Sites/EzTvCrawler.cs rename to src/producer/Features/Crawlers/EzTv/EzTvCrawler.cs index 8de23ba..06fbc6a 100644 --- a/src/producer/Crawlers/Sites/EzTvCrawler.cs +++ b/src/producer/Features/Crawlers/EzTv/EzTvCrawler.cs @@ -1,4 +1,4 @@ -namespace Producer.Crawlers.Sites; +namespace Producer.Features.Crawlers.EzTv; public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) { diff --git a/src/producer/Jobs/SyncEzTvJob.cs b/src/producer/Features/Crawlers/EzTv/SyncEzTvJob.cs similarity index 57% rename from src/producer/Jobs/SyncEzTvJob.cs rename to src/producer/Features/Crawlers/EzTv/SyncEzTvJob.cs index dde73d5..4c7f9cd 100644 --- a/src/producer/Jobs/SyncEzTvJob.cs +++ b/src/producer/Features/Crawlers/EzTv/SyncEzTvJob.cs @@ -1,12 +1,12 @@ -using Producer.Crawlers.Sites; +using Literals = Producer.Features.JobSupport.Literals; -namespace Producer.Jobs; +namespace Producer.Features.Crawlers.EzTv; [DisallowConcurrentExecution] public class SyncEzTvJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider) { private const string JobName = nameof(EzTvCrawler); - public static readonly JobKey Key = new(JobName, nameof(Crawlers)); - public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers)); + public static 
readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs)); + public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs)); protected override string Crawler => nameof(EzTvCrawler); } \ No newline at end of file diff --git a/src/producer/Crawlers/Sites/NyaaCrawler.cs b/src/producer/Features/Crawlers/Nyaa/NyaaCrawler.cs similarity index 97% rename from src/producer/Crawlers/Sites/NyaaCrawler.cs rename to src/producer/Features/Crawlers/Nyaa/NyaaCrawler.cs index cc03ecf..038eae1 100644 --- a/src/producer/Crawlers/Sites/NyaaCrawler.cs +++ b/src/producer/Features/Crawlers/Nyaa/NyaaCrawler.cs @@ -1,4 +1,4 @@ -namespace Producer.Crawlers.Sites; +namespace Producer.Features.Crawlers.Nyaa; public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) { diff --git a/src/producer/Jobs/SyncNyaaJob.cs b/src/producer/Features/Crawlers/Nyaa/SyncNyaaJob.cs similarity index 57% rename from src/producer/Jobs/SyncNyaaJob.cs rename to src/producer/Features/Crawlers/Nyaa/SyncNyaaJob.cs index 9f28e54..32d8f9b 100644 --- a/src/producer/Jobs/SyncNyaaJob.cs +++ b/src/producer/Features/Crawlers/Nyaa/SyncNyaaJob.cs @@ -1,12 +1,12 @@ -using Producer.Crawlers.Sites; +using Literals = Producer.Features.JobSupport.Literals; -namespace Producer.Jobs; +namespace Producer.Features.Crawlers.Nyaa; [DisallowConcurrentExecution] public class SyncNyaaJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider) { private const string JobName = nameof(NyaaCrawler); - public static readonly JobKey Key = new(JobName, nameof(Crawlers)); - public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers)); + public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs)); + public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs)); protected override string Crawler => nameof(NyaaCrawler); } 
\ No newline at end of file diff --git a/src/producer/Jobs/SyncTgxJob.cs b/src/producer/Features/Crawlers/Tgx/SyncTgxJob.cs similarity index 57% rename from src/producer/Jobs/SyncTgxJob.cs rename to src/producer/Features/Crawlers/Tgx/SyncTgxJob.cs index 4c4fac9..e2e10f1 100644 --- a/src/producer/Jobs/SyncTgxJob.cs +++ b/src/producer/Features/Crawlers/Tgx/SyncTgxJob.cs @@ -1,12 +1,12 @@ -using TgxCrawler = Producer.Crawlers.Sites.TgxCrawler; +using Literals = Producer.Features.JobSupport.Literals; -namespace Producer.Jobs; +namespace Producer.Features.Crawlers.Tgx; [DisallowConcurrentExecution] public class SyncTgxJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider) { private const string JobName = nameof(TgxCrawler); - public static readonly JobKey Key = new(JobName, nameof(Crawlers)); - public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers)); + public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs)); + public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs)); protected override string Crawler => nameof(TgxCrawler); } \ No newline at end of file diff --git a/src/producer/Crawlers/Sites/TgxCrawler.cs b/src/producer/Features/Crawlers/Tgx/TgxCrawler.cs similarity index 99% rename from src/producer/Crawlers/Sites/TgxCrawler.cs rename to src/producer/Features/Crawlers/Tgx/TgxCrawler.cs index 036a403..88f086f 100644 --- a/src/producer/Crawlers/Sites/TgxCrawler.cs +++ b/src/producer/Features/Crawlers/Tgx/TgxCrawler.cs @@ -1,4 +1,4 @@ -namespace Producer.Crawlers.Sites; +namespace Producer.Features.Crawlers.Tgx; public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) { diff --git a/src/producer/Features/Crawlers/Torrentio/SyncTorrentioJob.cs b/src/producer/Features/Crawlers/Torrentio/SyncTorrentioJob.cs new file mode 100644 index 0000000..8ab6ba0 --- /dev/null 
+++ b/src/producer/Features/Crawlers/Torrentio/SyncTorrentioJob.cs @@ -0,0 +1,13 @@ +using Literals = Producer.Features.JobSupport.Literals; + +namespace Producer.Features.Crawlers.Torrentio; + +[DisallowConcurrentExecution] +[ManualJobRegistration] +public class SyncTorrentioJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider) +{ + private const string JobName = nameof(TorrentioCrawler); + public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs)); + public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs)); + protected override string Crawler => nameof(TorrentioCrawler); +} \ No newline at end of file diff --git a/src/producer/Features/Crawlers/Torrentio/TorrentioConfiguration.cs b/src/producer/Features/Crawlers/Torrentio/TorrentioConfiguration.cs new file mode 100644 index 0000000..65796c0 --- /dev/null +++ b/src/producer/Features/Crawlers/Torrentio/TorrentioConfiguration.cs @@ -0,0 +1,9 @@ +namespace Producer.Features.Crawlers.Torrentio; + +public class TorrentioConfiguration +{ + public const string SectionName = "TorrentioConfiguration"; + public const string Filename = "torrentio.json"; + + public List Instances { get; set; } = []; +} \ No newline at end of file diff --git a/src/producer/Features/Crawlers/Torrentio/TorrentioCrawler.cs b/src/producer/Features/Crawlers/Torrentio/TorrentioCrawler.cs new file mode 100644 index 0000000..2e5e82e --- /dev/null +++ b/src/producer/Features/Crawlers/Torrentio/TorrentioCrawler.cs @@ -0,0 +1,197 @@ +using Literals = Producer.Features.CrawlerSupport.Literals; + +namespace Producer.Features.Crawlers.Torrentio; + +public partial class TorrentioCrawler( + IHttpClientFactory httpClientFactory, + ILogger logger, + IDataStorage storage, + TorrentioConfiguration configuration, + ImdbMongoDbService imdbDataService) : BaseCrawler(logger, storage) +{ + [GeneratedRegex(@"(\d+(\.\d+)?) 
(GB|MB)")] + private static partial Regex SizeMatcher(); + + private const string MovieSlug = "movie/{0}.json"; + protected override string Url => "sort=size%7Cqualityfilter=other,scr,cam,unknown/stream/{0}"; + protected override IReadOnlyDictionary Mappings { get; } = new Dictionary(); + protected override string Source => "Torrentio"; + private readonly Dictionary _instanceStates = []; + public override async Task Execute() + { + var client = httpClientFactory.CreateClient(Literals.CrawlerClient); + var instances = configuration.Instances; + var totalRecordCount = await imdbDataService.GetTotalCountAsync(); + logger.LogInformation("Total IMDB records to process: {TotalRecordCount}", totalRecordCount); + var tasks = instances.Select(x => ProcessForInstanceAsync(x, client, totalRecordCount)).ToArray(); + await Task.WhenAll(tasks); + } + + private Task ProcessForInstanceAsync(TorrentioInstance instance, HttpClient client, long totalRecordCount) => + Task.Run( + async () => + { + while (instance.TotalProcessedRequests(_instanceStates) < totalRecordCount) + { + logger.LogInformation("Processing {TorrentioInstance}", instance.Name); + logger.LogInformation("Current processed requests: {ProcessedRequests}", instance.TotalProcessedRequests(_instanceStates)); + + var items = await imdbDataService.GetImdbEntriesForRequests( + DateTime.UtcNow.Year.ToString(), + instance.RateLimit.RequestLimit, + instance.LastProcessedImdbId(_instanceStates)); + + if (items.Count == 0) + { + logger.LogInformation("No items to process for {TorrentioInstance}", instance.Name); + break; + } + + var newTorrents = new List(); + var processedItemsCount = 0; + + foreach (var item in items) + { + processedItemsCount++; + + var waitTime = instance.CalculateWaitTime(_instanceStates); + + if (waitTime > TimeSpan.Zero) + { + logger.LogInformation("Rate limit reached for {TorrentioInstance}", instance.Name); + logger.LogInformation("Waiting for {TorrentioInstance}: {WaitTime}", instance.Name, waitTime); 
+ await Task.Delay(waitTime); + } + + if (processedItemsCount % 2 == 0) + { + var randomWait = new Random().Next(1000, 5000); + logger.LogInformation("Waiting for {TorrentioInstance}: {WaitTime}", instance.Name, randomWait); + await Task.Delay(randomWait); + } + + try + { + var torrentInfo = await ScrapeInstance(instance, item.ImdbId, client); + if (torrentInfo is not null) + { + newTorrents.AddRange(torrentInfo.Where(x => x != null).Select(x => x!)); + } + } + catch (Exception error) + { + logger.LogError(error, "page processing error in TorrentioCrawler"); + } + } + + if (newTorrents.Count > 0) + { + await InsertTorrents(newTorrents); + + var currentState = _instanceStates[instance.Name]; + _instanceStates[instance.Name] = currentState with + { + LastProcessedImdbId = items[^1].ImdbId, + }; + } + } + }); + + private async Task?> ScrapeInstance(TorrentioInstance instance, string imdbId, HttpClient client) + { + logger.LogInformation("Searching Torrentio {TorrentioInstance}: {ImdbId}", instance.Name, imdbId); + try + { + var movieSlug = string.Format(MovieSlug, imdbId); + var urlSlug = string.Format(Url, movieSlug); + return await RunRequest(instance, urlSlug, imdbId, client); + } + catch (Exception error) + { + logger.LogError(error, "page processing error {TorrentioInstance}: {ImdbId}", instance.Name, imdbId); + logger.LogWarning("Setting {TorrentioInstance} to possibly rate limited for Five minutes", instance.Name); + instance.SetPossiblyRateLimited(_instanceStates); + } + return null; + } + + private async Task?> RunRequest(TorrentioInstance instance, string urlSlug, string imdbId, HttpClient client) + { + var requestUrl = $"{instance.Url}/{urlSlug}"; + var response = await client.GetAsync(requestUrl); + + if (!response.IsSuccessStatusCode) + { + logger.LogError("Failed to fetch {Url}", requestUrl); + return null; + } + + var json = JsonDocument.Parse(await response.Content.ReadAsStringAsync()); + var streams = 
json.RootElement.GetProperty("streams").EnumerateArray(); + return streams.Select(x => ParseTorrent(instance, x, imdbId)).Where(x => x != null).ToList(); + } + + private Torrent? ParseTorrent(TorrentioInstance instance, JsonElement item, string imdId) + { + var title = item.GetProperty("title").GetString(); + var infoHash = item.GetProperty("infoHash").GetString(); + + if (string.IsNullOrEmpty(title) || string.IsNullOrEmpty(infoHash)) + { + return null; + } + + var torrent = ParseTorrentDetails(title, instance, infoHash, imdId); + + if (torrent is null || string.IsNullOrEmpty(torrent.Name)) + { + return null; + } + + return torrent; + } + + private Torrent? ParseTorrentDetails(string title, TorrentioInstance instance, string infoHash, string imdbId) + { + try + { + var torrent = new Torrent + { + Source = $"{Source}_{instance.Name}", + InfoHash = infoHash, + Category = "movies", // we only handle movies for now... + Imdb = imdbId, + }; + + var span = title.AsSpan(); + var titleEnd = span.IndexOf('\n'); + var titlePart = titleEnd >= 0 ? 
span[..titleEnd].ToString() : title; + + torrent.Name = titlePart.Replace('.', ' ').TrimEnd('.'); + + var sizeMatch = SizeMatcher().Match(title); + + if (sizeMatch.Success) + { + var size = double.Parse(sizeMatch.Groups[1].Value); // Size Value + var sizeUnit = sizeMatch.Groups[3].Value; // Size Unit (GB/MB) + + var sizeInBytes = sizeUnit switch + { + "GB" => (long) (size * 1073741824), + "MB" => (long) (size * 1048576), + _ => 0, + }; + + torrent.Size = sizeInBytes.ToString(); + } + + return torrent; + } + catch (Exception e) + { + logger.LogError(e, "Error parsing torrent details"); + return null; + } + } +} \ No newline at end of file diff --git a/src/producer/Features/Crawlers/Torrentio/TorrentioInstance.cs b/src/producer/Features/Crawlers/Torrentio/TorrentioInstance.cs new file mode 100644 index 0000000..f91fa09 --- /dev/null +++ b/src/producer/Features/Crawlers/Torrentio/TorrentioInstance.cs @@ -0,0 +1,10 @@ +namespace Producer.Features.Crawlers.Torrentio; + +public class TorrentioInstance +{ + public string Name { get; init; } = default!; + + public string Url { get; init; } = default!; + + public TorrentioRateLimit RateLimit { get; init; } = default!; +} \ No newline at end of file diff --git a/src/producer/Features/Crawlers/Torrentio/TorrentioInstanceExtensions.cs b/src/producer/Features/Crawlers/Torrentio/TorrentioInstanceExtensions.cs new file mode 100644 index 0000000..7ffd316 --- /dev/null +++ b/src/producer/Features/Crawlers/Torrentio/TorrentioInstanceExtensions.cs @@ -0,0 +1,53 @@ +namespace Producer.Features.Crawlers.Torrentio; + +public static class TorrentioInstancesExtensions +{ + public static TimeSpan CalculateWaitTime(this TorrentioInstance instance, Dictionary scraperState) + { + if (!scraperState.TryGetValue(instance.Name, out var state)) + { + state = new (DateTime.UtcNow, 0, 0, null); + scraperState[instance.Name] = state; + } + + var (startedAt, requestCount, totalProcessed, lastProcessedImdbId) = state; + + if (requestCount < 
instance.RateLimit.RequestLimit) + { + scraperState[instance.Name] = new (startedAt, requestCount + 1, totalProcessed + 1, lastProcessedImdbId); + return TimeSpan.Zero; + } + + var elapsed = DateTime.UtcNow - startedAt; + var interval = TimeSpan.FromSeconds(instance.RateLimit.IntervalInSeconds); + var remaining = interval - elapsed; + + // reset the state for the next interval + scraperState[instance.Name] = new (DateTime.UtcNow, 0, totalProcessed, lastProcessedImdbId); + + return remaining > TimeSpan.Zero ? remaining : TimeSpan.Zero; + } + + public static void SetPossiblyRateLimited(this TorrentioInstance instance, Dictionary scraperState, int minutesToWait = 5) + { + if (!scraperState.TryGetValue(instance.Name, out var state)) + { + state = new (DateTime.UtcNow, 0, 0, null); + } + + var (_, _, totalProcessed, lastProcessedImdbId) = state; + + // Set the start time minutesToWait minutes (default 5) in the past so that the next check will result in a rate limit period of that length + var startedAt = DateTime.UtcNow.AddMinutes(-minutesToWait); + var requestCount = instance.RateLimit.RequestLimit; + + // Update the scraper state for the instance + scraperState[instance.Name] = new (startedAt, requestCount, totalProcessed, lastProcessedImdbId); + } + + public static long TotalProcessedRequests(this TorrentioInstance instance, Dictionary scraperState) => + !scraperState.TryGetValue(instance.Name, out var state) ? 0 : state.TotalProcessed; + + public static string? LastProcessedImdbId(this TorrentioInstance instance, Dictionary scraperState) => + !scraperState.TryGetValue(instance.Name, out var state) ? 
null : state.LastProcessedImdbId; +} \ No newline at end of file diff --git a/src/producer/Features/Crawlers/Torrentio/TorrentioRateLimit.cs b/src/producer/Features/Crawlers/Torrentio/TorrentioRateLimit.cs new file mode 100644 index 0000000..fe89195 --- /dev/null +++ b/src/producer/Features/Crawlers/Torrentio/TorrentioRateLimit.cs @@ -0,0 +1,7 @@ +namespace Producer.Features.Crawlers.Torrentio; + +public class TorrentioRateLimit +{ + public int RequestLimit { get; set; } + public int IntervalInSeconds { get; set; } +} \ No newline at end of file diff --git a/src/producer/Features/Crawlers/Torrentio/TorrentioScrapeInstance.cs b/src/producer/Features/Crawlers/Torrentio/TorrentioScrapeInstance.cs new file mode 100644 index 0000000..9d6c23e --- /dev/null +++ b/src/producer/Features/Crawlers/Torrentio/TorrentioScrapeInstance.cs @@ -0,0 +1,3 @@ +namespace Producer.Features.Crawlers.Torrentio; + +public record TorrentioScrapeInstance(DateTime StartedAt, int RequestCount, int TotalProcessed, string? 
LastProcessedImdbId); \ No newline at end of file diff --git a/src/producer/Jobs/SyncTpbJob.cs b/src/producer/Features/Crawlers/Tpb/SyncTpbJob.cs similarity index 57% rename from src/producer/Jobs/SyncTpbJob.cs rename to src/producer/Features/Crawlers/Tpb/SyncTpbJob.cs index db64032..78985e4 100644 --- a/src/producer/Jobs/SyncTpbJob.cs +++ b/src/producer/Features/Crawlers/Tpb/SyncTpbJob.cs @@ -1,12 +1,12 @@ -using Producer.Crawlers.Sites; +using Literals = Producer.Features.JobSupport.Literals; -namespace Producer.Jobs; +namespace Producer.Features.Crawlers.Tpb; [DisallowConcurrentExecution] public class SyncTpbJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider) { private const string JobName = nameof(TpbCrawler); - public static readonly JobKey Key = new(JobName, nameof(Crawlers)); - public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers)); + public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs)); + public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs)); protected override string Crawler => nameof(TpbCrawler); } \ No newline at end of file diff --git a/src/producer/Crawlers/Sites/TpbCrawler.cs b/src/producer/Features/Crawlers/Tpb/TpbCrawler.cs similarity index 98% rename from src/producer/Crawlers/Sites/TpbCrawler.cs rename to src/producer/Features/Crawlers/Tpb/TpbCrawler.cs index c0cba00..a9e500d 100644 --- a/src/producer/Crawlers/Sites/TpbCrawler.cs +++ b/src/producer/Features/Crawlers/Tpb/TpbCrawler.cs @@ -1,4 +1,4 @@ -namespace Producer.Crawlers.Sites; +namespace Producer.Features.Crawlers.Tpb; public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseJsonCrawler(httpClientFactory, logger, storage) { diff --git a/src/producer/Jobs/SyncYtsJob.cs b/src/producer/Features/Crawlers/Yts/SyncYtsJob.cs similarity index 57% rename from src/producer/Jobs/SyncYtsJob.cs rename to 
src/producer/Features/Crawlers/Yts/SyncYtsJob.cs index 2b4fa63..0ea81c6 100644 --- a/src/producer/Jobs/SyncYtsJob.cs +++ b/src/producer/Features/Crawlers/Yts/SyncYtsJob.cs @@ -1,12 +1,12 @@ -using Producer.Crawlers.Sites; +using Literals = Producer.Features.JobSupport.Literals; -namespace Producer.Jobs; +namespace Producer.Features.Crawlers.Yts; [DisallowConcurrentExecution] public class SyncYtsJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider) { private const string JobName = nameof(YtsCrawler); - public static readonly JobKey Key = new(JobName, nameof(Crawlers)); - public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers)); + public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs)); + public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs)); protected override string Crawler => nameof(YtsCrawler); } \ No newline at end of file diff --git a/src/producer/Crawlers/Sites/YtsCrawler.cs b/src/producer/Features/Crawlers/Yts/YtsCrawler.cs similarity index 97% rename from src/producer/Crawlers/Sites/YtsCrawler.cs rename to src/producer/Features/Crawlers/Yts/YtsCrawler.cs index d47ff39..d49b4f8 100644 --- a/src/producer/Crawlers/Sites/YtsCrawler.cs +++ b/src/producer/Features/Crawlers/Yts/YtsCrawler.cs @@ -1,4 +1,4 @@ -namespace Producer.Crawlers.Sites; +namespace Producer.Features.Crawlers.Yts; public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) { diff --git a/src/producer/Services/DapperDataStorage.cs b/src/producer/Features/DataProcessing/DapperDataStorage.cs similarity index 99% rename from src/producer/Services/DapperDataStorage.cs rename to src/producer/Features/DataProcessing/DapperDataStorage.cs index c08b082..a2ccd76 100644 --- a/src/producer/Services/DapperDataStorage.cs +++ b/src/producer/Features/DataProcessing/DapperDataStorage.cs @@ -1,4 +1,4 @@ 
-namespace Producer.Services; +namespace Producer.Features.DataProcessing; public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConfiguration rabbitConfig, ILogger logger) : IDataStorage { diff --git a/src/producer/Interfaces/IDataStorage.cs b/src/producer/Features/DataProcessing/IDataStorage.cs similarity index 93% rename from src/producer/Interfaces/IDataStorage.cs rename to src/producer/Features/DataProcessing/IDataStorage.cs index 7286454..faa926c 100644 --- a/src/producer/Interfaces/IDataStorage.cs +++ b/src/producer/Features/DataProcessing/IDataStorage.cs @@ -1,4 +1,4 @@ -namespace Producer.Interfaces; +namespace Producer.Features.DataProcessing; public interface IDataStorage { diff --git a/src/producer/Features/DataProcessing/ImdbEntry.cs b/src/producer/Features/DataProcessing/ImdbEntry.cs new file mode 100644 index 0000000..bffd968 --- /dev/null +++ b/src/producer/Features/DataProcessing/ImdbEntry.cs @@ -0,0 +1,15 @@ +namespace Producer.Features.DataProcessing; + +public class ImdbEntry +{ + [BsonId] + public string ImdbId { get; set; } = default!; + public string? TitleType { get; set; } + public string? PrimaryTitle { get; set; } + public string? OriginalTitle { get; set; } + public string? IsAdult { get; set; } + public string? StartYear { get; set; } + public string? EndYear { get; set; } + public string? RuntimeMinutes { get; set; } + public string? 
Genres { get; set; } +} \ No newline at end of file diff --git a/src/producer/Features/DataProcessing/ImdbMongoDbService.cs b/src/producer/Features/DataProcessing/ImdbMongoDbService.cs new file mode 100644 index 0000000..cfa9eda --- /dev/null +++ b/src/producer/Features/DataProcessing/ImdbMongoDbService.cs @@ -0,0 +1,78 @@ +namespace Producer.Features.DataProcessing; + +public class ImdbMongoDbService +{ + private readonly ILogger _logger; + private readonly IMongoCollection _imdbCollection; + + public ImdbMongoDbService(MongoConfiguration configuration, ILogger logger) + { + _logger = logger; + + var client = new MongoClient(configuration.ConnectionString); + var database = client.GetDatabase(configuration.DbName); + + _imdbCollection = database.GetCollection("imdb-entries"); + } + + public async Task> GetImdbEntriesForRequests(string startYear, int requestLimit, string? startingId = null) + { + var sort = Builders.Sort + .Descending(e => e.StartYear) + .Descending(e => e.ImdbId); + + var filter = Builders.Filter + .And( + Builders.Filter.Eq(e => e.TitleType, "movie"), + Builders.Filter.Lte(e => e.StartYear, startYear) + ); + + if (!string.IsNullOrWhiteSpace(startingId)) + { + filter = Builders.Filter.And(filter, Builders.Filter.Lt(e => e.ImdbId, startingId)); + } + + return await _imdbCollection.Find(filter).Limit(requestLimit).Sort(sort).ToListAsync(); + } + + public async Task GetTotalCountAsync() + { + var filter = Builders.Filter.Eq(x => x.TitleType, "movie"); + return await _imdbCollection.CountDocumentsAsync(filter); + } + + public bool IsDatabaseInitialized() + { + try + { + // Compound index for PrimaryTitle, TitleType, and StartYear + var index1KeysDefinition = Builders.IndexKeys + .Text(e => e.PrimaryTitle) + .Ascending(e => e.TitleType) + .Ascending(e => e.StartYear); + + CreateIndex(index1KeysDefinition); + + // Compound index for StartYear and _id in descending order + var index2KeysDefinition = Builders.IndexKeys + .Descending(e => e.StartYear) + 
.Descending(e => e.ImdbId); + + CreateIndex(index2KeysDefinition); + + return true; + } + catch (Exception e) + { + _logger.LogError(e, "Error initializing database"); + return false; + } + } + + private void CreateIndex(IndexKeysDefinition keysDefinition) + { + var createIndexOptions = new CreateIndexOptions { Background = true }; + var indexModel = new CreateIndexModel(keysDefinition, createIndexOptions); + _imdbCollection.Indexes.CreateOne(indexModel); + } +} \ No newline at end of file diff --git a/src/producer/Features/DataProcessing/MongoConfiguration.cs b/src/producer/Features/DataProcessing/MongoConfiguration.cs new file mode 100644 index 0000000..c48b66a --- /dev/null +++ b/src/producer/Features/DataProcessing/MongoConfiguration.cs @@ -0,0 +1,20 @@ +namespace Producer.Features.DataProcessing; + +public class MongoConfiguration +{ + private const string Prefix = "MONGODB"; + private const string HostVariable = "HOST"; + private const string PortVariable = "PORT"; + private const string DbVariable = "DB"; + private const string UsernameVariable = "USER"; + private const string PasswordVariable = "PASSWORD"; + + + private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable); + private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 27017); + private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable); + private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable); + public string DbName { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DbVariable); + + public string ConnectionString => $"mongodb://{Username}:{Password}@{Host}:{Port}/{DbName}?tls=false&directConnection=true&authSource=admin"; +} \ No newline at end of file diff --git a/src/producer/Models/Configuration/PostgresConfiguration.cs b/src/producer/Features/DataProcessing/PostgresConfiguration.cs similarity index 95% rename from 
src/producer/Models/Configuration/PostgresConfiguration.cs rename to src/producer/Features/DataProcessing/PostgresConfiguration.cs index c753b4f..4fd3d21 100644 --- a/src/producer/Models/Configuration/PostgresConfiguration.cs +++ b/src/producer/Features/DataProcessing/PostgresConfiguration.cs @@ -1,4 +1,4 @@ -namespace Producer.Models.Configuration; +namespace Producer.Features.DataProcessing; public class PostgresConfiguration { diff --git a/src/producer/Features/DataProcessing/ServiceCollectionExtensions.cs b/src/producer/Features/DataProcessing/ServiceCollectionExtensions.cs new file mode 100644 index 0000000..93d3e7d --- /dev/null +++ b/src/producer/Features/DataProcessing/ServiceCollectionExtensions.cs @@ -0,0 +1,14 @@ +namespace Producer.Features.DataProcessing; + +internal static class ServiceCollectionExtensions +{ + internal static IServiceCollection AddDataStorage(this IServiceCollection services) + { + services.LoadConfigurationFromEnv(); + services.LoadConfigurationFromEnv(); + services.AddTransient(); + services.AddTransient(); + services.AddSingleton(); + return services; + } +} \ No newline at end of file diff --git a/src/producer/Jobs/BaseJob.cs b/src/producer/Features/JobSupport/BaseJob.cs similarity index 93% rename from src/producer/Jobs/BaseJob.cs rename to src/producer/Features/JobSupport/BaseJob.cs index 9dcdc96..81dcf87 100644 --- a/src/producer/Jobs/BaseJob.cs +++ b/src/producer/Features/JobSupport/BaseJob.cs @@ -1,4 +1,4 @@ -namespace Producer.Jobs; +namespace Producer.Features.JobSupport; public abstract class BaseJob(ICrawlerProvider crawlerProvider) : IJob { diff --git a/src/producer/Jobs/ICrawlerJob.cs b/src/producer/Features/JobSupport/ICrawlerJob.cs similarity index 74% rename from src/producer/Jobs/ICrawlerJob.cs rename to src/producer/Features/JobSupport/ICrawlerJob.cs index bd857ba..ec9dfeb 100644 --- a/src/producer/Jobs/ICrawlerJob.cs +++ b/src/producer/Features/JobSupport/ICrawlerJob.cs @@ -1,4 +1,4 @@ -namespace Producer.Jobs; 
+namespace Producer.Features.JobSupport; public interface ICrawlerJob : IJob where TCrawler : ICrawler diff --git a/src/producer/Features/JobSupport/Literals.cs b/src/producer/Features/JobSupport/Literals.cs new file mode 100644 index 0000000..4849eb4 --- /dev/null +++ b/src/producer/Features/JobSupport/Literals.cs @@ -0,0 +1,7 @@ +namespace Producer.Features.JobSupport; + +public static class Literals +{ + public const string CrawlersJobs = "CrawlersJobs"; + public const string PublishingJobs = "PublishingJobs"; +} \ No newline at end of file diff --git a/src/producer/Features/JobSupport/ManualJobRegistrationAttribute.cs b/src/producer/Features/JobSupport/ManualJobRegistrationAttribute.cs new file mode 100644 index 0000000..1caf658 --- /dev/null +++ b/src/producer/Features/JobSupport/ManualJobRegistrationAttribute.cs @@ -0,0 +1,6 @@ +namespace Producer.Features.JobSupport; + +[AttributeUsage(AttributeTargets.Class)] +public class ManualJobRegistrationAttribute : Attribute +{ +} \ No newline at end of file diff --git a/src/producer/Models/Configuration/ScrapeConfiguration.cs b/src/producer/Features/JobSupport/ScrapeConfiguration.cs similarity index 83% rename from src/producer/Models/Configuration/ScrapeConfiguration.cs rename to src/producer/Features/JobSupport/ScrapeConfiguration.cs index 1073b0c..fe24162 100644 --- a/src/producer/Models/Configuration/ScrapeConfiguration.cs +++ b/src/producer/Features/JobSupport/ScrapeConfiguration.cs @@ -1,4 +1,4 @@ -namespace Producer.Models.Configuration; +namespace Producer.Features.JobSupport; public class ScrapeConfiguration { diff --git a/src/producer/Features/JobSupport/ServiceCollectionExtensions.cs b/src/producer/Features/JobSupport/ServiceCollectionExtensions.cs new file mode 100644 index 0000000..2064fa5 --- /dev/null +++ b/src/producer/Features/JobSupport/ServiceCollectionExtensions.cs @@ -0,0 +1,131 @@ +namespace Producer.Features.JobSupport; + +internal static class ServiceCollectionExtensions +{ + internal static 
IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration) + { + var scrapeConfiguration = services.LoadConfigurationFromConfig(configuration, ScrapeConfiguration.SectionName); + var githubConfiguration = services.LoadConfigurationFromEnv(); + var rabbitConfiguration = services.LoadConfigurationFromEnv(); + + var jobTypes = Assembly.GetAssembly(typeof(BaseJob)) + .GetTypes() + .Where(t => t is {IsClass: true, IsAbstract: false} && typeof(IJob).IsAssignableFrom(t) && + !Attribute.IsDefined(t, typeof(ManualJobRegistrationAttribute))) + .ToList(); + + foreach (var type in jobTypes) + { + services.AddTransient(type); + } + + if (!string.IsNullOrEmpty(githubConfiguration.PAT)) + { + services.AddTransient(); + } + + var openMethod = typeof(ServiceCollectionExtensions).GetMethod(nameof(AddJobWithTrigger), BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance); + + services.AddQuartz( + quartz => + { + RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration); + RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration); + RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration); + RegisterPublisher(quartz, rabbitConfiguration); + }); + + services.AddQuartzHostedService( + options => + { + options.WaitForJobsToComplete = true; + }); + + return services; + } + + private static void RegisterAutomaticRegistrationJobs(List jobTypes, MethodInfo? 
openMethod, IServiceCollectionQuartzConfigurator quartz, + ScrapeConfiguration scrapeConfiguration) + { + foreach (var jobType in jobTypes) + { + var key = jobType.GetField("Key")?.GetValue(jobType); + var trigger = jobType.GetField("Trigger")?.GetValue(jobType); + + if (key is null || trigger is null) + { + Console.WriteLine($"Job {jobType.Name} does not have a JobKey or TriggerKey property"); + continue; + } + + var method = openMethod.MakeGenericMethod(jobType); + method.Invoke(null, [quartz, key, trigger, scrapeConfiguration]); + } + } + + private static void RegisterDmmJob(GithubConfiguration githubConfiguration, IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration) + { + if (!string.IsNullOrEmpty(githubConfiguration.PAT)) + { + AddJobWithTrigger(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration); + } + } + + private static void RegisterTorrentioJob( + IServiceCollection services, + IServiceCollectionQuartzConfigurator quartz, + IConfiguration configuration, + ScrapeConfiguration scrapeConfiguration) + { + var torrentioConfiguration = services.LoadConfigurationFromConfig(configuration, TorrentioConfiguration.SectionName); + + if (torrentioConfiguration.Instances.Count != 0) + { + AddJobWithTrigger(quartz, SyncTorrentioJob.Key, SyncTorrentioJob.Trigger, scrapeConfiguration); + } + } + + private static void RegisterPublisher(IServiceCollectionQuartzConfigurator quartz, RabbitMqConfiguration rabbitConfig) => + AddJobWithTriggerAndInterval(quartz, PublisherJob.Key, PublisherJob.Trigger, rabbitConfig.PublishIntervalInSeconds); + + private static void AddJobWithTrigger( + IServiceCollectionQuartzConfigurator quartz, + JobKey key, + TriggerKey trigger, + ScrapeConfiguration scrapeConfiguration) where TJobType : IJob + { + var scraper = scrapeConfiguration.Scrapers + .FirstOrDefault(x => x.Name != null && + x.Name.Equals(typeof(TJobType).Name, StringComparison.OrdinalIgnoreCase)); + + if (scraper is null || 
!scraper.Enabled) + { + return; + } + + quartz.AddJob(opts => opts.WithIdentity(key).StoreDurably()); + + quartz.AddTrigger( + opts => opts + .ForJob(key) + .WithIdentity(trigger) + .StartAt(DateTimeOffset.Now.AddSeconds(20)) + .WithSimpleSchedule(x => x.WithInterval(TimeSpan.FromSeconds(scraper.IntervalSeconds)).RepeatForever())); + } + + private static void AddJobWithTriggerAndInterval( + IServiceCollectionQuartzConfigurator quartz, + JobKey key, + TriggerKey trigger, + int interval) where TJobType : IJob + { + quartz.AddJob(opts => opts.WithIdentity(key).StoreDurably()); + + quartz.AddTrigger( + opts => opts + .ForJob(key) + .WithIdentity(trigger) + .StartAt(DateTimeOffset.Now.AddSeconds(20)) + .WithSimpleSchedule(x => x.WithInterval(TimeSpan.FromSeconds(interval)).RepeatForever())); + } +} \ No newline at end of file diff --git a/src/producer/GlobalUsings.cs b/src/producer/GlobalUsings.cs index f44a4e6..131bc36 100644 --- a/src/producer/GlobalUsings.cs +++ b/src/producer/GlobalUsings.cs @@ -1,9 +1,10 @@ // Global using directives +global using System.Reflection; global using System.Text; global using System.Text.Json; -global using System.Text.Json.Serialization; global using System.Text.RegularExpressions; +global using System.Threading.Channels; global using System.Xml.Linq; global using Dapper; global using LZStringCSharp; @@ -12,14 +13,15 @@ global using Microsoft.AspNetCore.Builder; global using Microsoft.Extensions.DependencyInjection; global using Microsoft.Extensions.DependencyInjection.Extensions; global using Microsoft.Extensions.Logging; +global using MongoDB.Bson.Serialization.Attributes; +global using MongoDB.Driver; global using Npgsql; global using Quartz; -global using Producer.Crawlers; -global using Producer.Crawlers.Sites; global using Producer.Extensions; -global using Producer.Interfaces; -global using Producer.Jobs; -global using Producer.Models; -global using Producer.Models.Configuration; -global using Producer.Services; +global using 
Producer.Features.Amqp; +global using Producer.Features.Crawlers.Dmm; +global using Producer.Features.Crawlers.Torrentio; +global using Producer.Features.CrawlerSupport; +global using Producer.Features.DataProcessing; +global using Producer.Features.JobSupport; global using Serilog; \ No newline at end of file diff --git a/src/producer/Interfaces/IIpService.cs b/src/producer/Interfaces/IIpService.cs deleted file mode 100644 index 69f36d7..0000000 --- a/src/producer/Interfaces/IIpService.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Producer.Interfaces; - -public interface IIpService -{ - Task GetPublicIpAddress(); -} \ No newline at end of file diff --git a/src/producer/Jobs/IPJob.cs b/src/producer/Jobs/IPJob.cs deleted file mode 100644 index 3bc93f3..0000000 --- a/src/producer/Jobs/IPJob.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace Producer.Jobs; - -[DisallowConcurrentExecution] -public class IPJob(IIpService ipService) : IJob -{ - private const string JobName = nameof(IPJob); - public static readonly JobKey Key = new(JobName, nameof(Jobs)); - public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Jobs)); - - public Task Execute(IJobExecutionContext context) - { - return ipService.GetPublicIpAddress(); - } -} \ No newline at end of file diff --git a/src/producer/Producer.csproj b/src/producer/Producer.csproj index 8cc3862..2405295 100644 --- a/src/producer/Producer.csproj +++ b/src/producer/Producer.csproj @@ -15,6 +15,7 @@ + @@ -24,12 +25,8 @@ - - - Always - - - + + Always diff --git a/src/producer/Services/IpService.cs b/src/producer/Services/IpService.cs deleted file mode 100644 index ceada0b..0000000 --- a/src/producer/Services/IpService.cs +++ /dev/null @@ -1,13 +0,0 @@ -namespace Producer.Services; - -public class IpService(ILogger logger, IHttpClientFactory httpClientFactory) : IIpService -{ - public async Task GetPublicIpAddress() - { - var client = httpClientFactory.CreateClient("Scraper"); - 
client.DefaultRequestHeaders.UserAgent.ParseAdd("curl"); - var request = await client.GetStringAsync("http://ifconfig.me"); - - logger.LogInformation("Public IP Address: {PublicIPAddress}", request); - } -} \ No newline at end of file