mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
Woke up to see a discussion about torrentio scraping: powered by community.
Was a little inspired. Now that we have a self-populating database of IMDB IDs, why shouldn't we be able to scrape any other instance of torrentio, or knightcrawler? Also restructured the producer to be vertically sliced so it's easier to work with; there was too much flicking back and forth between Jobs and Crawlers when configuring.
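For context, scraping another instance boils down to hitting the standard Stremio addon route per IMDB ID and reading back the stream list. A minimal sketch of that request shape (the base URL, filter slug, and tt-id below are illustrative placeholders; real values come from Configuration/torrentio.json and the IMDB database):

    using var client = new HttpClient();
    // Same route shape the new TorrentioCrawler builds from its Url/MovieSlug templates.
    var url = "https://torrentio.strem.fun/sort=size%7Cqualityfilter=other,scr,cam,unknown/stream/movie/tt0111161.json";
    var json = JsonDocument.Parse(await client.GetStringAsync(url));
    foreach (var stream in json.RootElement.GetProperty("streams").EnumerateArray())
    {
        Console.WriteLine(stream.GetProperty("infoHash").GetString());
    }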
2
.gitignore
vendored
@@ -405,3 +405,5 @@ FodyWeavers.xsd
dist/
deployment/docker/docker-compose-dev.yaml
src/producer/.run/
@@ -30,6 +30,11 @@
      "Name": "SyncDmmJob",
      "IntervalSeconds": 1800,
      "Enabled": true
    },
    {
      "Name": "SyncTorrentioJob",
      "IntervalSeconds": 604800,
      "Enabled": true
    }
  ]
}
14
src/producer/Configuration/torrentio.json
Normal file
@@ -0,0 +1,14 @@
{
  "TorrentioConfiguration": {
    "Instances": [
      {
        "Name": "Official",
        "Url": "https://torrentio.strem.fun",
        "RateLimit": {
          "RequestLimit": 300,
          "IntervalInSeconds": 3600
        }
      }
    ]
  }
}
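Each entry pairs a name and base URL with its own rate-limit window, and Instances is a list, so extra torrentio (or knightcrawler) deployments can be appended. A hypothetical self-hosted entry, purely for illustration:

    {
      "Name": "SelfHosted",
      "Url": "https://torrentio.example.org",
      "RateLimit": {
        "RequestLimit": 1000,
        "IntervalInSeconds": 3600
      }
    }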
@@ -11,6 +11,7 @@ public static class ConfigurationExtensions
        configuration.AddJsonFile(LoggingConfig, false, true);
        configuration.AddJsonFile(ScrapeConfiguration.Filename, false, true);
        configuration.AddJsonFile(TorrentioConfiguration.Filename, false, true);

        configuration.AddEnvironmentVariables();
@@ -18,4 +19,28 @@ public static class ConfigurationExtensions
        return configuration;
    }

    public static TConfiguration LoadConfigurationFromConfig<TConfiguration>(this IServiceCollection services, IConfiguration configuration, string sectionName)
        where TConfiguration : class
    {
        var instance = configuration.GetSection(sectionName).Get<TConfiguration>();

        ArgumentNullException.ThrowIfNull(instance, nameof(instance));

        services.TryAddSingleton(instance);

        return instance;
    }

    public static TConfiguration LoadConfigurationFromEnv<TConfiguration>(this IServiceCollection services)
        where TConfiguration : class
    {
        var instance = Activator.CreateInstance<TConfiguration>();

        ArgumentNullException.ThrowIfNull(instance, nameof(instance));

        services.TryAddSingleton(instance);

        return instance;
    }
}
@@ -1,160 +0,0 @@
namespace Producer.Extensions;

public static class ServiceCollectionExtensions
{
    internal static IServiceCollection AddCrawlers(this IServiceCollection services)
    {
        services.AddHttpClient("Scraper");

        services
            .AddKeyedTransient<ICrawler, EzTvCrawler>(nameof(EzTvCrawler))
            .AddKeyedTransient<ICrawler, NyaaCrawler>(nameof(NyaaCrawler))
            .AddKeyedTransient<ICrawler, YtsCrawler>(nameof(YtsCrawler))
            .AddKeyedTransient<ICrawler, TpbCrawler>(nameof(TpbCrawler))
            .AddKeyedTransient<ICrawler, TgxCrawler>(nameof(TgxCrawler))
            .AddKeyedTransient<ICrawler, DebridMediaManagerCrawler>(nameof(DebridMediaManagerCrawler))
            .AddSingleton<ICrawlerProvider, CrawlerProvider>()
            .AddTransient<IIpService, IpService>();

        return services;
    }

    internal static IServiceCollection AddDataStorage(this IServiceCollection services)
    {
        services.LoadConfigurationFromEnv<PostgresConfiguration>();
        services.AddTransient<IDataStorage, DapperDataStorage>();
        services.AddTransient<IMessagePublisher, TorrentPublisher>();
        return services;
    }

    internal static IServiceCollection RegisterMassTransit(this IServiceCollection services)
    {
        var rabbitConfig = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();

        services.AddMassTransit(busConfigurator =>
        {
            busConfigurator.SetKebabCaseEndpointNameFormatter();
            busConfigurator.UsingRabbitMq((_, busFactoryConfigurator) =>
            {
                busFactoryConfigurator.Host(rabbitConfig.Host, hostConfigurator =>
                {
                    hostConfigurator.Username(rabbitConfig.Username);
                    hostConfigurator.Password(rabbitConfig.Password);
                });
            });
        });

        return services;
    }

    internal static IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration)
    {
        var scrapeConfiguration = services.LoadConfigurationFromConfig<ScrapeConfiguration>(configuration, ScrapeConfiguration.SectionName);
        var githubConfiguration = services.LoadConfigurationFromEnv<GithubConfiguration>();
        var rabbitConfig = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();

        services
            .AddTransient<SyncEzTvJob>()
            .AddTransient<SyncNyaaJob>()
            .AddTransient<SyncTpbJob>()
            .AddTransient<SyncYtsJob>()
            .AddTransient<SyncTgxJob>()
            .AddTransient<IPJob>()
            .AddTransient<PublisherJob>();

        if (!string.IsNullOrEmpty(githubConfiguration.PAT))
        {
            services.AddTransient<SyncDmmJob>();
        }

        services.AddQuartz(
            quartz =>
            {
                AddJobWithTrigger<SyncEzTvJob>(quartz, SyncEzTvJob.Key, SyncEzTvJob.Trigger, scrapeConfiguration);
                AddJobWithTrigger<SyncNyaaJob>(quartz, SyncNyaaJob.Key, SyncNyaaJob.Trigger, scrapeConfiguration);
                AddJobWithTrigger<SyncTpbJob>(quartz, SyncTpbJob.Key, SyncTpbJob.Trigger, scrapeConfiguration);
                AddJobWithTrigger<SyncYtsJob>(quartz, SyncYtsJob.Key, SyncYtsJob.Trigger, scrapeConfiguration);
                AddJobWithTrigger<SyncTgxJob>(quartz, SyncTgxJob.Key, SyncTgxJob.Trigger, scrapeConfiguration);
                AddJobWithTrigger<IPJob>(quartz, IPJob.Key, IPJob.Trigger, 60 * 5);
                AddJobWithTrigger<PublisherJob>(quartz, PublisherJob.Key, PublisherJob.Trigger, rabbitConfig.PublishIntervalInSeconds);

                if (!string.IsNullOrEmpty(githubConfiguration.PAT))
                {
                    AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
                }
            });

        services.AddQuartzHostedService(
            options =>
            {
                options.WaitForJobsToComplete = true;
            });

        return services;
    }

    private static TConfiguration LoadConfigurationFromConfig<TConfiguration>(this IServiceCollection services, IConfiguration configuration, string sectionName)
        where TConfiguration : class
    {
        var instance = configuration.GetSection(sectionName).Get<TConfiguration>();

        ArgumentNullException.ThrowIfNull(instance, nameof(instance));

        services.TryAddSingleton(instance);

        return instance;
    }

    private static TConfiguration LoadConfigurationFromEnv<TConfiguration>(this IServiceCollection services)
        where TConfiguration : class
    {
        var instance = Activator.CreateInstance<TConfiguration>();

        ArgumentNullException.ThrowIfNull(instance, nameof(instance));

        services.TryAddSingleton(instance);

        return instance;
    }

    private static void AddJobWithTrigger<TJobType>(
        IServiceCollectionQuartzConfigurator quartz,
        JobKey key,
        TriggerKey trigger,
        ScrapeConfiguration scrapeConfiguration) where TJobType : IJob
    {
        var scraper = scrapeConfiguration.Scrapers
            .FirstOrDefault(x => x.Name != null &&
                                 x.Name.Equals(typeof(TJobType).Name, StringComparison.OrdinalIgnoreCase));

        if (scraper is null || !scraper.Enabled)
        {
            return;
        }

        quartz.AddJob<TJobType>(opts => opts.WithIdentity(key).StoreDurably());

        quartz.AddTrigger(
            opts => opts
                .ForJob(key)
                .WithIdentity(trigger)
                .StartAt(DateTimeOffset.Now.AddSeconds(20))
                .WithSimpleSchedule(x => x.WithInterval(TimeSpan.FromSeconds(scraper.IntervalSeconds)).RepeatForever()));
    }

    private static void AddJobWithTrigger<TJobType>(
        IServiceCollectionQuartzConfigurator quartz,
        JobKey key,
        TriggerKey trigger,
        int interval) where TJobType : IJob
    {
        quartz.AddJob<TJobType>(opts => opts.WithIdentity(key).StoreDurably());

        quartz.AddTrigger(
            opts => opts
                .ForJob(key)
                .WithIdentity(trigger)
                .StartAt(DateTimeOffset.Now.AddSeconds(20))
                .WithSimpleSchedule(x => x.WithInterval(TimeSpan.FromSeconds(interval)).RepeatForever()));
    }
}
@@ -1,4 +1,4 @@
namespace Producer.Interfaces;
namespace Producer.Features.Amqp;

public interface IMessagePublisher
{
@@ -1,11 +1,14 @@
namespace Producer.Jobs;
using Literals = Producer.Features.JobSupport.Literals;

namespace Producer.Features.Amqp;

[DisallowConcurrentExecution]
[ManualJobRegistration]
public class PublisherJob(IMessagePublisher publisher, IDataStorage storage, ILogger<PublisherJob> logger) : IJob
{
    private const string JobName = nameof(PublisherJob);
    public static readonly JobKey Key = new(JobName, nameof(Jobs));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Jobs));
    public static readonly JobKey Key = new(JobName, nameof(Literals.PublishingJobs));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.PublishingJobs));

    public async Task Execute(IJobExecutionContext context)
    {
@@ -1,4 +1,4 @@
namespace Producer.Models.Configuration;
namespace Producer.Features.Amqp;

public class RabbitMqConfiguration
{
24
src/producer/Features/Amqp/ServiceCollectionExtensions.cs
Normal file
@@ -0,0 +1,24 @@
namespace Producer.Features.Amqp;

internal static class ServiceCollectionExtensions
{
    internal static IServiceCollection RegisterMassTransit(this IServiceCollection services)
    {
        var rabbitConfig = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();

        services.AddMassTransit(busConfigurator =>
        {
            busConfigurator.SetKebabCaseEndpointNameFormatter();
            busConfigurator.UsingRabbitMq((_, busFactoryConfigurator) =>
            {
                busFactoryConfigurator.Host(rabbitConfig.Host, hostConfigurator =>
                {
                    hostConfigurator.Username(rabbitConfig.Username);
                    hostConfigurator.Password(rabbitConfig.Password);
                });
            });
        });

        return services;
    }
}
@@ -1,4 +1,4 @@
namespace Producer.Services;
namespace Producer.Features.Amqp;

public class TorrentPublisher(
    ISendEndpointProvider sendEndpointProvider,
@@ -1,4 +1,4 @@
namespace Producer.Crawlers;
namespace Producer.Features.CrawlerSupport;

public abstract class BaseCrawler(ILogger<BaseCrawler> logger, IDataStorage storage) : ICrawler
{
@@ -1,8 +1,8 @@
namespace Producer.Crawlers;
namespace Producer.Features.CrawlerSupport;

public abstract class BaseJsonCrawler(IHttpClientFactory httpClientFactory, ILogger<BaseJsonCrawler> logger, IDataStorage storage) : BaseCrawler(logger, storage)
{
    private readonly HttpClient _client = httpClientFactory.CreateClient("Scraper");
    private readonly HttpClient _client = httpClientFactory.CreateClient(Literals.CrawlerClient);

    protected virtual async Task Execute(string collectionName)
    {
@@ -1,4 +1,4 @@
namespace Producer.Crawlers;
namespace Producer.Features.CrawlerSupport;

public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogger<BaseXmlCrawler> logger, IDataStorage storage) : BaseCrawler(logger, storage)
{
@@ -6,7 +6,7 @@ public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogg
    {
        logger.LogInformation("Starting {Source} crawl", Source);

        using var client = httpClientFactory.CreateClient("Scraper");
        using var client = httpClientFactory.CreateClient(Literals.CrawlerClient);
        var xml = await client.GetStringAsync(Url);
        var xmlRoot = XElement.Parse(xml);
@@ -1,4 +1,4 @@
namespace Producer.Crawlers;
namespace Producer.Features.CrawlerSupport;

public class CrawlerProvider(IServiceProvider serviceProvider) : ICrawlerProvider
{
@@ -1,4 +1,4 @@
namespace Producer.Interfaces;
namespace Producer.Features.CrawlerSupport;

public interface ICrawler
{
@@ -1,4 +1,4 @@
namespace Producer.Interfaces;
namespace Producer.Features.CrawlerSupport;

public interface ICrawlerProvider
{
6
src/producer/Features/CrawlerSupport/Literals.cs
Normal file
@@ -0,0 +1,6 @@
namespace Producer.Features.CrawlerSupport;

public static class Literals
{
    public const string CrawlerClient = "Scraper";
}
@@ -1,4 +1,4 @@
namespace Producer.Models;
namespace Producer.Features.CrawlerSupport;

public record InsertTorrentResult(bool Success, int InsertedCount = 0, string? ErrorMessage = null);
public record UpdatedTorrentResult(bool Success, int UpdatedCount = 0, string? ErrorMessage = null);
@@ -1,4 +1,4 @@
namespace Producer.Models;
namespace Producer.Features.CrawlerSupport;

public class Scraper
{
@@ -0,0 +1,23 @@
namespace Producer.Features.CrawlerSupport;

internal static class ServiceCollectionExtensions
{
    internal static IServiceCollection AddCrawlers(this IServiceCollection services)
    {
        services.AddHttpClient(Literals.CrawlerClient);

        var crawlerTypes = Assembly.GetAssembly(typeof(ICrawler))
            .GetTypes()
            .Where(t => t is {IsClass: true, IsAbstract: false} && typeof(ICrawler).IsAssignableFrom(t));

        foreach (var type in crawlerTypes)
        {
            services.AddKeyedTransient(typeof(ICrawler), type.Name, type);
        }

        services
            .AddSingleton<ICrawlerProvider, CrawlerProvider>();

        return services;
    }
}
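With this reflection scan, every concrete ICrawler in the assembly is registered under its type name, so adding a crawler no longer means editing this file. A sketch of how such a keyed registration is resolved back out (the concrete CrawlerProvider implementation isn't shown in this diff, but it would use something along these lines):

    // Resolve a specific crawler by the key it was registered under.
    var crawler = serviceProvider.GetRequiredKeyedService<ICrawler>(nameof(TorrentioCrawler));
    await crawler.Execute();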
@@ -1,4 +1,4 @@
namespace Producer.Models;
namespace Producer.Features.CrawlerSupport;

// Torrent represents a crawled torrent from one of our
// supported sources.
@@ -1,4 +1,4 @@
namespace Producer.Crawlers.Sites;
namespace Producer.Features.Crawlers.Dmm;

public partial class DebridMediaManagerCrawler(
    IHttpClientFactory httpClientFactory,
@@ -1,4 +1,4 @@
namespace Producer.Models.Configuration;
namespace Producer.Features.Crawlers.Dmm;

public class GithubConfiguration
{
@@ -1,12 +1,13 @@
using DebridMediaManagerCrawler = Producer.Crawlers.Sites.DebridMediaManagerCrawler;
using Literals = Producer.Features.JobSupport.Literals;

namespace Producer.Jobs;
namespace Producer.Features.Crawlers.Dmm;

[DisallowConcurrentExecution]
[ManualJobRegistration]
public class SyncDmmJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(DebridMediaManagerCrawler);
    public static readonly JobKey Key = new(JobName, nameof(Crawlers));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers));
    public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
    protected override string Crawler => nameof(DebridMediaManagerCrawler);
}
@@ -1,4 +1,4 @@
namespace Producer.Crawlers.Sites;
namespace Producer.Features.Crawlers.EzTv;

public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
@@ -1,12 +1,12 @@
using Producer.Crawlers.Sites;
using Literals = Producer.Features.JobSupport.Literals;

namespace Producer.Jobs;
namespace Producer.Features.Crawlers.EzTv;

[DisallowConcurrentExecution]
public class SyncEzTvJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(EzTvCrawler);
    public static readonly JobKey Key = new(JobName, nameof(Crawlers));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers));
    public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
    protected override string Crawler => nameof(EzTvCrawler);
}
@@ -1,4 +1,4 @@
namespace Producer.Crawlers.Sites;
namespace Producer.Features.Crawlers.Nyaa;

public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
@@ -1,12 +1,12 @@
using Producer.Crawlers.Sites;
using Literals = Producer.Features.JobSupport.Literals;

namespace Producer.Jobs;
namespace Producer.Features.Crawlers.Nyaa;

[DisallowConcurrentExecution]
public class SyncNyaaJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(NyaaCrawler);
    public static readonly JobKey Key = new(JobName, nameof(Crawlers));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers));
    public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
    protected override string Crawler => nameof(NyaaCrawler);
}
@@ -1,12 +1,12 @@
using TgxCrawler = Producer.Crawlers.Sites.TgxCrawler;
using Literals = Producer.Features.JobSupport.Literals;

namespace Producer.Jobs;
namespace Producer.Features.Crawlers.Tgx;

[DisallowConcurrentExecution]
public class SyncTgxJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(TgxCrawler);
    public static readonly JobKey Key = new(JobName, nameof(Crawlers));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers));
    public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
    protected override string Crawler => nameof(TgxCrawler);
}
@@ -1,4 +1,4 @@
namespace Producer.Crawlers.Sites;
namespace Producer.Features.Crawlers.Tgx;

public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
13
src/producer/Features/Crawlers/Torrentio/SyncTorrentioJob.cs
Normal file
@@ -0,0 +1,13 @@
using Literals = Producer.Features.JobSupport.Literals;

namespace Producer.Features.Crawlers.Torrentio;

[DisallowConcurrentExecution]
[ManualJobRegistration]
public class SyncTorrentioJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(TorrentioCrawler);
    public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
    protected override string Crawler => nameof(TorrentioCrawler);
}
@@ -0,0 +1,9 @@
namespace Producer.Features.Crawlers.Torrentio;

public class TorrentioConfiguration
{
    public const string SectionName = "TorrentioConfiguration";
    public const string Filename = "torrentio.json";

    public List<TorrentioInstance> Instances { get; set; } = [];
}
197
src/producer/Features/Crawlers/Torrentio/TorrentioCrawler.cs
Normal file
@@ -0,0 +1,197 @@
using Literals = Producer.Features.CrawlerSupport.Literals;

namespace Producer.Features.Crawlers.Torrentio;

public partial class TorrentioCrawler(
    IHttpClientFactory httpClientFactory,
    ILogger<TorrentioCrawler> logger,
    IDataStorage storage,
    TorrentioConfiguration configuration,
    ImdbMongoDbService imdbDataService) : BaseCrawler(logger, storage)
{
    [GeneratedRegex(@"(\d+(\.\d+)?) (GB|MB)")]
    private static partial Regex SizeMatcher();

    private const string MovieSlug = "movie/{0}.json";
    protected override string Url => "sort=size%7Cqualityfilter=other,scr,cam,unknown/stream/{0}";
    protected override IReadOnlyDictionary<string, string> Mappings { get; } = new Dictionary<string, string>();
    protected override string Source => "Torrentio";
    private readonly Dictionary<string, TorrentioScrapeInstance> _instanceStates = [];

    public override async Task Execute()
    {
        var client = httpClientFactory.CreateClient(Literals.CrawlerClient);
        var instances = configuration.Instances;
        var totalRecordCount = await imdbDataService.GetTotalCountAsync();
        logger.LogInformation("Total IMDB records to process: {TotalRecordCount}", totalRecordCount);
        var tasks = instances.Select(x => ProcessForInstanceAsync(x, client, totalRecordCount)).ToArray();
        await Task.WhenAll(tasks);
    }

    private Task ProcessForInstanceAsync(TorrentioInstance instance, HttpClient client, long totalRecordCount) =>
        Task.Run(
            async () =>
            {
                while (instance.TotalProcessedRequests(_instanceStates) < totalRecordCount)
                {
                    logger.LogInformation("Processing {TorrentioInstance}", instance.Name);
                    logger.LogInformation("Current processed requests: {ProcessedRequests}", instance.TotalProcessedRequests(_instanceStates));

                    var items = await imdbDataService.GetImdbEntriesForRequests(
                        DateTime.UtcNow.Year.ToString(),
                        instance.RateLimit.RequestLimit,
                        instance.LastProcessedImdbId(_instanceStates));

                    if (items.Count == 0)
                    {
                        logger.LogInformation("No items to process for {TorrentioInstance}", instance.Name);
                        break;
                    }

                    var newTorrents = new List<Torrent>();
                    var processedItemsCount = 0;

                    foreach (var item in items)
                    {
                        processedItemsCount++;

                        var waitTime = instance.CalculateWaitTime(_instanceStates);

                        if (waitTime > TimeSpan.Zero)
                        {
                            logger.LogInformation("Rate limit reached for {TorrentioInstance}", instance.Name);
                            logger.LogInformation("Waiting for {TorrentioInstance}: {WaitTime}", instance.Name, waitTime);
                            await Task.Delay(waitTime);
                        }

                        if (processedItemsCount % 2 == 0)
                        {
                            var randomWait = new Random().Next(1000, 5000);
                            logger.LogInformation("Waiting for {TorrentioInstance}: {WaitTime}", instance.Name, randomWait);
                            await Task.Delay(randomWait);
                        }

                        try
                        {
                            var torrentInfo = await ScrapeInstance(instance, item.ImdbId, client);
                            if (torrentInfo is not null)
                            {
                                newTorrents.AddRange(torrentInfo.Where(x => x != null).Select(x => x!));
                            }
                        }
                        catch (Exception error)
                        {
                            logger.LogError(error, "page processing error in TorrentioCrawler");
                        }
                    }

                    if (newTorrents.Count > 0)
                    {
                        await InsertTorrents(newTorrents);

                        var currentState = _instanceStates[instance.Name];
                        _instanceStates[instance.Name] = currentState with
                        {
                            LastProcessedImdbId = items[^1].ImdbId,
                        };
                    }
                }
            });

    private async Task<List<Torrent?>?> ScrapeInstance(TorrentioInstance instance, string imdbId, HttpClient client)
    {
        logger.LogInformation("Searching Torrentio {TorrentioInstance}: {ImdbId}", instance.Name, imdbId);
        try
        {
            var movieSlug = string.Format(MovieSlug, imdbId);
            var urlSlug = string.Format(Url, movieSlug);
            return await RunRequest(instance, urlSlug, imdbId, client);
        }
        catch (Exception error)
        {
            logger.LogError(error, "page processing error {TorrentioInstance}: {ImdbId}", instance.Name, imdbId);
            logger.LogWarning("Setting {TorrentioInstance} to possibly rate limited for five minutes", instance.Name);
            instance.SetPossiblyRateLimited(_instanceStates);
        }
        return null;
    }

    private async Task<List<Torrent?>?> RunRequest(TorrentioInstance instance, string urlSlug, string imdbId, HttpClient client)
    {
        var requestUrl = $"{instance.Url}/{urlSlug}";
        var response = await client.GetAsync(requestUrl);

        if (!response.IsSuccessStatusCode)
        {
            logger.LogError("Failed to fetch {Url}", requestUrl);
            return null;
        }

        var json = JsonDocument.Parse(await response.Content.ReadAsStringAsync());
        var streams = json.RootElement.GetProperty("streams").EnumerateArray();
        return streams.Select(x => ParseTorrent(instance, x, imdbId)).Where(x => x != null).ToList();
    }

    private Torrent? ParseTorrent(TorrentioInstance instance, JsonElement item, string imdId)
    {
        var title = item.GetProperty("title").GetString();
        var infoHash = item.GetProperty("infoHash").GetString();

        if (string.IsNullOrEmpty(title) || string.IsNullOrEmpty(infoHash))
        {
            return null;
        }

        var torrent = ParseTorrentDetails(title, instance, infoHash, imdId);

        if (torrent is null || string.IsNullOrEmpty(torrent.Name))
        {
            return null;
        }

        return torrent;
    }

    private Torrent? ParseTorrentDetails(string title, TorrentioInstance instance, string infoHash, string imdbId)
    {
        try
        {
            var torrent = new Torrent
            {
                Source = $"{Source}_{instance.Name}",
                InfoHash = infoHash,
                Category = "movies", // we only handle movies for now...
                Imdb = imdbId,
            };

            var span = title.AsSpan();
            var titleEnd = span.IndexOf('\n');
            var titlePart = titleEnd >= 0 ? span[..titleEnd].ToString() : title;

            torrent.Name = titlePart.Replace('.', ' ').TrimEnd('.');

            var sizeMatch = SizeMatcher().Match(title);

            if (sizeMatch.Success)
            {
                var size = double.Parse(sizeMatch.Groups[1].Value); // Size Value
                var sizeUnit = sizeMatch.Groups[3].Value; // Size Unit (GB/MB)

                var sizeInBytes = sizeUnit switch
                {
                    "GB" => (long) (size * 1073741824),
                    "MB" => (long) (size * 1048576),
                    _ => 0,
                };

                torrent.Size = sizeInBytes.ToString();
            }

            return torrent;
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error parsing torrent details");
            return null;
        }
    }
}
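A note on the size parsing above: in the SizeMatcher pattern, group 1 is the numeric value and group 3 the unit, because group 2 only captures the optional fractional part. A worked example:

    // "1.5 GB" -> Groups[1] = "1.5", Groups[2] = ".5", Groups[3] = "GB"
    var match = Regex.Match("1.5 GB", @"(\d+(\.\d+)?) (GB|MB)");
    var bytes = (long)(double.Parse(match.Groups[1].Value) * 1073741824); // 1610612736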
@@ -0,0 +1,10 @@
namespace Producer.Features.Crawlers.Torrentio;

public class TorrentioInstance
{
    public string Name { get; init; } = default!;

    public string Url { get; init; } = default!;

    public TorrentioRateLimit RateLimit { get; init; } = default!;
}
@@ -0,0 +1,53 @@
namespace Producer.Features.Crawlers.Torrentio;

public static class TorrentioInstancesExtensions
{
    public static TimeSpan CalculateWaitTime(this TorrentioInstance instance, Dictionary<string, TorrentioScrapeInstance> scraperState)
    {
        if (!scraperState.TryGetValue(instance.Name, out var state))
        {
            state = new (DateTime.UtcNow, 0, 0, null);
            scraperState[instance.Name] = state;
        }

        var (startedAt, requestCount, totalProcessed, lastProcessedImdbId) = state;

        if (requestCount < instance.RateLimit.RequestLimit)
        {
            scraperState[instance.Name] = new (startedAt, requestCount + 1, totalProcessed + 1, lastProcessedImdbId);
            return TimeSpan.Zero;
        }

        var elapsed = DateTime.UtcNow - startedAt;
        var interval = TimeSpan.FromSeconds(instance.RateLimit.IntervalInSeconds);
        var remaining = interval - elapsed;

        // reset the state for the next interval
        scraperState[instance.Name] = new (DateTime.UtcNow, 0, totalProcessed, lastProcessedImdbId);

        return remaining > TimeSpan.Zero ? remaining : TimeSpan.Zero;
    }

    public static void SetPossiblyRateLimited(this TorrentioInstance instance, Dictionary<string, TorrentioScrapeInstance> scraperState, int minutesToWait = 5)
    {
        if (!scraperState.TryGetValue(instance.Name, out var state))
        {
            state = new (DateTime.UtcNow, 0, 0, null);
        }

        var (_, _, totalProcessed, lastProcessedImdbId) = state;

        // Mark the request window as exhausted and backdate its start by minutesToWait
        // minutes, so the next CalculateWaitTime call waits out the rest of the interval
        var startedAt = DateTime.UtcNow.AddMinutes(-minutesToWait);
        var requestCount = instance.RateLimit.RequestLimit;

        // Update the scraper state for the instance
        scraperState[instance.Name] = new (startedAt, requestCount, totalProcessed, lastProcessedImdbId);
    }

    public static long TotalProcessedRequests(this TorrentioInstance instance, Dictionary<string, TorrentioScrapeInstance> scraperState) =>
        !scraperState.TryGetValue(instance.Name, out var state) ? 0 : state.TotalProcessed;

    public static string? LastProcessedImdbId(this TorrentioInstance instance, Dictionary<string, TorrentioScrapeInstance> scraperState) =>
        !scraperState.TryGetValue(instance.Name, out var state) ? null : state.LastProcessedImdbId;
}
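To make the window arithmetic concrete, here is how the official instance's limits (300 requests per 3600 seconds) play out against CalculateWaitTime; a sketch, not part of the commit:

    var instance = new TorrentioInstance
    {
        Name = "Official",
        Url = "https://torrentio.strem.fun",
        RateLimit = new TorrentioRateLimit { RequestLimit = 300, IntervalInSeconds = 3600 },
    };
    var state = new Dictionary<string, TorrentioScrapeInstance>();

    for (var i = 0; i < 300; i++)
    {
        _ = instance.CalculateWaitTime(state); // TimeSpan.Zero: still inside the window
    }

    var wait = instance.CalculateWaitTime(state); // remainder of the hour; the window then resets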
@@ -0,0 +1,7 @@
namespace Producer.Features.Crawlers.Torrentio;

public class TorrentioRateLimit
{
    public int RequestLimit { get; set; }
    public int IntervalInSeconds { get; set; }
}
@@ -0,0 +1,3 @@
namespace Producer.Features.Crawlers.Torrentio;

public record TorrentioScrapeInstance(DateTime StartedAt, int RequestCount, int TotalProcessed, string? LastProcessedImdbId);
@@ -1,12 +1,12 @@
using Producer.Crawlers.Sites;
using Literals = Producer.Features.JobSupport.Literals;

namespace Producer.Jobs;
namespace Producer.Features.Crawlers.Tpb;

[DisallowConcurrentExecution]
public class SyncTpbJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(TpbCrawler);
    public static readonly JobKey Key = new(JobName, nameof(Crawlers));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers));
    public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
    protected override string Crawler => nameof(TpbCrawler);
}
@@ -1,4 +1,4 @@
namespace Producer.Crawlers.Sites;
namespace Producer.Features.Crawlers.Tpb;

public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage) : BaseJsonCrawler(httpClientFactory, logger, storage)
{
@@ -1,12 +1,12 @@
using Producer.Crawlers.Sites;
using Literals = Producer.Features.JobSupport.Literals;

namespace Producer.Jobs;
namespace Producer.Features.Crawlers.Yts;

[DisallowConcurrentExecution]
public class SyncYtsJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(YtsCrawler);
    public static readonly JobKey Key = new(JobName, nameof(Crawlers));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Crawlers));
    public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
    protected override string Crawler => nameof(YtsCrawler);
}
@@ -1,4 +1,4 @@
namespace Producer.Crawlers.Sites;
namespace Producer.Features.Crawlers.Yts;

public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
{
@@ -1,4 +1,4 @@
namespace Producer.Services;
namespace Producer.Features.DataProcessing;

public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConfiguration rabbitConfig, ILogger<DapperDataStorage> logger) : IDataStorage
{
@@ -1,4 +1,4 @@
namespace Producer.Interfaces;
namespace Producer.Features.DataProcessing;

public interface IDataStorage
{
15
src/producer/Features/DataProcessing/ImdbEntry.cs
Normal file
@@ -0,0 +1,15 @@
namespace Producer.Features.DataProcessing;

public class ImdbEntry
{
    [BsonId]
    public string ImdbId { get; set; } = default!;
    public string? TitleType { get; set; }
    public string? PrimaryTitle { get; set; }
    public string? OriginalTitle { get; set; }
    public string? IsAdult { get; set; }
    public string? StartYear { get; set; }
    public string? EndYear { get; set; }
    public string? RuntimeMinutes { get; set; }
    public string? Genres { get; set; }
}
78
src/producer/Features/DataProcessing/ImdbMongoDbService.cs
Normal file
@@ -0,0 +1,78 @@
namespace Producer.Features.DataProcessing;

public class ImdbMongoDbService
{
    private readonly ILogger<ImdbMongoDbService> _logger;
    private readonly IMongoCollection<ImdbEntry> _imdbCollection;

    public ImdbMongoDbService(MongoConfiguration configuration, ILogger<ImdbMongoDbService> logger)
    {
        _logger = logger;

        var client = new MongoClient(configuration.ConnectionString);
        var database = client.GetDatabase(configuration.DbName);

        _imdbCollection = database.GetCollection<ImdbEntry>("imdb-entries");
    }

    public async Task<IReadOnlyList<ImdbEntry>> GetImdbEntriesForRequests(string startYear, int requestLimit, string? startingId = null)
    {
        var sort = Builders<ImdbEntry>.Sort
            .Descending(e => e.StartYear)
            .Descending(e => e.ImdbId);

        var filter = Builders<ImdbEntry>.Filter
            .And(
                Builders<ImdbEntry>.Filter.Eq(e => e.TitleType, "movie"),
                Builders<ImdbEntry>.Filter.Lte(e => e.StartYear, startYear)
            );

        if (!string.IsNullOrWhiteSpace(startingId))
        {
            filter = Builders<ImdbEntry>.Filter.And(filter, Builders<ImdbEntry>.Filter.Lt(e => e.ImdbId, startingId));
        }

        return await _imdbCollection.Find(filter).Limit(requestLimit).Sort(sort).ToListAsync();
    }

    public async Task<long> GetTotalCountAsync()
    {
        var filter = Builders<ImdbEntry>.Filter.Eq(x => x.TitleType, "movie");
        return await _imdbCollection.CountDocumentsAsync(filter);
    }

    public bool IsDatabaseInitialized()
    {
        try
        {
            // Compound index for PrimaryTitle, TitleType, and StartYear
            var index1KeysDefinition = Builders<ImdbEntry>.IndexKeys
                .Text(e => e.PrimaryTitle)
                .Ascending(e => e.TitleType)
                .Ascending(e => e.StartYear);

            CreateIndex(index1KeysDefinition);

            // Compound index for StartYear and _id in descending order
            var index2KeysDefinition = Builders<ImdbEntry>.IndexKeys
                .Descending(e => e.StartYear)
                .Descending(e => e.ImdbId);

            CreateIndex(index2KeysDefinition);

            return true;
        }
        catch (Exception e)
        {
            _logger.LogError(e, "Error initializing database");
            return false;
        }
    }

    private void CreateIndex(IndexKeysDefinition<ImdbEntry> keysDefinition)
    {
        var createIndexOptions = new CreateIndexOptions { Background = true };
        var indexModel = new CreateIndexModel<ImdbEntry>(keysDefinition, createIndexOptions);
        _imdbCollection.Indexes.CreateOne(indexModel);
    }
}
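GetImdbEntriesForRequests pages by key rather than by offset: under the descending (StartYear, ImdbId) sort, the extra ImdbId < startingId filter resumes exactly below the last row of the previous batch. A usage sketch mirroring how TorrentioCrawler threads LastProcessedImdbId through its state:

    string? lastId = null;
    while (true)
    {
        var batch = await imdbDataService.GetImdbEntriesForRequests(DateTime.UtcNow.Year.ToString(), 300, lastId);
        if (batch.Count == 0) break;
        lastId = batch[^1].ImdbId; // the next call continues below this ID
    }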
20
src/producer/Features/DataProcessing/MongoConfiguration.cs
Normal file
@@ -0,0 +1,20 @@
namespace Producer.Features.DataProcessing;

public class MongoConfiguration
{
    private const string Prefix = "MONGODB";
    private const string HostVariable = "HOST";
    private const string PortVariable = "PORT";
    private const string DbVariable = "DB";
    private const string UsernameVariable = "USER";
    private const string PasswordVariable = "PASSWORD";

    private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
    private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 27017);
    private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
    private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
    public string DbName { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DbVariable);

    public string ConnectionString => $"mongodb://{Username}:{Password}@{Host}:{Port}/{DbName}?tls=false&directConnection=true&authSource=admin";
}
@@ -1,4 +1,4 @@
namespace Producer.Models.Configuration;
namespace Producer.Features.DataProcessing;

public class PostgresConfiguration
{
@@ -0,0 +1,14 @@
namespace Producer.Features.DataProcessing;

internal static class ServiceCollectionExtensions
{
    internal static IServiceCollection AddDataStorage(this IServiceCollection services)
    {
        services.LoadConfigurationFromEnv<PostgresConfiguration>();
        services.LoadConfigurationFromEnv<MongoConfiguration>();
        services.AddTransient<IDataStorage, DapperDataStorage>();
        services.AddTransient<IMessagePublisher, TorrentPublisher>();
        services.AddSingleton<ImdbMongoDbService>();
        return services;
    }
}
@@ -1,4 +1,4 @@
namespace Producer.Jobs;
namespace Producer.Features.JobSupport;

public abstract class BaseJob(ICrawlerProvider crawlerProvider) : IJob
{
@@ -1,4 +1,4 @@
namespace Producer.Jobs;
namespace Producer.Features.JobSupport;

public interface ICrawlerJob<out TCrawler> : IJob
    where TCrawler : ICrawler
7
src/producer/Features/JobSupport/Literals.cs
Normal file
@@ -0,0 +1,7 @@
namespace Producer.Features.JobSupport;

public static class Literals
{
    public const string CrawlersJobs = "CrawlersJobs";
    public const string PublishingJobs = "PublishingJobs";
}
@@ -0,0 +1,6 @@
namespace Producer.Features.JobSupport;

[AttributeUsage(AttributeTargets.Class)]
public class ManualJobRegistrationAttribute : Attribute
{
}
@@ -1,4 +1,4 @@
namespace Producer.Models.Configuration;
namespace Producer.Features.JobSupport;

public class ScrapeConfiguration
{
131
src/producer/Features/JobSupport/ServiceCollectionExtensions.cs
Normal file
@@ -0,0 +1,131 @@
namespace Producer.Features.JobSupport;

internal static class ServiceCollectionExtensions
{
    internal static IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration)
    {
        var scrapeConfiguration = services.LoadConfigurationFromConfig<ScrapeConfiguration>(configuration, ScrapeConfiguration.SectionName);
        var githubConfiguration = services.LoadConfigurationFromEnv<GithubConfiguration>();
        var rabbitConfiguration = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();

        var jobTypes = Assembly.GetAssembly(typeof(BaseJob))
            .GetTypes()
            .Where(t => t is {IsClass: true, IsAbstract: false} && typeof(IJob).IsAssignableFrom(t) &&
                        !Attribute.IsDefined(t, typeof(ManualJobRegistrationAttribute)))
            .ToList();

        foreach (var type in jobTypes)
        {
            services.AddTransient(type);
        }

        if (!string.IsNullOrEmpty(githubConfiguration.PAT))
        {
            services.AddTransient<SyncDmmJob>();
        }

        var openMethod = typeof(ServiceCollectionExtensions).GetMethod(nameof(AddJobWithTrigger), BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance);

        services.AddQuartz(
            quartz =>
            {
                RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
                RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration);
                RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
                RegisterPublisher(quartz, rabbitConfiguration);
            });

        services.AddQuartzHostedService(
            options =>
            {
                options.WaitForJobsToComplete = true;
            });

        return services;
    }

    private static void RegisterAutomaticRegistrationJobs(List<Type> jobTypes, MethodInfo? openMethod, IServiceCollectionQuartzConfigurator quartz,
        ScrapeConfiguration scrapeConfiguration)
    {
        foreach (var jobType in jobTypes)
        {
            var key = jobType.GetField("Key")?.GetValue(jobType);
            var trigger = jobType.GetField("Trigger")?.GetValue(jobType);

            if (key is null || trigger is null)
            {
                Console.WriteLine($"Job {jobType.Name} does not have a JobKey or TriggerKey property");
                continue;
            }

            var method = openMethod.MakeGenericMethod(jobType);
            method.Invoke(null, [quartz, key, trigger, scrapeConfiguration]);
        }
    }

    private static void RegisterDmmJob(GithubConfiguration githubConfiguration, IServiceCollectionQuartzConfigurator quartz, ScrapeConfiguration scrapeConfiguration)
    {
        if (!string.IsNullOrEmpty(githubConfiguration.PAT))
        {
            AddJobWithTrigger<SyncDmmJob>(quartz, SyncDmmJob.Key, SyncDmmJob.Trigger, scrapeConfiguration);
        }
    }

    private static void RegisterTorrentioJob(
        IServiceCollection services,
        IServiceCollectionQuartzConfigurator quartz,
        IConfiguration configuration,
        ScrapeConfiguration scrapeConfiguration)
    {
        var torrentioConfiguration = services.LoadConfigurationFromConfig<TorrentioConfiguration>(configuration, TorrentioConfiguration.SectionName);

        if (torrentioConfiguration.Instances.Count != 0)
        {
            AddJobWithTrigger<SyncTorrentioJob>(quartz, SyncTorrentioJob.Key, SyncTorrentioJob.Trigger, scrapeConfiguration);
        }
    }

    private static void RegisterPublisher(IServiceCollectionQuartzConfigurator quartz, RabbitMqConfiguration rabbitConfig) =>
        AddJobWithTriggerAndInterval<PublisherJob>(quartz, PublisherJob.Key, PublisherJob.Trigger, rabbitConfig.PublishIntervalInSeconds);

    private static void AddJobWithTrigger<TJobType>(
        IServiceCollectionQuartzConfigurator quartz,
        JobKey key,
        TriggerKey trigger,
        ScrapeConfiguration scrapeConfiguration) where TJobType : IJob
    {
        var scraper = scrapeConfiguration.Scrapers
            .FirstOrDefault(x => x.Name != null &&
                                 x.Name.Equals(typeof(TJobType).Name, StringComparison.OrdinalIgnoreCase));

        if (scraper is null || !scraper.Enabled)
        {
            return;
        }

        quartz.AddJob<TJobType>(opts => opts.WithIdentity(key).StoreDurably());

        quartz.AddTrigger(
            opts => opts
                .ForJob(key)
                .WithIdentity(trigger)
                .StartAt(DateTimeOffset.Now.AddSeconds(20))
                .WithSimpleSchedule(x => x.WithInterval(TimeSpan.FromSeconds(scraper.IntervalSeconds)).RepeatForever()));
    }

    private static void AddJobWithTriggerAndInterval<TJobType>(
        IServiceCollectionQuartzConfigurator quartz,
        JobKey key,
        TriggerKey trigger,
        int interval) where TJobType : IJob
    {
        quartz.AddJob<TJobType>(opts => opts.WithIdentity(key).StoreDurably());

        quartz.AddTrigger(
            opts => opts
                .ForJob(key)
                .WithIdentity(trigger)
                .StartAt(DateTimeOffset.Now.AddSeconds(20))
                .WithSimpleSchedule(x => x.WithInterval(TimeSpan.FromSeconds(interval)).RepeatForever()));
    }
}
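For a job to be picked up by RegisterAutomaticRegistrationJobs above, it must expose public static Key and Trigger fields and not carry [ManualJobRegistration]; its type name also has to match an enabled entry in scrapers.json. A minimal sketch of that contract (SyncExampleJob is hypothetical):

    [DisallowConcurrentExecution]
    public class SyncExampleJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
    {
        private const string JobName = nameof(SyncExampleJob);
        public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));
        public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));
        protected override string Crawler => nameof(SyncExampleJob);
    }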
@@ -1,9 +1,10 @@
// Global using directives

global using System.Reflection;
global using System.Text;
global using System.Text.Json;
global using System.Text.Json.Serialization;
global using System.Text.RegularExpressions;
global using System.Threading.Channels;
global using System.Xml.Linq;
global using Dapper;
global using LZStringCSharp;
@@ -12,14 +13,15 @@ global using Microsoft.AspNetCore.Builder;
global using Microsoft.Extensions.DependencyInjection;
global using Microsoft.Extensions.DependencyInjection.Extensions;
global using Microsoft.Extensions.Logging;
global using MongoDB.Bson.Serialization.Attributes;
global using MongoDB.Driver;
global using Npgsql;
global using Quartz;
global using Producer.Crawlers;
global using Producer.Crawlers.Sites;
global using Producer.Extensions;
global using Producer.Interfaces;
global using Producer.Jobs;
global using Producer.Models;
global using Producer.Models.Configuration;
global using Producer.Services;
global using Producer.Features.Amqp;
global using Producer.Features.Crawlers.Dmm;
global using Producer.Features.Crawlers.Torrentio;
global using Producer.Features.CrawlerSupport;
global using Producer.Features.DataProcessing;
global using Producer.Features.JobSupport;
global using Serilog;
@@ -1,6 +0,0 @@
namespace Producer.Interfaces;

public interface IIpService
{
    Task GetPublicIpAddress();
}
@@ -1,14 +0,0 @@
namespace Producer.Jobs;

[DisallowConcurrentExecution]
public class IPJob(IIpService ipService) : IJob
{
    private const string JobName = nameof(IPJob);
    public static readonly JobKey Key = new(JobName, nameof(Jobs));
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Jobs));

    public Task Execute(IJobExecutionContext context)
    {
        return ipService.GetPublicIpAddress();
    }
}
@@ -15,6 +15,7 @@
    <PackageReference Include="MassTransit.RabbitMQ" Version="8.1.3" />
    <PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
    <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
    <PackageReference Include="MongoDB.Driver" Version="2.24.0" />
    <PackageReference Include="Npgsql" Version="8.0.1" />
    <PackageReference Include="Quartz.Extensions.DependencyInjection" Version="3.8.0" />
    <PackageReference Include="Quartz.Extensions.Hosting" Version="3.8.0" />
@@ -24,12 +25,8 @@
  </ItemGroup>

  <ItemGroup>
    <Content Remove="Configuration\scrapers.json" />
    <None Include="Configuration\scrapers.json">
      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
    </None>
    <Content Remove="Configuration\logging.json" />
    <None Include="Configuration\logging.json">
    <Content Remove="Configuration\*.json" />
    <None Include="Configuration\*.json">
      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
    </None>
  </ItemGroup>
@@ -1,13 +0,0 @@
namespace Producer.Services;

public class IpService(ILogger<IpService> logger, IHttpClientFactory httpClientFactory) : IIpService
{
    public async Task GetPublicIpAddress()
    {
        var client = httpClientFactory.CreateClient("Scraper");
        client.DefaultRequestHeaders.UserAgent.ParseAdd("curl");
        var request = await client.GetStringAsync("http://ifconfig.me");

        logger.LogInformation("Public IP Address: {PublicIPAddress}", request);
    }
}