Introduce a max batch size and a configurable publish window

Still need to implement the queue size limit (MaxQueueSize is added below but not yet enforced).
Also fixes env var consistency between the addon and the consumer.
iPromKnight
2024-02-02 13:49:42 +00:00
parent 0644fe7720
commit 68edaba308
41 changed files with 88 additions and 51 deletions

env/consumer.env (vendored, 2 changes)

@@ -1,6 +1,6 @@
 TZ=London/Europe
 MONGODB_URI=mongodb://mongo:mongo@mongodb/selfhostio?tls=false&authSource=admin
-POSTGRES_DATABASE_URI=postgres://postgres:postgres@postgres/selfhostio
+DATABASE_URI=postgres://postgres:postgres@postgres/selfhostio
 RABBIT_URI=amqp://guest:guest@rabbitmq:5672/?heartbeat=30
 QUEUE_NAME=ingested
 JOB_CONCURRENCY=5

env/producer.env (vendored, 6 changes)

@@ -1,4 +1,10 @@
 ScrapeConfiguration__StorageConnectionString=host=postgres;username=postgres;password=postgres;database=selfhostio;
 RabbitMqConfiguration__Host=rabbitmq
 RabbitMqConfiguration__QueueName=ingested
+RabbitMqConfiguration__Username=guest
+RabbitMqConfiguration__Password=guest
+RabbitMqConfiguration__Durable=true
+RabbitMqConfiguration__MaxQueueSize=1000
+RabbitMqConfiguration__MaxPublishBatchSize=100
+RabbitMqConfiguration__PublishIntervalInSeconds=10
 GithubSettings__PAT=
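
Worth noting for anyone wiring this up: the double underscore in these keys is the ASP.NET Core environment variable convention for section separators, so RabbitMqConfiguration__MaxPublishBatchSize binds to RabbitMqConfiguration:MaxPublishBatchSize. A minimal C# sketch of that binding, assuming the RabbitMqConfiguration model added later in this commit (the standalone builder is illustrative, not the producer's actual startup):

using Microsoft.Extensions.Configuration;

// The environment variable provider translates "__" into the ":" section
// separator, so the env file above fills the RabbitMqConfiguration section.
var configuration = new ConfigurationBuilder()
    .AddEnvironmentVariables()
    .Build();

var rabbit = configuration
    .GetSection("RabbitMqConfiguration")
    .Get<RabbitMqConfiguration>();

// With env/producer.env loaded: rabbit.MaxPublishBatchSize == 100,
// rabbit.PublishIntervalInSeconds == 10.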

View File

@@ -10,7 +10,7 @@ export const cacheConfig = {
 }
 
 export const databaseConfig = {
-    DATABASE_URI: process.env.POSTGRES_DATABASE_URI || 'postgres://postgres:postgres@localhost:5432/selfhostio',
+    DATABASE_URI: process.env.DATABASE_URI || 'postgres://postgres:postgres@localhost:5432/selfhostio',
     ENABLE_SYNC: parseBool(process.env.ENABLE_SYNC, true)
 }

View File

@@ -4,6 +4,9 @@
"Username": "guest", "Username": "guest",
"Password": "guest", "Password": "guest",
"QueueName": "test-queue", "QueueName": "test-queue",
"Durable": true "Durable": true,
"MaxQueueSize": 1000,
"MaxPublishBatchSize": 100,
"PublishIntervalInSeconds": 10
} }
} }

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers;
+namespace Producer.Crawlers;
 
 public abstract class BaseCrawler(ILogger<BaseCrawler> logger, IDataStorage storage) : ICrawler
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers;
+namespace Producer.Crawlers;
 
 public abstract class BaseJsonCrawler(IHttpClientFactory httpClientFactory, ILogger<BaseJsonCrawler> logger, IDataStorage storage) : BaseCrawler(logger, storage)
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers;
+namespace Producer.Crawlers;
 
 public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogger<BaseXmlCrawler> logger, IDataStorage storage) : BaseCrawler(logger, storage)
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers;
+namespace Producer.Crawlers;
 
 public class CrawlerProvider(IServiceProvider serviceProvider) : ICrawlerProvider
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers.Sites;
+namespace Producer.Crawlers.Sites;
 
 public partial class DebridMediaManagerCrawler(
     IHttpClientFactory httpClientFactory,
@@ -83,6 +83,7 @@ public partial class DebridMediaManagerCrawler(
         if (!result.Success)
         {
             logger.LogWarning("Failed to mark page as ingested: [{Error}]", result.ErrorMessage);
+            return;
         }
 
         logger.LogInformation("Successfully marked page as ingested");

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers.Sites;
+namespace Producer.Crawlers.Sites;
 
 public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers.Sites;
+namespace Producer.Crawlers.Sites;
 
 public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers.Sites;
+namespace Producer.Crawlers.Sites;
 
 public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage) : BaseJsonCrawler(httpClientFactory, logger, storage)
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers.Sites;
+namespace Producer.Crawlers.Sites;
 
 public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Extensions;
+namespace Producer.Extensions;
 
 public static class ConfigurationExtensions
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Extensions;
+namespace Producer.Extensions;
 
 internal static class ConfigureHostBuilderExtensions
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Extensions;
+namespace Producer.Extensions;
 
 public static class ServiceCollectionExtensions
 {
@@ -53,6 +53,7 @@ public static class ServiceCollectionExtensions
     {
         var scrapeConfiguration = LoadScrapeConfiguration(services, configuration);
         var githubConfiguration = LoadGithubConfiguration(services, configuration);
+        var rabbitConfig = LoadRabbitMQConfiguration(services, configuration);
 
         services
             .AddTransient<SyncEzTvJob>()
@@ -75,7 +76,7 @@ public static class ServiceCollectionExtensions
         AddJobWithTrigger<SyncYtsJob>(quartz, SyncYtsJob.Key, SyncYtsJob.Trigger, scrapeConfiguration);
         AddJobWithTrigger<SyncTgxJob>(quartz, SyncTgxJob.Key, SyncTgxJob.Trigger, scrapeConfiguration);
         AddJobWithTrigger<IPJob>(quartz, IPJob.Key, IPJob.Trigger, 60 * 5);
-        AddJobWithTrigger<PublisherJob>(quartz, PublisherJob.Key, PublisherJob.Trigger, 10);
+        AddJobWithTrigger<PublisherJob>(quartz, PublisherJob.Key, PublisherJob.Trigger, rabbitConfig.PublishIntervalInSeconds);
 
         if (!string.IsNullOrEmpty(githubConfiguration.PAT))
         {
@@ -98,10 +99,21 @@ public static class ServiceCollectionExtensions
         ArgumentNullException.ThrowIfNull(githubConfiguration, nameof(githubConfiguration));
 
-        services.AddSingleton(githubConfiguration);
+        services.TryAddSingleton(githubConfiguration);
 
         return githubConfiguration;
     }
 
+    private static RabbitMqConfiguration LoadRabbitMQConfiguration(IServiceCollection services, IConfiguration configuration)
+    {
+        var rabbitConfiguration = configuration.GetSection(RabbitMqConfiguration.SectionName).Get<RabbitMqConfiguration>();
+
+        ArgumentNullException.ThrowIfNull(rabbitConfiguration, nameof(rabbitConfiguration));
+
+        services.TryAddSingleton(rabbitConfiguration);
+
+        return rabbitConfiguration;
+    }
+
     private static ScrapeConfiguration LoadScrapeConfiguration(IServiceCollection services, IConfiguration configuration)
     {
@@ -109,7 +121,7 @@ public static class ServiceCollectionExtensions
         ArgumentNullException.ThrowIfNull(scrapeConfiguration, nameof(scrapeConfiguration));
 
-        services.AddSingleton(scrapeConfiguration);
+        services.TryAddSingleton(scrapeConfiguration);
 
         return scrapeConfiguration;
     }
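
The PublisherJob trigger interval now comes from configuration rather than the hard-coded 10. AddJobWithTrigger itself is not part of this diff; with Quartz's DI extensions, a seconds-based overload would look roughly like this (a sketch under that assumption, not the repo's actual helper):

// Hypothetical shape of the seconds-based AddJobWithTrigger overload.
private static void AddJobWithTrigger<TJob>(
    IServiceCollectionQuartzConfigurator quartz,
    JobKey key,
    TriggerKey trigger,
    int intervalInSeconds) where TJob : IJob
{
    quartz.AddJob<TJob>(job => job.WithIdentity(key));
    quartz.AddTrigger(t => t
        .WithIdentity(trigger)
        .ForJob(key)
        .StartNow()
        .WithSimpleSchedule(s => s
            .WithIntervalInSeconds(intervalInSeconds)
            .RepeatForever()));
}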

View File

@@ -9,14 +9,14 @@ global using LZStringCSharp;
 global using MassTransit;
 global using Microsoft.AspNetCore.Builder;
 global using Microsoft.Extensions.DependencyInjection;
+global using Microsoft.Extensions.DependencyInjection.Extensions;
 global using Microsoft.Extensions.Logging;
 global using Npgsql;
 global using Quartz;
-global using Scraper.Crawlers;
-global using Scraper.Crawlers.Sites;
-global using Scraper.Extensions;
-global using Scraper.Interfaces;
-global using Scraper.Jobs;
-global using Scraper.Models;
-global using Scraper.Services;
+global using Producer.Crawlers;
+global using Producer.Crawlers.Sites;
+global using Producer.Interfaces;
+global using Producer.Jobs;
+global using Producer.Models;
+global using Producer.Services;
 global using Serilog;

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Interfaces;
+namespace Producer.Interfaces;
 
 public interface ICrawler
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Interfaces;
+namespace Producer.Interfaces;
 
 public interface ICrawlerProvider
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Interfaces;
+namespace Producer.Interfaces;
 
 public interface IDataStorage
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Interfaces;
+namespace Producer.Interfaces;
 
 public interface IIpService
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Interfaces;
+namespace Producer.Interfaces;
 
 public interface IMessagePublisher
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Jobs;
+namespace Producer.Jobs;
 
 public abstract class BaseJob(ICrawlerProvider crawlerProvider) : IJob
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Jobs;
+namespace Producer.Jobs;
 
 public interface ICrawlerJob<out TCrawler> : IJob
     where TCrawler : ICrawler

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Jobs;
+namespace Producer.Jobs;
 
 [DisallowConcurrentExecution]
 public class IPJob(IIpService ipService) : IJob

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Jobs;
+namespace Producer.Jobs;
 
 [DisallowConcurrentExecution]
 public class PublisherJob(IMessagePublisher publisher, IDataStorage storage, ILogger<PublisherJob> logger) : IJob
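
PublisherJob's body is unchanged in this commit, but the knobs around it are now config-driven: Quartz fires it every PublishIntervalInSeconds, and the storage layer caps each fetch at MaxPublishBatchSize. A sketch of how those pieces meet in Execute, with hypothetical method names since the job body is not shown here:

public async Task Execute(IJobExecutionContext context)
{
    // GetPublishableTorrents is a stand-in name; storage already limits the
    // batch to MaxPublishBatchSize (see the DapperDataStorage hunk below).
    var torrents = await storage.GetPublishableTorrents(context.CancellationToken);

    if (torrents.Count == 0)
    {
        return;
    }

    // PublishAsync is likewise a stand-in for the IMessagePublisher call.
    await publisher.PublishAsync(torrents, context.CancellationToken);
    logger.LogInformation("Published batch of {Count} torrents", torrents.Count);
}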

View File

@@ -1,4 +1,6 @@
-namespace Scraper.Jobs;
+using DebridMediaManagerCrawler = Producer.Crawlers.Sites.DebridMediaManagerCrawler;
+
+namespace Producer.Jobs;
 
 [DisallowConcurrentExecution]
 public class SyncDmmJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)

View File

@@ -1,4 +1,6 @@
-namespace Scraper.Jobs;
+using Producer.Crawlers.Sites;
+
+namespace Producer.Jobs;
 
 [DisallowConcurrentExecution]
 public class SyncEzTvJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)

View File

@@ -1,4 +1,6 @@
-namespace Scraper.Jobs;
+using TgxCrawler = Producer.Crawlers.Sites.TgxCrawler;
+
+namespace Producer.Jobs;
 
 [DisallowConcurrentExecution]
 public class SyncTgxJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)

View File

@@ -1,4 +1,6 @@
-namespace Scraper.Jobs;
+using Producer.Crawlers.Sites;
+
+namespace Producer.Jobs;
 
 [DisallowConcurrentExecution]
 public class SyncTpbJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)

View File

@@ -1,4 +1,6 @@
-namespace Scraper.Jobs;
+using Producer.Crawlers.Sites;
+
+namespace Producer.Jobs;
 
 [DisallowConcurrentExecution]
 public class SyncYtsJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
 
 public class GithubConfiguration
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
 
 public class RabbitMqConfiguration
 {
@@ -10,4 +10,7 @@ public class RabbitMqConfiguration
     public string? Password { get; set; }
     public string? QueueName { get; set; }
     public bool Durable { get; set; }
+    public int MaxQueueSize { get; set; } = 1000;
+    public int MaxPublishBatchSize { get; set; } = 100;
+    public int PublishIntervalInSeconds { get; set; } = 1000 * 10;
 }
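
One detail worth flagging: the in-code default 1000 * 10 evaluates to 10000, which reads like a leftover milliseconds value, while the property name and every other default in this commit (env files, appsettings) treat the interval as seconds and set it to 10. The committed lines are reproduced above as-is; a seconds-consistent default would be (hypothetical correction, not what this commit ships):

// Keep the default in the unit the property name promises.
public int PublishIntervalInSeconds { get; set; } = 10;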

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
 
 public record InsertTorrentResult(bool Success, int InsertedCount = 0, string? ErrorMessage = null);
 public record UpdatedTorrentResult(bool Success, int UpdatedCount = 0, string? ErrorMessage = null);

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
 
 public class ScrapeConfiguration
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
 
 public class Scraper
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
 
 // Torrent represents a crawled torrent from one of our
 // supported sources.

View File

@@ -1,4 +1,6 @@
+using Producer.Extensions;
+
 var builder = WebApplication.CreateBuilder(args);
 
 builder.Configuration
     .AddScrapeConfiguration();

View File

@@ -1,6 +1,6 @@
-namespace Scraper.Services;
+namespace Producer.Services;
 
-public class DapperDataStorage(ScrapeConfiguration configuration, ILogger<DapperDataStorage> logger) : IDataStorage
+public class DapperDataStorage(ScrapeConfiguration configuration, RabbitMqConfiguration rabbitConfig, ILogger<DapperDataStorage> logger) : IDataStorage
 {
     private const string InsertTorrentSql =
     """
@@ -65,7 +65,7 @@ public class DapperDataStorage(ScrapeConfiguration configuration, ILogger<Dapper
             await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
             await connection.OpenAsync(cancellationToken);
             var torrents = await connection.QueryAsync<Torrent>(GetMovieAndSeriesTorrentsNotProcessedSql);
-            return torrents.ToList();
+            return torrents.Take(rabbitConfig.MaxPublishBatchSize).ToList();
         }
         catch (Exception e)
         {
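
Take() trims the batch client-side, after every unprocessed row has already been fetched and materialised. Pushing the cap into the query avoids transferring rows that are immediately discarded; a Dapper sketch of that alternative, where the LIMIT-bearing SQL constant is an assumed variant rather than the repo's actual query:

// Assumed SQL: the existing query plus "LIMIT @MaxBatchSize".
var torrents = await connection.QueryAsync<Torrent>(
    GetMovieAndSeriesTorrentsNotProcessedSqlWithLimit,
    new { MaxBatchSize = rabbitConfig.MaxPublishBatchSize });

return torrents.ToList();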

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Services;
+namespace Producer.Services;
 
 public class IpService(ILogger<IpService> logger, IHttpClientFactory httpClientFactory) : IIpService
 {

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Services;
+namespace Producer.Services;
 
 public class TorrentPublisher(ISendEndpointProvider sendEndpointProvider, RabbitMqConfiguration configuration) : IMessagePublisher
 {