Introduce max batch size and configurable publish window

Still need to implement queue size limit
Also fix env var consistency between the addon and the consumer
iPromKnight
2024-02-02 13:49:42 +00:00
parent 0644fe7720
commit 68edaba308
41 changed files with 88 additions and 51 deletions
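
With the defaults added here (MaxPublishBatchSize = 100, PublishIntervalInSeconds = 10), the PublisherJob drains at most 100 unprocessed torrents per Quartz tick, i.e. a sustained publish ceiling of roughly 100 / 10 = 10 messages per second. MaxQueueSize (1000) is read into configuration but, per the message above, not yet enforced.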

View File

@@ -10,7 +10,7 @@ export const cacheConfig = {
}
export const databaseConfig = {
-DATABASE_URI: process.env.POSTGRES_DATABASE_URI || 'postgres://postgres:postgres@localhost:5432/selfhostio',
+DATABASE_URI: process.env.DATABASE_URI || 'postgres://postgres:postgres@localhost:5432/selfhostio',
ENABLE_SYNC: parseBool(process.env.ENABLE_SYNC, true)
}

View File

@@ -4,6 +4,9 @@
"Username": "guest",
"Password": "guest",
"QueueName": "test-queue",
"Durable": true
"Durable": true,
"MaxQueueSize": 1000,
"MaxPublishBatchSize": 100,
"PublishIntervalInSeconds": 10
}
}
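
The new MaxQueueSize setting is bound here but not yet enforced (see the commit message). One possible enforcement, not part of this commit, is RabbitMQ's x-max-length queue argument; a minimal sketch using MassTransit's RabbitMQ transport, with `services` and `rabbitConfig` assumed to be in scope:

// Hypothetical sketch (not in this commit): cap the queue at MaxQueueSize.
// "x-overflow" = "reject-publish" makes RabbitMQ refuse new messages when
// the queue is full instead of silently dropping the oldest ones.
services.AddMassTransit(bus =>
{
    bus.UsingRabbitMq((context, cfg) =>
    {
        cfg.ReceiveEndpoint(rabbitConfig.QueueName!, e =>
        {
            e.SetQueueArgument("x-max-length", rabbitConfig.MaxQueueSize);
            e.SetQueueArgument("x-overflow", "reject-publish");
        });
    });
});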

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers;
+namespace Producer.Crawlers;
public abstract class BaseCrawler(ILogger<BaseCrawler> logger, IDataStorage storage) : ICrawler
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers;
+namespace Producer.Crawlers;
public abstract class BaseJsonCrawler(IHttpClientFactory httpClientFactory, ILogger<BaseJsonCrawler> logger, IDataStorage storage) : BaseCrawler(logger, storage)
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers;
+namespace Producer.Crawlers;
public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogger<BaseXmlCrawler> logger, IDataStorage storage) : BaseCrawler(logger, storage)
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers;
+namespace Producer.Crawlers;
public class CrawlerProvider(IServiceProvider serviceProvider) : ICrawlerProvider
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers.Sites;
+namespace Producer.Crawlers.Sites;
public partial class DebridMediaManagerCrawler(
IHttpClientFactory httpClientFactory,
@@ -83,6 +83,7 @@ public partial class DebridMediaManagerCrawler(
if (!result.Success)
{
logger.LogWarning("Failed to mark page as ingested: [{Error}]", result.ErrorMessage);
+return;
}
logger.LogInformation("Successfully marked page as ingested");

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers.Sites;
+namespace Producer.Crawlers.Sites;
public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers.Sites;
+namespace Producer.Crawlers.Sites;
public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers.Sites;
+namespace Producer.Crawlers.Sites;
public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage) : BaseJsonCrawler(httpClientFactory, logger, storage)
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Crawlers.Sites;
+namespace Producer.Crawlers.Sites;
public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Extensions;
+namespace Producer.Extensions;
public static class ConfigurationExtensions
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Extensions;
+namespace Producer.Extensions;
internal static class ConfigureHostBuilderExtensions
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Extensions;
+namespace Producer.Extensions;
public static class ServiceCollectionExtensions
{
@@ -53,6 +53,7 @@ public static class ServiceCollectionExtensions
{
var scrapeConfiguration = LoadScrapeConfiguration(services, configuration);
var githubConfiguration = LoadGithubConfiguration(services, configuration);
+var rabbitConfig = LoadRabbitMQConfiguration(services, configuration);
services
.AddTransient<SyncEzTvJob>()
@@ -75,7 +76,7 @@ public static class ServiceCollectionExtensions
AddJobWithTrigger<SyncYtsJob>(quartz, SyncYtsJob.Key, SyncYtsJob.Trigger, scrapeConfiguration);
AddJobWithTrigger<SyncTgxJob>(quartz, SyncTgxJob.Key, SyncTgxJob.Trigger, scrapeConfiguration);
AddJobWithTrigger<IPJob>(quartz, IPJob.Key, IPJob.Trigger, 60 * 5);
AddJobWithTrigger<PublisherJob>(quartz, PublisherJob.Key, PublisherJob.Trigger, 10);
AddJobWithTrigger<PublisherJob>(quartz, PublisherJob.Key, PublisherJob.Trigger, rabbitConfig.PublishIntervalInSeconds);
if (!string.IsNullOrEmpty(githubConfiguration.PAT))
{
@@ -98,10 +99,21 @@ public static class ServiceCollectionExtensions
ArgumentNullException.ThrowIfNull(githubConfiguration, nameof(githubConfiguration));
-services.AddSingleton(githubConfiguration);
+services.TryAddSingleton(githubConfiguration);
return githubConfiguration;
}
+private static RabbitMqConfiguration LoadRabbitMQConfiguration(IServiceCollection services, IConfiguration configuration)
+{
+    var rabbitConfiguration = configuration.GetSection(RabbitMqConfiguration.SectionName).Get<RabbitMqConfiguration>();
+    ArgumentNullException.ThrowIfNull(rabbitConfiguration, nameof(rabbitConfiguration));
+    services.TryAddSingleton(rabbitConfiguration);
+    return rabbitConfiguration;
+}
private static ScrapeConfiguration LoadScrapeConfiguration(IServiceCollection services, IConfiguration configuration)
{
@@ -109,7 +121,7 @@ public static class ServiceCollectionExtensions
ArgumentNullException.ThrowIfNull(scrapeConfiguration, nameof(scrapeConfiguration));
-services.AddSingleton(scrapeConfiguration);
+services.TryAddSingleton(scrapeConfiguration);
return scrapeConfiguration;
}
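
AddJobWithTrigger itself does not appear in this diff; a plausible reconstruction of how PublishIntervalInSeconds reaches the Quartz schedule follows. The body is an assumption, only the call sites above are real:

// Hypothetical reconstruction (not part of this commit): map an interval
// in seconds onto a repeating Quartz simple schedule for the given job.
private static void AddJobWithTrigger<TJob>(
    IServiceCollectionQuartzConfigurator quartz,
    JobKey key,
    TriggerKey trigger,
    int intervalInSeconds) where TJob : class, IJob
{
    quartz.AddJob<TJob>(key);
    quartz.AddTrigger(t => t
        .ForJob(key)
        .WithIdentity(trigger)
        .StartNow()
        .WithSimpleSchedule(s => s
            .WithIntervalInSeconds(intervalInSeconds)
            .RepeatForever()));
}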

View File

@@ -9,14 +9,14 @@ global using LZStringCSharp;
global using MassTransit;
global using Microsoft.AspNetCore.Builder;
global using Microsoft.Extensions.DependencyInjection;
+global using Microsoft.Extensions.DependencyInjection.Extensions;
global using Microsoft.Extensions.Logging;
global using Npgsql;
global using Quartz;
-global using Scraper.Crawlers;
-global using Scraper.Crawlers.Sites;
-global using Scraper.Extensions;
-global using Scraper.Interfaces;
-global using Scraper.Jobs;
-global using Scraper.Models;
-global using Scraper.Services;
+global using Producer.Crawlers;
+global using Producer.Crawlers.Sites;
+global using Producer.Interfaces;
+global using Producer.Jobs;
+global using Producer.Models;
+global using Producer.Services;
global using Serilog;

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Interfaces;
+namespace Producer.Interfaces;
public interface ICrawler
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Interfaces;
+namespace Producer.Interfaces;
public interface ICrawlerProvider
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Interfaces;
+namespace Producer.Interfaces;
public interface IDataStorage
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Interfaces;
+namespace Producer.Interfaces;
public interface IIpService
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Interfaces;
+namespace Producer.Interfaces;
public interface IMessagePublisher
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Jobs;
+namespace Producer.Jobs;
public abstract class BaseJob(ICrawlerProvider crawlerProvider) : IJob
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Jobs;
+namespace Producer.Jobs;
public interface ICrawlerJob<out TCrawler> : IJob
where TCrawler : ICrawler

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Jobs;
+namespace Producer.Jobs;
[DisallowConcurrentExecution]
public class IPJob(IIpService ipService) : IJob

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Jobs;
+namespace Producer.Jobs;
[DisallowConcurrentExecution]
public class PublisherJob(IMessagePublisher publisher, IDataStorage storage, ILogger<PublisherJob> logger) : IJob

View File

@@ -1,4 +1,6 @@
-namespace Scraper.Jobs;
+using DebridMediaManagerCrawler = Producer.Crawlers.Sites.DebridMediaManagerCrawler;
+
+namespace Producer.Jobs;
[DisallowConcurrentExecution]
public class SyncDmmJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)

View File

@@ -1,4 +1,6 @@
-namespace Scraper.Jobs;
+using Producer.Crawlers.Sites;
+
+namespace Producer.Jobs;
[DisallowConcurrentExecution]
public class SyncEzTvJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)

View File

@@ -1,4 +1,6 @@
-namespace Scraper.Jobs;
+using TgxCrawler = Producer.Crawlers.Sites.TgxCrawler;
+
+namespace Producer.Jobs;
[DisallowConcurrentExecution]
public class SyncTgxJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)

View File

@@ -1,4 +1,6 @@
-namespace Scraper.Jobs;
+using Producer.Crawlers.Sites;
+
+namespace Producer.Jobs;
[DisallowConcurrentExecution]
public class SyncTpbJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)

View File

@@ -1,4 +1,6 @@
-namespace Scraper.Jobs;
+using Producer.Crawlers.Sites;
+
+namespace Producer.Jobs;
[DisallowConcurrentExecution]
public class SyncYtsJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
public class GithubConfiguration
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
public class RabbitMqConfiguration
{
@@ -10,4 +10,7 @@ public class RabbitMqConfiguration
public string? Password { get; set; }
public string? QueueName { get; set; }
public bool Durable { get; set; }
+public int MaxQueueSize { get; set; } = 1000;
+public int MaxPublishBatchSize { get; set; } = 100;
+public int PublishIntervalInSeconds { get; set; } = 10;
}

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
public record InsertTorrentResult(bool Success, int InsertedCount = 0, string? ErrorMessage = null);
public record UpdatedTorrentResult(bool Success, int UpdatedCount = 0, string? ErrorMessage = null);

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
public class ScrapeConfiguration
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
public class Scraper
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Models;
+namespace Producer.Models;
// Torrent represents a crawled torrent from one of our
// supported sources.

View File

@@ -1,4 +1,6 @@
+using Producer.Extensions;
+
var builder = WebApplication.CreateBuilder(args);
builder.Configuration
.AddScrapeConfiguration();

View File

@@ -1,6 +1,6 @@
-namespace Scraper.Services;
+namespace Producer.Services;
-public class DapperDataStorage(ScrapeConfiguration configuration, ILogger<DapperDataStorage> logger) : IDataStorage
+public class DapperDataStorage(ScrapeConfiguration configuration, RabbitMqConfiguration rabbitConfig, ILogger<DapperDataStorage> logger) : IDataStorage
{
private const string InsertTorrentSql =
"""
@@ -65,7 +65,7 @@ public class DapperDataStorage(ScrapeConfiguration configuration, ILogger<Dapper
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync(cancellationToken);
var torrents = await connection.QueryAsync<Torrent>(GetMovieAndSeriesTorrentsNotProcessedSql);
-return torrents.ToList();
+return torrents.Take(rabbitConfig.MaxPublishBatchSize).ToList();
}
catch (Exception e)
{
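
Note that Take() trims the batch only after QueryAsync has materialized every unprocessed row in memory; a variant (not in this commit) pushes the cap into the query so Postgres returns at most one batch. A sketch, assuming the SQL text tolerates an appended LIMIT clause:

// Alternative sketch (assumption, not the commit's code): let Postgres
// enforce the batch size instead of trimming client-side.
var torrents = await connection.QueryAsync<Torrent>(
    GetMovieAndSeriesTorrentsNotProcessedSql + " LIMIT @MaxBatch",
    new { MaxBatch = rabbitConfig.MaxPublishBatchSize });
return torrents.ToList();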

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Services;
+namespace Producer.Services;
public class IpService(ILogger<IpService> logger, IHttpClientFactory httpClientFactory) : IIpService
{

View File

@@ -1,4 +1,4 @@
-namespace Scraper.Services;
+namespace Producer.Services;
public class TorrentPublisher(ISendEndpointProvider sendEndpointProvider, RabbitMqConfiguration configuration) : IMessagePublisher
{