Introduce max batch size, and configurable publish window
Still need to implement queue size limit. Also fixes env var consistency between addon and consumer.
This commit is contained in:
2
env/consumer.env
vendored
2
env/consumer.env
vendored
@@ -1,6 +1,6 @@
|
||||
TZ=Europe/London
|
||||
MONGODB_URI=mongodb://mongo:mongo@mongodb/selfhostio?tls=false&authSource=admin
|
||||
POSTGRES_DATABASE_URI=postgres://postgres:postgres@postgres/selfhostio
|
||||
DATABASE_URI=postgres://postgres:postgres@postgres/selfhostio
|
||||
RABBIT_URI=amqp://guest:guest@rabbitmq:5672/?heartbeat=30
|
||||
QUEUE_NAME=ingested
|
||||
JOB_CONCURRENCY=5
|
||||
|
||||
6
env/producer.env
vendored
6
env/producer.env
vendored
@@ -1,4 +1,10 @@
|
||||
ScrapeConfiguration__StorageConnectionString=host=postgres;username=postgres;password=postgres;database=selfhostio;
|
||||
RabbitMqConfiguration__Host=rabbitmq
|
||||
RabbitMqConfiguration__QueueName=ingested
|
||||
RabbitMqConfiguration__Username=guest
|
||||
RabbitMqConfiguration__Password=guest
|
||||
RabbitMqConfiguration__Durable=true
|
||||
RabbitMqConfiguration__MaxQueueSize=1000
|
||||
RabbitMqConfiguration__MaxPublishBatchSize=100
|
||||
RabbitMqConfiguration__PublishIntervalInSeconds=10
|
||||
GithubSettings__PAT=
|
||||
@@ -10,7 +10,7 @@ export const cacheConfig = {
|
||||
}
|
||||
|
||||
export const databaseConfig = {
|
||||
DATABASE_URI: process.env.POSTGRES_DATABASE_URI || 'postgres://postgres:postgres@localhost:5432/selfhostio',
|
||||
DATABASE_URI: process.env.DATABASE_URI || 'postgres://postgres:postgres@localhost:5432/selfhostio',
|
||||
ENABLE_SYNC: parseBool(process.env.ENABLE_SYNC, true)
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,9 @@
|
||||
"Username": "guest",
|
||||
"Password": "guest",
|
||||
"QueueName": "test-queue",
|
||||
"Durable": true
|
||||
"Durable": true,
|
||||
"MaxQueueSize": 1000,
|
||||
"MaxPublishBatchSize": 100,
|
||||
"PublishIntervalInSeconds": 10
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Crawlers;
|
||||
namespace Producer.Crawlers;
|
||||
|
||||
public abstract class BaseCrawler(ILogger<BaseCrawler> logger, IDataStorage storage) : ICrawler
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Crawlers;
|
||||
namespace Producer.Crawlers;
|
||||
|
||||
public abstract class BaseJsonCrawler(IHttpClientFactory httpClientFactory, ILogger<BaseJsonCrawler> logger, IDataStorage storage) : BaseCrawler(logger, storage)
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Crawlers;
|
||||
namespace Producer.Crawlers;
|
||||
|
||||
public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogger<BaseXmlCrawler> logger, IDataStorage storage) : BaseCrawler(logger, storage)
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Crawlers;
|
||||
namespace Producer.Crawlers;
|
||||
|
||||
public class CrawlerProvider(IServiceProvider serviceProvider) : ICrawlerProvider
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Crawlers.Sites;
|
||||
namespace Producer.Crawlers.Sites;
|
||||
|
||||
public partial class DebridMediaManagerCrawler(
|
||||
IHttpClientFactory httpClientFactory,
|
||||
@@ -83,6 +83,7 @@ public partial class DebridMediaManagerCrawler(
|
||||
if (!result.Success)
|
||||
{
|
||||
logger.LogWarning("Failed to mark page as ingested: [{Error}]", result.ErrorMessage);
|
||||
return;
|
||||
}
|
||||
|
||||
logger.LogInformation("Successfully marked page as ingested");
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Crawlers.Sites;
|
||||
namespace Producer.Crawlers.Sites;
|
||||
|
||||
public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Crawlers.Sites;
|
||||
namespace Producer.Crawlers.Sites;
|
||||
|
||||
public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Crawlers.Sites;
|
||||
namespace Producer.Crawlers.Sites;
|
||||
|
||||
public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage) : BaseJsonCrawler(httpClientFactory, logger, storage)
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Crawlers.Sites;
|
||||
namespace Producer.Crawlers.Sites;
|
||||
|
||||
public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Extensions;
|
||||
namespace Producer.Extensions;
|
||||
|
||||
public static class ConfigurationExtensions
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Extensions;
|
||||
namespace Producer.Extensions;
|
||||
|
||||
internal static class ConfigureHostBuilderExtensions
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Extensions;
|
||||
namespace Producer.Extensions;
|
||||
|
||||
public static class ServiceCollectionExtensions
|
||||
{
|
||||
@@ -53,6 +53,7 @@ public static class ServiceCollectionExtensions
|
||||
{
|
||||
var scrapeConfiguration = LoadScrapeConfiguration(services, configuration);
|
||||
var githubConfiguration = LoadGithubConfiguration(services, configuration);
|
||||
var rabbitConfig = LoadRabbitMQConfiguration(services, configuration);
|
||||
|
||||
services
|
||||
.AddTransient<SyncEzTvJob>()
|
||||
@@ -75,7 +76,7 @@ public static class ServiceCollectionExtensions
|
||||
AddJobWithTrigger<SyncYtsJob>(quartz, SyncYtsJob.Key, SyncYtsJob.Trigger, scrapeConfiguration);
|
||||
AddJobWithTrigger<SyncTgxJob>(quartz, SyncTgxJob.Key, SyncTgxJob.Trigger, scrapeConfiguration);
|
||||
AddJobWithTrigger<IPJob>(quartz, IPJob.Key, IPJob.Trigger, 60 * 5);
|
||||
AddJobWithTrigger<PublisherJob>(quartz, PublisherJob.Key, PublisherJob.Trigger, 10);
|
||||
AddJobWithTrigger<PublisherJob>(quartz, PublisherJob.Key, PublisherJob.Trigger, rabbitConfig.PublishIntervalInSeconds);
|
||||
|
||||
if (!string.IsNullOrEmpty(githubConfiguration.PAT))
|
||||
{
|
||||
@@ -98,10 +99,21 @@ public static class ServiceCollectionExtensions
|
||||
|
||||
ArgumentNullException.ThrowIfNull(githubConfiguration, nameof(githubConfiguration));
|
||||
|
||||
services.AddSingleton(githubConfiguration);
|
||||
services.TryAddSingleton(githubConfiguration);
|
||||
|
||||
return githubConfiguration;
|
||||
}
|
||||
|
||||
/// <summary>
/// Binds the configuration section named by <see cref="RabbitMqConfiguration.SectionName"/> to a
/// <see cref="RabbitMqConfiguration"/> instance, registers that instance as a singleton, and returns it
/// so the caller can read its values during service registration.
/// </summary>
/// <param name="services">Service collection that receives the bound configuration instance.</param>
/// <param name="configuration">Configuration root from which the RabbitMQ section is read.</param>
/// <returns>The bound <see cref="RabbitMqConfiguration"/> instance.</returns>
/// <exception cref="ArgumentNullException">Thrown when binding the section yields <c>null</c>.</exception>
private static RabbitMqConfiguration LoadRabbitMQConfiguration(IServiceCollection services, IConfiguration configuration)
|
||||
{
|
||||
var rabbitConfiguration = configuration.GetSection(RabbitMqConfiguration.SectionName).Get<RabbitMqConfiguration>();
|
||||
|
||||
ArgumentNullException.ThrowIfNull(rabbitConfiguration, nameof(rabbitConfiguration));
|
||||
|
||||
services.TryAddSingleton(rabbitConfiguration);
|
||||
|
||||
return rabbitConfiguration;
|
||||
}
|
||||
|
||||
private static ScrapeConfiguration LoadScrapeConfiguration(IServiceCollection services, IConfiguration configuration)
|
||||
{
|
||||
@@ -109,7 +121,7 @@ public static class ServiceCollectionExtensions
|
||||
|
||||
ArgumentNullException.ThrowIfNull(scrapeConfiguration, nameof(scrapeConfiguration));
|
||||
|
||||
services.AddSingleton(scrapeConfiguration);
|
||||
services.TryAddSingleton(scrapeConfiguration);
|
||||
|
||||
return scrapeConfiguration;
|
||||
}
|
||||
|
||||
@@ -9,14 +9,14 @@ global using LZStringCSharp;
|
||||
global using MassTransit;
|
||||
global using Microsoft.AspNetCore.Builder;
|
||||
global using Microsoft.Extensions.DependencyInjection;
|
||||
global using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
global using Microsoft.Extensions.Logging;
|
||||
global using Npgsql;
|
||||
global using Quartz;
|
||||
global using Scraper.Crawlers;
|
||||
global using Scraper.Crawlers.Sites;
|
||||
global using Scraper.Extensions;
|
||||
global using Scraper.Interfaces;
|
||||
global using Scraper.Jobs;
|
||||
global using Scraper.Models;
|
||||
global using Scraper.Services;
|
||||
global using Producer.Crawlers;
|
||||
global using Producer.Crawlers.Sites;
|
||||
global using Producer.Interfaces;
|
||||
global using Producer.Jobs;
|
||||
global using Producer.Models;
|
||||
global using Producer.Services;
|
||||
global using Serilog;
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Interfaces;
|
||||
namespace Producer.Interfaces;
|
||||
|
||||
public interface ICrawler
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Interfaces;
|
||||
namespace Producer.Interfaces;
|
||||
|
||||
public interface ICrawlerProvider
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Interfaces;
|
||||
namespace Producer.Interfaces;
|
||||
|
||||
public interface IDataStorage
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Interfaces;
|
||||
namespace Producer.Interfaces;
|
||||
|
||||
public interface IIpService
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Interfaces;
|
||||
namespace Producer.Interfaces;
|
||||
|
||||
public interface IMessagePublisher
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Jobs;
|
||||
namespace Producer.Jobs;
|
||||
|
||||
public abstract class BaseJob(ICrawlerProvider crawlerProvider) : IJob
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Jobs;
|
||||
namespace Producer.Jobs;
|
||||
|
||||
public interface ICrawlerJob<out TCrawler> : IJob
|
||||
where TCrawler : ICrawler
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Jobs;
|
||||
namespace Producer.Jobs;
|
||||
|
||||
[DisallowConcurrentExecution]
|
||||
public class IPJob(IIpService ipService) : IJob
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Jobs;
|
||||
namespace Producer.Jobs;
|
||||
|
||||
[DisallowConcurrentExecution]
|
||||
public class PublisherJob(IMessagePublisher publisher, IDataStorage storage, ILogger<PublisherJob> logger) : IJob
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
namespace Scraper.Jobs;
|
||||
using DebridMediaManagerCrawler = Producer.Crawlers.Sites.DebridMediaManagerCrawler;
|
||||
|
||||
namespace Producer.Jobs;
|
||||
|
||||
[DisallowConcurrentExecution]
|
||||
public class SyncDmmJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
namespace Scraper.Jobs;
|
||||
using Producer.Crawlers.Sites;
|
||||
|
||||
namespace Producer.Jobs;
|
||||
|
||||
[DisallowConcurrentExecution]
|
||||
public class SyncEzTvJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
namespace Scraper.Jobs;
|
||||
using TgxCrawler = Producer.Crawlers.Sites.TgxCrawler;
|
||||
|
||||
namespace Producer.Jobs;
|
||||
|
||||
[DisallowConcurrentExecution]
|
||||
public class SyncTgxJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
namespace Scraper.Jobs;
|
||||
using Producer.Crawlers.Sites;
|
||||
|
||||
namespace Producer.Jobs;
|
||||
|
||||
[DisallowConcurrentExecution]
|
||||
public class SyncTpbJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
namespace Scraper.Jobs;
|
||||
using Producer.Crawlers.Sites;
|
||||
|
||||
namespace Producer.Jobs;
|
||||
|
||||
[DisallowConcurrentExecution]
|
||||
public class SyncYtsJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Models;
|
||||
namespace Producer.Models;
|
||||
|
||||
public class GithubConfiguration
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Models;
|
||||
namespace Producer.Models;
|
||||
|
||||
public class RabbitMqConfiguration
|
||||
{
|
||||
@@ -10,4 +10,7 @@ public class RabbitMqConfiguration
|
||||
public string? Password { get; set; }
|
||||
public string? QueueName { get; set; }
|
||||
public bool Durable { get; set; }
|
||||
public int MaxQueueSize { get; set; } = 1000;
|
||||
public int MaxPublishBatchSize { get; set; } = 100;
|
||||
/// <summary>
/// Interval, in seconds, between runs of the publisher job.
/// </summary>
/// <remarks>
/// The default is 10 seconds. It was previously written as <c>1000 * 10</c>, a milliseconds-style
/// value that meant 10,000 seconds for a property expressed in seconds; 10 matches the value shipped
/// in the env/appsettings files and the interval that used to be hard-coded on the trigger.
/// </remarks>
public int PublishIntervalInSeconds { get; set; } = 10;
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Models;
|
||||
namespace Producer.Models;
|
||||
|
||||
// Result record for an insert operation: a success flag, an inserted count (defaults to 0) and an optional error message (defaults to null).
public record InsertTorrentResult(bool Success, int InsertedCount = 0, string? ErrorMessage = null);
|
||||
// Result record for an update operation: a success flag, an updated count (defaults to 0) and an optional error message (defaults to null).
public record UpdatedTorrentResult(bool Success, int UpdatedCount = 0, string? ErrorMessage = null);
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Models;
|
||||
namespace Producer.Models;
|
||||
|
||||
public class ScrapeConfiguration
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Models;
|
||||
namespace Producer.Models;
|
||||
|
||||
public class Scraper
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Models;
|
||||
namespace Producer.Models;
|
||||
|
||||
// Torrent represents a crawled torrent from one of our
|
||||
// supported sources.
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
var builder = WebApplication.CreateBuilder(args);
|
||||
using Producer.Extensions;
|
||||
|
||||
var builder = WebApplication.CreateBuilder(args);
|
||||
|
||||
builder.Configuration
|
||||
.AddScrapeConfiguration();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
namespace Scraper.Services;
|
||||
namespace Producer.Services;
|
||||
|
||||
public class DapperDataStorage(ScrapeConfiguration configuration, ILogger<DapperDataStorage> logger) : IDataStorage
|
||||
public class DapperDataStorage(ScrapeConfiguration configuration, RabbitMqConfiguration rabbitConfig, ILogger<DapperDataStorage> logger) : IDataStorage
|
||||
{
|
||||
private const string InsertTorrentSql =
|
||||
"""
|
||||
@@ -65,7 +65,7 @@ public class DapperDataStorage(ScrapeConfiguration configuration, ILogger<Dapper
|
||||
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
|
||||
await connection.OpenAsync(cancellationToken);
|
||||
var torrents = await connection.QueryAsync<Torrent>(GetMovieAndSeriesTorrentsNotProcessedSql);
|
||||
return torrents.ToList();
|
||||
return torrents.Take(rabbitConfig.MaxPublishBatchSize).ToList();
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Services;
|
||||
namespace Producer.Services;
|
||||
|
||||
public class IpService(ILogger<IpService> logger, IHttpClientFactory httpClientFactory) : IIpService
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
namespace Scraper.Services;
|
||||
namespace Producer.Services;
|
||||
|
||||
public class TorrentPublisher(ISendEndpointProvider sendEndpointProvider, RabbitMqConfiguration configuration) : IMessagePublisher
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user