Merge pull request #102 from iPromKnight/rabbit-envvars

BREAKING: Clean up RabbitMQ env vars and GitHub PAT
This commit is contained in:
purple_emily
2024-02-29 10:26:24 +00:00
committed by GitHub
24 changed files with 194 additions and 116 deletions

View File

@@ -15,18 +15,28 @@ MONGODB_DB=knightcrawler
MONGODB_USER=mongo
MONGODB_PASSWORD=mongo
# RabbitMQ
RABBITMQ_HOST=rabbitmq
RABBITMQ_USER=guest
RABBITMQ_PASSWORD=guest
RABBITMQ_QUEUE_NAME=ingested
RABBITMQ_DURABLE=true
RABBITMQ_MAX_QUEUE_SIZE=0
RABBITMQ_MAX_PUBLISH_BATCH_SIZE=500
RABBITMQ_PUBLISH_INTERVAL_IN_SECONDS=10
# Metadata
## Only used if METADATA_DOWNLOAD_IMDB_DATA_ONCE is set to false. If it is true, the schedule is ignored.
METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE=0 0 1 * * *
METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE="0 0 1 * *"
## If true, the metadata will be downloaded once and then the schedule will be ignored
METADATA_DOWNLOAD_IMDB_DATA_ONCE=true
## Controls the number of records held in memory at any given time during import; higher values consume more memory
METADATA_INSERT_BATCH_SIZE=25000
# Addon
DEBUG_MODE=false
# Consumer
RABBIT_URI=amqp://guest:guest@rabbitmq:5672/?heartbeat=30
QUEUE_NAME=ingested
JOB_CONCURRENCY=5
JOBS_ENABLED=true
## Can be set to debug for much more verbose output (useful during development)
@@ -40,12 +50,4 @@ CONSUMER_REPLICAS=3
AUTO_CREATE_AND_APPLY_MIGRATIONS=false
# Producer
RabbitMqConfiguration__Host=rabbitmq
RabbitMqConfiguration__QueueName=ingested
RabbitMqConfiguration__Username=guest
RabbitMqConfiguration__Password=guest
RabbitMqConfiguration__Durable=true
RabbitMqConfiguration__MaxQueueSize=0
RabbitMqConfiguration__MaxPublishBatchSize=500
RabbitMqConfiguration__PublishIntervalInSeconds=10
GithubSettings__PAT=
GITHUB_PAT=

View File

@@ -5,7 +5,9 @@ public class JobConfiguration
// Environment variable name parts. NOTE(review): the helper that joins them is not
// visible here; presumably it yields "METADATA_<NAME>", which matches the names used
// in the sample .env (e.g. METADATA_INSERT_BATCH_SIZE) — confirm against the helper.
private const string Prefix = "METADATA";
private const string DownloadImdbDataVariable = "DOWNLOAD_IMDB_DATA_SCHEDULE";
private const string DownloadImdbDataOnceVariable = "DOWNLOAD_IMDB_DATA_ONCE";
private const string InsertBatchSizeVariable = "INSERT_BATCH_SIZE";
/// <summary>
/// Batch size used by the IMDB import; read from the environment with a fallback of 25,000.
/// </summary>
public int InsertBatchSize { get; init; } = Prefix.GetEnvironmentVariableAsInt(InsertBatchSizeVariable, 25_000);
/// <summary>
/// Cron schedule for downloading IMDB data; falls back to <c>CronExpressions.EveryHour</c>
/// when no value is supplied.
/// </summary>
public string DownloadImdbCronSchedule { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(DownloadImdbDataVariable, CronExpressions.EveryHour);
/// <summary>
/// Whether the IMDB data should be downloaded only once. No explicit fallback is passed,
/// so the helper's own default applies when the variable is absent (presumably false — TODO confirm).
/// </summary>
public bool DownloadImdbOnce { get; init; } = Prefix.GetEnvironmentVariableAsBool(DownloadImdbDataOnceVariable);
}

View File

@@ -8,6 +8,7 @@ public class MongoConfiguration
private const string DbVariable = "DB";
private const string UsernameVariable = "USER";
private const string PasswordVariable = "PASSWORD";
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 27017);

View File

@@ -42,11 +42,16 @@ public class ImdbMongoDbService
{
try
{
// Create index for PrimaryTitle
var indexPrimaryTitle = Builders<ImdbEntry>.IndexKeys.Ascending(e => e.PrimaryTitle);
var modelPrimaryTitle = new CreateIndexModel<ImdbEntry>(indexPrimaryTitle);
_imdbCollection.Indexes.CreateOne(modelPrimaryTitle);
// Create compound index for PrimaryTitle, TitleType, and StartYear
var indexKeysDefinition = Builders<ImdbEntry>.IndexKeys
.Text(e => e.PrimaryTitle)
.Ascending(e => e.TitleType)
.Ascending(e => e.StartYear);
var createIndexOptions = new CreateIndexOptions { Background = true };
var indexModel = new CreateIndexModel<ImdbEntry>(indexKeysDefinition, createIndexOptions);
_imdbCollection.Indexes.CreateOne(indexModel);
return true;
}

View File

@@ -1,9 +1,7 @@
namespace Metadata.Features.ImportImdbData;
public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService)
public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService, JobConfiguration configuration)
{
private const int BatchSize = 50_000;
public async Task<DeleteDownloadedImdbDataRequest> Handle(ImportImdbDataRequest request, CancellationToken cancellationToken)
{
logger.LogInformation("Importing Downloaded IMDB data from {FilePath}", request.FilePath);
@@ -18,7 +16,7 @@ public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler>
using var reader = new StreamReader(request.FilePath);
using var csv = new CsvReader(reader, config);
var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(BatchSize)
var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(configuration.InsertBatchSize)
{
FullMode = BoundedChannelFullMode.Wait,
});
@@ -53,7 +51,7 @@ public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler>
movieData,
};
while (batch.Count < BatchSize && channel.Reader.TryRead(out var nextMovieData))
while (batch.Count < configuration.InsertBatchSize && channel.Reader.TryRead(out var nextMovieData))
{
batch.Add(nextMovieData);
}

View File

@@ -1,4 +1,13 @@
import {BooleanHelpers} from "@helpers/boolean_helpers";
export const rabbitConfig = {
RABBIT_URI: process.env.RABBIT_URI || 'amqp://localhost',
QUEUE_NAME: process.env.QUEUE_NAME || 'test-queue'
HOST: process.env.RABBITMQ_HOST || 'rabbitmq',
USER: process.env.RABBITMQ_USER || 'guest',
PASSWORD: process.env.RABBITMQ_PASSWORD || 'guest',
QUEUE_NAME: process.env.RABBITMQ_QUEUE_NAME || 'ingested',
DURABLE: BooleanHelpers.parseBool(process.env.RABBITMQ_DURABLE, true),
get RABBIT_URI(): string {
return `amqp://${this.USER}:${this.PASSWORD}@${this.HOST}?heartbeat=30`;
}
};

View File

@@ -13,4 +13,6 @@ const ImdbEntriesSchema: Schema = new Schema({
TitleType: { type: String, default: "" },
});
ImdbEntriesSchema.index({ PrimaryTitle: 'text', TitleType: 1, StartYear: 1 }, { background: true });
export const ImdbEntryModel = mongoose.model<IImdbEntry>('ImdbEntry', ImdbEntriesSchema, 'imdb-entries');

View File

@@ -1,17 +1,28 @@
import {TorrentType} from "@enums/torrent_types";
import {ILoggingService} from "@interfaces/logging_service";
import {IMongoMetadataQuery} from "@mongo/interfaces/mongo_metadata_query";
import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
import {ImdbEntryModel} from "@mongo/models/imdb_entries_model";
import {configurationService} from '@services/configuration_service';
import {injectable} from "inversify";
import {IocTypes} from "@setup/ioc_types";
import {inject, injectable} from "inversify";
import mongoose from 'mongoose';
@injectable()
export class MongoRepository implements IMongoRepository {
@inject(IocTypes.ILoggingService) private logger: ILoggingService;
private db: typeof mongoose = mongoose;
async connect() : Promise<void> {
await this.db.connect(configurationService.cacheConfig.MONGO_URI, {directConnection: true});
try {
await this.db.connect(configurationService.cacheConfig.MONGO_URI, {directConnection: true});
this.logger.info('Successfully connected to mongo db');
}
catch (error) {
this.logger.debug('Failed to connect to mongo db', error);
this.logger.error('Failed to connect to mongo db');
process.exit(1);
}
}
async getImdbId(title: string, category: string, year?: string | number) : Promise<string | null> {
@@ -35,7 +46,12 @@ export class MongoRepository implements IMongoRepository {
query.StartYear = year.toString();
}
const result = await ImdbEntryModel.findOne(query);
return result ? result._id : null;
try {
const result = await ImdbEntryModel.findOne(query, '_id').maxTimeMS(30000);
return result ? result._id : null;
} catch (error) {
this.logger.error('Query exceeded the 30 seconds time limit', error);
return null;
}
}
}

View File

@@ -110,12 +110,19 @@ describe('Configuration Tests', () => {
});
it('should populate rabbitConfig correctly', async () => {
process.env.RABBIT_URI = 'amqp://localhost';
process.env.QUEUE_NAME = 'test-queue';
process.env.RABBITMQ_HOST = 'rabbitmq';
process.env.RABBITMQ_USER = 'guest';
process.env.RABBITMQ_PASSWORD = 'guest';
process.env.RABBITMQ_QUEUE_NAME = 'ingested';
process.env.RABBITMQ_DURABLE = 'true';
const {configurationService} = await import("@services/configuration_service");
const {rabbitConfig} = configurationService;
expect(rabbitConfig.RABBIT_URI).toBe('amqp://localhost');
expect(rabbitConfig.QUEUE_NAME).toBe('test-queue');
expect(rabbitConfig.HOST).toBe('rabbitmq');
expect(rabbitConfig.USER).toBe('guest');
expect(rabbitConfig.PASSWORD).toBe('guest');
expect(rabbitConfig.QUEUE_NAME).toBe('ingested');
expect(rabbitConfig.DURABLE).toBe(true);
expect(rabbitConfig.RABBIT_URI).toBe('amqp://guest:guest@rabbitmq?heartbeat=30');
});
it('should populate torrentConfig correctly', async () => {

View File

@@ -1,7 +1,9 @@
import "reflect-metadata"; // required
import {TorrentType} from "@enums/torrent_types";
import {ILoggingService} from "@interfaces/logging_service";
import {MongoRepository} from "@mongo/mongo_repository";
import {Container} from "inversify";
import {IocTypes} from "@setup/ioc_types";
import {Container, inject} from "inversify";
jest.mock('@services/configuration_service', () => {
return {
@@ -20,13 +22,24 @@ jest.mock('@services/configuration_service', () => {
}
});
jest.mock('@services/logging_service', () => {
return {
error: jest.fn(),
info: jest.fn(),
debug: jest.fn(),
}
})
xdescribe('MongoRepository Tests - Manual Tests against real cluster. Skipped by default.', () => {
let mongoRepository: MongoRepository;
let mongoRepository: MongoRepository,
mockLogger: ILoggingService;
beforeEach(() => {
jest.clearAllMocks();
process.env.LOG_LEVEL = 'debug';
mockLogger = jest.requireMock<ILoggingService>('@services/logging_service');
const container = new Container();
container.bind<ILoggingService>(IocTypes.ILoggingService).toConstantValue(mockLogger);
container.bind<MongoRepository>(MongoRepository).toSelf();
mongoRepository = container.get(MongoRepository);
});

View File

@@ -1,5 +0,0 @@
{
"GithubSettings": {
"PAT": ""
}
}

View File

@@ -1,12 +0,0 @@
{
"RabbitMqConfiguration": {
"Host": "localhost",
"Username": "guest",
"Password": "guest",
"QueueName": "test-queue",
"Durable": true,
"MaxQueueSize": 0,
"MaxPublishBatchSize": 1,
"PublishIntervalInSeconds": 10
}
}

View File

@@ -1,5 +1,3 @@
using Producer.Models.Configuration;
namespace Producer.Crawlers.Sites;
public partial class DebridMediaManagerCrawler(

View File

@@ -1,5 +1,3 @@
using Producer.Models.Configuration;
namespace Producer.Extensions;
public static class ConfigurationExtensions
@@ -13,8 +11,6 @@ public static class ConfigurationExtensions
configuration.AddJsonFile(LoggingConfig, false, true);
configuration.AddJsonFile(ScrapeConfiguration.Filename, false, true);
configuration.AddJsonFile(RabbitMqConfiguration.Filename, false, true);
configuration.AddJsonFile(GithubConfiguration.Filename, false, true);
configuration.AddEnvironmentVariables();

View File

@@ -0,0 +1,68 @@
namespace Producer.Extensions;
/// <summary>
/// Helpers for reading typed configuration values from environment variables.
/// The variable name is always built as <c>{prefix}_{varName}</c>; each method is an
/// extension on the prefix string, e.g. <c>"RABBITMQ".GetEnvironmentVariableAsInt("MAX_QUEUE_SIZE")</c>.
/// </summary>
public static class EnvironmentExtensions
{
    /// <summary>
    /// Reads <c>{prefix}_{varName}</c> as a boolean.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <param name="fallback">Value returned when the variable is unset, empty or whitespace-only.</param>
    /// <returns>
    /// <c>true</c> for "true", "yes" or "1" (case-insensitive, surrounding whitespace ignored);
    /// <paramref name="fallback"/> when no value is present; <c>false</c> for any other value.
    /// </returns>
    public static bool GetEnvironmentVariableAsBool(this string prefix, string varName, bool fallback = false)
    {
        var fullVarName = GetFullVariableName(prefix, varName);
        var str = Environment.GetEnvironmentVariable(fullVarName);

        // A whitespace-only value carries no information. Treat it like an unset variable
        // so the caller's fallback applies instead of silently becoming false after Trim().
        if (string.IsNullOrWhiteSpace(str))
        {
            return fallback;
        }

        // Invariant lower-casing: the input is machine-readable, not user-facing text.
        return str.Trim().ToLowerInvariant() switch
        {
            "true" => true,
            "yes" => true,
            "1" => true,
            _ => false,
        };
    }

    /// <summary>
    /// Reads <c>{prefix}_{varName}</c> as a 32-bit integer.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <param name="fallback">Value returned when the variable is unset, empty or not a valid integer.</param>
    /// <returns>The parsed value, or <paramref name="fallback"/> when no usable value is present.</returns>
    public static int GetEnvironmentVariableAsInt(this string prefix, string varName, int fallback = 0)
    {
        var fullVarName = GetFullVariableName(prefix, varName);
        var str = Environment.GetEnvironmentVariable(fullVarName);

        if (string.IsNullOrEmpty(str))
        {
            return fallback;
        }

        // Parse with the invariant culture so the result does not depend on the host's
        // regional settings (e.g. a culture-specific negative sign).
        return int.TryParse(
            str,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var result)
            ? result
            : fallback;
    }

    /// <summary>
    /// Reads <c>{prefix}_{varName}</c> as a string and fails when it is missing.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <returns>The variable's value, never null or empty.</returns>
    /// <exception cref="InvalidOperationException">The variable is unset or empty.</exception>
    public static string GetRequiredEnvironmentVariableAsString(this string prefix, string varName)
    {
        var fullVarName = GetFullVariableName(prefix, varName);
        var str = Environment.GetEnvironmentVariable(fullVarName);

        if (string.IsNullOrEmpty(str))
        {
            throw new InvalidOperationException($"Environment variable {fullVarName} is not set");
        }

        return str;
    }

    /// <summary>
    /// Reads <c>{prefix}_{varName}</c> as a string, returning a fallback when it is missing.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <param name="fallback">Value returned when the variable is unset or empty; may be null.</param>
    /// <returns>
    /// The variable's value, or <paramref name="fallback"/>. The result is null only when
    /// the variable is missing and <paramref name="fallback"/> is null.
    /// </returns>
    [return: System.Diagnostics.CodeAnalysis.NotNullIfNotNull("fallback")]
    public static string? GetOptionalEnvironmentVariableAsString(this string prefix, string varName, string? fallback = null)
    {
        var fullVarName = GetFullVariableName(prefix, varName);
        var str = Environment.GetEnvironmentVariable(fullVarName);

        return string.IsNullOrEmpty(str) ? fallback : str;
    }

    /// <summary>Builds the full environment variable name as <c>{prefix}_{varName}</c>.</summary>
    private static string GetFullVariableName(string prefix, string varName) => $"{prefix}_{varName}";
}

View File

@@ -1,5 +1,3 @@
using Producer.Models.Configuration;
namespace Producer.Extensions;
public static class ServiceCollectionExtensions
@@ -29,13 +27,9 @@ public static class ServiceCollectionExtensions
return services;
}
internal static IServiceCollection RegisterMassTransit(this IServiceCollection services, IConfiguration configuration)
internal static IServiceCollection RegisterMassTransit(this IServiceCollection services)
{
var rabbitConfig = configuration.GetSection(RabbitMqConfiguration.SectionName).Get<RabbitMqConfiguration>();
ArgumentNullException.ThrowIfNull(rabbitConfig, nameof(rabbitConfig));
services.AddSingleton(rabbitConfig);
var rabbitConfig = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();
services.AddMassTransit(busConfigurator =>
{
@@ -56,8 +50,8 @@ public static class ServiceCollectionExtensions
internal static IServiceCollection AddQuartz(this IServiceCollection services, IConfiguration configuration)
{
var scrapeConfiguration = services.LoadConfigurationFromConfig<ScrapeConfiguration>(configuration, ScrapeConfiguration.SectionName);
var githubConfiguration = services.LoadConfigurationFromConfig<GithubConfiguration>(configuration, GithubConfiguration.SectionName);
var rabbitConfig = services.LoadConfigurationFromConfig<RabbitMqConfiguration>(configuration, RabbitMqConfiguration.SectionName);
var githubConfiguration = services.LoadConfigurationFromEnv<GithubConfiguration>();
var rabbitConfig = services.LoadConfigurationFromEnv<RabbitMqConfiguration>();
services
.AddTransient<SyncEzTvJob>()

View File

@@ -16,8 +16,10 @@ global using Npgsql;
global using Quartz;
global using Producer.Crawlers;
global using Producer.Crawlers.Sites;
global using Producer.Extensions;
global using Producer.Interfaces;
global using Producer.Jobs;
global using Producer.Models;
global using Producer.Models.Configuration;
global using Producer.Services;
global using Serilog;

View File

@@ -2,8 +2,8 @@
public class GithubConfiguration
{
public const string SectionName = "GithubSettings";
public const string Filename = "github.json";
private const string Prefix = "GITHUB";
private const string PatVariable = "PAT";
public string? PAT { get; set; }
public string? PAT { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(PatVariable);
}

View File

@@ -9,21 +9,11 @@ public class PostgresConfiguration
private const string DatabaseVariable = "DB";
private const string PortVariable = "PORT";
private string Host { get; init; } = Environment.GetEnvironmentVariable($"{Prefix}_{HostVariable}") ??
throw new InvalidOperationException($"Environment variable {Prefix}_{HostVariable} is not set");
private string Username { get; init; } = Environment.GetEnvironmentVariable($"{Prefix}_{UsernameVariable}") ??
throw new InvalidOperationException($"Environment variable {Prefix}_{UsernameVariable} is not set");
private string Password { get; init; } = Environment.GetEnvironmentVariable($"{Prefix}_{PasswordVariable}") ??
throw new InvalidOperationException($"Environment variable {Prefix}_{PasswordVariable} is not set");
private string Database { get; init; } = Environment.GetEnvironmentVariable($"{Prefix}_{DatabaseVariable}") ??
throw new InvalidOperationException($"Environment variable {Prefix}_{DatabaseVariable} is not set");
private int PORT { get; init; } = int.Parse(
Environment.GetEnvironmentVariable($"{Prefix}_{PortVariable}") ??
throw new InvalidOperationException($"Environment variable {Prefix}_{PortVariable} is not set"));
// Connection settings, each read from "{Prefix}_{...Variable}" when the object is created.
// The four string values are required: GetRequiredEnvironmentVariableAsString throws
// InvalidOperationException if the variable is unset or empty.
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
// The port is optional: it falls back to 5432 when the variable is unset or not a valid integer.
private int PORT { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);
/// <summary>
/// Connection string assembled from the host, port, username, password and database above.
/// </summary>
public string StorageConnectionString => $"Host={Host};Port={PORT};Username={Username};Password={Password};Database={Database};";
}

View File

@@ -1,18 +1,25 @@
namespace Producer.Models.Configuration;
namespace Producer.Models.Configuration;
public class RabbitMqConfiguration
{
public const string SectionName = "RabbitMqConfiguration";
public const string Filename = "rabbitmq.json";
private const string Prefix = "RABBITMQ";
private const string HostVariable = "HOST";
private const string UsernameVariable = "USER";
private const string PasswordVariable = "PASSWORD";
private const string QueueNameVariable = "QUEUE_NAME";
private const string DurableVariable = "DURABLE";
private const string MaxQueueSizeVariable = "MAX_QUEUE_SIZE";
private const string MaxPublishBatchSizeVariable = "MAX_PUBLISH_BATCH_SIZE";
private const string PublishIntervalInSecondsVariable = "PUBLISH_INTERVAL_IN_SECONDS";
public string? Host { get; set; }
public string? Username { get; set; }
public string? Password { get; set; }
public string? QueueName { get; set; }
public bool Durable { get; set; }
public int MaxQueueSize { get; set; }
public int MaxPublishBatchSize { get; set; } = 500;
public int PublishIntervalInSeconds { get; set; } = 1000 * 10;
// Required settings, read from RABBITMQ_USER, RABBITMQ_HOST, RABBITMQ_PASSWORD and
// RABBITMQ_QUEUE_NAME. GetRequiredEnvironmentVariableAsString throws
// InvalidOperationException when a variable is unset or empty, so these are never
// null after construction even though they are declared as string?.
public string? Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
public string? Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
public string? Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
public string? QueueName { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(QueueNameVariable);
// Optional settings with fallbacks: RABBITMQ_DURABLE defaults to true, RABBITMQ_MAX_QUEUE_SIZE to 0.
public bool Durable { get; init; } = Prefix.GetEnvironmentVariableAsBool(DurableVariable, true);
public int MaxQueueSize { get; init; } = Prefix.GetEnvironmentVariableAsInt(MaxQueueSizeVariable, 0);
// RABBITMQ_MAX_PUBLISH_BATCH_SIZE, defaulting to 500. This and the next property keep a
// public setter, unlike the init-only properties above.
public int MaxPublishBatchSize { get; set; } = Prefix.GetEnvironmentVariableAsInt(MaxPublishBatchSizeVariable, 500);
// RABBITMQ_PUBLISH_INTERVAL_IN_SECONDS.
// NOTE(review): the fallback evaluates to 10000, which looks like a millisecond value in a
// property named "...InSeconds"; the sample .env sets this variable to 10. Confirm whether
// the fallback should be 10.
public int PublishIntervalInSeconds { get; set; } = Prefix.GetEnvironmentVariableAsInt(PublishIntervalInSecondsVariable, 1000 * 10);
public void Validate()
{

View File

@@ -32,15 +32,6 @@
<None Include="Configuration\logging.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<Content Remove="Configuration\rabbitmq.json" />
<None Include="Configuration\rabbitmq.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<Content Remove="Configuration\github.json" />
<None Include="Configuration\github.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<Content Remove="Configuration\postgres.json" />
</ItemGroup>
</Project>

View File

@@ -1,6 +1,4 @@
using Producer.Extensions;
var builder = WebApplication.CreateBuilder(args);
var builder = WebApplication.CreateBuilder(args);
builder.Configuration
.AddScrapeConfiguration();
@@ -9,7 +7,7 @@ builder.Host
.SetupSerilog(builder.Configuration);
builder.Services
.RegisterMassTransit(builder.Configuration)
.RegisterMassTransit()
.AddDataStorage()
.AddCrawlers()
.AddQuartz(builder.Configuration);

View File

@@ -1,5 +1,3 @@
using Producer.Models.Configuration;
namespace Producer.Services;
public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConfiguration rabbitConfig, ILogger<DapperDataStorage> logger) : IDataStorage

View File

@@ -1,6 +1,4 @@
using Producer.Models.Configuration;
namespace Producer.Services;
namespace Producer.Services;
public class TorrentPublisher(
ISendEndpointProvider sendEndpointProvider,