From 79d0ef7f4d3a899377296bcc31d56d17734f5b8d Mon Sep 17 00:00:00 2001 From: iPromKnight Date: Tue, 27 Feb 2024 22:38:10 +0000 Subject: [PATCH] Create service to populate Imdb data collection within mongo We can use this collection as an alternative source to lookup imdb ids, which would be executed before name_to_imdb is called in the consumer. --- src/metadata/Configuration/logging.json | 31 ++++++ src/metadata/Dockerfile | 17 ++++ .../Extensions/ConfigurationExtensions.cs | 44 +++++++++ .../Extensions/EnvironmentExtensions.cs | 68 ++++++++++++++ .../Extensions/JsonSerializerExtensions.cs | 6 ++ .../Extensions/ServiceCollectionExtensions.cs | 30 ++++++ .../WebApplicationBuilderExtensions.cs | 19 ++++ .../Configuration/JobConfiguration.cs | 11 +++ .../Configuration/MongoConfiguration.cs | 19 ++++ .../DeleteDownloadedImdbDataRequest.cs | 3 + .../DeleteDownloadedImdbDataRequestHandler.cs | 15 +++ .../DownloadImdbData/DownloadImdbDataJob.cs | 8 ++ .../DownloadImdbData/GetImdbDataRequest.cs | 3 + .../GetImdbDataRequestHandler.cs | 30 ++++++ .../Features/ImportImdbData/ImdbEntry.cs | 15 +++ .../ImportImdbData/ImdbMongoDbService.cs | 59 ++++++++++++ .../ImportImdbData/ImportImdbDataRequest.cs | 3 + .../ImportImdbDataRequestHandler.cs | 94 +++++++++++++++++++ src/metadata/Features/Jobs/BaseJob.cs | 10 ++ src/metadata/Features/Jobs/IMetadataJob.cs | 7 ++ src/metadata/Features/Jobs/JobScheduler.cs | 34 +++++++ .../Features/Literals/CronExpressions.cs | 9 ++ src/metadata/Features/Literals/HttpClients.cs | 7 ++ src/metadata/GlobalUsings.cs | 26 +++++ src/metadata/Metadata.csproj | 30 ++++++ src/metadata/Program.cs | 13 +++ 26 files changed, 611 insertions(+) create mode 100644 src/metadata/Configuration/logging.json create mode 100644 src/metadata/Dockerfile create mode 100644 src/metadata/Extensions/ConfigurationExtensions.cs create mode 100644 src/metadata/Extensions/EnvironmentExtensions.cs create mode 100644 src/metadata/Extensions/JsonSerializerExtensions.cs create mode 
100644 src/metadata/Extensions/ServiceCollectionExtensions.cs
 create mode 100644 src/metadata/Extensions/WebApplicationBuilderExtensions.cs
 create mode 100644 src/metadata/Features/Configuration/JobConfiguration.cs
 create mode 100644 src/metadata/Features/Configuration/MongoConfiguration.cs
 create mode 100644 src/metadata/Features/DeleteDownloadedImdbData/DeleteDownloadedImdbDataRequest.cs
 create mode 100644 src/metadata/Features/DeleteDownloadedImdbData/DeleteDownloadedImdbDataRequestHandler.cs
 create mode 100644 src/metadata/Features/DownloadImdbData/DownloadImdbDataJob.cs
 create mode 100644 src/metadata/Features/DownloadImdbData/GetImdbDataRequest.cs
 create mode 100644 src/metadata/Features/DownloadImdbData/GetImdbDataRequestHandler.cs
 create mode 100644 src/metadata/Features/ImportImdbData/ImdbEntry.cs
 create mode 100644 src/metadata/Features/ImportImdbData/ImdbMongoDbService.cs
 create mode 100644 src/metadata/Features/ImportImdbData/ImportImdbDataRequest.cs
 create mode 100644 src/metadata/Features/ImportImdbData/ImportImdbDataRequestHandler.cs
 create mode 100644 src/metadata/Features/Jobs/BaseJob.cs
 create mode 100644 src/metadata/Features/Jobs/IMetadataJob.cs
 create mode 100644 src/metadata/Features/Jobs/JobScheduler.cs
 create mode 100644 src/metadata/Features/Literals/CronExpressions.cs
 create mode 100644 src/metadata/Features/Literals/HttpClients.cs
 create mode 100644 src/metadata/GlobalUsings.cs
 create mode 100644 src/metadata/Metadata.csproj
 create mode 100644 src/metadata/Program.cs

diff --git a/src/metadata/Configuration/logging.json b/src/metadata/Configuration/logging.json
new file mode 100644
index 0000000..fd48065
--- /dev/null
+++ b/src/metadata/Configuration/logging.json
@@ -0,0 +1,31 @@
+{
+  "Serilog": {
+    "Using": [ "Serilog.Sinks.Console" ],
+    "MinimumLevel": {
+      "Default": "Information",
+      "Override": {
+        "Microsoft": "Warning",
+        "System": "Warning",
+        "System.Net.Http.HttpClient.Scraper.LogicalHandler": "Warning",
+        "System.Net.Http.HttpClient.Scraper.ClientHandler": "Warning",
+        "Quartz.Impl.StdSchedulerFactory": "Warning",
+        "Quartz.Core.QuartzScheduler": "Warning",
+        "Quartz.Simpl.RAMJobStore": "Warning",
+        "Quartz.Core.JobRunShell": "Warning",
+        "Quartz.Core.SchedulerSignalerImpl": "Warning"
+      }
+    },
+    "WriteTo": [
+      {
+        "Name": "Console",
+        "Args": {
+          "outputTemplate": "{Timestamp:HH:mm:ss} [{Level}] [{SourceContext}] {Message}{NewLine}{Exception}"
+        }
+      }
+    ],
+    "Enrich": [ "FromLogContext", "WithMachineName", "WithThreadId" ],
+    "Properties": {
+      "Application": "Metadata"
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/metadata/Dockerfile b/src/metadata/Dockerfile
new file mode 100644
index 0000000..5db54dc
--- /dev/null
+++ b/src/metadata/Dockerfile
@@ -0,0 +1,17 @@
+FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0 AS build
+
+ARG TARGETARCH
+WORKDIR /App
+COPY . ./
+RUN dotnet restore -a $TARGETARCH
+RUN dotnet publish -c Release --no-restore -o out -a $TARGETARCH
+
+FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
+
+WORKDIR /App
+COPY --from=build /App/out .
+RUN addgroup -S metadata && adduser -S -G metadata metadata
+USER metadata
+HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+    CMD pgrep -f dotnet || exit 1
+ENTRYPOINT ["dotnet", "Metadata.dll"]
diff --git a/src/metadata/Extensions/ConfigurationExtensions.cs b/src/metadata/Extensions/ConfigurationExtensions.cs
new file mode 100644
index 0000000..5a68249
--- /dev/null
+++ b/src/metadata/Extensions/ConfigurationExtensions.cs
@@ -0,0 +1,50 @@
+namespace Metadata.Extensions;
+
+/// <summary>Helpers for assembling configuration sources and registering typed configuration objects.</summary>
+public static class ConfigurationExtensions
+{
+    private const string ConfigurationFolder = "Configuration";
+    private const string LoggingConfig = "logging.json";
+
+    /// <summary>Adds logging.json (from the Configuration folder), environment variables and user secrets.</summary>
+    public static IConfigurationBuilder AddServiceConfiguration(this IConfigurationBuilder configuration)
+    {
+        configuration.SetBasePath(Path.Combine(AppContext.BaseDirectory, ConfigurationFolder));
+
+        configuration.AddJsonFile(LoggingConfig, false, true);
+
+        configuration.AddEnvironmentVariables();
+
+        // NOTE(review): the assembly marker type is assumed to be Program - confirm.
+        configuration.AddUserSecrets<Program>();
+
+        return configuration;
+    }
+
+    /// <summary>Binds <paramref name="sectionName"/> to <typeparamref name="TConfiguration"/> and registers it as a singleton.</summary>
+    /// <exception cref="ArgumentNullException">The section could not be bound to an instance.</exception>
+    public static TConfiguration LoadConfigurationFromConfig<TConfiguration>(this IServiceCollection services, IConfiguration configuration, string sectionName)
+        where TConfiguration : class
+    {
+        var instance = configuration.GetSection(sectionName).Get<TConfiguration>();
+
+        ArgumentNullException.ThrowIfNull(instance, nameof(instance));
+
+        services.TryAddSingleton(instance);
+
+        return instance;
+    }
+
+    /// <summary>Creates <typeparamref name="TConfiguration"/> via its parameterless constructor and registers it as a singleton.</summary>
+    public static TConfiguration LoadConfigurationFromEnv<TConfiguration>(this IServiceCollection services)
+        where TConfiguration : class
+    {
+        var instance = Activator.CreateInstance<TConfiguration>();
+
+        ArgumentNullException.ThrowIfNull(instance, nameof(instance));
+
+        services.TryAddSingleton(instance);
+
+        return instance;
+    }
+}
\ No newline at end of file
diff --git a/src/metadata/Extensions/EnvironmentExtensions.cs b/src/metadata/Extensions/EnvironmentExtensions.cs
new file mode 100644
index 0000000..97f0ba0
--- /dev/null
+++ b/src/metadata/Extensions/EnvironmentExtensions.cs
@@ -0,0 +1,68 @@
+namespace Metadata.Extensions;
+
+/// <summary>Reads typed settings from environment variables named <c>{prefix}_{varName}</c>.</summary>
+public static class EnvironmentExtensions
+{
+    /// <summary>Returns true for "true", "yes" or "1" (case-insensitive, trimmed); <paramref name="fallback"/> when unset.</summary>
+    public static bool GetEnvironmentVariableAsBool(this string prefix, string varName, bool fallback = false)
+    {
+        var str = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));
+
+        if (string.IsNullOrEmpty(str))
+        {
+            return fallback;
+        }
+
+        // Invariant lowering keeps the comparison independent of the host culture.
+        return str.Trim().ToLowerInvariant() switch
+        {
+            "true" or "yes" or "1" => true,
+            _ => false,
+        };
+    }
+
+    /// <summary>Parses the variable as an invariant-culture integer; <paramref name="fallback"/> when unset or unparsable.</summary>
+    public static int GetEnvironmentVariableAsInt(this string prefix, string varName, int fallback = 0)
+    {
+        var str = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));
+
+        if (string.IsNullOrEmpty(str))
+        {
+            return fallback;
+        }
+
+        return int.TryParse(str, NumberStyles.Integer, CultureInfo.InvariantCulture, out var result) ? result : fallback;
+    }
+
+    /// <summary>Returns the variable's value.</summary>
+    /// <exception cref="InvalidOperationException">The variable is unset or empty.</exception>
+    public static string GetRequiredEnvironmentVariableAsString(this string prefix, string varName)
+    {
+        var fullVarName = GetFullVariableName(prefix, varName);
+
+        var str = Environment.GetEnvironmentVariable(fullVarName);
+
+        if (string.IsNullOrEmpty(str))
+        {
+            throw new InvalidOperationException($"Environment variable {fullVarName} is not set");
+        }
+
+        return str;
+    }
+
+    /// <summary>Returns the variable's value, or <paramref name="fallback"/> when it is unset or empty.</summary>
+    [return: System.Diagnostics.CodeAnalysis.NotNullIfNotNull(nameof(fallback))]
+    public static string? GetOptionalEnvironmentVariableAsString(this string prefix, string varName, string? fallback = null)
+    {
+        var str = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));
+
+        if (string.IsNullOrEmpty(str))
+        {
+            return fallback;
+        }
+
+        return str;
+    }
+
+    private static string GetFullVariableName(string prefix, string varName) => $"{prefix}_{varName}";
+}
\ No newline at end of file
diff --git a/src/metadata/Extensions/JsonSerializerExtensions.cs b/src/metadata/Extensions/JsonSerializerExtensions.cs
new file mode 100644
index 0000000..28149ce
--- /dev/null
+++ b/src/metadata/Extensions/JsonSerializerExtensions.cs
@@ -0,0 +1,7 @@
+namespace Metadata.Extensions;
+
+public static class JsonSerializerExtensions
+{
+    /// <summary>Serializes <paramref name="value"/> with the default <see cref="JsonSerializer"/> options.</summary>
+    public static string ToJson<T>(this T value) => JsonSerializer.Serialize(value);
+}
\ No newline at end of file
diff --git a/src/metadata/Extensions/ServiceCollectionExtensions.cs b/src/metadata/Extensions/ServiceCollectionExtensions.cs
new file mode 100644
index 0000000..e9f4f8f
--- /dev/null
+++ b/src/metadata/Extensions/ServiceCollectionExtensions.cs
@@ -0,0 +1,33 @@
+namespace Metadata.Extensions;
+
+public static class ServiceCollectionExtensions
+{
+    /// <summary>Registers the named HTTP client used to download the IMDB dataset files.</summary>
+    internal static IServiceCollection AddHttpClients(this IServiceCollection services)
+    {
+        services.AddHttpClient(HttpClients.ImdbDataClientName, client => client.BaseAddress = new(HttpClients.ImdbClientBaseAddress));
+
+        return services;
+    }
+
+    /// <summary>Registers the Mongo configuration (read from the environment) and the IMDB Mongo service.</summary>
+    internal static IServiceCollection AddMongoDb(this IServiceCollection services)
+    {
+        services.LoadConfigurationFromEnv<MongoConfiguration>();
+        services.AddTransient<ImdbMongoDbService>();
+
+        return services;
+    }
+
+    /// <summary>Registers job configuration, the Coravel scheduler, the download job and the hosted job scheduler.</summary>
+    internal static IServiceCollection AddJobSupport(this IServiceCollection services)
+    {
+        services.LoadConfigurationFromEnv<JobConfiguration>();
+
+        services.AddScheduler()
+            .AddTransient<DownloadImdbDataJob>()
+            .AddHostedService<JobScheduler>();
+
+        return services;
+    }
+}
\ No newline at end of file
diff --git a/src/metadata/Extensions/WebApplicationBuilderExtensions.cs b/src/metadata/Extensions/WebApplicationBuilderExtensions.cs
new file mode 100644
index
0000000..6fddbd8
--- /dev/null
+++ b/src/metadata/Extensions/WebApplicationBuilderExtensions.cs
@@ -0,0 +1,21 @@
+namespace Metadata.Extensions;
+
+internal static class WebApplicationBuilderExtensions
+{
+    /// <summary>Configures Serilog from the supplied configuration.</summary>
+    internal static IHostBuilder SetupSerilog(this WebApplicationBuilder builder, IConfiguration configuration) =>
+        builder.Host.UseSerilog((_, c) =>
+            c.ReadFrom.Configuration(configuration));
+
+    /// <summary>Adds Wolverine with a six-hour default execution timeout.</summary>
+    internal static WebApplicationBuilder SetupWolverine(this WebApplicationBuilder builder)
+    {
+        builder.Host.UseWolverine(
+            options =>
+            {
+                options.DefaultExecutionTimeout = 6.Hours();
+            });
+
+        return builder;
+    }
+}
\ No newline at end of file
diff --git a/src/metadata/Features/Configuration/JobConfiguration.cs b/src/metadata/Features/Configuration/JobConfiguration.cs
new file mode 100644
index 0000000..9565dfa
--- /dev/null
+++ b/src/metadata/Features/Configuration/JobConfiguration.cs
@@ -0,0 +1,12 @@
+namespace Metadata.Features.Configuration;
+
+/// <summary>Job settings read from METADATA_* environment variables.</summary>
+public class JobConfiguration
+{
+    private const string Prefix = "METADATA";
+    private const string DownloadImdbDataVariable = "DOWNLOAD_IMDB_DATA_SCHEDULE";
+    private const string DownloadImdbDataOnceVariable = "DOWNLOAD_IMDB_DATA_ONCE";
+
+    public string DownloadImdbCronSchedule { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(DownloadImdbDataVariable, CronExpressions.EveryHour);
+    public bool DownloadImdbOnce { get; init; } = Prefix.GetEnvironmentVariableAsBool(DownloadImdbDataOnceVariable);
+}
\ No newline at end of file
diff --git a/src/metadata/Features/Configuration/MongoConfiguration.cs b/src/metadata/Features/Configuration/MongoConfiguration.cs
new file mode 100644
index 0000000..e7bff7c
--- /dev/null
+++ b/src/metadata/Features/Configuration/MongoConfiguration.cs
@@ -0,0 +1,21 @@
+namespace Metadata.Features.Configuration;
+
+/// <summary>MongoDB connection settings read from MONGODB_* environment variables.</summary>
+public class MongoConfiguration
+{
+    private const string Prefix = "MONGODB";
+    private const string HostVariable = "HOST";
+    private const string PortVariable = "PORT";
+    private const string DbVariable = "DB";
+    private const string UsernameVariable = "USER";
+    private const string PasswordVariable = "PASSWORD";
+
+    private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
+    private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 27017);
+    private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
+    private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
+    public string DbName { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DbVariable);
+
+    // Credentials are percent-encoded so reserved characters (':', '/', '@', '%') cannot corrupt the URI.
+    public string ConnectionString => $"mongodb://{Uri.EscapeDataString(Username)}:{Uri.EscapeDataString(Password)}@{Host}:{Port}/{DbName}?tls=false&directConnection=true&authSource=admin";
+}
\ No newline at end of file
diff --git a/src/metadata/Features/DeleteDownloadedImdbData/DeleteDownloadedImdbDataRequest.cs b/src/metadata/Features/DeleteDownloadedImdbData/DeleteDownloadedImdbDataRequest.cs
new file mode 100644
index 0000000..d505021
--- /dev/null
+++ b/src/metadata/Features/DeleteDownloadedImdbData/DeleteDownloadedImdbDataRequest.cs
@@ -0,0 +1,3 @@
+namespace Metadata.Features.DeleteDownloadedImdbData;
+
+public record DeleteDownloadedImdbDataRequest(string FilePath);
\ No newline at end of file
diff --git a/src/metadata/Features/DeleteDownloadedImdbData/DeleteDownloadedImdbDataRequestHandler.cs b/src/metadata/Features/DeleteDownloadedImdbData/DeleteDownloadedImdbDataRequestHandler.cs
new file mode 100644
index 0000000..4e22ef5
--- /dev/null
+++ b/src/metadata/Features/DeleteDownloadedImdbData/DeleteDownloadedImdbDataRequestHandler.cs
@@ -0,0 +1,16 @@
+namespace Metadata.Features.DeleteDownloadedImdbData;
+
+/// <summary>Deletes the downloaded IMDB file named by the request.</summary>
+public class DeleteDownloadedImdbDataRequestHandler(ILogger<DeleteDownloadedImdbDataRequestHandler> logger)
+{
+    public Task Handle(DeleteDownloadedImdbDataRequest request, CancellationToken _)
+    {
+        logger.LogInformation("Deleting file {FilePath}", request.FilePath);
+
+        File.Delete(request.FilePath);
+
+        logger.LogInformation("File Deleted");
+
+        return Task.CompletedTask;
+    }
+}
\ No newline at end of file
diff --git a/src/metadata/Features/DownloadImdbData/DownloadImdbDataJob.cs b/src/metadata/Features/DownloadImdbData/DownloadImdbDataJob.cs
new file mode 100644
index 0000000..6248b79
--- /dev/null
+++ b/src/metadata/Features/DownloadImdbData/DownloadImdbDataJob.cs
@@ -0,0 +1,9 @@
+namespace Metadata.Features.DownloadImdbData;
+
+/// <summary>Job that kicks off the IMDB download by sending a <see cref="GetImdbDataRequest"/>.</summary>
+public class DownloadImdbDataJob(IMessageBus messageBus, JobConfiguration configuration) : BaseJob
+{
+    public override bool IsScheduelable => !configuration.DownloadImdbOnce && !string.IsNullOrEmpty(configuration.DownloadImdbCronSchedule);
+    public override string JobName => nameof(DownloadImdbDataJob);
+    public override async Task Invoke() => await messageBus.SendAsync(new GetImdbDataRequest());
+}
\ No newline at end of file
diff --git a/src/metadata/Features/DownloadImdbData/GetImdbDataRequest.cs b/src/metadata/Features/DownloadImdbData/GetImdbDataRequest.cs
new file mode 100644
index 0000000..6b4077b
--- /dev/null
+++ b/src/metadata/Features/DownloadImdbData/GetImdbDataRequest.cs
@@ -0,0 +1,3 @@
+namespace Metadata.Features.DownloadImdbData;
+
+public record GetImdbDataRequest;
\ No newline at end of file
diff --git a/src/metadata/Features/DownloadImdbData/GetImdbDataRequestHandler.cs b/src/metadata/Features/DownloadImdbData/GetImdbDataRequestHandler.cs
new file mode 100644
index 0000000..1b0420c
--- /dev/null
+++ b/src/metadata/Features/DownloadImdbData/GetImdbDataRequestHandler.cs
@@ -0,0 +1,35 @@
+namespace Metadata.Features.DownloadImdbData;
+
+/// <summary>Downloads and decompresses the IMDB title.basics dataset into the temp folder.</summary>
+public class GetImdbDataRequestHandler(IHttpClientFactory clientFactory, ILogger<GetImdbDataRequestHandler> logger)
+{
+    private const string TitleBasicsFileName = "title.basics.tsv";
+
+    /// <returns>An import request pointing at the decompressed file.</returns>
+    /// <exception cref="HttpRequestException">The download returned a non-success status code.</exception>
+    public async Task<ImportImdbDataRequest> Handle(GetImdbDataRequest _, CancellationToken cancellationToken)
+    {
+        logger.LogInformation("Downloading IMDB data");
+
+        var client = clientFactory.CreateClient(HttpClients.ImdbDataClientName);
+
+        // ResponseHeadersRead streams the body instead of buffering the whole archive in memory.
+        using var response = await client.GetAsync($"{TitleBasicsFileName}.gz", HttpCompletionOption.ResponseHeadersRead, cancellationToken);
+
+        response.EnsureSuccessStatusCode();
+
+        var tempFile = Path.Combine(Path.GetTempPath(), TitleBasicsFileName);
+
+        // Scoped block: all three streams are flushed and closed before the path is handed on.
+        await using (var stream = await response.Content.ReadAsStreamAsync(cancellationToken))
+        await using (var gzipStream = new GZipStream(stream, CompressionMode.Decompress))
+        await using (var fileStream = File.Create(tempFile))
+        {
+            await gzipStream.CopyToAsync(fileStream, cancellationToken);
+        }
+
+        logger.LogInformation("Downloaded IMDB data to {TempFile}", tempFile);
+
+        return new(tempFile);
+    }
+}
\ No newline at end of file
diff --git a/src/metadata/Features/ImportImdbData/ImdbEntry.cs b/src/metadata/Features/ImportImdbData/ImdbEntry.cs
new file mode 100644
index 0000000..8e0608e
--- /dev/null
+++ b/src/metadata/Features/ImportImdbData/ImdbEntry.cs
@@ -0,0 +1,16 @@
+namespace Metadata.Features.ImportImdbData;
+
+/// <summary>One imported IMDB row; every column is stored as a raw string, keyed by <see cref="ImdbId"/>.</summary>
+public class ImdbEntry
+{
+    [BsonId]
+    public string ImdbId { get; set; } = default!;
+    public string? TitleType { get; set; }
+    public string? PrimaryTitle { get; set; }
+    public string? OriginalTitle { get; set; }
+    public string? IsAdult { get; set; }
+    public string? StartYear { get; set; }
+    public string? EndYear { get; set; }
+    public string? RuntimeMinutes { get; set; }
+    public string? Genres { get; set; }
+}
\ No newline at end of file
diff --git a/src/metadata/Features/ImportImdbData/ImdbMongoDbService.cs b/src/metadata/Features/ImportImdbData/ImdbMongoDbService.cs
new file mode 100644
index 0000000..ae11d9c
--- /dev/null
+++ b/src/metadata/Features/ImportImdbData/ImdbMongoDbService.cs
@@ -0,0 +1,67 @@
+namespace Metadata.Features.ImportImdbData;
+
+/// <summary>Writes IMDB entries to the "imdb-entries" Mongo collection.</summary>
+public class ImdbMongoDbService
+{
+    private readonly ILogger<ImdbMongoDbService> _logger;
+    private readonly IMongoCollection<ImdbEntry> _imdbCollection;
+
+    public ImdbMongoDbService(MongoConfiguration configuration, ILogger<ImdbMongoDbService> logger)
+    {
+        _logger = logger;
+
+        var client = new MongoClient(configuration.ConnectionString);
+        var database = client.GetDatabase(configuration.DbName);
+
+        _imdbCollection = database.GetCollection<ImdbEntry>("imdb-entries");
+    }
+
+    /// <summary>Upserts <paramref name="entries"/> keyed by ImdbId; fields are only written when a document is inserted.</summary>
+    public async Task InsertImdbEntries(IEnumerable<ImdbEntry> entries)
+    {
+        var operations = new List<WriteModel<ImdbEntry>>();
+
+        foreach (var entry in entries)
+        {
+            var filter = Builders<ImdbEntry>.Filter.Eq(e => e.ImdbId, entry.ImdbId);
+            var update = Builders<ImdbEntry>.Update
+                .SetOnInsert(e => e.TitleType, entry.TitleType)
+                .SetOnInsert(e => e.PrimaryTitle, entry.PrimaryTitle)
+                .SetOnInsert(e => e.OriginalTitle, entry.OriginalTitle)
+                .SetOnInsert(e => e.IsAdult, entry.IsAdult)
+                .SetOnInsert(e => e.StartYear, entry.StartYear)
+                .SetOnInsert(e => e.EndYear, entry.EndYear)
+                .SetOnInsert(e => e.RuntimeMinutes, entry.RuntimeMinutes)
+                .SetOnInsert(e => e.Genres, entry.Genres);
+
+            operations.Add(new UpdateOneModel<ImdbEntry>(filter, update) { IsUpsert = true });
+        }
+
+        // BulkWriteAsync rejects an empty request list, so an empty batch is a no-op.
+        if (operations.Count == 0)
+        {
+            return;
+        }
+
+        await _imdbCollection.BulkWriteAsync(operations);
+    }
+
+    /// <summary>Ensures the PrimaryTitle index exists; returns false (after logging) if that fails.</summary>
+    public bool IsDatabaseInitialized()
+    {
+        try
+        {
+            // Create index for PrimaryTitle
+            var indexPrimaryTitle = Builders<ImdbEntry>.IndexKeys.Ascending(e => e.PrimaryTitle);
+            var modelPrimaryTitle = new CreateIndexModel<ImdbEntry>(indexPrimaryTitle);
+            _imdbCollection.Indexes.CreateOne(modelPrimaryTitle);
+
+            return true;
+        }
+        catch (Exception e)
+        {
+            _logger.LogError(e, "Error initializing database");
+            return false;
+        }
+    }
+}
\ No
newline at end of file
diff --git a/src/metadata/Features/ImportImdbData/ImportImdbDataRequest.cs b/src/metadata/Features/ImportImdbData/ImportImdbDataRequest.cs
new file mode 100644
index 0000000..751d092
--- /dev/null
+++ b/src/metadata/Features/ImportImdbData/ImportImdbDataRequest.cs
@@ -0,0 +1,3 @@
+namespace Metadata.Features.ImportImdbData;
+
+public record ImportImdbDataRequest(string FilePath);
\ No newline at end of file
diff --git a/src/metadata/Features/ImportImdbData/ImportImdbDataRequestHandler.cs b/src/metadata/Features/ImportImdbData/ImportImdbDataRequestHandler.cs
new file mode 100644
index 0000000..e5d3568
--- /dev/null
+++ b/src/metadata/Features/ImportImdbData/ImportImdbDataRequestHandler.cs
@@ -0,0 +1,113 @@
+namespace Metadata.Features.ImportImdbData;
+
+/// <summary>Streams the downloaded IMDB TSV file into MongoDB in batches.</summary>
+public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService)
+{
+    private const int BatchSize = 50_000;
+
+    /// <summary>Imports the file at <c>request.FilePath</c> and returns a request to delete it.</summary>
+    public async Task<DeleteDownloadedImdbDataRequest> Handle(ImportImdbDataRequest request, CancellationToken cancellationToken)
+    {
+        logger.LogInformation("Importing Downloaded IMDB data from {FilePath}", request.FilePath);
+
+        var config = new CsvConfiguration(CultureInfo.InvariantCulture)
+        {
+            Delimiter = "\t",
+            BadDataFound = null, // Skip Bad Data from Imdb
+            MissingFieldFound = null, // Skip Missing Fields from Imdb
+        };
+
+        using var reader = new StreamReader(request.FilePath);
+        using var csv = new CsvReader(reader, config);
+
+        var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(BatchSize)
+        {
+            FullMode = BoundedChannelFullMode.Wait,
+        });
+
+        // Skip the header row
+        await csv.ReadAsync();
+
+        // A linked token lets a failed consumer stop the producer, which would otherwise
+        // wait forever on a full channel that nobody drains.
+        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
+
+        var batchInsertTask = CreateBatchOfEntries(channel, linkedCts.Token);
+        var readTask = ReadAndCompleteAsync(csv, channel, linkedCts.Token);
+
+        if (await Task.WhenAny(batchInsertTask, readTask) == batchInsertTask)
+        {
+            linkedCts.Cancel();
+        }
+
+        await Task.WhenAll(batchInsertTask, readTask);
+
+        return new(request.FilePath);
+    }
+
+    // Consumer: drains the channel and writes batches of up to BatchSize entries to Mongo.
+    private Task CreateBatchOfEntries(Channel<ImdbEntry> channel, CancellationToken cancellationToken) =>
+        Task.Run(async () =>
+        {
+            await foreach (var movieData in channel.Reader.ReadAllAsync(cancellationToken))
+            {
+                if (cancellationToken.IsCancellationRequested)
+                {
+                    return;
+                }
+
+                var batch = new List<ImdbEntry>
+                {
+                    movieData,
+                };
+
+                while (batch.Count < BatchSize && channel.Reader.TryRead(out var nextMovieData))
+                {
+                    batch.Add(nextMovieData);
+                }
+
+                await mongoDbService.InsertImdbEntries(batch);
+                logger.LogInformation("Imported batch of {BatchSize} starting with ImdbId {FirstImdbId}", batch.Count, batch[0].ImdbId);
+            }
+        }, cancellationToken);
+
+    // Runs the producer and always completes the channel so the consumer can finish draining.
+    private static async Task ReadAndCompleteAsync(CsvReader csv, Channel<ImdbEntry> channel, CancellationToken cancellationToken)
+    {
+        try
+        {
+            await ReadEntries(csv, channel, cancellationToken);
+        }
+        finally
+        {
+            channel.Writer.TryComplete();
+        }
+    }
+
+    // Producer: maps each TSV row to an ImdbEntry and pushes it onto the channel.
+    private static async Task ReadEntries(CsvReader csv, Channel<ImdbEntry> channel, CancellationToken cancellationToken)
+    {
+        while (await csv.ReadAsync())
+        {
+            var movieData = new ImdbEntry
+            {
+                ImdbId = csv.GetField(0),
+                TitleType = csv.GetField(1),
+                PrimaryTitle = csv.GetField(2),
+                OriginalTitle = csv.GetField(3),
+                IsAdult = csv.GetField(4),
+                StartYear = csv.GetField(5),
+                EndYear = csv.GetField(6),
+                RuntimeMinutes = csv.GetField(7),
+                Genres = csv.GetField(8),
+            };
+
+            if (cancellationToken.IsCancellationRequested)
+            {
+                return;
+            }
+
+            await channel.Writer.WriteAsync(movieData, cancellationToken);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/metadata/Features/Jobs/BaseJob.cs b/src/metadata/Features/Jobs/BaseJob.cs
new file mode 100644
index 0000000..83e6986
--- /dev/null
+++ b/src/metadata/Features/Jobs/BaseJob.cs
@@ -0,0 +1,10 @@
+namespace Metadata.Features.Jobs;
+
+public abstract class BaseJob : IMetadataJob
+{
+    public abstract bool IsScheduelable { get; }
+
+    public abstract string JobName { get; }
+
+    public abstract Task Invoke();
+}
\ No newline at end of file
diff --git a/src/metadata/Features/Jobs/IMetadataJob.cs b/src/metadata/Features/Jobs/IMetadataJob.cs
new file mode 100644
index 0000000..f5147e8
--- /dev/null
+++ b/src/metadata/Features/Jobs/IMetadataJob.cs
@@ -0,0 +1,7 @@
+namespace Metadata.Features.Jobs;
+
+public interface IMetadataJob : IInvocable
+{
+    bool IsScheduelable { get; }
+    string JobName { get; }
+}
\ No
newline at end of file
diff --git a/src/metadata/Features/Jobs/JobScheduler.cs b/src/metadata/Features/Jobs/JobScheduler.cs
new file mode 100644
index 0000000..059fc4f
--- /dev/null
+++ b/src/metadata/Features/Jobs/JobScheduler.cs
@@ -0,0 +1,37 @@
+namespace Metadata.Features.Jobs;
+
+/// <summary>Hosted service that ensures the Mongo index exists, then either runs the download once or schedules it.</summary>
+public class JobScheduler(IServiceProvider serviceProvider) : IHostedService
+{
+    /// <exception cref="InvalidOperationException">The Mongo index could not be created.</exception>
+    public async Task StartAsync(CancellationToken cancellationToken)
+    {
+        // The scope is disposed only after the one-off job below has been awaited.
+        await using var scope = serviceProvider.CreateAsyncScope();
+
+        var mongoDbService = scope.ServiceProvider.GetRequiredService<ImdbMongoDbService>();
+
+        if (!mongoDbService.IsDatabaseInitialized())
+        {
+            throw new InvalidOperationException("MongoDb is not initialized");
+        }
+
+        var jobConfigurations = scope.ServiceProvider.GetRequiredService<JobConfiguration>();
+        var downloadJob = scope.ServiceProvider.GetRequiredService<DownloadImdbDataJob>();
+
+        if (!downloadJob.IsScheduelable)
+        {
+            await downloadJob.Invoke();
+            return;
+        }
+
+        var scheduler = scope.ServiceProvider.GetRequiredService<IScheduler>();
+
+        // nameof(downloadJob.JobName) would yield the literal "JobName"; pass the job's actual name.
+        scheduler.Schedule<DownloadImdbDataJob>()
+            .Cron(jobConfigurations.DownloadImdbCronSchedule)
+            .PreventOverlapping(downloadJob.JobName);
+    }
+
+    public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
+}
\ No newline at end of file
diff --git a/src/metadata/Features/Literals/CronExpressions.cs b/src/metadata/Features/Literals/CronExpressions.cs
new file mode 100644
index 0000000..c80bca1
--- /dev/null
+++ b/src/metadata/Features/Literals/CronExpressions.cs
@@ -0,0 +1,11 @@
+namespace Metadata.Features.Literals;
+
+/// <summary>Five-field cron expressions: minute, hour, day-of-month, month, day-of-week.</summary>
+/// <remarks>NOTE(review): JobConfiguration defaults to EveryHour; it previously held the daily expression - confirm the intended default cadence.</remarks>
+public static class CronExpressions
+{
+    public const string EveryHour = "0 * * * *";  // minute 0 of every hour
+    public const string EveryDay = "0 0 * * *";   // 00:00 every day
+    public const string EveryWeek = "0 0 * * 0";  // 00:00 every Sunday
+    public const string EveryMonth = "0 0 1 * *"; // 00:00 on the 1st of every month
+}
\ No newline at end of file
diff --git a/src/metadata/Features/Literals/HttpClients.cs b/src/metadata/Features/Literals/HttpClients.cs
new file mode 100644
index 0000000..923a2cf
--- /dev/null
+++
b/src/metadata/Features/Literals/HttpClients.cs
@@ -0,0 +1,7 @@
+namespace Metadata.Features.Literals;
+
+public static class HttpClients
+{
+    public const string ImdbDataClientName = "imdb-data";
+    public const string ImdbClientBaseAddress = "https://datasets.imdbws.com/";
+}
\ No newline at end of file
diff --git a/src/metadata/GlobalUsings.cs b/src/metadata/GlobalUsings.cs
new file mode 100644
index 0000000..e6098da
--- /dev/null
+++ b/src/metadata/GlobalUsings.cs
@@ -0,0 +1,26 @@
+// Global using directives
+
+global using System.Globalization;
+global using System.IO.Compression;
+global using System.Text.Json;
+global using System.Threading.Channels;
+global using Coravel;
+global using Coravel.Invocable;
+global using Coravel.Scheduling.Schedule.Interfaces;
+global using CsvHelper;
+global using CsvHelper.Configuration;
+global using JasperFx.Core;
+global using Metadata.Extensions;
+global using Metadata.Features.Configuration;
+global using Metadata.Features.DeleteDownloadedImdbData;
+global using Metadata.Features.DownloadImdbData;
+global using Metadata.Features.ImportImdbData;
+global using Metadata.Features.Jobs;
+global using Metadata.Features.Literals;
+global using Microsoft.AspNetCore.Builder;
+global using Microsoft.Extensions.DependencyInjection;
+global using Microsoft.Extensions.DependencyInjection.Extensions;
+global using MongoDB.Bson.Serialization.Attributes;
+global using MongoDB.Driver;
+global using Serilog;
+global using Wolverine;
diff --git a/src/metadata/Metadata.csproj b/src/metadata/Metadata.csproj
new file mode 100644
index 0000000..5769b47
--- /dev/null
+++ b/src/metadata/Metadata.csproj
@@ -0,0 +1,30 @@
+ + + Exe + net8.0 + enable + enable + 54cad2ee-57df-4bb2-a192-d5d501448e0a + + + + + + + + + + + + + + + + + + Always + + + +
diff --git a/src/metadata/Program.cs b/src/metadata/Program.cs
new file mode 100644
index 0000000..c1635f5
--- /dev/null
+++ b/src/metadata/Program.cs
@@ -0,0 +1,15 @@
+var builder = WebApplication.CreateBuilder(args);
+
+builder.Configuration.AddServiceConfiguration();
+builder.SetupSerilog(builder.Configuration);
+builder.SetupWolverine();
+
+// NOTE(review): AddMongoDb was defined but never called; it is registered explicitly here.
+builder.Services
+    .AddHttpClients()
+    .AddMongoDb()
+    .AddJobSupport();
+
+var host = builder.Build();
+
+await host.RunAsync();
\ No newline at end of file