Create service to populate Imdb data collection within mongo
We can use this collection as an alternative source to look up IMDb IDs; that lookup would run before name_to_imdb is called in the consumer.
This commit is contained in:
31
src/metadata/Configuration/logging.json
Normal file
31
src/metadata/Configuration/logging.json
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
{
|
||||||
|
"Serilog": {
|
||||||
|
"Using": [ "Serilog.Sinks.Console" ],
|
||||||
|
"MinimumLevel": {
|
||||||
|
"Default": "Information",
|
||||||
|
"Override": {
|
||||||
|
"Microsoft": "Warning",
|
||||||
|
"System": "Warning",
|
||||||
|
"System.Net.Http.HttpClient.Scraper.LogicalHandler": "Warning",
|
||||||
|
"System.Net.Http.HttpClient.Scraper.ClientHandler": "Warning",
|
||||||
|
"Quartz.Impl.StdSchedulerFactory": "Warning",
|
||||||
|
"Quartz.Core.QuartzScheduler": "Warning",
|
||||||
|
"Quartz.Simpl.RAMJobStore": "Warning",
|
||||||
|
"Quartz.Core.JobRunShell": "Warning",
|
||||||
|
"Quartz.Core.SchedulerSignalerImpl": "Warning"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"WriteTo": [
|
||||||
|
{
|
||||||
|
"Name": "Console",
|
||||||
|
"Args": {
|
||||||
|
"outputTemplate": "{Timestamp:HH:mm:ss} [{Level}] [{SourceContext}] {Message}{NewLine}{Exception}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"Enrich": [ "FromLogContext", "WithMachineName", "WithThreadId" ],
|
||||||
|
"Properties": {
|
||||||
|
"Application": "Metadata"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
17
src/metadata/Dockerfile
Normal file
17
src/metadata/Dockerfile
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0 AS build
|
||||||
|
|
||||||
|
ARG TARGETARCH
|
||||||
|
WORKDIR /App
|
||||||
|
COPY . ./
|
||||||
|
RUN dotnet restore -a $TARGETARCH
|
||||||
|
RUN dotnet publish -c Release --no-restore -o out -a $TARGETARCH
|
||||||
|
|
||||||
|
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
|
||||||
|
|
||||||
|
WORKDIR /App
|
||||||
|
COPY --from=build /App/out .
|
||||||
|
RUN addgroup -S metadata && adduser -S -G metadata metadata
|
||||||
|
USER metadata
|
||||||
|
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
|
||||||
|
CMD pgrep -f dotnet || exit 1
|
||||||
|
ENTRYPOINT ["dotnet", "Metadata.dll"]
|
||||||
44
src/metadata/Extensions/ConfigurationExtensions.cs
Normal file
44
src/metadata/Extensions/ConfigurationExtensions.cs
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
namespace Metadata.Extensions;
|
||||||
|
|
||||||
|
/// <summary>
/// Helpers that wire up the service's configuration sources and register
/// configuration objects with the dependency-injection container.
/// </summary>
public static class ConfigurationExtensions
{
    private const string ConfigurationFolder = "Configuration";
    private const string LoggingConfig = "logging.json";

    /// <summary>
    /// Points the builder at the <c>Configuration</c> folder next to the application binaries and
    /// adds, in order: the required (reload-on-change) logging JSON file, environment variables,
    /// and the user secrets associated with <c>Program</c>.
    /// </summary>
    /// <param name="configuration">The configuration builder to extend.</param>
    /// <returns>The same <paramref name="configuration"/> instance, for chaining.</returns>
    public static IConfigurationBuilder AddServiceConfiguration(this IConfigurationBuilder configuration)
    {
        var configurationRoot = Path.Combine(AppContext.BaseDirectory, ConfigurationFolder);

        configuration
            .SetBasePath(configurationRoot)
            .AddJsonFile(LoggingConfig, optional: false, reloadOnChange: true)
            .AddEnvironmentVariables()
            .AddUserSecrets<Program>();

        return configuration;
    }

    /// <summary>
    /// Binds the named configuration section to a <typeparamref name="TConfiguration"/> instance
    /// and registers that instance as a singleton (unless one is already registered).
    /// </summary>
    /// <param name="services">The service collection to register the instance with.</param>
    /// <param name="configuration">The configuration to read the section from.</param>
    /// <param name="sectionName">Name of the section to bind.</param>
    /// <returns>The bound instance.</returns>
    /// <exception cref="ArgumentNullException">The section could not be bound to an instance.</exception>
    public static TConfiguration LoadConfigurationFromConfig<TConfiguration>(this IServiceCollection services, IConfiguration configuration, string sectionName)
        where TConfiguration : class
    {
        var section = configuration.GetSection(sectionName);
        var instance = section.Get<TConfiguration>();

        ArgumentNullException.ThrowIfNull(instance, nameof(instance));

        services.TryAddSingleton(instance);
        return instance;
    }

    /// <summary>
    /// Creates a <typeparamref name="TConfiguration"/> through its parameterless constructor and
    /// registers it as a singleton (unless one is already registered). Whatever the type reads
    /// during construction (e.g. property initializers) determines its values.
    /// </summary>
    /// <param name="services">The service collection to register the instance with.</param>
    /// <returns>The created instance.</returns>
    public static TConfiguration LoadConfigurationFromEnv<TConfiguration>(this IServiceCollection services)
        where TConfiguration : class
    {
        var instance = Activator.CreateInstance<TConfiguration>();

        ArgumentNullException.ThrowIfNull(instance, nameof(instance));

        services.TryAddSingleton(instance);
        return instance;
    }
}
|
||||||
68
src/metadata/Extensions/EnvironmentExtensions.cs
Normal file
68
src/metadata/Extensions/EnvironmentExtensions.cs
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
namespace Metadata.Extensions;
|
||||||
|
|
||||||
|
/// <summary>
/// Reads environment variables named <c>{prefix}_{varName}</c> and converts them to typed values.
/// A variable that is unset, empty, or whitespace-only is treated as "not set" by every method.
/// </summary>
public static class EnvironmentExtensions
{
    /// <summary>
    /// Reads <c>{prefix}_{varName}</c> as a boolean.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <param name="fallback">Value returned when the variable is not set.</param>
    /// <returns>
    /// <c>true</c> when the trimmed value is <c>true</c>, <c>yes</c> or <c>1</c> (case-insensitive);
    /// <paramref name="fallback"/> when the variable is not set; <c>false</c> for any other value.
    /// </returns>
    public static bool GetEnvironmentVariableAsBool(this string prefix, string varName, bool fallback = false)
    {
        var raw = ReadOrNull(GetFullVariableName(prefix, varName));

        if (raw is null)
        {
            return fallback;
        }

        var candidate = raw.Trim();

        // Ordinal, case-insensitive comparison: the result must not depend on the process culture.
        return candidate.Equals("true", StringComparison.OrdinalIgnoreCase)
               || candidate.Equals("yes", StringComparison.OrdinalIgnoreCase)
               || candidate.Equals("1", StringComparison.Ordinal);
    }

    /// <summary>
    /// Reads <c>{prefix}_{varName}</c> as a 32-bit integer.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <param name="fallback">Value returned when the variable is not set or is not a valid integer.</param>
    /// <returns>The parsed value, or <paramref name="fallback"/>.</returns>
    public static int GetEnvironmentVariableAsInt(this string prefix, string varName, int fallback = 0)
    {
        var raw = ReadOrNull(GetFullVariableName(prefix, varName));

        if (raw is null)
        {
            return fallback;
        }

        // Environment values are machine-readable, so parse with the invariant culture.
        return int.TryParse(raw, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsed)
            ? parsed
            : fallback;
    }

    /// <summary>
    /// Reads <c>{prefix}_{varName}</c> and fails when it is not set.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <returns>The variable's value, exactly as stored in the environment.</returns>
    /// <exception cref="InvalidOperationException">The variable is not set.</exception>
    public static string GetRequiredEnvironmentVariableAsString(this string prefix, string varName)
    {
        var fullVarName = GetFullVariableName(prefix, varName);

        return ReadOrNull(fullVarName)
               ?? throw new InvalidOperationException($"Environment variable {fullVarName} is not set");
    }

    /// <summary>
    /// Reads <c>{prefix}_{varName}</c>, returning <paramref name="fallback"/> when it is not set.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <param name="fallback">Value returned when the variable is not set.</param>
    /// <returns>
    /// The variable's value, or <paramref name="fallback"/>. NOTE: the return type is kept
    /// non-nullable for compatibility with existing callers, but the result is <c>null</c> when the
    /// variable is not set and no fallback is supplied.
    /// </returns>
    public static string GetOptionalEnvironmentVariableAsString(this string prefix, string varName, string? fallback = null) =>
        ReadOrNull(GetFullVariableName(prefix, varName)) ?? fallback!;

    // Returns the variable's raw value, or null when it is unset, empty or whitespace-only.
    private static string? ReadOrNull(string fullVarName)
    {
        var value = Environment.GetEnvironmentVariable(fullVarName);

        return string.IsNullOrWhiteSpace(value) ? null : value;
    }

    private static string GetFullVariableName(string prefix, string varName) => $"{prefix}_{varName}";
}
|
||||||
6
src/metadata/Extensions/JsonSerializerExtensions.cs
Normal file
6
src/metadata/Extensions/JsonSerializerExtensions.cs
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
namespace Metadata.Extensions;
|
||||||
|
|
||||||
|
/// <summary>
/// Convenience extension for turning any value into its JSON text.
/// </summary>
public static class JsonSerializerExtensions
{
    /// <summary>
    /// Serializes <paramref name="value"/> with <see cref="JsonSerializer"/> using its default options.
    /// </summary>
    /// <typeparam name="T">Static type used for serialization.</typeparam>
    /// <param name="value">The value to serialize.</param>
    /// <returns>The JSON representation of <paramref name="value"/>.</returns>
    public static string ToJson<T>(this T value)
    {
        return JsonSerializer.Serialize(value);
    }
}
|
||||||
30
src/metadata/Extensions/ServiceCollectionExtensions.cs
Normal file
30
src/metadata/Extensions/ServiceCollectionExtensions.cs
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
namespace Metadata.Extensions;
|
||||||
|
|
||||||
|
/// <summary>
/// Dependency-injection registrations for the metadata service.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers the named <see cref="HttpClient"/> used to download IMDb data, with its base
    /// address taken from <see cref="HttpClients.ImdbClientBaseAddress"/>.
    /// </summary>
    /// <param name="services">The service collection to extend.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    internal static IServiceCollection AddHttpClients(this IServiceCollection services)
    {
        services.AddHttpClient(HttpClients.ImdbDataClientName, client => client.BaseAddress = new(HttpClients.ImdbClientBaseAddress));

        return services;
    }

    /// <summary>
    /// Registers <see cref="MongoConfiguration"/> and <see cref="ImdbMongoDbService"/>.
    /// </summary>
    /// <param name="services">The service collection to extend.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    internal static IServiceCollection AddMongoDb(this IServiceCollection services)
    {
        services.LoadConfigurationFromEnv<MongoConfiguration>();

        // The service constructs a MongoClient in its constructor. MongoDB recommends sharing one
        // client per application, so register a single instance rather than a new one per resolution.
        services.AddSingleton<ImdbMongoDbService>();

        return services;
    }

    /// <summary>
    /// Registers <see cref="JobConfiguration"/>, the Coravel scheduler, the IMDb download job and
    /// the <see cref="JobScheduler"/> hosted service.
    /// </summary>
    /// <param name="services">The service collection to extend.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    internal static IServiceCollection AddJobSupport(this IServiceCollection services)
    {
        services.LoadConfigurationFromEnv<JobConfiguration>();

        services.AddScheduler()
            .AddTransient<DownloadImdbDataJob>()
            .AddHostedService<JobScheduler>();

        return services;
    }
}
|
||||||
19
src/metadata/Extensions/WebApplicationBuilderExtensions.cs
Normal file
19
src/metadata/Extensions/WebApplicationBuilderExtensions.cs
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
namespace Metadata.Extensions;
|
||||||
|
|
||||||
|
/// <summary>
/// Host-level setup helpers (logging and messaging) for the web application builder.
/// </summary>
internal static class WebApplicationBuilderExtensions
{
    /// <summary>
    /// Installs Serilog as the host's logger, reading its settings from <paramref name="configuration"/>.
    /// </summary>
    /// <param name="builder">The application builder whose host is configured.</param>
    /// <param name="configuration">Configuration containing the Serilog settings.</param>
    /// <returns>The host builder returned by <c>UseSerilog</c>.</returns>
    internal static IHostBuilder SetupSerilog(this WebApplicationBuilder builder, IConfiguration configuration)
    {
        return builder.Host.UseSerilog(
            (_, loggerConfiguration) => loggerConfiguration.ReadFrom.Configuration(configuration));
    }

    /// <summary>
    /// Adds Wolverine to the host with a default execution timeout of six hours.
    /// </summary>
    /// <param name="builder">The application builder whose host is configured.</param>
    /// <returns>The same <paramref name="builder"/>, for chaining.</returns>
    internal static WebApplicationBuilder SetupWolverine(this WebApplicationBuilder builder)
    {
        var executionTimeout = TimeSpan.FromHours(6);

        builder.Host.UseWolverine(options => options.DefaultExecutionTimeout = executionTimeout);

        return builder;
    }
}
|
||||||
11
src/metadata/Features/Configuration/JobConfiguration.cs
Normal file
11
src/metadata/Features/Configuration/JobConfiguration.cs
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
namespace Metadata.Features.Configuration;
|
||||||
|
|
||||||
|
/// <summary>
/// Job settings, read from <c>METADATA_*</c> environment variables when the object is constructed.
/// </summary>
public class JobConfiguration
{
    private const string EnvPrefix = "METADATA";
    private const string ScheduleKey = "DOWNLOAD_IMDB_DATA_SCHEDULE";
    private const string RunOnceKey = "DOWNLOAD_IMDB_DATA_ONCE";

    /// <summary>
    /// Cron schedule for the IMDb download job, from <c>METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE</c>;
    /// falls back to <see cref="CronExpressions.EveryHour"/>.
    /// </summary>
    public string DownloadImdbCronSchedule { get; init; } =
        EnvironmentExtensions.GetOptionalEnvironmentVariableAsString(EnvPrefix, ScheduleKey, CronExpressions.EveryHour);

    /// <summary>
    /// Value of <c>METADATA_DOWNLOAD_IMDB_DATA_ONCE</c> read as a boolean.
    /// </summary>
    public bool DownloadImdbOnce { get; init; } =
        EnvironmentExtensions.GetEnvironmentVariableAsBool(EnvPrefix, RunOnceKey);
}
|
||||||
19
src/metadata/Features/Configuration/MongoConfiguration.cs
Normal file
19
src/metadata/Features/Configuration/MongoConfiguration.cs
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
namespace Metadata.Features.Configuration;
|
||||||
|
|
||||||
|
/// <summary>
/// MongoDB connection settings, read from <c>MONGODB_*</c> environment variables when the object
/// is constructed. <c>HOST</c>, <c>USER</c>, <c>PASSWORD</c> and <c>DB</c> are required;
/// <c>PORT</c> defaults to 27017.
/// </summary>
public class MongoConfiguration
{
    private const string Prefix = "MONGODB";
    private const string HostVariable = "HOST";
    private const string PortVariable = "PORT";
    private const string DbVariable = "DB";
    private const string UsernameVariable = "USER";
    private const string PasswordVariable = "PASSWORD";

    private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
    private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 27017);
    private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
    private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);

    /// <summary>Name of the database, from <c>MONGODB_DB</c>.</summary>
    public string DbName { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DbVariable);

    /// <summary>
    /// Connection URI built from the settings above. The user name and password are
    /// percent-encoded so that reserved characters (for example <c>@</c>, <c>:</c>, <c>/</c>)
    /// in the credentials cannot corrupt the URI.
    /// </summary>
    public string ConnectionString =>
        $"mongodb://{Uri.EscapeDataString(Username)}:{Uri.EscapeDataString(Password)}@{Host}:{Port}/{DbName}?tls=false&directConnection=true&authSource=admin";
}
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
namespace Metadata.Features.DeleteDownloadedImdbData;
|
||||||
|
|
||||||
|
/// <summary>
/// Message requesting deletion of a file.
/// </summary>
/// <param name="FilePath">Path of the file to delete.</param>
public record DeleteDownloadedImdbDataRequest(string FilePath);
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
namespace Metadata.Features.DeleteDownloadedImdbData;
|
||||||
|
|
||||||
|
/// <summary>
/// Handles <see cref="DeleteDownloadedImdbDataRequest"/> by deleting the referenced file.
/// </summary>
public class DeleteDownloadedImdbDataRequestHandler(ILogger<DeleteDownloadedImdbDataRequestHandler> logger)
{
    /// <summary>
    /// Deletes the file at <c>request.FilePath</c>, logging before and after.
    /// </summary>
    /// <param name="request">Message carrying the path of the file to delete.</param>
    /// <param name="_">Unused cancellation token; the deletion is synchronous.</param>
    /// <returns>An already-completed task.</returns>
    public Task Handle(DeleteDownloadedImdbDataRequest request, CancellationToken _)
    {
        var path = request.FilePath;

        logger.LogInformation("Deleting file {FilePath}", path);
        File.Delete(path);
        logger.LogInformation("File Deleted");

        return Task.CompletedTask;
    }
}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
namespace Metadata.Features.DownloadImdbData;
|
||||||
|
|
||||||
|
/// <summary>
/// Job that kicks off the IMDb import pipeline by sending a <see cref="GetImdbDataRequest"/>
/// on the message bus.
/// </summary>
public class DownloadImdbDataJob(IMessageBus messageBus, JobConfiguration configuration) : BaseJob
{
    /// <summary>
    /// <c>true</c> only when the job is not configured to run once and a cron schedule is present.
    /// </summary>
    public override bool IsScheduelable
    {
        get
        {
            if (configuration.DownloadImdbOnce)
            {
                return false;
            }

            return !string.IsNullOrEmpty(configuration.DownloadImdbCronSchedule);
        }
    }

    /// <summary>The job's name: the name of this class.</summary>
    public override string JobName => nameof(DownloadImdbDataJob);

    /// <summary>Sends a new <see cref="GetImdbDataRequest"/> via the message bus.</summary>
    public override async Task Invoke()
    {
        await messageBus.SendAsync(new GetImdbDataRequest());
    }
}
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
namespace Metadata.Features.DownloadImdbData;
|
||||||
|
|
||||||
|
/// <summary>
/// Parameterless message requesting a download of the IMDb data.
/// </summary>
public record GetImdbDataRequest;
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
namespace Metadata.Features.DownloadImdbData;
|
||||||
|
|
||||||
|
/// <summary>
/// Handles <see cref="GetImdbDataRequest"/> by downloading the gzipped IMDb title-basics file,
/// decompressing it into the temp directory, and handing the resulting path to the import step.
/// </summary>
public class GetImdbDataRequestHandler(IHttpClientFactory clientFactory, ILogger<GetImdbDataRequestHandler> logger)
{
    private const string TitleBasicsFileName = "title.basics.tsv";

    /// <summary>
    /// Downloads <c>title.basics.tsv.gz</c> with the named IMDb HTTP client and writes the
    /// decompressed content to <c>{temp}/title.basics.tsv</c>.
    /// </summary>
    /// <param name="_">The (parameterless) request message.</param>
    /// <param name="cancellationToken">Cancels the download and the copy to disk.</param>
    /// <returns>An <see cref="ImportImdbDataRequest"/> carrying the path of the decompressed file.</returns>
    /// <exception cref="HttpRequestException">The server returned a non-success status code.</exception>
    public async Task<ImportImdbDataRequest> Handle(GetImdbDataRequest _, CancellationToken cancellationToken)
    {
        logger.LogInformation("Downloading IMDB data");

        var client = clientFactory.CreateClient(HttpClients.ImdbDataClientName);

        // ResponseHeadersRead streams the body instead of buffering the whole archive in memory,
        // and means HttpClient.Timeout only covers receiving the headers, not the full download.
        using var response = await client.GetAsync(
            $"{TitleBasicsFileName}.gz",
            HttpCompletionOption.ResponseHeadersRead,
            cancellationToken);

        response.EnsureSuccessStatusCode();

        var tempFile = Path.Combine(Path.GetTempPath(), TitleBasicsFileName);

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
        await using var gzipStream = new GZipStream(stream, CompressionMode.Decompress);

        // Scoped so the file is flushed and closed before its path is handed to the next handler.
        await using (var fileStream = File.Create(tempFile))
        {
            await gzipStream.CopyToAsync(fileStream, cancellationToken);
        }

        logger.LogInformation("Downloaded IMDB data to {TempFile}", tempFile);

        return new(tempFile);
    }
}
|
||||||
15
src/metadata/Features/ImportImdbData/ImdbEntry.cs
Normal file
15
src/metadata/Features/ImportImdbData/ImdbEntry.cs
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
namespace Metadata.Features.ImportImdbData;
|
||||||
|
|
||||||
|
/// <summary>
/// A single IMDb title record as stored in MongoDB. All values are kept as the raw strings
/// they were read as; no parsing or conversion is applied by this type.
/// </summary>
public class ImdbEntry
{
    /// <summary>IMDb identifier; used as the MongoDB document id.</summary>
    [BsonId]
    public string ImdbId { get; set; } = null!;

    /// <summary>Title type, as a raw string.</summary>
    public string? TitleType { get; set; }

    /// <summary>Primary title.</summary>
    public string? PrimaryTitle { get; set; }

    /// <summary>Original title.</summary>
    public string? OriginalTitle { get; set; }

    /// <summary>Adult flag, as a raw string.</summary>
    public string? IsAdult { get; set; }

    /// <summary>Start year, as a raw string.</summary>
    public string? StartYear { get; set; }

    /// <summary>End year, as a raw string.</summary>
    public string? EndYear { get; set; }

    /// <summary>Runtime in minutes, as a raw string.</summary>
    public string? RuntimeMinutes { get; set; }

    /// <summary>Genres, as a raw string.</summary>
    public string? Genres { get; set; }
}
|
||||||
59
src/metadata/Features/ImportImdbData/ImdbMongoDbService.cs
Normal file
59
src/metadata/Features/ImportImdbData/ImdbMongoDbService.cs
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
namespace Metadata.Features.ImportImdbData;
|
||||||
|
|
||||||
|
/// <summary>
/// Data access for the <c>imdb-entries</c> MongoDB collection.
/// </summary>
public class ImdbMongoDbService
{
    private readonly ILogger<ImdbMongoDbService> _logger;
    private readonly IMongoCollection<ImdbEntry> _imdbCollection;

    /// <summary>
    /// Connects to the database named by <paramref name="configuration"/> and binds to the
    /// <c>imdb-entries</c> collection.
    /// </summary>
    /// <param name="configuration">Connection string and database name.</param>
    /// <param name="logger">Logger for database errors.</param>
    public ImdbMongoDbService(MongoConfiguration configuration, ILogger<ImdbMongoDbService> logger)
    {
        _logger = logger;

        var client = new MongoClient(configuration.ConnectionString);
        var database = client.GetDatabase(configuration.DbName);

        _imdbCollection = database.GetCollection<ImdbEntry>("imdb-entries");
    }

    /// <summary>
    /// Upserts the given entries in one bulk write. Every field is written with
    /// <c>SetOnInsert</c>, so a document whose id already exists is left unchanged.
    /// </summary>
    /// <param name="entries">Entries to insert; an empty sequence is a no-op.</param>
    /// <param name="cancellationToken">Cancels the bulk write.</param>
    public async Task InsertImdbEntries(IEnumerable<ImdbEntry> entries, CancellationToken cancellationToken = default)
    {
        var operations = entries
            .Select(entry =>
            {
                var filter = Builders<ImdbEntry>.Filter.Eq(e => e.ImdbId, entry.ImdbId);
                var update = Builders<ImdbEntry>.Update
                    .SetOnInsert(e => e.TitleType, entry.TitleType)
                    .SetOnInsert(e => e.PrimaryTitle, entry.PrimaryTitle)
                    .SetOnInsert(e => e.OriginalTitle, entry.OriginalTitle)
                    .SetOnInsert(e => e.IsAdult, entry.IsAdult)
                    .SetOnInsert(e => e.StartYear, entry.StartYear)
                    .SetOnInsert(e => e.EndYear, entry.EndYear)
                    .SetOnInsert(e => e.RuntimeMinutes, entry.RuntimeMinutes)
                    .SetOnInsert(e => e.Genres, entry.Genres);

                return (WriteModel<ImdbEntry>)new UpdateOneModel<ImdbEntry>(filter, update) { IsUpsert = true };
            })
            .ToList();

        // The driver rejects a bulk write with zero requests, so there is nothing to do here.
        if (operations.Count == 0)
        {
            return;
        }

        await _imdbCollection.BulkWriteAsync(operations, cancellationToken: cancellationToken);
    }

    /// <summary>
    /// Ensures an ascending index on <c>PrimaryTitle</c> exists.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the index call succeeds; <c>false</c> (after logging the error) when it throws.
    /// </returns>
    public bool IsDatabaseInitialized()
    {
        try
        {
            // Create index for PrimaryTitle
            var indexPrimaryTitle = Builders<ImdbEntry>.IndexKeys.Ascending(e => e.PrimaryTitle);
            var modelPrimaryTitle = new CreateIndexModel<ImdbEntry>(indexPrimaryTitle);
            _imdbCollection.Indexes.CreateOne(modelPrimaryTitle);

            return true;
        }
        catch (Exception e)
        {
            _logger.LogError(e, "Error initializing database");
            return false;
        }
    }
}
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
namespace Metadata.Features.ImportImdbData;
|
||||||
|
|
||||||
|
/// <summary>
/// Message requesting that the IMDb data file at the given path be imported.
/// </summary>
/// <param name="FilePath">Path of the file to import.</param>
public record ImportImdbDataRequest(string FilePath);
|
||||||
@@ -0,0 +1,94 @@
|
|||||||
|
namespace Metadata.Features.ImportImdbData;
|
||||||
|
|
||||||
|
/// <summary>
/// Handles <see cref="ImportImdbDataRequest"/>: reads the tab-separated IMDb file and writes its
/// rows to MongoDB in batches. A reader task feeds a bounded channel and a writer task drains it.
/// </summary>
public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService)
{
    private const int BatchSize = 50_000;

    /// <summary>
    /// Imports every row of the file at <c>request.FilePath</c>.
    /// </summary>
    /// <param name="request">Message carrying the path of the file to import.</param>
    /// <param name="cancellationToken">Cancels both the reading and the writing side.</param>
    /// <returns>A <see cref="DeleteDownloadedImdbDataRequest"/> for the same file path.</returns>
    /// <exception cref="OperationCanceledException">
    /// The import was cancelled; a partial import is not reported as success.
    /// </exception>
    public async Task<DeleteDownloadedImdbDataRequest> Handle(ImportImdbDataRequest request, CancellationToken cancellationToken)
    {
        logger.LogInformation("Importing Downloaded IMDB data from {FilePath}", request.FilePath);

        var config = new CsvConfiguration(CultureInfo.InvariantCulture)
        {
            Delimiter = "\t",
            // The file does not use quoting; a literal '"' in a title must not start a quoted field.
            Mode = CsvMode.NoEscape,
            BadDataFound = null, // Skip Bad Data from Imdb
            MissingFieldFound = null, // Skip Missing Fields from Imdb
        };

        using var reader = new StreamReader(request.FilePath);
        using var csv = new CsvReader(reader, config);

        var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(BatchSize)
        {
            FullMode = BoundedChannelFullMode.Wait,
        });

        // Skip the header row
        await csv.ReadAsync();

        // Linked source: lets a failing writer stop the reader, which would otherwise block
        // forever on a full channel that nobody drains.
        using var pipelineCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);

        var batchInsertTask = CreateBatchOfEntries(channel, pipelineCts);
        var readTask = ReadEntries(csv, channel, pipelineCts.Token);

        // Waits for BOTH sides before the reader/csv are disposed, and surfaces a real failure
        // in preference to the cancellation it triggered on the other side.
        await Task.WhenAll(batchInsertTask, readTask);

        return new(request.FilePath);
    }

    // Drains the channel on a thread-pool task, grouping whatever is currently buffered
    // (up to BatchSize) into one bulk insert. Cancels the pipeline if an insert fails.
    private Task CreateBatchOfEntries(Channel<ImdbEntry, ImdbEntry> channel, CancellationTokenSource pipelineCts)
    {
        var token = pipelineCts.Token;

        return Task.Run(async () =>
        {
            try
            {
                await foreach (var movieData in channel.Reader.ReadAllAsync(token))
                {
                    token.ThrowIfCancellationRequested();

                    var batch = new List<ImdbEntry>
                    {
                        movieData,
                    };

                    while (batch.Count < BatchSize && channel.Reader.TryRead(out var nextMovieData))
                    {
                        batch.Add(nextMovieData);
                    }

                    await mongoDbService.InsertImdbEntries(batch);
                    logger.LogInformation("Imported batch of {BatchSize} starting with ImdbId {FirstImdbId}", batch.Count, batch[0].ImdbId);
                }
            }
            catch (Exception) when (!token.IsCancellationRequested)
            {
                // Genuine failure (not a cancellation): release the reader, then rethrow.
                pipelineCts.Cancel();
                throw;
            }
        }, token);
    }

    // Reads rows one at a time and pushes them into the channel. Always completes the writer,
    // even on failure or cancellation, so the draining side can finish.
    private static async Task ReadEntries(CsvReader csv, Channel<ImdbEntry, ImdbEntry> channel, CancellationToken cancellationToken)
    {
        try
        {
            while (await csv.ReadAsync())
            {
                cancellationToken.ThrowIfCancellationRequested();

                var movieData = new ImdbEntry
                {
                    ImdbId = csv.GetField(0),
                    TitleType = csv.GetField(1),
                    PrimaryTitle = csv.GetField(2),
                    OriginalTitle = csv.GetField(3),
                    IsAdult = csv.GetField(4),
                    StartYear = csv.GetField(5),
                    EndYear = csv.GetField(6),
                    RuntimeMinutes = csv.GetField(7),
                    Genres = csv.GetField(8),
                };

                await channel.Writer.WriteAsync(movieData, cancellationToken);
            }
        }
        finally
        {
            channel.Writer.TryComplete();
        }
    }
}
|
||||||
10
src/metadata/Features/Jobs/BaseJob.cs
Normal file
10
src/metadata/Features/Jobs/BaseJob.cs
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
namespace Metadata.Features.Jobs;
|
||||||
|
|
||||||
|
/// <summary>
/// Base class for metadata jobs; every member of <see cref="IMetadataJob"/> is left for the
/// concrete job to implement.
/// </summary>
public abstract class BaseJob : IMetadataJob
{
    /// <summary>Name of the job.</summary>
    public abstract string JobName { get; }

    /// <summary>Whether the job is to be registered on a schedule.</summary>
    public abstract bool IsScheduelable { get; }

    /// <summary>Runs the job.</summary>
    /// <returns>A task that completes when the job's work is done.</returns>
    public abstract Task Invoke();
}
|
||||||
7
src/metadata/Features/Jobs/IMetadataJob.cs
Normal file
7
src/metadata/Features/Jobs/IMetadataJob.cs
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
namespace Metadata.Features.Jobs;
|
||||||
|
|
||||||
|
/// <summary>
/// An invocable job that also exposes a name and whether it can be scheduled.
/// </summary>
public interface IMetadataJob : IInvocable
{
    /// <summary>Whether the job is to be registered on a schedule.</summary>
    bool IsScheduelable { get; }

    /// <summary>Name of the job.</summary>
    string JobName { get; }
}
|
||||||
34
src/metadata/Features/Jobs/JobScheduler.cs
Normal file
34
src/metadata/Features/Jobs/JobScheduler.cs
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
namespace Metadata.Features.Jobs;
|
||||||
|
|
||||||
|
/// <summary>
/// Hosted service that, at startup, verifies MongoDB is usable and then either runs the IMDb
/// download job once or registers it with the scheduler on its configured cron expression.
/// </summary>
public class JobScheduler(IServiceProvider serviceProvider) : IHostedService
{
    /// <summary>
    /// Initializes the database and starts or schedules the download job.
    /// </summary>
    /// <param name="cancellationToken">Host startup cancellation token (not used by this method).</param>
    /// <exception cref="InvalidOperationException">The MongoDB initialization check failed.</exception>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        await using var scope = serviceProvider.CreateAsyncScope();

        var mongoDbService = scope.ServiceProvider.GetRequiredService<ImdbMongoDbService>();

        if (!mongoDbService.IsDatabaseInitialized())
        {
            throw new InvalidOperationException("MongoDb is not initialized");
        }

        var jobConfigurations = scope.ServiceProvider.GetRequiredService<JobConfiguration>();
        var downloadJob = scope.ServiceProvider.GetRequiredService<DownloadImdbDataJob>();

        if (!downloadJob.IsScheduelable)
        {
            // Await here so the scope (and the services the job resolved from it) is not
            // disposed while the job is still running.
            await downloadJob.Invoke();
            return;
        }

        var scheduler = scope.ServiceProvider.GetRequiredService<IScheduler>();

        // Use the job's actual name as the overlap key; nameof(...) would yield the literal "JobName".
        scheduler.Schedule<DownloadImdbDataJob>()
            .Cron(jobConfigurations.DownloadImdbCronSchedule)
            .PreventOverlapping(downloadJob.JobName);
    }

    /// <summary>No shutdown work is required.</summary>
    /// <param name="cancellationToken">Host shutdown cancellation token (unused).</param>
    public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
}
|
||||||
9
src/metadata/Features/Literals/CronExpressions.cs
Normal file
9
src/metadata/Features/Literals/CronExpressions.cs
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
namespace Metadata.Features.Literals;
|
||||||
|
|
||||||
|
/// <summary>
/// Ready-made five-field cron expressions
/// (<c>minute hour day-of-month month day-of-week</c>).
/// </summary>
public static class CronExpressions
{
    /// <summary>At minute 0 of every hour.</summary>
    public const string EveryHour = "0 * * * *";

    /// <summary>At 00:00 every day.</summary>
    public const string EveryDay = "0 0 * * *";

    /// <summary>At 00:00 every Sunday.</summary>
    public const string EveryWeek = "0 0 * * 0";

    /// <summary>At 00:00 on the first day of every month.</summary>
    public const string EveryMonth = "0 0 1 * *";
}
|
||||||
7
src/metadata/Features/Literals/HttpClients.cs
Normal file
7
src/metadata/Features/Literals/HttpClients.cs
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
namespace Metadata.Features.Literals;
|
||||||
|
|
||||||
|
/// <summary>
/// Names and addresses of the HTTP clients used by the service.
/// </summary>
public static class HttpClients
{
    /// <summary>Base address of the IMDb datasets site.</summary>
    public const string ImdbClientBaseAddress = "https://datasets.imdbws.com/";

    /// <summary>Name of the HTTP client used for IMDb data downloads.</summary>
    public const string ImdbDataClientName = "imdb-data";
}
|
||||||
26
src/metadata/GlobalUsings.cs
Normal file
26
src/metadata/GlobalUsings.cs
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
// Global using directives
|
||||||
|
|
||||||
|
global using System.Globalization;
|
||||||
|
global using System.IO.Compression;
|
||||||
|
global using System.Text.Json;
|
||||||
|
global using System.Threading.Channels;
|
||||||
|
global using Coravel;
|
||||||
|
global using Coravel.Invocable;
|
||||||
|
global using Coravel.Scheduling.Schedule.Interfaces;
|
||||||
|
global using CsvHelper;
|
||||||
|
global using CsvHelper.Configuration;
|
||||||
|
global using JasperFx.Core;
|
||||||
|
global using Metadata.Extensions;
|
||||||
|
global using Metadata.Features.Configuration;
|
||||||
|
global using Metadata.Features.DeleteDownloadedImdbData;
|
||||||
|
global using Metadata.Features.DownloadImdbData;
|
||||||
|
global using Metadata.Features.ImportImdbData;
|
||||||
|
global using Metadata.Features.Jobs;
|
||||||
|
global using Metadata.Features.Literals;
|
||||||
|
global using Microsoft.AspNetCore.Builder;
|
||||||
|
global using Microsoft.Extensions.DependencyInjection;
|
||||||
|
global using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||||
|
global using MongoDB.Bson.Serialization.Attributes;
|
||||||
|
global using MongoDB.Driver;
|
||||||
|
global using Serilog;
|
||||||
|
global using Wolverine;
|
||||||
30
src/metadata/Metadata.csproj
Normal file
30
src/metadata/Metadata.csproj
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk.Worker">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
<UserSecretsId>54cad2ee-57df-4bb2-a192-d5d501448e0a</UserSecretsId>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="Coravel" Version="5.0.3" />
|
||||||
|
<PackageReference Include="CsvHelper" Version="31.0.0" />
|
||||||
|
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
|
||||||
|
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||||
|
<PackageReference Include="MongoDB.Driver" Version="2.24.0" />
|
||||||
|
<PackageReference Include="Serilog" Version="3.1.1" />
|
||||||
|
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
|
||||||
|
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
|
||||||
|
<PackageReference Include="WolverineFx" Version="1.20.1" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<Content Remove="Configuration\logging.json" />
|
||||||
|
<None Include="Configuration\logging.json">
|
||||||
|
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||||
|
</None>
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
13
src/metadata/Program.cs
Normal file
13
src/metadata/Program.cs
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
// Entry point: configure the host, register services, then run until shutdown.
var builder = WebApplication.CreateBuilder(args);

builder.Configuration.AddServiceConfiguration();
builder.SetupSerilog(builder.Configuration);
builder.SetupWolverine();

// AddMongoDb must be included: JobScheduler and the import handler both require
// ImdbMongoDbService, which (with MongoConfiguration) is only registered there.
builder.Services
    .AddHttpClients()
    .AddMongoDb()
    .AddJobSupport();

var host = builder.Build();

await host.RunAsync();
|
||||||
Reference in New Issue
Block a user