Add cleanup of MongoDB initialization; ensure indexes are created for compound searching

This commit is contained in:
iPromKnight
2024-02-28 14:57:26 +00:00
parent 1b9a01c677
commit d0346f29bf
8 changed files with 58 additions and 19 deletions

View File

@@ -27,9 +27,11 @@ RABBITMQ_PUBLISH_INTERVAL_IN_SECONDS=10
# Metadata
## Only used if DATA_ONCE is set to false. If true, the schedule is ignored
METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE=0 0 1 * * *
METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE="0 0 1 * * *"
## If true, the metadata will be downloaded once and then the schedule will be ignored
METADATA_DOWNLOAD_IMDB_DATA_ONCE=true
## Controls the amount of records processed in memory at any given time during import, higher values will consume more memory
METADATA_INSERT_BATCH_SIZE=25000
# Addon
DEBUG_MODE=false

View File

@@ -5,7 +5,9 @@ public class JobConfiguration
private const string Prefix = "METADATA";
private const string DownloadImdbDataVariable = "DOWNLOAD_IMDB_DATA_SCHEDULE";
private const string DownloadImdbDataOnceVariable = "DOWNLOAD_IMDB_DATA_ONCE";
private const string InsertBatchSizeVariable = "INSERT_BATCH_SIZE";
public int InsertBatchSize { get; init; } = Prefix.GetEnvironmentVariableAsInt(InsertBatchSizeVariable, 25_000);
public string DownloadImdbCronSchedule { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(DownloadImdbDataVariable, CronExpressions.EveryHour);
public bool DownloadImdbOnce { get; init; } = Prefix.GetEnvironmentVariableAsBool(DownloadImdbDataOnceVariable);
}

View File

@@ -8,6 +8,7 @@ public class MongoConfiguration
private const string DbVariable = "DB";
private const string UsernameVariable = "USER";
private const string PasswordVariable = "PASSWORD";
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 27017);

View File

@@ -42,11 +42,16 @@ public class ImdbMongoDbService
{
try
{
// Create index for PrimaryTitle
var indexPrimaryTitle = Builders<ImdbEntry>.IndexKeys.Ascending(e => e.PrimaryTitle);
var modelPrimaryTitle = new CreateIndexModel<ImdbEntry>(indexPrimaryTitle);
_imdbCollection.Indexes.CreateOne(modelPrimaryTitle);
// Create compound index for PrimaryTitle, TitleType, and StartYear
var indexKeysDefinition = Builders<ImdbEntry>.IndexKeys
.Text(e => e.PrimaryTitle)
.Ascending(e => e.TitleType)
.Ascending(e => e.StartYear);
var createIndexOptions = new CreateIndexOptions { Background = true };
var indexModel = new CreateIndexModel<ImdbEntry>(indexKeysDefinition, createIndexOptions);
_imdbCollection.Indexes.CreateOne(indexModel);
return true;
}

View File

@@ -1,9 +1,7 @@
namespace Metadata.Features.ImportImdbData;
public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService)
public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService, JobConfiguration configuration)
{
private const int BatchSize = 50_000;
public async Task<DeleteDownloadedImdbDataRequest> Handle(ImportImdbDataRequest request, CancellationToken cancellationToken)
{
logger.LogInformation("Importing Downloaded IMDB data from {FilePath}", request.FilePath);
@@ -18,7 +16,7 @@ public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler>
using var reader = new StreamReader(request.FilePath);
using var csv = new CsvReader(reader, config);
var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(BatchSize)
var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(configuration.InsertBatchSize)
{
FullMode = BoundedChannelFullMode.Wait,
});
@@ -53,7 +51,7 @@ public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler>
movieData,
};
while (batch.Count < BatchSize && channel.Reader.TryRead(out var nextMovieData))
while (batch.Count < configuration.InsertBatchSize && channel.Reader.TryRead(out var nextMovieData))
{
batch.Add(nextMovieData);
}

View File

@@ -13,4 +13,6 @@ const ImdbEntriesSchema: Schema = new Schema({
TitleType: { type: String, default: "" },
});
ImdbEntriesSchema.index({ PrimaryTitle: 'text', TitleType: 1, StartYear: 1 }, { background: true });
export const ImdbEntryModel = mongoose.model<IImdbEntry>('ImdbEntry', ImdbEntriesSchema, 'imdb-entries');

View File

@@ -1,17 +1,28 @@
import {TorrentType} from "@enums/torrent_types";
import {ILoggingService} from "@interfaces/logging_service";
import {IMongoMetadataQuery} from "@mongo/interfaces/mongo_metadata_query";
import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
import {ImdbEntryModel} from "@mongo/models/imdb_entries_model";
import {configurationService} from '@services/configuration_service';
import {injectable} from "inversify";
import {IocTypes} from "@setup/ioc_types";
import {inject, injectable} from "inversify";
import mongoose from 'mongoose';
@injectable()
export class MongoRepository implements IMongoRepository {
@inject(IocTypes.ILoggingService) private logger: ILoggingService;
private db: typeof mongoose = mongoose;
async connect() : Promise<void> {
await this.db.connect(configurationService.cacheConfig.MONGO_URI, {directConnection: true});
try {
await this.db.connect(configurationService.cacheConfig.MONGO_URI, {directConnection: true});
this.logger.info('Successfully connected to mongo db');
}
catch (error) {
this.logger.debug('Failed to connect to mongo db', error);
this.logger.error('Failed to connect to mongo db');
process.exit(1);
}
}
async getImdbId(title: string, category: string, year?: string | number) : Promise<string | null> {
@@ -35,7 +46,12 @@ export class MongoRepository implements IMongoRepository {
query.StartYear = year.toString();
}
const result = await ImdbEntryModel.findOne(query);
return result ? result._id : null;
try {
const result = await ImdbEntryModel.findOne(query, '_id').maxTimeMS(30000);
return result ? result._id : null;
} catch (error) {
this.logger.error('Query exceeded the 30 seconds time limit', error);
return null;
}
}
}

View File

@@ -1,7 +1,9 @@
import "reflect-metadata"; // required
import {TorrentType} from "@enums/torrent_types";
import {ILoggingService} from "@interfaces/logging_service";
import {MongoRepository} from "@mongo/mongo_repository";
import {Container} from "inversify";
import {IocTypes} from "@setup/ioc_types";
import {Container, inject} from "inversify";
jest.mock('@services/configuration_service', () => {
return {
@@ -20,13 +22,24 @@ jest.mock('@services/configuration_service', () => {
}
});
jest.mock('@services/logging_service', () => {
return {
error: jest.fn(),
info: jest.fn(),
debug: jest.fn(),
}
})
xdescribe('MongoRepository Tests - Manual Tests against real cluster. Skipped by default.', () => {
let mongoRepository: MongoRepository;
let mongoRepository: MongoRepository,
mockLogger: ILoggingService;
beforeEach(() => {
jest.clearAllMocks();
process.env.LOG_LEVEL = 'debug';
mockLogger = jest.requireMock<ILoggingService>('@services/logging_service');
const container = new Container();
container.bind<ILoggingService>(IocTypes.ILoggingService).toConstantValue(mockLogger);
container.bind<MongoRepository>(MongoRepository).toSelf();
mongoRepository = container.get(MongoRepository);
});