Clean up MongoDB initialization; ensure indexes are created for compound searching

Author: iPromKnight
Date: 2024-02-28 14:57:26 +00:00
Parent: 1b9a01c677
Commit: d0346f29bf
8 changed files with 58 additions and 19 deletions

View File: environment configuration (.env)

@@ -27,9 +27,11 @@ RABBITMQ_PUBLISH_INTERVAL_IN_SECONDS=10
 # Metadata
 ## Only used if DATA_ONCE is set to false. If true, the schedule is ignored
-METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE=0 0 1 * * *
+METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE="0 0 1 * * *"
 ## If true, the metadata will be downloaded once and then the schedule will be ignored
 METADATA_DOWNLOAD_IMDB_DATA_ONCE=true
+## Controls the amount of records processed in memory at any given time during import, higher values will consume more memory
+METADATA_INSERT_BATCH_SIZE=25000
 
 # Addon
 DEBUG_MODE=false
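
The schedule is a six-field cron expression with a leading seconds field, which is why the quoting fix matters: the quotes keep the space-separated fields intact when the file is consumed by shell-style tooling. A minimal sketch of how "0 0 1 * * *" (second, minute, hour, day, month, weekday) resolves to 01:00:00 daily, assuming the Cronos NuGet package rather than whatever scheduler the repo actually uses:

    // A minimal sketch, assuming the Cronos NuGet package (the repo's actual
    // scheduler is not shown in this diff).
    using Cronos;

    // Fields: sec min hour day month weekday.
    var schedule = CronExpression.Parse("0 0 1 * * *", CronFormat.IncludeSeconds);

    // Next occurrence is the upcoming 01:00:00 UTC, i.e. once per day.
    Console.WriteLine(schedule.GetNextOccurrence(DateTime.UtcNow));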

View File: JobConfiguration (C#)

@@ -5,7 +5,9 @@ public class JobConfiguration
     private const string Prefix = "METADATA";
     private const string DownloadImdbDataVariable = "DOWNLOAD_IMDB_DATA_SCHEDULE";
     private const string DownloadImdbDataOnceVariable = "DOWNLOAD_IMDB_DATA_ONCE";
+    private const string InsertBatchSizeVariable = "INSERT_BATCH_SIZE";
 
+    public int InsertBatchSize { get; init; } = Prefix.GetEnvironmentVariableAsInt(InsertBatchSizeVariable, 25_000);
     public string DownloadImdbCronSchedule { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(DownloadImdbDataVariable, CronExpressions.EveryHour);
     public bool DownloadImdbOnce { get; init; } = Prefix.GetEnvironmentVariableAsBool(DownloadImdbDataOnceVariable);
 }
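
The new InsertBatchSize property relies on the repo's prefix-based environment helpers, whose implementation sits outside this diff. A hypothetical sketch of what such an extension method could look like, purely for orientation (the real helper may differ):

    // Hypothetical sketch of the prefix-based helper used above; the actual
    // extension method lives elsewhere in the repo and is an assumption here.
    public static class EnvironmentVariableExtensions
    {
        public static int GetEnvironmentVariableAsInt(this string prefix, string name, int defaultValue = 0)
        {
            // JobConfiguration's Prefix ("METADATA") plus the variable name
            // yields e.g. METADATA_INSERT_BATCH_SIZE.
            var value = Environment.GetEnvironmentVariable($"{prefix}_{name}");
            return int.TryParse(value, out var parsed) ? parsed : defaultValue;
        }
    }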

View File: MongoConfiguration (C#)

@@ -8,6 +8,7 @@ public class MongoConfiguration
     private const string DbVariable = "DB";
     private const string UsernameVariable = "USER";
     private const string PasswordVariable = "PASSWORD";
 
     private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
     private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 27017);

View File: ImdbMongoDbService (C#)

@@ -42,11 +42,16 @@ public class ImdbMongoDbService
     {
         try
         {
-            // Create index for PrimaryTitle
-            var indexPrimaryTitle = Builders<ImdbEntry>.IndexKeys.Ascending(e => e.PrimaryTitle);
-            var modelPrimaryTitle = new CreateIndexModel<ImdbEntry>(indexPrimaryTitle);
-            _imdbCollection.Indexes.CreateOne(modelPrimaryTitle);
+            // Create compound index for PrimaryTitle, TitleType, and StartYear
+            var indexKeysDefinition = Builders<ImdbEntry>.IndexKeys
+                .Text(e => e.PrimaryTitle)
+                .Ascending(e => e.TitleType)
+                .Ascending(e => e.StartYear);
+
+            var createIndexOptions = new CreateIndexOptions { Background = true };
+            var indexModel = new CreateIndexModel<ImdbEntry>(indexKeysDefinition, createIndexOptions);
+            _imdbCollection.Indexes.CreateOne(indexModel);
 
             return true;
         }
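
With the text key leading the compound index, one index can back searches that combine a full-text match on PrimaryTitle with equality filters on TitleType and StartYear. A hedged sketch of the kind of lookup this index is meant to serve, using the same C# driver inside a service like the one above (the filter values are invented for illustration):

    // Hedged sketch; _imdbCollection is the service's collection field from
    // the diff above, and the concrete values are made up.
    var filter = Builders<ImdbEntry>.Filter.Text("the matrix")
                 & Builders<ImdbEntry>.Filter.Eq(e => e.TitleType, "movie")
                 & Builders<ImdbEntry>.Filter.Eq(e => e.StartYear, "1999");

    // $text uses the index's text key; the equality filters use the trailing keys.
    var match = await _imdbCollection.Find(filter).FirstOrDefaultAsync();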

View File: ImportImdbDataRequestHandler (C#)

@@ -1,9 +1,7 @@
 namespace Metadata.Features.ImportImdbData;
 
-public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService)
+public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService, JobConfiguration configuration)
 {
-    private const int BatchSize = 50_000;
-
     public async Task<DeleteDownloadedImdbDataRequest> Handle(ImportImdbDataRequest request, CancellationToken cancellationToken)
     {
         logger.LogInformation("Importing Downloaded IMDB data from {FilePath}", request.FilePath);
@@ -18,7 +16,7 @@ public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler>
         using var reader = new StreamReader(request.FilePath);
         using var csv = new CsvReader(reader, config);
 
-        var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(BatchSize)
+        var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(configuration.InsertBatchSize)
         {
             FullMode = BoundedChannelFullMode.Wait,
         });
@@ -53,7 +51,7 @@ public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler>
                     movieData,
                 };
 
-                while (batch.Count < BatchSize && channel.Reader.TryRead(out var nextMovieData))
+                while (batch.Count < configuration.InsertBatchSize && channel.Reader.TryRead(out var nextMovieData))
                 {
                     batch.Add(nextMovieData);
                 }
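
Both hunks touch the same producer/consumer pattern: the CSV reader writes rows into a bounded channel, and the import loop drains up to the batch size per database write, so InsertBatchSize now caps both the channel capacity and the insert size. A self-contained sketch of that drain pattern, with illustrative types and a tiny capacity standing in for the repo's:

    // Simplified sketch of the bounded-channel batching above; types and
    // sizes are illustrative, not the repo's.
    using System.Threading.Channels;

    var channel = Channel.CreateBounded<string>(new BoundedChannelOptions(3)
    {
        FullMode = BoundedChannelFullMode.Wait, // producer waits when full (backpressure)
    });

    var producer = Task.Run(async () =>
    {
        foreach (var row in new[] { "a", "b", "c", "d", "e" })
            await channel.Writer.WriteAsync(row);
        channel.Writer.Complete();
    });

    // Consumer: take one item, then greedily drain up to the batch size.
    while (await channel.Reader.WaitToReadAsync())
    {
        if (!channel.Reader.TryRead(out var first)) continue;
        var batch = new List<string> { first };
        while (batch.Count < 3 && channel.Reader.TryRead(out var next))
            batch.Add(next);
        Console.WriteLine($"flushing batch of {batch.Count}");
    }

    await producer;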

View File: imdb_entries_model (Mongoose schema, TypeScript)

@@ -13,4 +13,6 @@ const ImdbEntriesSchema: Schema = new Schema({
     TitleType: { type: String, default: "" },
 });
 
+ImdbEntriesSchema.index({ PrimaryTitle: 'text', TitleType: 1, StartYear: 1 }, { background: true });
+
 export const ImdbEntryModel = mongoose.model<IImdbEntry>('ImdbEntry', ImdbEntriesSchema, 'imdb-entries');

View File: mongo_repository (TypeScript)

@@ -1,17 +1,28 @@
 import {TorrentType} from "@enums/torrent_types";
+import {ILoggingService} from "@interfaces/logging_service";
 import {IMongoMetadataQuery} from "@mongo/interfaces/mongo_metadata_query";
 import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
 import {ImdbEntryModel} from "@mongo/models/imdb_entries_model";
 import {configurationService} from '@services/configuration_service';
-import {injectable} from "inversify";
+import {IocTypes} from "@setup/ioc_types";
+import {inject, injectable} from "inversify";
 import mongoose from 'mongoose';
 
 @injectable()
 export class MongoRepository implements IMongoRepository {
+    @inject(IocTypes.ILoggingService) private logger: ILoggingService;
     private db: typeof mongoose = mongoose;
 
     async connect() : Promise<void> {
-        await this.db.connect(configurationService.cacheConfig.MONGO_URI, {directConnection: true});
+        try {
+            await this.db.connect(configurationService.cacheConfig.MONGO_URI, {directConnection: true});
+            this.logger.info('Successfully connected to mongo db');
+        }
+        catch (error) {
+            this.logger.debug('Failed to connect to mongo db', error);
+            this.logger.error('Failed to connect to mongo db');
+            process.exit(1);
+        }
     }
 
     async getImdbId(title: string, category: string, year?: string | number) : Promise<string | null> {
@@ -35,7 +46,12 @@ export class MongoRepository implements IMongoRepository {
             query.StartYear = year.toString();
         }
 
-        const result = await ImdbEntryModel.findOne(query);
-        return result ? result._id : null;
+        try {
+            const result = await ImdbEntryModel.findOne(query, '_id').maxTimeMS(30000);
+            return result ? result._id : null;
+        } catch (error) {
+            this.logger.error('Query exceeded the 30 seconds time limit', error);
+            return null;
+        }
     }
 }
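
The projection ('_id') and maxTimeMS(30000) keep the lookup cheap and bounded on the server. For comparison, a hedged sketch of the same time-boxed, projected query through the C# driver used elsewhere in this commit (only the Node side actually does this in the repo, and the sketch assumes the entity maps _id to an Id property):

    // Hedged C# counterpart of the time-boxed lookup above; _imdbCollection
    // and the Id mapping are assumptions borrowed from ImdbMongoDbService.
    var options = new FindOptions { MaxTime = TimeSpan.FromSeconds(30) };

    var id = await _imdbCollection
        .Find(Builders<ImdbEntry>.Filter.Text("the matrix"), options)
        .Project(e => e.Id)   // fetch only the _id, like findOne(query, '_id')
        .FirstOrDefaultAsync();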

View File: mongo_repository tests (TypeScript, Jest)

@@ -1,7 +1,9 @@
import "reflect-metadata"; // required import "reflect-metadata"; // required
import {TorrentType} from "@enums/torrent_types"; import {TorrentType} from "@enums/torrent_types";
import {ILoggingService} from "@interfaces/logging_service";
import {MongoRepository} from "@mongo/mongo_repository"; import {MongoRepository} from "@mongo/mongo_repository";
import {Container} from "inversify"; import {IocTypes} from "@setup/ioc_types";
import {Container, inject} from "inversify";
jest.mock('@services/configuration_service', () => { jest.mock('@services/configuration_service', () => {
return { return {
@@ -20,13 +22,24 @@ jest.mock('@services/configuration_service', () => {
     }
 });
 
+jest.mock('@services/logging_service', () => {
+    return {
+        error: jest.fn(),
+        info: jest.fn(),
+        debug: jest.fn(),
+    }
+})
+
 xdescribe('MongoRepository Tests - Manual Tests against real cluster. Skipped by default.', () => {
-    let mongoRepository: MongoRepository;
+    let mongoRepository: MongoRepository,
+        mockLogger: ILoggingService;
 
     beforeEach(() => {
         jest.clearAllMocks();
         process.env.LOG_LEVEL = 'debug';
+        mockLogger = jest.requireMock<ILoggingService>('@services/logging_service');
 
         const container = new Container();
+        container.bind<ILoggingService>(IocTypes.ILoggingService).toConstantValue(mockLogger);
         container.bind<MongoRepository>(MongoRepository).toSelf();
         mongoRepository = container.get(MongoRepository);
     });