mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
Add cleanup of initialize mongodb, ensure indexes are created for compound searching
This commit is contained in:
@@ -27,9 +27,11 @@ RABBITMQ_PUBLISH_INTERVAL_IN_SECONDS=10
|
|||||||
|
|
||||||
# Metadata
|
# Metadata
|
||||||
## Only used if DATA_ONCE is set to false. If true, the schedule is ignored
|
## Only used if DATA_ONCE is set to false. If true, the schedule is ignored
|
||||||
METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE=0 0 1 * * *
|
METADATA_DOWNLOAD_IMDB_DATA_SCHEDULE="0 0 1 * * *"
|
||||||
## If true, the metadata will be downloaded once and then the schedule will be ignored
|
## If true, the metadata will be downloaded once and then the schedule will be ignored
|
||||||
METADATA_DOWNLOAD_IMDB_DATA_ONCE=true
|
METADATA_DOWNLOAD_IMDB_DATA_ONCE=true
|
||||||
|
## Controls the number of records processed in memory at any given time during import; higher values will consume more memory
|
||||||
|
METADATA_INSERT_BATCH_SIZE=25000
|
||||||
|
|
||||||
# Addon
|
# Addon
|
||||||
DEBUG_MODE=false
|
DEBUG_MODE=false
|
||||||
|
|||||||
@@ -5,7 +5,9 @@ public class JobConfiguration
|
|||||||
private const string Prefix = "METADATA";
|
private const string Prefix = "METADATA";
|
||||||
private const string DownloadImdbDataVariable = "DOWNLOAD_IMDB_DATA_SCHEDULE";
|
private const string DownloadImdbDataVariable = "DOWNLOAD_IMDB_DATA_SCHEDULE";
|
||||||
private const string DownloadImdbDataOnceVariable = "DOWNLOAD_IMDB_DATA_ONCE";
|
private const string DownloadImdbDataOnceVariable = "DOWNLOAD_IMDB_DATA_ONCE";
|
||||||
|
private const string InsertBatchSizeVariable = "INSERT_BATCH_SIZE";
|
||||||
|
|
||||||
|
public int InsertBatchSize { get; init; } = Prefix.GetEnvironmentVariableAsInt(InsertBatchSizeVariable, 25_000);
|
||||||
public string DownloadImdbCronSchedule { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(DownloadImdbDataVariable, CronExpressions.EveryHour);
|
public string DownloadImdbCronSchedule { get; init; } = Prefix.GetOptionalEnvironmentVariableAsString(DownloadImdbDataVariable, CronExpressions.EveryHour);
|
||||||
public bool DownloadImdbOnce { get; init; } = Prefix.GetEnvironmentVariableAsBool(DownloadImdbDataOnceVariable);
|
public bool DownloadImdbOnce { get; init; } = Prefix.GetEnvironmentVariableAsBool(DownloadImdbDataOnceVariable);
|
||||||
}
|
}
|
||||||
@@ -8,6 +8,7 @@ public class MongoConfiguration
|
|||||||
private const string DbVariable = "DB";
|
private const string DbVariable = "DB";
|
||||||
private const string UsernameVariable = "USER";
|
private const string UsernameVariable = "USER";
|
||||||
private const string PasswordVariable = "PASSWORD";
|
private const string PasswordVariable = "PASSWORD";
|
||||||
|
|
||||||
|
|
||||||
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
|
private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
|
||||||
private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 27017);
|
private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 27017);
|
||||||
|
|||||||
@@ -42,11 +42,16 @@ public class ImdbMongoDbService
|
|||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
// Create compound index for PrimaryTitle, TitleType, and StartYear
|
||||||
// Create index for PrimaryTitle
|
var indexKeysDefinition = Builders<ImdbEntry>.IndexKeys
|
||||||
var indexPrimaryTitle = Builders<ImdbEntry>.IndexKeys.Ascending(e => e.PrimaryTitle);
|
.Text(e => e.PrimaryTitle)
|
||||||
var modelPrimaryTitle = new CreateIndexModel<ImdbEntry>(indexPrimaryTitle);
|
.Ascending(e => e.TitleType)
|
||||||
_imdbCollection.Indexes.CreateOne(modelPrimaryTitle);
|
.Ascending(e => e.StartYear);
|
||||||
|
|
||||||
|
var createIndexOptions = new CreateIndexOptions { Background = true };
|
||||||
|
var indexModel = new CreateIndexModel<ImdbEntry>(indexKeysDefinition, createIndexOptions);
|
||||||
|
|
||||||
|
_imdbCollection.Indexes.CreateOne(indexModel);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,7 @@
|
|||||||
namespace Metadata.Features.ImportImdbData;
|
namespace Metadata.Features.ImportImdbData;
|
||||||
|
|
||||||
public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService)
|
public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler> logger, ImdbMongoDbService mongoDbService, JobConfiguration configuration)
|
||||||
{
|
{
|
||||||
private const int BatchSize = 50_000;
|
|
||||||
|
|
||||||
public async Task<DeleteDownloadedImdbDataRequest> Handle(ImportImdbDataRequest request, CancellationToken cancellationToken)
|
public async Task<DeleteDownloadedImdbDataRequest> Handle(ImportImdbDataRequest request, CancellationToken cancellationToken)
|
||||||
{
|
{
|
||||||
logger.LogInformation("Importing Downloaded IMDB data from {FilePath}", request.FilePath);
|
logger.LogInformation("Importing Downloaded IMDB data from {FilePath}", request.FilePath);
|
||||||
@@ -18,7 +16,7 @@ public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler>
|
|||||||
using var reader = new StreamReader(request.FilePath);
|
using var reader = new StreamReader(request.FilePath);
|
||||||
using var csv = new CsvReader(reader, config);
|
using var csv = new CsvReader(reader, config);
|
||||||
|
|
||||||
var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(BatchSize)
|
var channel = Channel.CreateBounded<ImdbEntry>(new BoundedChannelOptions(configuration.InsertBatchSize)
|
||||||
{
|
{
|
||||||
FullMode = BoundedChannelFullMode.Wait,
|
FullMode = BoundedChannelFullMode.Wait,
|
||||||
});
|
});
|
||||||
@@ -53,7 +51,7 @@ public class ImportImdbDataRequestHandler(ILogger<ImportImdbDataRequestHandler>
|
|||||||
movieData,
|
movieData,
|
||||||
};
|
};
|
||||||
|
|
||||||
while (batch.Count < BatchSize && channel.Reader.TryRead(out var nextMovieData))
|
while (batch.Count < configuration.InsertBatchSize && channel.Reader.TryRead(out var nextMovieData))
|
||||||
{
|
{
|
||||||
batch.Add(nextMovieData);
|
batch.Add(nextMovieData);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,4 +13,6 @@ const ImdbEntriesSchema: Schema = new Schema({
|
|||||||
TitleType: { type: String, default: "" },
|
TitleType: { type: String, default: "" },
|
||||||
});
|
});
|
||||||
|
|
||||||
|
ImdbEntriesSchema.index({ PrimaryTitle: 'text', TitleType: 1, StartYear: 1 }, { background: true });
|
||||||
|
|
||||||
export const ImdbEntryModel = mongoose.model<IImdbEntry>('ImdbEntry', ImdbEntriesSchema, 'imdb-entries');
|
export const ImdbEntryModel = mongoose.model<IImdbEntry>('ImdbEntry', ImdbEntriesSchema, 'imdb-entries');
|
||||||
@@ -1,17 +1,28 @@
|
|||||||
import {TorrentType} from "@enums/torrent_types";
|
import {TorrentType} from "@enums/torrent_types";
|
||||||
|
import {ILoggingService} from "@interfaces/logging_service";
|
||||||
import {IMongoMetadataQuery} from "@mongo/interfaces/mongo_metadata_query";
|
import {IMongoMetadataQuery} from "@mongo/interfaces/mongo_metadata_query";
|
||||||
import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
|
import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
|
||||||
import {ImdbEntryModel} from "@mongo/models/imdb_entries_model";
|
import {ImdbEntryModel} from "@mongo/models/imdb_entries_model";
|
||||||
import {configurationService} from '@services/configuration_service';
|
import {configurationService} from '@services/configuration_service';
|
||||||
import {injectable} from "inversify";
|
import {IocTypes} from "@setup/ioc_types";
|
||||||
|
import {inject, injectable} from "inversify";
|
||||||
import mongoose from 'mongoose';
|
import mongoose from 'mongoose';
|
||||||
|
|
||||||
@injectable()
|
@injectable()
|
||||||
export class MongoRepository implements IMongoRepository {
|
export class MongoRepository implements IMongoRepository {
|
||||||
|
@inject(IocTypes.ILoggingService) private logger: ILoggingService;
|
||||||
private db: typeof mongoose = mongoose;
|
private db: typeof mongoose = mongoose;
|
||||||
|
|
||||||
async connect() : Promise<void> {
|
async connect() : Promise<void> {
|
||||||
await this.db.connect(configurationService.cacheConfig.MONGO_URI, {directConnection: true});
|
try {
|
||||||
|
await this.db.connect(configurationService.cacheConfig.MONGO_URI, {directConnection: true});
|
||||||
|
this.logger.info('Successfully connected to mongo db');
|
||||||
|
}
|
||||||
|
catch (error) {
|
||||||
|
this.logger.debug('Failed to connect to mongo db', error);
|
||||||
|
this.logger.error('Failed to connect to mongo db');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async getImdbId(title: string, category: string, year?: string | number) : Promise<string | null> {
|
async getImdbId(title: string, category: string, year?: string | number) : Promise<string | null> {
|
||||||
@@ -35,7 +46,12 @@ export class MongoRepository implements IMongoRepository {
|
|||||||
query.StartYear = year.toString();
|
query.StartYear = year.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await ImdbEntryModel.findOne(query);
|
try {
|
||||||
return result ? result._id : null;
|
const result = await ImdbEntryModel.findOne(query, '_id').maxTimeMS(30000);
|
||||||
|
return result ? result._id : null;
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error('Query exceeded the 30 seconds time limit', error);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,9 @@
|
|||||||
import "reflect-metadata"; // required
|
import "reflect-metadata"; // required
|
||||||
import {TorrentType} from "@enums/torrent_types";
|
import {TorrentType} from "@enums/torrent_types";
|
||||||
|
import {ILoggingService} from "@interfaces/logging_service";
|
||||||
import {MongoRepository} from "@mongo/mongo_repository";
|
import {MongoRepository} from "@mongo/mongo_repository";
|
||||||
import {Container} from "inversify";
|
import {IocTypes} from "@setup/ioc_types";
|
||||||
|
import {Container, inject} from "inversify";
|
||||||
|
|
||||||
jest.mock('@services/configuration_service', () => {
|
jest.mock('@services/configuration_service', () => {
|
||||||
return {
|
return {
|
||||||
@@ -20,13 +22,24 @@ jest.mock('@services/configuration_service', () => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
jest.mock('@services/logging_service', () => {
|
||||||
|
return {
|
||||||
|
error: jest.fn(),
|
||||||
|
info: jest.fn(),
|
||||||
|
debug: jest.fn(),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
xdescribe('MongoRepository Tests - Manual Tests against real cluster. Skipped by default.', () => {
|
xdescribe('MongoRepository Tests - Manual Tests against real cluster. Skipped by default.', () => {
|
||||||
let mongoRepository: MongoRepository;
|
let mongoRepository: MongoRepository,
|
||||||
|
mockLogger: ILoggingService;
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
jest.clearAllMocks();
|
jest.clearAllMocks();
|
||||||
process.env.LOG_LEVEL = 'debug';
|
process.env.LOG_LEVEL = 'debug';
|
||||||
|
mockLogger = jest.requireMock<ILoggingService>('@services/logging_service');
|
||||||
const container = new Container();
|
const container = new Container();
|
||||||
|
container.bind<ILoggingService>(IocTypes.ILoggingService).toConstantValue(mockLogger);
|
||||||
container.bind<MongoRepository>(MongoRepository).toSelf();
|
container.bind<MongoRepository>(MongoRepository).toSelf();
|
||||||
mongoRepository = container.get(MongoRepository);
|
mongoRepository = container.get(MongoRepository);
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user