Merge pull request #104 from Gabisonfire/fuse-weighting
Further enhance title matching by adding Fuse.js with a configurable threshold
This commit is contained in:
@@ -48,6 +48,8 @@ UDP_TRACKERS_ENABLED=true
|
||||
CONSUMER_REPLICAS=3
|
||||
## Fix for #66 - toggle on for development
|
||||
AUTO_CREATE_AND_APPLY_MIGRATIONS=false
|
||||
## Controls the threshold for fuzzy-matching titles against the IMDb dataset. The closer to 0, the stricter the matching (0.0 requires an exact match; 1.0 matches anything).
|
||||
TITLE_MATCH_THRESHOLD=0.25
|
||||
|
||||
# Producer
|
||||
GITHUB_PAT=
|
||||
|
||||
9
src/node/consumer/package-lock.json
generated
9
src/node/consumer/package-lock.json
generated
@@ -14,6 +14,7 @@
|
||||
"axios": "^1.6.1",
|
||||
"bottleneck": "^2.19.5",
|
||||
"cache-manager": "^5.4.0",
|
||||
"fuse.js": "^7.0.0",
|
||||
"google-sr": "^3.2.1",
|
||||
"inversify": "^6.0.2",
|
||||
"magnet-uri": "^6.2.0",
|
||||
@@ -4782,6 +4783,14 @@
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/fuse.js": {
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz",
|
||||
"integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==",
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/gensync": {
|
||||
"version": "1.0.0-beta.2",
|
||||
"resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz",
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
"axios": "^1.6.1",
|
||||
"bottleneck": "^2.19.5",
|
||||
"cache-manager": "^5.4.0",
|
||||
"fuse.js": "^7.0.0",
|
||||
"google-sr": "^3.2.1",
|
||||
"inversify": "^6.0.2",
|
||||
"magnet-uri": "^6.2.0",
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
export const metadataConfig = {
|
||||
IMDB_CONCURRENT: parseInt(process.env.IMDB_CONCURRENT || "1", 10),
|
||||
IMDB_INTERVAL_MS: parseInt(process.env.IMDB_INTERVAL_MS || "1000", 10)
|
||||
IMDB_INTERVAL_MS: parseInt(process.env.IMDB_INTERVAL_MS || "1000", 10),
|
||||
TITLE_MATCH_THRESHOLD: Number(process.env.TITLE_MATCH_THRESHOLD || 0.25),
|
||||
};
|
||||
@@ -1,5 +1,5 @@
|
||||
export interface IMongoMetadataQuery {
|
||||
$text: { $search: string },
|
||||
TitleType: string;
|
||||
TitleType: string,
|
||||
StartYear?: string;
|
||||
}
|
||||
@@ -1,13 +1,21 @@
|
||||
import {TorrentType} from "@enums/torrent_types";
|
||||
import {ILoggingService} from "@interfaces/logging_service";
|
||||
import {IImdbEntry} from "@mongo/interfaces/imdb_entry_attributes";
|
||||
import {IMongoMetadataQuery} from "@mongo/interfaces/mongo_metadata_query";
|
||||
import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
|
||||
import {ImdbEntryModel} from "@mongo/models/imdb_entries_model";
|
||||
import {configurationService} from '@services/configuration_service';
|
||||
import {IocTypes} from "@setup/ioc_types";
|
||||
import Fuse, {FuseResult, IFuseOptions} from 'fuse.js';
|
||||
import {inject, injectable} from "inversify";
|
||||
import mongoose from 'mongoose';
|
||||
|
||||
const fuseOptions : IFuseOptions<IImdbEntry> = {
|
||||
includeScore: true,
|
||||
keys: ['PrimaryTitle', 'OriginalTitle'],
|
||||
threshold: configurationService.metadataConfig.TITLE_MATCH_THRESHOLD,
|
||||
};
|
||||
|
||||
@injectable()
|
||||
export class MongoRepository implements IMongoRepository {
|
||||
@inject(IocTypes.ILoggingService) private logger: ILoggingService;
|
||||
@@ -19,27 +27,34 @@ export class MongoRepository implements IMongoRepository {
|
||||
this.logger.info('Successfully connected to mongo db');
|
||||
}
|
||||
catch (error) {
|
||||
this.logger.debug('Failed to connect to mongo db', error);
|
||||
this.logger.error('Failed to connect to mongo db');
|
||||
const FAILED_TO_CONNECT = 'Failed to connect to mongo db';
|
||||
this.logger.debug(FAILED_TO_CONNECT, error);
|
||||
this.logger.error(FAILED_TO_CONNECT);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
async getImdbId(title: string, category: string, year?: string | number) : Promise<string | null> {
|
||||
const titleType: string = category === TorrentType.Series ? 'tvSeries' : 'movie';
|
||||
|
||||
const query: IMongoMetadataQuery = {
|
||||
$text: { $search: title },
|
||||
TitleType: titleType,
|
||||
TitleType: titleType
|
||||
};
|
||||
|
||||
if (year) {
|
||||
query.StartYear = year.toString();
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await ImdbEntryModel.findOne(query, '_id', {score: {$meta: "textScore" }}).sort({score: {$meta: "textScore"}}).limit(10).maxTimeMS(30000);
|
||||
return result ? result._id : null;
|
||||
const results = await ImdbEntryModel.find(query).limit(100).maxTimeMS(30000);
|
||||
if (!results.length) {
|
||||
return null;
|
||||
}
|
||||
const fuse: Fuse<IImdbEntry> = new Fuse(results, fuseOptions);
|
||||
const searchResults: FuseResult<IImdbEntry>[] = fuse.search(title);
|
||||
if (!searchResults.length) {
|
||||
return null;
|
||||
}
|
||||
const [bestMatch] = searchResults;
|
||||
return bestMatch.item._id;
|
||||
} catch (error) {
|
||||
this.logger.error('Query exceeded the 30 seconds time limit', error);
|
||||
return null;
|
||||
|
||||
@@ -66,7 +66,7 @@ xdescribe('MongoRepository Tests - Manual Tests against real cluster. Skipped by
|
||||
expect(result).toBe('tt0084726');
|
||||
}, 30000);
|
||||
|
||||
it('should get Wrath of Khan imdbId correctly', async () => {
|
||||
it('should get Wrath of Khan simple imdbId correctly', async () => {
|
||||
await mongoRepository.connect();
|
||||
const result = await mongoRepository.getImdbId('Wrath of Khan', TorrentType.Movie, 1982);
|
||||
expect(result).toBe('tt0084726');
|
||||
|
||||
Reference in New Issue
Block a user