Merge pull request #104 from Gabisonfire/fuse-weighting

Further enhance title matching by adding Fuse.js with a configurable threshold
This commit is contained in:
iPromKnight
2024-03-01 09:02:24 +00:00
committed by GitHub
7 changed files with 39 additions and 11 deletions

View File

@@ -48,6 +48,8 @@ UDP_TRACKERS_ENABLED=true
CONSUMER_REPLICAS=3
## Fix for #66 - toggle on for development
AUTO_CREATE_AND_APPLY_MIGRATIONS=false
## Controls the fuzzy-match (Fuse.js) threshold used when matching titles against the IMDB dataset. Valid range is 0 to 1: the closer to 0, the stricter the matching.
TITLE_MATCH_THRESHOLD=0.25
# Producer
GITHUB_PAT=

View File

@@ -14,6 +14,7 @@
"axios": "^1.6.1",
"bottleneck": "^2.19.5",
"cache-manager": "^5.4.0",
"fuse.js": "^7.0.0",
"google-sr": "^3.2.1",
"inversify": "^6.0.2",
"magnet-uri": "^6.2.0",
@@ -4782,6 +4783,14 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/fuse.js": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz",
"integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==",
"engines": {
"node": ">=10"
}
},
"node_modules/gensync": {
"version": "1.0.0-beta.2",
"resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz",

View File

@@ -19,6 +19,7 @@
"axios": "^1.6.1",
"bottleneck": "^2.19.5",
"cache-manager": "^5.4.0",
"fuse.js": "^7.0.0",
"google-sr": "^3.2.1",
"inversify": "^6.0.2",
"magnet-uri": "^6.2.0",

View File

@@ -1,4 +1,5 @@
/**
 * Settings for IMDB metadata lookups. Each value is read from an environment
 * variable, with a hard-coded fallback used when the variable is unset or empty.
 *
 * NOTE(review): this hunk is shown without +/- markers; the two consecutive
 * IMDB_INTERVAL_MS lines appear to be the before/after sides of the change
 * (the later one only adds a trailing comma) — confirm against the real file.
 */
export const metadataConfig = {
// Parsed as a base-10 integer; falls back to "1".
IMDB_CONCURRENT: parseInt(process.env.IMDB_CONCURRENT || "1", 10),
// Parsed as a base-10 integer; falls back to "1000".
IMDB_INTERVAL_MS: parseInt(process.env.IMDB_INTERVAL_MS || "1000", 10)
IMDB_INTERVAL_MS: parseInt(process.env.IMDB_INTERVAL_MS || "1000", 10),
// Converted with Number(); falls back to 0.25.
// NOTE(review): a non-numeric value yields NaN and is not validated here.
TITLE_MATCH_THRESHOLD: Number(process.env.TITLE_MATCH_THRESHOLD || 0.25),
};

View File

@@ -1,5 +1,5 @@
/**
 * Shape of the filter object used to look up IMDB entries in Mongo.
 *
 * NOTE(review): this hunk is shown without +/- markers; the two consecutive
 * TitleType lines appear to be the before/after sides of the change (only the
 * member separator differs) — confirm against the real file.
 */
export interface IMongoMetadataQuery {
// Text-search clause; $search carries the string to search for.
$text: { $search: string },
// Title category to filter on.
TitleType: string;
TitleType: string,
// Optional start-year filter, held as a string.
StartYear?: string;
}

View File

@@ -1,13 +1,21 @@
import {TorrentType} from "@enums/torrent_types";
import {ILoggingService} from "@interfaces/logging_service";
import {IImdbEntry} from "@mongo/interfaces/imdb_entry_attributes";
import {IMongoMetadataQuery} from "@mongo/interfaces/mongo_metadata_query";
import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
import {ImdbEntryModel} from "@mongo/models/imdb_entries_model";
import {configurationService} from '@services/configuration_service';
import {IocTypes} from "@setup/ioc_types";
import Fuse, {FuseResult, IFuseOptions} from 'fuse.js';
import {inject, injectable} from "inversify";
import mongoose from 'mongoose';
/**
 * Shared Fuse.js options for fuzzy-matching a title against IMDB entries.
 *
 * - `threshold` comes from the TITLE_MATCH_THRESHOLD configuration value.
 * - `keys` restricts matching to the PrimaryTitle and OriginalTitle fields.
 * - `includeScore` asks Fuse to attach a match score to every result.
 */
const fuseOptions : IFuseOptions<IImdbEntry> = {
    threshold: configurationService.metadataConfig.TITLE_MATCH_THRESHOLD,
    keys: [
        'PrimaryTitle',
        'OriginalTitle',
    ],
    includeScore: true,
};
@injectable()
export class MongoRepository implements IMongoRepository {
@inject(IocTypes.ILoggingService) private logger: ILoggingService;
@@ -19,27 +27,34 @@ export class MongoRepository implements IMongoRepository {
this.logger.info('Successfully connected to mongo db');
}
catch (error) {
this.logger.debug('Failed to connect to mongo db', error);
this.logger.error('Failed to connect to mongo db');
const FAILED_TO_CONNECT = 'Failed to connect to mongo db';
this.logger.debug(FAILED_TO_CONNECT, error);
this.logger.error(FAILED_TO_CONNECT);
process.exit(1);
}
}
async getImdbId(title: string, category: string, year?: string | number) : Promise<string | null> {
const titleType: string = category === TorrentType.Series ? 'tvSeries' : 'movie';
const query: IMongoMetadataQuery = {
$text: { $search: title },
TitleType: titleType,
TitleType: titleType
};
if (year) {
query.StartYear = year.toString();
}
try {
const result = await ImdbEntryModel.findOne(query, '_id', {score: {$meta: "textScore" }}).sort({score: {$meta: "textScore"}}).limit(10).maxTimeMS(30000);
return result ? result._id : null;
const results = await ImdbEntryModel.find(query).limit(100).maxTimeMS(30000);
if (!results.length) {
return null;
}
const fuse: Fuse<IImdbEntry> = new Fuse(results, fuseOptions);
const searchResults: FuseResult<IImdbEntry>[] = fuse.search(title);
if (!searchResults.length) {
return null;
}
const [bestMatch] = searchResults;
return bestMatch.item._id;
} catch (error) {
this.logger.error('Query exceeded the 30 seconds time limit', error);
return null;

View File

@@ -66,7 +66,7 @@ xdescribe('MongoRepository Tests - Manual Tests against real cluster. Skipped by
expect(result).toBe('tt0084726');
}, 30000);
it('should get Wrath of Khan imdbId correctly', async () => {
it('should get Wrath of Khan simple imdbId correctly', async () => {
await mongoRepository.connect();
const result = await mongoRepository.getImdbId('Wrath of Khan', TorrentType.Movie, 1982);
expect(result).toBe('tt0084726');