Merge pull request #104 from Gabisonfire/fuse-weighting
Further enhance title matching by adding Fuse.js with a configurable threshold
This commit is contained in:
@@ -48,6 +48,8 @@ UDP_TRACKERS_ENABLED=true
|
|||||||
CONSUMER_REPLICAS=3
|
CONSUMER_REPLICAS=3
|
||||||
## Fix for #66 - toggle on for development
|
## Fix for #66 - toggle on for development
|
||||||
AUTO_CREATE_AND_APPLY_MIGRATIONS=false
|
AUTO_CREATE_AND_APPLY_MIGRATIONS=false
|
||||||
|
## Controls the threshold (0 to 1) for fuzzy-matching titles against the IMDB dataset. The closer to 0, the stricter the matching.
|
||||||
|
TITLE_MATCH_THRESHOLD=0.25
|
||||||
|
|
||||||
# Producer
|
# Producer
|
||||||
GITHUB_PAT=
|
GITHUB_PAT=
|
||||||
|
|||||||
9
src/node/consumer/package-lock.json
generated
9
src/node/consumer/package-lock.json
generated
@@ -14,6 +14,7 @@
|
|||||||
"axios": "^1.6.1",
|
"axios": "^1.6.1",
|
||||||
"bottleneck": "^2.19.5",
|
"bottleneck": "^2.19.5",
|
||||||
"cache-manager": "^5.4.0",
|
"cache-manager": "^5.4.0",
|
||||||
|
"fuse.js": "^7.0.0",
|
||||||
"google-sr": "^3.2.1",
|
"google-sr": "^3.2.1",
|
||||||
"inversify": "^6.0.2",
|
"inversify": "^6.0.2",
|
||||||
"magnet-uri": "^6.2.0",
|
"magnet-uri": "^6.2.0",
|
||||||
@@ -4782,6 +4783,14 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/fuse.js": {
|
||||||
|
"version": "7.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz",
|
||||||
|
"integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/gensync": {
|
"node_modules/gensync": {
|
||||||
"version": "1.0.0-beta.2",
|
"version": "1.0.0-beta.2",
|
||||||
"resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz",
|
"resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz",
|
||||||
|
|||||||
@@ -19,6 +19,7 @@
|
|||||||
"axios": "^1.6.1",
|
"axios": "^1.6.1",
|
||||||
"bottleneck": "^2.19.5",
|
"bottleneck": "^2.19.5",
|
||||||
"cache-manager": "^5.4.0",
|
"cache-manager": "^5.4.0",
|
||||||
|
"fuse.js": "^7.0.0",
|
||||||
"google-sr": "^3.2.1",
|
"google-sr": "^3.2.1",
|
||||||
"inversify": "^6.0.2",
|
"inversify": "^6.0.2",
|
||||||
"magnet-uri": "^6.2.0",
|
"magnet-uri": "^6.2.0",
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
export const metadataConfig = {
|
export const metadataConfig = {
|
||||||
IMDB_CONCURRENT: parseInt(process.env.IMDB_CONCURRENT || "1", 10),
|
IMDB_CONCURRENT: parseInt(process.env.IMDB_CONCURRENT || "1", 10),
|
||||||
IMDB_INTERVAL_MS: parseInt(process.env.IMDB_INTERVAL_MS || "1000", 10)
|
IMDB_INTERVAL_MS: parseInt(process.env.IMDB_INTERVAL_MS || "1000", 10),
|
||||||
|
TITLE_MATCH_THRESHOLD: Number(process.env.TITLE_MATCH_THRESHOLD || 0.25),
|
||||||
};
|
};
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
export interface IMongoMetadataQuery {
|
export interface IMongoMetadataQuery {
|
||||||
$text: { $search: string },
|
$text: { $search: string },
|
||||||
TitleType: string;
|
TitleType: string,
|
||||||
StartYear?: string;
|
StartYear?: string;
|
||||||
}
|
}
|
||||||
@@ -1,13 +1,21 @@
|
|||||||
import {TorrentType} from "@enums/torrent_types";
|
import {TorrentType} from "@enums/torrent_types";
|
||||||
import {ILoggingService} from "@interfaces/logging_service";
|
import {ILoggingService} from "@interfaces/logging_service";
|
||||||
|
import {IImdbEntry} from "@mongo/interfaces/imdb_entry_attributes";
|
||||||
import {IMongoMetadataQuery} from "@mongo/interfaces/mongo_metadata_query";
|
import {IMongoMetadataQuery} from "@mongo/interfaces/mongo_metadata_query";
|
||||||
import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
|
import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
|
||||||
import {ImdbEntryModel} from "@mongo/models/imdb_entries_model";
|
import {ImdbEntryModel} from "@mongo/models/imdb_entries_model";
|
||||||
import {configurationService} from '@services/configuration_service';
|
import {configurationService} from '@services/configuration_service';
|
||||||
import {IocTypes} from "@setup/ioc_types";
|
import {IocTypes} from "@setup/ioc_types";
|
||||||
|
import Fuse, {FuseResult, IFuseOptions} from 'fuse.js';
|
||||||
import {inject, injectable} from "inversify";
|
import {inject, injectable} from "inversify";
|
||||||
import mongoose from 'mongoose';
|
import mongoose from 'mongoose';
|
||||||
|
|
||||||
|
const fuseOptions : IFuseOptions<IImdbEntry> = {
|
||||||
|
includeScore: true,
|
||||||
|
keys: ['PrimaryTitle', 'OriginalTitle'],
|
||||||
|
threshold: configurationService.metadataConfig.TITLE_MATCH_THRESHOLD,
|
||||||
|
};
|
||||||
|
|
||||||
@injectable()
|
@injectable()
|
||||||
export class MongoRepository implements IMongoRepository {
|
export class MongoRepository implements IMongoRepository {
|
||||||
@inject(IocTypes.ILoggingService) private logger: ILoggingService;
|
@inject(IocTypes.ILoggingService) private logger: ILoggingService;
|
||||||
@@ -19,27 +27,34 @@ export class MongoRepository implements IMongoRepository {
|
|||||||
this.logger.info('Successfully connected to mongo db');
|
this.logger.info('Successfully connected to mongo db');
|
||||||
}
|
}
|
||||||
catch (error) {
|
catch (error) {
|
||||||
this.logger.debug('Failed to connect to mongo db', error);
|
const FAILED_TO_CONNECT = 'Failed to connect to mongo db';
|
||||||
this.logger.error('Failed to connect to mongo db');
|
this.logger.debug(FAILED_TO_CONNECT, error);
|
||||||
|
this.logger.error(FAILED_TO_CONNECT);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async getImdbId(title: string, category: string, year?: string | number) : Promise<string | null> {
|
async getImdbId(title: string, category: string, year?: string | number) : Promise<string | null> {
|
||||||
const titleType: string = category === TorrentType.Series ? 'tvSeries' : 'movie';
|
const titleType: string = category === TorrentType.Series ? 'tvSeries' : 'movie';
|
||||||
|
|
||||||
const query: IMongoMetadataQuery = {
|
const query: IMongoMetadataQuery = {
|
||||||
$text: { $search: title },
|
$text: { $search: title },
|
||||||
TitleType: titleType,
|
TitleType: titleType
|
||||||
};
|
};
|
||||||
|
|
||||||
if (year) {
|
if (year) {
|
||||||
query.StartYear = year.toString();
|
query.StartYear = year.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = await ImdbEntryModel.findOne(query, '_id', {score: {$meta: "textScore" }}).sort({score: {$meta: "textScore"}}).limit(10).maxTimeMS(30000);
|
const results = await ImdbEntryModel.find(query).limit(100).maxTimeMS(30000);
|
||||||
return result ? result._id : null;
|
if (!results.length) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
const fuse: Fuse<IImdbEntry> = new Fuse(results, fuseOptions);
|
||||||
|
const searchResults: FuseResult<IImdbEntry>[] = fuse.search(title);
|
||||||
|
if (!searchResults.length) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
const [bestMatch] = searchResults;
|
||||||
|
return bestMatch.item._id;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this.logger.error('Query exceeded the 30 seconds time limit', error);
|
this.logger.error('Query exceeded the 30 seconds time limit', error);
|
||||||
return null;
|
return null;
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ xdescribe('MongoRepository Tests - Manual Tests against real cluster. Skipped by
|
|||||||
expect(result).toBe('tt0084726');
|
expect(result).toBe('tt0084726');
|
||||||
}, 30000);
|
}, 30000);
|
||||||
|
|
||||||
it('should get Wrath of Khan imdbId correctly', async () => {
|
it('should get Wrath of Khan simple imdbId correctly', async () => {
|
||||||
await mongoRepository.connect();
|
await mongoRepository.connect();
|
||||||
const result = await mongoRepository.getImdbId('Wrath of Khan', TorrentType.Movie, 1982);
|
const result = await mongoRepository.getImdbId('Wrath of Khan', TorrentType.Movie, 1982);
|
||||||
expect(result).toBe('tt0084726');
|
expect(result).toBe('tt0084726');
|
||||||
|
|||||||
Reference in New Issue
Block a user