Merge pull request #129 from Gabisonfire/feature/dmm-improvements

Improvements for DMM
This commit is contained in:
iPromKnight
2024-03-10 13:52:53 +00:00
committed by GitHub
60 changed files with 433123 additions and 296 deletions

3
.gitignore vendored
View File

@@ -355,6 +355,9 @@ MigrationBackup/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# JetBrains IDE run profiles (could contain sensitive information)
**/.run/
# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json

View File

@@ -3,6 +3,7 @@ repos:
rev: v4.5.0
hooks:
- id: check-added-large-files
args: ['--maxkb=2500']
- id: check-json
- id: check-toml
- id: check-xml
@@ -15,5 +16,6 @@ repos:
rev: v2.2.6
hooks:
- id: codespell
exclude: ^src/node/consumer/test/
exclude: |
(?x)^(src/node/consumer/test/.*|src/producer/Data/.*)$
args: ["-L", "strem,chage"]

View File

@@ -15,7 +15,6 @@
"bottleneck": "^2.19.5",
"cache-manager": "^5.4.0",
"fuse.js": "^7.0.0",
"google-sr": "^3.2.1",
"inversify": "^6.0.2",
"magnet-uri": "^6.2.0",
"moment": "^2.30.1",
@@ -2923,11 +2922,6 @@
"resolved": "https://registry.npmjs.org/bncode/-/bncode-0.5.3.tgz",
"integrity": "sha512-0P5VuWobU5Gwbeio8n9Jsdv0tE1IikrV9n4f7RsnXHNtxmdd/oeIO6QyoSEUAEyo5P6i3XMfBppi82WqNsT4JA=="
},
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="
},
"node_modules/bottleneck": {
"version": "2.19.5",
"resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz",
@@ -3189,42 +3183,6 @@
"integrity": "sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==",
"dev": true
},
"node_modules/cheerio": {
"version": "1.0.0-rc.12",
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz",
"integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==",
"dependencies": {
"cheerio-select": "^2.1.0",
"dom-serializer": "^2.0.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1",
"htmlparser2": "^8.0.1",
"parse5": "^7.0.0",
"parse5-htmlparser2-tree-adapter": "^7.0.0"
},
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/cheeriojs/cheerio?sponsor=1"
}
},
"node_modules/cheerio-select": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz",
"integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==",
"dependencies": {
"boolbase": "^1.0.0",
"css-select": "^5.1.0",
"css-what": "^6.1.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/chokidar": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
@@ -3533,32 +3491,6 @@
"node": ">= 8"
}
},
"node_modules/css-select": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz",
"integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==",
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^6.1.0",
"domhandler": "^5.0.2",
"domutils": "^3.0.1",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/css-what": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==",
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/cyclist": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/cyclist/-/cyclist-0.1.1.tgz",
@@ -3629,14 +3561,6 @@
"node": ">=0.10.0"
}
},
"node_modules/deepmerge-ts": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/deepmerge-ts/-/deepmerge-ts-5.1.0.tgz",
"integrity": "sha512-eS8dRJOckyo9maw9Tu5O5RUi/4inFLrnoLkBe3cPfDMx3WZioXtmOew4TXQaxq7Rhl4xjDtR7c6x8nNTxOvbFw==",
"engines": {
"node": ">=16.0.0"
}
},
"node_modules/defaults": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/defaults/-/defaults-1.0.4.tgz",
@@ -3745,57 +3669,6 @@
"node": ">=6.0.0"
}
},
"node_modules/dom-serializer": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
"integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.2",
"entities": "^4.2.0"
},
"funding": {
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
}
},
"node_modules/domelementtype": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
"integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
]
},
"node_modules/domhandler": {
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
"integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
"dependencies": {
"domelementtype": "^2.3.0"
},
"engines": {
"node": ">= 4"
},
"funding": {
"url": "https://github.com/fb55/domhandler?sponsor=1"
}
},
"node_modules/domutils": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz",
"integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==",
"dependencies": {
"dom-serializer": "^2.0.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3"
},
"funding": {
"url": "https://github.com/fb55/domutils?sponsor=1"
}
},
"node_modules/dottie": {
"version": "2.0.6",
"resolved": "https://registry.npmjs.org/dottie/-/dottie-2.0.6.tgz",
@@ -3857,17 +3730,6 @@
"once": "^1.4.0"
}
},
"node_modules/entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/error-ex": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz",
@@ -4984,23 +4846,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/google-sr": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/google-sr/-/google-sr-3.2.1.tgz",
"integrity": "sha512-1WGy6mxMTKo+jbIrmq1mwm+2Egvmx9ttsXzCiR0Y2LMcpeG4shqc8C4g12msi4arRn9qEwG1qrFQ1W9jo3dDzw==",
"dependencies": {
"axios": "^1.4.0",
"cheerio": "1.0.0-rc.12",
"deepmerge-ts": "^5.1.0",
"google-sr-selectors": "^0.0.2",
"tslib": "^2.6.1"
}
},
"node_modules/google-sr-selectors": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/google-sr-selectors/-/google-sr-selectors-0.0.2.tgz",
"integrity": "sha512-7h+vo7NSDf+pZB/InDon4mwhXeTvy/9yvAChGnjppcdHgTwlUWDpYPWGUn781J3PrjBj6rZAginsSTGqG5uUZw=="
},
"node_modules/gopd": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz",
@@ -5141,24 +4986,6 @@
"integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==",
"dev": true
},
"node_modules/htmlparser2": {
"version": "8.0.2",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz",
"integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==",
"funding": [
"https://github.com/fb55/htmlparser2?sponsor=1",
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1",
"entities": "^4.4.0"
}
},
"node_modules/human-signals": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz",
@@ -6994,17 +6821,6 @@
"node": ">=8"
}
},
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
"dependencies": {
"boolbase": "^1.0.0"
},
"funding": {
"url": "https://github.com/fb55/nth-check?sponsor=1"
}
},
"node_modules/object-inspect": {
"version": "1.13.1",
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz",
@@ -7317,29 +7133,6 @@
"node": ">=0.2.6"
}
},
"node_modules/parse5": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
"dependencies": {
"entities": "^4.4.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5-htmlparser2-tree-adapter": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz",
"integrity": "sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==",
"dependencies": {
"domhandler": "^5.0.2",
"parse5": "^7.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/path-exists": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@@ -9407,7 +9200,8 @@
"node_modules/tslib": {
"version": "2.6.2",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==",
"dev": true
},
"node_modules/tsx": {
"version": "4.7.0",

View File

@@ -20,7 +20,6 @@
"bottleneck": "^2.19.5",
"cache-manager": "^5.4.0",
"fuse.js": "^7.0.0",
"google-sr": "^3.2.1",
"inversify": "^6.0.2",
"magnet-uri": "^6.2.0",
"moment": "^2.30.1",

View File

@@ -10,7 +10,6 @@ import {IMetadataService} from "@interfaces/metadata_service";
import {IMongoRepository} from "@mongo/interfaces/mongo_repository";
import {IocTypes} from "@setup/ioc_types";
import axios from 'axios';
import {ResultTypes, search} from 'google-sr';
import {inject, injectable} from "inversify";
import nameToImdb from 'name-to-imdb';
@@ -46,10 +45,7 @@ export class MetadataService implements IMetadataService {
const name = this.escapeTitle(info.title!);
const year = info.year || (info.date && info.date.slice(0, 4));
const key = `${name}_${year || 'NA'}_${info.type}`;
const query = `${name} ${year || ''} ${info.type} imdb`;
const fallbackQuery = `${name} ${info.type} imdb`;
const googleQuery = year ? query : fallbackQuery;
const imdbInMongo = await this.mongoRepository.getImdbId(name, info.type, year);
if (imdbInMongo) {
@@ -62,8 +58,7 @@ export class MetadataService implements IMetadataService {
);
return imdbId && 'tt' + imdbId.replace(/tt0*([1-9][0-9]*)$/, '$1').padStart(7, '0');
} catch (error) {
const imdbIdFallback = await this.getIMDbIdFromGoogle(googleQuery);
return imdbIdFallback && 'tt' + imdbIdFallback.toString().replace(/tt0*([1-9][0-9]*)$/, '$1').padStart(7, '0');
return undefined;
}
}
@@ -220,23 +215,4 @@ export class MetadataService implements IMetadataService {
});
});
};
/**
 * Searches Google for `query` and returns the IMDb id (`tt` followed by digits)
 * taken from the first ordinary search result whose link points at an
 * `imdb.com/title/` page.
 *
 * Resolves to `undefined` when no result carries such a link. Any error raised
 * while searching or scanning the results is replaced by a generic `Error`.
 */
private getIMDbIdFromGoogle = async (query: string): Promise<string | undefined> => {
    const imdbTitlePattern = /imdb\.com\/title\/(tt\d+)/;

    try {
        const hits = await search({query});

        for (const hit of hits) {
            // Only plain search results carry the `link` we are interested in.
            if (hit.type !== ResultTypes.SearchResult) {
                continue;
            }
            if (!hit.link.includes('imdb.com/title/')) {
                continue;
            }

            const found = hit.link.match(imdbTitlePattern);
            if (found) {
                return found[1];
            }
        }

        return undefined;
    } catch (error) {
        throw new Error('Failed to find IMDb ID from Google search');
    }
};
}

View File

@@ -103,10 +103,12 @@ describe('Configuration Tests', () => {
// Verifies that the metadata settings are read from environment variables and
// exposed on configurationService.metadataConfig as numbers.
it('should populate metadataConfig correctly', async() => {
// Environment values are always strings; they are set before the import below.
process.env.IMDB_CONCURRENT = '1';
process.env.IMDB_INTERVAL_MS = '1000';
process.env.TITLE_MATCH_THRESHOLD = '0.1';
// Dynamic import after the env setup - presumably the service reads
// process.env when the module is first evaluated (confirm in configuration_service).
const {configurationService} = await import("@services/configuration_service");
const {metadataConfig} = configurationService;
// The assertions expect numeric values (integers and a fractional threshold), not the raw strings.
expect(metadataConfig.IMDB_CONCURRENT).toBe(1);
expect(metadataConfig.IMDB_INTERVAL_MS).toBe(1000);
expect(metadataConfig.TITLE_MATCH_THRESHOLD).toBe(0.1);
});
it('should populate rabbitConfig correctly', async () => {

View File

@@ -5,20 +5,28 @@ import {MongoRepository} from "@mongo/mongo_repository";
import {IocTypes} from "@setup/ioc_types";
import {Container, inject} from "inversify";
jest.mock('@services/configuration_service', () => {
const metadataConfig = {
TITLE_MATCH_THRESHOLD: 0.25,
}
const cacheConfig = {
MONGODB_HOST: 'localhost',
MONGODB_PORT: '27017',
MONGODB_DB: 'knightcrawler',
MONGODB_USER: 'mongo',
MONGODB_PASSWORD: 'mongo',
get MONGO_URI(): string {
return `mongodb://${this.MONGODB_USER}:${this.MONGODB_PASSWORD}@${this.MONGODB_HOST}:${this.MONGODB_PORT}/${this.MONGODB_DB}?authSource=admin`;
},
};
jest.doMock('@services/configuration_service', () => {
return {
configurationService: {
cacheConfig: {
MONGODB_HOST: 'localhost',
MONGODB_PORT: '27017',
MONGODB_DB: 'knightcrawler',
MONGODB_USER: 'mongo',
MONGODB_PASSWORD: 'mongo',
get MONGO_URI(): string {
return `mongodb://${this.MONGODB_USER}:${this.MONGODB_PASSWORD}@${this.MONGODB_HOST}:${this.MONGODB_PORT}/${this.MONGODB_DB}?authSource=admin`;
}
},
}
cacheConfig: cacheConfig,
metadataConfig: metadataConfig,
},
}
});

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

234602
src/producer/Data/jav.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
namespace Producer.Extensions;
/// <summary>
/// Extension helpers for <see cref="string"/> values.
/// </summary>
public static class StringExtensions
{
    /// <summary>
    /// Extension-method form of <see cref="string.IsNullOrEmpty(string)"/>.
    /// </summary>
    /// <param name="value">The string to test; may be <see langword="null"/>.</param>
    /// <returns>
    /// <see langword="true"/> when <paramref name="value"/> is <see langword="null"/> or has
    /// zero length; otherwise <see langword="false"/>.
    /// </returns>
    /// <remarks>
    /// The <c>NotNullWhen(false)</c> annotation tells the compiler's nullable analysis that
    /// <paramref name="value"/> is not null once this method has returned <see langword="false"/>,
    /// so callers can use the value afterwards without a nullable warning or a null-forgiving operator.
    /// The attribute is fully qualified so that no additional using directive is required.
    /// </remarks>
    public static bool IsNullOrEmpty([System.Diagnostics.CodeAnalysis.NotNullWhen(false)] this string? value) =>
        string.IsNullOrEmpty(value);
}

View File

@@ -4,14 +4,12 @@ public partial class DebridMediaManagerCrawler(
IHttpClientFactory httpClientFactory,
ILogger<DebridMediaManagerCrawler> logger,
IDataStorage storage,
GithubConfiguration githubConfiguration) : BaseCrawler(logger, storage)
GithubConfiguration githubConfiguration,
IParsingService parsingService) : BaseCrawler(logger, storage)
{
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
private static partial Regex HashCollectionMatcher();
[GeneratedRegex(@"[sS]([0-9]{1,2})|seasons?[\s-]?([0-9]{1,2})", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SeasonMatcher();
private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
@@ -95,23 +93,70 @@ public partial class DebridMediaManagerCrawler(
private Torrent? ParseTorrent(JsonElement item)
{
var torrent = new Torrent
{
Source = Source,
Name = item.GetProperty("filename").GetString(),
Size = item.GetProperty("bytes").GetInt64().ToString(),
InfoHash = item.GetProperty("hash").ToString(),
Seeders = 0,
Leechers = 0,
};
if (string.IsNullOrEmpty(torrent.Name))
if (!item.TryGetProperty("filename", out var filenameElement) ||
!item.TryGetProperty("bytes", out var bytesElement) ||
!item.TryGetProperty("hash", out var hashElement))
{
return null;
}
torrent.Category = SeasonMatcher().IsMatch(torrent.Name) ? "tv" : "movies";
var parsedTorrent = parsingService.Parse(filenameElement.GetString());
if (parsedTorrent.IsInvalid)
{
return null;
}
var torrent = new Torrent
{
Source = Source,
Size = bytesElement.GetInt64().ToString(),
InfoHash = hashElement.ToString(),
Seeders = 0,
Leechers = 0,
};
return parsedTorrent.Type switch
{
TorrentType.Movie => HandleMovieType(torrent, parsedTorrent),
TorrentType.Tv => HandleTvType(torrent, parsedTorrent),
_ => null,
};
}
/// <summary>
/// Completes <paramref name="torrent"/> for a filename that was parsed as a movie.
/// </summary>
/// <param name="torrent">The partially populated torrent; its category and name are set here.</param>
/// <param name="parsedTorrent">The parsed filename whose <c>Movie.ReleaseTitle</c> is used.</param>
/// <returns>
/// <paramref name="torrent"/> with <c>Category</c> set to <c>"movies"</c> and <c>Name</c> set to the
/// release title, or <see langword="null"/> when the release title is null/empty or is rejected by
/// <c>parsingService.HasNoBannedTerms</c>.
/// </returns>
private Torrent? HandleMovieType(Torrent torrent, ParsedFilename parsedTorrent)
{
    // Read the title once; every check and the final assignment use the same value.
    var releaseTitle = parsedTorrent.Movie.ReleaseTitle;

    if (releaseTitle.IsNullOrEmpty())
    {
        return null;
    }

    if (!parsingService.HasNoBannedTerms(releaseTitle))
    {
        logger.LogWarning("Banned terms found in {Title}", releaseTitle);
        return null;
    }

    torrent.Category = "movies";
    torrent.Name = releaseTitle;

    return torrent;
}
/// <summary>
/// Completes <paramref name="torrent"/> for a filename that was parsed as a TV release.
/// </summary>
/// <param name="torrent">The partially populated torrent; its category and name are set here.</param>
/// <param name="parsedTorrent">The parsed filename whose <c>Show.ReleaseTitle</c> is used.</param>
/// <returns>
/// <paramref name="torrent"/> with <c>Category</c> set to <c>"tv"</c> and <c>Name</c> set to the
/// release title, or <see langword="null"/> when the release title is null/empty or is rejected by
/// <c>parsingService.HasNoBannedTerms</c>.
/// </returns>
private Torrent? HandleTvType(Torrent torrent, ParsedFilename parsedTorrent)
{
    // Read the title once; every check and the final assignment use the same value.
    var releaseTitle = parsedTorrent.Show.ReleaseTitle;

    if (releaseTitle.IsNullOrEmpty())
    {
        return null;
    }

    if (!parsingService.HasNoBannedTerms(releaseTitle))
    {
        // Log the rejection so dropped TV releases can be traced, as is done for movies.
        logger.LogWarning("Banned terms found in {Title}", releaseTitle);
        return null;
    }

    torrent.Category = "tv";
    torrent.Name = releaseTitle;

    return torrent;
}
@@ -119,6 +164,7 @@ public partial class DebridMediaManagerCrawler(
{
var torrents = json.RootElement.EnumerateArray()
.Select(ParseTorrent)
.Where(t => t is not null)
.ToList();
if (torrents.Count == 0)

View File

@@ -190,7 +190,6 @@ public partial class TorrentioCrawler(
Source = $"{Source}_{instance.Name}",
InfoHash = infoHash,
Category = "movies", // we only handle movies for now...
Imdb = imdbId,
};
var span = title.AsSpan();

View File

@@ -5,38 +5,43 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
protected override string Url => "https://apibay.org/precompiled/data_top100_recent.json";
protected override string Source => "TPB";
// ReSharper disable once UnusedMember.Local
private readonly Dictionary<string, Dictionary<string, int>> TpbCategories = new()
{
{"VIDEO", new() {
{"ALL", 200},
{"MOVIES", 201},
{"MOVIES_DVDR", 202},
{"MUSIC_VIDEOS", 203},
{"MOVIE_CLIPS", 204},
{"TV_SHOWS", 205},
{"HANDHELD", 206},
{"MOVIES_HD", 207},
{"TV_SHOWS_HD", 208},
{"MOVIES_3D", 209},
{"OTHER", 299},
}},
{"PORN", new() {
{"ALL", 500},
{"MOVIES", 501},
{"MOVIES_DVDR", 502},
{"PICTURES", 503},
{"GAMES", 504},
{"MOVIES_HD", 505},
{"MOVIE_CLIPS", 506},
{"OTHER", 599},
}},
};
// // ReSharper disable once UnusedMember.Local
// private readonly Dictionary<string, Dictionary<string, int>> TpbCategories = new()
// {
// {
// "VIDEO", new()
// {
// {"ALL", 200},
// {"MOVIES", 201},
// {"MOVIES_DVDR", 202},
// {"MUSIC_VIDEOS", 203},
// {"MOVIE_CLIPS", 204},
// {"TV_SHOWS", 205},
// {"HANDHELD", 206},
// {"MOVIES_HD", 207},
// {"TV_SHOWS_HD", 208},
// {"MOVIES_3D", 209},
// {"OTHER", 299},
// }
// },
// {
// "PORN", new()
// {
// {"ALL", 500},
// {"MOVIES", 501},
// {"MOVIES_DVDR", 502},
// {"PICTURES", 503},
// {"GAMES", 504},
// {"MOVIES_HD", 505},
// {"MOVIE_CLIPS", 506},
// {"OTHER", 599},
// }
// },
// };
private static readonly HashSet<int> TvSeriesCategories = [ 205, 208 ];
private static readonly HashSet<int> MovieCategories = [ 201, 202, 207, 209 ];
private static readonly HashSet<int> PornCategories = [ 500, 501, 502, 505, 506 ];
private static readonly HashSet<int> AllowedCategories = [ ..MovieCategories, ..TvSeriesCategories ];
protected override IReadOnlyDictionary<string, string> Mappings
@@ -47,7 +52,6 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
[nameof(Torrent.Seeders)] = "seeders",
[nameof(Torrent.Leechers)] = "leechers",
[nameof(Torrent.InfoHash)] = "info_hash",
[nameof(Torrent.Imdb)] = "imdb",
[nameof(Torrent.Category)] = "category",
};
@@ -67,7 +71,6 @@ public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler
Size = item.GetProperty(Mappings["Size"]).GetInt64().ToString(),
Seeders = item.GetProperty(Mappings["Seeders"]).GetInt32(),
Leechers = item.GetProperty(Mappings["Leechers"]).GetInt32(),
Imdb = item.GetProperty(Mappings["Imdb"]).GetString(),
};
HandleInfoHash(item, torrent, "InfoHash");

View File

@@ -0,0 +1,13 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Smart-enum of the audio channel layouts that can be reported for a release.
/// Each member pairs a name with the string value that represents the layout.
/// </summary>
public sealed class AudioChannels : SmartEnum<AudioChannels, string>
{
    /// <summary>Layout whose value is <c>"7.1"</c>.</summary>
    public static readonly AudioChannels SEVEN = new AudioChannels("SEVEN", "7.1");

    /// <summary>Layout whose value is <c>"5.1"</c>.</summary>
    public static readonly AudioChannels SIX = new AudioChannels("SIX", "5.1");

    /// <summary>Layout whose value is <c>"stereo"</c>.</summary>
    public static readonly AudioChannels STEREO = new AudioChannels("STEREO", "stereo");

    /// <summary>Layout whose value is <c>"mono"</c>.</summary>
    public static readonly AudioChannels MONO = new AudioChannels("MONO", "mono");

    // Private: the members declared above are the only instances.
    private AudioChannels(string name, string value)
        : base(name, value)
    {
    }
}

View File

@@ -0,0 +1,50 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Detects the audio channel layout mentioned in a release title.
/// </summary>
public static partial class AudioChannelsParser
{
    [GeneratedRegex(@"\b(?<eight>7.?[01])\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex EightChannelExp();

    [GeneratedRegex(@"\b(?<six>(6[\W]0(?:ch)?)(?=[^\d]|$)|(5[\W][01](?:ch)?)(?=[^\d]|$)|5ch|6ch)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SixChannelExp();

    [GeneratedRegex(@"(?<stereo>((2[\W]0(?:ch)?)(?=[^\d]|$))|(stereo))", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex StereoChannelExp();

    [GeneratedRegex(@"(?<mono>(1[\W]0(?:ch)?)(?=[^\d]|$)|(mono)|(1ch))", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex MonoChannelExp();

    // The four patterns above joined into a single alternation; each contributes one named group.
    private static readonly Regex ChannelExp = new(
        string.Join("|", EightChannelExp(), SixChannelExp(), StereoChannelExp(), MonoChannelExp()),
        RegexOptions.IgnoreCase);

    // Named groups in the order they are inspected, paired with the layout each one stands for.
    private static readonly (string GroupName, AudioChannels Layout)[] GroupLayouts =
    [
        ("eight", AudioChannels.SEVEN),
        ("six", AudioChannels.SIX),
        ("stereo", AudioChannels.STEREO),
        ("mono", AudioChannels.MONO),
    ];

    /// <summary>
    /// Looks for the first channel-layout marker in <paramref name="title"/>.
    /// </summary>
    /// <param name="title">The release title to inspect.</param>
    /// <param name="channels">The detected layout, or <see langword="null"/> when nothing matched.</param>
    /// <param name="source">The matched text of the named group, or <see langword="null"/> when nothing matched.</param>
    public static void Parse(string title, out AudioChannels? channels, out string? source)
    {
        channels = null;
        source = null;

        var match = ChannelExp.Match(title);
        if (!match.Success)
        {
            return;
        }

        foreach (var (groupName, layout) in GroupLayouts)
        {
            var group = match.Groups[groupName];
            if (!group.Success)
            {
                continue;
            }

            channels = layout;
            source = group.Value;
            return;
        }
    }
}

View File

@@ -0,0 +1,22 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Smart-enum of the audio codecs that can be reported for a release.
/// Each member pairs a short name with a display value.
/// </summary>
public sealed class AudioCodec : SmartEnum<AudioCodec, string>
{
    public static readonly AudioCodec MP3 = new AudioCodec("MP3", "MP3");
    public static readonly AudioCodec MP2 = new AudioCodec("MP2", "MP2");

    // Dolby family.
    public static readonly AudioCodec DOLBY = new AudioCodec("DOLBY", "Dolby Digital");
    public static readonly AudioCodec EAC3 = new AudioCodec("EAC3", "Dolby Digital Plus");

    public static readonly AudioCodec AAC = new AudioCodec("AAC", "AAC");
    public static readonly AudioCodec FLAC = new AudioCodec("FLAC", "FLAC");

    // DTS family.
    public static readonly AudioCodec DTS = new AudioCodec("DTS", "DTS");
    public static readonly AudioCodec DTSHD = new AudioCodec("DTSHD", "DTS-HD");

    public static readonly AudioCodec TRUEHD = new AudioCodec("TRUEHD", "Dolby TrueHD");
    public static readonly AudioCodec OPUS = new AudioCodec("OPUS", "Opus");
    public static readonly AudioCodec VORBIS = new AudioCodec("VORBIS", "Vorbis");
    public static readonly AudioCodec PCM = new AudioCodec("PCM", "PCM");
    public static readonly AudioCodec LPCM = new AudioCodec("LPCM", "LPCM");

    // Private: the members declared above are the only instances.
    private AudioCodec(string name, string value)
        : base(name, value)
    {
    }
}

View File

@@ -0,0 +1,138 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Detects the audio codec mentioned in a release title.
/// </summary>
public static partial class AudioCodecsParser
{
    [GeneratedRegex(@"\b(?<mp3>(LAME(?:\d)+-?(?:\d)+)|(mp3))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex Mp3CodecExp();

    [GeneratedRegex(@"\b(?<mp2>(mp2))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex Mp2CodecExp();

    [GeneratedRegex(@"\b(?<dolby>(Dolby)|(Dolby-?Digital)|(DD)|(AC3D?))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DolbyCodecExp();

    [GeneratedRegex(@"\b(?<dolbyatmos>(Dolby-?Atmos))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DolbyAtmosCodecExp();

    [GeneratedRegex(@"\b(?<aac>(AAC))(\d?.?\d?)(ch)?\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex AacAtmosCodecExp();

    [GeneratedRegex(@"\b(?<eac3>(EAC3|DDP|DD\+))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex Eac3CodecExp();

    [GeneratedRegex(@"\b(?<flac>(FLAC))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex FlacCodecExp();

    [GeneratedRegex(@"\b(?<dts>(DTS))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DtsCodecExp();

    [GeneratedRegex(@"\b(?<dtshd>(DTS-?HD)|(DTS(?=-?MA)|(DTS-X)))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DtsHdCodecExp();

    [GeneratedRegex(@"\b(?<truehd>(True-?HD))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex TrueHdCodecExp();

    [GeneratedRegex(@"\b(?<opus>(Opus))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex OpusCodecExp();

    [GeneratedRegex(@"\b(?<vorbis>(Vorbis))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex VorbisCodecExp();

    [GeneratedRegex(@"\b(?<pcm>(PCM))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex PcmCodecExp();

    [GeneratedRegex(@"\b(?<lpcm>(LPCM))\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex LpcmCodecExp();

    // All codec patterns joined into one alternation. Alternatives are tried in order at each
    // position, so a more specific pattern must precede any pattern that matches a prefix of it:
    //   * "Dolby-Atmos" would otherwise be consumed as plain "Dolby",
    //   * "DD+" would otherwise be consumed as plain "DD",
    //   * "DTS-HD" would otherwise be consumed as plain "DTS".
    private static readonly Regex AudioCodecExp = new(
        string.Join(
            "|",
            Mp3CodecExp(), Mp2CodecExp(),
            DolbyAtmosCodecExp(), Eac3CodecExp(), DolbyCodecExp(),
            AacAtmosCodecExp(), FlacCodecExp(),
            DtsHdCodecExp(), DtsCodecExp(),
            TrueHdCodecExp(), OpusCodecExp(), VorbisCodecExp(), PcmCodecExp(), LpcmCodecExp()),
        RegexOptions.IgnoreCase);

    // Named groups in the order they are inspected, paired with the codec each one reports.
    // Note that the "dolbyatmos" group is reported as EAC3.
    private static readonly (string GroupName, AudioCodec Codec)[] GroupCodecs =
    [
        ("aac", AudioCodec.AAC),
        ("dolbyatmos", AudioCodec.EAC3),
        ("dolby", AudioCodec.DOLBY),
        ("dtshd", AudioCodec.DTSHD),
        ("dts", AudioCodec.DTS),
        ("flac", AudioCodec.FLAC),
        ("truehd", AudioCodec.TRUEHD),
        ("mp3", AudioCodec.MP3),
        ("mp2", AudioCodec.MP2),
        ("pcm", AudioCodec.PCM),
        ("lpcm", AudioCodec.LPCM),
        ("opus", AudioCodec.OPUS),
        ("vorbis", AudioCodec.VORBIS),
        ("eac3", AudioCodec.EAC3),
    ];

    /// <summary>
    /// Looks for the first audio-codec marker in <paramref name="title"/>.
    /// </summary>
    /// <param name="title">The release title to inspect.</param>
    /// <param name="codec">The detected codec, or <see langword="null"/> when nothing matched.</param>
    /// <param name="source">The matched text of the named group, or <see langword="null"/> when nothing matched.</param>
    public static void Parse(string title, out AudioCodec? codec, out string? source)
    {
        codec = null;
        source = null;

        var match = AudioCodecExp.Match(title);
        if (!match.Success)
        {
            return;
        }

        foreach (var (groupName, candidate) in GroupCodecs)
        {
            var group = match.Groups[groupName];
            if (!group.Success)
            {
                continue;
            }

            codec = candidate;
            source = group.Value;
            return;
        }
    }
}

View File

@@ -0,0 +1,19 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Base set of properties describing a parsed release name.
/// Every scalar property is nullable and has no default; the two list properties start out empty.
/// NOTE(review): the code that populates these properties is not part of this file - the
/// per-property meaning is presumably what the matching parser in this namespace extracts.
/// </summary>
public class BaseParsed
{
public string? ReleaseTitle { get; set; }
public string? Title { get; set; }
// Kept as a string rather than a number.
public string? Year { get; set; }
public Edition? Edition { get; set; }
public Resolution? Resolution { get; set; }
public VideoCodec? VideoCodec { get; set; }
public AudioCodec? AudioCodec { get; set; }
public AudioChannels? AudioChannels { get; set; }
public Revision? Revision { get; set; }
public string? Group { get; set; }
// Never null by default: initialised to empty lists.
public List<Language> Languages { get; set; } = [];
public List<Source> Sources { get; set; } = [];
public bool? Multi { get; set; }
public bool? Complete { get; set; }
}

View File

@@ -0,0 +1,14 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Checks whether a release title is marked as a complete release.
/// </summary>
public static partial class Complete
{
    [GeneratedRegex(@"\b(NTSC|PAL)?.DVDR\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex CompleteDvdExp();

    [GeneratedRegex(@"\b(COMPLETE)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex CompleteExp();

    /// <summary>
    /// Tests <paramref name="title"/> for a DVDR marker, optionally preceded by NTSC or PAL.
    /// </summary>
    /// <param name="title">The release title to inspect.</param>
    /// <returns>
    /// <see langword="true"/> when the marker is present; <see langword="null"/> (not
    /// <see langword="false"/>) when it is absent.
    /// </returns>
    public static bool? IsCompleteDvd(string title)
    {
        if (CompleteDvdExp().IsMatch(title))
        {
            return true;
        }

        return null;
    }

    /// <summary>
    /// Tests <paramref name="title"/> for the word COMPLETE or for a DVDR marker.
    /// </summary>
    /// <param name="title">The release title to inspect.</param>
    /// <returns><see langword="true"/> when either marker is present; otherwise <see langword="false"/>.</returns>
    public static bool IsComplete(string title)
    {
        if (CompleteExp().IsMatch(title))
        {
            return true;
        }

        return IsCompleteDvd(title) is true;
    }
}

View File

@@ -0,0 +1,26 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Edition and presentation flags of a release.
/// EditionParser.Parse fills every flag from a case-insensitive, whole-word regex test against
/// the release name with the parsed title removed; the comments below list what each test looks for.
/// </summary>
public class Edition
{
// "INTERNAL"
public bool? Internal { get; set; }
// "LIMITED"
public bool? Limited { get; set; }
// "Remastered", "Anniversary" or "Restored"
public bool? Remastered { get; set; }
// "Extended", "Uncut", "Ultimate", "Rogue" or "Collector"
public bool? Extended { get; set; }
// "Theatrical"
public bool? Theatrical { get; set; }
// "Director" or "Directors"
public bool? Directors { get; set; }
// "Uncensored" or "Unrated"
public bool? Unrated { get; set; }
// "IMAX"
public bool? Imax { get; set; }
// "Despecialized" or "Fan Edit" (any single character, or none, between the two words)
public bool? FanEdit { get; set; }
// "HDR"
public bool? Hdr { get; set; }
// "BW"
public bool? Bw { get; set; }
// "3D"
public bool? ThreeD { get; set; }
// "Half-SBS", "HalfSBS" or "HSBS"
public bool? Hsbs { get; set; }
// "SBS" when not directly preceded by "H" or "HALF-"
public bool? Sbs { get; set; }
// "HOU"
public bool? Hou { get; set; }
// "UHD"
public bool? Uhd { get; set; }
// "OAR"
public bool? Oar { get; set; }
// "DV"
public bool? DolbyVision { get; set; }
// "HC", "SUBBED", or a word ending in "SUB"/"SUBS" whose prefix does not end in "SOFT" or "HORRIBLE"
public bool? HardcodedSubs { get; set; }
// "Deleted Scenes", optionally prefixed with "Bonus"
public bool? DeletedScenes { get; set; }
// "Bonus", "Extras", "Behind the Scenes", "Making of", "Interviews", "Featurettes", "Outtakes",
// "Bloopers" or "Gag Reel", when not followed by "Deleted Scenes"
public bool? BonusContent { get; set; }
}

View File

@@ -0,0 +1,101 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Derives the <see cref="Edition"/> flags of a release from its name.
/// </summary>
public static partial class EditionParser
{
    [GeneratedRegex(@"\b(INTERNAL)\b", RegexOptions.IgnoreCase)]
    private static partial Regex InternalExp();

    [GeneratedRegex(@"\b(Remastered|Anniversary|Restored)\b", RegexOptions.IgnoreCase)]
    private static partial Regex RemasteredExp();

    [GeneratedRegex(@"\b(IMAX)\b", RegexOptions.IgnoreCase)]
    private static partial Regex ImaxExp();

    [GeneratedRegex(@"\b(Uncensored|Unrated)\b", RegexOptions.IgnoreCase)]
    private static partial Regex UnratedExp();

    [GeneratedRegex(@"\b(Extended|Uncut|Ultimate|Rogue|Collector)\b", RegexOptions.IgnoreCase)]
    private static partial Regex ExtendedExp();

    [GeneratedRegex(@"\b(Theatrical)\b", RegexOptions.IgnoreCase)]
    private static partial Regex TheatricalExp();

    [GeneratedRegex(@"\b(Directors?)\b", RegexOptions.IgnoreCase)]
    private static partial Regex DirectorsExp();

    [GeneratedRegex(@"\b(Despecialized|Fan.?Edit)\b", RegexOptions.IgnoreCase)]
    private static partial Regex FanExp();

    [GeneratedRegex(@"\b(LIMITED)\b", RegexOptions.IgnoreCase)]
    private static partial Regex LimitedExp();

    [GeneratedRegex(@"\b(HDR)\b", RegexOptions.IgnoreCase)]
    private static partial Regex HdrExp();

    [GeneratedRegex(@"\b(3D)\b", RegexOptions.IgnoreCase)]
    private static partial Regex ThreeD();

    [GeneratedRegex(@"\b(Half-?SBS|HSBS)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Hsbs();

    [GeneratedRegex(@"\b((?<!H|HALF-)SBS)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Sbs();

    [GeneratedRegex(@"\b(HOU)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Hou();

    [GeneratedRegex(@"\b(UHD)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Uhd();

    [GeneratedRegex(@"\b(OAR)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Oar();

    [GeneratedRegex(@"\b(DV(\b(HDR10|HLG|SDR))?)\b", RegexOptions.IgnoreCase)]
    private static partial Regex DolbyVision();

    [GeneratedRegex(@"\b((?<hcsub>(\w+(?<!SOFT|HORRIBLE)SUBS?))|(?<hc>(HC|SUBBED)))\b", RegexOptions.IgnoreCase)]
    private static partial Regex HardcodedSubsExp();

    [GeneratedRegex(@"\b((Bonus.)?Deleted.Scenes)\b", RegexOptions.IgnoreCase)]
    private static partial Regex DeletedScenes();

    [GeneratedRegex(@"\b((Bonus|Extras|Behind.the.Scenes|Making.of|Interviews|Featurettes|Outtakes|Bloopers|Gag.Reel).(?!(Deleted.Scenes)))\b", RegexOptions.IgnoreCase)]
    private static partial Regex BonusContent();

    [GeneratedRegex(@"\b(BW)\b", RegexOptions.IgnoreCase)]
    private static partial Regex Bw();

    /// <summary>
    /// Builds the <see cref="Edition"/> flags for <paramref name="title"/>.
    /// </summary>
    /// <param name="title">The full release name.</param>
    /// <returns>
    /// An <see cref="Edition"/> in which every flag is set to <see langword="true"/> or
    /// <see langword="false"/> according to whether its pattern matches the release name
    /// once dots are replaced by spaces and the parsed title is removed.
    /// </returns>
    public static Edition Parse(string title)
    {
        TitleParser.Parse(title, out var parsedTitle, out _);

        var remainder = title.Replace(".", " ");

        // string.Replace throws when the value to remove is null or empty, so the title is
        // only stripped when the title parser actually produced one.
        if (!string.IsNullOrEmpty(parsedTitle))
        {
            remainder = remainder.Replace(parsedTitle, "");
        }

        // Lower-case with invariant rules: culture-sensitive lower-casing can turn 'I' into a
        // dotless 'ı' (e.g. under Turkish cultures), which would stop patterns such as
        // INTERNAL or IMAX from matching.
        var withoutTitle = remainder.ToLowerInvariant();

        return new Edition
        {
            Internal = InternalExp().IsMatch(withoutTitle),
            Limited = LimitedExp().IsMatch(withoutTitle),
            Remastered = RemasteredExp().IsMatch(withoutTitle),
            Extended = ExtendedExp().IsMatch(withoutTitle),
            Theatrical = TheatricalExp().IsMatch(withoutTitle),
            Directors = DirectorsExp().IsMatch(withoutTitle),
            Unrated = UnratedExp().IsMatch(withoutTitle),
            Imax = ImaxExp().IsMatch(withoutTitle),
            FanEdit = FanExp().IsMatch(withoutTitle),
            Hdr = HdrExp().IsMatch(withoutTitle),
            ThreeD = ThreeD().IsMatch(withoutTitle),
            Hsbs = Hsbs().IsMatch(withoutTitle),
            Sbs = Sbs().IsMatch(withoutTitle),
            Hou = Hou().IsMatch(withoutTitle),
            Uhd = Uhd().IsMatch(withoutTitle),
            Oar = Oar().IsMatch(withoutTitle),
            DolbyVision = DolbyVision().IsMatch(withoutTitle),
            HardcodedSubs = HardcodedSubsExp().IsMatch(withoutTitle),
            DeletedScenes = DeletedScenes().IsMatch(withoutTitle),
            BonusContent = BonusContent().IsMatch(withoutTitle),
            Bw = Bw().IsMatch(withoutTitle),
        };
    }
}

View File

@@ -0,0 +1,78 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Strips a trailing video file extension from a release name.
/// </summary>
public static partial class FileExtensionParser
{
    // A trailing ".xx" - ".xxxx" alphanumeric suffix. ".dvr-ms" is listed explicitly because it
    // contains a hyphen and is longer than four characters, so the generic part can never match it.
    [GeneratedRegex(@"\.(?:dvr-ms|[a-z0-9]{2,4})$", RegexOptions.IgnoreCase)]
    private static partial Regex FileExtensionExp();

    // Known video extensions; looked up case-insensitively.
    private static readonly HashSet<string> _fileExtensions = new(StringComparer.OrdinalIgnoreCase)
    {
        // Unknown
        ".webm",
        // SDTV
        ".m4v",
        ".3gp",
        ".nsv",
        ".ty",
        ".strm",
        ".rm",
        ".rmvb",
        ".m3u",
        ".ifo",
        ".mov",
        ".qt",
        ".divx",
        ".xvid",
        ".bivx",
        ".nrg",
        ".pva",
        ".wmv",
        ".asf",
        ".asx",
        ".ogm",
        ".ogv",
        ".m2v",
        ".avi",
        ".bin",
        ".dat",
        ".dvr-ms",
        ".mpg",
        ".mpeg",
        ".mp4",
        ".avc",
        ".vp3",
        ".svq3",
        ".nuv",
        ".viv",
        ".dv",
        ".fli",
        ".flv",
        ".wpl",
        // DVD
        ".img",
        ".iso",
        ".vob",
        // HD
        ".mkv",
        ".mk3d",
        ".ts",
        ".wtv",
        // Bluray
        ".m2ts",
    };

    /// <summary>
    /// Removes the trailing extension from <paramref name="title"/> when it is one of the known
    /// video extensions; any other suffix (for example a trailing ".2020") is left in place.
    /// </summary>
    /// <param name="title">The release or file name.</param>
    /// <returns>The name without its video extension, or the input unchanged.</returns>
    public static string RemoveFileExtension(string title) =>
        FileExtensionExp().Replace(
            title,
            match => _fileExtensions.Contains(match.Value) ? string.Empty : match.Value);
}

View File

@@ -0,0 +1,71 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Extracts the release group from a release name.
/// </summary>
public static partial class GroupParser
{
    // Leading "[site.tld]" tag or "www.site.com/net" prefix.
    [GeneratedRegex(@"^\[\s*[a-z]+(\.[a-z]+)+\s*\][- ]*|^www\.[a-z]+\.(?:com|net)[ -]*", RegexOptions.IgnoreCase)]
    private static partial Regex WebsitePrefixExp();

    // Trailing "-<tag>" suffixes that are stripped before the release group is looked up.
    [GeneratedRegex(@"(-(RP|1|NZBGeek|Obfuscated|Obfuscation|Scrambled|sample|Pre|postbot|xpost|Rakuv[a-z0-9]*|WhiteRev|BUYMORE|AsRequested|AlternativeToRequested|GEROV|Z0iDS3N|Chamele0n|4P|4Planet|AlteZachen|RePACKPOST))+$", RegexOptions.IgnoreCase)]
    private static partial Regex CleanReleaseGroupExp();

    // "-<group>" where the text ending at the group is not a quality/audio token such as WEB-DL or DTS-HD.
    [GeneratedRegex(@"-(?<releasegroup>[a-z0-9]+)(?<!WEB-DL|WEB-RIP|480p|720p|1080p|2160p|DTS-(HD|X|MA|ES)|([a-zA-Z]{3}-ENG))(?:\b|[-._ ])", RegexOptions.IgnoreCase)]
    private static partial Regex ReleaseGroupRegexExp();

    // Leading "[subgroup]" tag.
    [GeneratedRegex(@"^(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeReleaseGroupExp();

    // Explicitly listed groups, matched at the very end of the name (optionally in brackets).
    [GeneratedRegex(@"(\[)?(?<releasegroup>(Joy|YIFY|YTS.(MX|LT|AG)|FreetheFish|VH-PROD|FTW-HS|DX-TV|Blu-bits|afm72|Anna|Bandi|Ghost|Kappa|MONOLITH|Qman|RZeroX|SAMPA|Silence|theincognito|D-Z0N3|t3nzin|Vyndros|HDO|DusIctv|DHD|SEV|CtrlHD|-ZR-|ADC|XZVN|RH|Kametsu|r00t|HONE))(\])?$", RegexOptions.IgnoreCase)]
    private static partial Regex ExceptionReleaseGroupRegex();

    /// <summary>
    /// Finds the release group in <paramref name="title"/>.
    /// </summary>
    /// <remarks>
    /// Lookup order: the explicit exception list, then a leading "[subgroup]" tag, then the first
    /// generic "-group" match after known junk suffixes have been removed.
    /// </remarks>
    /// <param name="title">The raw release name.</param>
    /// <returns>The group name, or <c>null</c> when none can be found.</returns>
    public static string? Parse(string title)
    {
        var nowebsiteTitle = WebsitePrefixExp().Replace(title, "");
        TitleParser.Parse(nowebsiteTitle, out var releaseTitle, out _);
        releaseTitle = releaseTitle.Replace(" ", ".");

        var trimmed = nowebsiteTitle.Replace(" ", ".");

        // string.Replace throws ArgumentException for an empty search value, so an empty parsed
        // title is simply not stripped.
        if (releaseTitle.Length > 0 && releaseTitle != nowebsiteTitle)
        {
            trimmed = trimmed.Replace(releaseTitle, "");
        }

        trimmed = trimmed.Replace(".-.", ".");
        trimmed = TitleParser.SimplifyTitle(FileExtensionParser.RemoveFileExtension(trimmed.Trim()));

        if (trimmed.Length == 0)
        {
            return null;
        }

        var exceptionResult = ExceptionReleaseGroupRegex().Match(trimmed);
        if (exceptionResult.Groups["releasegroup"].Success)
        {
            return exceptionResult.Groups["releasegroup"].Value;
        }

        var animeResult = AnimeReleaseGroupExp().Match(trimmed);
        if (animeResult.Success)
        {
            return animeResult.Groups["subgroup"].Value;
        }

        trimmed = CleanReleaseGroupExp().Replace(trimmed, "");

        // The "releasegroup" group is mandatory in the pattern, so the first successful match
        // always carries it; the generated regex is used directly instead of building a new
        // Regex instance from its pattern on every call.
        var result = ReleaseGroupRegexExp().Match(trimmed);
        return result.Success ? result.Groups["releasegroup"].Value : null;
    }
}

View File

@@ -0,0 +1,23 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Title parsing and fuzzy title-matching helpers. The behaviour notes on the members below
/// describe the <c>ParsingService</c> implementation.
/// </summary>
public interface IParsingService
{
/// <summary>Parses a torrent/file name into a <see cref="ParsedFilename"/>.</summary>
ParsedFilename Parse(string name);
/// <summary>Lower-cases <paramref name="title"/> and removes every character other than a-z and 0-9.</summary>
string Naked(string title);
/// <summary>Returns the four-digit runs in <paramref name="str"/> that are greater than 1900 and not after the current year.</summary>
List<string> GrabYears(string str);
/// <summary>Returns the digit runs in <paramref name="str"/> whose value is between 1 and 500.</summary>
List<int> GrabPossibleSeasonNums(string str);
/// <summary>
/// Tells whether <paramref name="test"/> contains one of <paramref name="years"/>; unless
/// <paramref name="strictCheck"/> is set, the year before or after also counts.
/// </summary>
bool HasYear(string test, List<string> years, bool strictCheck = false);
/// <summary>Removes non-spacing marks (accents) from <paramref name="str"/>.</summary>
string RemoveDiacritics(string str);
/// <summary>Collapses every run of a repeated character into a single occurrence.</summary>
string RemoveRepeats(string str);
/// <summary>Converts a roman numeral to its integer value.</summary>
int RomanToDecimal(string roman);
/// <summary>Replaces roman numerals found in <paramref name="input"/> with their decimal value.</summary>
string ReplaceRomanWithDecimal(string input);
/// <summary>
/// Compares two titles ignoring whitespace, also accepting equality after
/// <see cref="Naked"/>, <see cref="RemoveRepeats"/> or <see cref="RemoveDiacritics"/>.
/// </summary>
bool StrictEqual(string title1, string title2);
/// <summary>
/// Counts how many words of <paramref name="target"/> are found in <paramref name="test"/>;
/// with <paramref name="shouldBeInSequence"/> the length of the longest run is returned instead.
/// </summary>
int CountTestTermsInTarget(string test, string target, bool shouldBeInSequence = false);
/// <summary>Loose check that <paramref name="test"/> contains, or parses to, <paramref name="target"/>.</summary>
bool FlexEq(string test, string target, List<string> years);
/// <summary>Scores how well <paramref name="test"/> matches the title <paramref name="target"/>.</summary>
bool MatchesTitle(string target, List<string> years, string test);
/// <summary>Tells whether every term of <paramref name="mustHaveTerms"/> occurs in <paramref name="testTitle"/>.</summary>
bool IncludesMustHaveTerms(List<string> mustHaveTerms, string testTitle);
/// <summary>
/// Tells whether <paramref name="testTitle"/> is free of banned terms, ignoring terms that
/// also occur in <paramref name="targetTitle"/>.
/// </summary>
bool HasNoBannedTerms(string targetTitle, string testTitle);
/// <summary>Tells whether <paramref name="targetTitle"/> is free of banned terms.</summary>
bool HasNoBannedTerms(string targetTitle);
/// <summary>Combines <see cref="MatchesTitle"/> and <see cref="HasNoBannedTerms(string, string)"/>.</summary>
bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle);
/// <summary>Counts the words of <paramref name="title"/> longer than three characters that are not common words.</summary>
int CountUncommonWords(string title);
}

View File

@@ -0,0 +1,6 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Turns a torrent/file name into a <see cref="ParsedFilename"/>.
/// </summary>
public interface ITorrentTitleParser
{
/// <summary>Parses <paramref name="name"/> and returns the structured result.</summary>
ParsedFilename Parse(string name);
}

View File

@@ -0,0 +1,50 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Smart-enum of the languages the title parser can report. For every member the name and the
/// value are the same English label.
/// </summary>
public sealed class Language : SmartEnum<Language, string>
{
public static readonly Language English = new("English", "English");
public static readonly Language French = new("French", "French");
public static readonly Language Spanish = new("Spanish", "Spanish");
public static readonly Language German = new("German", "German");
public static readonly Language Italian = new("Italian", "Italian");
public static readonly Language Danish = new("Danish", "Danish");
public static readonly Language Dutch = new("Dutch", "Dutch");
public static readonly Language Japanese = new("Japanese", "Japanese");
public static readonly Language Cantonese = new("Cantonese", "Cantonese");
public static readonly Language Mandarin = new("Mandarin", "Mandarin");
public static readonly Language Russian = new("Russian", "Russian");
public static readonly Language Polish = new("Polish", "Polish");
public static readonly Language Vietnamese = new("Vietnamese", "Vietnamese");
public static readonly Language Nordic = new("Nordic", "Nordic");
public static readonly Language Swedish = new("Swedish", "Swedish");
public static readonly Language Norwegian = new("Norwegian", "Norwegian");
public static readonly Language Finnish = new("Finnish", "Finnish");
public static readonly Language Turkish = new("Turkish", "Turkish");
public static readonly Language Portuguese = new("Portuguese", "Portuguese");
public static readonly Language Flemish = new("Flemish", "Flemish");
public static readonly Language Greek = new("Greek", "Greek");
public static readonly Language Korean = new("Korean", "Korean");
public static readonly Language Hungarian = new("Hungarian", "Hungarian");
public static readonly Language Persian = new("Persian", "Persian");
public static readonly Language Bengali = new("Bengali", "Bengali");
public static readonly Language Bulgarian = new("Bulgarian", "Bulgarian");
public static readonly Language Brazilian = new("Brazilian", "Brazilian");
public static readonly Language Hebrew = new("Hebrew", "Hebrew");
public static readonly Language Czech = new("Czech", "Czech");
public static readonly Language Ukrainian = new("Ukrainian", "Ukrainian");
public static readonly Language Catalan = new("Catalan", "Catalan");
public static readonly Language Chinese = new("Chinese", "Chinese");
public static readonly Language Thai = new("Thai", "Thai");
public static readonly Language Hindi = new("Hindi", "Hindi");
public static readonly Language Tamil = new("Tamil", "Tamil");
public static readonly Language Arabic = new("Arabic", "Arabic");
public static readonly Language Estonian = new("Estonian", "Estonian");
public static readonly Language Icelandic = new("Icelandic", "Icelandic");
public static readonly Language Latvian = new("Latvian", "Latvian");
public static readonly Language Lithuanian = new("Lithuanian", "Lithuanian");
public static readonly Language Romanian = new("Romanian", "Romanian");
public static readonly Language Slovak = new("Slovak", "Slovak");
public static readonly Language Serbian = new("Serbian", "Serbian");
// Private: the set of languages is closed to the members declared above.
private Language(string name, string value) : base(name, value) { }
}

View File

@@ -0,0 +1,340 @@
namespace Producer.Features.ParseTorrentTitle;
public static partial class LanguageParser
{
// Language detectors. All patterns are case-insensitive and pinned to the en-GB culture.
// WebDL and MultiExp are used by IsMulti; the rest are used by Parse as yes/no tests.

// "WEBDL" / "WEB-DL", removed before looking for the multi-language markers.
[GeneratedRegex(@"\bWEB-?DL\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex WebDL();
// MULTi / DUAL / DL as a whole word, not directly preceded by "WEB-".
[GeneratedRegex(@"(?<!(WEB-))\b(MULTi|DUAL|DL)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex MultiExp();
// NOTE(review): "FI" is listed under English here - looks like it may have been meant for
// Finnish; confirm before changing.
[GeneratedRegex(@"\b(english|eng|EN|FI)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex EnglishRegex();
[GeneratedRegex(@"\b(DK|DAN|danish)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex DanishRegex();
[GeneratedRegex(@"\b(SE|SWE|swedish)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SwedishRegex();
[GeneratedRegex(@"\b(ice|Icelandic)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex IcelandicRegex();
[GeneratedRegex(@"\b(chi|chinese)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex ChineseRegex();
[GeneratedRegex(@"\b(ita|italian)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex ItalianRegex();
[GeneratedRegex(@"\b(german|videomann)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex GermanRegex();
[GeneratedRegex(@"\b(flemish)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex FlemishRegex();
[GeneratedRegex(@"\b(greek)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex GreekRegex();
[GeneratedRegex(@"\b(FR|FRENCH|VOSTFR|VO|VFF|VFQ|VF2|TRUEFRENCH|SUBFRENCH)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex FrenchRegex();
[GeneratedRegex(@"\b(russian|rus)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex RussianRegex();
[GeneratedRegex(@"\b(norwegian|NO)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex NorwegianRegex();
[GeneratedRegex(@"\b(HUNDUB|HUN|hungarian)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex HungarianRegex();
[GeneratedRegex(@"\b(HebDub)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex HebrewRegex();
// NOTE(review): "SK" appears both here and in SlovakRegex, so an "SK" tag yields Czech and
// Slovak together - confirm that this is intended.
[GeneratedRegex(@"\b(CZ|SK)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex CzechRegex();
[GeneratedRegex(@"(?<ukrainian>\bukr\b)", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex UkrainianRegex();
[GeneratedRegex(@"\b(PL|PLDUB|POLISH)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex PolishRegex();
[GeneratedRegex(@"\b(nl|dutch)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex DutchRegex();
[GeneratedRegex(@"\b(HIN|Hindi)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex HindiRegex();
[GeneratedRegex(@"\b(TAM|Tamil)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex TamilRegex();
[GeneratedRegex(@"\b(Arabic)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex ArabicRegex();
[GeneratedRegex(@"\b(Latvian)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex LatvianRegex();
[GeneratedRegex(@"\b(Lithuanian)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex LithuanianRegex();
[GeneratedRegex(@"\b(RO|Romanian|rodubbed)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex RomanianRegex();
[GeneratedRegex(@"\b(SK|Slovak)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SlovakRegex();
[GeneratedRegex(@"\b(Brazilian)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex BrazilianRegex();
[GeneratedRegex(@"\b(Persian)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex PersianRegex();
[GeneratedRegex(@"\b(Bengali)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex BengaliRegex();
[GeneratedRegex(@"\b(Bulgarian)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex BulgarianRegex();
[GeneratedRegex(@"\b(Serbian)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SerbianRegex();
/// <summary>
/// Detects the languages announced in a release name.
/// </summary>
/// <remarks>
/// The name is searched after dots are turned into spaces, the parsed title text is removed and
/// the rest is lower-cased. Full language names are checked first as plain substrings, then the
/// abbreviation patterns of this class are applied; the result keeps that order. A language
/// reachable through both routes (for example "hebrew" and "HebDub") is reported once.
/// </remarks>
/// <param name="title">The raw release name.</param>
/// <param name="languages">Receives the detected languages; empty when none is found.</param>
public static void Parse(string title, out List<Language> languages)
{
    TitleParser.Parse(title, out var parsedTitle, out _);

    var languageTitle = title.Replace(".", " ");

    // string.Replace throws ArgumentException for an empty search value.
    if (!string.IsNullOrEmpty(parsedTitle))
    {
        languageTitle = languageTitle.Replace(parsedTitle, "");
    }

    // Invariant lowering: the substring checks below rely on plain ASCII lower-case keywords
    // and must not depend on the machine culture.
    languageTitle = languageTitle.ToLowerInvariant();

    // A local list is used because an out parameter cannot be captured by the local function.
    var found = new List<Language>();

    void AddIf(bool matched, Language language)
    {
        if (matched && !found.Contains(language))
        {
            found.Add(language);
        }
    }

    // Full language names, matched as substrings.
    AddIf(languageTitle.Contains("spanish"), Language.Spanish);
    AddIf(languageTitle.Contains("japanese"), Language.Japanese);
    AddIf(languageTitle.Contains("cantonese"), Language.Cantonese);
    AddIf(languageTitle.Contains("mandarin"), Language.Mandarin);
    AddIf(languageTitle.Contains("korean"), Language.Korean);
    AddIf(languageTitle.Contains("vietnamese"), Language.Vietnamese);
    AddIf(languageTitle.Contains("finnish"), Language.Finnish);
    AddIf(languageTitle.Contains("turkish"), Language.Turkish);
    AddIf(languageTitle.Contains("portuguese"), Language.Portuguese);
    AddIf(languageTitle.Contains("hebrew"), Language.Hebrew);
    AddIf(languageTitle.Contains("czech"), Language.Czech);
    AddIf(languageTitle.Contains("ukrainian"), Language.Ukrainian);
    AddIf(languageTitle.Contains("catalan"), Language.Catalan);
    AddIf(languageTitle.Contains("estonian"), Language.Estonian);
    AddIf(languageTitle.Contains("thai"), Language.Thai);

    // Names and abbreviations matched as whole words.
    AddIf(EnglishRegex().IsMatch(languageTitle), Language.English);
    AddIf(DanishRegex().IsMatch(languageTitle), Language.Danish);
    AddIf(SwedishRegex().IsMatch(languageTitle), Language.Swedish);
    AddIf(IcelandicRegex().IsMatch(languageTitle), Language.Icelandic);
    AddIf(ChineseRegex().IsMatch(languageTitle), Language.Chinese);
    AddIf(ItalianRegex().IsMatch(languageTitle), Language.Italian);
    AddIf(GermanRegex().IsMatch(languageTitle), Language.German);
    AddIf(FlemishRegex().IsMatch(languageTitle), Language.Flemish);
    AddIf(GreekRegex().IsMatch(languageTitle), Language.Greek);
    AddIf(FrenchRegex().IsMatch(languageTitle), Language.French);
    AddIf(RussianRegex().IsMatch(languageTitle), Language.Russian);
    AddIf(NorwegianRegex().IsMatch(languageTitle), Language.Norwegian);
    AddIf(HungarianRegex().IsMatch(languageTitle), Language.Hungarian);
    AddIf(HebrewRegex().IsMatch(languageTitle), Language.Hebrew);
    AddIf(CzechRegex().IsMatch(languageTitle), Language.Czech);
    AddIf(UkrainianRegex().IsMatch(languageTitle), Language.Ukrainian);
    AddIf(PolishRegex().IsMatch(languageTitle), Language.Polish);
    AddIf(DutchRegex().IsMatch(languageTitle), Language.Dutch);
    AddIf(HindiRegex().IsMatch(languageTitle), Language.Hindi);
    AddIf(TamilRegex().IsMatch(languageTitle), Language.Tamil);
    AddIf(ArabicRegex().IsMatch(languageTitle), Language.Arabic);
    AddIf(LatvianRegex().IsMatch(languageTitle), Language.Latvian);
    AddIf(LithuanianRegex().IsMatch(languageTitle), Language.Lithuanian);
    AddIf(RomanianRegex().IsMatch(languageTitle), Language.Romanian);
    AddIf(SlovakRegex().IsMatch(languageTitle), Language.Slovak);
    AddIf(BrazilianRegex().IsMatch(languageTitle), Language.Brazilian);
    AddIf(PersianRegex().IsMatch(languageTitle), Language.Persian);
    AddIf(BengaliRegex().IsMatch(languageTitle), Language.Bengali);
    AddIf(BulgarianRegex().IsMatch(languageTitle), Language.Bulgarian);
    AddIf(SerbianRegex().IsMatch(languageTitle), Language.Serbian);

    languages = found;
}
/// <summary>
/// Tells whether the release name carries a multi-language marker (MULTi, DUAL or DL).
/// </summary>
/// <param name="title">The raw release name.</param>
/// <returns><c>true</c> when a marker is present, otherwise <c>null</c> (never <c>false</c>).</returns>
public static bool? IsMulti(string title)
{
    // "WEB-DL" is removed first so that its "DL" part is not mistaken for a marker.
    var withoutWebDl = WebDL().Replace(title, "");

    if (MultiExp().IsMatch(withoutWebDl))
    {
        return true;
    }

    return null;
}
}

View File

@@ -0,0 +1,10 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Result of parsing a torrent/file name: an optional movie result, an optional show result
/// and an optional torrent type.
/// </summary>
public class ParsedFilename
{
// Movie parse result; null when none was produced.
public ParsedMovie? Movie { get; set; }
// Show parse result; null when none was produced.
public ParsedTv? Show { get; set; }
public TorrentType? Type { get; set; }
// True when neither a movie nor a show result is present.
public bool IsInvalid => Movie is null && Show is null;
}

View File

@@ -0,0 +1,5 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Parse result for a movie release; adds nothing to <see cref="BaseParsed"/>.
/// </summary>
public class ParsedMovie : BaseParsed
{
}

View File

@@ -0,0 +1,15 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Parse result for a TV release: the <see cref="BaseParsed"/> data plus season/episode details.
/// </summary>
public class ParsedTv : BaseParsed
{
public string? SeriesTitle { get; set; }
// Season numbers found in the name; empty by default.
public List<int> Seasons { get; set; } = [];
// Episode numbers found in the name; empty by default.
public List<int> EpisodeNumbers { get; set; } = [];
public DateTime? AirDate { get; set; }
public bool FullSeason { get; set; }
public bool IsPartialSeason { get; set; }
public bool IsMultiSeason { get; set; }
public bool IsSeasonExtra { get; set; }
public bool IsSpecial { get; set; }
public int SeasonPart { get; set; }
}

View File

@@ -0,0 +1,29 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Source-generated regular expressions used by the <c>ParsingService</c> methods.
/// All of them are case-sensitive.
/// </summary>
public partial class ParsingService
{
// Any character other than lower-case a-z or a digit.
[GeneratedRegex("[^a-z0-9]")]
private static partial Regex NakedMatcher();
// Any run of exactly four digits (also inside longer digit runs).
[GeneratedRegex(@"\d{4}")]
private static partial Regex GrabYearsMatcher();
// Any run of digits, of any length.
[GeneratedRegex(@"\d+")]
private static partial Regex GrabPossibleSeasonNumsMatcher();
// A character followed by one or more repetitions of itself.
[GeneratedRegex(@"(.)\1+")]
private static partial Regex RemoveRepeatsMatcher();
// Lower-case roman numeral. NOTE(review): every component is optional, so this pattern also
// matches the empty string at any position - callers need to account for empty matches.
[GeneratedRegex(@"m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})")]
private static partial Regex ReplaceRomanWithDecimalMatcher();
[GeneratedRegex(@"\s+")]
private static partial Regex WhitespaceMatcher();
// One or more non-word characters; used as a word separator.
[GeneratedRegex(@"\W+")]
private static partial Regex WordMatcher();
// Possessive "'s", a spaced ampersand, or any single non-word character.
[GeneratedRegex(@"'s|\s&\s|\W")]
private static partial Regex WordProcessingMatcher();
}

View File

@@ -0,0 +1,350 @@
namespace Producer.Features.ParseTorrentTitle;
public partial class ParsingService(IWordCollections wordCollections, ITorrentTitleParser torrentTitleParser) : IParsingService
{
private static readonly char[] WhitespaceSeparator = [' '];
/// <summary>
/// Reduces a title to its lower-case letters a-z and digits.
/// </summary>
/// <param name="title">The text to reduce.</param>
/// <returns><paramref name="title"/> lower-cased, with every other character removed.</returns>
public string Naked(string title)
{
    var lowered = title.ToLower();
    return NakedMatcher().Replace(lowered, "");
}
/// <summary>
/// Collects the plausible release years mentioned in a string.
/// </summary>
/// <param name="str">The text to scan.</param>
/// <returns>
/// Every four-digit run whose value is greater than 1900 and not later than the current year,
/// in order of appearance.
/// </returns>
public List<string> GrabYears(string str)
{
    var years = new List<string>();

    foreach (Match candidate in GrabYearsMatcher().Matches(str))
    {
        var year = int.Parse(candidate.Value);
        if (year > 1900 && year <= DateTime.Now.Year)
        {
            years.Add(candidate.Value);
        }
    }

    return years;
}
/// <summary>
/// Collects the numbers in a string that could be season numbers.
/// </summary>
/// <param name="str">The text to scan.</param>
/// <returns>The value of every digit run that lies between 1 and 500, in order of appearance.</returns>
public List<int> GrabPossibleSeasonNums(string str)
{
    var seasons = new List<int>();

    foreach (Match candidate in GrabPossibleSeasonNumsMatcher().Matches(str))
    {
        // Digit runs can be arbitrarily long (ids, hashes, timestamps) and \d also matches
        // non-ASCII digits; neither can be a season number, so skip them instead of letting
        // int.Parse throw.
        if (int.TryParse(candidate.Value, NumberStyles.None, CultureInfo.InvariantCulture, out var number)
            && number is > 0 and <= 500)
        {
            seasons.Add(number);
        }
    }

    return seasons;
}
/// <summary>
/// Tells whether a string mentions one of the given years.
/// </summary>
/// <param name="test">The text to look in.</param>
/// <param name="years">Candidate years, as digit strings.</param>
/// <param name="strictCheck">
/// When <c>true</c> only the exact years count; otherwise the year before and the year after
/// each candidate are accepted as well.
/// </param>
/// <returns><c>true</c> as soon as one candidate is found.</returns>
public bool HasYear(string test, List<string> years, bool strictCheck = false)
{
    foreach (var year in years)
    {
        if (strictCheck)
        {
            if (test.Contains(year))
            {
                return true;
            }

            continue;
        }

        var intYear = int.Parse(year);
        if (test.Contains(year) ||
            test.Contains($"{intYear + 1}") ||
            test.Contains($"{intYear - 1}"))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Strips accents and other combining marks from a string.
/// </summary>
/// <param name="str">The text to clean.</param>
/// <returns>
/// The text decomposed (form D), with all non-spacing marks dropped, then recomposed (form C).
/// </returns>
public string RemoveDiacritics(string str)
{
    var decomposed = str.Normalize(NormalizationForm.FormD);

    var kept = decomposed
        .Where(ch => CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
        .ToArray();

    return new string(kept).Normalize(NormalizationForm.FormC);
}
/// <summary>
/// Collapses every run of one repeated character into a single occurrence ("aaab" -> "ab").
/// </summary>
/// <param name="str">The text to collapse.</param>
public string RemoveRepeats(string str)
{
    var repeats = RemoveRepeatsMatcher();
    return repeats.Replace(str, "$1");
}
/// <summary>
/// Converts a roman numeral to its integer value.
/// </summary>
/// <param name="roman">The numeral, in upper or lower case; an empty string yields 0.</param>
/// <returns>The decimal value.</returns>
/// <exception cref="KeyNotFoundException">
/// <paramref name="roman"/> contains a character that is not a roman digit.
/// </exception>
public int RomanToDecimal(string roman)
{
    var total = 0;
    var prevValue = 0;

    // Walk right to left: a digit smaller than its right-hand neighbour is subtracted (IV = 4).
    for (var i = roman.Length - 1; i >= 0; i--)
    {
        // Invariant upper-casing: a culture-sensitive ToUpper maps 'i' to a dotted capital under
        // Turkish cultures, which would not be recognised as the roman digit I.
        var currentValue = char.ToUpperInvariant(roman[i]) switch
        {
            'I' => 1,
            'V' => 5,
            'X' => 10,
            'L' => 50,
            'C' => 100,
            'D' => 500,
            'M' => 1000,
            _ => throw new KeyNotFoundException($"'{roman[i]}' is not a roman numeral digit."),
        };

        total = currentValue < prevValue ? total - currentValue : total + currentValue;
        prevValue = currentValue;
    }

    return total;
}
/// <summary>
/// Replaces the lower-case roman numerals found in a string with their decimal value.
/// </summary>
/// <param name="input">The text to convert.</param>
/// <returns>
/// The text with every non-empty roman-numeral match replaced; text without roman digits is
/// returned unchanged.
/// </returns>
public string ReplaceRomanWithDecimal(string input) =>
    ReplaceRomanWithDecimalMatcher().Replace(
        input,
        // Every part of the numeral pattern is optional, so it also produces an empty match at
        // each position that holds no roman digit. Those must be left empty: converting them
        // would insert a "0" between all the other characters.
        match => match.Length == 0
            ? string.Empty
            : RomanToDecimal(match.Value).ToString(CultureInfo.InvariantCulture));
/// <summary>
/// Compares two titles after removing all whitespace.
/// </summary>
/// <remarks>
/// The titles are equal when they are non-empty and identical, or when they become identical
/// (and non-empty) after <see cref="Naked"/>, <see cref="RemoveRepeats"/> or
/// <see cref="RemoveDiacritics"/>.
/// </remarks>
public bool StrictEqual(string title1, string title2)
{
    var left = WhitespaceMatcher().Replace(title1, "");
    var right = WhitespaceMatcher().Replace(title2, "");

    if (left.Length > 0 && left == right)
    {
        return true;
    }

    var nakedLeft = Naked(left);
    if (nakedLeft.Length > 0 && nakedLeft == Naked(right))
    {
        return true;
    }

    var collapsedLeft = RemoveRepeats(left);
    if (collapsedLeft.Length > 0 && collapsedLeft == RemoveRepeats(right))
    {
        return true;
    }

    var plainLeft = RemoveDiacritics(left);
    return plainLeft.Length > 0 && plainLeft == RemoveDiacritics(right);
}
/// <summary>
/// Counts how many words of <paramref name="target"/> can be found, in order, in
/// <paramref name="test"/>.
/// </summary>
/// <remarks>
/// Each word is looked up in the part of <paramref name="test"/> that follows the previous
/// match. When the word itself is not found, its diacritic-free, repeat-collapsed and "naked"
/// forms (each only if at least three characters long) and finally its roman-to-decimal form
/// are tried.
/// </remarks>
/// <param name="test">The text searched in.</param>
/// <param name="target">The title whose words are searched for.</param>
/// <param name="shouldBeInSequence">
/// When <c>true</c>, the length of the longest run of consecutively found words is returned
/// instead of the number of found words.
/// </param>
public int CountTestTermsInTarget(string test, string target, bool shouldBeInSequence = false)
{
    const int wordTolerance = 5;
    const int magicLength = 3;

    var replaceCount = 0;
    var prevReplaceCount = 0;
    var prevOffset = 0;
    var prevLength = 0;
    var inSequenceTerms = 1;
    var longestSequence = 0;
    var testStr = test;

    var wordsInTitle = WordMatcher().Split(target).Where(e => !string.IsNullOrEmpty(e)).ToList();

    // Records one match: its position, and whether it continues the current sequence.
    string OnMatch(Match match)
    {
        if (shouldBeInSequence && prevLength > 0 && match.Index >= wordTolerance)
        {
            if (inSequenceTerms > longestSequence)
            {
                longestSequence = inSequenceTerms;
            }

            inSequenceTerms = 0;
        }

        prevOffset = match.Index;
        prevLength = match.Length;
        replaceCount++;
        inSequenceTerms++;
        return match.Value;
    }

    // Looks for one candidate in the remaining text and reports whether it was found.
    // Only the first occurrence is considered, and the text itself is left untouched:
    // slicing happens once per word in the loop below.
    bool TryCandidate(string candidate, bool first, bool last)
    {
        var prefix = first ? @"\b" : "";
        var suffix = last ? @"\b" : "";
        var pattern = new Regex($"{prefix}{Regex.Escape(candidate)}{suffix}");
        pattern.Replace(testStr, OnMatch, 1);

        if (replaceCount <= prevReplaceCount)
        {
            return false;
        }

        prevReplaceCount = replaceCount;
        return true;
    }

    // Tries the word and then its normalised variants.
    bool TermFound(string term, bool first, bool last)
    {
        if (TryCandidate(term, first, last))
        {
            return true;
        }

        var withoutDiacritics = RemoveDiacritics(term);
        if (withoutDiacritics.Length >= magicLength && TryCandidate(withoutDiacritics, first, last))
        {
            return true;
        }

        var withoutRepeats = RemoveRepeats(term);
        if (withoutRepeats.Length >= magicLength && TryCandidate(withoutRepeats, first, last))
        {
            return true;
        }

        var naked = Naked(term);
        if (naked.Length >= magicLength && TryCandidate(naked, first, last))
        {
            return true;
        }

        var withDecimals = ReplaceRomanWithDecimal(term);
        return withDecimals != term
               && withDecimals.Length > 0
               && TryCandidate(withDecimals, first, last);
    }

    var foundTerms = 0;
    for (var idx = 0; idx < wordsInTitle.Count; idx++)
    {
        // Continue after the previous match. The offset is clamped because it may be stale
        // (left over from an earlier word) and exceed what remains of the text.
        testStr = testStr[Math.Min(prevOffset + prevLength, testStr.Length)..];

        if (TermFound(wordsInTitle[idx], idx == 0, idx == wordsInTitle.Count - 1))
        {
            foundTerms++;
        }
    }

    if (shouldBeInSequence)
    {
        return inSequenceTerms > longestSequence ? inSequenceTerms : longestSequence;
    }

    return foundTerms;
}
/// <summary>
/// Loose equality check between a candidate name and a target title.
/// </summary>
/// <remarks>
/// With whitespace removed from both sides, the candidate matches when it contains the naked,
/// repeat-collapsed or diacritic-free form of the target (minimum length 3 when the candidate
/// mentions one of the years, 5 otherwise), or the target itself at one and a half times that
/// length. Failing that, the candidate is parsed and its movie / show title is compared with
/// <see cref="StrictEqual"/>.
/// </remarks>
/// <param name="test">The candidate name.</param>
/// <param name="target">The title looked for.</param>
/// <param name="years">Years associated with the target.</param>
public bool FlexEq(string test, string target, List<string> years)
{
    var target2 = WhitespaceMatcher().Replace(target, "");
    var test2 = WhitespaceMatcher().Replace(test, "");
    var magicLength = HasYear(test, years) ? 3 : 5;

    var nakedTarget = Naked(target2);
    if (nakedTarget.Length >= magicLength && test2.Contains(nakedTarget))
    {
        return true;
    }

    var collapsedTarget = RemoveRepeats(target2);
    if (collapsedTarget.Length >= magicLength && test2.Contains(collapsedTarget))
    {
        return true;
    }

    var plainTarget = RemoveDiacritics(target2);
    if (plainTarget.Length >= magicLength && test2.Contains(plainTarget))
    {
        return true;
    }

    if (target2.Length >= Math.Ceiling(magicLength * 1.5) && test2.Contains(target2))
    {
        return true;
    }

    // Parse once, and only when the cheap checks failed. Movie and Show are both nullable, so
    // a missing side falls back to an empty title, which StrictEqual never treats as equal.
    var parsed = torrentTitleParser.Parse(test);
    var movieTitle = parsed.Movie?.Title?.ToLower() ?? string.Empty;
    var tvTitle = parsed.Show?.Title?.ToLower() ?? string.Empty;

    return StrictEqual(target, movieTitle) || StrictEqual(target, tvTitle);
}
/// <summary>
/// Decides whether the candidate name <paramref name="test"/> matches the title
/// <paramref name="target"/>, either through <see cref="FlexEq"/> or through a term score.
/// </summary>
/// <param name="target">The title looked for.</param>
/// <param name="years">Years associated with the target.</param>
/// <param name="test">The candidate name.</param>
public bool MatchesTitle(string target, List<string> years, string test)
{
target = target.ToLower();
test = test.ToLower();
var splits = WordMatcher().Split(target).Where(e => !string.IsNullOrEmpty(e)).ToList();
var containsYear = HasYear(test, years);
if (FlexEq(test, target, years))
{
var sequenceCheck = CountTestTermsInTarget(test, string.Join(' ', splits), true);
// NOTE(review): the sequence count is never negative, so this branch always returns true.
return containsYear || sequenceCheck >= 0;
}
var totalTerms = splits.Count;
// Titles of at most two words are only accepted together with a year.
if (totalTerms == 0 || (totalTerms <= 2 && !containsYear))
{
return false;
}
// Key terms: words that are not common words (or are longer than five characters) ...
var keyTerms = splits.Where(s => (s.Length > 1 && !wordCollections.CommonWords.Contains(s)) || s.Length > 5).ToList();
// ... plus every space-separated token longer than two characters (this can add duplicates).
keyTerms.AddRange(target.Split(WhitespaceSeparator, StringSplitOptions.RemoveEmptyEntries).Where(e => e.Length > 2));
var keySet = new HashSet<string>(keyTerms);
var commonTerms = splits.Where(s => !keySet.Contains(s)).ToList();
// Maximum score: two per key term, one per common term, plus the year bonus.
var hasYearScore = totalTerms * 1.5;
var totalScore = keyTerms.Count * 2 + commonTerms.Count + hasYearScore;
if (keyTerms.Count == 0 && totalTerms <= 2 && !containsYear)
{
return false;
}
var foundKeyTerms = CountTestTermsInTarget(test, string.Join(' ', keyTerms));
var foundCommonTerms = CountTestTermsInTarget(test, string.Join(' ', commonTerms));
var score = foundKeyTerms * 2 + foundCommonTerms + (containsYear ? hasYearScore : 0);
// Accept when the achieved score is at least (roughly) 85% of the maximum.
return Math.Floor(score / 0.85) >= totalScore;
}
/// <summary>
/// Tells whether every required term occurs in a title.
/// </summary>
/// <remarks>
/// Terms are consumed as they are found: all occurrences of a found term are removed from the
/// title before the next term is checked. A term may also be found in its diacritic-free or
/// repeat-collapsed form.
/// </remarks>
/// <param name="mustHaveTerms">The required terms.</param>
/// <param name="testTitle">The title to check.</param>
public bool IncludesMustHaveTerms(List<string> mustHaveTerms, string testTitle)
{
    var remaining = testTitle;

    // Removes the needle from the remaining text; false when it was not present.
    bool TryConsume(string needle)
    {
        var stripped = remaining.Replace(needle, "");
        if (stripped == remaining)
        {
            return false;
        }

        remaining = stripped;
        return true;
    }

    foreach (var term in mustHaveTerms)
    {
        var found = TryConsume(term)
                    || TryConsume(RemoveDiacritics(term))
                    || TryConsume(RemoveRepeats(term));
        if (!found)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Tells whether a candidate title is free of banned terms that the target title does not
/// itself contain.
/// </summary>
/// <param name="targetTitle">The title looked for; terms it contains are tolerated.</param>
/// <param name="testTitle">The candidate title to screen.</param>
/// <returns><c>false</c> when a banned word, phrase or name is found.</returns>
public bool HasNoBannedTerms(string targetTitle, string testTitle)
{
    var tokens = WordMatcher().Split(testTitle.ToLower());

    // Single banned words: only tokens longer than three characters are considered.
    var bannedWordFound = tokens.Any(token =>
        token.Length > 3 &&
        !targetTitle.Contains(token) &&
        wordCollections.AdultWords.Contains(token));
    if (bannedWordFound)
    {
        return false;
    }

    // Multi-word entries are searched in the title with all symbols replaced by spaces.
    var titleWithoutSymbols = string.Join(' ', tokens);

    if (wordCollections.Jav.Any(jav => !targetTitle.Contains(jav) && titleWithoutSymbols.Contains(jav)))
    {
        return false;
    }

    if (wordCollections.AdultStars.Any(star => !targetTitle.Contains(star) && titleWithoutSymbols.Contains(star)))
    {
        return false;
    }

    return !wordCollections.AdultCompoundPhrases.Any(phrase =>
        !targetTitle.Contains(phrase) && titleWithoutSymbols.Contains(phrase));
}
/// <summary>
/// Tells whether a title is free of banned terms.
/// </summary>
/// <param name="targetTitle">The title to screen.</param>
/// <returns><c>false</c> when a banned word, phrase or name is found.</returns>
public bool HasNoBannedTerms(string targetTitle)
{
    var tokens = WordMatcher().Split(targetTitle.ToLower());

    if (tokens.Any(token => wordCollections.AdultWords.Contains(token)))
    {
        return false;
    }

    // Multi-word entries are searched (ignoring case) in the title with symbols turned into spaces.
    var inputWithoutSymbols = string.Join(' ', tokens);

    if (wordCollections.Jav.Any(jav => inputWithoutSymbols.Contains(jav, StringComparison.OrdinalIgnoreCase)))
    {
        return false;
    }

    if (wordCollections.AdultStars.Any(star => inputWithoutSymbols.Contains(star, StringComparison.OrdinalIgnoreCase)))
    {
        return false;
    }

    return !wordCollections.AdultCompoundPhrases.Any(phrase =>
        inputWithoutSymbols.Contains(phrase, StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Tells whether a candidate title both matches the target title and is free of banned terms.
/// </summary>
public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle)
{
    if (!MatchesTitle(targetTitle, years, testTitle))
    {
        return false;
    }

    return HasNoBannedTerms(targetTitle, testTitle);
}
/// <summary>
/// Counts the distinctive words of a title.
/// </summary>
/// <param name="title">The title to analyse.</param>
/// <returns>
/// The number of whitespace-separated words that, once lower-cased and stripped of "'s", "&amp;"
/// and symbols, are longer than three characters and are not common words.
/// </returns>
public int CountUncommonWords(string title)
{
    var uncommon = 0;

    foreach (var rawWord in WhitespaceMatcher().Split(title))
    {
        var word = WordProcessingMatcher().Replace(rawWord.ToLower(), "");
        if (word.Length > 3 && !wordCollections.CommonWords.Contains(word))
        {
            uncommon++;
        }
    }

    return uncommon;
}
/// <summary>
/// Parses a torrent/file name by delegating to the injected <see cref="ITorrentTitleParser"/>.
/// </summary>
public ParsedFilename Parse(string name)
{
    return torrentTitleParser.Parse(name);
}
}

View File

@@ -0,0 +1,9 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Quality information extracted from a release name: sources, optional modifier, optional
/// resolution and revision.
/// </summary>
public class QualityModel
{
// Detected sources; empty by default.
public List<Source> Sources { get; set; } = [];
// REMUX / BRDISK / RAWHD, or null when none applies.
public QualityModifier? Modifier { get; set; }
public Resolution? Resolution { get; set; }
public Revision Revision { get; set; } = new();
}

View File

@@ -0,0 +1,10 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Smart-enum of quality modifiers; each member's name and value are the same label.
/// </summary>
public sealed class QualityModifier : SmartEnum<QualityModifier, string>
{
public static readonly QualityModifier REMUX = new("REMUX", "REMUX");
public static readonly QualityModifier BRDISK = new("BRDISK", "BRDISK");
public static readonly QualityModifier RAWHD = new("RAWHD", "RAWHD");
// Private: the set of modifiers is closed to the members declared above.
private QualityModifier(string name, string value) : base(name, value) { }
}

View File

@@ -0,0 +1,230 @@
namespace Producer.Features.ParseTorrentTitle;
public static partial class QualityParser
{
// Quality detectors; all are pinned to the en-GB culture.

// PROPER / REPACK / RERIP as a whole word.
[GeneratedRegex(@"\b(?<proper>proper|repack|rerip)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex ProperRegex();
// Upper-case "REAL" only: this is the one case-sensitive pattern (RegexOptions.None).
[GeneratedRegex(@"\b(?<real>REAL)\b", RegexOptions.None, "en-GB")]
private static partial Regex RealRegex();
// Single-digit version tag: "v2" or "[v2]".
[GeneratedRegex(@"(?<version>v\d\b|\[v\d\])", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex VersionExp();
[GeneratedRegex(@"\b(?<remux>(BD|UHD)?Remux)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex RemuxExp();
[GeneratedRegex(@"\b(COMPLETE|ISO|BDISO|BDMux|BD25|BD50|BR.?DISK)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex BdiskExp();
[GeneratedRegex(@"\b(?<rawhd>RawHD|1080i[-_. ]HDTV|Raw[-_. ]HD|MPEG[-_. ]?2)\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex RawHdExp();
// "hr.ws" style tag; Parse treats PDTV/SDTV/DSR/TVRip releases carrying it as 720p.
[GeneratedRegex(@"hr[-_. ]ws", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex HighDefPdtvRegex();
/// <summary>
/// Derives the quality (sources, modifier, resolution, revision) of a release from its name.
/// </summary>
/// <remarks>
/// The modifier (BRDISK, REMUX, RAWHD) is decided first. The source groups are then checked in
/// a fixed order - bluray, web, hdtv, other TV, bdrip/brrip, workprint, cam, telesync,
/// telecine - and the first one present decides the sources and, when the name carries no
/// resolution, a default resolution. With no source group and no modifier, a 720p/1080p/2160p
/// name is assumed to be a web download.
/// </remarks>
/// <param name="title">The raw release name.</param>
/// <param name="result">Receives the derived quality.</param>
public static void Parse(string title, out QualityModel result)
{
    var normalizedTitle = title.Trim().Replace("_", " ").Replace("[", " ").Replace("]", " ").Trim().ToLower();

    ParseQualityModifyers(title, out var revision);
    ResolutionParser.Parse(normalizedTitle, out var resolution, out _);
    SourceParser.ParseSourceGroups(normalizedTitle, out var sourceGroups);
    SourceParser.Parse(normalizedTitle, out var source);
    VideoCodecsParser.Parse(normalizedTitle, out var codec, out _);

    var noResolution = resolution == null;
    var isXvid = codec == VideoCodec.XVID;

    result = new()
    {
        Sources = source,
        Resolution = resolution,
        Revision = revision,
        Modifier = null,
    };

    // --- modifier ---
    if (BdiskExp().IsMatch(normalizedTitle) && sourceGroups["bluray"])
    {
        result.Modifier = QualityModifier.BRDISK;
        result.Sources = [Source.BLURAY];
    }

    if (RemuxExp().IsMatch(normalizedTitle) && !sourceGroups["webdl"] && !sourceGroups["hdtv"])
    {
        result.Modifier = QualityModifier.REMUX;
        result.Sources = [Source.BLURAY];
    }

    if (RawHdExp().IsMatch(normalizedTitle) && result.Modifier != QualityModifier.BRDISK && result.Modifier != QualityModifier.REMUX)
    {
        result.Modifier = QualityModifier.RAWHD;
        result.Sources = [Source.TV];
    }

    // --- bluray ---
    if (sourceGroups["bluray"])
    {
        if (isXvid)
        {
            result.Resolution = Resolution.R480P;
            result.Sources = [Source.DVD];
        }
        else
        {
            result.Sources = [Source.BLURAY];
        }

        if (noResolution)
        {
            // assume bluray is at least 720p; disks default to 1080p and remuxes to 2160p
            if (result.Modifier == QualityModifier.REMUX)
            {
                result.Resolution = Resolution.R2160P;
            }
            else if (result.Modifier == QualityModifier.BRDISK)
            {
                result.Resolution = Resolution.R1080P;
            }
            else
            {
                result.Resolution = Resolution.R720P;
            }
        }

        return;
    }

    // --- web ---
    if (sourceGroups["webdl"] || sourceGroups["webrip"])
    {
        result.Sources = source;
        if (noResolution)
        {
            result.Resolution = title.Contains("[WEBDL]") ? Resolution.R720P : Resolution.R480P;
        }

        return;
    }

    // --- hdtv ---
    if (sourceGroups["hdtv"])
    {
        result.Sources = [Source.TV];
        if (noResolution)
        {
            result.Resolution = title.Contains("[HDTV]") ? Resolution.R720P : Resolution.R480P;
        }

        return;
    }

    // --- other TV sources ---
    if (sourceGroups["pdtv"] || sourceGroups["sdtv"] || sourceGroups["dsr"] || sourceGroups["tvrip"])
    {
        result.Sources = [Source.TV];
        result.Resolution = HighDefPdtvRegex().IsMatch(normalizedTitle)
            ? Resolution.R720P
            : Resolution.R480P;
        return;
    }

    // --- bdrip / brrip ---
    if (sourceGroups["bdrip"] || sourceGroups["brrip"])
    {
        if (isXvid)
        {
            result.Resolution = Resolution.R480P;
            result.Sources = [Source.DVD];
            return;
        }

        if (noResolution)
        {
            // bdrips are at least 480p
            result.Resolution = Resolution.R480P;
        }

        result.Sources = [Source.BLURAY];
        return;
    }

    // --- pre-release sources ---
    if (sourceGroups["workprint"])
    {
        result.Sources = [Source.WORKPRINT];
        return;
    }

    if (sourceGroups["cam"])
    {
        result.Sources = [Source.CAM];
        return;
    }

    if (sourceGroups["ts"])
    {
        result.Sources = [Source.TELESYNC];
        return;
    }

    if (sourceGroups["tc"])
    {
        result.Sources = [Source.TELECINE];
        return;
    }

    // --- no source group at all ---
    var isHighDefinition = resolution == Resolution.R2160P || resolution == Resolution.R1080P || resolution == Resolution.R720P;
    if (result.Modifier == null && isHighDefinition)
    {
        result.Sources = [Source.WEBDL];
    }
}
/// <summary>
/// Derives the release revision from a title: the version number and how many times the REAL pattern occurs.
/// </summary>
/// <param name="title">The raw release title.</param>
/// <param name="revision">
/// Receives the parsed revision. Version defaults to 1, becomes 2 when <c>ProperRegex</c> matches, and is
/// overridden by the first digit of the <c>version</c> group when <c>VersionExp</c> matches. Real is the number
/// of <c>RealRegex</c> matches in the un-normalized title.
/// </param>
private static void ParseQualityModifyers(string title, out Revision revision)
{
    var normalizedTitle = title.Trim().Replace("_", " ").Trim().ToLower();

    revision = new()
    {
        Version = 1,
        Real = 0,
    };

    if (ProperRegex().IsMatch(normalizedTitle))
    {
        revision.Version = 2;
    }

    var versionResult = VersionExp().Match(normalizedTitle);
    if (versionResult.Success)
    {
        // get numbers from version regex (only the first digit of the captured token is used)
        var digits = Regex.Match(versionResult.Groups["version"].Value, @"\d");
        if (digits.Success)
        {
            revision.Version = int.Parse(digits.Value);
        }
    }

    // Rebuild the pattern without options and run it on the non normalized title
    // to prevent insensitive REAL matching.
    var realGlobalExp = new Regex(RealRegex().ToString(), RegexOptions.None);

    // Count every occurrence with Matches. Polling IsMatch in a loop never advances past the first hit,
    // so a title containing a single match would spin forever.
    revision.Real = realGlobalExp.Matches(title).Count;
}
}

View File

@@ -0,0 +1,13 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// String-valued smart enum of the video resolutions the title parser can report.
/// </summary>
/// <remarks>
/// ResolutionParser looks up regex capture groups via each member's Name, so the first constructor
/// argument of every member has to match a named group in that parser's patterns.
/// </remarks>
public sealed class Resolution : SmartEnum<Resolution, string>
{
public static readonly Resolution R2160P = new("R2160P", "2160P");
public static readonly Resolution R1080P = new("R1080P", "1080P");
public static readonly Resolution R720P = new("R720P", "720P");
public static readonly Resolution R576P = new("R576P", "576P");
public static readonly Resolution R540P = new("R540P", "540P");
public static readonly Resolution R480P = new("R480P", "480P");
// Private so the set of members is closed to the static fields declared above.
private Resolution(string name, string value) : base(name, value) { }
}

View File

@@ -0,0 +1,55 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Detects the video resolution advertised in a release title.
/// </summary>
public partial class ResolutionParser
{
    // One pattern per resolution; the named group of each pattern equals the Name of the matching Resolution member.
    [GeneratedRegex(@"(?<R2160P>2160p|4k[-_. ](?:UHD|HEVC|BD)|(?:UHD|HEVC|BD)[-_. ]4k|\b(4k)\b|COMPLETE.UHD|UHD.COMPLETE)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex R2160pExp();

    [GeneratedRegex(@"(?<R1080P>1080(i|p)|1920x1080)(10bit)?", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex R1080pExp();

    [GeneratedRegex(@"(?<R720P>720(i|p)|1280x720|960p)(10bit)?", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex R720pExp();

    [GeneratedRegex(@"(?<R576P>576(i|p))", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex R576pExp();

    [GeneratedRegex(@"(?<R540P>540(i|p))", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex R540pExp();

    [GeneratedRegex(@"(?<R480P>480(i|p)|640x480|848x480)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex R480Exp();

    // All resolution patterns joined into a single alternation, highest resolution first.
    private static readonly Regex ResolutionExp = new Regex(
        string.Join(
            "|",
            R2160pExp(),
            R1080pExp(),
            R720pExp(),
            R576pExp(),
            R540pExp(),
            R480Exp()),
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Looks for a resolution marker in <paramref name="title"/>.
    /// </summary>
    /// <param name="title">The release title to inspect.</param>
    /// <param name="resolution">
    /// The detected resolution; 480p when no marker is present but the source parser reports a DVD source;
    /// otherwise <c>null</c>.
    /// </param>
    /// <param name="source">The matched text when a marker was found, otherwise <c>null</c>.</param>
    public static void Parse(string title, out Resolution? resolution, out string? source)
    {
        var match = ResolutionExp.Match(title);
        if (match.Success)
        {
            foreach (var candidate in Resolution.List)
            {
                var group = match.Groups[candidate.Name];
                if (group.Success)
                {
                    resolution = candidate;
                    source = group.Value;
                    return;
                }
            }
        }

        // No explicit marker: fall back to guessing from the source.
        // Make safe assumptions like dvdrip is probably 480p.
        source = null;
        SourceParser.Parse(title, out var detectedSources);
        resolution = detectedSources.Contains(Source.DVD) ? Resolution.R480P : null;
    }
}

View File

@@ -0,0 +1,7 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Revision details of a release, filled in by the quality parser's ParseQualityModifyers.
/// </summary>
public class Revision
{
// Release version: the quality parser starts at 1 and raises or replaces it from markers in the title.
public int Version { get; set; }
// Count reported by the quality parser for its REAL pattern; 0 when the pattern does not occur.
public int Real { get; set; }
}

View File

@@ -0,0 +1,16 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Season and episode information returned by SeasonParser.Parse for a single release title.
/// </summary>
public class Season
{
// The title exactly as it was passed to SeasonParser.Parse.
public string? ReleaseTitle { get; set; }
// Series name taken from the matching pattern's "title" capture group.
public string? SeriesTitle { get; set; }
// Season numbers reported by the parser; empty when none were found.
public List<int> Seasons { get; set; } = [];
// Episode numbers reported by the parser; empty when none were found.
public List<int> EpisodeNumbers { get; set; } = [];
// Air date for date-based releases; null when the parser did not produce one.
public DateTime? AirDate { get; set; }
// Copied from the parser's FullSeason flag (cleared again when the release is classified as a special).
public bool FullSeason { get; set; }
// Copied from the parser's IsPartialSeason flag; false when the parser left it unset.
public bool IsPartialSeason { get; set; }
// Copied from the parser's IsMultiSeason flag; false when the parser left it unset.
public bool IsMultiSeason { get; set; }
// Copied from the parser's IsSeasonExtra flag; false when the parser left it unset.
public bool IsSeasonExtra { get; set; }
// Copied from the parser's IsSpecial flag; false when the parser left it unset.
public bool IsSpecial { get; set; }
// Part number of a partial season pack; 0 when the parser left it unset.
public int SeasonPart { get; set; }
}

View File

@@ -0,0 +1,44 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Reject patterns of the season parser: titles that are hashes, backup names or other known junk
/// and therefore must not be parsed for season/episode information.
/// </summary>
public static partial class SeasonParser
{
    // 32 leading alphanumeric characters (MD5-style hashes).
    [GeneratedRegex(@"^[0-9a-zA-Z]{32}", RegexOptions.IgnoreCase)]
    private static partial Regex GenericMatchForMd5AndMixedCaseHashesExp();

    // Exactly 24 alphanumeric characters.
    [GeneratedRegex(@"^[a-z0-9]{24}$", RegexOptions.IgnoreCase)]
    private static partial Regex GenericMatchForShorterLowerCaseHashesExp();

    // 11 letters followed by 3 digits.
    [GeneratedRegex(@"^[A-Z]{11}\d{3}$", RegexOptions.IgnoreCase)]
    private static partial Regex FormatSeenOnSomeNZBGeekReleasesExp();

    // 12 letters followed by 3 digits.
    [GeneratedRegex(@"^[a-z]{12}\d{3}$", RegexOptions.IgnoreCase)]
    private static partial Regex FormatSeenOnSomeNZBGeekReleasesExp2();

    // "Backup_" + at least 5 digits + "Sxx-yy".
    [GeneratedRegex(@"^Backup_\d{5,}S\d{2}-\d{2}$", RegexOptions.IgnoreCase)]
    private static partial Regex BackupFilenameExp();

    // The literal title "123".
    [GeneratedRegex(@"^123$", RegexOptions.IgnoreCase)]
    private static partial Regex StartedAppearingDecember2014Exp();

    // The literal title "abc".
    [GeneratedRegex(@"^abc$", RegexOptions.IgnoreCase)]
    private static partial Regex StartedAppearingJanuary2015Exp();

    // The literal title "b00bs".
    [GeneratedRegex(@"^b00bs$", RegexOptions.IgnoreCase)]
    private static partial Regex StartedAppearingJanuary2015Exp2();

    // Six digits, an underscore and two digits.
    [GeneratedRegex(@"^\d{6}_\d{2}$", RegexOptions.IgnoreCase)]
    private static partial Regex StartedAppearingAugust2018Exp();

    // A title matching any of these is rejected before the season patterns are tried.
    // readonly: the list is only ever read, so the reference must not be swapped at runtime.
    private static readonly List<Func<Regex>> _rejectedRegex =
    [
        GenericMatchForMd5AndMixedCaseHashesExp,
        GenericMatchForShorterLowerCaseHashesExp,
        FormatSeenOnSomeNZBGeekReleasesExp,
        FormatSeenOnSomeNZBGeekReleasesExp2,
        BackupFilenameExp,
        StartedAppearingDecember2014Exp,
        StartedAppearingJanuary2015Exp,
        StartedAppearingJanuary2015Exp2,
        StartedAppearingAugust2018Exp
    ];
}

View File

@@ -0,0 +1,248 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Season/episode patterns of the season parser. Each pattern exposes its findings through named groups
/// (title, season, season1, episode, episode1, absoluteepisode, absoluteepisode1, airyear, airmonth,
/// airday, special, extras, seasonpart, ...) that the parser reads after a successful match.
/// </summary>
public static partial class SeasonParser
{
    [GeneratedRegex(@"^(?<airyear>19[6-9]\d|20\d\d)(?<sep>[-_]?)(?<airmonth>0\d|1[0-2])\k<sep>(?<airday>[0-2]\d|3[01])(?!\d)", RegexOptions.IgnoreCase)]
    private static partial Regex DailyEpisodesWithoutTitleExp();

    [GeneratedRegex(@"^(?:\W*S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}", RegexOptions.IgnoreCase)]
    private static partial Regex MultiPartEpisodesWithoutTitleExp();

    [GeneratedRegex(@"^(?<title>.+?)[-_. ]S(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:[E-_. ]?[ex]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+(?:[-_. ]?[ex]?(?<episode1>(?<!\d+)\d{1,2}(?!\d+)))+", RegexOptions.IgnoreCase)]
    private static partial Regex MultiEpisodeWithSingleEpisodeNumbersExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:-|[ex]|\W[ex]|_){1,2}(?<episode1>\d{2,3}(?!\d+)))+).+?(?:\[.+?\])(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex MultiEpisodeWithTitleAndTrailingInfoInSlashesExp();

    [GeneratedRegex(@"(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[-_]|[ex]){1,2}(?<episode>\d{2,3}(?!\d+))){2,})", RegexOptions.IgnoreCase)]
    private static partial Regex EpisodesWithoutTitleMultiExp();

    [GeneratedRegex(@"^(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[-_ ]?[ex])(?<episode>\d{2,3}(?!\d+))))", RegexOptions.IgnoreCase)]
    private static partial Regex EpisodesWithoutTitleSingleExp();

    [GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)(?<title>.+?)[-_. ](?:Episode)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeSubGroupTitleEpisodeAbsoluteEpisodeNumberExp();

    [GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+(?<absoluteepisode>\d{2,3}(\.\d{1,2})?))+(?:_|-|\s|\.)+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+).*?(?<hash>[([]\w{8}[)\]])?(?:$|\.)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeSubGroupTitleAbsoluteEpisodeNumberSeasonEpisodeExp();

    [GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:[-_\W](?<![()[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(?:(?:_|-|\s|\.)+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+)))+.*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeSubGroupTitleSeasonEpisodeAbsoluteEpisodeNumberExp();

    [GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:[-_\W](?<![()[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(?:\s|\.).*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeSubGroupTitleSeasonEpisodeExp();

    [GeneratedRegex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>[^-]+?\d+?)[-_. ]+(?:[-_. ]?(?<absoluteepisode>\d{3}(\.\d{1,2})?(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeSubGroupTitleWithTrailingNumberAbsoluteEpisodeNumberExp();

    [GeneratedRegex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)(?:[. ]-[. ](?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+|[-])))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeSubGroupTitleAbsoluteEpisodeNumberExp();

    [GeneratedRegex(@"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)[-_. ]+\(?(?:[-_. ]?#?(?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+)))+\)?(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeSubGroupTitleAbsoluteEpisodeNumberSpecialExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[ex]|[-_. ]e){1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}", RegexOptions.IgnoreCase)]
    private static partial Regex MultiEpisodeRepeatedExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:[ex]|\W[ex]){1,2}(?<episode>(?!265|264)\d{2,3}(?!\d+|(?:[ex]|\W[ex]|_|-){1,2})))", RegexOptions.IgnoreCase)]
    private static partial Regex SingleEpisodesWithTitleExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:[-_\W](?<![()[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>(?<!\d+)\d{2}(?!\d+)))).+?(?:[-_. ]?(?<absoluteepisode>(?<!\d+)\d{3}(\.\d{1,2})?(?!\d+)))+.+?\[(?<subgroup>.+?)\](?:$|\.mkv)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeTitleSeasonEpisodeNumberAbsoluteEpisodeNumberSubGroupExp();

    [GeneratedRegex(@"^(?<title>.+?)[-_. ]Episode(?:[-_. ]+(?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:.+?)\[(?<subgroup>.+?)\].*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeTitleEpisodeAbsoluteEpisodeNumberSubGroupHashExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:(?:_|-|\s|\.)+(?<absoluteepisode>\d{3}(\.\d{1,2})(?!\d+)))+(?:.+?)\[(?<subgroup>.+?)\].*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeTitleAbsoluteEpisodeNumberSubGroupHashExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:(?:_|-|\s|\.)+(?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?[-_. ]+.*?(?<hash>\[\w{8}\])(?:$|\.)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeTitleAbsoluteEpisodeNumberHashExp();

    // The pattern used to end with a literal "/i" (residue of a JavaScript /.../i regex literal), which made it
    // unmatchable for real titles. Case-insensitivity is already supplied by RegexOptions.IgnoreCase.
    [GeneratedRegex(@"^(?<title>.+?)?\W*(?<airdate>\d{4}\W+[0-1][0-9]\W+[0-3][0-9])(?!\W+[0-3][0-9])[-_. ](?:s?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+)))(?:[ex](?<episode>(?<!\d+)(?:\d{1,3})(?!\d+)))", RegexOptions.IgnoreCase)]
    private static partial Regex EpisodesWithAirdateAndSeasonEpisodeNumberCaptureSeasonEpisodeOnlyExp();

    // Same "/i" residue removed here.
    [GeneratedRegex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])(?!\W+[0-3][0-9]).+?(?:s?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+)))(?:[ex](?<episode>(?<!\d+)(?:\d{1,3})(?!\d+)))", RegexOptions.IgnoreCase)]
    private static partial Regex EpisodesWithAirdateAndSeasonEpisodeNumberExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:e|\We|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:-|e|\We|_){1,2}(?<episode1>\d{2,3}(?!\d+)))*)\W?(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex EpisodesWithTitleSingleEpisodesMultiEpisodeExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S(?<season>(?<!\d+)(?:\d{4})(?!\d+))(?:e|\We|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:-|e|\We|_){1,2}(?<episode1>\d{2,3}(?!\d+)))*)\W?(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex EpisodesWithTitle4DigitSeasonNumberSingleEpisodesMultiEpisodeExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+(?<season>(?<!\d+)(?:\d{4})(?!\d+))(?:x|\Wx){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:-|x|\Wx|_){1,2}(?<episode1>\d{2,3}(?!\d+)))*)\W?(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex EpisodesWithTitle4DigitSeasonNumberSingleEpisodesMultiEpisodeExp2();

    [GeneratedRegex(@"^(?<title>.+?)[-_. ]+S(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))\W?-\W?S?(?<season1>(?<!\d+)(?:\d{1,2})(?!\d+))", RegexOptions.IgnoreCase)]
    private static partial Regex MultiSeasonPackExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:\W+S(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<seasonpart>\d{1,2}(?!\d+)))+)", RegexOptions.IgnoreCase)]
    private static partial Regex PartialSeasonPackExp();

    [GeneratedRegex(@"^(?<title>.+?\d{4})(?:\W+(?:(?:Part\W?|e)(?<episode>\d{1,2}(?!\d+)))+)", RegexOptions.IgnoreCase)]
    private static partial Regex MiniSeriesWithYearInTitleExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:[-._ ][e])(?<episode>\d{2,3}(?!\d+))(?:(?:-?[e])(?<episode1>\d{2,3}(?!\d+)))+", RegexOptions.IgnoreCase)]
    private static partial Regex MiniSeriesMultiEpisodesExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<episode>\d{1,2}(?!\d+)))+)", RegexOptions.IgnoreCase)]
    private static partial Regex MiniSeriesEpisodesExp();

    // NOTE(review): the trailing "(>[-._ ])" requires a literal '>' after the number word, so this pattern
    // presumably never matches real titles; it looks like a mangled "(?>...)" / "(?=...)" — confirm.
    // Left unchanged on purpose: the spelled-out numbers it captures ("One", "Two", ...) would be passed to
    // int.Parse by the match post-processing, so repairing the pattern alone would turn a non-match into a crash.
    [GeneratedRegex(@"^(?<title>.+?)(?:\W+(?:Part[-._ ](?<episode>One|Two|Three|Four|Five|Six|Seven|Eight|Nine)(>[-._ ])))", RegexOptions.IgnoreCase)]
    private static partial Regex MiniSeriesEpisodesExp2();

    [GeneratedRegex(@"^(?<title>.+?)(?:\W+(?:(?<episode>(?<!\d+)\d{1,2}(?!\d+))of\d+)+)", RegexOptions.IgnoreCase)]
    private static partial Regex MiniSeriesEpisodesExp3();

    [GeneratedRegex(@"(?:.*(?:""|^))(?<title>.*?)(?:[-_\W](?<![()[]))+(?:\W?Season\W?)(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)+(?:Episode\W)(?:[-_. ]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+", RegexOptions.IgnoreCase)]
    private static partial Regex SupportsSeason01Episode03Exp();

    [GeneratedRegex(@"(?:.*(?:^))(?<title>.*?)[-._ ]+\[S(?<season>(?<!\d+)\d{2}(?!\d+))(?:[E-]{1,2}(?<episode>(?<!\d+)\d{2}(?!\d+)))+\]", RegexOptions.IgnoreCase)]
    private static partial Regex MultiEpisodeWithEpisodesInSquareBracketsExp();

    [GeneratedRegex(@"(?:.*(?:^))(?<title>.*?)S(?<season>(?<!\d+)\d{2}(?!\d+))(?:E(?<episode>(?<!\d+)\d{2}(?!\d+)))+", RegexOptions.IgnoreCase)]
    private static partial Regex MultiEpisodeReleaseWithNoSpaceBetweenSeriesTitleAndSeasonExp();

    [GeneratedRegex(@"(?:.*(?:""|^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)?Ep?[ ._]?(?<episode>(?<!\d+)\d{1,2}(?!\d+))", RegexOptions.IgnoreCase)]
    private static partial Regex SingleEpisodeSeasonOrEpisodeExp();

    [GeneratedRegex(@"(?:.*(?:""|^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{3}(?!\d+))(?:\W|_)?E(?<episode>(?<!\d+)\d{1,2}(?!\d+))", RegexOptions.IgnoreCase)]
    private static partial Regex ThreeDigitSeasonExp();

    [GeneratedRegex(@"^(?:(?<title>.+?)(?:_|-|\s|\.)+)(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:(?:-|[ex]|\W[ex]|_){1,2}(?<episode>(?<!\d+)\d{5}(?!\d+)))", RegexOptions.IgnoreCase)]
    private static partial Regex FiveDigitEpisodeNumberWithTitleExp();

    [GeneratedRegex(@"^(?:(?<title>.+?)(?:_|-|\s|\.)+)(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:(?:[-_. ]{1,3}ep){1,2}(?<episode>(?<!\d+)\d{5}(?!\d+)))", RegexOptions.IgnoreCase)]
    private static partial Regex FiveDigitMultiEpisodeWithTitleExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:_|-|\s|\.)+S(?<season>\d{2}(?!\d+))(\W-\W)E(?<episode>(?<!\d+)\d{2}(?!\d+))(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex SeparatedSeasonAndEpisodeNumbersExp();

    [GeneratedRegex(@"^(?<title>.+?S\d{1,2})[-_. ]{3,}(?:EP)?(?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+))", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeTitleWithSeasonNumberAbsoluteEpisodeNumberExp();

    [GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)[-_. ]+?(?:Episode[-_. ]+?)(?<absoluteepisode>\d{1}(\.\d{1,2})?(?!\d+))", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeFrenchTitlesWithSingleEpisodeNumbersExp();

    [GeneratedRegex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{1,2}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex SeasonOnlyReleasesExp();

    [GeneratedRegex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{4}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex FourDigitSeasonOnlyReleasesExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+\[S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:-|[ex]|\W[ex]|_){1,2}(?<episode>(?<!\d+)\d{2}(?!\d+|i|p)))+\])\W?(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex EpisodesWithTitleAndSeasonEpisodeInSquareBracketsExp();

    [GeneratedRegex(@"^(?<title>.+?)?(?:(?:[_.](?<![()[!]))+(?<season>(?<!\d+)[1-9])(?<episode>[1-9][0-9]|[0][1-9])(?![a-z]|\d+))+(?:[_.]|$)", RegexOptions.IgnoreCase)]
    private static partial Regex Supports103_113NamingExp();

    [GeneratedRegex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex FourDigitEpisodeNumberEpisodesWithoutTitleSingleAndMultiExp();

    [GeneratedRegex(@"^(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)\W?(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex FourDigitEpisodeNumberEpisodesWithTitleSingleAndMultiExp();

    [GeneratedRegex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})[-_. ]+(?<airmonth>[0-1][0-9])[-_. ]+(?<airday>[0-3][0-9])(?![-_. ]+[0-3][0-9])", RegexOptions.IgnoreCase)]
    private static partial Regex EpisodesWithAirdateExp();

    [GeneratedRegex(@"^(?<title>.+?)?\W*(?<airmonth>[0-1][0-9])[-_. ]+(?<airday>[0-3][0-9])[-_. ]+(?<airyear>\d{4})(?!\d+)", RegexOptions.IgnoreCase)]
    private static partial Regex EpisodesWithAirdateExp2();

    [GeneratedRegex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![()[!]))*(?<season>(?<!\d+|\(|\[|e|x)\d{2})(?<episode>(?<!e|x)\d{2}(?!p|i|\d+|\)|\]|\W\d+|\W(?:e|ep|x)\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex Supports1103_1113NamingExp();

    [GeneratedRegex(@"^(?<title>.*?)(?:(?:[-_\W](?<![()[!]))+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:-|[ex]){1,2}(?<episode>\d{1}))+)+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase)]
    private static partial Regex EpisodesWithSingleDigitEpisodeNumberExp();

    [GeneratedRegex(@"^(?:Season(?:_|-|\s|\.)(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:_|-|\s|\.)(?<episode>(?<!\d+)\d{1,2})", RegexOptions.IgnoreCase)]
    private static partial Regex ITunesSeason1_05TitleQualityExp();

    [GeneratedRegex(@"^(?:(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:-(?<episode>\d{2,3}(?!\d+))))", RegexOptions.IgnoreCase)]
    private static partial Regex ITunes1_05TitleQualityExp();

    [GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:_|\s|\.)+(?:e|ep)(?<absoluteepisode>\d{2,3}(\.\d{1,2})?)-(?<absoluteepisode1>(?<!\d+)\d{1,2}(\.\d{1,2})?(?!\d+|-)).*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeRange_TitleAbsoluteEpisodeNumberExp();

    [GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:(?:_|-|\s|\.)+(?:e|ep)(?<absoluteepisode>\d{2,4}(\.\d{1,2})?))+.*?(?<hash>\[\w{8}\])?(?:$|\.)", RegexOptions.IgnoreCase)]
    private static partial Regex Anime_TitleAbsoluteEpisodeNumberExp();

    [GeneratedRegex(@"^(?<title>.+?)[-_. ](?:Episode)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase)]
    private static partial Regex Anime_TitleEpisodeAbsoluteEpisodeNumberExp();

    [GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)[_. ]+(?<absoluteepisode>(?<!\d+)\d{1,2}(\.\d{1,2})?(?!\d+))-(?<absoluteepisode1>(?<!\d+)\d{1,2}(\.\d{1,2})?(?!\d+|-))(?:_|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase)]
    private static partial Regex AnimeRange_TitleAbsoluteEpisodeNumberExp2();

    [GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase)]
    private static partial Regex Anime_TitleAbsoluteEpisodeNumberExp2();

    [GeneratedRegex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:(?:[-_\W](?<![()[!]))+(?<absoluteepisode>(?<!\d+)\d{2,3}(\.\d{1,2})?(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?", RegexOptions.IgnoreCase)]
    private static partial Regex Anime_TitleAbsoluteEpisodeNumberExp3();

    [GeneratedRegex(@"^(?<title>.+?)[-_. ](?<season>[0]?\d?)(?:(?<episode>\d{2}){2}(?!\d+))[-_. ]", RegexOptions.IgnoreCase)]
    private static partial Regex ExtantTerribleMultiEpisodeNamingExp();

    // Patterns are tried in this order and the first successful match wins, so more specific patterns
    // have to stay ahead of the more permissive ones. readonly: the list is only ever read.
    private static readonly List<Func<Regex>> _validRegexes =
    [
        DailyEpisodesWithoutTitleExp,
        MultiPartEpisodesWithoutTitleExp,
        MultiEpisodeWithSingleEpisodeNumbersExp,
        MultiEpisodeWithTitleAndTrailingInfoInSlashesExp,
        EpisodesWithoutTitleMultiExp,
        EpisodesWithoutTitleSingleExp,
        AnimeSubGroupTitleEpisodeAbsoluteEpisodeNumberExp,
        AnimeSubGroupTitleAbsoluteEpisodeNumberSeasonEpisodeExp,
        AnimeSubGroupTitleSeasonEpisodeAbsoluteEpisodeNumberExp,
        AnimeSubGroupTitleSeasonEpisodeExp,
        AnimeSubGroupTitleWithTrailingNumberAbsoluteEpisodeNumberExp,
        AnimeSubGroupTitleAbsoluteEpisodeNumberExp,
        AnimeSubGroupTitleAbsoluteEpisodeNumberSpecialExp,
        MultiEpisodeRepeatedExp,
        SingleEpisodesWithTitleExp,
        AnimeTitleSeasonEpisodeNumberAbsoluteEpisodeNumberSubGroupExp,
        AnimeTitleEpisodeAbsoluteEpisodeNumberSubGroupHashExp,
        AnimeTitleAbsoluteEpisodeNumberSubGroupHashExp,
        AnimeTitleAbsoluteEpisodeNumberHashExp,
        EpisodesWithAirdateAndSeasonEpisodeNumberCaptureSeasonEpisodeOnlyExp,
        EpisodesWithAirdateAndSeasonEpisodeNumberExp,
        EpisodesWithTitleSingleEpisodesMultiEpisodeExp,
        EpisodesWithTitle4DigitSeasonNumberSingleEpisodesMultiEpisodeExp,
        EpisodesWithTitle4DigitSeasonNumberSingleEpisodesMultiEpisodeExp2,
        MultiSeasonPackExp,
        PartialSeasonPackExp,
        MiniSeriesWithYearInTitleExp,
        MiniSeriesMultiEpisodesExp,
        MiniSeriesEpisodesExp,
        MiniSeriesEpisodesExp2,
        MiniSeriesEpisodesExp3,
        SupportsSeason01Episode03Exp,
        MultiEpisodeWithEpisodesInSquareBracketsExp,
        MultiEpisodeReleaseWithNoSpaceBetweenSeriesTitleAndSeasonExp,
        SingleEpisodeSeasonOrEpisodeExp,
        ThreeDigitSeasonExp,
        FiveDigitEpisodeNumberWithTitleExp,
        // Was declared but never registered, so it could not take part in matching.
        FiveDigitMultiEpisodeWithTitleExp,
        SeparatedSeasonAndEpisodeNumbersExp,
        AnimeTitleWithSeasonNumberAbsoluteEpisodeNumberExp,
        AnimeFrenchTitlesWithSingleEpisodeNumbersExp,
        SeasonOnlyReleasesExp,
        FourDigitSeasonOnlyReleasesExp,
        EpisodesWithTitleAndSeasonEpisodeInSquareBracketsExp,
        Supports103_113NamingExp,
        FourDigitEpisodeNumberEpisodesWithoutTitleSingleAndMultiExp,
        FourDigitEpisodeNumberEpisodesWithTitleSingleAndMultiExp,
        EpisodesWithAirdateExp,
        EpisodesWithAirdateExp2,
        Supports1103_1113NamingExp,
        EpisodesWithSingleDigitEpisodeNumberExp,
        ITunesSeason1_05TitleQualityExp,
        ITunes1_05TitleQualityExp,
        AnimeRange_TitleAbsoluteEpisodeNumberExp,
        Anime_TitleAbsoluteEpisodeNumberExp,
        Anime_TitleEpisodeAbsoluteEpisodeNumberExp,
        AnimeRange_TitleAbsoluteEpisodeNumberExp2,
        Anime_TitleAbsoluteEpisodeNumberExp2,
        Anime_TitleAbsoluteEpisodeNumberExp3,
        ExtantTerribleMultiEpisodeNamingExp,
    ];
}

View File

@@ -0,0 +1,307 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Entry point of the season parser: validates a release title, runs it through the season/episode
/// patterns and converts the first successful match into a <see cref="Season"/>.
/// </summary>
public static partial class SeasonParser
{
    // Leading "[...]" tags that are stripped from the captured series title.
    [GeneratedRegex(@"^(?:\[.+?\])+", RegexOptions.None)]
    private static partial Regex RequestInfoExp();

    // Six digit yymmdd air date delimited by '_', '.' or '-' on both sides.
    [GeneratedRegex(@"(?<=[_.-])(?<airdate>(?<!\d)(?<airyear>[1-9]\d{1})(?<airmonth>[0-1][0-9])(?<airday>[0-3][0-9]))(?=[_.-])", RegexOptions.IgnoreCase)]
    private static partial Regex SixDigitAirDateMatchExp();

    /// <summary>
    /// Parses season, episode and air-date information out of a release title.
    /// </summary>
    /// <param name="title">The raw release title.</param>
    /// <returns>
    /// The parsed information, or <c>null</c> when the title is rejected by pre-validation or no pattern
    /// produces a usable result.
    /// </returns>
    /// <exception cref="Exception">
    /// A matched air date is not a valid calendar date, lies in the future, or lies before 1970.
    /// </exception>
    public static Season? Parse(string title)
    {
        if (!PreValidation(title))
        {
            return null;
        }

        var simpleTitle = TitleParser.SimplifyTitle(title);

        // Daily episodes written as yymmdd, eg `At.Midnight.140722.720p.HDTV.x264-YesTV`, are expanded to
        // `20yy.mm.dd` so the regular air-date patterns can pick them up.
        var sixDigitAirDateMatch = SixDigitAirDateMatchExp().Match(title);
        if (sixDigitAirDateMatch.Success)
        {
            var airYear = sixDigitAirDateMatch.Groups["airyear"].Value;
            var airMonth = sixDigitAirDateMatch.Groups["airmonth"].Value;
            var airDay = sixDigitAirDateMatch.Groups["airday"].Value;

            if (airMonth != "00" || airDay != "00")
            {
                var fixedDate = $"20{airYear}.{airMonth}.{airDay}";
                simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate);
            }
        }

        foreach (var exp in _validRegexes)
        {
            var match = exp().Match(simpleTitle);
            if (!match.Success)
            {
                continue;
            }

            var result = ParseMatchCollection(match, simpleTitle);
            if (result is null)
            {
                // The pattern matched but the captured numbers were inconsistent; try the next pattern.
                continue;
            }

            if (result.FullSeason && result.ReleaseTokens != null && result.ReleaseTokens.Contains("Special", StringComparison.OrdinalIgnoreCase))
            {
                result.FullSeason = false;
                result.IsSpecial = true;
            }

            return new()
            {
                ReleaseTitle = title,
                SeriesTitle = result.SeriesName,
                Seasons = result.SeasonNumbers ?? [],
                EpisodeNumbers = result.EpisodeNumbers ?? [],
                AirDate = result.AirDate,
                FullSeason = result.FullSeason,
                IsPartialSeason = result.IsPartialSeason ?? false,
                IsMultiSeason = result.IsMultiSeason ?? false,
                IsSeasonExtra = result.IsSeasonExtra ?? false,
                IsSpecial = result.IsSpecial ?? false,
                SeasonPart = result.SeasonPart ?? 0,
            };
        }

        return null;
    }

    /// <summary>
    /// Converts a successful pattern match into a <see cref="ParsedMatch"/>.
    /// </summary>
    /// <param name="match">A successful match of one of the season patterns.</param>
    /// <param name="simpleTitle">The simplified title the match was produced from.</param>
    /// <returns>The parsed data, or <c>null</c> when the captured episode numbers are inconsistent.</returns>
    private static ParsedMatch? ParseMatchCollection(Match match, string simpleTitle)
    {
        if (!match.Success)
        {
            throw new("No match");
        }

        var groups = match.Groups;
        var titleCapture = groups["title"].Value;

        // Apply the request-info pattern as a regex; string.Replace would look for the pattern text literally.
        var seriesName = RequestInfoExp()
            .Replace(titleCapture.Replace(".", " ").Replace("_", " "), "")
            .Trim();

        var result = new ParsedMatch
        {
            SeriesName = seriesName,
            SeriesTitle = seriesName,
        };

        var lastSeasonEpisodeStringIndex = IndexOfEnd(simpleTitle, titleCapture);

        // Date-based releases carry a plausible air year; everything else is season/episode based.
        var hasAirYear = int.TryParse(groups["airyear"].Value, out var airYear) && airYear >= 1900;
        if (hasAirYear)
        {
            ApplyAirDate(groups, airYear, simpleTitle, result, ref lastSeasonEpisodeStringIndex);
        }
        else if (!TryApplySeasonsAndEpisodes(groups, simpleTitle, result, ref lastSeasonEpisodeStringIndex))
        {
            return null;
        }

        result.ReleaseTokens = lastSeasonEpisodeStringIndex == simpleTitle.Length || lastSeasonEpisodeStringIndex == -1
            ? simpleTitle
            : simpleTitle[lastSeasonEpisodeStringIndex..];

        // TODO: seriesTitleInfo
        return result;
    }

    // Fills season, episode and pack flags from the match; returns false when the episode range is inconsistent.
    private static bool TryApplySeasonsAndEpisodes(GroupCollection groups, string simpleTitle, ParsedMatch result, ref int lastSeasonEpisodeStringIndex)
    {
        var seasons = new List<int>();
        foreach (var seasonCapture in NonEmptyValues(groups, "season", "season1"))
        {
            lastSeasonEpisodeStringIndex = Math.Max(IndexOfEnd(simpleTitle, seasonCapture), lastSeasonEpisodeStringIndex);
            seasons.Add(int.Parse(seasonCapture));
        }

        if (seasons.Count > 1)
        {
            seasons = CompleteRange(seasons);
        }

        result.SeasonNumbers = seasons;
        if (seasons.Count > 1)
        {
            result.IsMultiSeason = true;
        }

        // Only the final capture of each group is available through Value, so a range is built
        // from the "episode" and "episode1" (or absolute equivalents) pair.
        var episodeCaptures = NonEmptyValues(groups, "episode", "episode1");
        var absoluteEpisodeCaptures = NonEmptyValues(groups, "absoluteepisode", "absoluteepisode1");

        // handle 0 episode possibly indicating a full season release
        if (episodeCaptures.Count > 0)
        {
            var first = int.Parse(episodeCaptures[0]);
            var last = int.Parse(episodeCaptures[^1]);
            if (first > last)
            {
                return false;
            }

            result.EpisodeNumbers = Enumerable.Range(first, last - first + 1).ToList();
        }

        if (absoluteEpisodeCaptures.Count > 0)
        {
            // The patterns always use '.' as the decimal separator, independent of the current culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var first = double.Parse(absoluteEpisodeCaptures[0], invariant);
            var last = double.Parse(absoluteEpisodeCaptures[^1], invariant);

            if (first % 1 != 0 || last % 1 != 0)
            {
                if (absoluteEpisodeCaptures.Count != 1)
                {
                    return false;
                }

                // specialAbsoluteEpisodeNumbers in radarr
                result.EpisodeNumbers = [(int) first];
                result.IsSpecial = true;
                lastSeasonEpisodeStringIndex = Math.Max(
                    IndexOfEnd(simpleTitle, absoluteEpisodeCaptures[0]),
                    lastSeasonEpisodeStringIndex);
            }
            else
            {
                if (first > last)
                {
                    // A reversed range would make Enumerable.Range throw; treat it like a reversed episode range.
                    return false;
                }

                // AbsoluteEpisodeNumbers in radarr
                result.EpisodeNumbers = Enumerable.Range((int) first, (int) (last - first + 1)).ToList();
                if (groups["special"].Value.Length > 0)
                {
                    result.IsSpecial = true;
                }
            }
        }

        if (episodeCaptures.Count == 0 && absoluteEpisodeCaptures.Count == 0)
        {
            // Check to see if this is an "Extras" or "SUBPACK" release, if it is, set
            // IsSeasonExtra so they can be filtered out.
            // Groups[name] never returns null, so an unmatched group is detected by its empty Value.
            if (groups["extras"].Value.Length > 0)
            {
                result.IsSeasonExtra = true;
            }

            // Partial season packs will have a seasonpart group so they can be differentiated
            // from a full season/single episode release
            var seasonPart = groups["seasonpart"].Value;
            if (seasonPart.Length > 0)
            {
                result.SeasonPart = int.Parse(seasonPart);
                result.IsPartialSeason = true;
            }
            else
            {
                result.FullSeason = true;
            }
        }

        // NOTE(review): absolute captures always populate EpisodeNumbers above, so this currently cannot
        // trigger; kept as ported — confirm whether absolute-only releases should report season 0.
        if (absoluteEpisodeCaptures.Count > 0 && result.EpisodeNumbers == null)
        {
            result.SeasonNumbers = [0];
        }

        return true;
    }

    // Builds and validates the air date of a date-based release; does nothing when month or day are missing.
    private static void ApplyAirDate(GroupCollection groups, int airYear, string simpleTitle, ParsedMatch result, ref int lastSeasonEpisodeStringIndex)
    {
        if (!int.TryParse(groups["airmonth"].Value, out var airMonth) || !int.TryParse(groups["airday"].Value, out var airDay))
        {
            return;
        }

        // Swap day and month if month is bigger than 12 (scene fail)
        if (airMonth > 12)
        {
            (airDay, airMonth) = (airMonth, airDay);
        }

        // Report impossible calendar dates the same way as the other date errors below.
        if (airMonth < 1 || airMonth > 12 || airDay < 1 || airDay > DateTime.DaysInMonth(airYear, airMonth))
        {
            throw new("Parsed date error");
        }

        var airDate = new DateTime(airYear, airMonth, airDay);

        // dates in the future is most likely parser error
        if (airDate > DateTime.Now)
        {
            throw new("Parsed date is in the future");
        }

        if (airDate < new DateTime(1970, 1, 1))
        {
            throw new("Parsed date error");
        }

        foreach (var groupName in new[] {"airyear", "airmonth", "airday"})
        {
            lastSeasonEpisodeStringIndex = Math.Max(
                IndexOfEnd(simpleTitle, groups[groupName].Value),
                lastSeasonEpisodeStringIndex);
        }

        result.AirDate = airDate;
    }

    // Values of the named groups that actually captured text, in the order the names are given.
    private static List<string> NonEmptyValues(GroupCollection groups, params string[] names) =>
        names
            .Select(name => groups[name].Value)
            .Where(value => value.Length > 0)
            .ToList();

    // A title is acceptable when none of the reject patterns matches it.
    private static bool PreValidation(string title) =>
        !_rejectedRegex.Any(exp => exp().IsMatch(title));

    // Expands the first and last distinct values into the full inclusive range; returns the input unchanged
    // when the values are in descending order.
    private static List<int> CompleteRange(List<int> arr)
    {
        var uniqArr = arr.Distinct().ToList();
        var first = uniqArr[0];
        var last = uniqArr[^1];
        if (first > last)
        {
            return arr;
        }

        return Enumerable.Range(first, last - first + 1).ToList();
    }

    // Index just past the first ordinal occurrence of str2 in str1, or -1 when str2 does not occur.
    private static int IndexOfEnd(string str1, string str2)
    {
        var io = str1.IndexOf(str2, StringComparison.Ordinal);
        return io == -1 ? -1 : io + str2.Length;
    }

    // Intermediate result of a single pattern match before it is mapped onto Season.
    private record ParsedMatch
    {
        public string? SeriesName { get; set; }
        public string? SeriesTitle { get; set; }
        public List<int>? SeasonNumbers { get; set; }
        public bool? IsMultiSeason { get; set; }
        public List<int>? EpisodeNumbers { get; set; }
        public bool? IsSpecial { get; set; }
        public bool? IsSeasonExtra { get; set; }
        public int? SeasonPart { get; set; }
        public bool? IsPartialSeason { get; set; }
        public bool FullSeason { get; set; }
        public DateTime? AirDate { get; set; }
        public string? ReleaseTokens { get; set; }
    }
}

View File

@@ -0,0 +1,12 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Dependency-injection wiring for the torrent title parsing feature.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers the parsing service and the torrent title parser as singletons.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same <paramref name="services"/> instance, so calls can be chained.</returns>
    public static IServiceCollection RegisterParseTorrentTitle(this IServiceCollection services) =>
        services
            .AddSingleton<IParsingService, ParsingService>()
            .AddSingleton<ITorrentTitleParser, TorrentTitleParser>();
}

View File

@@ -0,0 +1,20 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// String-valued smart enum of the release sources the title parser can report.
/// Every member uses the same text for its name and its value.
/// </summary>
public sealed class Source : SmartEnum<Source, string>
{
public static readonly Source BLURAY = new("BLURAY", "BLURAY");
public static readonly Source WEBDL = new("WEBDL", "WEBDL");
public static readonly Source WEBRIP = new("WEBRIP", "WEBRIP");
public static readonly Source DVD = new("DVD", "DVD");
public static readonly Source CAM = new("CAM", "CAM");
public static readonly Source SCREENER = new("SCREENER", "SCREENER");
public static readonly Source PPV = new("PPV", "PPV");
public static readonly Source TELESYNC = new("TELESYNC", "TELESYNC");
public static readonly Source TELECINE = new("TELECINE", "TELECINE");
public static readonly Source WORKPRINT = new("WORKPRINT", "WORKPRINT");
public static readonly Source TV = new("TV", "TV");
// Private so the set of members is closed to the static fields declared above.
private Source(string name, string value) : base(name, value)
{
}
}

View File

@@ -0,0 +1,151 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Detects release-source markers (Blu-ray, WEB-DL, DVD, CAM, ...) in a torrent title.
/// </summary>
public static partial class SourceParser
{
    [GeneratedRegex(@"\b(?<bluray>M?Blu-?Ray|HDDVD|BD|UHDBD|BDISO|BDMux|BD25|BD50|BR.?DISK|Bluray(1080|720)p?|BD(1080|720)p?)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex BlurayExp();

    [GeneratedRegex(@"\b(?<webdl>WEB[-_. ]DL|HDRIP|WEBDL|WEB-DLMux|NF|APTV|NETFLIX|NetflixU?HD|DSNY|DSNP|HMAX|AMZN|AmazonHD|iTunesHD|MaxdomeHD|WebHD|WEB$|[. ]WEB[. ](?:[xh]26[45]|DD5[. ]1)|\d+0p[. ]WEB[. ]|\b\s\/\sWEB\s\/\s\b|AMZN[. ]WEB[. ])\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex WebdlExp();

    [GeneratedRegex(@"\b(?<webrip>WebRip|Web-Rip|WEBCap|WEBMux)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex WebripExp();

    [GeneratedRegex(@"\b(?<hdtv>HDTV)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex HdtvExp();

    [GeneratedRegex(@"\b(?<bdrip>BDRip)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex BdripExp();

    [GeneratedRegex(@"\b(?<brrip>BRRip)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex BrripExp();

    [GeneratedRegex(@"\b(?<scr>SCR|SCREENER|DVDSCR|(DVD|WEB).?SCREENER)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex ScrExp();

    [GeneratedRegex(@"\b(?<dvdr>DVD-R|DVDR)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DvdrExp();

    [GeneratedRegex(@"\b(?<dvd>DVD9?|DVDRip|NTSC|PAL|xvidvd|DvDivX)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DvdExp();

    [GeneratedRegex(@"\b(?<dsr>WS[-_. ]DSR|DSR)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DsrExp();

    [GeneratedRegex(@"\b(?<regional>R[0-9]{1}|REGIONAL)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex RegionalExp();

    [GeneratedRegex(@"\b(?<ppv>PPV)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex PpvExp();

    [GeneratedRegex(@"\b(?<ts>TS|TELESYNC|HD-TS|HDTS|PDVD|TSRip|HDTSRip)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex TsExp();

    [GeneratedRegex(@"\b(?<tc>TC|TELECINE|HD-TC|HDTC)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex TcExp();

    [GeneratedRegex(@"\b(?<cam>CAMRIP|CAM|HDCAM|HD-CAM)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex CamExp();

    [GeneratedRegex(@"\b(?<workprint>WORKPRINT|WP)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex WorkprintExp();

    [GeneratedRegex(@"\b(?<pdtv>PDTV)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex PdtvExp();

    [GeneratedRegex(@"\b(?<sdtv>SDTV)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SdtvExp();

    [GeneratedRegex(@"\b(?<tvrip>TVRip)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex TvripExp();

    /// <summary>
    /// Maps the source markers found in <paramref name="title"/> onto <see cref="Source"/> values.
    /// </summary>
    /// <param name="title">The raw torrent title.</param>
    /// <param name="result">
    /// The detected sources, in a fixed order: BLURAY, WEBRIP or WEBDL, DVD, PPV, WORKPRINT, TV, CAM,
    /// TELESYNC, TELECINE, SCREENER. Empty when nothing was detected.
    /// </param>
    public static void Parse(string title, out List<Source> result)
    {
        ParseSourceGroups(title, out var found);
        var sources = new List<Source>();

        // Local helper; it appends to a local list because an out parameter cannot be captured.
        void AddWhen(bool detected, Source source)
        {
            if (detected)
            {
                sources.Add(source);
            }
        }

        AddWhen(found["bluray"] || found["bdrip"] || found["brrip"], Source.BLURAY);

        // WEBRIP takes precedence: WEBDL is only reported when no web-rip marker is present.
        if (found["webrip"])
        {
            sources.Add(Source.WEBRIP);
        }
        else if (found["webdl"])
        {
            sources.Add(Source.WEBDL);
        }

        // A plain DVD marker next to a screener marker is not reported as DVD.
        AddWhen(found["dvdr"] || (found["dvd"] && !found["scr"]), Source.DVD);
        AddWhen(found["ppv"], Source.PPV);
        AddWhen(found["workprint"], Source.WORKPRINT);
        AddWhen(found["pdtv"] || found["sdtv"] || found["dsr"] || found["tvrip"] || found["hdtv"], Source.TV);
        AddWhen(found["cam"], Source.CAM);
        AddWhen(found["ts"], Source.TELESYNC);
        AddWhen(found["tc"], Source.TELECINE);
        AddWhen(found["scr"], Source.SCREENER);

        result = sources;
    }

    /// <summary>
    /// Runs every source expression against a normalised copy of <paramref name="title"/>.
    /// </summary>
    /// <param name="title">The raw torrent title.</param>
    /// <param name="groups">
    /// One entry per marker name (<c>bluray</c>, <c>webdl</c>, ..., <c>tvrip</c>) telling whether that marker matched.
    /// </param>
    public static void ParseSourceGroups(string title, out Dictionary<string, bool> groups)
    {
        // Underscores and square brackets become spaces so the \b anchors see word boundaries.
        var normalizedName = title
            .Replace('_', ' ')
            .Replace('[', ' ')
            .Replace(']', ' ')
            .Trim();

        groups = new Dictionary<string, bool>
        {
            ["bluray"] = BlurayExp().IsMatch(normalizedName),
            ["webdl"] = WebdlExp().IsMatch(normalizedName),
            ["webrip"] = WebripExp().IsMatch(normalizedName),
            ["hdtv"] = HdtvExp().IsMatch(normalizedName),
            ["bdrip"] = BdripExp().IsMatch(normalizedName),
            ["brrip"] = BrripExp().IsMatch(normalizedName),
            ["scr"] = ScrExp().IsMatch(normalizedName),
            ["dvdr"] = DvdrExp().IsMatch(normalizedName),
            ["dvd"] = DvdExp().IsMatch(normalizedName),
            ["dsr"] = DsrExp().IsMatch(normalizedName),
            ["regional"] = RegionalExp().IsMatch(normalizedName),
            ["ppv"] = PpvExp().IsMatch(normalizedName),
            ["ts"] = TsExp().IsMatch(normalizedName),
            ["tc"] = TcExp().IsMatch(normalizedName),
            ["cam"] = CamExp().IsMatch(normalizedName),
            ["workprint"] = WorkprintExp().IsMatch(normalizedName),
            ["pdtv"] = PdtvExp().IsMatch(normalizedName),
            ["sdtv"] = SdtvExp().IsMatch(normalizedName),
            ["tvrip"] = TvripExp().IsMatch(normalizedName),
        };
    }
}

View File

@@ -0,0 +1,197 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Extracts a cleaned movie title, and the release year when present, from a raw torrent title.
/// </summary>
public static partial class TitleParser
{
    [GeneratedRegex(@"^(?<title>(?![([]).+?)?(?:(?:[-_\W](?<![)[!]))*\(?\b(?<edition>(((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Anniversary|The.Uncut|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\b\)?.{1,3}(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex MovieTitleYearRegex1();

    [GeneratedRegex(@"^(?<title>(?![([]).+?)?(?:(?:[-_\W](?<![)[!]))*\((?<year>(1(8|9)|20)\d{2}(?!p|i|(1(8|9)|20)\d{2}|\]|\W(1(8|9)|20)\d{2})))+", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex MovieTitleYearRegex2();

    [GeneratedRegex(@"^(?<title>(?![([]).+?)?(?:(?:[-_\W](?<![)[!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|(1(8|9)|20)\d{2}|\]|\W(1(8|9)|20)\d{2})))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex MovieTitleYearRegex3();

    [GeneratedRegex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![()[!]))*(?<year>(\[\w *\])))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex MovieTitleYearRegex4();

    [GeneratedRegex(@"^(?<title>(?![([]).+?)?(?:(?:[-_\W](?<![)!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex MovieTitleYearRegex5();

    [GeneratedRegex(@"^(?<title>.+?)?(?:(?:[-_\W](?<![)[!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex MovieTitleYearRegex6();

    [GeneratedRegex(@"\s*(?:480[ip]|576[ip]|720[ip]|1080[ip]|2160[ip]|HVEC|[xh][\W_]?26[45]|DD\W?5\W1|[<>?*:|]|848x480|1280x720|1920x1080)((8|10)b(it))?", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SimpleTitleRegex();

    [GeneratedRegex(@"^\[\s*[a-z]+(\.[a-z]+)+\s*\][- ]*|^www\.[a-z]+\.(?:com|net)[ -]*", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex WebsitePrefixRegex();

    [GeneratedRegex(@"^\[(?:REQ)\]", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex CleanTorrentPrefixRegex();

    [GeneratedRegex(@"\[(?:ettv|rartv|rarbg|cttv)\]$", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex CleanTorrentSuffixRegex();

    [GeneratedRegex(@"\b(Bluray|(dvdr?|BD)rip|HDTV|HDRip|TS|R5|CAM|SCR|(WEB|DVD)?.?SCREENER|DiVX|xvid|web-?dl)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex CommonSourcesRegex();

    [GeneratedRegex(@"\b(?<webdl>WEB[-_. ]DL|HDRIP|WEBDL|WEB-DLMux|NF|APTV|NETFLIX|NetflixU?HD|DSNY|DSNP|HMAX|AMZN|AmazonHD|iTunesHD|MaxdomeHD|WebHD|WEB$|[. ]WEB[. ](?:[xh]26[45]|DD5[. ]1)|\d+0p[. ]WEB[. ]|\b\s\/\sWEB\s\/\s\b|AMZN[. ]WEB[. ])\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex WebdlExp();

    [GeneratedRegex(@"\[.+?\]", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex RequestInfoRegex();

    [GeneratedRegex(
        @"\b((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Anniversary|The.Uncut|DC|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Special|Despecialized|unrated|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1)))){1,3}",
        RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex EditionExp();

    [GeneratedRegex(@"\b(TRUE.?FRENCH|videomann|SUBFRENCH|PLDUB|MULTI)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex LanguageExp();

    [GeneratedRegex(@"\b(PROPER|REAL|READ.NFO)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SceneGarbageExp();

    [GeneratedRegex(@"-([a-z0-9]+)$", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex GrouplessTitleRegex();

    // One case-sensitive "\bLANGUAGE" expression per known language value (upper-cased), built once on
    // first use so that ReleaseTitleCleaner does not rebuild them for every title.
    private static readonly Lazy<Regex[]> LanguageTokenExps = new(
        () => Language.List
            .Select(lang => new Regex($@"\b{Regex.Escape(lang.Value.ToUpperInvariant())}"))
            .ToArray());

    /// <summary>
    /// Parses a raw torrent title into a cleaned title and, when one is found, a year.
    /// </summary>
    /// <remarks>
    /// The title is simplified and stripped of a trailing <c>-group</c> suffix, then tried against the
    /// title/year expressions in order. If none match, the title is cut at the first resolution, codec or
    /// audio marker. If no marker is found either, the trimmed input is returned.
    /// </remarks>
    /// <param name="title">The raw torrent title.</param>
    /// <param name="parsedTitle">
    /// The cleaned title. May be <c>null</c> when the title portion is empty or a lone <c>(</c>
    /// (see <see cref="ReleaseTitleCleaner"/>).
    /// </param>
    /// <param name="year">The text captured by the <c>year</c> group, or <c>null</c> when no year expression matched.</param>
    public static void Parse(string title, out string parsedTitle, out string? year)
    {
        var simpleTitle = SimplifyTitle(title);

        // Removing the group from the end could be trouble if a title is "title-year"
        var grouplessTitle = GrouplessTitleRegex().Replace(simpleTitle, "");

        Regex[] movieTitleYearRegexes =
        [
            MovieTitleYearRegex1(), MovieTitleYearRegex2(), MovieTitleYearRegex3(),
            MovieTitleYearRegex4(), MovieTitleYearRegex5(), MovieTitleYearRegex6(),
        ];

        foreach (var exp in movieTitleYearRegexes)
        {
            var match = exp.Match(grouplessTitle);
            if (!match.Success)
            {
                continue;
            }

            parsedTitle = ReleaseTitleCleaner(match.Groups["title"].Value);
            year = match.Groups["year"].Value;
            return;
        }

        // year not found, attack using codec or resolution
        // attempt to parse using the first found artifact like codec
        ResolutionParser.Parse(title, out var resolution, out _);
        // Use the text that actually matched in the title: the enum value ("x265") may differ from it ("HEVC", "X265").
        VideoCodecsParser.Parse(title, out _, out var videoCodecSource);
        AudioChannelsParser.Parse(title, out var channels, out _);
        AudioCodecsParser.Parse(title, out var audioCodec, out _);

        // IndexOf(string.Empty) yields 0 and a miss yields -1; both are dropped by the "> 0" filter below.
        var resolutionPosition = title.IndexOf(resolution?.Value ?? string.Empty, StringComparison.Ordinal);
        var videoCodecPosition = title.IndexOf(videoCodecSource ?? string.Empty, StringComparison.Ordinal);
        var channelsPosition = title.IndexOf(channels?.Value ?? string.Empty, StringComparison.Ordinal);
        var audioCodecPosition = title.IndexOf(audioCodec?.Value ?? string.Empty, StringComparison.Ordinal);

        var positions = new List<int> {resolutionPosition, audioCodecPosition, channelsPosition, videoCodecPosition}
            .Where(x => x > 0)
            .ToList();

        if (positions.Count != 0)
        {
            var firstPosition = positions.Min();
            parsedTitle = ReleaseTitleCleaner(title[..firstPosition]);
            year = null;
            return;
        }

        parsedTitle = title.Trim();
        year = null;
    }

    /// <summary>
    /// Removes resolution/codec markers, website prefixes, request tags, tracker suffixes, common source
    /// markers and up to two video codec tokens from a title.
    /// </summary>
    /// <param name="title">The raw torrent title.</param>
    /// <returns>The simplified, trimmed title.</returns>
    public static string SimplifyTitle(string title)
    {
        // Regex.Replace applies the pattern; string.Replace(pattern, "") would only look for the pattern text itself.
        var simpleTitle = SimpleTitleRegex().Replace(title, "");
        simpleTitle = WebsitePrefixRegex().Replace(simpleTitle, "");
        simpleTitle = CleanTorrentPrefixRegex().Replace(simpleTitle, "");
        simpleTitle = CleanTorrentSuffixRegex().Replace(simpleTitle, "");
        simpleTitle = CommonSourcesRegex().Replace(simpleTitle, "");
        simpleTitle = WebdlExp().Replace(simpleTitle, "");

        // allow filtering of up to two codecs.
        // maybe parseVideoCodec should be an array
        VideoCodecsParser.Parse(simpleTitle, out _, out var source1);
        if (!string.IsNullOrEmpty(source1))
        {
            // source1 is the literal matched text, so a plain string replace is correct here.
            simpleTitle = simpleTitle.Replace(source1, "");
        }

        VideoCodecsParser.Parse(simpleTitle, out _, out var source2);
        if (!string.IsNullOrEmpty(source2))
        {
            simpleTitle = simpleTitle.Replace(source2, "");
        }

        return simpleTitle.Trim();
    }

    /// <summary>
    /// Cleans the title portion of a release name: strips bracketed tags, source, edition, language and
    /// scene markers, cuts at the first gap left by a removal, and turns dot separators into spaces
    /// while keeping acronyms such as <c>S.W.A.T.</c> dotted.
    /// </summary>
    /// <param name="title">The title portion to clean.</param>
    /// <returns>
    /// The cleaned title. NOTE(review): returns <c>null</c> when <paramref name="title"/> is null, empty
    /// or a lone <c>(</c>, although the declared return type is non-nullable; kept for caller compatibility.
    /// </returns>
    public static string ReleaseTitleCleaner(string title)
    {
        if (string.IsNullOrEmpty(title) || title == "(")
        {
            return null;
        }

        var trimmedTitle = title.Replace("_", " ");
        trimmedTitle = RequestInfoRegex().Replace(trimmedTitle, "").Trim();
        trimmedTitle = CommonSourcesRegex().Replace(trimmedTitle, "").Trim();
        trimmedTitle = WebdlExp().Replace(trimmedTitle, "").Trim();
        trimmedTitle = EditionExp().Replace(trimmedTitle, "").Trim();
        trimmedTitle = LanguageExp().Replace(trimmedTitle, "").Trim();
        trimmedTitle = SceneGarbageExp().Replace(trimmedTitle, "").Trim();

        // Upper-case language names only (case-sensitive), so ordinary title words in mixed case are left alone.
        trimmedTitle = LanguageTokenExps.Value.Aggregate(trimmedTitle, (current, exp) => exp.Replace(current, "").Trim());

        // Look for gap formed by removing items
        trimmedTitle = trimmedTitle.Split("  ")[0];
        trimmedTitle = trimmedTitle.Split("..")[0];

        var parts = trimmedTitle.Split('.');
        var result = new StringBuilder();
        var previousAcronym = false;
        var nextPart = "";

        for (var n = 0; n < parts.Length; n++)
        {
            var part = parts[n];

            // On the last part nextPart keeps its previous value (the last part itself).
            if (parts.Length >= n + 2)
            {
                nextPart = parts[n + 1];
            }

            var isLetterA = string.Equals(part, "a", StringComparison.OrdinalIgnoreCase);

            if (part.Length == 1 && !isLetterA && !int.TryParse(part, out _))
            {
                // Single non-digit character: part of an acronym, keep the dot.
                result.Append(part).Append('.');
                previousAcronym = true;
            }
            else if (isLetterA && (previousAcronym || nextPart.Length == 1))
            {
                // "a" counts as an acronym letter only when surrounded by acronym letters.
                result.Append(part).Append('.');
                previousAcronym = true;
            }
            else
            {
                if (previousAcronym)
                {
                    result.Append(' ');
                    previousAcronym = false;
                }

                result.Append(part).Append(' ');
            }
        }

        return result.ToString().Trim();
    }
}

View File

@@ -0,0 +1,142 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Parses a torrent name into a <see cref="ParsedFilename"/>, treating it as a TV release when any of
/// the season/episode patterns match and as a movie otherwise.
/// </summary>
public partial class TorrentTitleParser : ITorrentTitleParser
{
    [GeneratedRegex(@"(season|episode)s?.?\d?", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SeasonEpisode();

    [GeneratedRegex(@"[se]\d\d", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SeasonShort();

    [GeneratedRegex(@"\b(tv|complete)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex TvOrComplete();

    [GeneratedRegex(@"\b(saison|stage).?\d", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SeasonStage();

    [GeneratedRegex(@"[a-z]\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex Season();

    [GeneratedRegex(@"\d{2,4}\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex SeasonTwo();

    /// <summary>
    /// Runs the attribute parsers over <paramref name="name"/> and then delegates to the TV or movie
    /// specific parser.
    /// </summary>
    /// <param name="name">The raw torrent name.</param>
    /// <returns>The parsed result, with <c>Type</c> set by the TV or movie parser.</returns>
    public ParsedFilename Parse(string name)
    {
        VideoCodecsParser.Parse(name, out var parsedVideoCodec, out _);
        AudioCodecsParser.Parse(name, out var parsedAudioCodec, out _);
        AudioChannelsParser.Parse(name, out var parsedAudioChannels, out _);
        LanguageParser.Parse(name, out var parsedLanguages);
        QualityParser.Parse(name, out var parsedQuality);
        var parsedGroup = GroupParser.Parse(name);
        var parsedEdition = EditionParser.Parse(name);
        var isMulti = LanguageParser.IsMulti(name);
        var isComplete = Complete.IsComplete(name);

        // Attributes shared by movie and TV results.
        var common = new BaseParsed
        {
            Resolution = parsedQuality.Resolution,
            Sources = parsedQuality.Sources,
            VideoCodec = parsedVideoCodec,
            AudioCodec = parsedAudioCodec,
            AudioChannels = parsedAudioChannels,
            Revision = parsedQuality.Revision,
            Group = parsedGroup,
            Edition = parsedEdition,
            Languages = parsedLanguages,
            Multi = isMulti,
            Complete = isComplete,
        };

        return GetTypeByName(name) switch
        {
            TorrentType.Tv => ParseSeason(name, common),
            _ => ParseMovie(name, common),
        };
    }

    /// <summary>
    /// Builds a TV result from the season parser's output plus the shared attributes.
    /// </summary>
    /// <param name="name">The raw torrent name.</param>
    /// <param name="baseParsed">The attributes shared by movie and TV results.</param>
    /// <returns>
    /// A TV result, or an empty <see cref="ParsedFilename"/> when the season parser yields nothing.
    /// </returns>
    private static ParsedFilename ParseSeason(string name, BaseParsed baseParsed)
    {
        var season = SeasonParser.Parse(name);
        if (season is null)
        {
            return new ParsedFilename();
        }

        return new ParsedFilename
        {
            Show = new()
            {
                EpisodeNumbers = season.EpisodeNumbers,
                FullSeason = season.FullSeason,
                IsPartialSeason = season.IsPartialSeason,
                IsSpecial = season.IsSpecial,
                SeasonPart = season.SeasonPart,
                IsSeasonExtra = season.IsSeasonExtra,
                SeriesTitle = season.SeriesTitle,
                IsMultiSeason = season.IsMultiSeason,
                AirDate = season.AirDate,
                Seasons = season.Seasons,
                ReleaseTitle = season.ReleaseTitle,
                Edition = baseParsed.Edition,
                Resolution = baseParsed.Resolution,
                Sources = baseParsed.Sources,
                VideoCodec = baseParsed.VideoCodec,
                Complete = baseParsed.Complete,
                AudioCodec = baseParsed.AudioCodec,
                Languages = baseParsed.Languages,
                AudioChannels = baseParsed.AudioChannels,
                Group = baseParsed.Group,
                Multi = baseParsed.Multi,
                Revision = baseParsed.Revision,
            },
            Type = TorrentType.Tv,
        };
    }

    /// <summary>
    /// Builds a movie result from the title parser's output plus the shared attributes.
    /// </summary>
    /// <param name="name">The raw torrent name; also stored as the release title.</param>
    /// <param name="baseParsed">
    /// The attributes shared by movie and TV results. Its <c>Title</c> and <c>Year</c> are overwritten
    /// with the parsed values.
    /// </param>
    /// <returns>A movie result.</returns>
    private static ParsedFilename ParseMovie(string name, BaseParsed baseParsed)
    {
        TitleParser.Parse(name, out var movieTitle, out var movieYear);
        baseParsed.Title = movieTitle;
        baseParsed.Year = movieYear;

        return new ParsedFilename
        {
            Movie = new()
            {
                ReleaseTitle = name,
                Title = baseParsed.Title,
                Year = baseParsed.Year,
                Edition = baseParsed.Edition,
                Resolution = baseParsed.Resolution,
                Sources = baseParsed.Sources,
                VideoCodec = baseParsed.VideoCodec,
                Complete = baseParsed.Complete,
                AudioCodec = baseParsed.AudioCodec,
                Languages = baseParsed.Languages,
                AudioChannels = baseParsed.AudioChannels,
                Group = baseParsed.Group,
                Multi = baseParsed.Multi,
                Revision = baseParsed.Revision,
            },
            Type = TorrentType.Movie,
        };
    }

    /// <summary>
    /// Classifies a torrent name as TV when any season/episode pattern matches, otherwise as a movie.
    /// </summary>
    /// <param name="name">The raw torrent name.</param>
    /// <returns><see cref="TorrentType.Tv"/> or <see cref="TorrentType.Movie"/>.</returns>
    private static TorrentType GetTypeByName(string name)
    {
        Func<Regex>[] tvPatterns =
        [
            SeasonEpisode,
            SeasonShort,
            TvOrComplete,
            SeasonStage,
            Season,
            SeasonTwo,
        ];

        // Any() stops at the first pattern that matches, as the original loop did.
        return tvPatterns.Any(pattern => pattern().IsMatch(name))
            ? TorrentType.Tv
            : TorrentType.Movie;
    }
}

View File

@@ -0,0 +1,7 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Classification of a parsed torrent name: a movie or a TV release.
/// </summary>
public enum TorrentType
{
// First member, so it is also the enum's default (zero) value.
Movie,
Tv,
}

View File

@@ -0,0 +1,16 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Smart enum of video codecs. The name is the member identifier; the value is the codec's string form.
/// </summary>
public sealed class VideoCodec : SmartEnum<VideoCodec, string>
{
    public static readonly VideoCodec X265 = new(nameof(X265), "x265");
    public static readonly VideoCodec X264 = new(nameof(X264), "x264");
    public static readonly VideoCodec H264 = new(nameof(H264), "h264");
    public static readonly VideoCodec H265 = new(nameof(H265), "h265");
    public static readonly VideoCodec WMV = new(nameof(WMV), "WMV");
    public static readonly VideoCodec XVID = new(nameof(XVID), "xvid");
    public static readonly VideoCodec DVDR = new(nameof(DVDR), "dvdr");

    private VideoCodec(string name, string value)
        : base(name, value)
    {
    }
}

View File

@@ -0,0 +1,89 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Detects the first video codec marker in a torrent title.
/// </summary>
public partial class VideoCodecsParser
{
    [GeneratedRegex(@"(?<x265>x265)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex X265Exp();

    [GeneratedRegex(@"(?<h265>h265)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex H265Exp();

    [GeneratedRegex(@"(?<x264>x264)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex X264Exp();

    [GeneratedRegex(@"(?<h264>h264)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex H264Exp();

    [GeneratedRegex(@"(?<wmv>WMV)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex WMVExp();

    [GeneratedRegex(@"(?<xvidhd>XvidHD)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex XvidhdExp();

    [GeneratedRegex(@"(?<xvid>X-?vid)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex XvidExp();

    [GeneratedRegex(@"(?<divx>divx)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DivxExp();

    [GeneratedRegex(@"(?<hevc>HEVC)", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex HevcExp();

    [GeneratedRegex(@"(?<dvdr>DVDR)\b", RegexOptions.IgnoreCase, "en-GB")]
    private static partial Regex DvdrExp();

    // All codec expressions joined into one alternation; the named groups tell which one matched.
    private static readonly Regex CodecExp = new(
        string.Join(
            "|",
            new[]
            {
                X265Exp(), H265Exp(), X264Exp(), H264Exp(), WMVExp(),
                XvidhdExp(), XvidExp(), DivxExp(), HevcExp(), DvdrExp(),
            }.Select(exp => exp.ToString())),
        RegexOptions.IgnoreCase);

    // Named groups in the order they are checked, with the codec each one maps to.
    private static readonly (string GroupName, VideoCodec Codec)[] GroupPriority =
    [
        ("h264", VideoCodec.H264),
        ("h265", VideoCodec.H265),
        ("x265", VideoCodec.X265),
        ("hevc", VideoCodec.X265),
        ("x264", VideoCodec.X264),
        ("xvidhd", VideoCodec.XVID),
        ("xvid", VideoCodec.XVID),
        ("divx", VideoCodec.XVID),
        ("wmv", VideoCodec.WMV),
        ("dvdr", VideoCodec.DVDR),
    ];

    /// <summary>
    /// Finds the first codec marker in <paramref name="title"/>.
    /// </summary>
    /// <param name="title">The torrent title to inspect.</param>
    /// <param name="codec">The detected codec, or <c>null</c> when no marker was found.</param>
    /// <param name="source">The exact text that matched, or <c>null</c> when no marker was found.</param>
    public static void Parse(string title, out VideoCodec? codec, out string? source)
    {
        codec = null;
        source = null;

        var match = CodecExp.Match(title);
        if (!match.Success)
        {
            return;
        }

        foreach (var (groupName, mappedCodec) in GroupPriority)
        {
            var group = match.Groups[groupName];
            if (!group.Success)
            {
                continue;
            }

            codec = mappedCodec;
            source = group.Value;
            return;
        }
    }
}

View File

@@ -0,0 +1,16 @@
namespace Producer.Features.Wordlists;
/// <summary>
/// Exposes several named sets of strings together with a method that loads them.
/// </summary>
public interface IWordCollections
{
// NOTE(review): the exact content and casing of each set depends on the implementation's data source - confirm there.
HashSet<string> AdultWords { get; }
HashSet<string> AdultCompoundPhrases { get; }
HashSet<string> CommonWords { get; }
HashSet<string> Jav { get; }
HashSet<string> AdultStars { get; }
/// <summary>
/// Loads the word sets. Takes no cancellation token.
/// </summary>
Task LoadAsync();
}

View File

@@ -0,0 +1,21 @@
namespace Producer.Features.Wordlists;
/// <summary>
/// Hosted service that loads the word collections when the host starts and logs the size of each set.
/// </summary>
public class PopulationService(IWordCollections wordCollections, ILogger<PopulationService> logger) : IHostedService
{
    /// <summary>
    /// Loads all word collections and logs their entry counts.
    /// </summary>
    /// <param name="cancellationToken">Not observed; <see cref="IWordCollections.LoadAsync"/> takes no token.</param>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Loading word collections...");

        await wordCollections.LoadAsync();

        var commonWordCount = wordCollections.CommonWords.Count;
        var adultWordCount = wordCollections.AdultWords.Count;
        var adultPhraseCount = wordCollections.AdultCompoundPhrases.Count;
        var javCount = wordCollections.Jav.Count;
        var adultStarCount = wordCollections.AdultStars.Count;

        logger.LogInformation("Common Words Count: {Count}", commonWordCount);
        logger.LogInformation("Adult Words Count: {Count}", adultWordCount);
        logger.LogInformation("Adult Compound Phrases Count: {Count}", adultPhraseCount);
        logger.LogInformation("Jav Count: {Count}", javCount);
        logger.LogInformation("Adult Stars Count: {Count}", adultStarCount);

        logger.LogInformation("Word collections loaded.");
    }

    /// <summary>
    /// Nothing to tear down; completes immediately.
    /// </summary>
    /// <param name="cancellationToken">Unused.</param>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        return Task.CompletedTask;
    }
}

View File

@@ -0,0 +1,12 @@
namespace Producer.Features.Wordlists;
/// <summary>
/// Dependency-injection registration for the word list feature.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="IWordCollections"/> as a singleton and <see cref="PopulationService"/> as a hosted service.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same <paramref name="services"/> instance, to allow call chaining.</returns>
    public static IServiceCollection RegisterWordCollections(this IServiceCollection services) =>
        services
            .AddSingleton<IWordCollections, WordCollections>()
            .AddHostedService<PopulationService>();
}

View File

@@ -0,0 +1,65 @@
namespace Producer.Features.Wordlists;
/// <summary>
/// Loads the word lists from text files in the <c>Data</c> folder under the application base directory,
/// one set entry per line.
/// </summary>
public class WordCollections : IWordCollections
{
    private const string AdultWordsFile = "adult-words.txt";
    private const string AdultCompoundPhrasesFile = "adult-compound-words.txt";
    private const string AdultStarsFile = "adult-stars.txt";
    private const string JavFile = "jav.txt";
    private const string CommonWordsFile = "common-words.txt";

    public HashSet<string> AdultWords { get; private set; } = [];

    public HashSet<string> AdultCompoundPhrases { get; private set; } = [];

    public HashSet<string> AdultStars { get; private set; } = [];

    public HashSet<string> Jav { get; private set; } = [];

    public HashSet<string> CommonWords { get; private set; } = [];

    /// <summary>
    /// Starts reading all five word files and completes once every read has finished.
    /// Each property is replaced as soon as its own file has been read.
    /// </summary>
    public async Task LoadAsync()
    {
        Task[] pendingLoads =
        [
            LoadInto(AdultWordsFile, words => AdultWords = words),
            LoadInto(AdultCompoundPhrasesFile, words => AdultCompoundPhrases = words),
            LoadInto(CommonWordsFile, words => CommonWords = words),
            LoadInto(JavFile, words => Jav = words),
            LoadInto(AdultStarsFile, words => AdultStars = words),
        ];

        await Task.WhenAll(pendingLoads);
    }

    // Reads every line of Data/<fileName> into a new set and hands it to the given setter.
    private static async Task LoadInto(string fileName, Action<HashSet<string>> assign)
    {
        var path = Path.Combine(AppContext.BaseDirectory, "Data", fileName);
        var lines = await File.ReadAllLinesAsync(path);
        assign(new HashSet<string>(lines));
    }
}

View File

@@ -1,12 +1,17 @@
// Global using directives
global using System.Globalization;
global using System.Reflection;
global using System.Text;
global using System.Text.Json;
global using System.Text.RegularExpressions;
global using System.Threading.Channels;
global using System.Xml.Linq;
global using Ardalis.SmartEnum;
global using Dapper;
global using FuzzySharp;
global using FuzzySharp.Extractor;
global using FuzzySharp.PreProcess;
global using LZStringCSharp;
global using MassTransit;
global using Microsoft.AspNetCore.Builder;
@@ -25,4 +30,6 @@ global using Producer.Features.Crawlers.Torrentio;
global using Producer.Features.CrawlerSupport;
global using Producer.Features.DataProcessing;
global using Producer.Features.JobSupport;
global using Producer.Features.ParseTorrentTitle;
global using Producer.Features.Wordlists;
global using Serilog;

View File

@@ -9,7 +9,9 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Ardalis.SmartEnum" Version="8.0.0" />
<PackageReference Include="Dapper" Version="2.1.28" />
<PackageReference Include="FuzzySharp" Version="2.0.2" />
<PackageReference Include="LZStringCSharp" Version="1.4.0" />
<PackageReference Include="MassTransit" Version="8.1.3" />
<PackageReference Include="MassTransit.RabbitMQ" Version="8.1.3" />
@@ -32,4 +34,11 @@
</None>
</ItemGroup>
<ItemGroup>
<Content Remove="Data\**" />
<None Include="Data\**">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

View File

@@ -10,6 +10,8 @@ builder.Services
.RegisterMassTransit()
.AddDataStorage()
.AddCrawlers()
.RegisterWordCollections()
.RegisterParseTorrentTitle()
.AddQuartz(builder.Configuration);
var host = builder.Build();