mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
[skip ci] More work on parsing - seasons to fix still and use banned words
This commit is contained in:
File diff suppressed because it is too large
Load Diff
3000
src/producer/Data/adult-compound-words.txt
Normal file
3000
src/producer/Data/adult-compound-words.txt
Normal file
File diff suppressed because it is too large
Load Diff
1670
src/producer/Data/adult-words.txt
Normal file
1670
src/producer/Data/adult-words.txt
Normal file
File diff suppressed because it is too large
Load Diff
172964
src/producer/Data/common-words.txt
Normal file
172964
src/producer/Data/common-words.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -12,8 +12,7 @@ public static class ConfigurationExtensions
|
|||||||
configuration.AddJsonFile(LoggingConfig, false, true);
|
configuration.AddJsonFile(LoggingConfig, false, true);
|
||||||
configuration.AddJsonFile(ScrapeConfiguration.Filename, false, true);
|
configuration.AddJsonFile(ScrapeConfiguration.Filename, false, true);
|
||||||
configuration.AddJsonFile(TorrentioConfiguration.Filename, false, true);
|
configuration.AddJsonFile(TorrentioConfiguration.Filename, false, true);
|
||||||
configuration.AddJsonFile(AdultContentConfiguration.Filename, false, true);
|
|
||||||
|
|
||||||
configuration.AddEnvironmentVariables();
|
configuration.AddEnvironmentVariables();
|
||||||
|
|
||||||
configuration.AddUserSecrets<Program>();
|
configuration.AddUserSecrets<Program>();
|
||||||
|
|||||||
@@ -4,34 +4,19 @@ public partial class DebridMediaManagerCrawler(
|
|||||||
IHttpClientFactory httpClientFactory,
|
IHttpClientFactory httpClientFactory,
|
||||||
ILogger<DebridMediaManagerCrawler> logger,
|
ILogger<DebridMediaManagerCrawler> logger,
|
||||||
IDataStorage storage,
|
IDataStorage storage,
|
||||||
GithubConfiguration githubConfiguration,
|
GithubConfiguration githubConfiguration) : BaseCrawler(logger, storage)
|
||||||
AdultContentConfiguration adultContentConfiguration,
|
|
||||||
IServiceProvider serviceProvider) : BaseCrawler(logger, storage)
|
|
||||||
{
|
{
|
||||||
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
|
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
|
||||||
private static partial Regex HashCollectionMatcher();
|
private static partial Regex HashCollectionMatcher();
|
||||||
|
|
||||||
[GeneratedRegex(@"[sS]([0-9]{1,2})|seasons?[\s-]?([0-9]{1,2})", RegexOptions.IgnoreCase, "en-GB")]
|
|
||||||
private static partial Regex SeasonMatcher();
|
|
||||||
|
|
||||||
[GeneratedRegex(@"[0-9]{4}", RegexOptions.IgnoreCase, "en-GB")]
|
|
||||||
private static partial Regex YearMatcher();
|
|
||||||
|
|
||||||
private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
|
private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
|
||||||
|
|
||||||
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
|
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
|
||||||
protected override string Url => "https://api.github.com/repos/debridmediamanager/hashlists/git/trees/main?recursive=1";
|
protected override string Url => "https://api.github.com/repos/debridmediamanager/hashlists/git/trees/main?recursive=1";
|
||||||
protected override string Source => "DMM";
|
protected override string Source => "DMM";
|
||||||
|
|
||||||
private IFuzzySearcher<string>? _adultContentSearcher;
|
|
||||||
|
|
||||||
public override async Task Execute()
|
public override async Task Execute()
|
||||||
{
|
{
|
||||||
if (!adultContentConfiguration.Allow)
|
|
||||||
{
|
|
||||||
_adultContentSearcher = serviceProvider.GetRequiredService<IFuzzySearcher<string>>();
|
|
||||||
}
|
|
||||||
|
|
||||||
var client = httpClientFactory.CreateClient("Scraper");
|
var client = httpClientFactory.CreateClient("Scraper");
|
||||||
client.DefaultRequestHeaders.Authorization = new("Bearer", githubConfiguration.PAT);
|
client.DefaultRequestHeaders.Authorization = new("Bearer", githubConfiguration.PAT);
|
||||||
client.DefaultRequestHeaders.UserAgent.ParseAdd("curl");
|
client.DefaultRequestHeaders.UserAgent.ParseAdd("curl");
|
||||||
@@ -107,14 +92,14 @@ public partial class DebridMediaManagerCrawler(
|
|||||||
|
|
||||||
private Torrent? ParseTorrent(JsonElement item)
|
private Torrent? ParseTorrent(JsonElement item)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (!item.TryGetProperty("filename", out var filenameElement) ||
|
if (!item.TryGetProperty("filename", out var filenameElement) ||
|
||||||
!item.TryGetProperty("bytes", out var bytesElement) ||
|
!item.TryGetProperty("bytes", out var bytesElement) ||
|
||||||
!item.TryGetProperty("hash", out var hashElement))
|
!item.TryGetProperty("hash", out var hashElement))
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
var torrent = new Torrent
|
var torrent = new Torrent
|
||||||
{
|
{
|
||||||
Source = Source,
|
Source = Source,
|
||||||
@@ -130,45 +115,37 @@ public partial class DebridMediaManagerCrawler(
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
torrent.Category = (SeasonMatcher().IsMatch(torrent.Name), YearMatcher().IsMatch(torrent.Name)) switch
|
var parsedTorrent = TorrentTitleParser.Parse(torrent.Name);
|
||||||
|
|
||||||
|
if (parsedTorrent.IsInvalid)
|
||||||
{
|
{
|
||||||
(true, _) => "tv",
|
return null;
|
||||||
(_, true) => "movies",
|
}
|
||||||
_ => "unknown",
|
|
||||||
};
|
|
||||||
|
|
||||||
return HandleAdultContent(torrent);
|
if (parsedTorrent.IsMovie)
|
||||||
}
|
|
||||||
|
|
||||||
private Torrent HandleAdultContent(Torrent torrent)
|
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
{
|
||||||
if (!adultContentConfiguration.Allow)
|
torrent.Category = "movies";
|
||||||
{
|
torrent.Name = parsedTorrent.Movie.Title;
|
||||||
var adultMatch = _adultContentSearcher!.Search(torrent.Name.Replace(".", " "));
|
|
||||||
|
|
||||||
if (adultMatch.Count > 0)
|
|
||||||
{
|
|
||||||
logger.LogWarning("Adult content found in {Name}. Marking category as 'xxx'", torrent.Name);
|
|
||||||
logger.LogWarning("Matches: {TopMatch} {TopScore}", adultMatch.First().Value, adultMatch.First().Score);
|
|
||||||
torrent.Category = "xxx";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return torrent;
|
return torrent;
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
|
||||||
|
if (parsedTorrent.IsShow)
|
||||||
{
|
{
|
||||||
logger.LogWarning("Failed to handle adult content for {Name}: [{Error}]. Torrent will not be ingested at this time.", torrent.Name, e.Message);
|
torrent.Category = "tv";
|
||||||
return null;
|
torrent.Name = parsedTorrent.Show.Title;
|
||||||
|
|
||||||
|
return torrent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private async Task InsertTorrentsForPage(JsonDocument json)
|
private async Task InsertTorrentsForPage(JsonDocument json)
|
||||||
{
|
{
|
||||||
var torrents = json.RootElement.EnumerateArray()
|
var torrents = json.RootElement.EnumerateArray()
|
||||||
.Select(ParseTorrent)
|
.Select(ParseTorrent)
|
||||||
|
.Where(t => t is not null)
|
||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
if (torrents.Count == 0)
|
if (torrents.Count == 0)
|
||||||
|
|||||||
@@ -1,16 +1,13 @@
|
|||||||
namespace Producer.Features.Crawlers.Dmm;
|
namespace Producer.Features.Crawlers.Dmm;
|
||||||
|
|
||||||
public partial class ParsingService(AdultContentConfiguration adultContentConfiguration)
|
public partial class ParsingService(IWordCollections wordCollections)
|
||||||
{
|
{
|
||||||
private static readonly char[] WhitespaceSeparator = [' '];
|
private static readonly char[] WhitespaceSeparator = [' '];
|
||||||
|
|
||||||
//todo: Populate dictionary
|
public string Naked(string title) =>
|
||||||
private static readonly HashSet<string> Dictionary = new HashSet<string>();
|
|
||||||
|
|
||||||
public static string Naked(string title) =>
|
|
||||||
NakedMatcher().Replace(title.ToLower(), "");
|
NakedMatcher().Replace(title.ToLower(), "");
|
||||||
|
|
||||||
public static List<string> GrabYears(string str)
|
public List<string> GrabYears(string str)
|
||||||
{
|
{
|
||||||
var matches = GrabYearsMatcher().Matches(str);
|
var matches = GrabYearsMatcher().Matches(str);
|
||||||
return matches
|
return matches
|
||||||
@@ -19,7 +16,7 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
.ToList();
|
.ToList();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<int> GrabPossibleSeasonNums(string str)
|
public List<int> GrabPossibleSeasonNums(string str)
|
||||||
{
|
{
|
||||||
var matches = GrabPossibleSeasonNumsMatcher().Matches(str);
|
var matches = GrabPossibleSeasonNumsMatcher().Matches(str);
|
||||||
return matches
|
return matches
|
||||||
@@ -28,7 +25,7 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
.ToList();
|
.ToList();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static bool HasYear(string test, List<string> years, bool strictCheck = false) =>
|
public bool HasYear(string test, List<string> years, bool strictCheck = false) =>
|
||||||
strictCheck
|
strictCheck
|
||||||
? years.Any(test.Contains)
|
? years.Any(test.Contains)
|
||||||
: years.Any(year =>
|
: years.Any(year =>
|
||||||
@@ -39,7 +36,7 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
test.Contains($"{intYear - 1}");
|
test.Contains($"{intYear - 1}");
|
||||||
});
|
});
|
||||||
|
|
||||||
public static string RemoveDiacritics(string str)
|
public string RemoveDiacritics(string str)
|
||||||
{
|
{
|
||||||
var normalizedString = str.Normalize(NormalizationForm.FormD);
|
var normalizedString = str.Normalize(NormalizationForm.FormD);
|
||||||
var stringBuilder = new StringBuilder();
|
var stringBuilder = new StringBuilder();
|
||||||
@@ -56,9 +53,9 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
|
return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static string RemoveRepeats(string str) => RemoveRepeatsMatcher().Replace(str, "$1");
|
public string RemoveRepeats(string str) => RemoveRepeatsMatcher().Replace(str, "$1");
|
||||||
|
|
||||||
public static int RomanToDecimal(string roman)
|
public int RomanToDecimal(string roman)
|
||||||
{
|
{
|
||||||
var romanNumerals = new Dictionary<char, int>
|
var romanNumerals = new Dictionary<char, int>
|
||||||
{
|
{
|
||||||
@@ -84,9 +81,9 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static string ReplaceRomanWithDecimal(string input) => ReplaceRomanWithDecimalMatcher().Replace(input, match => RomanToDecimal(match.Value).ToString());
|
public string ReplaceRomanWithDecimal(string input) => ReplaceRomanWithDecimalMatcher().Replace(input, match => RomanToDecimal(match.Value).ToString());
|
||||||
|
|
||||||
public static bool StrictEqual(string title1, string title2)
|
public bool StrictEqual(string title1, string title2)
|
||||||
{
|
{
|
||||||
title1 = WhitespaceMatcher().Replace(title1, "");
|
title1 = WhitespaceMatcher().Replace(title1, "");
|
||||||
title2 = WhitespaceMatcher().Replace(title2, "");
|
title2 = WhitespaceMatcher().Replace(title2, "");
|
||||||
@@ -96,8 +93,8 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
(RemoveRepeats(title1).Length > 0 && RemoveRepeats(title1) == RemoveRepeats(title2)) ||
|
(RemoveRepeats(title1).Length > 0 && RemoveRepeats(title1) == RemoveRepeats(title2)) ||
|
||||||
(RemoveDiacritics(title1).Length > 0 && RemoveDiacritics(title1) == RemoveDiacritics(title2));
|
(RemoveDiacritics(title1).Length > 0 && RemoveDiacritics(title1) == RemoveDiacritics(title2));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int CountTestTermsInTarget(string test, string target, bool shouldBeInSequence = false)
|
public int CountTestTermsInTarget(string test, string target, bool shouldBeInSequence = false)
|
||||||
{
|
{
|
||||||
var replaceCount = 0;
|
var replaceCount = 0;
|
||||||
var prevReplaceCount = 0;
|
var prevReplaceCount = 0;
|
||||||
@@ -134,7 +131,7 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
{
|
{
|
||||||
var prefix = first ? @"\b" : "";
|
var prefix = first ? @"\b" : "";
|
||||||
var suffix = last ? @"\b" : "";
|
var suffix = last ? @"\b" : "";
|
||||||
testStr = Regex.Replace(testStr.Substring(prevOffset + prevLength), $"{prefix}{newTerm}{suffix}", replacer);
|
testStr = Regex.Replace(testStr[(prevOffset + prevLength)..], $"{prefix}{newTerm}{suffix}", replacer);
|
||||||
};
|
};
|
||||||
|
|
||||||
var actual = wordsInTitle.Where((term, idx) =>
|
var actual = wordsInTitle.Where((term, idx) =>
|
||||||
@@ -198,11 +195,11 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
}
|
}
|
||||||
return actual.Count;
|
return actual.Count;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static bool FlexEq(string test, string target, List<string> years)
|
public bool FlexEq(string test, string target, List<string> years)
|
||||||
{
|
{
|
||||||
var movieTitle = TorrentTitleParser.Parse(test).Movie.Title.ToLower();
|
var movieTitle = TorrentTitleParser.Parse(test).Movie.Title.ToLower();
|
||||||
var tvTitle = TorrentTitleParser.Parse(test, true).Show.Title.ToLower();
|
var tvTitle = TorrentTitleParser.Parse(test).Show.Title.ToLower();
|
||||||
|
|
||||||
var target2 = WhitespaceMatcher().Replace(target, "");
|
var target2 = WhitespaceMatcher().Replace(target, "");
|
||||||
var test2 = WhitespaceMatcher().Replace(test, "");
|
var test2 = WhitespaceMatcher().Replace(test, "");
|
||||||
@@ -230,7 +227,7 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
return StrictEqual(target, movieTitle) || StrictEqual(target, tvTitle);
|
return StrictEqual(target, movieTitle) || StrictEqual(target, tvTitle);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static bool MatchesTitle(string target, List<string> years, string test)
|
public bool MatchesTitle(string target, List<string> years, string test)
|
||||||
{
|
{
|
||||||
target = target.ToLower();
|
target = target.ToLower();
|
||||||
test = test.ToLower();
|
test = test.ToLower();
|
||||||
@@ -250,7 +247,7 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
var keyTerms = splits.Where(s => (s.Length > 1 && !Dictionary.Contains(s)) || s.Length > 5).ToList();
|
var keyTerms = splits.Where(s => (s.Length > 1 && !wordCollections.CommonWords.Contains(s)) || s.Length > 5).ToList();
|
||||||
keyTerms.AddRange(target.Split(WhitespaceSeparator, StringSplitOptions.RemoveEmptyEntries).Where(e => e.Length > 2));
|
keyTerms.AddRange(target.Split(WhitespaceSeparator, StringSplitOptions.RemoveEmptyEntries).Where(e => e.Length > 2));
|
||||||
var keySet = new HashSet<string>(keyTerms);
|
var keySet = new HashSet<string>(keyTerms);
|
||||||
var commonTerms = splits.Where(s => !keySet.Contains(s)).ToList();
|
var commonTerms = splits.Where(s => !keySet.Contains(s)).ToList();
|
||||||
@@ -269,8 +266,8 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
|
|
||||||
return Math.Floor(score / 0.85) >= totalScore;
|
return Math.Floor(score / 0.85) >= totalScore;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static bool IncludesMustHaveTerms(List<string> mustHaveTerms, string testTitle) =>
|
public bool IncludesMustHaveTerms(List<string> mustHaveTerms, string testTitle) =>
|
||||||
mustHaveTerms.All(term =>
|
mustHaveTerms.All(term =>
|
||||||
{
|
{
|
||||||
var newTitle = testTitle.Replace(term, "");
|
var newTitle = testTitle.Replace(term, "");
|
||||||
@@ -301,24 +298,24 @@ public partial class ParsingService(AdultContentConfiguration adultContentConfig
|
|||||||
{
|
{
|
||||||
var words = WordMatcher().Split(testTitle.ToLower()).Where(word => word.Length > 3).ToList();
|
var words = WordMatcher().Split(testTitle.ToLower()).Where(word => word.Length > 3).ToList();
|
||||||
|
|
||||||
var hasBannedWords = words.Any(word => !targetTitle.Contains(word) && adultContentConfiguration.Keywords.Contains(word));
|
var hasBannedWords = words.Any(word => !targetTitle.Contains(word) && wordCollections.AdultWords.Contains(word));
|
||||||
|
|
||||||
var titleWithoutSymbols = string.Join(' ', WordMatcher().Split(testTitle.ToLower()));
|
var titleWithoutSymbols = string.Join(' ', WordMatcher().Split(testTitle.ToLower()));
|
||||||
|
|
||||||
var hasBannedCompoundWords = adultContentConfiguration.CompoundKeywords.Any(compoundWord => !targetTitle.Contains(compoundWord) && titleWithoutSymbols.Contains(compoundWord));
|
var hasBannedCompoundWords = wordCollections.AdultCompoundPhrases.Any(compoundWord => !targetTitle.Contains(compoundWord) && titleWithoutSymbols.Contains(compoundWord));
|
||||||
|
|
||||||
return !hasBannedWords && !hasBannedCompoundWords;
|
return !hasBannedWords && !hasBannedCompoundWords;
|
||||||
}
|
}
|
||||||
|
|
||||||
public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle) => MatchesTitle(targetTitle, years, testTitle) && HasNoBannedTerms(targetTitle, testTitle);
|
public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle) => MatchesTitle(targetTitle, years, testTitle) && HasNoBannedTerms(targetTitle, testTitle);
|
||||||
|
|
||||||
public static int CountUncommonWords(string title)
|
public int CountUncommonWords(string title)
|
||||||
{
|
{
|
||||||
var processedTitle = WhitespaceMatcher().Split(title)
|
var processedTitle = WhitespaceMatcher().Split(title)
|
||||||
.Select(word => WordProcessingMatcher().Replace(word.ToLower(), ""))
|
.Select(word => WordProcessingMatcher().Replace(word.ToLower(), ""))
|
||||||
.Where(word => word.Length > 3)
|
.Where(word => word.Length > 3)
|
||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
return processedTitle.Count(word => !Dictionary.Contains(word));
|
return processedTitle.Count(word => !wordCollections.CommonWords.Contains(word));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,12 +22,14 @@ public static partial class GroupParser
|
|||||||
var nowebsiteTitle = WebsitePrefixExp().Replace(title, "");
|
var nowebsiteTitle = WebsitePrefixExp().Replace(title, "");
|
||||||
TitleParser.Parse(nowebsiteTitle, out var releaseTitle, out _);
|
TitleParser.Parse(nowebsiteTitle, out var releaseTitle, out _);
|
||||||
releaseTitle = releaseTitle.Replace(" ", ".");
|
releaseTitle = releaseTitle.Replace(" ", ".");
|
||||||
|
|
||||||
var trimmed = nowebsiteTitle
|
var trimmed = nowebsiteTitle.Replace(" ", ".");
|
||||||
.Replace(" ", ".")
|
if (releaseTitle != nowebsiteTitle)
|
||||||
.Replace(releaseTitle == nowebsiteTitle ? "" : releaseTitle, "")
|
{
|
||||||
.Replace(".-.", ".");
|
trimmed = trimmed.Replace(releaseTitle, "");
|
||||||
|
}
|
||||||
|
trimmed = trimmed.Replace(".-.", ".");
|
||||||
|
|
||||||
trimmed = TitleParser.SimplifyTitle(FileExtensionParser.RemoveFileExtension(trimmed.Trim()));
|
trimmed = TitleParser.SimplifyTitle(FileExtensionParser.RemoveFileExtension(trimmed.Trim()));
|
||||||
|
|
||||||
if (trimmed.Length == 0)
|
if (trimmed.Length == 0)
|
||||||
@@ -66,4 +68,4 @@ public static partial class GroupParser
|
|||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ public partial class ResolutionParser
|
|||||||
private static partial Regex R480Exp();
|
private static partial Regex R480Exp();
|
||||||
|
|
||||||
private static readonly Regex ResolutionExp = new(string.Join("|", R2160pExp(), R1080pExp(), R720pExp(), R576pExp(), R540pExp(), R480Exp()), RegexOptions.IgnoreCase);
|
private static readonly Regex ResolutionExp = new(string.Join("|", R2160pExp(), R1080pExp(), R720pExp(), R576pExp(), R540pExp(), R480Exp()), RegexOptions.IgnoreCase);
|
||||||
|
|
||||||
public static void Parse(string title, out Resolution? resolution, out string? source)
|
public static void Parse(string title, out Resolution? resolution, out string? source)
|
||||||
{
|
{
|
||||||
resolution = null;
|
resolution = null;
|
||||||
@@ -31,14 +31,16 @@ public partial class ResolutionParser
|
|||||||
|
|
||||||
if (result.Success)
|
if (result.Success)
|
||||||
{
|
{
|
||||||
foreach (var key in Enum.GetNames(typeof(Resolution)))
|
foreach (var resolutionEnum in Resolution.List)
|
||||||
{
|
{
|
||||||
if (result.Groups[key].Success)
|
if (!result.Groups[resolutionEnum.Name].Success)
|
||||||
{
|
{
|
||||||
resolution = Resolution.FromName(key);
|
continue;
|
||||||
source = result.Groups[key].Value;
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
resolution = resolutionEnum;
|
||||||
|
source = result.Groups[resolutionEnum.Name].Value;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -50,4 +52,4 @@ public partial class ResolutionParser
|
|||||||
resolution = Resolution.R480P;
|
resolution = Resolution.R480P;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ public static partial class SeasonParser
|
|||||||
[GeneratedRegex(@"(?<=[_.-])(?<airdate>(?<!\d)(?<airyear>[1-9]\d{1})(?<airmonth>[0-1][0-9])(?<airday>[0-3][0-9]))(?=[_.-])", RegexOptions.IgnoreCase)]
|
[GeneratedRegex(@"(?<=[_.-])(?<airdate>(?<!\d)(?<airyear>[1-9]\d{1})(?<airmonth>[0-1][0-9])(?<airday>[0-3][0-9]))(?=[_.-])", RegexOptions.IgnoreCase)]
|
||||||
private static partial Regex SixDigitAirDateMatchExp();
|
private static partial Regex SixDigitAirDateMatchExp();
|
||||||
|
|
||||||
public static Season Parse(string title)
|
public static Season? Parse(string title)
|
||||||
{
|
{
|
||||||
if (!PreValidation(title))
|
if (!PreValidation(title))
|
||||||
{
|
{
|
||||||
@@ -30,7 +30,10 @@ public static partial class SeasonParser
|
|||||||
{
|
{
|
||||||
var fixedDate = $"20{airYear}.{airMonth}.{airDay}";
|
var fixedDate = $"20{airYear}.{airMonth}.{airDay}";
|
||||||
|
|
||||||
simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"]?.Value ?? "", fixedDate);
|
if (!string.IsNullOrEmpty(sixDigitAirDateMatch.Groups["airdate"].Value))
|
||||||
|
{
|
||||||
|
simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -38,32 +41,33 @@ public static partial class SeasonParser
|
|||||||
{
|
{
|
||||||
var match = exp().Match(simpleTitle);
|
var match = exp().Match(simpleTitle);
|
||||||
|
|
||||||
if (match.Groups.Count > 0)
|
if (match.Groups.Count <= 0 || !match.Success)
|
||||||
{
|
{
|
||||||
var result = ParseMatchCollection(match, simpleTitle);
|
continue;
|
||||||
|
|
||||||
if (result.FullSeason && result.ReleaseTokens != null && result.ReleaseTokens.Contains("Special", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
|
||||||
result.FullSeason = false;
|
|
||||||
result.IsSpecial = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return new()
|
|
||||||
{
|
|
||||||
ReleaseTitle = title,
|
|
||||||
SeriesTitle = result.SeriesName,
|
|
||||||
// SeriesTitleInfo = 0,
|
|
||||||
Seasons = result.SeasonNumbers ?? [],
|
|
||||||
EpisodeNumbers = result.EpisodeNumbers ?? [],
|
|
||||||
AirDate = result.AirDate,
|
|
||||||
FullSeason = result.FullSeason,
|
|
||||||
IsPartialSeason = result.IsPartialSeason ?? false,
|
|
||||||
IsMultiSeason = result.IsMultiSeason ?? false,
|
|
||||||
IsSeasonExtra = result.IsSeasonExtra ?? false,
|
|
||||||
IsSpecial = result.IsSpecial ?? false,
|
|
||||||
SeasonPart = result.SeasonPart ?? 0,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var result = ParseMatchCollection(match, simpleTitle);
|
||||||
|
|
||||||
|
if (result.FullSeason && result.ReleaseTokens != null && result.ReleaseTokens.Contains("Special", StringComparison.OrdinalIgnoreCase))
|
||||||
|
{
|
||||||
|
result.FullSeason = false;
|
||||||
|
result.IsSpecial = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new()
|
||||||
|
{
|
||||||
|
ReleaseTitle = title,
|
||||||
|
SeriesTitle = result.SeriesName,
|
||||||
|
Seasons = result.SeasonNumbers ?? [],
|
||||||
|
EpisodeNumbers = result.EpisodeNumbers ?? [],
|
||||||
|
AirDate = result.AirDate,
|
||||||
|
FullSeason = result.FullSeason,
|
||||||
|
IsPartialSeason = result.IsPartialSeason ?? false,
|
||||||
|
IsMultiSeason = result.IsMultiSeason ?? false,
|
||||||
|
IsSeasonExtra = result.IsSeasonExtra ?? false,
|
||||||
|
IsSpecial = result.IsSpecial ?? false,
|
||||||
|
SeasonPart = result.SeasonPart ?? 0,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
@@ -78,7 +82,7 @@ public static partial class SeasonParser
|
|||||||
throw new("No match");
|
throw new("No match");
|
||||||
}
|
}
|
||||||
|
|
||||||
var seriesName = (groups["title"]?.Value ?? "")
|
var seriesName = groups["title"].Value
|
||||||
.Replace(".", " ")
|
.Replace(".", " ")
|
||||||
.Replace("_", " ")
|
.Replace("_", " ")
|
||||||
.Replace(RequestInfoExp().ToString(), "")
|
.Replace(RequestInfoExp().ToString(), "")
|
||||||
@@ -89,9 +93,9 @@ public static partial class SeasonParser
|
|||||||
SeriesName = seriesName,
|
SeriesName = seriesName,
|
||||||
};
|
};
|
||||||
|
|
||||||
var lastSeasonEpisodeStringIndex = IndexOfEnd(simpleTitle, groups["title"]?.Value ?? "");
|
var lastSeasonEpisodeStringIndex = IndexOfEnd(simpleTitle, groups["title"].Value);
|
||||||
|
|
||||||
if (int.TryParse(groups["airyear"]?.Value, out var airYear) && airYear >= 1900)
|
if (int.TryParse(groups["airyear"].Value, out var airYear) && airYear >= 1900)
|
||||||
{
|
{
|
||||||
var seasons = new List<string> {groups["season"]?.Value, groups["season1"]?.Value}
|
var seasons = new List<string> {groups["season"]?.Value, groups["season1"]?.Value}
|
||||||
.Where(x => !string.IsNullOrEmpty(x))
|
.Where(x => !string.IsNullOrEmpty(x))
|
||||||
@@ -300,4 +304,4 @@ public static partial class SeasonParser
|
|||||||
public DateTime? AirDate { get; set; }
|
public DateTime? AirDate { get; set; }
|
||||||
public string? ReleaseTokens { get; set; }
|
public string? ReleaseTokens { get; set; }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -88,16 +88,16 @@ public static partial class TitleParser
|
|||||||
VideoCodecsParser.Parse(title, out var videoCodec, out _);
|
VideoCodecsParser.Parse(title, out var videoCodec, out _);
|
||||||
AudioChannelsParser.Parse(title, out var channels, out _);
|
AudioChannelsParser.Parse(title, out var channels, out _);
|
||||||
AudioCodecsParser.Parse(title, out var audioCodec, out _);
|
AudioCodecsParser.Parse(title, out var audioCodec, out _);
|
||||||
var resolutionPosition = title.IndexOf(resolution.Value ?? string.Empty, StringComparison.Ordinal);
|
var resolutionPosition = title.IndexOf(resolution?.Value ?? string.Empty, StringComparison.Ordinal);
|
||||||
var videoCodecPosition = title.IndexOf(videoCodec.Value ?? string.Empty, StringComparison.Ordinal);
|
var videoCodecPosition = title.IndexOf(videoCodec?.Value ?? string.Empty, StringComparison.Ordinal);
|
||||||
var channelsPosition = title.IndexOf(channels.Value ?? string.Empty, StringComparison.Ordinal);
|
var channelsPosition = title.IndexOf(channels?.Value ?? string.Empty, StringComparison.Ordinal);
|
||||||
var audioCodecPosition = title.IndexOf(audioCodec.Value ?? string.Empty, StringComparison.Ordinal);
|
var audioCodecPosition = title.IndexOf(audioCodec?.Value ?? string.Empty, StringComparison.Ordinal);
|
||||||
var positions = new List<int> {resolutionPosition, audioCodecPosition, channelsPosition, videoCodecPosition}.Where(x => x > 0).ToList();
|
var positions = new List<int> {resolutionPosition, audioCodecPosition, channelsPosition, videoCodecPosition}.Where(x => x > 0).ToList();
|
||||||
|
|
||||||
if (positions.Count != 0)
|
if (positions.Count != 0)
|
||||||
{
|
{
|
||||||
var firstPosition = positions.Min();
|
var firstPosition = positions.Min();
|
||||||
parsedTitle = ReleaseTitleCleaner(title[..firstPosition]) ?? string.Empty;
|
parsedTitle = ReleaseTitleCleaner(title[..firstPosition]);
|
||||||
year = null;
|
year = null;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -149,10 +149,7 @@ public static partial class TitleParser
|
|||||||
trimmedTitle = trimmedTitle.Replace(LanguageExp().ToString(), "").Trim();
|
trimmedTitle = trimmedTitle.Replace(LanguageExp().ToString(), "").Trim();
|
||||||
trimmedTitle = trimmedTitle.Replace(SceneGarbageExp().ToString(), "").Trim();
|
trimmedTitle = trimmedTitle.Replace(SceneGarbageExp().ToString(), "").Trim();
|
||||||
|
|
||||||
foreach (var lang in Enum.GetValues(typeof(Language)).Cast<Language>())
|
trimmedTitle = Language.List.Aggregate(trimmedTitle, (current, lang) => current.Replace($@"\b{lang.Value.ToUpper()}", "").Trim());
|
||||||
{
|
|
||||||
trimmedTitle = trimmedTitle.Replace($@"\b{lang.ToString().ToUpper()}", "").Trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Look for gap formed by removing items
|
// Look for gap formed by removing items
|
||||||
trimmedTitle = trimmedTitle.Split(" ")[0];
|
trimmedTitle = trimmedTitle.Split(" ")[0];
|
||||||
@@ -168,7 +165,7 @@ public static partial class TitleParser
|
|||||||
{
|
{
|
||||||
if (parts.Length >= n + 2)
|
if (parts.Length >= n + 2)
|
||||||
{
|
{
|
||||||
nextPart = parts[n + 1] ?? "";
|
nextPart = parts[n + 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out _))
|
if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out _))
|
||||||
@@ -197,4 +194,4 @@ public static partial class TitleParser
|
|||||||
|
|
||||||
return result.Trim();
|
return result.Trim();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,21 @@
|
|||||||
namespace Producer.Features.ParseTorrentTitle;
|
namespace Producer.Features.ParseTorrentTitle;
|
||||||
|
|
||||||
public static class TorrentTitleParser
|
public static partial class TorrentTitleParser
|
||||||
{
|
{
|
||||||
public static ParsedFilename Parse(string name, bool isTv = false)
|
[GeneratedRegex(@"(season|episode)s?.?\d?", RegexOptions.IgnoreCase, "en-GB")]
|
||||||
|
private static partial Regex SeasonEpisode();
|
||||||
|
[GeneratedRegex(@"[se]\d\d", RegexOptions.IgnoreCase, "en-GB")]
|
||||||
|
private static partial Regex SeasonShort();
|
||||||
|
[GeneratedRegex(@"\b(tv|complete)\b", RegexOptions.IgnoreCase, "en-GB")]
|
||||||
|
private static partial Regex TvOrComplete();
|
||||||
|
[GeneratedRegex(@"\b(saison|stage).?\d", RegexOptions.IgnoreCase, "en-GB")]
|
||||||
|
private static partial Regex SeasonStage();
|
||||||
|
[GeneratedRegex(@"[a-z]\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase, "en-GB")]
|
||||||
|
private static partial Regex Season();
|
||||||
|
[GeneratedRegex(@"\d{2,4}\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase, "en-GB")]
|
||||||
|
private static partial Regex SeasonTwo();
|
||||||
|
|
||||||
|
public static ParsedFilename Parse(string name)
|
||||||
{
|
{
|
||||||
VideoCodecsParser.Parse(name, out var videoCodec, out _);
|
VideoCodecsParser.Parse(name, out var videoCodec, out _);
|
||||||
AudioCodecsParser.Parse(name, out var audioCodec, out _);
|
AudioCodecsParser.Parse(name, out var audioCodec, out _);
|
||||||
@@ -28,14 +41,21 @@ public static class TorrentTitleParser
|
|||||||
Multi = multi,
|
Multi = multi,
|
||||||
Complete = complete,
|
Complete = complete,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
var isTv = GetTypeByName(name) == TorrentType.Tv;
|
||||||
|
|
||||||
return !isTv ? ParseMovie(name, baseParsed) : ParseSeason(name, baseParsed);
|
return !isTv ? ParseMovie(name, baseParsed) : ParseSeason(name, baseParsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ParsedFilename ParseSeason(string name, BaseParsed baseParsed)
|
private static ParsedFilename ParseSeason(string name, BaseParsed baseParsed)
|
||||||
{
|
{
|
||||||
var season = SeasonParser.Parse(name);
|
var season = SeasonParser.Parse(name);
|
||||||
|
|
||||||
|
if (season == null)
|
||||||
|
{
|
||||||
|
return new();
|
||||||
|
}
|
||||||
|
|
||||||
return new()
|
return new()
|
||||||
{
|
{
|
||||||
Show = new()
|
Show = new()
|
||||||
@@ -69,7 +89,7 @@ public static class TorrentTitleParser
|
|||||||
private static ParsedFilename ParseMovie(string name, BaseParsed baseParsed)
|
private static ParsedFilename ParseMovie(string name, BaseParsed baseParsed)
|
||||||
{
|
{
|
||||||
TitleParser.Parse(name, out var title, out var year);
|
TitleParser.Parse(name, out var title, out var year);
|
||||||
|
|
||||||
baseParsed.Title = title;
|
baseParsed.Title = title;
|
||||||
baseParsed.Year = year;
|
baseParsed.Year = year;
|
||||||
|
|
||||||
@@ -93,4 +113,27 @@ public static class TorrentTitleParser
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
/// <summary>
/// Classifies a torrent by inspecting its name only.
/// </summary>
/// <param name="name">The raw torrent name to inspect.</param>
/// <returns>
/// <see cref="TorrentType.Tv"/> as soon as any of the TV indicator patterns matches;
/// otherwise <see cref="TorrentType.Movie"/>.
/// </returns>
private static TorrentType GetTypeByName(string name)
{
    // The patterns are evaluated in this fixed order and evaluation stops at the first hit.
    var looksLikeTv =
        SeasonEpisode().IsMatch(name)
        || SeasonShort().IsMatch(name)
        || TvOrComplete().IsMatch(name)
        || SeasonStage().IsMatch(name)
        || Season().IsMatch(name)
        || SeasonTwo().IsMatch(name);

    return looksLikeTv ? TorrentType.Tv : TorrentType.Movie;
}
|
||||||
|
}
|
||||||
|
|||||||
7
src/producer/Features/ParseTorrentTitle/TorrentType.cs
Normal file
7
src/producer/Features/ParseTorrentTitle/TorrentType.cs
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
namespace Producer.Features.ParseTorrentTitle;
|
||||||
|
|
||||||
|
/// <summary>
/// The kind of content a torrent name is classified as.
/// </summary>
public enum TorrentType
{
    /// <summary>Movie content. First member, so its underlying value is 0.</summary>
    Movie = 0,

    /// <summary>TV content. Underlying value 1.</summary>
    Tv = 1,
}
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
namespace Producer.Features.Text;
|
|
||||||
|
|
||||||
/// <summary>
/// Settings object for adult-content filtering, bound from the
/// <see cref="SectionName"/> configuration section.
/// </summary>
public class AdultContentConfiguration
{
    /// <summary>Name of the configuration section these settings are read from.</summary>
    public const string SectionName = "AdultContentSettings";

    /// <summary>Name of the JSON file that carries these settings.</summary>
    public const string Filename = "adultcontent.json";

    /// <summary>When <c>true</c>, the adult keyword filter is not registered at all.</summary>
    public bool Allow { get; set; }

    /// <summary>Threshold value handed to the fuzzy searcher's options.</summary>
    public int Threshold { get; set; }

    /// <summary>Keywords the fuzzy searcher is built from. Empty by default.</summary>
    public List<string> Keywords { get; set; } = new();

    /// <summary>Compound keywords. Empty by default.</summary>
    public List<string> CompoundKeywords { get; set; } = new();
}
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
namespace Producer.Features.Text;
|
|
||||||
|
|
||||||
/// <summary>
/// <see cref="IFuzzySearcher{T}"/> over a fixed set of strings.
/// </summary>
/// <param name="records">Candidate strings; copied into a list once at construction.</param>
/// <param name="options">Search options; a default <see cref="SearchOptions{T}"/> is used when <c>null</c>.</param>
public class FuzzyStringSearcher(IEnumerable<string> records, SearchOptions<string>? options = null) : IFuzzySearcher<string>
{
    private readonly IReadOnlyCollection<string> _records = records.ToList();
    private readonly SearchOptions<string> _options = options ?? new SearchOptions<string>();

    /// <summary>
    /// Runs a sorted extraction of <paramref name="text"/> against the stored records.
    /// </summary>
    /// <param name="text">The text to look up.</param>
    /// <returns>The extraction results that pass the computed cutoff, materialised as a list.</returns>
    public IReadOnlyCollection<ExtractedResult<string>> Search(string text)
    {
        // The cutoff is the configured threshold percentage applied to the input length, rounded up.
        // NOTE(review): this makes the cutoff grow with the text length - confirm that the
        // extraction API expects a length-relative value here rather than a fixed score.
        var thresholdRatio = _options.Threshold / 100.0;
        var cutoff = (int) Math.Ceiling(text.Length * thresholdRatio);

        var matches = Process.ExtractSorted(text, _records, cutoff: cutoff);

        return matches.ToList();
    }
}
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
namespace Producer.Features.Text;
|
|
||||||
|
|
||||||
/// <summary>
/// Contract for a fuzzy text lookup that yields results carrying values of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Type of the values carried by the results.</typeparam>
public interface IFuzzySearcher<T>
{
    /// <summary>
    /// Searches for <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to search for.</param>
    /// <returns>A read-only collection of extraction results.</returns>
    IReadOnlyCollection<ExtractedResult<T>> Search(string text);
}
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
namespace Producer.Features.Text;
|
|
||||||
|
|
||||||
/// <summary>
/// Options for a fuzzy searcher.
/// </summary>
/// <typeparam name="T">Type of the values being searched.</typeparam>
public class SearchOptions<T>
{
    // Value used when the caller does not set a threshold.
    private const int DefaultThreshold = 60;

    /// <summary>
    /// Threshold as a whole-number percentage. Init-only; defaults to 60.
    /// </summary>
    public int Threshold { get; init; } = DefaultThreshold;
}
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
namespace Producer.Features.Text;
|
|
||||||
|
|
||||||
/// <summary>
/// Container for the small value types used by the fuzzy search code.
/// </summary>
public class SearchResultRecords
{
    /// <summary>Inputs for scoring a candidate location.</summary>
    public record struct ScoreInfo(
        int Errors,
        int CurrentLocation,
        int ExpectedLocation,
        int Distance,
        bool IgnoreLocation);

    /// <summary>Outcome of a single search: whether it matched, and with what score.</summary>
    public record struct SearchResult(bool IsMatch, double Score);

    /// <summary>A pattern together with the chunks it has been split into.</summary>
    public record struct Index(List<Chunk> Chunks, string Pattern);

    /// <summary>One chunk of a pattern: its start index, its text and a per-character table.</summary>
    public record struct Chunk(int StartIndex, string Pattern, Dictionary<char, int> Alphabet);

    /// <summary>A matched value of type <typeparamref name="T"/> with its score.</summary>
    public record struct SearchResult<T>(T Value, double Score);
}
|
|
||||||
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
namespace Producer.Features.Text;
|
|
||||||
|
|
||||||
/// <summary>
/// Dependency-injection registration for the adult keyword filter.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Loads <see cref="AdultContentConfiguration"/> and, unless adult content is allowed,
    /// registers a singleton <see cref="IFuzzySearcher{T}"/> built from the configured keywords.
    /// </summary>
    /// <param name="services">The service collection to register into.</param>
    /// <param name="configuration">Configuration source for the adult-content section.</param>
    /// <returns>The same service collection, to allow chaining.</returns>
    public static IServiceCollection RegisterAdultKeywordFilter(this IServiceCollection services, IConfiguration configuration)
    {
        var settings = services.LoadConfigurationFromConfig<AdultContentConfiguration>(
            configuration,
            AdultContentConfiguration.SectionName);

        // No searcher is registered when adult content is explicitly allowed.
        if (!settings.Allow)
        {
            services.AddSingleton<IFuzzySearcher<string>>(
                _ =>
                {
                    var searchOptions = new SearchOptions<string>
                    {
                        Threshold = settings.Threshold,
                    };

                    return new FuzzyStringSearcher(settings.Keywords, searchOptions);
                });
        }

        return services;
    }
}
|
|
||||||
12
src/producer/Features/Wordlists/IWordCollections.cs
Normal file
12
src/producer/Features/Wordlists/IWordCollections.cs
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
namespace Producer.Features.Wordlists;
|
||||||
|
|
||||||
|
/// <summary>
/// Exposes the word lists used by the producer as hash sets, plus the operation that loads them.
/// </summary>
public interface IWordCollections
{
    /// <summary>Loads the word collections.</summary>
    /// <returns>A task that completes when loading has finished.</returns>
    Task LoadAsync();

    /// <summary>Set of single adult words.</summary>
    HashSet<string> AdultWords { get; }

    /// <summary>Set of adult compound phrases.</summary>
    HashSet<string> AdultCompoundPhrases { get; }

    /// <summary>Set of common words.</summary>
    HashSet<string> CommonWords { get; }
}
|
||||||
19
src/producer/Features/Wordlists/PopulationService.cs
Normal file
19
src/producer/Features/Wordlists/PopulationService.cs
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
namespace Producer.Features.Wordlists;
|
||||||
|
|
||||||
|
/// <summary>
/// Hosted service that loads the word collections when the host starts and logs their sizes.
/// </summary>
/// <param name="wordCollections">The collections to load.</param>
/// <param name="logger">Logger for progress and size information.</param>
public class PopulationService(IWordCollections wordCollections, ILogger<PopulationService> logger) : IHostedService
{
    /// <summary>
    /// Awaits <see cref="IWordCollections.LoadAsync"/> and then logs the number of entries in each set.
    /// </summary>
    /// <param name="cancellationToken">Supplied by the host; not consulted by this method.</param>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Loading word collections...");

        await wordCollections.LoadAsync();

        var commonCount = wordCollections.CommonWords.Count;
        logger.LogInformation("Common Words Count: {Count}", commonCount);

        var adultCount = wordCollections.AdultWords.Count;
        logger.LogInformation("Adult Words Count: {Count}", adultCount);

        var compoundCount = wordCollections.AdultCompoundPhrases.Count;
        logger.LogInformation("Adult Compound Phrases Count: {Count}", compoundCount);

        logger.LogInformation("Word collections loaded.");
    }

    /// <summary>
    /// Nothing to tear down; returns an already completed task.
    /// </summary>
    /// <param name="cancellationToken">Supplied by the host; not consulted by this method.</param>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        return Task.CompletedTask;
    }
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
namespace Producer.Features.Wordlists;
|
||||||
|
|
||||||
|
/// <summary>
/// Dependency-injection registration for the word list feature.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="WordCollections"/> as the singleton <see cref="IWordCollections"/>
    /// and adds <see cref="PopulationService"/> as a hosted service.
    /// </summary>
    /// <param name="services">The service collection to register into.</param>
    /// <returns>The same service collection, to allow chaining.</returns>
    public static IServiceCollection RegisterWordCollections(this IServiceCollection services) =>
        services
            .AddSingleton<IWordCollections, WordCollections>()
            .AddHostedService<PopulationService>();
}
|
||||||
44
src/producer/Features/Wordlists/WordCollections.cs
Normal file
44
src/producer/Features/Wordlists/WordCollections.cs
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
namespace Producer.Features.Wordlists;
|
||||||
|
|
||||||
|
/// <summary>
/// <see cref="IWordCollections"/> implementation that reads each word list from a text file
/// (one entry per line) located in the <c>Data</c> folder under <see cref="AppContext.BaseDirectory"/>.
/// </summary>
public class WordCollections : IWordCollections
{
    private const string AdultWordsFile = "adult-words.txt";
    private const string AdultCompoundPhrasesFile = "adult-compound-words.txt";
    private const string CommonWordsFile = "common-words.txt";

    /// <summary>Entries of <c>adult-words.txt</c>; empty until <see cref="LoadAsync"/> has completed.</summary>
    public HashSet<string> AdultWords { get; private set; } = [];

    /// <summary>Entries of <c>adult-compound-words.txt</c>; empty until <see cref="LoadAsync"/> has completed.</summary>
    public HashSet<string> AdultCompoundPhrases { get; private set; } = [];

    /// <summary>Entries of <c>common-words.txt</c>; empty until <see cref="LoadAsync"/> has completed.</summary>
    public HashSet<string> CommonWords { get; private set; } = [];

    /// <summary>
    /// Reads the three word files concurrently and replaces the exposed sets with their contents.
    /// </summary>
    /// <remarks>
    /// The sets are only replaced once every file has been read successfully, so a failing
    /// read never leaves the instance with some sets refreshed and others not.
    /// </remarks>
    /// <returns>A task that completes when all three sets have been replaced.</returns>
    /// <exception cref="IOException">A word file could not be read.</exception>
    public async Task LoadAsync()
    {
        var adultWordsTask = ReadWordListAsync(AdultWordsFile);
        var adultCompoundsTask = ReadWordListAsync(AdultCompoundPhrasesFile);
        var commonWordsTask = ReadWordListAsync(CommonWordsFile);

        await Task.WhenAll(adultWordsTask, adultCompoundsTask, commonWordsTask);

        // All three tasks have completed successfully here; awaiting them just unwraps the results.
        AdultWords = await adultWordsTask;
        AdultCompoundPhrases = await adultCompoundsTask;
        CommonWords = await commonWordsTask;
    }

    /// <summary>
    /// Reads one word file into a set, one entry per line.
    /// </summary>
    /// <param name="fileName">File name inside the <c>Data</c> folder.</param>
    /// <returns>The distinct, trimmed, non-empty lines of the file.</returns>
    private static async Task<HashSet<string>> ReadWordListAsync(string fileName)
    {
        var lines = await File.ReadAllLinesAsync(GetPath(fileName));
        var words = new HashSet<string>(lines.Length);

        foreach (var line in lines)
        {
            // Blank or padded lines must not end up as entries: an empty string in a
            // word set would match any empty token looked up against it.
            var word = line.Trim();

            if (word.Length > 0)
            {
                words.Add(word);
            }
        }

        return words;
    }

    /// <summary>Builds the absolute path of a word file below the application's base directory.</summary>
    private static string GetPath(string fileName) => Path.Combine(AppContext.BaseDirectory, "Data", fileName);
}
|
||||||
@@ -31,5 +31,5 @@ global using Producer.Features.CrawlerSupport;
|
|||||||
global using Producer.Features.DataProcessing;
|
global using Producer.Features.DataProcessing;
|
||||||
global using Producer.Features.JobSupport;
|
global using Producer.Features.JobSupport;
|
||||||
global using Producer.Features.ParseTorrentTitle;
|
global using Producer.Features.ParseTorrentTitle;
|
||||||
global using Producer.Features.Text;
|
global using Producer.Features.Wordlists;
|
||||||
global using Serilog;
|
global using Serilog;
|
||||||
|
|||||||
@@ -34,4 +34,11 @@
|
|||||||
</None>
|
</None>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<Content Remove="Data\**" />
|
||||||
|
<None Include="Data\**">
|
||||||
|
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||||
|
</None>
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ builder.Services
|
|||||||
.RegisterMassTransit()
|
.RegisterMassTransit()
|
||||||
.AddDataStorage()
|
.AddDataStorage()
|
||||||
.AddCrawlers()
|
.AddCrawlers()
|
||||||
.RegisterAdultKeywordFilter(builder.Configuration)
|
.RegisterWordCollections()
|
||||||
.AddQuartz(builder.Configuration);
|
.AddQuartz(builder.Configuration);
|
||||||
|
|
||||||
var host = builder.Build();
|
var host = builder.Build();
|
||||||
|
|||||||
Reference in New Issue
Block a user