This commit is contained in:
iPromKnight
2024-03-10 13:48:27 +00:00
parent c8a1ebd8ae
commit 6c03f79933
20 changed files with 252468 additions and 52 deletions

View File

@@ -2998,3 +2998,6 @@ zombie girl fucked
zombie porn
zombie porno
zumba xxx
double pénétration
evil angel
fist each others

File diff suppressed because it is too large Load Diff

View File

@@ -1668,3 +1668,6 @@ kink.com
sg4ge
tube8
x-art
mommygotboobs
threesomes
dp

234602
src/producer/Data/jav.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
namespace Producer.Extensions;
/// <summary>
/// Extension helpers for <see cref="string"/> values.
/// </summary>
public static class StringExtensions
{
    /// <summary>
    /// Fluent form of <see cref="string.IsNullOrEmpty(string?)"/>.
    /// </summary>
    /// <param name="value">The string to test; may be <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="value"/> is <c>null</c> or has zero length; otherwise <c>false</c>.
    /// </returns>
    /// <remarks>
    /// The <c>NotNullWhen(false)</c> annotation tells the compiler that a <c>false</c> result
    /// proves the value is non-null, so callers do not need a null-forgiving operator afterwards.
    /// </remarks>
    public static bool IsNullOrEmpty(
        [System.Diagnostics.CodeAnalysis.NotNullWhen(false)] this string? value) =>
        string.IsNullOrEmpty(value);
}

View File

@@ -4,7 +4,8 @@ public partial class DebridMediaManagerCrawler(
IHttpClientFactory httpClientFactory,
ILogger<DebridMediaManagerCrawler> logger,
IDataStorage storage,
GithubConfiguration githubConfiguration) : BaseCrawler(logger, storage)
GithubConfiguration githubConfiguration,
IParsingService parsingService) : BaseCrawler(logger, storage)
{
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
private static partial Regex HashCollectionMatcher();
@@ -100,45 +101,64 @@ public partial class DebridMediaManagerCrawler(
return null;
}
var torrent = new Torrent
{
Source = Source,
Name = filenameElement.GetString(),
Size = bytesElement.GetInt64().ToString(),
InfoHash = hashElement.ToString(),
Seeders = 0,
Leechers = 0,
};
if (string.IsNullOrEmpty(torrent.Name))
{
return null;
}
var parsedTorrent = TorrentTitleParser.Parse(torrent.Name);
var parsedTorrent = parsingService.Parse(filenameElement.GetString());
if (parsedTorrent.IsInvalid)
{
return null;
}
if (parsedTorrent.IsMovie)
var torrent = new Torrent
{
torrent.Category = "movies";
torrent.Name = parsedTorrent.Movie.Title;
Source = Source,
Size = bytesElement.GetInt64().ToString(),
InfoHash = hashElement.ToString(),
Seeders = 0,
Leechers = 0,
};
return torrent;
return parsedTorrent.Type switch
{
TorrentType.Movie => HandleMovieType(torrent, parsedTorrent),
TorrentType.Tv => HandleTvType(torrent, parsedTorrent),
_ => null,
};
}
private Torrent HandleMovieType(Torrent torrent, ParsedFilename parsedTorrent)
{
if (parsedTorrent.Movie.ReleaseTitle.IsNullOrEmpty())
{
return null;
}
if (parsedTorrent.IsShow)
if (!parsingService.HasNoBannedTerms(parsedTorrent.Movie.ReleaseTitle))
{
torrent.Category = "tv";
torrent.Name = parsedTorrent.Show.Title;
return torrent;
logger.LogWarning("Banned terms found in {Title}", parsedTorrent.Movie.ReleaseTitle);
return null;
}
return null;
torrent.Category = "movies";
torrent.Name = parsedTorrent.Movie.ReleaseTitle;
return torrent;
}
/// <summary>
/// Completes a torrent whose parsed name was classified as a TV release.
/// </summary>
/// <param name="torrent">Torrent to complete; its <c>Category</c> and <c>Name</c> are set on success.</param>
/// <param name="parsedTorrent">Parse result whose <c>Show.ReleaseTitle</c> supplies the torrent name.</param>
/// <returns>
/// The same <paramref name="torrent"/> instance with category <c>"tv"</c>, or <c>null</c> when the
/// release title is missing/empty or contains banned terms.
/// </returns>
private Torrent? HandleTvType(Torrent torrent, ParsedFilename parsedTorrent)
{
    // ParsedFilename.Show is declared nullable, so read it null-safely instead of
    // risking a NullReferenceException on an inconsistent parse result.
    var releaseTitle = parsedTorrent.Show?.ReleaseTitle;

    if (string.IsNullOrEmpty(releaseTitle))
    {
        return null;
    }

    if (!parsingService.HasNoBannedTerms(releaseTitle))
    {
        logger.LogWarning("Banned terms found in {Title}", releaseTitle);
        return null;
    }

    torrent.Category = "tv";
    torrent.Name = releaseTitle;

    return torrent;
}
private async Task InsertTorrentsForPage(JsonDocument json)

View File

@@ -29,10 +29,10 @@ internal static class ServiceCollectionExtensions
services.AddQuartz(
quartz =>
{
//RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration);
//RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
//RegisterPublisher(quartz, rabbitConfiguration);
RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
RegisterPublisher(quartz, rabbitConfiguration);
});
services.AddQuartzHostedService(

View File

@@ -2,6 +2,7 @@ namespace Producer.Features.ParseTorrentTitle;
public class BaseParsed
{
public string? ReleaseTitle { get; set; }
public string? Title { get; set; }
public string? Year { get; set; }
public Edition? Edition { get; set; }
@@ -15,4 +16,4 @@ public class BaseParsed
public List<Source> Sources { get; set; } = [];
public bool? Multi { get; set; }
public bool? Complete { get; set; }
}
}

View File

@@ -0,0 +1,23 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Title parsing and title-matching operations exposed for dependency injection.
/// Implemented by <c>ParsingService</c>.
/// </summary>
public interface IParsingService
{
/// <summary>
/// Parses a torrent/file name into a <see cref="ParsedFilename"/>.
/// <c>ParsingService</c> forwards this call to <see cref="ITorrentTitleParser.Parse"/>.
/// </summary>
ParsedFilename Parse(string name);
string Naked(string title);
List<string> GrabYears(string str);
List<int> GrabPossibleSeasonNums(string str);
bool HasYear(string test, List<string> years, bool strictCheck = false);
string RemoveDiacritics(string str);
string RemoveRepeats(string str);
int RomanToDecimal(string roman);
string ReplaceRomanWithDecimal(string input);
bool StrictEqual(string title1, string title2);
int CountTestTermsInTarget(string test, string target, bool shouldBeInSequence = false);
bool FlexEq(string test, string target, List<string> years);
bool MatchesTitle(string target, List<string> years, string test);
bool IncludesMustHaveTerms(List<string> mustHaveTerms, string testTitle);
/// <summary>
/// Checks <paramref name="testTitle"/> for banned terms, relative to <paramref name="targetTitle"/>.
/// In <c>ParsingService</c> the JAV, adult-star and compound-phrase checks only count a term
/// that occurs in <paramref name="testTitle"/> and does not occur in <paramref name="targetTitle"/>.
/// </summary>
/// <returns><c>true</c> when none of the banned-term checks fire.</returns>
bool HasNoBannedTerms(string targetTitle, string testTitle);
/// <summary>
/// Checks a single title for banned terms with no reference title to exempt terms.
/// In <c>ParsingService</c> this tests the title's words against the adult-word set and
/// searches the symbol-stripped title for JAV, adult-star and compound-phrase entries.
/// </summary>
/// <returns><c>true</c> when none of the banned-term checks fire.</returns>
bool HasNoBannedTerms(string targetTitle);
/// <summary>
/// In <c>ParsingService</c>: <c>MatchesTitle(targetTitle, years, testTitle)</c> combined with
/// <c>HasNoBannedTerms(targetTitle, testTitle)</c>; both must hold.
/// </summary>
bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle);
/// <summary>
/// Counts words of the title that are not in the common-words collection.
/// NOTE(review): how the title is tokenised before counting is not visible here — confirm in ParsingService.
/// </summary>
int CountUncommonWords(string title);
}

View File

@@ -0,0 +1,6 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Abstraction over torrent-title parsing so the parser can be injected rather than called statically.
/// Implemented by <c>TorrentTitleParser</c> and consumed by <c>ParsingService</c>.
/// </summary>
public interface ITorrentTitleParser
{
/// <summary>
/// Parses a torrent/file name into a <see cref="ParsedFilename"/>.
/// </summary>
/// <param name="name">The raw name to parse.</param>
ParsedFilename Parse(string name);
}

View File

@@ -4,8 +4,7 @@ public class ParsedFilename
{
public ParsedMovie? Movie { get; set; }
public ParsedTv? Show { get; set; }
public bool IsMovie => Movie is not null;
public bool IsShow => Show is not null;
public TorrentType? Type { get; set; }
public bool IsInvalid => (!IsMovie && !IsShow) || (IsMovie && IsShow);
}
public bool IsInvalid => Movie is null && Show is null;
}

View File

@@ -2,7 +2,6 @@ namespace Producer.Features.ParseTorrentTitle;
public class ParsedTv : BaseParsed
{
public string? ReleaseTitle { get; set; }
public string? SeriesTitle { get; set; }
public List<int> Seasons { get; set; } = [];
public List<int> EpisodeNumbers { get; set; } = [];
@@ -13,4 +12,4 @@ public class ParsedTv : BaseParsed
public bool IsSeasonExtra { get; set; }
public bool IsSpecial { get; set; }
public int SeasonPart { get; set; }
}
}

View File

@@ -1,8 +1,8 @@
namespace Producer.Features.Crawlers.Dmm;
namespace Producer.Features.ParseTorrentTitle;
public partial class ParsingService
{
[GeneratedRegex(@"[^a-z0-9]")]
[GeneratedRegex("[^a-z0-9]")]
private static partial Regex NakedMatcher();
[GeneratedRegex(@"\d{4}")]
@@ -16,14 +16,14 @@ public partial class ParsingService
[GeneratedRegex(@"m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})")]
private static partial Regex ReplaceRomanWithDecimalMatcher();
[GeneratedRegex(@"\s+")]
private static partial Regex WhitespaceMatcher();
[GeneratedRegex(@"\W+")]
private static partial Regex WordMatcher();
[GeneratedRegex(@"'s|\s&\s|\W")]
private static partial Regex WordProcessingMatcher();
}
}

View File

@@ -1,6 +1,6 @@
namespace Producer.Features.Crawlers.Dmm;
namespace Producer.Features.ParseTorrentTitle;
public partial class ParsingService(IWordCollections wordCollections)
public partial class ParsingService(IWordCollections wordCollections, ITorrentTitleParser torrentTitleParser) : IParsingService
{
private static readonly char[] WhitespaceSeparator = [' '];
@@ -198,8 +198,8 @@ public partial class ParsingService(IWordCollections wordCollections)
public bool FlexEq(string test, string target, List<string> years)
{
var movieTitle = TorrentTitleParser.Parse(test).Movie.Title.ToLower();
var tvTitle = TorrentTitleParser.Parse(test).Show.Title.ToLower();
var movieTitle = torrentTitleParser.Parse(test).Movie.Title.ToLower();
var tvTitle = torrentTitleParser.Parse(test).Show.Title.ToLower();
var target2 = WhitespaceMatcher().Replace(target, "");
var test2 = WhitespaceMatcher().Replace(test, "");
@@ -302,9 +302,36 @@ public partial class ParsingService(IWordCollections wordCollections)
var titleWithoutSymbols = string.Join(' ', WordMatcher().Split(testTitle.ToLower()));
var hasJavWords = wordCollections.Jav.Any(jav => !targetTitle.Contains(jav) && titleWithoutSymbols.Contains(jav));
var hasAdultStars = wordCollections.AdultStars.Any(star => !targetTitle.Contains(star) && titleWithoutSymbols.Contains(star));
var hasBannedCompoundWords = wordCollections.AdultCompoundPhrases.Any(compoundWord => !targetTitle.Contains(compoundWord) && titleWithoutSymbols.Contains(compoundWord));
return !hasBannedWords && !hasBannedCompoundWords;
return !hasBannedWords &&
!hasJavWords &&
!hasAdultStars &&
!hasBannedCompoundWords;
}
/// <summary>
/// Checks a single title against every banned-term collection.
/// </summary>
/// <param name="targetTitle">The title to inspect.</param>
/// <returns>
/// <c>true</c> when the title contains no adult word (whole-word match) and no JAV,
/// adult-star or compound-phrase entry (case-insensitive substring match on the
/// symbol-stripped title); otherwise <c>false</c>.
/// </returns>
public bool HasNoBannedTerms(string targetTitle)
{
    // Normalise once. Invariant lower-casing keeps the result independent of the
    // host culture (e.g. the Turkish dotless i).
    var words = WordMatcher().Split(targetTitle.ToLowerInvariant());
    var inputWithoutSymbols = string.Join(' ', words);

    // Cheapest check first: hash lookups per word. Splitting can yield empty tokens,
    // which must never match a stray blank entry in the word list.
    if (words.Any(word => word.Length > 0 && wordCollections.AdultWords.Contains(word)))
    {
        return false;
    }

    // The remaining checks are linear substring scans over whole collections, so
    // evaluate them lazily and stop at the first hit.
    return !ContainsAny(wordCollections.AdultCompoundPhrases)
        && !ContainsAny(wordCollections.Jav)
        && !ContainsAny(wordCollections.AdultStars);

    // Blank entries are skipped: string.Contains("") is always true and would
    // otherwise flag every title as banned.
    bool ContainsAny(IEnumerable<string> terms) =>
        terms.Any(term =>
            !string.IsNullOrWhiteSpace(term) &&
            inputWithoutSymbols.Contains(term, StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// A test title is acceptable when it matches the target title and carries no banned terms.
/// </summary>
public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle)
{
    // Title match is evaluated first; the banned-term scan only runs when it succeeds.
    if (!MatchesTitle(targetTitle, years, testTitle))
    {
        return false;
    }

    return HasNoBannedTerms(targetTitle, testTitle);
}
@@ -318,4 +345,6 @@ public partial class ParsingService(IWordCollections wordCollections)
return processedTitle.Count(word => !wordCollections.CommonWords.Contains(word));
}
/// <summary>
/// Parses a name by delegating to the injected <c>ITorrentTitleParser</c>.
/// </summary>
public ParsedFilename Parse(string name)
{
    return torrentTitleParser.Parse(name);
}
}

View File

@@ -0,0 +1,12 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Dependency-injection wiring for the torrent-title parsing feature.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers <c>ParsingService</c> and <c>TorrentTitleParser</c> as singletons behind their interfaces.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same service collection, so further registrations can be chained.</returns>
    public static IServiceCollection RegisterParseTorrentTitle(this IServiceCollection services) =>
        services
            .AddSingleton<IParsingService, ParsingService>()
            .AddSingleton<ITorrentTitleParser, TorrentTitleParser>();
}

View File

@@ -1,6 +1,6 @@
namespace Producer.Features.ParseTorrentTitle;
public static partial class TorrentTitleParser
public partial class TorrentTitleParser : ITorrentTitleParser
{
[GeneratedRegex(@"(season|episode)s?.?\d?", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SeasonEpisode();
@@ -15,7 +15,7 @@ public static partial class TorrentTitleParser
[GeneratedRegex(@"\d{2,4}\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SeasonTwo();
public static ParsedFilename Parse(string name)
public ParsedFilename Parse(string name)
{
VideoCodecsParser.Parse(name, out var videoCodec, out _);
AudioCodecsParser.Parse(name, out var audioCodec, out _);
@@ -83,6 +83,7 @@ public static partial class TorrentTitleParser
Multi = baseParsed.Multi,
Revision = baseParsed.Revision,
},
Type = TorrentType.Tv,
};
}
@@ -97,6 +98,7 @@ public static partial class TorrentTitleParser
{
Movie = new()
{
ReleaseTitle = name,
Title = baseParsed.Title,
Year = baseParsed.Year,
Edition = baseParsed.Edition,
@@ -111,6 +113,7 @@ public static partial class TorrentTitleParser
Multi = baseParsed.Multi,
Revision = baseParsed.Revision,
},
Type = TorrentType.Movie,
};
}

View File

@@ -8,5 +8,9 @@ public interface IWordCollections
HashSet<string> CommonWords { get; }
HashSet<string> Jav { get; }
HashSet<string> AdultStars { get; }
Task LoadAsync();
}

View File

@@ -11,6 +11,8 @@ public class PopulationService(IWordCollections wordCollections, ILogger<Populat
logger.LogInformation("Common Words Count: {Count}", wordCollections.CommonWords.Count);
logger.LogInformation("Adult Words Count: {Count}", wordCollections.AdultWords.Count);
logger.LogInformation("Adult Compound Phrases Count: {Count}", wordCollections.AdultCompoundPhrases.Count);
logger.LogInformation("Jav Count: {Count}", wordCollections.Jav.Count);
logger.LogInformation("Adult Stars Count: {Count}", wordCollections.AdultStars.Count);
logger.LogInformation("Word collections loaded.");
}

View File

@@ -4,10 +4,17 @@ public class WordCollections : IWordCollections
{
private const string AdultWordsFile = "adult-words.txt";
private const string AdultCompoundPhrasesFile = "adult-compound-words.txt";
private const string AdultStarsFile = "adult-stars.txt";
private const string JavFile = "jav.txt";
private const string CommonWordsFile = "common-words.txt";
public HashSet<string> AdultWords { get; private set; } = [];
public HashSet<string> AdultCompoundPhrases { get; private set; } = [];
public HashSet<string> AdultStars { get; private set; } = [];
public HashSet<string> Jav { get; private set; } = [];
public HashSet<string> CommonWords { get; private set; } = [];
public async Task LoadAsync()
@@ -16,7 +23,9 @@ public class WordCollections : IWordCollections
{
LoadAdultWords(),
LoadAdultCompounds(),
LoadCommonWords()
LoadCommonWords(),
LoadJav(),
LoadAdultStars(),
};
await Task.WhenAll(loaderTasks);
@@ -40,5 +49,17 @@ public class WordCollections : IWordCollections
AdultWords = [..adultWords];
}
/// <summary>
/// Loads the JAV term list from its data file into <c>Jav</c>, one entry per line.
/// </summary>
private async Task LoadJav()
{
    var lines = await File.ReadAllLinesAsync(GetPath(JavFile));

    // Blank lines must not become entries: consumers match entries by substring,
    // and an empty or whitespace-only entry would match practically every title.
    var entries = new HashSet<string>(lines.Length);
    foreach (var line in lines)
    {
        if (!string.IsNullOrWhiteSpace(line))
        {
            entries.Add(line);
        }
    }

    Jav = entries;
}
/// <summary>
/// Loads the adult-star name list from its data file into <c>AdultStars</c>, one entry per line.
/// </summary>
private async Task LoadAdultStars()
{
    var lines = await File.ReadAllLinesAsync(GetPath(AdultStarsFile));

    // Blank lines must not become entries: consumers match entries by substring,
    // and an empty or whitespace-only entry would match practically every title.
    var entries = new HashSet<string>(lines.Length);
    foreach (var line in lines)
    {
        if (!string.IsNullOrWhiteSpace(line))
        {
            entries.Add(line);
        }
    }

    AdultStars = entries;
}
/// <summary>
/// Resolves a data file name to its location in the "Data" folder under the application base directory.
/// </summary>
private static string GetPath(string fileName)
{
    var dataDirectory = Path.Combine(AppContext.BaseDirectory, "Data");
    return Path.Combine(dataDirectory, fileName);
}
}

View File

@@ -11,6 +11,7 @@ builder.Services
.AddDataStorage()
.AddCrawlers()
.RegisterWordCollections()
.RegisterParseTorrentTitle()
.AddQuartz(builder.Configuration);
var host = builder.Build();