This commit is contained in:
iPromKnight
2024-03-10 13:48:27 +00:00
parent c8a1ebd8ae
commit 6c03f79933
20 changed files with 252468 additions and 52 deletions

View File

@@ -2998,3 +2998,6 @@ zombie girl fucked
zombie porn zombie porn
zombie porno zombie porno
zumba xxx zumba xxx
double pénétration
evil angel
fist each others

File diff suppressed because it is too large Load Diff

View File

@@ -1668,3 +1668,6 @@ kink.com
sg4ge sg4ge
tube8 tube8
x-art x-art
mommygotboobs
threesomes
dp

234602
src/producer/Data/jav.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
namespace Producer.Extensions;
/// <summary>
/// Convenience extensions for <see cref="string"/> values.
/// </summary>
public static class StringExtensions
{
    /// <summary>
    /// Extension-method form of <see cref="string.IsNullOrEmpty(string)"/>.
    /// </summary>
    /// <param name="value">The string to test; may be <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="value"/> is <c>null</c> or has zero length;
    /// otherwise <c>false</c>.
    /// </returns>
    /// <remarks>
    /// The parameter carries <c>NotNullWhen(false)</c> so that nullable flow analysis
    /// treats the argument as non-null after a <c>false</c> result, exactly as it does
    /// for the framework method this wraps. The attribute is fully qualified so no
    /// extra using directive is required.
    /// </remarks>
    public static bool IsNullOrEmpty(
        [System.Diagnostics.CodeAnalysis.NotNullWhen(false)] this string? value) =>
        string.IsNullOrEmpty(value);
}

View File

@@ -4,7 +4,8 @@ public partial class DebridMediaManagerCrawler(
IHttpClientFactory httpClientFactory, IHttpClientFactory httpClientFactory,
ILogger<DebridMediaManagerCrawler> logger, ILogger<DebridMediaManagerCrawler> logger,
IDataStorage storage, IDataStorage storage,
GithubConfiguration githubConfiguration) : BaseCrawler(logger, storage) GithubConfiguration githubConfiguration,
IParsingService parsingService) : BaseCrawler(logger, storage)
{ {
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")] [GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
private static partial Regex HashCollectionMatcher(); private static partial Regex HashCollectionMatcher();
@@ -100,45 +101,64 @@ public partial class DebridMediaManagerCrawler(
return null; return null;
} }
var torrent = new Torrent var parsedTorrent = parsingService.Parse(filenameElement.GetString());
{
Source = Source,
Name = filenameElement.GetString(),
Size = bytesElement.GetInt64().ToString(),
InfoHash = hashElement.ToString(),
Seeders = 0,
Leechers = 0,
};
if (string.IsNullOrEmpty(torrent.Name))
{
return null;
}
var parsedTorrent = TorrentTitleParser.Parse(torrent.Name);
if (parsedTorrent.IsInvalid) if (parsedTorrent.IsInvalid)
{ {
return null; return null;
} }
if (parsedTorrent.IsMovie) var torrent = new Torrent
{ {
torrent.Category = "movies"; Source = Source,
torrent.Name = parsedTorrent.Movie.Title; Size = bytesElement.GetInt64().ToString(),
InfoHash = hashElement.ToString(),
Seeders = 0,
Leechers = 0,
};
return torrent; return parsedTorrent.Type switch
{
TorrentType.Movie => HandleMovieType(torrent, parsedTorrent),
TorrentType.Tv => HandleTvType(torrent, parsedTorrent),
_ => null,
};
}
private Torrent HandleMovieType(Torrent torrent, ParsedFilename parsedTorrent)
{
if (parsedTorrent.Movie.ReleaseTitle.IsNullOrEmpty())
{
return null;
} }
if (parsedTorrent.IsShow) if (!parsingService.HasNoBannedTerms(parsedTorrent.Movie.ReleaseTitle))
{ {
torrent.Category = "tv"; logger.LogWarning("Banned terms found in {Title}", parsedTorrent.Movie.ReleaseTitle);
torrent.Name = parsedTorrent.Show.Title; return null;
return torrent;
} }
return null; torrent.Category = "movies";
torrent.Name = parsedTorrent.Movie.ReleaseTitle;
return torrent;
}
private Torrent HandleTvType(Torrent torrent, ParsedFilename parsedTorrent)
{
if (parsedTorrent.Show.ReleaseTitle.IsNullOrEmpty())
{
return null;
}
if (!parsingService.HasNoBannedTerms(parsedTorrent.Show.ReleaseTitle))
{
logger.LogWarning("Banned terms found in {Title}", parsedTorrent.Show.ReleaseTitle);
return null;
}
torrent.Category = "tv";
torrent.Name = parsedTorrent.Show.ReleaseTitle;
return torrent;
} }
private async Task InsertTorrentsForPage(JsonDocument json) private async Task InsertTorrentsForPage(JsonDocument json)

View File

@@ -29,10 +29,10 @@ internal static class ServiceCollectionExtensions
services.AddQuartz( services.AddQuartz(
quartz => quartz =>
{ {
//RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration); RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration); RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration);
//RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration); RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
//RegisterPublisher(quartz, rabbitConfiguration); RegisterPublisher(quartz, rabbitConfiguration);
}); });
services.AddQuartzHostedService( services.AddQuartzHostedService(

View File

@@ -2,6 +2,7 @@ namespace Producer.Features.ParseTorrentTitle;
public class BaseParsed public class BaseParsed
{ {
public string? ReleaseTitle { get; set; }
public string? Title { get; set; } public string? Title { get; set; }
public string? Year { get; set; } public string? Year { get; set; }
public Edition? Edition { get; set; } public Edition? Edition { get; set; }

View File

@@ -0,0 +1,23 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Title parsing, normalisation and matching operations, plus the banned-term checks
/// used to reject unwanted titles.
/// </summary>
/// <remarks>
/// Members are grouped by purpose. Where a member has no description, its behaviour is
/// not visible from this declaration and should be confirmed against the implementation.
/// </remarks>
public interface IParsingService
{
    // ---- Parsing ----

    /// <summary>Parses a torrent name into a <see cref="ParsedFilename"/>.</summary>
    /// <param name="name">The torrent or file name to parse.</param>
    ParsedFilename Parse(string name);

    // ---- Normalisation helpers (presumably string clean-up, judging by the names; verify) ----

    string Naked(string title);

    string RemoveDiacritics(string str);

    string RemoveRepeats(string str);

    int RomanToDecimal(string roman);

    string ReplaceRomanWithDecimal(string input);

    // ---- Year / season extraction ----

    List<string> GrabYears(string str);

    List<int> GrabPossibleSeasonNums(string str);

    bool HasYear(string test, List<string> years, bool strictCheck = false);

    // ---- Title comparison ----

    bool StrictEqual(string title1, string title2);

    int CountTestTermsInTarget(string test, string target, bool shouldBeInSequence = false);

    bool FlexEq(string test, string target, List<string> years);

    bool MatchesTitle(string target, List<string> years, string test);

    bool IncludesMustHaveTerms(List<string> mustHaveTerms, string testTitle);

    /// <summary>
    /// Combined check. <c>ParsingService</c> implements it as
    /// <see cref="MatchesTitle"/> and <see cref="HasNoBannedTerms(string, string)"/> both succeeding.
    /// </summary>
    bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle);

    /// <summary>
    /// Counts words in <paramref name="title"/>. <c>ParsingService</c> counts the words
    /// that are not in the common-words collection.
    /// </summary>
    int CountUncommonWords(string title);

    // ---- Banned-term checks ----

    /// <summary>
    /// Checks a single title for banned terms. <c>ParsingService</c> returns <c>true</c>
    /// only when the title contains no adult word, JAV term, adult star name or adult
    /// compound phrase.
    /// </summary>
    bool HasNoBannedTerms(string targetTitle);

    /// <summary>
    /// Checks <paramref name="testTitle"/> for banned terms relative to
    /// <paramref name="targetTitle"/>. In <c>ParsingService</c> a phrase only counts
    /// against the test title when the target title does not itself contain it.
    /// </summary>
    bool HasNoBannedTerms(string targetTitle, string testTitle);
}

View File

@@ -0,0 +1,6 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Turns a torrent name into a structured <see cref="ParsedFilename"/>.
/// </summary>
public interface ITorrentTitleParser
{
/// <summary>Parses the given torrent name.</summary>
/// <param name="name">The torrent or file name to parse.</param>
/// <returns>
/// The parse result. NOTE(review): the TorrentTitleParser implementation appears to
/// populate either the Movie or the Show member and set Type to match — confirm
/// against the full implementation.
/// </returns>
ParsedFilename Parse(string name);
}

View File

@@ -4,8 +4,7 @@ public class ParsedFilename
{ {
public ParsedMovie? Movie { get; set; } public ParsedMovie? Movie { get; set; }
public ParsedTv? Show { get; set; } public ParsedTv? Show { get; set; }
public bool IsMovie => Movie is not null; public TorrentType? Type { get; set; }
public bool IsShow => Show is not null;
public bool IsInvalid => (!IsMovie && !IsShow) || (IsMovie && IsShow); public bool IsInvalid => Movie is null && Show is null;
} }

View File

@@ -2,7 +2,6 @@ namespace Producer.Features.ParseTorrentTitle;
public class ParsedTv : BaseParsed public class ParsedTv : BaseParsed
{ {
public string? ReleaseTitle { get; set; }
public string? SeriesTitle { get; set; } public string? SeriesTitle { get; set; }
public List<int> Seasons { get; set; } = []; public List<int> Seasons { get; set; } = [];
public List<int> EpisodeNumbers { get; set; } = []; public List<int> EpisodeNumbers { get; set; } = [];

View File

@@ -1,8 +1,8 @@
namespace Producer.Features.Crawlers.Dmm; namespace Producer.Features.ParseTorrentTitle;
public partial class ParsingService public partial class ParsingService
{ {
[GeneratedRegex(@"[^a-z0-9]")] [GeneratedRegex("[^a-z0-9]")]
private static partial Regex NakedMatcher(); private static partial Regex NakedMatcher();
[GeneratedRegex(@"\d{4}")] [GeneratedRegex(@"\d{4}")]

View File

@@ -1,6 +1,6 @@
namespace Producer.Features.Crawlers.Dmm; namespace Producer.Features.ParseTorrentTitle;
public partial class ParsingService(IWordCollections wordCollections) public partial class ParsingService(IWordCollections wordCollections, ITorrentTitleParser torrentTitleParser) : IParsingService
{ {
private static readonly char[] WhitespaceSeparator = [' ']; private static readonly char[] WhitespaceSeparator = [' '];
@@ -198,8 +198,8 @@ public partial class ParsingService(IWordCollections wordCollections)
public bool FlexEq(string test, string target, List<string> years) public bool FlexEq(string test, string target, List<string> years)
{ {
var movieTitle = TorrentTitleParser.Parse(test).Movie.Title.ToLower(); var movieTitle = torrentTitleParser.Parse(test).Movie.Title.ToLower();
var tvTitle = TorrentTitleParser.Parse(test).Show.Title.ToLower(); var tvTitle = torrentTitleParser.Parse(test).Show.Title.ToLower();
var target2 = WhitespaceMatcher().Replace(target, ""); var target2 = WhitespaceMatcher().Replace(target, "");
var test2 = WhitespaceMatcher().Replace(test, ""); var test2 = WhitespaceMatcher().Replace(test, "");
@@ -302,9 +302,36 @@ public partial class ParsingService(IWordCollections wordCollections)
var titleWithoutSymbols = string.Join(' ', WordMatcher().Split(testTitle.ToLower())); var titleWithoutSymbols = string.Join(' ', WordMatcher().Split(testTitle.ToLower()));
var hasJavWords = wordCollections.Jav.Any(jav => !targetTitle.Contains(jav) && titleWithoutSymbols.Contains(jav));
var hasAdultStars = wordCollections.AdultStars.Any(star => !targetTitle.Contains(star) && titleWithoutSymbols.Contains(star));
var hasBannedCompoundWords = wordCollections.AdultCompoundPhrases.Any(compoundWord => !targetTitle.Contains(compoundWord) && titleWithoutSymbols.Contains(compoundWord)); var hasBannedCompoundWords = wordCollections.AdultCompoundPhrases.Any(compoundWord => !targetTitle.Contains(compoundWord) && titleWithoutSymbols.Contains(compoundWord));
return !hasBannedWords && !hasBannedCompoundWords; return !hasBannedWords &&
!hasJavWords &&
!hasAdultStars &&
!hasBannedCompoundWords;
}
/// <summary>
/// Checks a single title against every banned-term collection.
/// </summary>
/// <param name="targetTitle">The title to inspect.</param>
/// <returns>
/// <c>true</c> when the title contains no adult word, JAV term, adult star name or
/// adult compound phrase; otherwise <c>false</c>.
/// </returns>
public bool HasNoBannedTerms(string targetTitle)
{
    // Tokenise once with culture-invariant lowering: the result must not depend on the
    // host's current culture (e.g. Turkish casing of 'I'), and the same token array
    // serves both the word lookup and the symbol-free string below.
    var words = WordMatcher().Split(targetTitle.ToLowerInvariant());

    if (words.Any(word => wordCollections.AdultWords.Contains(word)))
    {
        return false;
    }

    var inputWithoutSymbols = string.Join(' ', words);

    // An empty term would match every title because Contains("") is always true,
    // so empty entries (e.g. from a blank line in a data file) are ignored.
    bool ContainsAnyOf(IEnumerable<string> terms) =>
        terms.Any(term => term.Length > 0 &&
                          inputWithoutSymbols.Contains(term, StringComparison.OrdinalIgnoreCase));

    return !ContainsAnyOf(wordCollections.Jav) &&
           !ContainsAnyOf(wordCollections.AdultStars) &&
           !ContainsAnyOf(wordCollections.AdultCompoundPhrases);
}
public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle) => MatchesTitle(targetTitle, years, testTitle) && HasNoBannedTerms(targetTitle, testTitle); public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle) => MatchesTitle(targetTitle, years, testTitle) && HasNoBannedTerms(targetTitle, testTitle);
@@ -318,4 +345,6 @@ public partial class ParsingService(IWordCollections wordCollections)
return processedTitle.Count(word => !wordCollections.CommonWords.Contains(word)); return processedTitle.Count(word => !wordCollections.CommonWords.Contains(word));
} }
public ParsedFilename Parse(string name) => torrentTitleParser.Parse(name);
} }

View File

@@ -0,0 +1,12 @@
namespace Producer.Features.ParseTorrentTitle;
/// <summary>
/// Dependency-injection wiring for the torrent-title parsing feature.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Adds <see cref="IParsingService"/> and <see cref="ITorrentTitleParser"/> to the
    /// container, each as a singleton.
    /// </summary>
    /// <param name="services">The collection that receives the registrations.</param>
    /// <returns>The service collection, so further registrations can be chained.</returns>
    public static IServiceCollection RegisterParseTorrentTitle(this IServiceCollection services) =>
        services
            .AddSingleton<IParsingService, ParsingService>()
            .AddSingleton<ITorrentTitleParser, TorrentTitleParser>();
}

View File

@@ -1,6 +1,6 @@
namespace Producer.Features.ParseTorrentTitle; namespace Producer.Features.ParseTorrentTitle;
public static partial class TorrentTitleParser public partial class TorrentTitleParser : ITorrentTitleParser
{ {
[GeneratedRegex(@"(season|episode)s?.?\d?", RegexOptions.IgnoreCase, "en-GB")] [GeneratedRegex(@"(season|episode)s?.?\d?", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SeasonEpisode(); private static partial Regex SeasonEpisode();
@@ -15,7 +15,7 @@ public static partial class TorrentTitleParser
[GeneratedRegex(@"\d{2,4}\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase, "en-GB")] [GeneratedRegex(@"\d{2,4}\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase, "en-GB")]
private static partial Regex SeasonTwo(); private static partial Regex SeasonTwo();
public static ParsedFilename Parse(string name) public ParsedFilename Parse(string name)
{ {
VideoCodecsParser.Parse(name, out var videoCodec, out _); VideoCodecsParser.Parse(name, out var videoCodec, out _);
AudioCodecsParser.Parse(name, out var audioCodec, out _); AudioCodecsParser.Parse(name, out var audioCodec, out _);
@@ -83,6 +83,7 @@ public static partial class TorrentTitleParser
Multi = baseParsed.Multi, Multi = baseParsed.Multi,
Revision = baseParsed.Revision, Revision = baseParsed.Revision,
}, },
Type = TorrentType.Tv,
}; };
} }
@@ -97,6 +98,7 @@ public static partial class TorrentTitleParser
{ {
Movie = new() Movie = new()
{ {
ReleaseTitle = name,
Title = baseParsed.Title, Title = baseParsed.Title,
Year = baseParsed.Year, Year = baseParsed.Year,
Edition = baseParsed.Edition, Edition = baseParsed.Edition,
@@ -111,6 +113,7 @@ public static partial class TorrentTitleParser
Multi = baseParsed.Multi, Multi = baseParsed.Multi,
Revision = baseParsed.Revision, Revision = baseParsed.Revision,
}, },
Type = TorrentType.Movie,
}; };
} }

View File

@@ -8,5 +8,9 @@ public interface IWordCollections
HashSet<string> CommonWords { get; } HashSet<string> CommonWords { get; }
HashSet<string> Jav { get; }
HashSet<string> AdultStars { get; }
Task LoadAsync(); Task LoadAsync();
} }

View File

@@ -11,6 +11,8 @@ public class PopulationService(IWordCollections wordCollections, ILogger<Populat
logger.LogInformation("Common Words Count: {Count}", wordCollections.CommonWords.Count); logger.LogInformation("Common Words Count: {Count}", wordCollections.CommonWords.Count);
logger.LogInformation("Adult Words Count: {Count}", wordCollections.AdultWords.Count); logger.LogInformation("Adult Words Count: {Count}", wordCollections.AdultWords.Count);
logger.LogInformation("Adult Compound Phrases Count: {Count}", wordCollections.AdultCompoundPhrases.Count); logger.LogInformation("Adult Compound Phrases Count: {Count}", wordCollections.AdultCompoundPhrases.Count);
logger.LogInformation("Jav Count: {Count}", wordCollections.Jav.Count);
logger.LogInformation("Adult Stars Count: {Count}", wordCollections.AdultStars.Count);
logger.LogInformation("Word collections loaded."); logger.LogInformation("Word collections loaded.");
} }

View File

@@ -4,10 +4,17 @@ public class WordCollections : IWordCollections
{ {
private const string AdultWordsFile = "adult-words.txt"; private const string AdultWordsFile = "adult-words.txt";
private const string AdultCompoundPhrasesFile = "adult-compound-words.txt"; private const string AdultCompoundPhrasesFile = "adult-compound-words.txt";
private const string AdultStarsFile = "adult-stars.txt";
private const string JavFile = "jav.txt";
private const string CommonWordsFile = "common-words.txt"; private const string CommonWordsFile = "common-words.txt";
public HashSet<string> AdultWords { get; private set; } = []; public HashSet<string> AdultWords { get; private set; } = [];
public HashSet<string> AdultCompoundPhrases { get; private set; } = []; public HashSet<string> AdultCompoundPhrases { get; private set; } = [];
public HashSet<string> AdultStars { get; private set; } = [];
public HashSet<string> Jav { get; private set; } = [];
public HashSet<string> CommonWords { get; private set; } = []; public HashSet<string> CommonWords { get; private set; } = [];
public async Task LoadAsync() public async Task LoadAsync()
@@ -16,7 +23,9 @@ public class WordCollections : IWordCollections
{ {
LoadAdultWords(), LoadAdultWords(),
LoadAdultCompounds(), LoadAdultCompounds(),
LoadCommonWords() LoadCommonWords(),
LoadJav(),
LoadAdultStars(),
}; };
await Task.WhenAll(loaderTasks); await Task.WhenAll(loaderTasks);
@@ -40,5 +49,17 @@ public class WordCollections : IWordCollections
AdultWords = [..adultWords]; AdultWords = [..adultWords];
} }
/// <summary>
/// Reads the JAV term file and replaces <c>Jav</c> with its entries.
/// </summary>
/// <remarks>
/// Each line is trimmed and empty lines are dropped. The entries are used in
/// substring checks, where an empty entry would match every title.
/// </remarks>
private async Task LoadJav()
{
    var lines = await File.ReadAllLinesAsync(GetPath(JavFile));

    HashSet<string> entries = [];
    foreach (var line in lines)
    {
        var entry = line.Trim();
        if (entry.Length > 0)
        {
            entries.Add(entry);
        }
    }

    Jav = entries;
}
/// <summary>
/// Reads the adult-stars file and replaces <c>AdultStars</c> with its entries.
/// </summary>
/// <remarks>
/// Each line is trimmed and empty lines are dropped. The entries are used in
/// substring checks, where an empty entry would match every title.
/// </remarks>
private async Task LoadAdultStars()
{
    var lines = await File.ReadAllLinesAsync(GetPath(AdultStarsFile));

    HashSet<string> entries = [];
    foreach (var line in lines)
    {
        var entry = line.Trim();
        if (entry.Length > 0)
        {
            entries.Add(entry);
        }
    }

    AdultStars = entries;
}
private static string GetPath(string fileName) => Path.Combine(AppContext.BaseDirectory, "Data", fileName); private static string GetPath(string fileName) => Path.Combine(AppContext.BaseDirectory, "Data", fileName);
} }

View File

@@ -11,6 +11,7 @@ builder.Services
.AddDataStorage() .AddDataStorage()
.AddCrawlers() .AddCrawlers()
.RegisterWordCollections() .RegisterWordCollections()
.RegisterParseTorrentTitle()
.AddQuartz(builder.Configuration); .AddQuartz(builder.Configuration);
var host = builder.Build(); var host = builder.Build();