Complete
This commit is contained in:
@@ -2998,3 +2998,6 @@ zombie girl fucked
|
||||
zombie porn
|
||||
zombie porno
|
||||
zumba xxx
|
||||
double pénétration
|
||||
evil angel
|
||||
fist each others
|
||||
|
||||
17681
src/producer/Data/adult-stars.txt
Normal file
17681
src/producer/Data/adult-stars.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1668,3 +1668,6 @@ kink.com
|
||||
sg4ge
|
||||
tube8
|
||||
x-art
|
||||
mommygotboobs
|
||||
threesomes
|
||||
dp
|
||||
|
||||
234602
src/producer/Data/jav.txt
Normal file
234602
src/producer/Data/jav.txt
Normal file
File diff suppressed because it is too large
Load Diff
7
src/producer/Extensions/StringExtensions.cs
Normal file
7
src/producer/Extensions/StringExtensions.cs
Normal file
@@ -0,0 +1,7 @@
|
||||
namespace Producer.Extensions;
|
||||
|
||||
/// <summary>
/// Extension helpers for <see cref="string"/> values.
/// </summary>
public static class StringExtensions
{
    /// <summary>
    /// Fluent form of <see cref="string.IsNullOrEmpty(string?)"/>.
    /// </summary>
    /// <param name="value">The string to test; may be <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="value"/> is <c>null</c> or has zero length; otherwise <c>false</c>.
    /// </returns>
    /// <remarks>
    /// The <c>NotNullWhen(false)</c> annotation lets nullable flow analysis treat
    /// <paramref name="value"/> as non-null after a negative check, exactly as it does for
    /// <see cref="string.IsNullOrEmpty(string?)"/> itself.
    /// </remarks>
    public static bool IsNullOrEmpty(
        [System.Diagnostics.CodeAnalysis.NotNullWhen(false)] this string? value) =>
        string.IsNullOrEmpty(value);
}
|
||||
@@ -4,7 +4,8 @@ public partial class DebridMediaManagerCrawler(
|
||||
IHttpClientFactory httpClientFactory,
|
||||
ILogger<DebridMediaManagerCrawler> logger,
|
||||
IDataStorage storage,
|
||||
GithubConfiguration githubConfiguration) : BaseCrawler(logger, storage)
|
||||
GithubConfiguration githubConfiguration,
|
||||
IParsingService parsingService) : BaseCrawler(logger, storage)
|
||||
{
|
||||
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
|
||||
private static partial Regex HashCollectionMatcher();
|
||||
@@ -100,45 +101,64 @@ public partial class DebridMediaManagerCrawler(
|
||||
return null;
|
||||
}
|
||||
|
||||
var torrent = new Torrent
|
||||
{
|
||||
Source = Source,
|
||||
Name = filenameElement.GetString(),
|
||||
Size = bytesElement.GetInt64().ToString(),
|
||||
InfoHash = hashElement.ToString(),
|
||||
Seeders = 0,
|
||||
Leechers = 0,
|
||||
};
|
||||
|
||||
if (string.IsNullOrEmpty(torrent.Name))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var parsedTorrent = TorrentTitleParser.Parse(torrent.Name);
|
||||
var parsedTorrent = parsingService.Parse(filenameElement.GetString());
|
||||
|
||||
if (parsedTorrent.IsInvalid)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
if (parsedTorrent.IsMovie)
|
||||
var torrent = new Torrent
|
||||
{
|
||||
torrent.Category = "movies";
|
||||
torrent.Name = parsedTorrent.Movie.Title;
|
||||
Source = Source,
|
||||
Size = bytesElement.GetInt64().ToString(),
|
||||
InfoHash = hashElement.ToString(),
|
||||
Seeders = 0,
|
||||
Leechers = 0,
|
||||
};
|
||||
|
||||
return torrent;
|
||||
return parsedTorrent.Type switch
|
||||
{
|
||||
TorrentType.Movie => HandleMovieType(torrent, parsedTorrent),
|
||||
TorrentType.Tv => HandleTvType(torrent, parsedTorrent),
|
||||
_ => null,
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Finalises a torrent whose parsed type is a movie.
/// </summary>
/// <param name="torrent">The torrent being built; its <c>Category</c> and <c>Name</c> are set on success.</param>
/// <param name="parsedTorrent">Parser output; the release title is read from its <c>Movie</c> member.</param>
/// <returns>
/// <paramref name="torrent"/> with category <c>"movies"</c> and the release title as its name, or
/// <c>null</c> when the release title is missing/empty or contains banned terms.
/// </returns>
private Torrent? HandleMovieType(Torrent torrent, ParsedFilename parsedTorrent)
{
    // Movie and ReleaseTitle are both declared nullable, so read them null-safely once
    // and let string.IsNullOrEmpty narrow the local to a non-null string.
    var releaseTitle = parsedTorrent.Movie?.ReleaseTitle;

    if (string.IsNullOrEmpty(releaseTitle))
    {
        return null;
    }

    if (!parsingService.HasNoBannedTerms(releaseTitle))
    {
        logger.LogWarning("Banned terms found in {Title}", releaseTitle);
        return null;
    }

    torrent.Category = "movies";
    torrent.Name = releaseTitle;
    return torrent;
}
|
||||
|
||||
/// <summary>
/// Finalises a torrent whose parsed type is a TV show.
/// </summary>
/// <param name="torrent">The torrent being built; its <c>Category</c> and <c>Name</c> are set on success.</param>
/// <param name="parsedTorrent">Parser output; the release title is read from its <c>Show</c> member.</param>
/// <returns>
/// <paramref name="torrent"/> with category <c>"tv"</c> and the release title as its name, or
/// <c>null</c> when the release title is missing/empty or contains banned terms.
/// </returns>
private Torrent? HandleTvType(Torrent torrent, ParsedFilename parsedTorrent)
{
    // Show and ReleaseTitle are both declared nullable, so read them null-safely once
    // and let string.IsNullOrEmpty narrow the local to a non-null string.
    var releaseTitle = parsedTorrent.Show?.ReleaseTitle;

    if (string.IsNullOrEmpty(releaseTitle))
    {
        return null;
    }

    if (!parsingService.HasNoBannedTerms(releaseTitle))
    {
        logger.LogWarning("Banned terms found in {Title}", releaseTitle);
        return null;
    }

    torrent.Category = "tv";
    torrent.Name = releaseTitle;
    return torrent;
}
|
||||
|
||||
private async Task InsertTorrentsForPage(JsonDocument json)
|
||||
|
||||
@@ -29,10 +29,10 @@ internal static class ServiceCollectionExtensions
|
||||
services.AddQuartz(
|
||||
quartz =>
|
||||
{
|
||||
//RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
|
||||
RegisterAutomaticRegistrationJobs(jobTypes, openMethod, quartz, scrapeConfiguration);
|
||||
RegisterDmmJob(githubConfiguration, quartz, scrapeConfiguration);
|
||||
//RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
|
||||
//RegisterPublisher(quartz, rabbitConfiguration);
|
||||
RegisterTorrentioJob(services, quartz, configuration, scrapeConfiguration);
|
||||
RegisterPublisher(quartz, rabbitConfiguration);
|
||||
});
|
||||
|
||||
services.AddQuartzHostedService(
|
||||
|
||||
@@ -2,6 +2,7 @@ namespace Producer.Features.ParseTorrentTitle;
|
||||
|
||||
public class BaseParsed
|
||||
{
|
||||
public string? ReleaseTitle { get; set; }
|
||||
public string? Title { get; set; }
|
||||
public string? Year { get; set; }
|
||||
public Edition? Edition { get; set; }
|
||||
@@ -15,4 +16,4 @@ public class BaseParsed
|
||||
public List<Source> Sources { get; set; } = [];
|
||||
public bool? Multi { get; set; }
|
||||
public bool? Complete { get; set; }
|
||||
}
|
||||
}
|
||||
|
||||
23
src/producer/Features/ParseTorrentTitle/IParsingService.cs
Normal file
23
src/producer/Features/ParseTorrentTitle/IParsingService.cs
Normal file
@@ -0,0 +1,23 @@
|
||||
namespace Producer.Features.ParseTorrentTitle;
|
||||
|
||||
/// <summary>
/// Title parsing, comparison and banned-term filtering operations used when ingesting torrents.
/// </summary>
public interface IParsingService
{
    // ---- Parsing ---------------------------------------------------------------------------

    /// <summary>Parses a release name into a <see cref="ParsedFilename"/>.</summary>
    /// <param name="name">The raw release/file name.</param>
    ParsedFilename Parse(string name);

    // ---- Banned-term filtering -------------------------------------------------------------

    /// <summary>
    /// Checks a single title against the banned word collections.
    /// </summary>
    /// <param name="targetTitle">The title to inspect.</param>
    /// <returns><c>true</c> when no banned term was found in <paramref name="targetTitle"/>.</returns>
    bool HasNoBannedTerms(string targetTitle);

    /// <summary>
    /// Checks <paramref name="testTitle"/> against the banned word collections, relative to
    /// <paramref name="targetTitle"/>.
    /// </summary>
    /// <param name="targetTitle">The reference title.</param>
    /// <param name="testTitle">The title to inspect.</param>
    /// <returns><c>true</c> when no banned term was found.</returns>
    bool HasNoBannedTerms(string targetTitle, string testTitle);

    /// <summary>
    /// Combines <see cref="MatchesTitle"/> and
    /// <see cref="HasNoBannedTerms(string, string)"/>: both must hold.
    /// </summary>
    bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle);

    // ---- Title matching (semantics are defined by the implementing class) -------------------

    bool MatchesTitle(string target, List<string> years, string test);

    bool FlexEq(string test, string target, List<string> years);

    bool StrictEqual(string title1, string title2);

    bool IncludesMustHaveTerms(List<string> mustHaveTerms, string testTitle);

    bool HasYear(string test, List<string> years, bool strictCheck = false);

    int CountTestTermsInTarget(string test, string target, bool shouldBeInSequence = false);

    int CountUncommonWords(string title);

    // ---- Text helpers (semantics are defined by the implementing class) --------------------

    string Naked(string title);

    List<string> GrabYears(string str);

    List<int> GrabPossibleSeasonNums(string str);

    string RemoveDiacritics(string str);

    string RemoveRepeats(string str);

    int RomanToDecimal(string roman);

    string ReplaceRomanWithDecimal(string input);
}
|
||||
@@ -0,0 +1,6 @@
|
||||
namespace Producer.Features.ParseTorrentTitle;
|
||||
|
||||
/// <summary>
/// Abstraction over the torrent title parser so that it can be injected rather than called statically.
/// </summary>
public interface ITorrentTitleParser
{
    /// <summary>Parses a release name into a <see cref="ParsedFilename"/>.</summary>
    /// <param name="name">The raw release/file name.</param>
    /// <returns>The parse result for <paramref name="name"/>.</returns>
    ParsedFilename Parse(string name);
}
|
||||
@@ -4,8 +4,7 @@ public class ParsedFilename
|
||||
{
|
||||
public ParsedMovie? Movie { get; set; }
|
||||
public ParsedTv? Show { get; set; }
|
||||
public bool IsMovie => Movie is not null;
|
||||
public bool IsShow => Show is not null;
|
||||
public TorrentType? Type { get; set; }
|
||||
|
||||
public bool IsInvalid => (!IsMovie && !IsShow) || (IsMovie && IsShow);
|
||||
}
|
||||
public bool IsInvalid => Movie is null && Show is null;
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ namespace Producer.Features.ParseTorrentTitle;
|
||||
|
||||
public class ParsedTv : BaseParsed
|
||||
{
|
||||
public string? ReleaseTitle { get; set; }
|
||||
public string? SeriesTitle { get; set; }
|
||||
public List<int> Seasons { get; set; } = [];
|
||||
public List<int> EpisodeNumbers { get; set; } = [];
|
||||
@@ -13,4 +12,4 @@ public class ParsedTv : BaseParsed
|
||||
public bool IsSeasonExtra { get; set; }
|
||||
public bool IsSpecial { get; set; }
|
||||
public int SeasonPart { get; set; }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
namespace Producer.Features.Crawlers.Dmm;
|
||||
namespace Producer.Features.ParseTorrentTitle;
|
||||
|
||||
public partial class ParsingService
|
||||
{
|
||||
[GeneratedRegex(@"[^a-z0-9]")]
|
||||
[GeneratedRegex("[^a-z0-9]")]
|
||||
private static partial Regex NakedMatcher();
|
||||
|
||||
[GeneratedRegex(@"\d{4}")]
|
||||
@@ -16,14 +16,14 @@ public partial class ParsingService
|
||||
|
||||
[GeneratedRegex(@"m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})")]
|
||||
private static partial Regex ReplaceRomanWithDecimalMatcher();
|
||||
|
||||
|
||||
[GeneratedRegex(@"\s+")]
|
||||
private static partial Regex WhitespaceMatcher();
|
||||
|
||||
|
||||
[GeneratedRegex(@"\W+")]
|
||||
private static partial Regex WordMatcher();
|
||||
|
||||
|
||||
|
||||
|
||||
[GeneratedRegex(@"'s|\s&\s|\W")]
|
||||
private static partial Regex WordProcessingMatcher();
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
namespace Producer.Features.Crawlers.Dmm;
|
||||
namespace Producer.Features.ParseTorrentTitle;
|
||||
|
||||
public partial class ParsingService(IWordCollections wordCollections)
|
||||
public partial class ParsingService(IWordCollections wordCollections, ITorrentTitleParser torrentTitleParser) : IParsingService
|
||||
{
|
||||
private static readonly char[] WhitespaceSeparator = [' '];
|
||||
|
||||
@@ -198,8 +198,8 @@ public partial class ParsingService(IWordCollections wordCollections)
|
||||
|
||||
public bool FlexEq(string test, string target, List<string> years)
|
||||
{
|
||||
var movieTitle = TorrentTitleParser.Parse(test).Movie.Title.ToLower();
|
||||
var tvTitle = TorrentTitleParser.Parse(test).Show.Title.ToLower();
|
||||
var movieTitle = torrentTitleParser.Parse(test).Movie.Title.ToLower();
|
||||
var tvTitle = torrentTitleParser.Parse(test).Show.Title.ToLower();
|
||||
|
||||
var target2 = WhitespaceMatcher().Replace(target, "");
|
||||
var test2 = WhitespaceMatcher().Replace(test, "");
|
||||
@@ -302,9 +302,36 @@ public partial class ParsingService(IWordCollections wordCollections)
|
||||
|
||||
var titleWithoutSymbols = string.Join(' ', WordMatcher().Split(testTitle.ToLower()));
|
||||
|
||||
var hasJavWords = wordCollections.Jav.Any(jav => !targetTitle.Contains(jav) && titleWithoutSymbols.Contains(jav));
|
||||
|
||||
var hasAdultStars = wordCollections.AdultStars.Any(star => !targetTitle.Contains(star) && titleWithoutSymbols.Contains(star));
|
||||
|
||||
var hasBannedCompoundWords = wordCollections.AdultCompoundPhrases.Any(compoundWord => !targetTitle.Contains(compoundWord) && titleWithoutSymbols.Contains(compoundWord));
|
||||
|
||||
return !hasBannedWords && !hasBannedCompoundWords;
|
||||
return !hasBannedWords &&
|
||||
!hasJavWords &&
|
||||
!hasAdultStars &&
|
||||
!hasBannedCompoundWords;
|
||||
}
|
||||
|
||||
/// <summary>
/// Checks a single title against the adult word, JAV, adult-star and compound-phrase collections.
/// </summary>
/// <param name="targetTitle">The title to inspect.</param>
/// <returns>
/// <c>true</c> when none of the collections produced a hit; <c>false</c> as soon as one does.
/// </returns>
/// <remarks>
/// Single adult words are matched against whole tokens of the title; the other three collections
/// are matched as case-insensitive substrings of the space-joined tokens.
/// </remarks>
public bool HasNoBannedTerms(string targetTitle)
{
    // Lower-case with the invariant culture so tokenisation does not depend on the host locale,
    // and split only once: the tokens feed both the word lookup and the joined string.
    var words = WordMatcher().Split(targetTitle.ToLowerInvariant());

    // Cheapest check first: hash lookups per token.
    if (words.Any(word => wordCollections.AdultWords.Contains(word)))
    {
        return false;
    }

    var inputWithoutSymbols = string.Join(' ', words);

    // The remaining checks scan whole collections, so stop at the first one that hits.
    bool ContainsAnyOf(HashSet<string> phrases) =>
        phrases.Any(phrase => inputWithoutSymbols.Contains(phrase, StringComparison.OrdinalIgnoreCase));

    return !ContainsAnyOf(wordCollections.Jav) &&
           !ContainsAnyOf(wordCollections.AdultStars) &&
           !ContainsAnyOf(wordCollections.AdultCompoundPhrases);
}
|
||||
|
||||
/// <summary>
/// A test title is accepted only when it matches the target title and carries no banned terms.
/// </summary>
/// <param name="targetTitle">The reference title.</param>
/// <param name="years">Years forwarded to <c>MatchesTitle</c>.</param>
/// <param name="testTitle">The candidate title.</param>
/// <returns><c>true</c> when both checks pass.</returns>
public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle)
{
    // The banned-term scan only runs once the title match has succeeded.
    if (!MatchesTitle(targetTitle, years, testTitle))
    {
        return false;
    }

    return HasNoBannedTerms(targetTitle, testTitle);
}
|
||||
@@ -318,4 +345,6 @@ public partial class ParsingService(IWordCollections wordCollections)
|
||||
|
||||
return processedTitle.Count(word => !wordCollections.CommonWords.Contains(word));
|
||||
}
|
||||
|
||||
/// <summary>
/// Parses a release name by delegating to the injected title parser.
/// </summary>
/// <param name="name">The raw release/file name.</param>
/// <returns>The parser's result for <paramref name="name"/>.</returns>
public ParsedFilename Parse(string name)
{
    return torrentTitleParser.Parse(name);
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
namespace Producer.Features.ParseTorrentTitle;
|
||||
|
||||
/// <summary>
/// Dependency-injection registration for the torrent title parsing feature.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="IParsingService"/> and <see cref="ITorrentTitleParser"/> as singletons.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection RegisterParseTorrentTitle(this IServiceCollection services) =>
        services
            .AddSingleton<IParsingService, ParsingService>()
            .AddSingleton<ITorrentTitleParser, TorrentTitleParser>();
}
|
||||
@@ -1,6 +1,6 @@
|
||||
namespace Producer.Features.ParseTorrentTitle;
|
||||
|
||||
public static partial class TorrentTitleParser
|
||||
public partial class TorrentTitleParser : ITorrentTitleParser
|
||||
{
|
||||
[GeneratedRegex(@"(season|episode)s?.?\d?", RegexOptions.IgnoreCase, "en-GB")]
|
||||
private static partial Regex SeasonEpisode();
|
||||
@@ -15,7 +15,7 @@ public static partial class TorrentTitleParser
|
||||
[GeneratedRegex(@"\d{2,4}\s?\-\s?\d{2,4}\b", RegexOptions.IgnoreCase, "en-GB")]
|
||||
private static partial Regex SeasonTwo();
|
||||
|
||||
public static ParsedFilename Parse(string name)
|
||||
public ParsedFilename Parse(string name)
|
||||
{
|
||||
VideoCodecsParser.Parse(name, out var videoCodec, out _);
|
||||
AudioCodecsParser.Parse(name, out var audioCodec, out _);
|
||||
@@ -83,6 +83,7 @@ public static partial class TorrentTitleParser
|
||||
Multi = baseParsed.Multi,
|
||||
Revision = baseParsed.Revision,
|
||||
},
|
||||
Type = TorrentType.Tv,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -97,6 +98,7 @@ public static partial class TorrentTitleParser
|
||||
{
|
||||
Movie = new()
|
||||
{
|
||||
ReleaseTitle = name,
|
||||
Title = baseParsed.Title,
|
||||
Year = baseParsed.Year,
|
||||
Edition = baseParsed.Edition,
|
||||
@@ -111,6 +113,7 @@ public static partial class TorrentTitleParser
|
||||
Multi = baseParsed.Multi,
|
||||
Revision = baseParsed.Revision,
|
||||
},
|
||||
Type = TorrentType.Movie,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -8,5 +8,9 @@ public interface IWordCollections
|
||||
|
||||
HashSet<string> CommonWords { get; }
|
||||
|
||||
HashSet<string> Jav { get; }
|
||||
|
||||
HashSet<string> AdultStars { get; }
|
||||
|
||||
Task LoadAsync();
|
||||
}
|
||||
|
||||
@@ -11,6 +11,8 @@ public class PopulationService(IWordCollections wordCollections, ILogger<Populat
|
||||
logger.LogInformation("Common Words Count: {Count}", wordCollections.CommonWords.Count);
|
||||
logger.LogInformation("Adult Words Count: {Count}", wordCollections.AdultWords.Count);
|
||||
logger.LogInformation("Adult Compound Phrases Count: {Count}", wordCollections.AdultCompoundPhrases.Count);
|
||||
logger.LogInformation("Jav Count: {Count}", wordCollections.Jav.Count);
|
||||
logger.LogInformation("Adult Stars Count: {Count}", wordCollections.AdultStars.Count);
|
||||
|
||||
logger.LogInformation("Word collections loaded.");
|
||||
}
|
||||
|
||||
@@ -4,10 +4,17 @@ public class WordCollections : IWordCollections
|
||||
{
|
||||
private const string AdultWordsFile = "adult-words.txt";
|
||||
private const string AdultCompoundPhrasesFile = "adult-compound-words.txt";
|
||||
private const string AdultStarsFile = "adult-stars.txt";
|
||||
private const string JavFile = "jav.txt";
|
||||
private const string CommonWordsFile = "common-words.txt";
|
||||
|
||||
public HashSet<string> AdultWords { get; private set; } = [];
|
||||
public HashSet<string> AdultCompoundPhrases { get; private set; } = [];
|
||||
|
||||
public HashSet<string> AdultStars { get; private set; } = [];
|
||||
|
||||
public HashSet<string> Jav { get; private set; } = [];
|
||||
|
||||
public HashSet<string> CommonWords { get; private set; } = [];
|
||||
|
||||
public async Task LoadAsync()
|
||||
@@ -16,7 +23,9 @@ public class WordCollections : IWordCollections
|
||||
{
|
||||
LoadAdultWords(),
|
||||
LoadAdultCompounds(),
|
||||
LoadCommonWords()
|
||||
LoadCommonWords(),
|
||||
LoadJav(),
|
||||
LoadAdultStars(),
|
||||
};
|
||||
|
||||
await Task.WhenAll(loaderTasks);
|
||||
@@ -40,5 +49,17 @@ public class WordCollections : IWordCollections
|
||||
AdultWords = [..adultWords];
|
||||
}
|
||||
|
||||
/// <summary>
/// Loads the JAV term list from the data directory into <c>Jav</c>, one entry per line.
/// </summary>
private async Task LoadJav()
{
    var lines = await File.ReadAllLinesAsync(GetPath(JavFile));

    // Skip blank lines: an empty entry is a substring of every title, so a single stray
    // blank line in the data file would cause every title to be treated as banned.
    HashSet<string> entries = new(lines.Length);
    foreach (var line in lines)
    {
        if (!string.IsNullOrWhiteSpace(line))
        {
            entries.Add(line);
        }
    }

    Jav = entries;
}
|
||||
|
||||
/// <summary>
/// Loads the adult-star name list from the data directory into <c>AdultStars</c>, one entry per line.
/// </summary>
private async Task LoadAdultStars()
{
    var lines = await File.ReadAllLinesAsync(GetPath(AdultStarsFile));

    // Skip blank lines: an empty entry is a substring of every title, so a single stray
    // blank line in the data file would cause every title to be treated as banned.
    HashSet<string> entries = new(lines.Length);
    foreach (var line in lines)
    {
        if (!string.IsNullOrWhiteSpace(line))
        {
            entries.Add(line);
        }
    }

    AdultStars = entries;
}
|
||||
|
||||
/// <summary>
/// Builds the path of a data file located in the <c>Data</c> folder under the application base directory.
/// </summary>
/// <param name="fileName">File name of the word list.</param>
/// <returns>The combined path.</returns>
private static string GetPath(string fileName)
{
    var dataDirectory = Path.Combine(AppContext.BaseDirectory, "Data");
    return Path.Combine(dataDirectory, fileName);
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ builder.Services
|
||||
.AddDataStorage()
|
||||
.AddCrawlers()
|
||||
.RegisterWordCollections()
|
||||
.RegisterParseTorrentTitle()
|
||||
.AddQuartz(builder.Configuration);
|
||||
|
||||
var host = builder.Build();
|
||||
|
||||
Reference in New Issue
Block a user