Merge pull request #151 from Gabisonfire/feature/tissue-corn-sanitizer
Improve producer matching - Add tissue service
This commit is contained in:
14
.github/workflows/build_tissue.yaml
vendored
Normal file
14
.github/workflows/build_tissue.yaml
vendored
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
name: Build and Push Tissue Service
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
paths:
|
||||||
|
- 'src/tissue/**'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
process:
|
||||||
|
uses: ./.github/workflows/base_image_workflow.yaml
|
||||||
|
secrets: inherit
|
||||||
|
with:
|
||||||
|
CONTEXT: ./src/tissue/
|
||||||
|
IMAGE_NAME: knightcrawler-tissue
|
||||||
@@ -17,5 +17,5 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: codespell
|
- id: codespell
|
||||||
exclude: |
|
exclude: |
|
||||||
(?x)^(src/node/consumer/test/.*|src/producer/Data/.*)$
|
(?x)^(src/node/consumer/test/.*|src/producer/Data/.*|src/tissue/Data/.*)$
|
||||||
args: ["-L", "strem,chage"]
|
args: ["-L", "strem,chage"]
|
||||||
|
|||||||
@@ -35,7 +35,6 @@ adultprime
|
|||||||
adulttime
|
adulttime
|
||||||
adulttimepilots
|
adulttimepilots
|
||||||
aebn
|
aebn
|
||||||
anal
|
|
||||||
agentredgirl
|
agentredgirl
|
||||||
alettaoceanempire
|
alettaoceanempire
|
||||||
alexismonroe
|
alexismonroe
|
||||||
@@ -123,7 +122,6 @@ aussieass
|
|||||||
aussiefellatioqueens
|
aussiefellatioqueens
|
||||||
aussiepov
|
aussiepov
|
||||||
austinwilde
|
austinwilde
|
||||||
av69
|
|
||||||
avadawn
|
avadawn
|
||||||
avanal
|
avanal
|
||||||
aventertainments
|
aventertainments
|
||||||
@@ -715,7 +713,6 @@ hunterpov
|
|||||||
hushpass
|
hushpass
|
||||||
hussiepass
|
hussiepass
|
||||||
hustlaz
|
hustlaz
|
||||||
hustler
|
|
||||||
hustlerhd
|
hustlerhd
|
||||||
hustlerparodies
|
hustlerparodies
|
||||||
hustlerslesbians
|
hustlerslesbians
|
||||||
@@ -1670,8 +1667,6 @@ tube8
|
|||||||
x-art
|
x-art
|
||||||
mommygotboobs
|
mommygotboobs
|
||||||
threesomes
|
threesomes
|
||||||
dp
|
|
||||||
onlyfans
|
onlyfans
|
||||||
alterotic
|
alterotic
|
||||||
assy
|
|
||||||
defloration
|
defloration
|
||||||
|
|||||||
@@ -1,7 +1,24 @@
|
|||||||
namespace Producer.Extensions;
|
namespace Producer.Extensions;
|
||||||
|
|
||||||
/// <summary>
/// String helpers used by the producer's title matching.
/// </summary>
public static partial class StringExtensions
{
    // Matches every character that is not an ASCII letter, an ASCII digit or a space.
    [GeneratedRegex("[^a-zA-Z0-9 ]")]
    private static partial Regex NotAlphaNumeric();

    /// <summary>Extension-method form of <see cref="string.IsNullOrEmpty(string)"/>.</summary>
    /// <param name="value">The string to test; may be <c>null</c>.</param>
    /// <returns><c>true</c> when <paramref name="value"/> is <c>null</c> or empty.</returns>
    public static bool IsNullOrEmpty(this string? value) =>
        string.IsNullOrEmpty(value);

    /// <summary>
    /// Reduces a title to lower-case ASCII alphanumeric words separated by single spaces.
    /// </summary>
    /// <param name="title">The raw title. <c>null</c> or empty input yields an empty string.</param>
    /// <returns>The normalised title, e.g. <c>"The.Matrix (1999)"</c> becomes <c>"the matrix 1999"</c>.</returns>
    public static string NormalizeTitle(this string? title)
    {
        if (string.IsNullOrEmpty(title))
        {
            return string.Empty;
        }

        // Every non-alphanumeric character becomes a space; empty entries are then
        // dropped so runs of punctuation collapse into a single separator.
        var words = NotAlphaNumeric()
            .Replace(title, " ")
            .Split(' ', StringSplitOptions.RemoveEmptyEntries);

        // Invariant lower-casing: the result is compared ordinally against word lists,
        // so it must not depend on the current culture (e.g. Turkish dotless i).
        return string.Join(' ', words).ToLowerInvariant();
    }
}
|
||||||
|
|||||||
@@ -1,8 +1,21 @@
|
|||||||
namespace Producer.Features.ParseTorrentTitle;
|
namespace Producer.Features.ParseTorrentTitle;
|
||||||
|
|
||||||
public partial class ParsingService(IWordCollections wordCollections, ITorrentTitleParser torrentTitleParser) : IParsingService
|
public partial class ParsingService : IParsingService
|
||||||
{
|
{
|
||||||
|
private readonly IWordCollections _wordCollections;
|
||||||
|
private readonly ITorrentTitleParser _torrentTitleParser;
|
||||||
private static readonly char[] WhitespaceSeparator = [' '];
|
private static readonly char[] WhitespaceSeparator = [' '];
|
||||||
|
private HashSet<string> _compoundWords = [];
|
||||||
|
|
||||||
|
/// <summary>
/// Stores the injected collaborators and builds the combined set of compound terms
/// (compound phrases, JAV codes and star names) used by the banned-term checks.
/// </summary>
/// <param name="wordCollections">Source of the word lists.</param>
/// <param name="torrentTitleParser">Parser used for title/type extraction.</param>
public ParsingService(IWordCollections wordCollections, ITorrentTitleParser torrentTitleParser)
{
    (_wordCollections, _torrentTitleParser) = (wordCollections, torrentTitleParser);

    // NOTE(review): the three sets are copied once, at construction time. Confirm the
    // word collections are already populated when this service is first resolved.
    _compoundWords.UnionWith(wordCollections.AdultCompoundPhrases);
    _compoundWords.UnionWith(wordCollections.Jav);
    _compoundWords.UnionWith(wordCollections.AdultStars);
}
|
||||||
|
|
||||||
/// <summary>
/// Lower-cases <paramref name="title"/> and removes everything matched by <c>NakedMatcher()</c>.
/// </summary>
public string Naked(string title)
{
    var lowered = title.ToLower();

    return NakedMatcher().Replace(lowered, "");
}
|
||||||
@@ -198,8 +211,8 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
|
|||||||
|
|
||||||
public bool FlexEq(string test, string target, List<string> years)
|
public bool FlexEq(string test, string target, List<string> years)
|
||||||
{
|
{
|
||||||
var movieTitle = torrentTitleParser.Parse(test).Movie.Title.ToLower();
|
var movieTitle = _torrentTitleParser.Parse(test).Movie.Title.ToLower();
|
||||||
var tvTitle = torrentTitleParser.Parse(test).Show.Title.ToLower();
|
var tvTitle = _torrentTitleParser.Parse(test).Show.Title.ToLower();
|
||||||
|
|
||||||
var target2 = WhitespaceMatcher().Replace(target, "");
|
var target2 = WhitespaceMatcher().Replace(target, "");
|
||||||
var test2 = WhitespaceMatcher().Replace(test, "");
|
var test2 = WhitespaceMatcher().Replace(test, "");
|
||||||
@@ -247,7 +260,7 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
var keyTerms = splits.Where(s => (s.Length > 1 && !wordCollections.CommonWords.Contains(s)) || s.Length > 5).ToList();
|
var keyTerms = splits.Where(s => (s.Length > 1 && !_wordCollections.CommonWords.Contains(s)) || s.Length > 5).ToList();
|
||||||
keyTerms.AddRange(target.Split(WhitespaceSeparator, StringSplitOptions.RemoveEmptyEntries).Where(e => e.Length > 2));
|
keyTerms.AddRange(target.Split(WhitespaceSeparator, StringSplitOptions.RemoveEmptyEntries).Where(e => e.Length > 2));
|
||||||
var keySet = new HashSet<string>(keyTerms);
|
var keySet = new HashSet<string>(keyTerms);
|
||||||
var commonTerms = splits.Where(s => !keySet.Contains(s)).ToList();
|
var commonTerms = splits.Where(s => !keySet.Contains(s)).ToList();
|
||||||
@@ -296,42 +309,30 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
|
|||||||
|
|
||||||
/// <summary>
/// Checks that <paramref name="testTitle"/> introduces no banned term that is not
/// already part of <paramref name="targetTitle"/>.
/// </summary>
/// <param name="targetTitle">The reference title; terms it contains are exempt.</param>
/// <param name="testTitle">The candidate title that is scanned for banned terms.</param>
/// <returns><c>true</c> when no banned word or compound term was found in the candidate.</returns>
public bool HasNoBannedTerms(string targetTitle, string testTitle)
{
    // Both titles are normalised the same way so the comparisons below are like-for-like.
    var normalisedTarget = targetTitle.NormalizeTitle();
    var normalisedTest = testTitle.NormalizeTitle();

    // Single words: scan the candidate's words (not the target's) and ignore any
    // word the target itself contains.
    var hasBannedWords = normalisedTest
        .Split(' ')
        .Where(word => word.Length >= 3)
        .Any(word => !normalisedTarget.Contains(word, StringComparison.Ordinal) &&
                     _wordCollections.AdultWords.Contains(word));

    // Compound terms: substring match against the candidate, with the same exemption.
    // A blank term is contained in every string, so the exemption also filters it out.
    var hasCompounds = _compoundWords.Any(term =>
        !normalisedTarget.Contains(term, StringComparison.OrdinalIgnoreCase) &&
        normalisedTest.Contains(term, StringComparison.OrdinalIgnoreCase));

    return !hasBannedWords &&
           !hasCompounds;
}
|
||||||
|
|
||||||
/// <summary>
/// Checks that <paramref name="targetTitle"/> contains no banned word or compound term.
/// </summary>
/// <param name="targetTitle">The title to scan.</param>
/// <returns><c>true</c> when nothing banned was found.</returns>
public bool HasNoBannedTerms(string targetTitle)
{
    var normalisedTitle = targetTitle.NormalizeTitle();

    // Words of three or more characters are looked up directly in the banned-word set.
    var hasBannedWords = normalisedTitle
        .Split(' ')
        .Any(word => word.Length >= 3 && _wordCollections.AdultWords.Contains(word));

    // Compound terms are matched as substrings. Blank terms are skipped because every
    // string contains the empty string, which would flag every title.
    var hasCompounds = _compoundWords.Any(term =>
        !string.IsNullOrWhiteSpace(term) &&
        normalisedTitle.Contains(term, StringComparison.OrdinalIgnoreCase));

    return !hasBannedWords &&
           !hasCompounds;
}
|
||||||
|
|
||||||
/// <summary>
/// A candidate qualifies when it matches the target title (via <c>MatchesTitle</c>) and
/// passes the two-argument banned-term check.
/// </summary>
public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle)
{
    if (!MatchesTitle(targetTitle, years, testTitle))
    {
        return false;
    }

    return HasNoBannedTerms(targetTitle, testTitle);
}
|
||||||
@@ -343,10 +344,10 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
|
|||||||
.Where(word => word.Length > 3)
|
.Where(word => word.Length > 3)
|
||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
return processedTitle.Count(word => !wordCollections.CommonWords.Contains(word));
|
return processedTitle.Count(word => !_wordCollections.CommonWords.Contains(word));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>Delegates to the injected torrent title parser.</summary>
public ParsedFilename Parse(string name)
{
    return _torrentTitleParser.Parse(name);
}
|
||||||
|
|
||||||
/// <summary>Delegates torrent type detection to the injected torrent title parser.</summary>
public TorrentType GetTypeByName(string name)
{
    return _torrentTitleParser.GetTypeByName(name);
}
|
||||||
}
|
}
|
||||||
|
|||||||
31
src/tissue/Configuration/logging.json
Normal file
31
src/tissue/Configuration/logging.json
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
{
|
||||||
|
"Serilog": {
|
||||||
|
"Using": [ "Serilog.Sinks.Console" ],
|
||||||
|
"MinimumLevel": {
|
||||||
|
"Default": "Information",
|
||||||
|
"Override": {
|
||||||
|
"Microsoft": "Warning",
|
||||||
|
"System": "Warning",
|
||||||
|
"System.Net.Http.HttpClient.Scraper.LogicalHandler": "Warning",
|
||||||
|
"System.Net.Http.HttpClient.Scraper.ClientHandler": "Warning",
|
||||||
|
"Quartz.Impl.StdSchedulerFactory": "Warning",
|
||||||
|
"Quartz.Core.QuartzScheduler": "Warning",
|
||||||
|
"Quartz.Simpl.RAMJobStore": "Warning",
|
||||||
|
"Quartz.Core.JobRunShell": "Warning",
|
||||||
|
"Quartz.Core.SchedulerSignalerImpl": "Warning"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"WriteTo": [
|
||||||
|
{
|
||||||
|
"Name": "Console",
|
||||||
|
"Args": {
|
||||||
|
"outputTemplate": "{Timestamp:HH:mm:ss} [{Level}] [{SourceContext}] {Message}{NewLine}{Exception}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"Enrich": [ "FromLogContext", "WithMachineName", "WithThreadId" ],
|
||||||
|
"Properties": {
|
||||||
|
"Application": "Tissue"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
3004
src/tissue/Data/adult-compound-words.txt
Normal file
3004
src/tissue/Data/adult-compound-words.txt
Normal file
File diff suppressed because it is too large
Load Diff
17681
src/tissue/Data/adult-stars.txt
Normal file
17681
src/tissue/Data/adult-stars.txt
Normal file
File diff suppressed because it is too large
Load Diff
1670
src/tissue/Data/adult-words.txt
Normal file
1670
src/tissue/Data/adult-words.txt
Normal file
File diff suppressed because it is too large
Load Diff
234602
src/tissue/Data/jav.txt
Normal file
234602
src/tissue/Data/jav.txt
Normal file
File diff suppressed because it is too large
Load Diff
17
src/tissue/Dockerfile
Normal file
17
src/tissue/Dockerfile
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0 AS build
|
||||||
|
|
||||||
|
ARG TARGETARCH
|
||||||
|
WORKDIR /App
|
||||||
|
COPY . ./
|
||||||
|
RUN dotnet restore -a $TARGETARCH
|
||||||
|
RUN dotnet publish -c Release --no-restore -o out -a $TARGETARCH
|
||||||
|
|
||||||
|
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
|
||||||
|
|
||||||
|
WORKDIR /App
|
||||||
|
COPY --from=build /App/out .
|
||||||
|
RUN addgroup -S tissue && adduser -S -G tissue tissue
|
||||||
|
USER tissue
|
||||||
|
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
|
||||||
|
CMD pgrep -f dotnet || exit 1
|
||||||
|
ENTRYPOINT ["dotnet", "Tissue.dll"]
|
||||||
44
src/tissue/Extensions/ConfigurationExtensions.cs
Normal file
44
src/tissue/Extensions/ConfigurationExtensions.cs
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
namespace Tissue.Extensions;
|
||||||
|
|
||||||
|
/// <summary>
/// Helpers for assembling the host configuration and registering configuration objects.
/// </summary>
public static class ConfigurationExtensions
{
    private const string ConfigurationFolder = "Configuration";
    private const string LoggingConfig = "logging.json";

    /// <summary>
    /// Points the builder at the <c>Configuration</c> folder under the application base
    /// directory, then adds the required logging JSON file (reloaded on change),
    /// environment variables and user secrets, in that order.
    /// </summary>
    /// <param name="configuration">The builder to extend.</param>
    /// <returns>The same builder, for chaining.</returns>
    public static IConfigurationBuilder AddScrapeConfiguration(this IConfigurationBuilder configuration)
    {
        var configurationDirectory = Path.Combine(AppContext.BaseDirectory, ConfigurationFolder);

        configuration.SetBasePath(configurationDirectory);
        configuration.AddJsonFile(LoggingConfig, optional: false, reloadOnChange: true);
        configuration.AddEnvironmentVariables();
        configuration.AddUserSecrets<Program>();

        return configuration;
    }

    /// <summary>
    /// Binds <paramref name="sectionName"/> to a <typeparamref name="TConfiguration"/> and
    /// registers it as a singleton unless one is already registered.
    /// </summary>
    /// <returns>The bound instance.</returns>
    /// <exception cref="ArgumentNullException">The section could not be bound to an instance.</exception>
    public static TConfiguration LoadConfigurationFromConfig<TConfiguration>(this IServiceCollection services, IConfiguration configuration, string sectionName)
        where TConfiguration : class =>
        RegisterSingleton(services, configuration.GetSection(sectionName).Get<TConfiguration>());

    /// <summary>
    /// Creates a <typeparamref name="TConfiguration"/> through its parameterless constructor
    /// and registers it as a singleton unless one is already registered.
    /// </summary>
    /// <returns>The created instance.</returns>
    public static TConfiguration LoadConfigurationFromEnv<TConfiguration>(this IServiceCollection services)
        where TConfiguration : class =>
        RegisterSingleton(services, Activator.CreateInstance<TConfiguration>());

    // Shared tail of both loaders: null-check, TryAddSingleton, hand the instance back.
    private static TConfiguration RegisterSingleton<TConfiguration>(IServiceCollection services, TConfiguration? instance)
        where TConfiguration : class
    {
        ArgumentNullException.ThrowIfNull(instance, nameof(instance));

        services.TryAddSingleton(instance);

        return instance;
    }
}
|
||||||
68
src/tissue/Extensions/EnvironmentExtensions.cs
Normal file
68
src/tissue/Extensions/EnvironmentExtensions.cs
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
namespace Tissue.Extensions;
|
||||||
|
|
||||||
|
/// <summary>
/// Reads typed values from environment variables named <c>{prefix}_{varName}</c>.
/// </summary>
public static class EnvironmentExtensions
{
    /// <summary>
    /// Reads a boolean. <c>true</c>, <c>yes</c> and <c>1</c> (any casing, surrounding
    /// whitespace ignored) are true; every other non-empty value is false.
    /// </summary>
    /// <param name="prefix">Variable name prefix.</param>
    /// <param name="varName">Variable name without the prefix.</param>
    /// <param name="fallback">Value returned when the variable is unset or empty.</param>
    public static bool GetEnvironmentVariableAsBool(this string prefix, string varName, bool fallback = false)
    {
        var value = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));

        if (string.IsNullOrEmpty(value))
        {
            return fallback;
        }

        // Invariant lower-casing keeps the match independent of the current culture.
        return value.Trim().ToLowerInvariant() switch
        {
            "true" or "yes" or "1" => true,
            _ => false,
        };
    }

    /// <summary>Reads an integer.</summary>
    /// <param name="prefix">Variable name prefix.</param>
    /// <param name="varName">Variable name without the prefix.</param>
    /// <param name="fallback">Value returned when the variable is unset, empty or not a valid integer.</param>
    public static int GetEnvironmentVariableAsInt(this string prefix, string varName, int fallback = 0)
    {
        var value = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));

        if (string.IsNullOrEmpty(value))
        {
            return fallback;
        }

        // Environment variables are machine-readable input, so parse them culture-independently.
        return int.TryParse(
            value,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var result)
            ? result
            : fallback;
    }

    /// <summary>Reads a string that must be present.</summary>
    /// <param name="prefix">Variable name prefix.</param>
    /// <param name="varName">Variable name without the prefix.</param>
    /// <exception cref="InvalidOperationException">The variable is unset or empty.</exception>
    public static string GetRequiredEnvironmentVariableAsString(this string prefix, string varName)
    {
        var fullVarName = GetFullVariableName(prefix, varName);
        var value = Environment.GetEnvironmentVariable(fullVarName);

        if (string.IsNullOrEmpty(value))
        {
            throw new InvalidOperationException($"Environment variable {fullVarName} is not set");
        }

        return value;
    }

    /// <summary>Reads an optional string.</summary>
    /// <param name="prefix">Variable name prefix.</param>
    /// <param name="varName">Variable name without the prefix.</param>
    /// <param name="fallback">Value returned when the variable is unset or empty; may be <c>null</c>.</param>
    /// <returns>The variable's value, or <paramref name="fallback"/> (possibly <c>null</c>).</returns>
    public static string? GetOptionalEnvironmentVariableAsString(this string prefix, string varName, string? fallback = null)
    {
        var value = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));

        return string.IsNullOrEmpty(value) ? fallback : value;
    }

    private static string GetFullVariableName(string prefix, string varName) => $"{prefix}_{varName}";
}
|
||||||
28
src/tissue/Extensions/ServiceCollectionExtensions.cs
Normal file
28
src/tissue/Extensions/ServiceCollectionExtensions.cs
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
namespace Tissue.Extensions;
|
||||||
|
|
||||||
|
/// <summary>
/// Service registrations for logging and the clean-up hosted service.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Replaces all logging providers with a Serilog logger built from
    /// <paramref name="configuration"/>.
    /// </summary>
    /// <returns>The same service collection, for chaining.</returns>
    public static IServiceCollection AddSerilogLogging(this IServiceCollection services, IConfiguration configuration)
    {
        services.AddLogging(UseSerilogOnly);

        return services;

        // Clears the existing providers first, then plugs in the configured Serilog logger.
        void UseSerilogOnly(ILoggingBuilder loggingBuilder)
        {
            loggingBuilder.ClearProviders();

            loggingBuilder.AddSerilog(
                new LoggerConfiguration()
                    .ReadFrom.Configuration(configuration)
                    .CreateLogger());
        }
    }

    /// <summary>Registers <c>KleenexService</c> as a hosted service.</summary>
    /// <returns>The same service collection, for chaining.</returns>
    public static IServiceCollection AddKleenexService(this IServiceCollection services)
    {
        services.AddHostedService<KleenexService>();

        return services;
    }
}
|
||||||
24
src/tissue/Extensions/StringExtensions.cs
Normal file
24
src/tissue/Extensions/StringExtensions.cs
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
namespace Tissue.Extensions;
|
||||||
|
|
||||||
|
/// <summary>
/// String helpers used when matching torrent titles against the word lists.
/// </summary>
public static partial class StringExtensions
{
    // Matches every character that is not an ASCII letter, an ASCII digit or a space.
    [GeneratedRegex("[^a-zA-Z0-9 ]")]
    private static partial Regex NotAlphaNumeric();

    /// <summary>Extension-method form of <see cref="string.IsNullOrEmpty(string)"/>.</summary>
    /// <param name="value">The string to test; may be <c>null</c>.</param>
    /// <returns><c>true</c> when <paramref name="value"/> is <c>null</c> or empty.</returns>
    public static bool IsNullOrEmpty(this string? value) =>
        string.IsNullOrEmpty(value);

    /// <summary>
    /// Reduces a title to lower-case ASCII alphanumeric words separated by single spaces.
    /// </summary>
    /// <param name="title">The raw title. <c>null</c> or empty input yields an empty string.</param>
    /// <returns>The normalised title, e.g. <c>"The.Matrix (1999)"</c> becomes <c>"the matrix 1999"</c>.</returns>
    public static string NormalizeTitle(this string? title)
    {
        if (string.IsNullOrEmpty(title))
        {
            return string.Empty;
        }

        // Every non-alphanumeric character becomes a space; empty entries are then
        // dropped so runs of punctuation collapse into a single separator.
        var words = NotAlphaNumeric()
            .Replace(title, " ")
            .Split(' ', StringSplitOptions.RemoveEmptyEntries);

        // Invariant lower-casing: the result is compared ordinally against word lists,
        // so it must not depend on the current culture (e.g. Turkish dotless i).
        return string.Join(' ', words).ToLowerInvariant();
    }
}
|
||||||
44
src/tissue/Features/DataProcessing/DapperDataStorage.cs
Normal file
44
src/tissue/Features/DataProcessing/DapperDataStorage.cs
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
namespace Tissue.Features.DataProcessing;
|
||||||
|
|
||||||
|
/// <summary>
/// Dapper/Npgsql implementation of <see cref="IDataStorage"/> over the <c>torrents</c> table.
/// Database errors are logged and swallowed rather than propagated.
/// </summary>
public class DapperDataStorage(PostgresConfiguration configuration, ILogger<DapperDataStorage> logger) : IDataStorage
{
    /// <summary>Loads every row of the <c>torrents</c> table.</summary>
    /// <param name="cancellationToken">Cancels opening the connection and running the query.</param>
    /// <returns>All torrents, or an empty collection when the query fails.</returns>
    public async Task<IReadOnlyCollection<Torrent>?> GetAllTorrents(CancellationToken cancellationToken = default)
    {
        const string getAllTorrentsSql = "SELECT * FROM torrents";

        try
        {
            await using var connection = await CreateAndOpenConnection(cancellationToken);

            // CommandDefinition is how Dapper accepts a cancellation token.
            var command = new CommandDefinition(getAllTorrentsSql, cancellationToken: cancellationToken);
            var torrents = await connection.QueryAsync<Torrent>(command);

            return torrents.ToList();
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error while loading torrents from database");
            return Array.Empty<Torrent>();
        }
    }

    /// <summary>Deletes all torrents whose <c>infoHash</c> is in <paramref name="infoHashes"/>.</summary>
    /// <param name="infoHashes">Hashes to delete; an empty collection is a no-op.</param>
    /// <param name="cancellationToken">Cancels opening the connection and running the statement.</param>
    public async Task DeleteTorrentsByInfoHashes(IReadOnlyCollection<string> infoHashes, CancellationToken cancellationToken = default)
    {
        const string deleteTorrentsSql = "DELETE FROM torrents WHERE \"infoHash\" = ANY(@infoHashes)";

        // Nothing to delete: skip the database round trip.
        if (infoHashes.Count == 0)
        {
            return;
        }

        try
        {
            await using var connection = await CreateAndOpenConnection(cancellationToken);

            var command = new CommandDefinition(deleteTorrentsSql, new { infoHashes }, cancellationToken: cancellationToken);
            await connection.ExecuteAsync(command);
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error while deleting torrents from database");
        }
    }

    // Opens a new connection; the caller owns and disposes it.
    private async Task<NpgsqlConnection> CreateAndOpenConnection(CancellationToken cancellationToken = default)
    {
        var connection = new NpgsqlConnection(configuration.StorageConnectionString);
        await connection.OpenAsync(cancellationToken);

        return connection;
    }
}
|
||||||
7
src/tissue/Features/DataProcessing/IDataStorage.cs
Normal file
7
src/tissue/Features/DataProcessing/IDataStorage.cs
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
namespace Tissue.Features.DataProcessing;
|
||||||
|
|
||||||
|
/// <summary>
/// Storage operations the clean-up service needs on the torrent store.
/// </summary>
public interface IDataStorage
{
    /// <summary>Returns every stored torrent.</summary>
    /// <param name="cancellationToken">Token for cancelling the operation.</param>
    /// <returns>The torrents; the signature allows <c>null</c>.</returns>
    Task<IReadOnlyCollection<Torrent>?> GetAllTorrents(CancellationToken cancellationToken = default);

    /// <summary>Deletes the torrents identified by the given info hashes.</summary>
    /// <param name="infoHashes">Info hashes of the torrents to delete.</param>
    /// <param name="cancellationToken">Token for cancelling the operation.</param>
    Task DeleteTorrentsByInfoHashes(IReadOnlyCollection<string> infoHashes, CancellationToken cancellationToken = default);
}
|
||||||
19
src/tissue/Features/DataProcessing/PostgresConfiguration.cs
Normal file
19
src/tissue/Features/DataProcessing/PostgresConfiguration.cs
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
namespace Tissue.Features.DataProcessing;
|
||||||
|
|
||||||
|
/// <summary>
/// PostgreSQL connection settings read from <c>POSTGRES_*</c> environment variables
/// when the instance is constructed.
/// </summary>
public class PostgresConfiguration
{
    private const string Prefix = "POSTGRES";
    private const string HostVariable = "HOST";
    private const string UsernameVariable = "USER";
    private const string PasswordVariable = "PASSWORD";
    private const string DatabaseVariable = "DB";
    private const string PortVariable = "PORT";

    // HOST, USER, PASSWORD and DB are required: construction throws when one is missing.
    private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
    private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
    private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
    private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);

    // PORT is optional and falls back to 5432.
    private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);

    /// <summary>
    /// The Npgsql connection string for these settings. It is produced by
    /// <see cref="NpgsqlConnectionStringBuilder"/> so that values containing <c>;</c> or
    /// <c>=</c> (typically the password) are quoted instead of corrupting the string.
    /// </summary>
    public string StorageConnectionString =>
        new NpgsqlConnectionStringBuilder
        {
            Host = Host,
            Port = Port,
            Username = Username,
            Password = Password,
            Database = Database,
        }.ConnectionString;
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
namespace Tissue.Features.DataProcessing;
|
||||||
|
|
||||||
|
/// <summary>
/// Registration of the data-access services.
/// </summary>
internal static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers a <see cref="PostgresConfiguration"/> instance and a transient
    /// <see cref="IDataStorage"/> implemented by <see cref="DapperDataStorage"/>.
    /// </summary>
    /// <returns>The same service collection, for chaining.</returns>
    internal static IServiceCollection AddDataStorage(this IServiceCollection services)
    {
        // The configuration is registered first; DapperDataStorage takes it as a constructor dependency.
        _ = services.LoadConfigurationFromEnv<PostgresConfiguration>();
        _ = services.AddTransient<IDataStorage, DapperDataStorage>();

        return services;
    }
}
|
||||||
20
src/tissue/Features/DataProcessing/Torrent.cs
Normal file
20
src/tissue/Features/DataProcessing/Torrent.cs
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
namespace Tissue.Features.DataProcessing;
|
||||||
|
|
||||||
|
/// <summary>
/// Row model that Dapper materialises from the <c>torrents</c> table.
/// </summary>
public class Torrent
{
    // Identity and origin.
    public string? InfoHash { get; set; }
    public string? Provider { get; set; }
    public string? TorrentId { get; set; }

    // Descriptive metadata; Title is the field scanned for banned terms.
    public string? Title { get; set; }
    public long? Size { get; set; }
    public string? Type { get; set; }
    public DateTime UploadDate { get; set; }
    public short? Seeders { get; set; }
    public string? Trackers { get; set; }
    public string? Languages { get; set; }
    public string? Resolution { get; set; }

    // Flags and timestamps.
    public bool Reviewed { get; set; }
    public bool Opened { get; set; }
    public DateTime CreatedAt { get; set; }
    public DateTime UpdatedAt { get; set; }
}
|
||||||
93
src/tissue/Features/Kleenex/KleenexService.cs
Normal file
93
src/tissue/Features/Kleenex/KleenexService.cs
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
namespace Tissue.Features.Kleenex;
|
||||||
|
|
||||||
|
/// <summary>
/// One-shot hosted service: loads every torrent, deletes those whose title contains a
/// banned word or compound term, then terminates the process through
/// <see cref="Environment.Exit(int)"/> (0 on success or nothing to do, 1 on error).
/// </summary>
public class KleenexService(IDataStorage dataStorage, ILogger<KleenexService> logger, IWordCollections wordCollections) : IHostedService
{
    // Union of compound phrases, JAV codes and star names; matched as substrings.
    private readonly HashSet<string> _combinedCompounds = [];

    /// <summary>Runs the whole clean-up and exits the process.</summary>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        try
        {
            logger.LogInformation("Kleenex service started");
            logger.LogInformation("Get ready to pop all that corn...");

            var torrents = await LoadTorrents(cancellationToken);

            AddCompoundTerms(wordCollections.AdultCompoundPhrases);
            AddCompoundTerms(wordCollections.Jav);
            AddCompoundTerms(wordCollections.AdultStars);

            var infoHashesToDelete = GetInfoHashesToDelete(torrents);

            await dataStorage.DeleteTorrentsByInfoHashes(infoHashesToDelete, cancellationToken);

            logger.LogInformation("Deleted {TorrentCount} torrents", infoHashesToDelete.Count);

            logger.LogInformation("Kleenex service completed successfully");

            Environment.Exit(0);
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error while processing torrents");
            Environment.Exit(1);
        }
    }

    /// <summary>Logs the shutdown; there is nothing to release.</summary>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Service Shutdown");
        return Task.CompletedTask;
    }

    // Blank terms are dropped: every string contains the empty string, so a blank
    // line in a word list would otherwise mark every torrent for deletion.
    private void AddCompoundTerms(IEnumerable<string> terms) =>
        _combinedCompounds.UnionWith(terms.Where(term => !string.IsNullOrWhiteSpace(term)));

    // Collects the info hashes of all flagged torrents; exits the process with code 0
    // when there is nothing to delete.
    private List<string> GetInfoHashesToDelete(IReadOnlyCollection<Torrent> torrents)
    {
        var infoHashesToDelete = torrents
            .Where(torrent => HasBannedTerms(torrent.Title))
            .Select(torrent => torrent.InfoHash)
            .OfType<string>() // rows without an info hash cannot be targeted by the delete
            .ToList();

        if (infoHashesToDelete.Count == 0)
        {
            logger.LogInformation("No torrents to delete");
            Environment.Exit(0);
        }

        return infoHashesToDelete;
    }

    // Loads all torrents; exits the process with code 0 when there are none.
    private async Task<IReadOnlyCollection<Torrent>> LoadTorrents(CancellationToken cancellationToken)
    {
        var torrents = await dataStorage.GetAllTorrents(cancellationToken);

        if (torrents is null || torrents.Count == 0)
        {
            logger.LogInformation("No torrents found");
            Environment.Exit(0);
        }

        logger.LogInformation("Found {TorrentCount} torrents", torrents.Count);
        return torrents;
    }

    // True when the title contains a banned word (whole-word lookup, three or more
    // characters) or any combined compound term (substring match).
    private bool HasBannedTerms(string? targetTitle)
    {
        // A torrent without a title has nothing to match; treating it as clean keeps
        // one bad row from aborting the whole run.
        if (string.IsNullOrWhiteSpace(targetTitle))
        {
            return false;
        }

        var normalisedTitle = targetTitle.NormalizeTitle();

        var hasBannedWords = normalisedTitle
            .Split(' ')
            .Any(word => word.Length >= 3 && wordCollections.AdultWords.Contains(word));

        var hasCompounds = _combinedCompounds.Any(term => normalisedTitle.Contains(term, StringComparison.OrdinalIgnoreCase));

        if (!hasBannedWords && !hasCompounds)
        {
            logger.LogInformation("No banned terms found in torrent title: {Title}", targetTitle);
            return false;
        }

        logger.LogWarning("Banned terms found in torrent title: {Title}", targetTitle);
        return true;
    }
}
|
||||||
14
src/tissue/Features/Wordlists/IWordCollections.cs
Normal file
14
src/tissue/Features/Wordlists/IWordCollections.cs
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
namespace Tissue.Features.Wordlists;
|
||||||
|
|
||||||
|
/// <summary>
/// The word lists used to recognise banned torrent titles.
/// </summary>
public interface IWordCollections
{
    /// <summary>Single banned words.</summary>
    HashSet<string> AdultWords { get; }

    /// <summary>Banned multi-word phrases.</summary>
    HashSet<string> AdultCompoundPhrases { get; }

    /// <summary>Entries of the JAV list.</summary>
    HashSet<string> Jav { get; }

    /// <summary>Entries of the adult-stars list.</summary>
    HashSet<string> AdultStars { get; }

    /// <summary>Populates all four sets.</summary>
    Task LoadAsync();
}
|
||||||
20
src/tissue/Features/Wordlists/PopulationService.cs
Normal file
20
src/tissue/Features/Wordlists/PopulationService.cs
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
namespace Tissue.Features.Wordlists;
|
||||||
|
|
||||||
|
/// <summary>
/// Hosted service that loads the word collections at start-up and logs how many
/// entries each list contains.
/// </summary>
public class PopulationService(IWordCollections wordCollections, ILogger<PopulationService> logger) : IHostedService
{
    /// <summary>Loads the word collections and logs their sizes.</summary>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Loading word collections...");

        await wordCollections.LoadAsync();

        // Sizes are read only after LoadAsync has completed.
        var adultWordCount = wordCollections.AdultWords.Count;
        logger.LogInformation("Adult Words Count: {Count}", adultWordCount);

        var compoundPhraseCount = wordCollections.AdultCompoundPhrases.Count;
        logger.LogInformation("Adult Compound Phrases Count: {Count}", compoundPhraseCount);

        var javCount = wordCollections.Jav.Count;
        logger.LogInformation("Jav Count: {Count}", javCount);

        var adultStarCount = wordCollections.AdultStars.Count;
        logger.LogInformation("Adult Stars Count: {Count}", adultStarCount);

        logger.LogInformation("Word collections loaded.");
    }

    /// <summary>Nothing to stop; completes immediately.</summary>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        return Task.CompletedTask;
    }
}
|
||||||
12
src/tissue/Features/Wordlists/ServiceCollectionExtensions.cs
Normal file
12
src/tissue/Features/Wordlists/ServiceCollectionExtensions.cs
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
namespace Tissue.Features.Wordlists;
|
||||||
|
|
||||||
|
/// <summary>
/// Registration of the word-list services.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="WordCollections"/> as the singleton <see cref="IWordCollections"/>
    /// and <see cref="PopulationService"/> as a hosted service.
    /// </summary>
    /// <returns>The same service collection, for chaining.</returns>
    public static IServiceCollection RegisterWordCollections(this IServiceCollection services)
    {
        _ = services.AddSingleton<IWordCollections, WordCollections>();
        _ = services.AddHostedService<PopulationService>();

        return services;
    }
}
|
||||||
55
src/tissue/Features/Wordlists/WordCollections.cs
Normal file
55
src/tissue/Features/Wordlists/WordCollections.cs
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
namespace Tissue.Features.Wordlists;
|
||||||
|
|
||||||
|
/// <summary>
/// Loads the banned-term word lists from text files (one entry per line) in the
/// <c>Data</c> folder under the application base directory.
/// </summary>
public class WordCollections : IWordCollections
{
    private const string AdultWordsFile = "adult-words.txt";
    private const string AdultCompoundPhrasesFile = "adult-compound-words.txt";
    private const string AdultStarsFile = "adult-stars.txt";
    private const string JavFile = "jav.txt";

    /// <inheritdoc />
    public HashSet<string> AdultWords { get; private set; } = [];

    /// <inheritdoc />
    public HashSet<string> AdultCompoundPhrases { get; private set; } = [];

    /// <inheritdoc />
    public HashSet<string> AdultStars { get; private set; } = [];

    /// <inheritdoc />
    public HashSet<string> Jav { get; private set; } = [];

    /// <summary>
    /// Reads the four files concurrently and replaces all four sets once every file
    /// has been read successfully.
    /// </summary>
    public async Task LoadAsync()
    {
        var adultWords = LoadSetAsync(AdultWordsFile);
        var adultCompounds = LoadSetAsync(AdultCompoundPhrasesFile);
        var jav = LoadSetAsync(JavFile);
        var adultStars = LoadSetAsync(AdultStarsFile);

        await Task.WhenAll(adultWords, adultCompounds, jav, adultStars);

        // All tasks have completed, so these awaits return immediately.
        AdultWords = await adultWords;
        AdultCompoundPhrases = await adultCompounds;
        Jav = await jav;
        AdultStars = await adultStars;
    }

    // Reads one list. Blank and whitespace-only lines are skipped: as terms they would
    // match every title in substring checks, because any string contains "".
    private static async Task<HashSet<string>> LoadSetAsync(string fileName)
    {
        var lines = await File.ReadAllLinesAsync(GetPath(fileName));

        return lines
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .ToHashSet();
    }

    private static string GetPath(string fileName) => Path.Combine(AppContext.BaseDirectory, "Data", fileName);
}
|
||||||
13
src/tissue/GlobalUsings.cs
Normal file
13
src/tissue/GlobalUsings.cs
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
// Global using directives
|
||||||
|
|
||||||
|
global using System.Text.RegularExpressions;
|
||||||
|
global using Dapper;
|
||||||
|
global using Microsoft.Extensions.DependencyInjection;
|
||||||
|
global using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||||
|
global using Microsoft.Extensions.Logging;
|
||||||
|
global using Npgsql;
|
||||||
|
global using Serilog;
|
||||||
|
global using Tissue.Extensions;
|
||||||
|
global using Tissue.Features.DataProcessing;
|
||||||
|
global using Tissue.Features.Kleenex;
|
||||||
|
global using Tissue.Features.Wordlists;
|
||||||
14
src/tissue/Program.cs
Normal file
14
src/tissue/Program.cs
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
// Entry point: configure the host, register services, then build and run it.
var builder = Host.CreateApplicationBuilder();

builder.Configuration.AddScrapeConfiguration();

// The registration calls are kept in their original order.
builder.Services
    .AddDataStorage()
    .RegisterWordCollections()
    .AddSerilogLogging(builder.Configuration)
    .AddKleenexService();

await builder.Build().RunAsync();
|
||||||
35
src/tissue/Tissue.csproj
Normal file
35
src/tissue/Tissue.csproj
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk.Worker">

  <!-- Build settings for the Tissue executable. -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
    <UserSecretsId>54cad2ee-57df-4bb2-a192-d5d501448e0b</UserSecretsId>
  </PropertyGroup>

  <!-- NuGet package dependencies. -->
  <ItemGroup>
    <PackageReference Include="Dapper" Version="2.1.28" />
    <PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
    <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
    <PackageReference Include="Npgsql" Version="8.0.1" />
    <PackageReference Include="Serilog" Version="3.1.1" />
    <PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
    <PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
  </ItemGroup>

  <!--
    Configuration JSON files and everything under Data are removed from the Content items
    and re-added as None items that are always copied to the output directory.
  -->
  <ItemGroup>
    <Content Remove="Configuration\*.json" />
    <Content Remove="Data\**" />
    <None Include="Configuration\*.json" CopyToOutputDirectory="Always" />
    <None Include="Data\**" CopyToOutputDirectory="Always" />
  </ItemGroup>

</Project>
|
||||||
Reference in New Issue
Block a user