Improve producer matching - Add tissue service

Tissue service will sanitize the existing database of ingested torrents by matching existing titles against the new banned word lists. Now with added kleenex.
This commit is contained in:
iPromKnight
2024-03-12 10:29:13 +00:00
parent e24d81dd96
commit 79a6aa3cb0
28 changed files with 257591 additions and 37 deletions

14
.github/workflows/build_tissue.yaml vendored Normal file
View File

@@ -0,0 +1,14 @@
# Workflow that builds and pushes the Tissue service image.
# NOTE(review): indentation is shown exactly as it appears in this view; confirm nesting against the real file.
name: Build and Push Tissue Service
# Runs on pushes that touch files under src/tissue/.
on:
push:
paths:
- 'src/tissue/**'
jobs:
process:
# Delegates the work to the shared base image workflow in this repository.
uses: ./.github/workflows/base_image_workflow.yaml
# Passes the caller's secrets through to the called workflow.
secrets: inherit
with:
# Inputs handed to the shared workflow: the build context directory and the image name.
CONTEXT: ./src/tissue/
IMAGE_NAME: knightcrawler-tissue

View File

@@ -17,5 +17,5 @@ repos:
hooks: hooks:
- id: codespell - id: codespell
exclude: | exclude: |
(?x)^(src/node/consumer/test/.*|src/producer/Data/.*)$ (?x)^(src/node/consumer/test/.*|src/producer/Data/.*|src/tissue/Data/.*)$
args: ["-L", "strem,chage"] args: ["-L", "strem,chage"]

View File

@@ -35,7 +35,6 @@ adultprime
adulttime adulttime
adulttimepilots adulttimepilots
aebn aebn
anal
agentredgirl agentredgirl
alettaoceanempire alettaoceanempire
alexismonroe alexismonroe
@@ -123,7 +122,6 @@ aussieass
aussiefellatioqueens aussiefellatioqueens
aussiepov aussiepov
austinwilde austinwilde
av69
avadawn avadawn
avanal avanal
aventertainments aventertainments
@@ -715,7 +713,6 @@ hunterpov
hushpass hushpass
hussiepass hussiepass
hustlaz hustlaz
hustler
hustlerhd hustlerhd
hustlerparodies hustlerparodies
hustlerslesbians hustlerslesbians
@@ -1670,8 +1667,6 @@ tube8
x-art x-art
mommygotboobs mommygotboobs
threesomes threesomes
dp
onlyfans onlyfans
alterotic alterotic
assy
defloration defloration

View File

@@ -1,7 +1,24 @@
namespace Producer.Extensions; namespace Producer.Extensions;
public static class StringExtensions public static partial class StringExtensions
{ {
[GeneratedRegex("[^a-zA-Z0-9 ]")]
private static partial Regex NotAlphaNumeric();
private static readonly char[] separator = [' '];
public static bool IsNullOrEmpty(this string? value) => public static bool IsNullOrEmpty(this string? value) =>
string.IsNullOrEmpty(value); string.IsNullOrEmpty(value);
public static string NormalizeTitle(this string title)
{
var alphanumericTitle = NotAlphaNumeric().Replace(title, " ");
var words = alphanumericTitle.Split(separator, StringSplitOptions.RemoveEmptyEntries)
.Select(word => word.ToLower());
var normalizedTitle = string.Join(" ", words);
return normalizedTitle;
}
} }

View File

@@ -1,8 +1,21 @@
namespace Producer.Features.ParseTorrentTitle; namespace Producer.Features.ParseTorrentTitle;
public partial class ParsingService(IWordCollections wordCollections, ITorrentTitleParser torrentTitleParser) : IParsingService public partial class ParsingService : IParsingService
{ {
private readonly IWordCollections _wordCollections;
private readonly ITorrentTitleParser _torrentTitleParser;
private static readonly char[] WhitespaceSeparator = [' ']; private static readonly char[] WhitespaceSeparator = [' '];
private HashSet<string> _compoundWords = [];
public ParsingService(IWordCollections wordCollections, ITorrentTitleParser torrentTitleParser)
{
_wordCollections = wordCollections;
_torrentTitleParser = torrentTitleParser;
_compoundWords.UnionWith(_wordCollections.AdultCompoundPhrases);
_compoundWords.UnionWith(_wordCollections.Jav);
_compoundWords.UnionWith(_wordCollections.AdultStars);
}
public string Naked(string title) => public string Naked(string title) =>
NakedMatcher().Replace(title.ToLower(), ""); NakedMatcher().Replace(title.ToLower(), "");
@@ -198,8 +211,8 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
public bool FlexEq(string test, string target, List<string> years) public bool FlexEq(string test, string target, List<string> years)
{ {
var movieTitle = torrentTitleParser.Parse(test).Movie.Title.ToLower(); var movieTitle = _torrentTitleParser.Parse(test).Movie.Title.ToLower();
var tvTitle = torrentTitleParser.Parse(test).Show.Title.ToLower(); var tvTitle = _torrentTitleParser.Parse(test).Show.Title.ToLower();
var target2 = WhitespaceMatcher().Replace(target, ""); var target2 = WhitespaceMatcher().Replace(target, "");
var test2 = WhitespaceMatcher().Replace(test, ""); var test2 = WhitespaceMatcher().Replace(test, "");
@@ -247,7 +260,7 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
return false; return false;
} }
var keyTerms = splits.Where(s => (s.Length > 1 && !wordCollections.CommonWords.Contains(s)) || s.Length > 5).ToList(); var keyTerms = splits.Where(s => (s.Length > 1 && !_wordCollections.CommonWords.Contains(s)) || s.Length > 5).ToList();
keyTerms.AddRange(target.Split(WhitespaceSeparator, StringSplitOptions.RemoveEmptyEntries).Where(e => e.Length > 2)); keyTerms.AddRange(target.Split(WhitespaceSeparator, StringSplitOptions.RemoveEmptyEntries).Where(e => e.Length > 2));
var keySet = new HashSet<string>(keyTerms); var keySet = new HashSet<string>(keyTerms);
var commonTerms = splits.Where(s => !keySet.Contains(s)).ToList(); var commonTerms = splits.Where(s => !keySet.Contains(s)).ToList();
@@ -296,42 +309,30 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
public bool HasNoBannedTerms(string targetTitle, string testTitle) public bool HasNoBannedTerms(string targetTitle, string testTitle)
{ {
var words = WordMatcher().Split(testTitle.ToLower()).Where(word => word.Length > 3).ToList(); var normalisedTitle = targetTitle.NormalizeTitle();
var hasBannedWords = words.Any(word => !targetTitle.Contains(word) && wordCollections.AdultWords.Contains(word)); var normalisedWords = normalisedTitle.Split(' ');
var titleWithoutSymbols = string.Join(' ', WordMatcher().Split(testTitle.ToLower())); var hasBannedWords = normalisedWords.Where(word => word.Length >= 3).Any(word => !targetTitle.Contains(word) && _wordCollections.AdultWords.Contains(word));
var hasJavWords = wordCollections.Jav.Any(jav => !targetTitle.Contains(jav) && titleWithoutSymbols.Contains(jav)); var hasCompounds = _compoundWords.Any(term => normalisedTitle.Contains(term, StringComparison.OrdinalIgnoreCase));
var hasAdultStars = wordCollections.AdultStars.Any(star => !targetTitle.Contains(star) && titleWithoutSymbols.Contains(star));
var hasBannedCompoundWords = wordCollections.AdultCompoundPhrases.Any(compoundWord => !targetTitle.Contains(compoundWord) && titleWithoutSymbols.Contains(compoundWord));
return !hasBannedWords && return !hasBannedWords &&
!hasJavWords && !hasCompounds;
!hasAdultStars &&
!hasBannedCompoundWords;
} }
public bool HasNoBannedTerms(string targetTitle) public bool HasNoBannedTerms(string targetTitle)
{ {
var words = WordMatcher().Split(targetTitle.ToLower()).ToList(); var normalisedTitle = targetTitle.NormalizeTitle();
var hasBannedWords = words.Any(word => wordCollections.AdultWords.Contains(word)); var normalisedWords = normalisedTitle.Split(' ');
var inputWithoutSymbols = string.Join(' ', WordMatcher().Split(targetTitle.ToLower())); var hasBannedWords = normalisedWords.Where(word => word.Length >= 3).Any(word => normalisedWords.Contains(word, StringComparer.OrdinalIgnoreCase) && _wordCollections.AdultWords.Contains(word));
var hasJavWords = wordCollections.Jav.Any(jav => inputWithoutSymbols.Contains(jav, StringComparison.OrdinalIgnoreCase)); var hasCompounds = _compoundWords.Any(term => normalisedTitle.Contains(term, StringComparison.OrdinalIgnoreCase));
var hasAdultStars = wordCollections.AdultStars.Any(star => inputWithoutSymbols.Contains(star, StringComparison.OrdinalIgnoreCase));
var hasBannedCompoundWords = wordCollections.AdultCompoundPhrases.Any(compoundWord => inputWithoutSymbols.Contains(compoundWord, StringComparison.OrdinalIgnoreCase));
return !hasBannedWords && return !hasBannedWords &&
!hasJavWords && !hasCompounds;
!hasAdultStars &&
!hasBannedCompoundWords;
} }
public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle) => MatchesTitle(targetTitle, years, testTitle) && HasNoBannedTerms(targetTitle, testTitle); public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle) => MatchesTitle(targetTitle, years, testTitle) && HasNoBannedTerms(targetTitle, testTitle);
@@ -343,10 +344,10 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
.Where(word => word.Length > 3) .Where(word => word.Length > 3)
.ToList(); .ToList();
return processedTitle.Count(word => !wordCollections.CommonWords.Contains(word)); return processedTitle.Count(word => !_wordCollections.CommonWords.Contains(word));
} }
public ParsedFilename Parse(string name) => torrentTitleParser.Parse(name); public ParsedFilename Parse(string name) => _torrentTitleParser.Parse(name);
public TorrentType GetTypeByName(string name) => torrentTitleParser.GetTypeByName(name); public TorrentType GetTypeByName(string name) => _torrentTitleParser.GetTypeByName(name);
} }

View File

@@ -0,0 +1,31 @@
{
"Serilog": {
"Using": [ "Serilog.Sinks.Console" ],
"MinimumLevel": {
"Default": "Information",
"Override": {
"Microsoft": "Warning",
"System": "Warning",
"System.Net.Http.HttpClient.Scraper.LogicalHandler": "Warning",
"System.Net.Http.HttpClient.Scraper.ClientHandler": "Warning",
"Quartz.Impl.StdSchedulerFactory": "Warning",
"Quartz.Core.QuartzScheduler": "Warning",
"Quartz.Simpl.RAMJobStore": "Warning",
"Quartz.Core.JobRunShell": "Warning",
"Quartz.Core.SchedulerSignalerImpl": "Warning"
}
},
"WriteTo": [
{
"Name": "Console",
"Args": {
"outputTemplate": "{Timestamp:HH:mm:ss} [{Level}] [{SourceContext}] {Message}{NewLine}{Exception}"
}
}
],
"Enrich": [ "FromLogContext", "WithMachineName", "WithThreadId" ],
"Properties": {
"Application": "Producer"
}
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

234602
src/tissue/Data/jav.txt Normal file

File diff suppressed because it is too large Load Diff

17
src/tissue/Dockerfile Normal file
View File

@@ -0,0 +1,17 @@
# Build stage: uses the .NET 8 SDK image for the build platform and publishes for TARGETARCH.
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0 AS build
ARG TARGETARCH
WORKDIR /App
COPY . ./
# Restore and publish are separate steps; publish reuses the restore via --no-restore.
RUN dotnet restore -a $TARGETARCH
RUN dotnet publish -c Release --no-restore -o out -a $TARGETARCH
# Runtime stage: Alpine-based ASP.NET 8 image that only receives the published output.
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
WORKDIR /App
COPY --from=build /App/out .
# Create a dedicated system group/user named "tissue" and run the container as that user.
RUN addgroup -S tissue && adduser -S -G tissue tissue
USER tissue
# Health check passes while a process matching "dotnet" exists.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD pgrep -f dotnet || exit 1
ENTRYPOINT ["dotnet", "Tissue.dll"]

View File

@@ -0,0 +1,44 @@
namespace Tissue.Extensions;
/// <summary>
/// Extension helpers that register the configuration sources used by the service and
/// expose configuration objects as singletons in the service collection.
/// </summary>
public static class ConfigurationExtensions
{
    private const string ConfigurationFolder = "Configuration";
    private const string LoggingConfig = "logging.json";

    /// <summary>
    /// Points the builder at the <c>Configuration</c> folder next to the application binaries and
    /// adds, in order: the required <c>logging.json</c> file (reloaded on change), environment
    /// variables and the user secrets associated with <c>Program</c>.
    /// </summary>
    /// <param name="configuration">The configuration builder to extend.</param>
    /// <returns>The same <paramref name="configuration"/> instance.</returns>
    public static IConfigurationBuilder AddScrapeConfiguration(this IConfigurationBuilder configuration)
    {
        var basePath = Path.Combine(AppContext.BaseDirectory, ConfigurationFolder);

        configuration
            .SetBasePath(basePath)
            .AddJsonFile(LoggingConfig, optional: false, reloadOnChange: true)
            .AddEnvironmentVariables()
            .AddUserSecrets<Program>();

        return configuration;
    }

    /// <summary>
    /// Binds the configuration section <paramref name="sectionName"/> to a new
    /// <typeparamref name="TConfiguration"/> and registers it as a singleton (unless one is already registered).
    /// </summary>
    /// <param name="services">The service collection to register the instance in.</param>
    /// <param name="configuration">The configuration to read the section from.</param>
    /// <param name="sectionName">Name of the section to bind.</param>
    /// <returns>The bound configuration instance.</returns>
    /// <exception cref="ArgumentNullException">The section could not be bound to an instance.</exception>
    public static TConfiguration LoadConfigurationFromConfig<TConfiguration>(this IServiceCollection services, IConfiguration configuration, string sectionName)
        where TConfiguration : class
    {
        var section = configuration.GetSection(sectionName);
        var instance = section.Get<TConfiguration>();

        ArgumentNullException.ThrowIfNull(instance, nameof(instance));

        services.TryAddSingleton(instance);
        return instance;
    }

    /// <summary>
    /// Creates a <typeparamref name="TConfiguration"/> through its parameterless constructor and
    /// registers it as a singleton (unless one is already registered).
    /// </summary>
    /// <param name="services">The service collection to register the instance in.</param>
    /// <returns>The created configuration instance.</returns>
    public static TConfiguration LoadConfigurationFromEnv<TConfiguration>(this IServiceCollection services)
        where TConfiguration : class
    {
        var instance = Activator.CreateInstance<TConfiguration>();

        ArgumentNullException.ThrowIfNull(instance, nameof(instance));

        services.TryAddSingleton(instance);
        return instance;
    }
}

View File

@@ -0,0 +1,68 @@
namespace Tissue.Extensions;
/// <summary>
/// Helpers for reading typed values from environment variables whose names follow the
/// <c>{prefix}_{varName}</c> convention.
/// </summary>
public static class EnvironmentExtensions
{
    /// <summary>
    /// Reads <c>{prefix}_{varName}</c> as a boolean.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <param name="fallback">Value returned when the variable is unset or empty.</param>
    /// <returns>
    /// <c>true</c> when the trimmed value is <c>true</c>, <c>yes</c> or <c>1</c> (case-insensitive);
    /// <paramref name="fallback"/> when unset or empty; otherwise <c>false</c>.
    /// </returns>
    public static bool GetEnvironmentVariableAsBool(this string prefix, string varName, bool fallback = false)
    {
        var raw = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));

        if (string.IsNullOrEmpty(raw))
        {
            return fallback;
        }

        // Ordinal, case-insensitive comparison keeps the result independent of the current culture.
        var value = raw.Trim();
        return value.Equals("true", StringComparison.OrdinalIgnoreCase)
               || value.Equals("yes", StringComparison.OrdinalIgnoreCase)
               || value.Equals("1", StringComparison.Ordinal);
    }

    /// <summary>
    /// Reads <c>{prefix}_{varName}</c> as an integer.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <param name="fallback">Value returned when the variable is unset, empty or not a valid integer.</param>
    /// <returns>The parsed integer, or <paramref name="fallback"/>.</returns>
    public static int GetEnvironmentVariableAsInt(this string prefix, string varName, int fallback = 0)
    {
        var raw = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));

        if (string.IsNullOrEmpty(raw))
        {
            return fallback;
        }

        // Environment values are machine-readable, so parse them with the invariant culture.
        return int.TryParse(
            raw,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var result)
            ? result
            : fallback;
    }

    /// <summary>
    /// Reads <c>{prefix}_{varName}</c> as a string that must be present.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <returns>The variable's value.</returns>
    /// <exception cref="InvalidOperationException">The variable is unset or empty.</exception>
    public static string GetRequiredEnvironmentVariableAsString(this string prefix, string varName)
    {
        var fullVarName = GetFullVariableName(prefix, varName);
        var raw = Environment.GetEnvironmentVariable(fullVarName);

        if (string.IsNullOrEmpty(raw))
        {
            throw new InvalidOperationException($"Environment variable {fullVarName} is not set");
        }

        return raw;
    }

    /// <summary>
    /// Reads <c>{prefix}_{varName}</c> as an optional string.
    /// </summary>
    /// <param name="prefix">Variable name prefix (the part before the underscore).</param>
    /// <param name="varName">Variable name suffix (the part after the underscore).</param>
    /// <param name="fallback">Value returned when the variable is unset or empty; may be <c>null</c>.</param>
    /// <returns>
    /// The variable's value, or <paramref name="fallback"/> (which is <c>null</c> by default, hence the
    /// nullable return type).
    /// </returns>
    public static string? GetOptionalEnvironmentVariableAsString(this string prefix, string varName, string? fallback = null)
    {
        var raw = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));

        return string.IsNullOrEmpty(raw) ? fallback : raw;
    }

    // Builds the full environment variable name: prefix, underscore, variable name.
    private static string GetFullVariableName(string prefix, string varName) => $"{prefix}_{varName}";
}

View File

@@ -0,0 +1,28 @@
namespace Tissue.Extensions;
/// <summary>
/// Service registration helpers for logging and the Kleenex hosted service.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Replaces the default logging providers with a Serilog logger built from
    /// <paramref name="configuration"/>.
    /// </summary>
    /// <param name="services">The service collection to configure.</param>
    /// <param name="configuration">Configuration the Serilog logger is read from.</param>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    public static IServiceCollection AddSerilogLogging(this IServiceCollection services, IConfiguration configuration) =>
        services.AddLogging(
            logging =>
            {
                // Drop whatever providers are registered so Serilog is the only sink.
                logging.ClearProviders();

                logging.AddSerilog(
                    new LoggerConfiguration()
                        .ReadFrom.Configuration(configuration)
                        .CreateLogger());
            });

    /// <summary>
    /// Registers <c>KleenexService</c> as a hosted service.
    /// </summary>
    /// <param name="services">The service collection to configure.</param>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    public static IServiceCollection AddKleenexService(this IServiceCollection services) =>
        services.AddHostedService<KleenexService>();
}

View File

@@ -0,0 +1,24 @@
namespace Tissue.Extensions;
/// <summary>
/// String helpers used when matching torrent titles against word lists.
/// </summary>
public static partial class StringExtensions
{
    // Matches every character that is not an ASCII letter, an ASCII digit or a space.
    [GeneratedRegex("[^a-zA-Z0-9 ]")]
    private static partial Regex NotAlphaNumeric();

    private static readonly char[] separator = [' '];

    /// <summary>Extension-method form of <see cref="string.IsNullOrEmpty(string?)"/>.</summary>
    /// <param name="value">The string to test; may be <c>null</c>.</param>
    /// <returns><c>true</c> when <paramref name="value"/> is <c>null</c> or empty.</returns>
    public static bool IsNullOrEmpty(this string? value) =>
        string.IsNullOrEmpty(value);

    /// <summary>
    /// Normalizes a title for word matching: every character other than an ASCII letter, digit or
    /// space becomes a space, the result is split on spaces (empty entries dropped), each word is
    /// lower-cased and the words are re-joined with single spaces.
    /// </summary>
    /// <param name="title">The raw title.</param>
    /// <returns>The normalized title, e.g. <c>"The.Matrix-1999"</c> becomes <c>"the matrix 1999"</c>.</returns>
    public static string NormalizeTitle(this string title)
    {
        var alphanumericTitle = NotAlphaNumeric().Replace(title, " ");

        // Invariant lower-casing: culture-sensitive ToLower() maps 'I' to dotless 'ı' under
        // Turkish/Azeri cultures, which would stop titles from matching lower-case word lists.
        var words = alphanumericTitle
            .Split(separator, StringSplitOptions.RemoveEmptyEntries)
            .Select(word => word.ToLowerInvariant());

        return string.Join(" ", words);
    }
}

View File

@@ -0,0 +1,44 @@
namespace Tissue.Features.DataProcessing;
/// <summary>
/// Dapper/Npgsql implementation of <see cref="IDataStorage"/> that works against the
/// <c>torrents</c> table. Failures are logged and not rethrown.
/// </summary>
public class DapperDataStorage(PostgresConfiguration configuration, ILogger<DapperDataStorage> logger) : IDataStorage
{
    /// <summary>
    /// Loads every row of the <c>torrents</c> table.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel opening the connection and running the query.</param>
    /// <returns>The loaded torrents, or an empty list when the query fails (the error is logged).</returns>
    public async Task<IReadOnlyCollection<Torrent>?> GetAllTorrents(CancellationToken cancellationToken = default)
    {
        const string GetAllTorrentsSql = "SELECT * FROM torrents";

        try
        {
            await using var connection = await CreateAndOpenConnection(cancellationToken);

            // CommandDefinition is how Dapper accepts a cancellation token for the command itself.
            var command = new CommandDefinition(GetAllTorrentsSql, cancellationToken: cancellationToken);
            var torrents = await connection.QueryAsync<Torrent>(command);

            return torrents.ToList();
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error while loading torrents from database");
            return new List<Torrent>();
        }
    }

    /// <summary>
    /// Deletes every torrent whose <c>infoHash</c> is contained in <paramref name="infoHashes"/>.
    /// </summary>
    /// <param name="infoHashes">Info hashes of the torrents to delete.</param>
    /// <param name="cancellationToken">Token used to cancel opening the connection and running the delete.</param>
    /// <remarks>A failure is logged and swallowed, so callers cannot tell whether the delete succeeded.</remarks>
    public async Task DeleteTorrentsByInfoHashes(IReadOnlyCollection<string> infoHashes, CancellationToken cancellationToken = default)
    {
        const string DeleteTorrentsSql = "DELETE FROM torrents WHERE \"infoHash\" = ANY(@infoHashes)";

        try
        {
            await using var connection = await CreateAndOpenConnection(cancellationToken);

            var command = new CommandDefinition(DeleteTorrentsSql, new { infoHashes }, cancellationToken: cancellationToken);
            await connection.ExecuteAsync(command);
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error while deleting torrents from database");
        }
    }

    // Creates a connection from the configured connection string and opens it; the caller disposes it.
    private async Task<NpgsqlConnection> CreateAndOpenConnection(CancellationToken cancellationToken = default)
    {
        var connection = new NpgsqlConnection(configuration.StorageConnectionString);
        await connection.OpenAsync(cancellationToken);

        return connection;
    }
}

View File

@@ -0,0 +1,7 @@
namespace Tissue.Features.DataProcessing;
/// <summary>
/// Storage operations on torrents needed by the clean-up: load everything, delete by info hash.
/// </summary>
public interface IDataStorage
{
/// <summary>Loads all torrents.</summary>
/// <param name="cancellationToken">Token that can cancel the operation.</param>
/// <returns>The torrents; the signature allows <c>null</c>, so callers should handle it.</returns>
Task<IReadOnlyCollection<Torrent>?> GetAllTorrents(CancellationToken cancellationToken = default);
/// <summary>Deletes the torrents identified by the given info hashes.</summary>
/// <param name="infoHashes">Info hashes of the torrents to delete.</param>
/// <param name="cancellationToken">Token that can cancel the operation.</param>
Task DeleteTorrentsByInfoHashes(IReadOnlyCollection<string> infoHashes, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,19 @@
namespace Tissue.Features.DataProcessing;
/// <summary>
/// PostgreSQL connection settings read from environment variables when the object is created:
/// <c>POSTGRES_HOST</c>, <c>POSTGRES_USER</c>, <c>POSTGRES_PASSWORD</c> and <c>POSTGRES_DB</c> are
/// required; <c>POSTGRES_PORT</c> is optional and defaults to 5432.
/// </summary>
public class PostgresConfiguration
{
    private const string Prefix = "POSTGRES";
    private const string HostVariable = "HOST";
    private const string UsernameVariable = "USER";
    private const string PasswordVariable = "PASSWORD";
    private const string DatabaseVariable = "DB";
    private const string PortVariable = "PORT";

    private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
    private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
    private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
    private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
    private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);

    /// <summary>
    /// Connection string for Npgsql built from the settings above.
    /// </summary>
    /// <remarks>
    /// Built with <see cref="NpgsqlConnectionStringBuilder"/> rather than string interpolation so that
    /// values containing <c>;</c>, <c>=</c> or quotes (most likely the password) are escaped instead of
    /// corrupting the connection string.
    /// </remarks>
    public string StorageConnectionString =>
        new NpgsqlConnectionStringBuilder
        {
            Host = Host,
            Port = Port,
            Username = Username,
            Password = Password,
            Database = Database,
        }.ConnectionString;
}

View File

@@ -0,0 +1,12 @@
namespace Tissue.Features.DataProcessing;
/// <summary>
/// Registration helpers for the data processing feature.
/// </summary>
internal static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers the <see cref="PostgresConfiguration"/> instance and a transient
    /// <see cref="IDataStorage"/> implemented by <see cref="DapperDataStorage"/>.
    /// </summary>
    /// <param name="services">The service collection to configure.</param>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    internal static IServiceCollection AddDataStorage(this IServiceCollection services)
    {
        // The configuration object is created first so that it is registered before the storage.
        services.LoadConfigurationFromEnv<PostgresConfiguration>();

        return services.AddTransient<IDataStorage, DapperDataStorage>();
    }
}

View File

@@ -0,0 +1,20 @@
namespace Tissue.Features.DataProcessing;
/// <summary>
/// Plain property bag describing one torrent. It is the element type that
/// <c>IDataStorage.GetAllTorrents</c> returns.
/// </summary>
/// <remarks>
/// NOTE(review): property names presumably mirror the columns of the <c>torrents</c> table;
/// confirm against the database schema.
/// </remarks>
public class Torrent
{
/// <summary>Info hash of the torrent; the clean-up deletes torrents by this value.</summary>
public string? InfoHash { get; set; }
public string? Provider { get; set; }
public string? TorrentId { get; set; }
/// <summary>Title of the torrent; this is the text checked against the banned word lists.</summary>
public string? Title { get; set; }
public long? Size { get; set; }
public string? Type { get; set; }
public DateTime UploadDate { get; set; }
public short? Seeders { get; set; }
public string? Trackers { get; set; }
public string? Languages { get; set; }
public string? Resolution { get; set; }
public bool Reviewed { get; set; }
public bool Opened { get; set; }
public DateTime CreatedAt { get; set; }
public DateTime UpdatedAt { get; set; }
}

View File

@@ -0,0 +1,93 @@
namespace Tissue.Features.Kleenex;
/// <summary>
/// One-shot hosted service: loads all torrents, finds those whose title contains a banned term,
/// deletes them by info hash and then terminates the process (exit code 0 on success or when
/// there is nothing to do, 1 on an unexpected error).
/// </summary>
public class KleenexService(IDataStorage dataStorage, ILogger<KleenexService> logger, IWordCollections wordCollections) : IHostedService
{
    // Union of the compound phrases, JAV terms and adult star names; matched as substrings of the title.
    private readonly HashSet<string> _combinedCompounds = [];

    /// <summary>
    /// Runs the whole clean-up and exits the process when done.
    /// </summary>
    /// <param name="cancellationToken">Token passed on to the storage calls.</param>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        try
        {
            logger.LogInformation("Kleenex service started");
            logger.LogInformation("Get ready to pop all that corn...");

            var torrents = await LoadTorrents(cancellationToken);

            _combinedCompounds.UnionWith(wordCollections.AdultCompoundPhrases);
            _combinedCompounds.UnionWith(wordCollections.Jav);
            _combinedCompounds.UnionWith(wordCollections.AdultStars);

            // An empty or whitespace-only term is a substring of (almost) every title; matching on it
            // would flag the entire table for deletion, so such entries are discarded up front.
            _combinedCompounds.RemoveWhere(string.IsNullOrWhiteSpace);

            var infoHashesToDelete = GetInfoHashesToDelete(torrents);

            await dataStorage.DeleteTorrentsByInfoHashes(infoHashesToDelete, cancellationToken);

            logger.LogInformation("Deleted {TorrentCount} torrents", infoHashesToDelete.Count);
            logger.LogInformation("Kleenex service completed successfully");

            Environment.Exit(0);
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error while processing torrents");
            Environment.Exit(1);
        }
    }

    /// <summary>Logs the shutdown; there is nothing to stop.</summary>
    /// <param name="cancellationToken">Unused.</param>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Service Shutdown");
        return Task.CompletedTask;
    }

    // Collects the info hashes of all torrents with a banned title. Torrents without an info hash
    // cannot be addressed by the delete statement and are left out. Exits the process with code 0
    // when nothing needs deleting.
    private List<string> GetInfoHashesToDelete(IReadOnlyCollection<Torrent> torrents)
    {
        var infoHashesToDelete = torrents
            .Where(torrent => HasBannedTerms(torrent.Title))
            .Select(torrent => torrent.InfoHash)
            .OfType<string>()
            .ToList();

        if (infoHashesToDelete.Count == 0)
        {
            logger.LogInformation("No torrents to delete");
            Environment.Exit(0);
        }

        return infoHashesToDelete;
    }

    // Loads all torrents from storage. Exits the process with code 0 when none are available.
    private async Task<IReadOnlyCollection<Torrent>> LoadTorrents(CancellationToken cancellationToken)
    {
        var torrents = await dataStorage.GetAllTorrents(cancellationToken);

        if (torrents is null || torrents.Count == 0)
        {
            logger.LogInformation("No torrents found");
            Environment.Exit(0);
        }

        logger.LogInformation("Found {TorrentCount} torrents", torrents.Count);

        return torrents;
    }

    // Returns true when the title contains a banned single word (whole-word match, words of three or
    // more characters) or any of the combined compound terms (substring match, case-insensitive).
    // A missing title cannot be evaluated and is treated as clean instead of aborting the whole run.
    private bool HasBannedTerms(string? targetTitle)
    {
        if (string.IsNullOrWhiteSpace(targetTitle))
        {
            return false;
        }

        var normalisedTitle = targetTitle.NormalizeTitle();
        var normalisedWords = normalisedTitle.Split(' ');

        var hasBannedTerms =
            normalisedWords.Any(word => word.Length >= 3 && wordCollections.AdultWords.Contains(word))
            || _combinedCompounds.Any(term => normalisedTitle.Contains(term, StringComparison.OrdinalIgnoreCase));

        if (!hasBannedTerms)
        {
            logger.LogInformation("No banned terms found in torrent title: {Title}", targetTitle);
            return false;
        }

        logger.LogWarning("Banned terms found in torrent title: {Title}", targetTitle);
        return true;
    }
}

View File

@@ -0,0 +1,14 @@
namespace Tissue.Features.Wordlists;
/// <summary>
/// The word lists used to decide whether a torrent title is banned.
/// </summary>
public interface IWordCollections
{
/// <summary>Single banned words.</summary>
HashSet<string> AdultWords { get; }
/// <summary>Banned multi-word phrases.</summary>
HashSet<string> AdultCompoundPhrases { get; }
/// <summary>Banned JAV terms.</summary>
HashSet<string> Jav { get; }
/// <summary>Banned adult star names.</summary>
HashSet<string> AdultStars { get; }
/// <summary>Loads the collections; expected to complete before the sets are read.</summary>
Task LoadAsync();
}

View File

@@ -0,0 +1,20 @@
namespace Tissue.Features.Wordlists;
/// <summary>
/// Hosted service that loads the word collections on start-up and logs how many entries each
/// collection holds.
/// </summary>
public class PopulationService(IWordCollections wordCollections, ILogger<PopulationService> logger) : IHostedService
{
    /// <summary>Loads the word collections and logs their sizes.</summary>
    /// <param name="cancellationToken">Not used by the load.</param>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Loading word collections...");

        await wordCollections.LoadAsync();

        var adultWordCount = wordCollections.AdultWords.Count;
        var compoundPhraseCount = wordCollections.AdultCompoundPhrases.Count;
        var javCount = wordCollections.Jav.Count;
        var adultStarCount = wordCollections.AdultStars.Count;

        logger.LogInformation("Adult Words Count: {Count}", adultWordCount);
        logger.LogInformation("Adult Compound Phrases Count: {Count}", compoundPhraseCount);
        logger.LogInformation("Jav Count: {Count}", javCount);
        logger.LogInformation("Adult Stars Count: {Count}", adultStarCount);

        logger.LogInformation("Word collections loaded.");
    }

    /// <summary>Nothing to stop; completes immediately.</summary>
    /// <param name="cancellationToken">Unused.</param>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        return Task.CompletedTask;
    }
}

View File

@@ -0,0 +1,12 @@
namespace Tissue.Features.Wordlists;
/// <summary>
/// Registration helpers for the word list feature.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="WordCollections"/> as the singleton <see cref="IWordCollections"/> and
    /// <see cref="PopulationService"/> as a hosted service.
    /// </summary>
    /// <param name="services">The service collection to configure.</param>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    public static IServiceCollection RegisterWordCollections(this IServiceCollection services) =>
        services
            .AddSingleton<IWordCollections, WordCollections>()
            .AddHostedService<PopulationService>();
}

View File

@@ -0,0 +1,55 @@
namespace Tissue.Features.Wordlists;
/// <summary>
/// Loads the banned word lists from text files (one entry per line) in the <c>Data</c> folder next
/// to the application binaries.
/// </summary>
public class WordCollections : IWordCollections
{
    private const string AdultWordsFile = "adult-words.txt";
    private const string AdultCompoundPhrasesFile = "adult-compound-words.txt";
    private const string AdultStarsFile = "adult-stars.txt";
    private const string JavFile = "jav.txt";

    /// <summary>Entries of <c>adult-words.txt</c>; empty until <see cref="LoadAsync"/> has completed.</summary>
    public HashSet<string> AdultWords { get; private set; } = [];

    /// <summary>Entries of <c>adult-compound-words.txt</c>; empty until <see cref="LoadAsync"/> has completed.</summary>
    public HashSet<string> AdultCompoundPhrases { get; private set; } = [];

    /// <summary>Entries of <c>adult-stars.txt</c>; empty until <see cref="LoadAsync"/> has completed.</summary>
    public HashSet<string> AdultStars { get; private set; } = [];

    /// <summary>Entries of <c>jav.txt</c>; empty until <see cref="LoadAsync"/> has completed.</summary>
    public HashSet<string> Jav { get; private set; } = [];

    /// <summary>
    /// Reads all four word list files concurrently and replaces the corresponding sets.
    /// </summary>
    /// <returns>A task that completes once every list is loaded.</returns>
    public async Task LoadAsync()
    {
        var loaderTasks = new List<Task>
        {
            LoadAdultWords(),
            LoadAdultCompounds(),
            LoadJav(),
            LoadAdultStars(),
        };

        await Task.WhenAll(loaderTasks);
    }

    private async Task LoadAdultCompounds() =>
        AdultCompoundPhrases = await ReadEntries(AdultCompoundPhrasesFile);

    private async Task LoadAdultWords() =>
        AdultWords = await ReadEntries(AdultWordsFile);

    private async Task LoadJav() =>
        Jav = await ReadEntries(JavFile);

    private async Task LoadAdultStars() =>
        AdultStars = await ReadEntries(AdultStarsFile);

    // Reads one word list file into a set. Blank and whitespace-only lines are skipped: an empty
    // term is a substring of every title, so keeping it would make substring-based matching flag
    // every torrent as banned. Other lines are kept exactly as written.
    private static async Task<HashSet<string>> ReadEntries(string fileName)
    {
        var lines = await File.ReadAllLinesAsync(GetPath(fileName));

        return lines
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .ToHashSet();
    }

    // Resolves a word list file name to its location inside the Data folder.
    private static string GetPath(string fileName) => Path.Combine(AppContext.BaseDirectory, "Data", fileName);
}

View File

@@ -0,0 +1,13 @@
// Global using directives
global using System.Text.RegularExpressions;
global using Dapper;
global using Microsoft.Extensions.DependencyInjection;
global using Microsoft.Extensions.DependencyInjection.Extensions;
global using Microsoft.Extensions.Logging;
global using Npgsql;
global using Serilog;
global using Tissue.Extensions;
global using Tissue.Features.DataProcessing;
global using Tissue.Features.Kleenex;
global using Tissue.Features.Wordlists;

14
src/tissue/Program.cs Normal file
View File

@@ -0,0 +1,14 @@
// Entry point: configure the host, register the services and run until the process exits.
var builder = Host.CreateApplicationBuilder();

builder.Configuration.AddScrapeConfiguration();

// Word collections are registered before the Kleenex service, which reads them on start-up.
// NOTE(review): this presumably relies on hosted services starting in registration order - confirm.
var services = builder.Services;
services
    .AddDataStorage()
    .RegisterWordCollections()
    .AddSerilogLogging(builder.Configuration)
    .AddKleenexService();

await builder.Build().RunAsync();

35
src/tissue/Tissue.csproj Normal file
View File

@@ -0,0 +1,35 @@
<Project Sdk="Microsoft.NET.Sdk.Worker">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<UserSecretsId>54cad2ee-57df-4bb2-a192-d5d501448e0b</UserSecretsId>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Dapper" Version="2.1.28" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Npgsql" Version="8.0.1" />
<PackageReference Include="Serilog" Version="3.1.1" />
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
</ItemGroup>
<ItemGroup>
<Content Remove="Configuration\*.json" />
<None Include="Configuration\*.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<Content Remove="Data\**" />
<None Include="Data\**">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>