mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
Merge pull request #151 from Gabisonfire/feature/tissue-corn-sanitizer
Improve producer matching - Add tissue service
This commit is contained in:
14
.github/workflows/build_tissue.yaml
vendored
Normal file
14
.github/workflows/build_tissue.yaml
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
name: Build and Push Tissue Service
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'src/tissue/**'
|
||||
|
||||
jobs:
|
||||
process:
|
||||
uses: ./.github/workflows/base_image_workflow.yaml
|
||||
secrets: inherit
|
||||
with:
|
||||
CONTEXT: ./src/tissue/
|
||||
IMAGE_NAME: knightcrawler-tissue
|
||||
@@ -17,5 +17,5 @@ repos:
|
||||
hooks:
|
||||
- id: codespell
|
||||
exclude: |
|
||||
(?x)^(src/node/consumer/test/.*|src/producer/Data/.*)$
|
||||
(?x)^(src/node/consumer/test/.*|src/producer/Data/.*|src/tissue/Data/.*)$
|
||||
args: ["-L", "strem,chage"]
|
||||
|
||||
@@ -35,7 +35,6 @@ adultprime
|
||||
adulttime
|
||||
adulttimepilots
|
||||
aebn
|
||||
anal
|
||||
agentredgirl
|
||||
alettaoceanempire
|
||||
alexismonroe
|
||||
@@ -123,7 +122,6 @@ aussieass
|
||||
aussiefellatioqueens
|
||||
aussiepov
|
||||
austinwilde
|
||||
av69
|
||||
avadawn
|
||||
avanal
|
||||
aventertainments
|
||||
@@ -715,7 +713,6 @@ hunterpov
|
||||
hushpass
|
||||
hussiepass
|
||||
hustlaz
|
||||
hustler
|
||||
hustlerhd
|
||||
hustlerparodies
|
||||
hustlerslesbians
|
||||
@@ -1670,8 +1667,6 @@ tube8
|
||||
x-art
|
||||
mommygotboobs
|
||||
threesomes
|
||||
dp
|
||||
onlyfans
|
||||
alterotic
|
||||
assy
|
||||
defloration
|
||||
|
||||
@@ -1,7 +1,24 @@
|
||||
namespace Producer.Extensions;
|
||||
|
||||
/// <summary>
/// String helpers used when comparing torrent titles against word lists.
/// </summary>
public static partial class StringExtensions
{
    private static readonly char[] separator = [' '];

    // Matches every character that is not an ASCII letter, an ASCII digit or a space.
    [GeneratedRegex("[^a-zA-Z0-9 ]")]
    private static partial Regex NotAlphaNumeric();

    /// <summary>Extension-method form of <see cref="string.IsNullOrEmpty(string?)"/>.</summary>
    public static bool IsNullOrEmpty(this string? value) =>
        string.IsNullOrEmpty(value);

    /// <summary>
    /// Replaces every non-alphanumeric character with a space, collapses runs of spaces,
    /// and lower-cases the result.
    /// </summary>
    /// <param name="title">The raw title to normalise.</param>
    /// <returns>Lower-case ASCII words separated by single spaces; empty when no word remains.</returns>
    public static string NormalizeTitle(this string title)
    {
        var alphanumericTitle = NotAlphaNumeric().Replace(title, " ");

        // Invariant lower-casing: culture-sensitive ToLower() maps 'I' to a dotless 'ı'
        // under e.g. tr-TR, which would stop the result matching ASCII word lists.
        var words = alphanumericTitle
            .Split(separator, StringSplitOptions.RemoveEmptyEntries)
            .Select(word => word.ToLowerInvariant());

        return string.Join(" ", words);
    }
}
|
||||
|
||||
@@ -1,8 +1,21 @@
|
||||
namespace Producer.Features.ParseTorrentTitle;
|
||||
|
||||
public partial class ParsingService(IWordCollections wordCollections, ITorrentTitleParser torrentTitleParser) : IParsingService
|
||||
public partial class ParsingService : IParsingService
|
||||
{
|
||||
private readonly IWordCollections _wordCollections;
|
||||
private readonly ITorrentTitleParser _torrentTitleParser;
|
||||
private static readonly char[] WhitespaceSeparator = [' '];
|
||||
private HashSet<string> _compoundWords = [];
|
||||
|
||||
public ParsingService(IWordCollections wordCollections, ITorrentTitleParser torrentTitleParser)
|
||||
{
|
||||
_wordCollections = wordCollections;
|
||||
_torrentTitleParser = torrentTitleParser;
|
||||
|
||||
_compoundWords.UnionWith(_wordCollections.AdultCompoundPhrases);
|
||||
_compoundWords.UnionWith(_wordCollections.Jav);
|
||||
_compoundWords.UnionWith(_wordCollections.AdultStars);
|
||||
}
|
||||
|
||||
public string Naked(string title) =>
|
||||
NakedMatcher().Replace(title.ToLower(), "");
|
||||
@@ -198,8 +211,8 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
|
||||
|
||||
public bool FlexEq(string test, string target, List<string> years)
|
||||
{
|
||||
var movieTitle = torrentTitleParser.Parse(test).Movie.Title.ToLower();
|
||||
var tvTitle = torrentTitleParser.Parse(test).Show.Title.ToLower();
|
||||
var movieTitle = _torrentTitleParser.Parse(test).Movie.Title.ToLower();
|
||||
var tvTitle = _torrentTitleParser.Parse(test).Show.Title.ToLower();
|
||||
|
||||
var target2 = WhitespaceMatcher().Replace(target, "");
|
||||
var test2 = WhitespaceMatcher().Replace(test, "");
|
||||
@@ -247,7 +260,7 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
|
||||
return false;
|
||||
}
|
||||
|
||||
var keyTerms = splits.Where(s => (s.Length > 1 && !wordCollections.CommonWords.Contains(s)) || s.Length > 5).ToList();
|
||||
var keyTerms = splits.Where(s => (s.Length > 1 && !_wordCollections.CommonWords.Contains(s)) || s.Length > 5).ToList();
|
||||
keyTerms.AddRange(target.Split(WhitespaceSeparator, StringSplitOptions.RemoveEmptyEntries).Where(e => e.Length > 2));
|
||||
var keySet = new HashSet<string>(keyTerms);
|
||||
var commonTerms = splits.Where(s => !keySet.Contains(s)).ToList();
|
||||
@@ -296,42 +309,30 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
|
||||
|
||||
/// <summary>
/// Checks that <paramref name="testTitle"/> carries no banned term that is absent from
/// <paramref name="targetTitle"/>.
/// </summary>
/// <param name="targetTitle">The reference title; a term that it contains itself is not held against the candidate.</param>
/// <param name="testTitle">The candidate title being screened.</param>
/// <returns><c>true</c> when neither a banned word nor a banned compound term was found.</returns>
public bool HasNoBannedTerms(string targetTitle, string testTitle)
{
    var normalisedTarget = targetTitle.NormalizeTitle();

    // The candidate title is what has to be screened. Normalising only the target
    // (as the previous revision did) left testTitle completely unused.
    var normalisedTest = testTitle.NormalizeTitle();

    var hasBannedWords = normalisedTest
        .Split(' ')
        .Where(word => word.Length >= 3)
        .Any(word => !normalisedTarget.Contains(word, StringComparison.Ordinal) && _wordCollections.AdultWords.Contains(word));

    var hasCompounds = _compoundWords.Any(term =>
        normalisedTest.Contains(term, StringComparison.OrdinalIgnoreCase) &&
        !normalisedTarget.Contains(term, StringComparison.OrdinalIgnoreCase));

    return !hasBannedWords &&
           !hasCompounds;
}
|
||||
|
||||
/// <summary>
/// Checks a single title for banned words and banned compound terms.
/// </summary>
/// <param name="targetTitle">The title to screen.</param>
/// <returns><c>true</c> when neither a banned word nor a banned compound term was found.</returns>
public bool HasNoBannedTerms(string targetTitle)
{
    var normalisedTitle = targetTitle.NormalizeTitle();

    // Whole-word check. The words come from the title itself, so re-checking that
    // the title contains them (as before) was always true and has been dropped.
    var hasBannedWords = normalisedTitle
        .Split(' ')
        .Any(word => word.Length >= 3 && _wordCollections.AdultWords.Contains(word));

    // Substring check. A blank term would be "contained" in every title, so skip it.
    var hasCompounds = _compoundWords.Any(term =>
        !string.IsNullOrWhiteSpace(term) &&
        normalisedTitle.Contains(term, StringComparison.OrdinalIgnoreCase));

    return !hasBannedWords &&
           !hasCompounds;
}
|
||||
|
||||
public bool MeetsTitleConditions(string targetTitle, List<string> years, string testTitle) => MatchesTitle(targetTitle, years, testTitle) && HasNoBannedTerms(targetTitle, testTitle);
|
||||
@@ -343,10 +344,10 @@ public partial class ParsingService(IWordCollections wordCollections, ITorrentTi
|
||||
.Where(word => word.Length > 3)
|
||||
.ToList();
|
||||
|
||||
return processedTitle.Count(word => !wordCollections.CommonWords.Contains(word));
|
||||
return processedTitle.Count(word => !_wordCollections.CommonWords.Contains(word));
|
||||
}
|
||||
|
||||
public ParsedFilename Parse(string name) => torrentTitleParser.Parse(name);
|
||||
public ParsedFilename Parse(string name) => _torrentTitleParser.Parse(name);
|
||||
|
||||
public TorrentType GetTypeByName(string name) => torrentTitleParser.GetTypeByName(name);
|
||||
public TorrentType GetTypeByName(string name) => _torrentTitleParser.GetTypeByName(name);
|
||||
}
|
||||
|
||||
31
src/tissue/Configuration/logging.json
Normal file
31
src/tissue/Configuration/logging.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"Serilog": {
|
||||
"Using": [ "Serilog.Sinks.Console" ],
|
||||
"MinimumLevel": {
|
||||
"Default": "Information",
|
||||
"Override": {
|
||||
"Microsoft": "Warning",
|
||||
"System": "Warning",
|
||||
"System.Net.Http.HttpClient.Scraper.LogicalHandler": "Warning",
|
||||
"System.Net.Http.HttpClient.Scraper.ClientHandler": "Warning",
|
||||
"Quartz.Impl.StdSchedulerFactory": "Warning",
|
||||
"Quartz.Core.QuartzScheduler": "Warning",
|
||||
"Quartz.Simpl.RAMJobStore": "Warning",
|
||||
"Quartz.Core.JobRunShell": "Warning",
|
||||
"Quartz.Core.SchedulerSignalerImpl": "Warning"
|
||||
}
|
||||
},
|
||||
"WriteTo": [
|
||||
{
|
||||
"Name": "Console",
|
||||
"Args": {
|
||||
"outputTemplate": "{Timestamp:HH:mm:ss} [{Level}] [{SourceContext}] {Message}{NewLine}{Exception}"
|
||||
}
|
||||
}
|
||||
],
|
||||
"Enrich": [ "FromLogContext", "WithMachineName", "WithThreadId" ],
|
||||
"Properties": {
|
||||
"Application": "Producer"
|
||||
}
|
||||
}
|
||||
}
|
||||
3004
src/tissue/Data/adult-compound-words.txt
Normal file
3004
src/tissue/Data/adult-compound-words.txt
Normal file
File diff suppressed because it is too large
Load Diff
17681
src/tissue/Data/adult-stars.txt
Normal file
17681
src/tissue/Data/adult-stars.txt
Normal file
File diff suppressed because it is too large
Load Diff
1670
src/tissue/Data/adult-words.txt
Normal file
1670
src/tissue/Data/adult-words.txt
Normal file
File diff suppressed because it is too large
Load Diff
234602
src/tissue/Data/jav.txt
Normal file
234602
src/tissue/Data/jav.txt
Normal file
File diff suppressed because it is too large
Load Diff
17
src/tissue/Dockerfile
Normal file
17
src/tissue/Dockerfile
Normal file
@@ -0,0 +1,17 @@
|
||||
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0 AS build
|
||||
|
||||
ARG TARGETARCH
|
||||
WORKDIR /App
|
||||
COPY . ./
|
||||
RUN dotnet restore -a $TARGETARCH
|
||||
RUN dotnet publish -c Release --no-restore -o out -a $TARGETARCH
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine
|
||||
|
||||
WORKDIR /App
|
||||
COPY --from=build /App/out .
|
||||
RUN addgroup -S tissue && adduser -S -G tissue tissue
|
||||
USER tissue
|
||||
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
|
||||
CMD pgrep -f dotnet || exit 1
|
||||
ENTRYPOINT ["dotnet", "Tissue.dll"]
|
||||
44
src/tissue/Extensions/ConfigurationExtensions.cs
Normal file
44
src/tissue/Extensions/ConfigurationExtensions.cs
Normal file
@@ -0,0 +1,44 @@
|
||||
namespace Tissue.Extensions;
|
||||
|
||||
/// <summary>
/// Helpers for building the application configuration and for registering
/// configuration objects as singletons.
/// </summary>
public static class ConfigurationExtensions
{
    private const string ConfigurationFolder = "Configuration";
    private const string LoggingConfig = "logging.json";

    /// <summary>
    /// Points the builder at the "Configuration" folder under the application base directory,
    /// then adds the required logging JSON file (reloaded on change), environment variables
    /// and user secrets, in that order.
    /// </summary>
    /// <returns>The same <paramref name="configuration"/> instance.</returns>
    public static IConfigurationBuilder AddScrapeConfiguration(this IConfigurationBuilder configuration)
    {
        var basePath = Path.Combine(AppContext.BaseDirectory, ConfigurationFolder);

        configuration.SetBasePath(basePath);
        configuration.AddJsonFile(LoggingConfig, optional: false, reloadOnChange: true);
        configuration.AddEnvironmentVariables();
        configuration.AddUserSecrets<Program>();

        return configuration;
    }

    /// <summary>
    /// Binds the configuration section <paramref name="sectionName"/> to a
    /// <typeparamref name="TConfiguration"/> and registers it as a singleton unless one is already registered.
    /// </summary>
    /// <returns>The bound instance.</returns>
    /// <exception cref="ArgumentNullException">The section could not be bound to an instance.</exception>
    public static TConfiguration LoadConfigurationFromConfig<TConfiguration>(this IServiceCollection services, IConfiguration configuration, string sectionName)
        where TConfiguration : class
    {
        var section = configuration.GetSection(sectionName);
        var instance = section.Get<TConfiguration>();

        ArgumentNullException.ThrowIfNull(instance, nameof(instance));

        services.TryAddSingleton(instance);
        return instance;
    }

    /// <summary>
    /// Creates a <typeparamref name="TConfiguration"/> through its parameterless constructor and
    /// registers it as a singleton unless one is already registered.
    /// </summary>
    /// <returns>The created instance.</returns>
    public static TConfiguration LoadConfigurationFromEnv<TConfiguration>(this IServiceCollection services)
        where TConfiguration : class
    {
        var instance = Activator.CreateInstance<TConfiguration>();

        ArgumentNullException.ThrowIfNull(instance, nameof(instance));

        services.TryAddSingleton(instance);
        return instance;
    }
}
|
||||
68
src/tissue/Extensions/EnvironmentExtensions.cs
Normal file
68
src/tissue/Extensions/EnvironmentExtensions.cs
Normal file
@@ -0,0 +1,68 @@
|
||||
namespace Tissue.Extensions;
|
||||
|
||||
/// <summary>
/// Reads typed values from environment variables named <c>{prefix}_{varName}</c>.
/// </summary>
public static class EnvironmentExtensions
{
    /// <summary>
    /// Reads a boolean. "true", "yes" and "1" (any casing, surrounding whitespace ignored) are
    /// <c>true</c>; any other non-empty value is <c>false</c>.
    /// </summary>
    /// <returns>The parsed value, or <paramref name="fallback"/> when the variable is unset or empty.</returns>
    public static bool GetEnvironmentVariableAsBool(this string prefix, string varName, bool fallback = false)
    {
        var str = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));

        if (string.IsNullOrEmpty(str))
        {
            return fallback;
        }

        // Invariant casing so the result does not depend on the machine's culture.
        return str.Trim().ToLowerInvariant() switch
        {
            "true" or "yes" or "1" => true,
            _ => false,
        };
    }

    /// <summary>Reads an integer, parsed with the invariant culture.</summary>
    /// <returns>The parsed value, or <paramref name="fallback"/> when the variable is unset, empty or not an integer.</returns>
    public static int GetEnvironmentVariableAsInt(this string prefix, string varName, int fallback = 0)
    {
        var str = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));

        if (string.IsNullOrEmpty(str))
        {
            return fallback;
        }

        return int.TryParse(
            str,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var result)
            ? result
            : fallback;
    }

    /// <summary>Reads a string that must be present.</summary>
    /// <exception cref="InvalidOperationException">The variable is unset or empty.</exception>
    public static string GetRequiredEnvironmentVariableAsString(this string prefix, string varName)
    {
        var fullVarName = GetFullVariableName(prefix, varName);
        var str = Environment.GetEnvironmentVariable(fullVarName);

        if (string.IsNullOrEmpty(str))
        {
            throw new InvalidOperationException($"Environment variable {fullVarName} is not set");
        }

        return str;
    }

    /// <summary>Reads an optional string.</summary>
    /// <returns>
    /// The value, or <paramref name="fallback"/> (possibly <c>null</c>) when the variable is unset or empty.
    /// The return type is annotated nullable because the default fallback is <c>null</c>.
    /// </returns>
    public static string? GetOptionalEnvironmentVariableAsString(this string prefix, string varName, string? fallback = null)
    {
        var str = Environment.GetEnvironmentVariable(GetFullVariableName(prefix, varName));

        return string.IsNullOrEmpty(str) ? fallback : str;
    }

    private static string GetFullVariableName(string prefix, string varName) => $"{prefix}_{varName}";
}
|
||||
28
src/tissue/Extensions/ServiceCollectionExtensions.cs
Normal file
28
src/tissue/Extensions/ServiceCollectionExtensions.cs
Normal file
@@ -0,0 +1,28 @@
|
||||
namespace Tissue.Extensions;
|
||||
|
||||
/// <summary>
/// Service registrations for logging and for the Kleenex hosted service.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Removes the existing logging providers and adds a Serilog logger built
    /// from <paramref name="configuration"/>.
    /// </summary>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    public static IServiceCollection AddSerilogLogging(this IServiceCollection services, IConfiguration configuration)
    {
        services.AddLogging(ConfigureSerilog);
        return services;

        void ConfigureSerilog(ILoggingBuilder loggingBuilder)
        {
            loggingBuilder.ClearProviders();

            var loggerConfiguration = new LoggerConfiguration();
            var serilogLogger = loggerConfiguration.ReadFrom.Configuration(configuration).CreateLogger();

            loggingBuilder.AddSerilog(serilogLogger);
        }
    }

    /// <summary>Registers <see cref="KleenexService"/> as a hosted service.</summary>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    public static IServiceCollection AddKleenexService(this IServiceCollection services)
    {
        services.AddHostedService<KleenexService>();
        return services;
    }
}
|
||||
24
src/tissue/Extensions/StringExtensions.cs
Normal file
24
src/tissue/Extensions/StringExtensions.cs
Normal file
@@ -0,0 +1,24 @@
|
||||
namespace Tissue.Extensions;
|
||||
|
||||
/// <summary>
/// String helpers used when comparing torrent titles against word lists.
/// </summary>
public static partial class StringExtensions
{
    private static readonly char[] separator = [' '];

    // Matches every character that is not an ASCII letter, an ASCII digit or a space.
    [GeneratedRegex("[^a-zA-Z0-9 ]")]
    private static partial Regex NotAlphaNumeric();

    /// <summary>Extension-method form of <see cref="string.IsNullOrEmpty(string?)"/>.</summary>
    public static bool IsNullOrEmpty(this string? value) =>
        string.IsNullOrEmpty(value);

    /// <summary>
    /// Replaces every non-alphanumeric character with a space, collapses runs of spaces,
    /// and lower-cases the result.
    /// </summary>
    /// <param name="title">The raw title to normalise.</param>
    /// <returns>Lower-case ASCII words separated by single spaces; empty when no word remains.</returns>
    public static string NormalizeTitle(this string title)
    {
        var alphanumericTitle = NotAlphaNumeric().Replace(title, " ");

        // Invariant lower-casing: culture-sensitive ToLower() maps 'I' to a dotless 'ı'
        // under e.g. tr-TR, which would stop the result matching ASCII word lists.
        var words = alphanumericTitle
            .Split(separator, StringSplitOptions.RemoveEmptyEntries)
            .Select(word => word.ToLowerInvariant());

        return string.Join(" ", words);
    }
}
|
||||
44
src/tissue/Features/DataProcessing/DapperDataStorage.cs
Normal file
44
src/tissue/Features/DataProcessing/DapperDataStorage.cs
Normal file
@@ -0,0 +1,44 @@
|
||||
namespace Tissue.Features.DataProcessing;
|
||||
|
||||
/// <summary>
/// <see cref="IDataStorage"/> implementation that talks to PostgreSQL through Dapper and Npgsql.
/// A new connection is opened for every call, using the connection string from <paramref name="configuration"/>.
/// </summary>
public class DapperDataStorage(PostgresConfiguration configuration, ILogger<DapperDataStorage> logger) : IDataStorage
{
    /// <summary>Loads every row of the <c>torrents</c> table.</summary>
    /// <param name="cancellationToken">Cancels opening the connection and running the query.</param>
    /// <returns>The loaded torrents; an empty collection when loading fails (the failure is logged, not thrown).</returns>
    public async Task<IReadOnlyCollection<Torrent>?> GetAllTorrents(CancellationToken cancellationToken = default)
    {
        const string getAllTorrentsSql = "SELECT * FROM torrents";

        try
        {
            await using var connection = await CreateAndOpenConnection(cancellationToken);

            // CommandDefinition is how Dapper receives the token; the plain overload ignored it.
            var command = new CommandDefinition(getAllTorrentsSql, cancellationToken: cancellationToken);
            var torrents = await connection.QueryAsync<Torrent>(command);

            return torrents.ToList();
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error while loading torrents from database");
            return new List<Torrent>();
        }
    }

    /// <summary>Deletes every torrent whose <c>infoHash</c> is in <paramref name="infoHashes"/>.</summary>
    /// <param name="infoHashes">The info hashes to delete; nothing is executed when empty.</param>
    /// <param name="cancellationToken">Cancels opening the connection and running the statement.</param>
    /// <remarks>Failures are logged and swallowed, so the caller cannot tell whether the delete succeeded.</remarks>
    public async Task DeleteTorrentsByInfoHashes(IReadOnlyCollection<string> infoHashes, CancellationToken cancellationToken = default)
    {
        const string deleteTorrentsSql = "DELETE FROM torrents WHERE \"infoHash\" = ANY(@infoHashes)";

        if (infoHashes.Count == 0)
        {
            // Nothing to delete: skip the round trip.
            return;
        }

        try
        {
            await using var connection = await CreateAndOpenConnection(cancellationToken);

            var command = new CommandDefinition(deleteTorrentsSql, new { infoHashes }, cancellationToken: cancellationToken);
            await connection.ExecuteAsync(command);
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error while deleting torrents from database");
        }
    }

    // Creates a connection and opens it; the connection is disposed if opening fails
    // so a failed attempt does not leak it.
    private async Task<NpgsqlConnection> CreateAndOpenConnection(CancellationToken cancellationToken = default)
    {
        var connection = new NpgsqlConnection(configuration.StorageConnectionString);

        try
        {
            await connection.OpenAsync(cancellationToken);
            return connection;
        }
        catch
        {
            await connection.DisposeAsync();
            throw;
        }
    }
}
|
||||
7
src/tissue/Features/DataProcessing/IDataStorage.cs
Normal file
7
src/tissue/Features/DataProcessing/IDataStorage.cs
Normal file
@@ -0,0 +1,7 @@
|
||||
namespace Tissue.Features.DataProcessing;
|
||||
|
||||
/// <summary>
/// Storage operations on torrents: load them all, and delete a set of them by info hash.
/// </summary>
public interface IDataStorage
{
    /// <summary>Loads all torrents.</summary>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The torrents; the signature also allows <c>null</c>.</returns>
    Task<IReadOnlyCollection<Torrent>?> GetAllTorrents(CancellationToken cancellationToken = default);

    /// <summary>Deletes the torrents identified by <paramref name="infoHashes"/>.</summary>
    /// <param name="infoHashes">Info hashes of the torrents to delete.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task DeleteTorrentsByInfoHashes(IReadOnlyCollection<string> infoHashes, CancellationToken cancellationToken = default);
}
|
||||
19
src/tissue/Features/DataProcessing/PostgresConfiguration.cs
Normal file
19
src/tissue/Features/DataProcessing/PostgresConfiguration.cs
Normal file
@@ -0,0 +1,19 @@
|
||||
namespace Tissue.Features.DataProcessing;
|
||||
|
||||
/// <summary>
/// PostgreSQL connection settings read from <c>POSTGRES_*</c> environment variables when the
/// instance is created. HOST, USER, PASSWORD and DB are required; PORT falls back to 5432.
/// </summary>
public class PostgresConfiguration
{
    private const string Prefix = "POSTGRES";
    private const string HostVariable = "HOST";
    private const string UsernameVariable = "USER";
    private const string PasswordVariable = "PASSWORD";
    private const string DatabaseVariable = "DB";
    private const string PortVariable = "PORT";

    private string Host { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(HostVariable);
    private string Username { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(UsernameVariable);
    private string Password { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(PasswordVariable);
    private string Database { get; init; } = Prefix.GetRequiredEnvironmentVariableAsString(DatabaseVariable);
    private int Port { get; init; } = Prefix.GetEnvironmentVariableAsInt(PortVariable, 5432);

    /// <summary>
    /// The Npgsql connection string for these settings. It is produced by
    /// <see cref="NpgsqlConnectionStringBuilder"/> so that values containing ';', '=' or quotes
    /// (typically the password) are escaped rather than corrupting the string.
    /// </summary>
    public string StorageConnectionString =>
        new NpgsqlConnectionStringBuilder
        {
            Host = Host,
            Port = Port,
            Username = Username,
            Password = Password,
            Database = Database,
        }.ConnectionString;
}
|
||||
@@ -0,0 +1,12 @@
|
||||
namespace Tissue.Features.DataProcessing;
|
||||
|
||||
/// <summary>
/// Service registrations for the data-processing feature.
/// </summary>
internal static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers a <see cref="PostgresConfiguration"/> singleton and a transient
    /// <see cref="IDataStorage"/> implemented by <see cref="DapperDataStorage"/>.
    /// </summary>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    internal static IServiceCollection AddDataStorage(this IServiceCollection services)
    {
        // Configuration first, then the storage that consumes it.
        _ = services.LoadConfigurationFromEnv<PostgresConfiguration>();
        services.AddTransient<IDataStorage, DapperDataStorage>();

        return services;
    }
}
|
||||
20
src/tissue/Features/DataProcessing/Torrent.cs
Normal file
20
src/tissue/Features/DataProcessing/Torrent.cs
Normal file
@@ -0,0 +1,20 @@
|
||||
namespace Tissue.Features.DataProcessing;
|
||||
|
||||
/// <summary>
/// A torrent record. <c>DapperDataStorage.GetAllTorrents</c> materialises rows of the
/// <c>torrents</c> table into this type.
/// </summary>
public class Torrent
{
    // Identity and origin.
    public string? InfoHash { get; set; }
    public string? Provider { get; set; }
    public string? TorrentId { get; set; }

    // Descriptive data.
    public string? Title { get; set; }
    public long? Size { get; set; }
    public string? Type { get; set; }
    public DateTime UploadDate { get; set; }
    public short? Seeders { get; set; }
    public string? Trackers { get; set; }
    public string? Languages { get; set; }
    public string? Resolution { get; set; }

    // Processing flags.
    public bool Reviewed { get; set; }
    public bool Opened { get; set; }

    // Row timestamps.
    public DateTime CreatedAt { get; set; }
    public DateTime UpdatedAt { get; set; }
}
|
||||
93
src/tissue/Features/Kleenex/KleenexService.cs
Normal file
93
src/tissue/Features/Kleenex/KleenexService.cs
Normal file
@@ -0,0 +1,93 @@
|
||||
namespace Tissue.Features.Kleenex;
|
||||
|
||||
/// <summary>
/// One-shot hosted service: on start it loads all torrents, deletes those whose title contains
/// a banned term, and then terminates the process (exit code 0 on success, 1 on failure).
/// </summary>
public class KleenexService(IDataStorage dataStorage, ILogger<KleenexService> logger, IWordCollections wordCollections) : IHostedService
{
    // Union of the compound-phrase, JAV and adult-star lists; matched as substrings of the title.
    private readonly HashSet<string> _combinedCompounds = [];

    /// <summary>Runs the whole clean-up and exits the process when done.</summary>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        try
        {
            logger.LogInformation("Kleenex service started");
            logger.LogInformation("Get ready to pop all that corn...");

            var torrents = await LoadTorrents(cancellationToken);

            AddCompoundTerms(wordCollections.AdultCompoundPhrases);
            AddCompoundTerms(wordCollections.Jav);
            AddCompoundTerms(wordCollections.AdultStars);

            var infoHashesToDelete = GetInfoHashesToDelete(torrents);

            await dataStorage.DeleteTorrentsByInfoHashes(infoHashesToDelete, cancellationToken);

            logger.LogInformation("Deleted {TorrentCount} torrents", infoHashesToDelete.Count);

            logger.LogInformation("Kleenex service completed successfully");

            Environment.Exit(0);
        }
        catch (Exception e)
        {
            logger.LogError(e, "Error while processing torrents");
            Environment.Exit(1);
        }
    }

    /// <summary>Logs the shutdown; there is nothing to stop.</summary>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Service Shutdown");
        return Task.CompletedTask;
    }

    // Adds terms to the substring-match set. Blank terms are skipped: an empty string is
    // "contained" in every title and would flag every torrent for deletion.
    private void AddCompoundTerms(IEnumerable<string> terms) =>
        _combinedCompounds.UnionWith(terms.Where(term => !string.IsNullOrWhiteSpace(term)));

    // Collects the info hashes of all torrents with a banned title. Exits the process with
    // code 0 when there is nothing to delete.
    private List<string> GetInfoHashesToDelete(IReadOnlyCollection<Torrent> torrents)
    {
        var infoHashesToDelete = torrents
            .Where(torrent => HasBannedTerms(torrent.Title))
            .Select(torrent => torrent.InfoHash)
            .OfType<string>() // drops rows without an info hash instead of sending null to the database
            .ToList();

        if (infoHashesToDelete.Count == 0)
        {
            logger.LogInformation("No torrents to delete");
            Environment.Exit(0);
        }

        return infoHashesToDelete;
    }

    // Loads every torrent. Exits the process with code 0 when none are returned.
    private async Task<IReadOnlyCollection<Torrent>> LoadTorrents(CancellationToken cancellationToken)
    {
        var torrents = await dataStorage.GetAllTorrents(cancellationToken);

        if (torrents is null || torrents.Count == 0)
        {
            logger.LogInformation("No torrents found");
            Environment.Exit(0);
        }

        logger.LogInformation("Found {TorrentCount} torrents", torrents.Count);
        return torrents;
    }

    // True when the title contains a banned whole word (3+ characters) or any combined
    // compound term as a substring. A missing or blank title counts as clean; previously
    // a null title threw inside NormalizeTitle and aborted the whole run.
    private bool HasBannedTerms(string? targetTitle)
    {
        if (string.IsNullOrWhiteSpace(targetTitle))
        {
            return false;
        }

        var normalisedTitle = targetTitle.NormalizeTitle();

        // The words come from the title itself, so the old "title contains word" re-check
        // was always true and has been dropped.
        var hasBannedWords = normalisedTitle
            .Split(' ')
            .Any(word => word.Length >= 3 && wordCollections.AdultWords.Contains(word));

        var hasCompounds = _combinedCompounds.Any(term => normalisedTitle.Contains(term, StringComparison.OrdinalIgnoreCase));

        var isClean = !hasBannedWords &&
                      !hasCompounds;

        if (isClean)
        {
            logger.LogInformation("No banned terms found in torrent title: {Title}", targetTitle);
            return false;
        }

        logger.LogWarning("Banned terms found in torrent title: {Title}", targetTitle);
        return true;
    }
}
|
||||
14
src/tissue/Features/Wordlists/IWordCollections.cs
Normal file
14
src/tissue/Features/Wordlists/IWordCollections.cs
Normal file
@@ -0,0 +1,14 @@
|
||||
namespace Tissue.Features.Wordlists;
|
||||
|
||||
/// <summary>
/// The word lists used to screen torrent titles, plus the operation that populates them.
/// </summary>
public interface IWordCollections
{
    /// <summary>Single banned words.</summary>
    HashSet<string> AdultWords { get; }

    /// <summary>Banned compound phrases.</summary>
    HashSet<string> AdultCompoundPhrases { get; }

    /// <summary>JAV terms.</summary>
    HashSet<string> Jav { get; }

    /// <summary>Adult performer names.</summary>
    HashSet<string> AdultStars { get; }

    /// <summary>Populates the collections.</summary>
    Task LoadAsync();
}
|
||||
20
src/tissue/Features/Wordlists/PopulationService.cs
Normal file
20
src/tissue/Features/Wordlists/PopulationService.cs
Normal file
@@ -0,0 +1,20 @@
|
||||
namespace Tissue.Features.Wordlists;
|
||||
|
||||
/// <summary>
/// Hosted service that loads the word collections on start and logs how many entries each holds.
/// </summary>
public class PopulationService(IWordCollections wordCollections, ILogger<PopulationService> logger) : IHostedService
{
    /// <summary>Loads the word collections, then logs the size of each one.</summary>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Loading word collections...");

        await wordCollections.LoadAsync();

        var adultWordCount = wordCollections.AdultWords.Count;
        logger.LogInformation("Adult Words Count: {Count}", adultWordCount);

        var compoundPhraseCount = wordCollections.AdultCompoundPhrases.Count;
        logger.LogInformation("Adult Compound Phrases Count: {Count}", compoundPhraseCount);

        var javCount = wordCollections.Jav.Count;
        logger.LogInformation("Jav Count: {Count}", javCount);

        var adultStarCount = wordCollections.AdultStars.Count;
        logger.LogInformation("Adult Stars Count: {Count}", adultStarCount);

        logger.LogInformation("Word collections loaded.");
    }

    /// <summary>Nothing to stop; completes immediately.</summary>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        return Task.CompletedTask;
    }
}
|
||||
12
src/tissue/Features/Wordlists/ServiceCollectionExtensions.cs
Normal file
12
src/tissue/Features/Wordlists/ServiceCollectionExtensions.cs
Normal file
@@ -0,0 +1,12 @@
|
||||
namespace Tissue.Features.Wordlists;
|
||||
|
||||
/// <summary>
/// Service registrations for the word-list feature.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="WordCollections"/> as the singleton <see cref="IWordCollections"/> and
    /// <see cref="PopulationService"/> as a hosted service.
    /// </summary>
    /// <returns>The same <paramref name="services"/> instance.</returns>
    public static IServiceCollection RegisterWordCollections(this IServiceCollection services)
    {
        // The collections are shared by every consumer; the hosted service fills them on start.
        services.AddSingleton<IWordCollections, WordCollections>();
        services.AddHostedService<PopulationService>();

        return services;
    }
}
|
||||
55
src/tissue/Features/Wordlists/WordCollections.cs
Normal file
55
src/tissue/Features/Wordlists/WordCollections.cs
Normal file
@@ -0,0 +1,55 @@
|
||||
namespace Tissue.Features.Wordlists;
|
||||
|
||||
/// <summary>
/// Word lists loaded from text files (one term per line) in the "Data" folder under the
/// application base directory.
/// </summary>
public class WordCollections : IWordCollections
{
    private const string AdultWordsFile = "adult-words.txt";
    private const string AdultCompoundPhrasesFile = "adult-compound-words.txt";
    private const string AdultStarsFile = "adult-stars.txt";
    private const string JavFile = "jav.txt";

    public HashSet<string> AdultWords { get; private set; } = [];

    public HashSet<string> AdultCompoundPhrases { get; private set; } = [];

    public HashSet<string> AdultStars { get; private set; } = [];

    public HashSet<string> Jav { get; private set; } = [];

    /// <summary>
    /// Reads the four word-list files concurrently and replaces each collection with the
    /// terms read from its file.
    /// </summary>
    public async Task LoadAsync()
    {
        var adultWords = ReadTermsAsync(AdultWordsFile);
        var adultCompounds = ReadTermsAsync(AdultCompoundPhrasesFile);
        var jav = ReadTermsAsync(JavFile);
        var adultStars = ReadTermsAsync(AdultStarsFile);

        await Task.WhenAll(adultWords, adultCompounds, jav, adultStars);

        AdultWords = await adultWords;
        AdultCompoundPhrases = await adultCompounds;
        Jav = await jav;
        AdultStars = await adultStars;
    }

    // Reads one word-list file into a set. Blank lines are dropped: consumers match these
    // terms with string.Contains, and an empty term is contained in every title.
    private static async Task<HashSet<string>> ReadTermsAsync(string fileName)
    {
        var lines = await File.ReadAllLinesAsync(GetPath(fileName));

        return lines.Where(line => !string.IsNullOrWhiteSpace(line)).ToHashSet();
    }

    private static string GetPath(string fileName) => Path.Combine(AppContext.BaseDirectory, "Data", fileName);
}
|
||||
13
src/tissue/GlobalUsings.cs
Normal file
13
src/tissue/GlobalUsings.cs
Normal file
@@ -0,0 +1,13 @@
|
||||
// Global using directives
|
||||
|
||||
global using System.Text.RegularExpressions;
|
||||
global using Dapper;
|
||||
global using Microsoft.Extensions.DependencyInjection;
|
||||
global using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
global using Microsoft.Extensions.Logging;
|
||||
global using Npgsql;
|
||||
global using Serilog;
|
||||
global using Tissue.Extensions;
|
||||
global using Tissue.Features.DataProcessing;
|
||||
global using Tissue.Features.Kleenex;
|
||||
global using Tissue.Features.Wordlists;
|
||||
14
src/tissue/Program.cs
Normal file
14
src/tissue/Program.cs
Normal file
@@ -0,0 +1,14 @@
|
||||
// Entry point: build the host, register the services, run until the process exits.
var builder = Host.CreateApplicationBuilder();

// Configuration sources: logging.json, environment variables, user secrets.
builder.Configuration.AddScrapeConfiguration();

// Registration order is kept as-is: the word collections (and the hosted service that
// loads them) are registered before the Kleenex hosted service that consumes them.
var services = builder.Services;
services.AddDataStorage();
services.RegisterWordCollections();
services.AddSerilogLogging(builder.Configuration);
services.AddKleenexService();

var host = builder.Build();

await host.RunAsync();
|
||||
35
src/tissue/Tissue.csproj
Normal file
35
src/tissue/Tissue.csproj
Normal file
@@ -0,0 +1,35 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk.Worker">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net8.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<UserSecretsId>54cad2ee-57df-4bb2-a192-d5d501448e0b</UserSecretsId>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Dapper" Version="2.1.28" />
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||
<PackageReference Include="Npgsql" Version="8.0.1" />
|
||||
<PackageReference Include="Serilog" Version="3.1.1" />
|
||||
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1" />
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Content Remove="Configuration\*.json" />
|
||||
<None Include="Configuration\*.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Content Remove="Data\**" />
|
||||
<None Include="Data\**">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
Reference in New Issue
Block a user