4 Commits

Author SHA1 Message Date
iPromKnight
1e83b4c5d8 Patch the addon (#185) 2024-03-29 19:08:17 +00:00
iPromKnight
66609c2a46 trigram performance increased and housekeeping (#184)
* add new indexes, and change year column to int

* Change gist to gin, and change year to int

* Producer changes for new gin query

* Fully map the rtn response using json dump from Pydantic

Also updates Rtn to 0.1.9

* Add housekeeping script to reconcile imdb ids.

* Join Torrent onto the ingested torrent table

Ensure that a torrent can always find the details of where it came from, and how it was parsed.

* Version bump for release

* missing quote on table name
2024-03-29 19:01:48 +00:00
iPromKnight
2d78dc2735 version bump for release (#183) 2024-03-28 23:37:35 +00:00
iPromKnight
527d6cdf15 Upgrade RTN to 0.1.8, replace rabbitmq with drop in replacement lavinmq - better performance, lower resource usage. (#182) 2024-03-28 23:35:41 +00:00
35 changed files with 473 additions and 223 deletions

View File

@@ -9,7 +9,7 @@ networks:
volumes:
postgres:
rabbitmq:
lavinmq:
redis:
services:
@@ -55,28 +55,29 @@ services:
volumes:
- redis:/data
## RabbitMQ is used as a message broker for the services.
## LavinMQ is used as a message broker for the services.
## It is a high-performance drop-in replacement for RabbitMQ.
## It is used to communicate between the services.
rabbitmq:
lavinmq:
env_file: stack.env
healthcheck:
test: ["CMD-SHELL", "rabbitmq-diagnostics -q ping"]
timeout: 10s
interval: 10s
retries: 3
start_period: 10s
# # If you need the database to be accessible from outside, please open the below port.
# # Furthermore, please, please, please, look at the documentation for rabbit on how to secure the service.
# # Furthermore, please, please, please, look at the documentation for lavinmq / rabbitmq on how to secure the service.
# ports:
# - "5672:5672"
# - "15672:15672"
# - "15692:15692"
image: rabbitmq:3-management
image: cloudamqp/lavinmq:latest
healthcheck:
test: ["CMD-SHELL", "lavinmqctl status"]
timeout: 10s
interval: 10s
retries: 3
start_period: 10s
restart: unless-stopped
networks:
- knightcrawler-network
restart: unless-stopped
volumes:
- rabbitmq:/var/lib/rabbitmq
- lavinmq:/var/lib/lavinmq/
## The addon. This is what is used in stremio
addon:
@@ -87,13 +88,13 @@ services:
condition: service_completed_successfully
postgres:
condition: service_healthy
rabbitmq:
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
hostname: knightcrawler-addon
image: gabisonfire/knightcrawler-addon:2.0.8
image: gabisonfire/knightcrawler-addon:2.0.10
labels:
logging: promtail
networks:
@@ -111,12 +112,12 @@ services:
condition: service_completed_successfully
postgres:
condition: service_healthy
rabbitmq:
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-consumer:2.0.8
image: gabisonfire/knightcrawler-consumer:2.0.10
labels:
logging: promtail
networks:
@@ -132,12 +133,12 @@ services:
condition: service_completed_successfully
postgres:
condition: service_healthy
rabbitmq:
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-debrid-collector:2.0.8
image: gabisonfire/knightcrawler-debrid-collector:2.0.10
labels:
logging: promtail
networks:
@@ -151,7 +152,7 @@ services:
migrator:
condition: service_completed_successfully
env_file: stack.env
image: gabisonfire/knightcrawler-metadata:2.0.8
image: gabisonfire/knightcrawler-metadata:2.0.10
networks:
- knightcrawler-network
restart: "no"
@@ -162,7 +163,7 @@ services:
postgres:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-migrator:2.0.8
image: gabisonfire/knightcrawler-migrator:2.0.10
networks:
- knightcrawler-network
restart: "no"
@@ -176,12 +177,12 @@ services:
condition: service_completed_successfully
postgres:
condition: service_healthy
rabbitmq:
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-producer:2.0.8
image: gabisonfire/knightcrawler-producer:2.0.10
labels:
logging: promtail
networks:
@@ -191,12 +192,22 @@ services:
## QBit collector utilizes QBitTorrent to download metadata.
qbitcollector:
depends_on:
metadata:
condition: service_completed_successfully
migrator:
condition: service_completed_successfully
postgres:
condition: service_healthy
lavinmq:
condition: service_healthy
redis:
condition: service_healthy
qbittorrent:
condition: service_healthy
deploy:
replicas: ${QBIT_REPLICAS:-0}
env_file: stack.env
image: gabisonfire/knightcrawler-qbit-collector:2.0.8
image: gabisonfire/knightcrawler-qbit-collector:2.0.10
labels:
logging: promtail
networks:

View File

@@ -16,7 +16,7 @@ rule_files:
scrape_configs:
- job_name: "rabbitmq"
static_configs:
- targets: ["rabbitmq:15692"]
- targets: ["lavinmq:15692"]
- job_name: "postgres-exporter"
static_configs:
- targets: ["postgres-exporter:9187"]

View File

@@ -4,8 +4,8 @@ x-basehealth: &base-health
retries: 3
start_period: 10s
x-rabbithealth: &rabbitmq-health
test: rabbitmq-diagnostics -q ping
x-lavinhealth: &lavinmq-health
test: [ "CMD-SHELL", "lavinmqctl status" ]
<<: *base-health
x-redishealth: &redis-health
@@ -52,21 +52,19 @@ services:
networks:
- knightcrawler-network
rabbitmq:
image: rabbitmq:3-management
lavinmq:
env_file: stack.env
# # If you need the database to be accessible from outside, please open the below port.
# # Furthermore, please, please, please, look at the documentation for rabbit on how to secure the service.
# # Furthermore, please, please, please, look at the documentation for lavinmq / rabbitmq on how to secure the service.
# ports:
# - "5672:5672"
# - "15672:15672"
# - "15692:15692"
volumes:
- rabbitmq:/var/lib/rabbitmq
image: cloudamqp/lavinmq:latest
healthcheck: *lavinmq-health
restart: unless-stopped
healthcheck: *rabbitmq-health
env_file: ../../.env
networks:
- knightcrawler-network
volumes:
- lavinmq:/var/lib/lavinmq/
## QBitTorrent is a torrent client that can be used to download torrents. In this case it's used to download metadata.
## The QBit collector requires this.

View File

@@ -11,7 +11,7 @@ x-depends: &knightcrawler-app-depends
condition: service_healthy
postgres:
condition: service_healthy
rabbitmq:
lavinmq:
condition: service_healthy
migrator:
condition: service_completed_successfully
@@ -20,7 +20,7 @@ x-depends: &knightcrawler-app-depends
services:
metadata:
image: gabisonfire/knightcrawler-metadata:2.0.8
image: gabisonfire/knightcrawler-metadata:2.0.10
env_file: ../../.env
networks:
- knightcrawler-network
@@ -30,7 +30,7 @@ services:
condition: service_completed_successfully
migrator:
image: gabisonfire/knightcrawler-migrator:2.0.8
image: gabisonfire/knightcrawler-migrator:2.0.10
env_file: ../../.env
networks:
- knightcrawler-network
@@ -40,7 +40,7 @@ services:
condition: service_healthy
addon:
image: gabisonfire/knightcrawler-addon:2.0.8
image: gabisonfire/knightcrawler-addon:2.0.10
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
hostname: knightcrawler-addon
@@ -48,22 +48,22 @@ services:
- "7000:7000"
consumer:
image: gabisonfire/knightcrawler-consumer:2.0.8
image: gabisonfire/knightcrawler-consumer:2.0.10
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
debridcollector:
image: gabisonfire/knightcrawler-debrid-collector:2.0.8
image: gabisonfire/knightcrawler-debrid-collector:2.0.10
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
producer:
image: gabisonfire/knightcrawler-producer:2.0.8
image: gabisonfire/knightcrawler-producer:2.0.10
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
qbitcollector:
image: gabisonfire/knightcrawler-qbit-collector:2.0.8
image: gabisonfire/knightcrawler-qbit-collector:2.0.10
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
depends_on:

View File

@@ -1,4 +1,4 @@
volumes:
postgres:
redis:
rabbitmq:
lavinmq:

View File

@@ -13,8 +13,8 @@ REDIS_HOST=redis
REDIS_PORT=6379
REDIS_EXTRA=abortConnect=false,allowAdmin=true
# RabbitMQ
RABBITMQ_HOST=rabbitmq
# AMQP
RABBITMQ_HOST=lavinmq
RABBITMQ_USER=guest
RABBITMQ_PASSWORD=guest
RABBITMQ_CONSUMER_QUEUE_NAME=ingested

View File

@@ -14,13 +14,12 @@ const Torrent = database.define('torrent',
{
infoHash: { type: Sequelize.STRING(64), primaryKey: true },
provider: { type: Sequelize.STRING(32), allowNull: false },
torrentId: { type: Sequelize.STRING(128) },
ingestedTorrentId: { type: Sequelize.BIGINT, allowNull: false },
title: { type: Sequelize.STRING(256), allowNull: false },
size: { type: Sequelize.BIGINT },
type: { type: Sequelize.STRING(16), allowNull: false },
uploadDate: { type: Sequelize.DATE, allowNull: false },
seeders: { type: Sequelize.SMALLINT },
trackers: { type: Sequelize.STRING(4096) },
languages: { type: Sequelize.STRING(4096) },
resolution: { type: Sequelize.STRING(16) }
}

View File

@@ -72,7 +72,7 @@ public class BasicsFile(ILogger<BasicsFile> logger, ImdbDbService dbService): IF
Category = csv.GetField(1),
Title = csv.GetField(2),
Adult = isAdultSet && adult == 1,
Year = csv.GetField(5),
Year = csv.GetField(5) == @"\N" ? 0 : int.Parse(csv.GetField(5)),
};
if (cancellationToken.IsCancellationRequested)

View File

@@ -6,5 +6,5 @@ public class ImdbBasicEntry
public string? Category { get; set; }
public string? Title { get; set; }
public bool Adult { get; set; }
public string? Year { get; set; }
public int Year { get; set; }
}

View File

@@ -17,7 +17,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
await writer.WriteAsync(entry.ImdbId, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Category, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Title, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Year, NpgsqlDbType.Text);
await writer.WriteAsync(entry.Year, NpgsqlDbType.Integer);
await writer.WriteAsync(entry.Adult, NpgsqlDbType.Boolean);
}
catch (Npgsql.PostgresException e)
@@ -116,7 +116,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
ExecuteCommandAsync(
async connection =>
{
await using var command = new NpgsqlCommand($"CREATE INDEX title_gist ON {TableNames.MetadataTable} USING gist(title gist_trgm_ops)", connection);
await using var command = new NpgsqlCommand($"CREATE INDEX title_gin ON {TableNames.MetadataTable} USING gin(title gin_trgm_ops)", connection);
await command.ExecuteNonQueryAsync();
}, "Error while creating index on imdb_metadata table");
@@ -125,7 +125,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
async connection =>
{
logger.LogInformation("Dropping Trigrams index if it exists already");
await using var dropCommand = new NpgsqlCommand("DROP INDEX if exists title_gist", connection);
await using var dropCommand = new NpgsqlCommand("DROP INDEX if exists title_gin", connection);
await dropCommand.ExecuteNonQueryAsync();
}, $"Error while dropping index on {TableNames.MetadataTable} table");

View File

@@ -0,0 +1,35 @@
-- Purpose: Change the year column to integer and add a search function that allows for searching by year.
ALTER TABLE imdb_metadata
ALTER COLUMN year TYPE integer USING (CASE WHEN year = '\N' THEN 0 ELSE year::integer END);
-- Remove the old search function
DROP FUNCTION IF EXISTS search_imdb_meta(TEXT, TEXT, TEXT, INT);
-- Add the new search function that allows for searching by year with a plus/minus one year range
CREATE OR REPLACE FUNCTION search_imdb_meta(search_term TEXT, category_param TEXT DEFAULT NULL, year_param INT DEFAULT NULL, limit_param INT DEFAULT 10)
RETURNS TABLE(imdb_id character varying(16), title character varying(1000),category character varying(50),year INT, score REAL) AS $$
BEGIN
SET pg_trgm.similarity_threshold = 0.9;
RETURN QUERY
SELECT imdb_metadata.imdb_id, imdb_metadata.title, imdb_metadata.category, imdb_metadata.year, similarity(imdb_metadata.title, search_term) as score
FROM imdb_metadata
WHERE (imdb_metadata.title % search_term)
AND (imdb_metadata.adult = FALSE)
AND (category_param IS NULL OR imdb_metadata.category = category_param)
AND (year_param IS NULL OR imdb_metadata.year BETWEEN year_param - 1 AND year_param + 1)
ORDER BY score DESC
LIMIT limit_param;
END; $$
LANGUAGE plpgsql;
-- Drop the old indexes
DROP INDEX IF EXISTS idx_imdb_metadata_adult;
DROP INDEX IF EXISTS idx_imdb_metadata_category;
DROP INDEX IF EXISTS idx_imdb_metadata_year;
DROP INDEX IF EXISTS title_gist;
-- Add indexes for the new columns
CREATE INDEX idx_imdb_metadata_adult ON imdb_metadata(adult);
CREATE INDEX idx_imdb_metadata_category ON imdb_metadata(category);
CREATE INDEX idx_imdb_metadata_year ON imdb_metadata(year);
CREATE INDEX title_gin ON imdb_metadata USING gin(title gin_trgm_ops);

View File

@@ -0,0 +1,40 @@
-- Purpose: Add the jsonb column to the ingested_torrents table to store the response from RTN
ALTER TABLE ingested_torrents
ADD COLUMN IF NOT EXISTS rtn_response jsonb;
-- Purpose: Drop torrentId column from torrents table
ALTER TABLE torrents
DROP COLUMN IF EXISTS "torrentId";
-- Purpose: Drop Trackers column from torrents table
ALTER TABLE torrents
DROP COLUMN IF EXISTS "trackers";
-- Purpose: Create a foreign key relationship if it does not already exist between torrents and the source table ingested_torrents, but do not cascade on delete.
ALTER TABLE torrents
ADD COLUMN IF NOT EXISTS "ingestedTorrentId" bigint;
DO $$
BEGIN
IF EXISTS (
SELECT 1
FROM information_schema.table_constraints
WHERE constraint_name = 'fk_torrents_info_hash'
)
THEN
ALTER TABLE torrents
DROP CONSTRAINT fk_torrents_info_hash;
END IF;
END $$;
ALTER TABLE torrents
ADD CONSTRAINT fk_torrents_info_hash
FOREIGN KEY ("ingestedTorrentId")
REFERENCES ingested_torrents("id")
ON DELETE NO ACTION;
UPDATE torrents
SET "ingestedTorrentId" = ingested_torrents."id"
FROM ingested_torrents
WHERE torrents."infoHash" = ingested_torrents."info_hash"
AND torrents."provider" = ingested_torrents."source";

View File

@@ -0,0 +1,55 @@
DROP FUNCTION IF EXISTS kc_maintenance_reconcile_dmm_imdb_ids();
CREATE OR REPLACE FUNCTION kc_maintenance_reconcile_dmm_imdb_ids()
RETURNS INTEGER AS $$
DECLARE
rec RECORD;
imdb_rec RECORD;
rows_affected INTEGER := 0;
BEGIN
RAISE NOTICE 'Starting Reconciliation of DMM IMDB Ids...';
FOR rec IN
SELECT
it."id" as "ingestion_id",
t."infoHash",
it."category" as "ingestion_category",
f."id" as "file_Id",
f."title" as "file_Title",
(rtn_response->>'raw_title')::text as "raw_title",
(rtn_response->>'parsed_title')::text as "parsed_title",
(rtn_response->>'year')::int as "year"
FROM torrents t
JOIN ingested_torrents it ON t."ingestedTorrentId" = it."id"
JOIN files f ON t."infoHash" = f."infoHash"
WHERE t."provider" = 'DMM'
LOOP
RAISE NOTICE 'Processing record with file_Id: %', rec."file_Id";
FOR imdb_rec IN
SELECT * FROM search_imdb_meta(
rec."parsed_title",
CASE
WHEN rec."ingestion_category" = 'tv' THEN 'tvSeries'
WHEN rec."ingestion_category" = 'movies' THEN 'movie'
END,
CASE
WHEN rec."year" = 0 THEN NULL
ELSE rec."year" END,
1)
LOOP
IF imdb_rec IS NOT NULL THEN
RAISE NOTICE 'Updating file_Id: % with imdbId: %, parsed title: %, imdb title: %', rec."file_Id", imdb_rec."imdb_id", rec."parsed_title", imdb_rec."title";
UPDATE "files"
SET "imdbId" = imdb_rec."imdb_id"
WHERE "id" = rec."file_Id";
rows_affected := rows_affected + 1;
ELSE
RAISE NOTICE 'No IMDB ID found for file_Id: %, parsed title: %, imdb title: %, setting imdbId to NULL', rec."file_Id", rec."parsed_title", imdb_rec."title";
UPDATE "files"
SET "imdbId" = NULL
WHERE "id" = rec."file_Id";
END IF;
END LOOP;
END LOOP;
RAISE NOTICE 'Finished reconciliation. Total rows affected: %', rows_affected;
RETURN rows_affected;
END;
$$ LANGUAGE plpgsql;

View File

@@ -1,2 +1,3 @@
remove-item -recurse -force ../src/python
mkdir -p ../src/python
pip install --force-reinstall rank-torrent-name==0.1.6 -t ../src/python/
pip install -r ../src/requirements.txt -t ../src/python/

View File

@@ -1,4 +1,5 @@
#!/bin/bash
rm -rf ../src/python
mkdir -p ../src/python
pip install --force-reinstall rank-torrent-name==0.1.6 -t ../src/python/
python3 -m pip install -r ../src/requirements.txt -t ../src/python/

View File

@@ -13,13 +13,19 @@ FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine3.19
WORKDIR /app
ENV PYTHONUNBUFFERED=1
RUN apk add --update --no-cache python3=~3.11.8-r0 py3-pip && ln -sf python3 /usr/bin/python
COPY --from=build /src/out .
RUN rm -rf /app/python && mkdir -p /app/python
RUN pip3 install --force-reinstall rank-torrent-name==0.1.6 -t /app/python
RUN pip3 install -r /app/requirements.txt -t /app/python
RUN addgroup -S producer && adduser -S -G producer producer
USER producer
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD pgrep -f dotnet || exit 1

View File

@@ -1,5 +1,3 @@
using Microsoft.VisualBasic;
namespace Producer.Features.Crawlers.Dmm;
public partial class DebridMediaManagerCrawler(
@@ -12,7 +10,6 @@ public partial class DebridMediaManagerCrawler(
{
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
private static partial Regex HashCollectionMatcher();
private LengthAwareRatioScorer _lengthAwareRatioScorer = new();
private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
@@ -118,32 +115,27 @@ public partial class DebridMediaManagerCrawler(
return null;
}
var (cached, cachedResult) = await CheckIfInCacheAndReturn(parsedTorrent.ParsedTitle);
var (cached, cachedResult) = await CheckIfInCacheAndReturn(parsedTorrent.Response.ParsedTitle);
if (cached)
{
logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.ParsedTitle);
logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Response.ParsedTitle);
return MapToTorrent(cachedResult, bytesElement, hashElement, parsedTorrent);
}
var year = parsedTorrent.Year != 0 ? parsedTorrent.Year.ToString() : null;
var imdbEntries = await Storage.FindImdbMetadata(parsedTorrent.ParsedTitle, parsedTorrent.IsMovie ? "movies" : "tv", year);
int? year = parsedTorrent.Response.Year != 0 ? parsedTorrent.Response.Year : null;
var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Response.ParsedTitle, parsedTorrent.Response.IsMovie ? "movies" : "tv", year);
if (imdbEntries.Count == 0)
if (imdbEntry is null)
{
return null;
}
var scoredTitles = await ScoreTitles(parsedTorrent, imdbEntries);
await AddToCache(parsedTorrent.Response.ParsedTitle.ToLowerInvariant(), imdbEntry);
if (!scoredTitles.Success)
{
return null;
}
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", scoredTitles.BestMatch.Value.ImdbId, parsedTorrent.ParsedTitle, scoredTitles.BestMatch.Value.Title, scoredTitles.BestMatch.Score);
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", imdbEntry.ImdbId, parsedTorrent.Response.ParsedTitle, imdbEntry.Title, imdbEntry.Score);
return MapToTorrent(scoredTitles.BestMatch.Value, bytesElement, hashElement, parsedTorrent);
return MapToTorrent(imdbEntry, bytesElement, hashElement, parsedTorrent);
}
private IngestedTorrent MapToTorrent(ImdbEntry result, JsonElement bytesElement, JsonElement hashElement, ParseTorrentTitleResponse parsedTorrent) =>
@@ -156,40 +148,22 @@ public partial class DebridMediaManagerCrawler(
InfoHash = hashElement.ToString(),
Seeders = 0,
Leechers = 0,
Category = parsedTorrent.IsMovie switch
Category = parsedTorrent.Response.IsMovie switch
{
true => "movies",
false => "tv",
},
RtnResponse = parsedTorrent.Response.ToJson(),
};
private async Task<(bool Success, ExtractedResult<ImdbEntry>? BestMatch)> ScoreTitles(ParseTorrentTitleResponse parsedTorrent, List<ImdbEntry> imdbEntries)
{
var lowerCaseTitle = parsedTorrent.ParsedTitle.ToLowerInvariant();
// Scoring directly operates on the List<ImdbEntry>, no need for lookup table.
var scoredResults = Process.ExtractAll(new(){Title = lowerCaseTitle}, imdbEntries, x => x.Title?.ToLowerInvariant(), scorer: _lengthAwareRatioScorer, cutoff: 90);
var best = scoredResults.MaxBy(x => x.Score);
if (best is null)
{
return (false, null);
}
await AddToCache(lowerCaseTitle, best);
return (true, best);
}
private Task AddToCache(string lowerCaseTitle, ExtractedResult<ImdbEntry> best)
private Task AddToCache(string lowerCaseTitle, ImdbEntry best)
{
var cacheOptions = new DistributedCacheEntryOptions
{
AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1),
};
return cache.SetStringAsync(lowerCaseTitle, JsonSerializer.Serialize(best.Value), cacheOptions);
return cache.SetStringAsync(lowerCaseTitle, JsonSerializer.Serialize(best), cacheOptions);
}
private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string title)

View File

@@ -33,6 +33,9 @@
<None Include="Configuration\*.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="requirements.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup Condition="'$(Configuration)' == 'Debug'">

View File

@@ -0,0 +1 @@
rank-torrent-name==0.1.9

View File

@@ -9,9 +9,9 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
const string query =
"""
INSERT INTO ingested_torrents
("name", "source", "category", "info_hash", "size", "seeders", "leechers", "imdb", "processed", "createdAt", "updatedAt")
("name", "source", "category", "info_hash", "size", "seeders", "leechers", "imdb", "processed", "createdAt", "updatedAt", "rtn_response")
VALUES
(@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt)
(@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt, @RtnResponse::jsonb)
ON CONFLICT (source, info_hash) DO NOTHING
""";
@@ -110,21 +110,21 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
public async Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
const string query = @"SELECT imdb_id AS ImdbId, title as Title, category as Category, year as Year, adult as Adult FROM imdb_metadata WHERE CAST(NULLIF(Year, '\N') AS INTEGER) <= @Year AND imdb_id > @LastProcessedImdbId ORDER BY ImdbId LIMIT @BatchSize";
const string query = @"SELECT imdb_id AS ImdbId, title as Title, category as Category, year as Year, adult as Adult FROM imdb_metadata WHERE Year <= @Year AND imdb_id > @LastProcessedImdbId ORDER BY ImdbId LIMIT @BatchSize";
var result = await connection.QueryAsync<ImdbEntry>(query, new { Year = year, LastProcessedImdbId = stateLastProcessedImdbId, BatchSize = batchSize });
return result.ToList();
}, "Error getting imdb metadata.", cancellationToken);
public async Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, string torrentType, string? year, CancellationToken cancellationToken = default) =>
public async Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string torrentType, int? year, CancellationToken cancellationToken = default) =>
await ExecuteCommandAsync(async connection =>
{
var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\" from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{(torrentType.Equals("movie", StringComparison.OrdinalIgnoreCase) ? "movie" : "tvSeries")}'";
query += year is not null ? $", '{year}'" : ", NULL";
query += ", 15)";
var query = $"select \"imdb_id\" as \"ImdbId\", \"title\" as \"Title\", \"year\" as \"Year\", \"score\" as Score from search_imdb_meta('{parsedTorrentTitle.Replace("'", "").Replace("\"", "")}', '{(torrentType.Equals("movie", StringComparison.OrdinalIgnoreCase) ? "movie" : "tvSeries")}'";
query += year is not null ? $", {year}" : ", NULL";
query += ", 1)";
var result = await connection.QueryAsync<ImdbEntry>(query);
return result.ToList();
var results = result.ToList();
return results.FirstOrDefault();
}, "Error finding imdb metadata.", cancellationToken);
public Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default) =>
@@ -134,9 +134,9 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
const string query =
"""
INSERT INTO "torrents"
("infoHash", "provider", "torrentId", "title", "size", "type", "uploadDate", "seeders", "trackers", "languages", "resolution", "reviewed", "opened", "createdAt", "updatedAt")
("infoHash", "ingestedTorrentId", "provider", "title", "size", "type", "uploadDate", "seeders", "languages", "resolution", "reviewed", "opened", "createdAt", "updatedAt")
VALUES
(@InfoHash, @Provider, @TorrentId, @Title, 0, @Type, NOW(), @Seeders, NULL, NULL, NULL, false, false, NOW(), NOW())
(@InfoHash, @IngestedTorrentId, @Provider, @Title, 0, @Type, NOW(), @Seeders, NULL, NULL, false, false, NOW(), NOW())
ON CONFLICT ("infoHash") DO NOTHING
""";

View File

@@ -9,7 +9,7 @@ public interface IDataStorage
Task<DapperResult<PageIngestedResult, PageIngestedResult>> MarkPageAsIngested(string pageId, CancellationToken cancellationToken = default);
Task<DapperResult<int, int>> GetRowCountImdbMetadata(CancellationToken cancellationToken = default);
Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default);
Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, string parsedTorrentTorrentType, string? parsedTorrentYear, CancellationToken cancellationToken = default);
Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string parsedTorrentTorrentType, int? parsedTorrentYear, CancellationToken cancellationToken = default);
Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default);
Task InsertFiles(IEnumerable<TorrentFile> files, CancellationToken cancellationToken = default);
Task InsertSubtitles(IEnumerable<SubtitleFile> subtitles, CancellationToken cancellationToken = default);

View File

@@ -0,0 +1,14 @@
namespace SharedContracts.Extensions;
public static class JsonExtensions
{
private static readonly JsonSerializerOptions JsonSerializerOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
WriteIndented = false,
ReferenceHandler = ReferenceHandler.IgnoreCycles,
NumberHandling = JsonNumberHandling.Strict,
};
public static string AsJson<T>(this T obj) => JsonSerializer.Serialize(obj, JsonSerializerOptions);
}

View File

@@ -1,6 +1,7 @@
// Global using directives
global using System.Text.Json;
global using System.Text.Json.Serialization;
global using Dapper;
global using MassTransit;
global using Microsoft.AspNetCore.Builder;
@@ -14,4 +15,4 @@ global using Python.Runtime;
global using Serilog;
global using SharedContracts.Configuration;
global using SharedContracts.Extensions;
global using SharedContracts.Models;
global using SharedContracts.Models;

View File

@@ -7,4 +7,5 @@ public class ImdbEntry
public string? Category { get; set; }
public string? Year { get; set; }
public bool? Adult { get; set; }
public decimal? Score { get; set; }
}

View File

@@ -12,7 +12,9 @@ public class IngestedTorrent
public int Leechers { get; set; }
public string? Imdb { get; set; }
public bool Processed { get; set; } = false;
public bool Processed { get; set; }
public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
public DateTime UpdatedAt { get; set; } = DateTime.UtcNow;
public string? RtnResponse { get; set; }
}

View File

@@ -3,6 +3,7 @@ namespace SharedContracts.Models;
public class Torrent
{
public string? InfoHash { get; set; }
public long? IngestedTorrentId { get; set; }
public string? Provider { get; set; }
public string? TorrentId { get; set; }
public string? Title { get; set; }

View File

@@ -0,0 +1,13 @@
namespace SharedContracts.Python;
public interface IPythonEngineService
{
ILogger<PythonEngineService> Logger { get; }
Task InitializePythonEngine(CancellationToken cancellationToken);
T ExecuteCommandOrScript<T>(string command, PyModule module, bool throwOnErrors);
T ExecutePythonOperation<T>(Func<T> operation, string operationName, bool throwOnErrors);
T ExecutePythonOperationWithDefault<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors, bool logErrors);
Task StopPythonEngine(CancellationToken cancellationToken);
dynamic? Sys { get; }
}

View File

@@ -0,0 +1,8 @@
namespace SharedContracts.Python;
public class PythonEngineManager(IPythonEngineService pythonEngineService) : IHostedService
{
public Task StartAsync(CancellationToken cancellationToken) => pythonEngineService.InitializePythonEngine(cancellationToken);
public Task StopAsync(CancellationToken cancellationToken) => pythonEngineService.StopPythonEngine(cancellationToken);
}

View File

@@ -1,24 +1,28 @@
namespace SharedContracts.Python;
public class PythonEngineService(ILogger<PythonEngineService> logger) : IHostedService
public class PythonEngineService(ILogger<PythonEngineService> logger) : IPythonEngineService
{
private IntPtr _mainThreadState;
private bool _isInitialized;
public Task StartAsync(CancellationToken cancellationToken)
public ILogger<PythonEngineService> Logger { get; } = logger;
public dynamic? Sys { get; private set; }
public Task InitializePythonEngine(CancellationToken cancellationToken)
{
if (_isInitialized)
{
return Task.CompletedTask;
}
try
{
var pythonDllEnv = Environment.GetEnvironmentVariable("PYTHONNET_PYDLL");
if (string.IsNullOrWhiteSpace(pythonDllEnv))
{
logger.LogWarning("PYTHONNET_PYDLL env is not set. Exiting Application");
Logger.LogWarning("PYTHONNET_PYDLL env is not set. Exiting Application");
Environment.Exit(1);
return Task.CompletedTask;
}
@@ -26,24 +30,95 @@ public class PythonEngineService(ILogger<PythonEngineService> logger) : IHostedS
Runtime.PythonDLL = pythonDllEnv;
PythonEngine.Initialize();
_mainThreadState = PythonEngine.BeginAllowThreads();
_isInitialized = true;
logger.LogInformation("Python engine initialized");
Logger.LogInformation("Python engine initialized");
}
catch (Exception e)
{
logger.LogWarning(e, "Failed to initialize Python engine");
Logger.LogError(e, $"Failed to initialize Python engine: {e.Message}");
Environment.Exit(1);
}
return Task.CompletedTask;
}
public Task StopAsync(CancellationToken cancellationToken)
public T ExecuteCommandOrScript<T>(string command, PyModule module, bool throwOnErrors) =>
ExecutePythonOperation(
() =>
{
var pyCompile = PythonEngine.Compile(command);
var nativeResult = module.Execute(pyCompile);
return nativeResult.As<T>();
}, nameof(ExecuteCommandOrScript), throwOnErrors);
public T ExecutePythonOperation<T>(Func<T> operation, string operationName, bool throwOnErrors) =>
ExecutePythonOperationWithDefault(operation, default, operationName, throwOnErrors, true);
public T ExecutePythonOperationWithDefault<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors, bool logErrors) =>
ExecutePythonOperationInternal(operation, defaultValue, operationName, throwOnErrors, logErrors);
public void ExecuteOnGIL(Action act, bool throwOnErrors)
{
Sys ??= LoadSys();
try
{
using var gil = Py.GIL();
act();
}
catch (Exception ex)
{
Logger.LogError(ex, "Python Error: {Message} ({OperationName})", ex.Message, nameof(ExecuteOnGIL));
if (throwOnErrors)
{
throw;
}
}
}
public Task StopPythonEngine(CancellationToken cancellationToken)
{
PythonEngine.EndAllowThreads(_mainThreadState);
PythonEngine.Shutdown();
return Task.CompletedTask;
}
private static dynamic LoadSys()
{
using var gil = Py.GIL();
var sys = Py.Import("sys");
return sys;
}
/// <summary>
/// Runs <paramref name="operation"/> while holding the Python GIL, returning
/// <paramref name="defaultValue"/> when the operation throws and errors are
/// not rethrown.
/// </summary>
/// <param name="operation">Work to execute under the GIL.</param>
/// <param name="defaultValue">Result returned when the operation fails and the failure is swallowed.</param>
/// <param name="operationName">Caller-supplied label included in error logs.</param>
/// <param name="throwOnErrors">When true, the exception is rethrown after optional logging.</param>
/// <param name="logErrors">When false, failures are not logged (callers such as Parse pass false to keep expected parse misses quiet).</param>
private T ExecutePythonOperationInternal<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors, bool logErrors)
{
    // Make sure the cached sys module is loaded before touching the interpreter.
    Sys ??= LoadSys();

    var result = defaultValue;
    try
    {
        using var gil = Py.GIL();
        result = operation();
    }
    catch (Exception ex)
    {
        if (logErrors)
        {
            // Bug fix: this previously passed `nameof(operationName)`, which
            // logged the literal string "operationName" for every failure
            // instead of the caller-supplied operation name.
            Logger.LogError(ex, "Python Error: {Message} ({OperationName})", ex.Message, operationName);
        }

        if (throwOnErrors)
        {
            throw;
        }
    }

    return result;
}
}

View File

@@ -3,6 +3,4 @@ namespace SharedContracts.Python.RTN;
/// <summary>
/// Abstraction over the Python "rank-torrent-name" (RTN) library, invoked via
/// Python.NET.
/// </summary>
public interface IRankTorrentName
{
/// <summary>
/// Parses a raw torrent title with RTN and returns the structured result
/// (success flag plus the parsed fields).
/// </summary>
ParseTorrentTitleResponse Parse(string title);
/// <summary>
/// Asks RTN (check_trash) whether the title matches its known low-quality
/// release patterns. NOTE(review): the exact criteria live in the Python
/// library — confirm there.
/// </summary>
bool IsTrash(string title);
/// <summary>
/// Compares a title against a reference title using RTN's title_match.
/// </summary>
bool TitleMatch(string title, string checkTitle);
}

View File

@@ -1,6 +1,3 @@
namespace SharedContracts.Python.RTN;
// Flat parse result: individual fields extracted from RTN's parsed data.
// Season/Episode are null (never empty arrays) when absent; IsMovie relies on
// that normalization.
public record ParseTorrentTitleResponse(bool Success, string ParsedTitle, int Year, int[]? Season = null, int[]? Episode = null)
{
// A title with no season and no episode information is treated as a movie.
public bool IsMovie => Season == null && Episode == null;
}
public record ParseTorrentTitleResponse(bool Success, RtnResponse? Response);

View File

@@ -2,117 +2,50 @@ namespace SharedContracts.Python.RTN;
public class RankTorrentName : IRankTorrentName
{
private const string SysModuleName = "sys";
private readonly IPythonEngineService _pythonEngineService;
private const string RtnModuleName = "RTN";
private readonly ILogger<RankTorrentName> _logger;
private dynamic? _sys;
private dynamic? _rtn;
public RankTorrentName(ILogger<RankTorrentName> logger)
public RankTorrentName(IPythonEngineService pythonEngineService)
{
_logger = logger;
_pythonEngineService = pythonEngineService;
InitModules();
}
public ParseTorrentTitleResponse Parse(string title)
{
try
{
using var py = Py.GIL();
var result = _rtn?.parse(title);
if (result == null)
public ParseTorrentTitleResponse Parse(string title) =>
_pythonEngineService.ExecutePythonOperationWithDefault(
() =>
{
return new(false, string.Empty, 0);
}
var result = _rtn?.parse(title);
return ParseResult(result);
}, new ParseTorrentTitleResponse(false, null), nameof(Parse), throwOnErrors: false, logErrors: false);
return ParseResult(result);
}
catch (Exception e)
{
_logger.LogError(e, "Failed to parse title");
return new(false, string.Empty, 0);
}
}
public bool IsTrash(string title)
{
    // Delegates to RTN's check_trash; any Python failure (or a missing RTN
    // module) is treated as "not trash".
    try
    {
        using var gil = Py.GIL();

        var checkResult = _rtn?.check_trash(title);
        if (checkResult == null)
        {
            return false;
        }

        return checkResult.As<bool>() ?? false;
    }
    catch (Exception e)
    {
        _logger.LogError(e, "Failed to parse title");
        return false;
    }
}
public bool TitleMatch(string title, string checkTitle)
{
    // Delegates to RTN's title_match; any Python failure (or a missing RTN
    // module) is treated as "no match".
    try
    {
        using var gil = Py.GIL();

        var matchResult = _rtn?.title_match(title, checkTitle);
        if (matchResult == null)
        {
            return false;
        }

        return matchResult.As<bool>() ?? false;
    }
    catch (Exception e)
    {
        _logger.LogError(e, "Failed to parse title");
        return false;
    }
}
private static ParseTorrentTitleResponse ParseResult(dynamic result)
{
    // Pull each attribute off the Python parse object, falling back to
    // neutral defaults when an attribute is absent.
    var title = result.GetAttr("parsed_title")?.As<string>() ?? string.Empty;
    var year = result.GetAttr("year")?.As<int>() ?? 0;

    var rawSeasons = result.GetAttr("season")?.As<PyList>();
    var rawEpisodes = result.GetAttr("episode")?.As<PyList>();

    // RTN appears to hand back empty lists when there is no season/episode
    // data; normalise those to null so downstream null checks suffice.
    int[]? seasonNumbers = rawSeasons?.Length() > 0 ? rawSeasons.As<int[]>() : null;
    int[]? episodeNumbers = rawEpisodes?.Length() > 0 ? rawEpisodes.As<int[]>() : null;

    return new ParseTorrentTitleResponse(true, title, year, seasonNumbers, episodeNumbers);
}
private void InitModules()
{
using var py = Py.GIL();
_sys = Py.Import(SysModuleName);
if (_sys == null)
if (result == null)
{
_logger.LogError($"Failed to import Python module: {SysModuleName}");
return;
return new(false, null);
}
_sys.path.append(Path.Combine(AppContext.BaseDirectory, "python"));
var json = result.model_dump_json()?.As<string?>();
_rtn = Py.Import(RtnModuleName);
if (_rtn == null)
if (json is null || string.IsNullOrEmpty(json))
{
_logger.LogError($"Failed to import Python module: {RtnModuleName}");
return new(false, null);
}
var response = JsonSerializer.Deserialize<RtnResponse>(json);
return new(true, response);
}
private void InitModules()
{
    // Make the bundled ./python directory importable, then import the RTN
    // module itself. Import failures are logged by the engine service rather
    // than thrown, leaving _rtn null (callers null-check it on every use).
    _rtn = _pythonEngineService.ExecutePythonOperation(
        () =>
        {
            var bundledPythonPath = Path.Combine(AppContext.BaseDirectory, "python");
            _pythonEngineService.Sys.path.append(bundledPythonPath);

            return Py.Import(RtnModuleName);
        },
        nameof(InitModules),
        throwOnErrors: false);
}
}

View File

@@ -0,0 +1,83 @@
namespace SharedContracts.Python.RTN;
/// <summary>
/// Strongly typed mirror of the JSON produced when RTN's parse result is
/// dumped via Pydantic's model_dump_json(). Properties map 1:1 onto the
/// snake_case JSON keys via <see cref="JsonPropertyNameAttribute"/>.
/// </summary>
public class RtnResponse
{
    [JsonPropertyName("raw_title")]
    public string? RawTitle { get; set; }

    [JsonPropertyName("parsed_title")]
    public string? ParsedTitle { get; set; }

    // NOTE(review): "fetch" semantics come from RTN itself (whether the release
    // passes the configured ranking filters) — confirm against the RTN docs.
    [JsonPropertyName("fetch")]
    public bool Fetch { get; set; }

    [JsonPropertyName("is_4k")]
    public bool Is4K { get; set; }

    [JsonPropertyName("is_multi_audio")]
    public bool IsMultiAudio { get; set; }

    [JsonPropertyName("is_multi_subtitle")]
    public bool IsMultiSubtitle { get; set; }

    [JsonPropertyName("is_complete")]
    public bool IsComplete { get; set; }

    [JsonPropertyName("year")]
    public int Year { get; set; }

    [JsonPropertyName("resolution")]
    public List<string>? Resolution { get; set; }

    [JsonPropertyName("quality")]
    public List<string>? Quality { get; set; }

    [JsonPropertyName("season")]
    public List<int>? Season { get; set; }

    [JsonPropertyName("episode")]
    public List<int>? Episode { get; set; }

    [JsonPropertyName("codec")]
    public List<string>? Codec { get; set; }

    [JsonPropertyName("audio")]
    public List<string>? Audio { get; set; }

    [JsonPropertyName("subtitles")]
    public List<string>? Subtitles { get; set; }

    [JsonPropertyName("language")]
    public List<string>? Language { get; set; }

    [JsonPropertyName("bit_depth")]
    public List<int>? BitDepth { get; set; }

    [JsonPropertyName("hdr")]
    public string? Hdr { get; set; }

    [JsonPropertyName("proper")]
    public bool Proper { get; set; }

    [JsonPropertyName("repack")]
    public bool Repack { get; set; }

    [JsonPropertyName("remux")]
    public bool Remux { get; set; }

    [JsonPropertyName("upscaled")]
    public bool Upscaled { get; set; }

    [JsonPropertyName("remastered")]
    public bool Remastered { get; set; }

    [JsonPropertyName("directors_cut")]
    public bool DirectorsCut { get; set; }

    [JsonPropertyName("extended")]
    public bool Extended { get; set; }

    /// <summary>
    /// True when the parsed title carries no season/episode information.
    /// Bug fix: the JSON source serialises "no data" as empty lists rather
    /// than null (the previous flat-record pipeline explicitly normalised
    /// empty lists to null before this same check), so a null-only comparison
    /// classified every deserialised movie as a series. Null OR empty now
    /// counts as "no information".
    /// </summary>
    public bool IsMovie => Season is not { Count: > 0 } && Episode is not { Count: > 0 };

    /// <summary>Serialises this instance back to JSON via the shared AsJson() extension.</summary>
    public string ToJson() => this.AsJson();
}

View File

@@ -4,9 +4,8 @@ public static class ServiceCollectionExtensions
{
/// <summary>
/// Wires the embedded Python engine into the DI container.
/// </summary>
public static IServiceCollection RegisterPythonEngine(this IServiceCollection services)
{
// NOTE(review): four registrations appear here — a concrete PythonEngineService
// singleton forwarded as a hosted service, AND an interface-mapped singleton
// paired with a separate PythonEngineManager hosted service. This looks like
// two alternative wiring schemes interleaved; confirm only one pair is intended,
// otherwise the engine may be started twice.
services.AddSingleton<PythonEngineService>();
services.AddHostedService(p => p.GetRequiredService<PythonEngineService>());
services.AddSingleton<IPythonEngineService, PythonEngineService>();
services.AddHostedService<PythonEngineManager>();
return services;
}

View File

@@ -11,6 +11,7 @@ public class PerformIngestionConsumer(IDataStorage dataStorage, ILogger<PerformI
var torrent = new Torrent
{
InfoHash = request.IngestedTorrent.InfoHash.ToLowerInvariant(),
IngestedTorrentId = request.IngestedTorrent.Id,
Provider = request.IngestedTorrent.Source,
Title = request.IngestedTorrent.Name,
Type = request.IngestedTorrent.Category,