mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
trigram performance increased and housekeeping (#184)
* add new indexes, and change year column to int * Change gist to gin, and change year to int * Producer changes for new gin query * Fully map the rtn response using json dump from Pydantic Also updates Rtn to 0.1.9 * Add housekeeping script to reconcile imdb ids. * Join Torrent onto the ingested torrent table Ensure that a torrent can always find the details of where it came from, and how it was parsed. * Version bump for release * missing quote on table name
This commit is contained in:
@@ -94,7 +94,7 @@ services:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
hostname: knightcrawler-addon
|
||||
image: gabisonfire/knightcrawler-addon:2.0.9
|
||||
image: gabisonfire/knightcrawler-addon:2.0.10
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
@@ -117,7 +117,7 @@ services:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-consumer:2.0.9
|
||||
image: gabisonfire/knightcrawler-consumer:2.0.10
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
@@ -138,7 +138,7 @@ services:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-debrid-collector:2.0.9
|
||||
image: gabisonfire/knightcrawler-debrid-collector:2.0.10
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
@@ -152,7 +152,7 @@ services:
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-metadata:2.0.9
|
||||
image: gabisonfire/knightcrawler-metadata:2.0.10
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: "no"
|
||||
@@ -163,7 +163,7 @@ services:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-migrator:2.0.9
|
||||
image: gabisonfire/knightcrawler-migrator:2.0.10
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
restart: "no"
|
||||
@@ -182,7 +182,7 @@ services:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-producer:2.0.9
|
||||
image: gabisonfire/knightcrawler-producer:2.0.10
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
@@ -207,7 +207,7 @@ services:
|
||||
deploy:
|
||||
replicas: ${QBIT_REPLICAS:-0}
|
||||
env_file: stack.env
|
||||
image: gabisonfire/knightcrawler-qbit-collector:2.0.9
|
||||
image: gabisonfire/knightcrawler-qbit-collector:2.0.10
|
||||
labels:
|
||||
logging: promtail
|
||||
networks:
|
||||
|
||||
@@ -20,7 +20,7 @@ x-depends: &knightcrawler-app-depends
|
||||
|
||||
services:
|
||||
metadata:
|
||||
image: gabisonfire/knightcrawler-metadata:2.0.9
|
||||
image: gabisonfire/knightcrawler-metadata:2.0.10
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
@@ -30,7 +30,7 @@ services:
|
||||
condition: service_completed_successfully
|
||||
|
||||
migrator:
|
||||
image: gabisonfire/knightcrawler-migrator:2.0.9
|
||||
image: gabisonfire/knightcrawler-migrator:2.0.10
|
||||
env_file: ../../.env
|
||||
networks:
|
||||
- knightcrawler-network
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
condition: service_healthy
|
||||
|
||||
addon:
|
||||
image: gabisonfire/knightcrawler-addon:2.0.9
|
||||
image: gabisonfire/knightcrawler-addon:2.0.10
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
hostname: knightcrawler-addon
|
||||
@@ -48,22 +48,22 @@ services:
|
||||
- "7000:7000"
|
||||
|
||||
consumer:
|
||||
image: gabisonfire/knightcrawler-consumer:2.0.9
|
||||
image: gabisonfire/knightcrawler-consumer:2.0.10
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
|
||||
debridcollector:
|
||||
image: gabisonfire/knightcrawler-debrid-collector:2.0.9
|
||||
image: gabisonfire/knightcrawler-debrid-collector:2.0.10
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
|
||||
producer:
|
||||
image: gabisonfire/knightcrawler-producer:2.0.9
|
||||
image: gabisonfire/knightcrawler-producer:2.0.10
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
|
||||
qbitcollector:
|
||||
image: gabisonfire/knightcrawler-qbit-collector:2.0.9
|
||||
image: gabisonfire/knightcrawler-qbit-collector:2.0.10
|
||||
<<: [*knightcrawler-app, *knightcrawler-app-depends]
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
|
||||
@@ -72,7 +72,7 @@ public class BasicsFile(ILogger<BasicsFile> logger, ImdbDbService dbService): IF
|
||||
Category = csv.GetField(1),
|
||||
Title = csv.GetField(2),
|
||||
Adult = isAdultSet && adult == 1,
|
||||
Year = csv.GetField(5),
|
||||
Year = csv.GetField(5) == @"\N" ? 0 : int.Parse(csv.GetField(5)),
|
||||
};
|
||||
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
|
||||
@@ -6,5 +6,5 @@ public class ImdbBasicEntry
|
||||
public string? Category { get; set; }
|
||||
public string? Title { get; set; }
|
||||
public bool Adult { get; set; }
|
||||
public string? Year { get; set; }
|
||||
public int Year { get; set; }
|
||||
}
|
||||
@@ -17,7 +17,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
|
||||
await writer.WriteAsync(entry.ImdbId, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Category, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Title, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Year, NpgsqlDbType.Text);
|
||||
await writer.WriteAsync(entry.Year, NpgsqlDbType.Integer);
|
||||
await writer.WriteAsync(entry.Adult, NpgsqlDbType.Boolean);
|
||||
}
|
||||
catch (Npgsql.PostgresException e)
|
||||
@@ -116,7 +116,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
|
||||
ExecuteCommandAsync(
|
||||
async connection =>
|
||||
{
|
||||
await using var command = new NpgsqlCommand($"CREATE INDEX title_gist ON {TableNames.MetadataTable} USING gist(title gist_trgm_ops)", connection);
|
||||
await using var command = new NpgsqlCommand($"CREATE INDEX title_gin ON {TableNames.MetadataTable} USING gin(title gin_trgm_ops)", connection);
|
||||
await command.ExecuteNonQueryAsync();
|
||||
}, "Error while creating index on imdb_metadata table");
|
||||
|
||||
@@ -125,7 +125,7 @@ public class ImdbDbService(PostgresConfiguration configuration, ILogger<ImdbDbSe
|
||||
async connection =>
|
||||
{
|
||||
logger.LogInformation("Dropping Trigrams index if it exists already");
|
||||
await using var dropCommand = new NpgsqlCommand("DROP INDEX if exists title_gist", connection);
|
||||
await using var dropCommand = new NpgsqlCommand("DROP INDEX if exists title_gin", connection);
|
||||
await dropCommand.ExecuteNonQueryAsync();
|
||||
}, $"Error while dropping index on {TableNames.MetadataTable} table");
|
||||
|
||||
|
||||
35
src/migrator/migrations/009_imdb_year_column_int.sql
Normal file
35
src/migrator/migrations/009_imdb_year_column_int.sql
Normal file
@@ -0,0 +1,35 @@
|
||||
-- Purpose: Change the year column to integer and add a search function that allows for searching by year.
|
||||
ALTER TABLE imdb_metadata
|
||||
ALTER COLUMN year TYPE integer USING (CASE WHEN year = '\N' THEN 0 ELSE year::integer END);
|
||||
|
||||
-- Remove the old search function
|
||||
DROP FUNCTION IF EXISTS search_imdb_meta(TEXT, TEXT, TEXT, INT);
|
||||
|
||||
-- Add the new search function that allows for searching by year with a plus/minus one year range
|
||||
-- Trigram title search over imdb_metadata.
--   search_term    : title text matched with the pg_trgm % operator
--   category_param : optional exact category filter (NULL = any category)
--   year_param     : optional year filter, matched within +/- one year (NULL = any year)
--   limit_param    : maximum number of rows returned, best score first
-- Adult titles are always excluded.
-- The similarity threshold is attached to the function itself (SET clause below)
-- so it only applies while the function runs and is restored on exit, instead of
-- silently changing the threshold for the rest of the calling session.
CREATE OR REPLACE FUNCTION search_imdb_meta(search_term TEXT, category_param TEXT DEFAULT NULL, year_param INT DEFAULT NULL, limit_param INT DEFAULT 10)
    RETURNS TABLE(imdb_id character varying(16), title character varying(1000),category character varying(50),year INT, score REAL) AS $$
BEGIN
    RETURN QUERY
        SELECT imdb_metadata.imdb_id, imdb_metadata.title, imdb_metadata.category, imdb_metadata.year, similarity(imdb_metadata.title, search_term) as score
        FROM imdb_metadata
        WHERE (imdb_metadata.title % search_term)
          AND (imdb_metadata.adult = FALSE)
          AND (category_param IS NULL OR imdb_metadata.category = category_param)
          AND (year_param IS NULL OR imdb_metadata.year BETWEEN year_param - 1 AND year_param + 1)
        ORDER BY score DESC
        LIMIT limit_param;
END; $$
LANGUAGE plpgsql
SET pg_trgm.similarity_threshold = 0.9;
|
||||
|
||||
-- Drop the old indexes
|
||||
DROP INDEX IF EXISTS idx_imdb_metadata_adult;
|
||||
DROP INDEX IF EXISTS idx_imdb_metadata_category;
|
||||
DROP INDEX IF EXISTS idx_imdb_metadata_year;
|
||||
DROP INDEX IF EXISTS title_gist;
|
||||
|
||||
-- Add indexes for the new columns
|
||||
CREATE INDEX idx_imdb_metadata_adult ON imdb_metadata(adult);
|
||||
CREATE INDEX idx_imdb_metadata_category ON imdb_metadata(category);
|
||||
CREATE INDEX idx_imdb_metadata_year ON imdb_metadata(year);
|
||||
CREATE INDEX title_gin ON imdb_metadata USING gin(title gin_trgm_ops);
|
||||
@@ -0,0 +1,40 @@
|
||||
-- Purpose: Add the jsonb column to the ingested_torrents table to store the response from RTN
|
||||
ALTER TABLE ingested_torrents
|
||||
ADD COLUMN IF NOT EXISTS rtn_response jsonb;
|
||||
|
||||
-- Purpose: Drop torrentId column from torrents table
|
||||
ALTER TABLE torrents
|
||||
DROP COLUMN IF EXISTS "torrentId";
|
||||
|
||||
-- Purpose: Drop Trackers column from torrents table
|
||||
ALTER TABLE torrents
|
||||
DROP COLUMN IF EXISTS "trackers";
|
||||
|
||||
-- Purpose: Create a foreign key relationship if it does not already exist between torrents and the source table ingested_torrents, but do not cascade on delete.
|
||||
ALTER TABLE torrents
|
||||
ADD COLUMN IF NOT EXISTS "ingestedTorrentId" bigint;
|
||||
|
||||
-- Drop any earlier version of the constraint so it can be re-created below.
-- DROP CONSTRAINT IF EXISTS is scoped to the torrents table; looking the name up
-- in information_schema.table_constraints without a table filter would also match
-- a same-named constraint on a different table and then fail on the DROP.
ALTER TABLE torrents
    DROP CONSTRAINT IF EXISTS fk_torrents_info_hash;
|
||||
|
||||
ALTER TABLE torrents
|
||||
ADD CONSTRAINT fk_torrents_info_hash
|
||||
FOREIGN KEY ("ingestedTorrentId")
|
||||
REFERENCES ingested_torrents("id")
|
||||
ON DELETE NO ACTION;
|
||||
|
||||
UPDATE torrents
|
||||
SET "ingestedTorrentId" = ingested_torrents."id"
|
||||
FROM ingested_torrents
|
||||
WHERE torrents."infoHash" = ingested_torrents."info_hash"
|
||||
AND torrents."provider" = ingested_torrents."source";
|
||||
@@ -0,0 +1,55 @@
|
||||
DROP FUNCTION IF EXISTS kc_maintenance_reconcile_dmm_imdb_ids();

-- Housekeeping: re-resolves the imdbId of every file that belongs to a DMM torrent.
-- For each file, the parsed title / year stored in ingested_torrents.rtn_response is
-- looked up through search_imdb_meta (best single match):
--   * match found    -> files.imdbId is set to the matched imdb_id
--   * no match found -> files.imdbId is set to NULL
--   * no parsed title available -> the file is left untouched
-- Returns the number of files that were assigned an imdb id.
CREATE OR REPLACE FUNCTION kc_maintenance_reconcile_dmm_imdb_ids()
    RETURNS INTEGER AS $$
DECLARE
    rec RECORD;
    matched_imdb_id TEXT;
    matched_title TEXT;
    rows_affected INTEGER := 0;
BEGIN
    RAISE NOTICE 'Starting Reconciliation of DMM IMDB Ids...';
    FOR rec IN
        SELECT
            it."id" as "ingestion_id",
            t."infoHash",
            it."category" as "ingestion_category",
            f."id" as "file_Id",
            f."title" as "file_Title",
            (rtn_response->>'raw_title')::text as "raw_title",
            (rtn_response->>'parsed_title')::text as "parsed_title",
            (rtn_response->>'year')::int as "year"
        FROM torrents t
        JOIN ingested_torrents it ON t."ingestedTorrentId" = it."id"
        JOIN files f ON t."infoHash" = f."infoHash"
        WHERE t."provider" = 'DMM'
    LOOP
        RAISE NOTICE 'Processing record with file_Id: %', rec."file_Id";

        -- Without a parsed title there is nothing to search for; skip instead of
        -- treating the missing data as "no match" and wiping an existing imdbId.
        CONTINUE WHEN rec."parsed_title" IS NULL;

        -- SELECT ... INTO sets FOUND, which (unlike a FOR loop over the result)
        -- lets the zero-row case be handled explicitly. Scalars are used rather
        -- than "record IS NOT NULL", which is false as soon as any column is NULL.
        SELECT m.imdb_id, m.title
        INTO matched_imdb_id, matched_title
        FROM search_imdb_meta(
            rec."parsed_title",
            CASE
                WHEN rec."ingestion_category" = 'tv' THEN 'tvSeries'
                WHEN rec."ingestion_category" = 'movies' THEN 'movie'
            END,
            CASE
                WHEN rec."year" = 0 THEN NULL
                ELSE rec."year" END,
            1) AS m;

        IF FOUND THEN
            RAISE NOTICE 'Updating file_Id: % with imdbId: %, parsed title: %, imdb title: %', rec."file_Id", matched_imdb_id, rec."parsed_title", matched_title;
            UPDATE "files"
            SET "imdbId" = matched_imdb_id
            WHERE "id" = rec."file_Id";
            rows_affected := rows_affected + 1;
        ELSE
            -- matched_title is NULL here; it is passed only to keep the message shape.
            RAISE NOTICE 'No IMDB ID found for file_Id: %, parsed title: %, imdb title: %, setting imdbId to NULL', rec."file_Id", rec."parsed_title", matched_title;
            UPDATE "files"
            SET "imdbId" = NULL
            WHERE "id" = rec."file_Id";
        END IF;
    END LOOP;
    RAISE NOTICE 'Finished reconciliation. Total rows affected: %', rows_affected;
    RETURN rows_affected;
END;
$$ LANGUAGE plpgsql;
|
||||
@@ -1,5 +1,3 @@
|
||||
using Microsoft.VisualBasic;
|
||||
|
||||
namespace Producer.Features.Crawlers.Dmm;
|
||||
|
||||
public partial class DebridMediaManagerCrawler(
|
||||
@@ -12,7 +10,6 @@ public partial class DebridMediaManagerCrawler(
|
||||
{
|
||||
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
|
||||
private static partial Regex HashCollectionMatcher();
|
||||
private LengthAwareRatioScorer _lengthAwareRatioScorer = new();
|
||||
|
||||
private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
|
||||
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
|
||||
@@ -118,32 +115,27 @@ public partial class DebridMediaManagerCrawler(
|
||||
return null;
|
||||
}
|
||||
|
||||
var (cached, cachedResult) = await CheckIfInCacheAndReturn(parsedTorrent.ParsedTitle);
|
||||
var (cached, cachedResult) = await CheckIfInCacheAndReturn(parsedTorrent.Response.ParsedTitle);
|
||||
|
||||
if (cached)
|
||||
{
|
||||
logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.ParsedTitle);
|
||||
logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Response.ParsedTitle);
|
||||
return MapToTorrent(cachedResult, bytesElement, hashElement, parsedTorrent);
|
||||
}
|
||||
|
||||
var year = parsedTorrent.Year != 0 ? parsedTorrent.Year.ToString() : null;
|
||||
var imdbEntries = await Storage.FindImdbMetadata(parsedTorrent.ParsedTitle, parsedTorrent.IsMovie ? "movies" : "tv", year);
|
||||
int? year = parsedTorrent.Response.Year != 0 ? parsedTorrent.Response.Year : null;
|
||||
var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Response.ParsedTitle, parsedTorrent.Response.IsMovie ? "movies" : "tv", year);
|
||||
|
||||
if (imdbEntries.Count == 0)
|
||||
if (imdbEntry is null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var scoredTitles = await ScoreTitles(parsedTorrent, imdbEntries);
|
||||
await AddToCache(parsedTorrent.Response.ParsedTitle.ToLowerInvariant(), imdbEntry);
|
||||
|
||||
if (!scoredTitles.Success)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", scoredTitles.BestMatch.Value.ImdbId, parsedTorrent.ParsedTitle, scoredTitles.BestMatch.Value.Title, scoredTitles.BestMatch.Score);
|
||||
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", imdbEntry.ImdbId, parsedTorrent.Response.ParsedTitle, imdbEntry.Title, imdbEntry.Score);
|
||||
|
||||
return MapToTorrent(scoredTitles.BestMatch.Value, bytesElement, hashElement, parsedTorrent);
|
||||
return MapToTorrent(imdbEntry, bytesElement, hashElement, parsedTorrent);
|
||||
}
|
||||
|
||||
private IngestedTorrent MapToTorrent(ImdbEntry result, JsonElement bytesElement, JsonElement hashElement, ParseTorrentTitleResponse parsedTorrent) =>
|
||||
@@ -156,40 +148,22 @@ public partial class DebridMediaManagerCrawler(
|
||||
InfoHash = hashElement.ToString(),
|
||||
Seeders = 0,
|
||||
Leechers = 0,
|
||||
Category = parsedTorrent.IsMovie switch
|
||||
Category = parsedTorrent.Response.IsMovie switch
|
||||
{
|
||||
true => "movies",
|
||||
false => "tv",
|
||||
},
|
||||
RtnResponse = parsedTorrent.Response.ToJson(),
|
||||
};
|
||||
|
||||
private async Task<(bool Success, ExtractedResult<ImdbEntry>? BestMatch)> ScoreTitles(ParseTorrentTitleResponse parsedTorrent, List<ImdbEntry> imdbEntries)
|
||||
{
|
||||
var lowerCaseTitle = parsedTorrent.ParsedTitle.ToLowerInvariant();
|
||||
|
||||
// Scoring directly operates on the List<ImdbEntry>, no need for lookup table.
|
||||
var scoredResults = Process.ExtractAll(new(){Title = lowerCaseTitle}, imdbEntries, x => x.Title?.ToLowerInvariant(), scorer: _lengthAwareRatioScorer, cutoff: 90);
|
||||
|
||||
var best = scoredResults.MaxBy(x => x.Score);
|
||||
|
||||
if (best is null)
|
||||
{
|
||||
return (false, null);
|
||||
}
|
||||
|
||||
await AddToCache(lowerCaseTitle, best);
|
||||
|
||||
return (true, best);
|
||||
}
|
||||
|
||||
private Task AddToCache(string lowerCaseTitle, ExtractedResult<ImdbEntry> best)
|
||||
private Task AddToCache(string lowerCaseTitle, ImdbEntry best)
|
||||
{
|
||||
var cacheOptions = new DistributedCacheEntryOptions
|
||||
{
|
||||
AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1),
|
||||
};
|
||||
|
||||
return cache.SetStringAsync(lowerCaseTitle, JsonSerializer.Serialize(best.Value), cacheOptions);
|
||||
return cache.SetStringAsync(lowerCaseTitle, JsonSerializer.Serialize(best), cacheOptions);
|
||||
}
|
||||
|
||||
private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string title)
|
||||
|
||||
@@ -1 +1 @@
|
||||
rank-torrent-name==0.1.8
|
||||
rank-torrent-name==0.1.9
|
||||
@@ -9,9 +9,9 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
|
||||
const string query =
|
||||
"""
|
||||
INSERT INTO ingested_torrents
|
||||
("name", "source", "category", "info_hash", "size", "seeders", "leechers", "imdb", "processed", "createdAt", "updatedAt")
|
||||
("name", "source", "category", "info_hash", "size", "seeders", "leechers", "imdb", "processed", "createdAt", "updatedAt", "rtn_response")
|
||||
VALUES
|
||||
(@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt)
|
||||
(@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt, @RtnResponse::jsonb)
|
||||
ON CONFLICT (source, info_hash) DO NOTHING
|
||||
""";
|
||||
|
||||
@@ -110,21 +110,21 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
|
||||
public async Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default) =>
|
||||
await ExecuteCommandAsync(async connection =>
|
||||
{
|
||||
const string query = @"SELECT imdb_id AS ImdbId, title as Title, category as Category, year as Year, adult as Adult FROM imdb_metadata WHERE CAST(NULLIF(Year, '\N') AS INTEGER) <= @Year AND imdb_id > @LastProcessedImdbId ORDER BY ImdbId LIMIT @BatchSize";
|
||||
const string query = @"SELECT imdb_id AS ImdbId, title as Title, category as Category, year as Year, adult as Adult FROM imdb_metadata WHERE Year <= @Year AND imdb_id > @LastProcessedImdbId ORDER BY ImdbId LIMIT @BatchSize";
|
||||
var result = await connection.QueryAsync<ImdbEntry>(query, new { Year = year, LastProcessedImdbId = stateLastProcessedImdbId, BatchSize = batchSize });
|
||||
return result.ToList();
|
||||
}, "Error getting imdb metadata.", cancellationToken);
|
||||
|
||||
/// <summary>
/// Looks up the single best IMDB match for a parsed torrent title via the
/// <c>search_imdb_meta</c> database function.
/// </summary>
/// <param name="parsedTorrentTitle">Title to search for; a null or blank title yields no match.</param>
/// <param name="torrentType">Torrent category. Any value starting with "movie" (case-insensitive) searches IMDB movies; everything else searches TV series.</param>
/// <param name="year">Optional release year filter; <c>null</c> searches all years.</param>
/// <param name="cancellationToken">Token forwarded to the command executor.</param>
/// <returns>The best matching entry, or <c>null</c> when nothing matched.</returns>
public async Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string torrentType, int? year, CancellationToken cancellationToken = default)
{
    if (string.IsNullOrWhiteSpace(parsedTorrentTitle))
    {
        return null;
    }

    // Accept both "movie" and "movies": an exact comparison against "movie"
    // sends a "movies" category down the tvSeries branch.
    var category = torrentType.StartsWith("movie", StringComparison.OrdinalIgnoreCase) ? "movie" : "tvSeries";

    return await ExecuteCommandAsync(async connection =>
    {
        // Values are bound as parameters rather than interpolated into the SQL text,
        // so titles containing quotes need no stripping and cannot alter the statement.
        const string query =
            """
            select "imdb_id" as "ImdbId", "title" as "Title", "year" as "Year", "score" as "Score"
            from search_imdb_meta(@SearchTerm::text, @Category::text, @Year::int, 1)
            """;

        var result = await connection.QueryAsync<ImdbEntry>(query, new { SearchTerm = parsedTorrentTitle, Category = category, Year = year });

        return result.FirstOrDefault();
    }, "Error finding imdb metadata.", cancellationToken);
}
|
||||
|
||||
public Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default) =>
|
||||
@@ -134,9 +134,9 @@ public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConf
|
||||
const string query =
|
||||
"""
|
||||
INSERT INTO "torrents"
|
||||
("infoHash", "provider", "torrentId", "title", "size", "type", "uploadDate", "seeders", "trackers", "languages", "resolution", "reviewed", "opened", "createdAt", "updatedAt")
|
||||
("infoHash", "ingestedTorrentId", "provider", "title", "size", "type", "uploadDate", "seeders", "languages", "resolution", "reviewed", "opened", "createdAt", "updatedAt")
|
||||
VALUES
|
||||
(@InfoHash, @Provider, @TorrentId, @Title, 0, @Type, NOW(), @Seeders, NULL, NULL, NULL, false, false, NOW(), NOW())
|
||||
(@InfoHash, @IngestedTorrentId, @Provider, @Title, 0, @Type, NOW(), @Seeders, NULL, NULL, false, false, NOW(), NOW())
|
||||
ON CONFLICT ("infoHash") DO NOTHING
|
||||
""";
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ public interface IDataStorage
|
||||
Task<DapperResult<PageIngestedResult, PageIngestedResult>> MarkPageAsIngested(string pageId, CancellationToken cancellationToken = default);
|
||||
Task<DapperResult<int, int>> GetRowCountImdbMetadata(CancellationToken cancellationToken = default);
|
||||
Task<List<ImdbEntry>> GetImdbEntriesForRequests(int year, int batchSize, string? stateLastProcessedImdbId, CancellationToken cancellationToken = default);
|
||||
Task<List<ImdbEntry>> FindImdbMetadata(string? parsedTorrentTitle, string parsedTorrentTorrentType, string? parsedTorrentYear, CancellationToken cancellationToken = default);
|
||||
Task<ImdbEntry?> FindImdbMetadata(string? parsedTorrentTitle, string parsedTorrentTorrentType, int? parsedTorrentYear, CancellationToken cancellationToken = default);
|
||||
Task InsertTorrent(Torrent torrent, CancellationToken cancellationToken = default);
|
||||
Task InsertFiles(IEnumerable<TorrentFile> files, CancellationToken cancellationToken = default);
|
||||
Task InsertSubtitles(IEnumerable<SubtitleFile> subtitles, CancellationToken cancellationToken = default);
|
||||
|
||||
14
src/shared/Extensions/JsonExtensions.cs
Normal file
14
src/shared/Extensions/JsonExtensions.cs
Normal file
@@ -0,0 +1,14 @@
|
||||
namespace SharedContracts.Extensions;

/// <summary>
/// Extension helpers for turning objects into JSON text.
/// </summary>
public static class JsonExtensions
{
    // Created once and shared by every AsJson call.
    private static readonly JsonSerializerOptions SerializerOptions = BuildOptions();

    /// <summary>
    /// Serializes <paramref name="obj"/> to compact JSON using camelCase property
    /// names, strict number handling, and with reference cycles ignored.
    /// </summary>
    /// <typeparam name="T">Static type of the value being serialized.</typeparam>
    /// <param name="obj">The value to serialize.</param>
    /// <returns>The JSON representation of <paramref name="obj"/>.</returns>
    public static string AsJson<T>(this T obj)
    {
        return JsonSerializer.Serialize(obj, SerializerOptions);
    }

    // Builds the shared serializer configuration.
    private static JsonSerializerOptions BuildOptions()
    {
        var options = new JsonSerializerOptions();
        options.PropertyNamingPolicy = JsonNamingPolicy.CamelCase;
        options.WriteIndented = false;
        options.ReferenceHandler = ReferenceHandler.IgnoreCycles;
        options.NumberHandling = JsonNumberHandling.Strict;
        return options;
    }
}
|
||||
@@ -1,9 +1,7 @@
|
||||
// Global using directives
|
||||
|
||||
global using System.Collections.Concurrent;
|
||||
global using System.Globalization;
|
||||
global using System.Text;
|
||||
global using System.Text.Json;
|
||||
global using System.Text.Json.Serialization;
|
||||
global using Dapper;
|
||||
global using MassTransit;
|
||||
global using Microsoft.AspNetCore.Builder;
|
||||
@@ -17,4 +15,4 @@ global using Python.Runtime;
|
||||
global using Serilog;
|
||||
global using SharedContracts.Configuration;
|
||||
global using SharedContracts.Extensions;
|
||||
global using SharedContracts.Models;
|
||||
global using SharedContracts.Models;
|
||||
|
||||
@@ -7,4 +7,5 @@ public class ImdbEntry
|
||||
public string? Category { get; set; }
|
||||
public string? Year { get; set; }
|
||||
public bool? Adult { get; set; }
|
||||
public decimal? Score { get; set; }
|
||||
}
|
||||
|
||||
@@ -12,7 +12,9 @@ public class IngestedTorrent
|
||||
public int Leechers { get; set; }
|
||||
public string? Imdb { get; set; }
|
||||
|
||||
public bool Processed { get; set; } = false;
|
||||
public bool Processed { get; set; }
|
||||
public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
|
||||
public DateTime UpdatedAt { get; set; } = DateTime.UtcNow;
|
||||
|
||||
public string? RtnResponse { get; set; }
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ namespace SharedContracts.Models;
|
||||
public class Torrent
|
||||
{
|
||||
public string? InfoHash { get; set; }
|
||||
public long? IngestedTorrentId { get; set; }
|
||||
public string? Provider { get; set; }
|
||||
public string? TorrentId { get; set; }
|
||||
public string? Title { get; set; }
|
||||
|
||||
@@ -7,7 +7,7 @@ public interface IPythonEngineService
|
||||
Task InitializePythonEngine(CancellationToken cancellationToken);
|
||||
T ExecuteCommandOrScript<T>(string command, PyModule module, bool throwOnErrors);
|
||||
T ExecutePythonOperation<T>(Func<T> operation, string operationName, bool throwOnErrors);
|
||||
T ExecutePythonOperationWithDefault<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors);
|
||||
T ExecutePythonOperationWithDefault<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors, bool logErrors);
|
||||
Task StopPythonEngine(CancellationToken cancellationToken);
|
||||
dynamic? Sys { get; }
|
||||
}
|
||||
@@ -53,10 +53,10 @@ public class PythonEngineService(ILogger<PythonEngineService> logger) : IPythonE
|
||||
}, nameof(ExecuteCommandOrScript), throwOnErrors);
|
||||
|
||||
public T ExecutePythonOperation<T>(Func<T> operation, string operationName, bool throwOnErrors) =>
|
||||
ExecutePythonOperationWithDefault(operation, default, operationName, throwOnErrors);
|
||||
ExecutePythonOperationWithDefault(operation, default, operationName, throwOnErrors, true);
|
||||
|
||||
public T ExecutePythonOperationWithDefault<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors) =>
|
||||
ExecutePythonOperationInternal(operation, defaultValue, operationName, throwOnErrors);
|
||||
public T ExecutePythonOperationWithDefault<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors, bool logErrors) =>
|
||||
ExecutePythonOperationInternal(operation, defaultValue, operationName, throwOnErrors, logErrors);
|
||||
|
||||
public void ExecuteOnGIL(Action act, bool throwOnErrors)
|
||||
{
|
||||
@@ -95,7 +95,7 @@ public class PythonEngineService(ILogger<PythonEngineService> logger) : IPythonE
|
||||
}
|
||||
|
||||
// ReSharper disable once EntityNameCapturedOnly.Local
|
||||
private T ExecutePythonOperationInternal<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors)
|
||||
private T ExecutePythonOperationInternal<T>(Func<T> operation, T? defaultValue, string operationName, bool throwOnErrors, bool logErrors)
|
||||
{
|
||||
Sys ??= LoadSys();
|
||||
|
||||
@@ -108,7 +108,10 @@ public class PythonEngineService(ILogger<PythonEngineService> logger) : IPythonE
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.LogError(ex, "Python Error: {Message} ({OperationName})", ex.Message, nameof(operationName));
|
||||
if (logErrors)
{
    // Pass the operation name itself; nameof(operationName) would always
    // log the literal text "operationName" regardless of the caller.
    Logger.LogError(ex, "Python Error: {Message} ({OperationName})", ex.Message, operationName);
}
|
||||
|
||||
if (throwOnErrors)
|
||||
{
|
||||
|
||||
@@ -1,6 +1,3 @@
|
||||
namespace SharedContracts.Python.RTN;
|
||||
|
||||
public record ParseTorrentTitleResponse(bool Success, string ParsedTitle, int Year, int[]? Season = null, int[]? Episode = null)
|
||||
{
|
||||
public bool IsMovie => Season == null && Episode == null;
|
||||
}
|
||||
public record ParseTorrentTitleResponse(bool Success, RtnResponse? Response);
|
||||
@@ -14,34 +14,31 @@ public class RankTorrentName : IRankTorrentName
|
||||
}
|
||||
|
||||
public ParseTorrentTitleResponse Parse(string title) =>
|
||||
_pythonEngineService.ExecutePythonOperation(
|
||||
_pythonEngineService.ExecutePythonOperationWithDefault(
|
||||
() =>
|
||||
{
|
||||
var result = _rtn?.parse(title);
|
||||
return ParseResult(result);
|
||||
}, nameof(Parse), throwOnErrors: false);
|
||||
|
||||
}, new ParseTorrentTitleResponse(false, null), nameof(Parse), throwOnErrors: false, logErrors: false);
|
||||
|
||||
|
||||
/// <summary>
/// Converts the Python parse result into a <see cref="ParseTorrentTitleResponse"/>
/// by dumping the model to JSON and deserializing it into <see cref="RtnResponse"/>.
/// </summary>
/// <param name="result">The object returned by the Python <c>parse</c> call; may be null.</param>
/// <returns>
/// A successful response carrying the deserialized payload, or an unsuccessful
/// response with a null payload when there is nothing to deserialize.
/// </returns>
private static ParseTorrentTitleResponse ParseResult(dynamic result)
{
    if (result == null)
    {
        return new(false, null);
    }

    string? json = result.model_dump_json()?.As<string?>();

    if (string.IsNullOrEmpty(json))
    {
        return new(false, null);
    }

    // Deserialize may itself return null (e.g. for the JSON literal null);
    // never report success without a payload, callers dereference Response.
    RtnResponse? response = JsonSerializer.Deserialize<RtnResponse>(json);

    return new ParseTorrentTitleResponse(response is not null, response);
}
|
||||
|
||||
private void InitModules() =>
|
||||
|
||||
83
src/shared/Python/RTN/RtnResponse.cs
Normal file
83
src/shared/Python/RTN/RtnResponse.cs
Normal file
@@ -0,0 +1,83 @@
|
||||
namespace SharedContracts.Python.RTN;

/// <summary>
/// Strongly typed view of the JSON produced for a parsed torrent title.
/// Each property is bound to its snake_case JSON field through
/// <see cref="JsonPropertyNameAttribute"/>.
/// </summary>
public class RtnResponse
{
    [JsonPropertyName("raw_title")]
    public string? RawTitle { get; set; }

    [JsonPropertyName("parsed_title")]
    public string? ParsedTitle { get; set; }

    [JsonPropertyName("fetch")]
    public bool Fetch { get; set; }

    [JsonPropertyName("is_4k")]
    public bool Is4K { get; set; }

    [JsonPropertyName("is_multi_audio")]
    public bool IsMultiAudio { get; set; }

    [JsonPropertyName("is_multi_subtitle")]
    public bool IsMultiSubtitle { get; set; }

    [JsonPropertyName("is_complete")]
    public bool IsComplete { get; set; }

    [JsonPropertyName("year")]
    public int Year { get; set; }

    [JsonPropertyName("resolution")]
    public List<string>? Resolution { get; set; }

    [JsonPropertyName("quality")]
    public List<string>? Quality { get; set; }

    [JsonPropertyName("season")]
    public List<int>? Season { get; set; }

    [JsonPropertyName("episode")]
    public List<int>? Episode { get; set; }

    [JsonPropertyName("codec")]
    public List<string>? Codec { get; set; }

    [JsonPropertyName("audio")]
    public List<string>? Audio { get; set; }

    [JsonPropertyName("subtitles")]
    public List<string>? Subtitles { get; set; }

    [JsonPropertyName("language")]
    public List<string>? Language { get; set; }

    [JsonPropertyName("bit_depth")]
    public List<int>? BitDepth { get; set; }

    [JsonPropertyName("hdr")]
    public string? Hdr { get; set; }

    [JsonPropertyName("proper")]
    public bool Proper { get; set; }

    [JsonPropertyName("repack")]
    public bool Repack { get; set; }

    [JsonPropertyName("remux")]
    public bool Remux { get; set; }

    [JsonPropertyName("upscaled")]
    public bool Upscaled { get; set; }

    [JsonPropertyName("remastered")]
    public bool Remastered { get; set; }

    [JsonPropertyName("directors_cut")]
    public bool DirectorsCut { get; set; }

    [JsonPropertyName("extended")]
    public bool Extended { get; set; }

    /// <summary>
    /// True when the title carries no season and no episode information.
    /// A missing list and an empty list are treated alike: a deserialized payload
    /// may contain <c>[]</c> rather than <c>null</c> for these fields, and an
    /// empty list must not make a movie look like a TV release.
    /// </summary>
    public bool IsMovie => Season is not { Count: > 0 } && Episode is not { Count: > 0 };

    /// <summary>Serializes this instance to JSON via the shared <c>AsJson</c> extension.</summary>
    public string ToJson() => this.AsJson();
}
|
||||
@@ -11,6 +11,7 @@ public class PerformIngestionConsumer(IDataStorage dataStorage, ILogger<PerformI
|
||||
var torrent = new Torrent
|
||||
{
|
||||
InfoHash = request.IngestedTorrent.InfoHash.ToLowerInvariant(),
|
||||
IngestedTorrentId = request.IngestedTorrent.Id,
|
||||
Provider = request.IngestedTorrent.Source,
|
||||
Title = request.IngestedTorrent.Name,
|
||||
Type = request.IngestedTorrent.Category,
|
||||
|
||||
Reference in New Issue
Block a user