mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
trigram performance increased and housekeeping (#184)
* add new indexes, and change year column to int * Change gist to gin, and change year to int * Producer changes for new gin query * Fully map the rtn response using json dump from Pydantic Also updates Rtn to 0.1.9 * Add housekeeping script to reconcile imdb ids. * Join Torrent onto the ingested torrent table Ensure that a torrent can always find the details of where it came from, and how it was parsed. * Version bump for release * missing quote on table name
This commit is contained in:
35
src/migrator/migrations/009_imdb_year_column_int.sql
Normal file
35
src/migrator/migrations/009_imdb_year_column_int.sql
Normal file
@@ -0,0 +1,35 @@
|
||||
-- Purpose: Change the year column to integer and add a search function that allows for searching by year.
|
||||
ALTER TABLE imdb_metadata
|
||||
ALTER COLUMN year TYPE integer USING (CASE WHEN year = '\N' THEN 0 ELSE year::integer END);
|
||||
|
||||
-- Remove the old search function
|
||||
DROP FUNCTION IF EXISTS search_imdb_meta(TEXT, TEXT, TEXT, INT);
|
||||
|
||||
-- Add the new search function that allows for searching by year with a plus/minus one year range
|
||||
CREATE OR REPLACE FUNCTION search_imdb_meta(search_term TEXT, category_param TEXT DEFAULT NULL, year_param INT DEFAULT NULL, limit_param INT DEFAULT 10)
|
||||
RETURNS TABLE(imdb_id character varying(16), title character varying(1000),category character varying(50),year INT, score REAL) AS $$
|
||||
BEGIN
|
||||
SET pg_trgm.similarity_threshold = 0.9;
|
||||
RETURN QUERY
|
||||
SELECT imdb_metadata.imdb_id, imdb_metadata.title, imdb_metadata.category, imdb_metadata.year, similarity(imdb_metadata.title, search_term) as score
|
||||
FROM imdb_metadata
|
||||
WHERE (imdb_metadata.title % search_term)
|
||||
AND (imdb_metadata.adult = FALSE)
|
||||
AND (category_param IS NULL OR imdb_metadata.category = category_param)
|
||||
AND (year_param IS NULL OR imdb_metadata.year BETWEEN year_param - 1 AND year_param + 1)
|
||||
ORDER BY score DESC
|
||||
LIMIT limit_param;
|
||||
END; $$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
-- Drop the old indexes
|
||||
DROP INDEX IF EXISTS idx_imdb_metadata_adult;
|
||||
DROP INDEX IF EXISTS idx_imdb_metadata_category;
|
||||
DROP INDEX IF EXISTS idx_imdb_metadata_year;
|
||||
DROP INDEX IF EXISTS title_gist;
|
||||
|
||||
-- Add indexes for the new columns
|
||||
CREATE INDEX idx_imdb_metadata_adult ON imdb_metadata(adult);
|
||||
CREATE INDEX idx_imdb_metadata_category ON imdb_metadata(category);
|
||||
CREATE INDEX idx_imdb_metadata_year ON imdb_metadata(year);
|
||||
CREATE INDEX title_gin ON imdb_metadata USING gin(title gin_trgm_ops);
|
||||
@@ -0,0 +1,40 @@
|
||||
-- Purpose: Add the jsonb column to the ingested_torrents table to store the response from RTN
|
||||
ALTER TABLE ingested_torrents
|
||||
ADD COLUMN IF NOT EXISTS rtn_response jsonb;
|
||||
|
||||
-- Purpose: Drop torrentId column from torrents table
|
||||
ALTER TABLE torrents
|
||||
DROP COLUMN IF EXISTS "torrentId";
|
||||
|
||||
-- Purpose: Drop Trackers column from torrents table
|
||||
ALTER TABLE torrents
|
||||
DROP COLUMN IF EXISTS "trackers";
|
||||
|
||||
-- Purpose: Create a foreign key relationsship if it does not already exist between torrents and the source table ingested_torrents, but do not cascade on delete.
|
||||
ALTER TABLE torrents
|
||||
ADD COLUMN IF NOT EXISTS "ingestedTorrentId" bigint;
|
||||
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (
|
||||
SELECT 1
|
||||
FROM information_schema.table_constraints
|
||||
WHERE constraint_name = 'fk_torrents_info_hash'
|
||||
)
|
||||
THEN
|
||||
ALTER TABLE torrents
|
||||
DROP CONSTRAINT fk_torrents_info_hash;
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
ALTER TABLE torrents
|
||||
ADD CONSTRAINT fk_torrents_info_hash
|
||||
FOREIGN KEY ("ingestedTorrentId")
|
||||
REFERENCES ingested_torrents("id")
|
||||
ON DELETE NO ACTION;
|
||||
|
||||
UPDATE torrents
|
||||
SET "ingestedTorrentId" = ingested_torrents."id"
|
||||
FROM ingested_torrents
|
||||
WHERE torrents."infoHash" = ingested_torrents."info_hash"
|
||||
AND torrents."provider" = ingested_torrents."source";
|
||||
@@ -0,0 +1,55 @@
|
||||
DROP FUNCTION IF EXISTS kc_maintenance_reconcile_dmm_imdb_ids();
|
||||
CREATE OR REPLACE FUNCTION kc_maintenance_reconcile_dmm_imdb_ids()
|
||||
RETURNS INTEGER AS $$
|
||||
DECLARE
|
||||
rec RECORD;
|
||||
imdb_rec RECORD;
|
||||
rows_affected INTEGER := 0;
|
||||
BEGIN
|
||||
RAISE NOTICE 'Starting Reconciliation of DMM IMDB Ids...';
|
||||
FOR rec IN
|
||||
SELECT
|
||||
it."id" as "ingestion_id",
|
||||
t."infoHash",
|
||||
it."category" as "ingestion_category",
|
||||
f."id" as "file_Id",
|
||||
f."title" as "file_Title",
|
||||
(rtn_response->>'raw_title')::text as "raw_title",
|
||||
(rtn_response->>'parsed_title')::text as "parsed_title",
|
||||
(rtn_response->>'year')::int as "year"
|
||||
FROM torrents t
|
||||
JOIN ingested_torrents it ON t."ingestedTorrentId" = it."id"
|
||||
JOIN files f ON t."infoHash" = f."infoHash"
|
||||
WHERE t."provider" = 'DMM'
|
||||
LOOP
|
||||
RAISE NOTICE 'Processing record with file_Id: %', rec."file_Id";
|
||||
FOR imdb_rec IN
|
||||
SELECT * FROM search_imdb_meta(
|
||||
rec."parsed_title",
|
||||
CASE
|
||||
WHEN rec."ingestion_category" = 'tv' THEN 'tvSeries'
|
||||
WHEN rec."ingestion_category" = 'movies' THEN 'movie'
|
||||
END,
|
||||
CASE
|
||||
WHEN rec."year" = 0 THEN NULL
|
||||
ELSE rec."year" END,
|
||||
1)
|
||||
LOOP
|
||||
IF imdb_rec IS NOT NULL THEN
|
||||
RAISE NOTICE 'Updating file_Id: % with imdbId: %, parsed title: %, imdb title: %', rec."file_Id", imdb_rec."imdb_id", rec."parsed_title", imdb_rec."title";
|
||||
UPDATE "files"
|
||||
SET "imdbId" = imdb_rec."imdb_id"
|
||||
WHERE "id" = rec."file_Id";
|
||||
rows_affected := rows_affected + 1;
|
||||
ELSE
|
||||
RAISE NOTICE 'No IMDB ID found for file_Id: %, parsed title: %, imdb title: %, setting imdbId to NULL', rec."file_Id", rec."parsed_title", imdb_rec."title";
|
||||
UPDATE "files"
|
||||
SET "imdbId" = NULL
|
||||
WHERE "id" = rec."file_Id";
|
||||
END IF;
|
||||
END LOOP;
|
||||
END LOOP;
|
||||
RAISE NOTICE 'Finished reconciliation. Total rows affected: %', rows_affected;
|
||||
RETURN rows_affected;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
Reference in New Issue
Block a user