mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
trigram performance increased and housekeeping (#184)
* add new indexes, and change year column to int
* Change gist to gin, and change year to int
* Producer changes for new gin query
* Fully map the rtn response using json dump from Pydantic.
  Also updates Rtn to 0.1.9
* Add housekeeping script to reconcile imdb ids.
* Join Torrent onto the ingested torrent table.
  Ensure that a torrent can always find the details of where it came from, and how it was parsed.
* Version bump for release
* missing quote on table name
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
using Microsoft.VisualBasic;
|
||||
|
||||
namespace Producer.Features.Crawlers.Dmm;
|
||||
|
||||
public partial class DebridMediaManagerCrawler(
|
||||
@@ -12,7 +10,6 @@ public partial class DebridMediaManagerCrawler(
|
||||
{
|
||||
[GeneratedRegex("""<iframe src="https:\/\/debridmediamanager.com\/hashlist#(.*)"></iframe>""")]
|
||||
private static partial Regex HashCollectionMatcher();
|
||||
private LengthAwareRatioScorer _lengthAwareRatioScorer = new();
|
||||
|
||||
private const string DownloadBaseUrl = "https://raw.githubusercontent.com/debridmediamanager/hashlists/main";
|
||||
protected override IReadOnlyDictionary<string, string> Mappings => new Dictionary<string, string>();
|
||||
@@ -118,32 +115,27 @@ public partial class DebridMediaManagerCrawler(
|
||||
return null;
|
||||
}
|
||||
|
||||
var (cached, cachedResult) = await CheckIfInCacheAndReturn(parsedTorrent.ParsedTitle);
|
||||
var (cached, cachedResult) = await CheckIfInCacheAndReturn(parsedTorrent.Response.ParsedTitle);
|
||||
|
||||
if (cached)
|
||||
{
|
||||
logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.ParsedTitle);
|
||||
logger.LogInformation("[{ImdbId}] Found cached imdb result for {Title}", cachedResult.ImdbId, parsedTorrent.Response.ParsedTitle);
|
||||
return MapToTorrent(cachedResult, bytesElement, hashElement, parsedTorrent);
|
||||
}
|
||||
|
||||
var year = parsedTorrent.Year != 0 ? parsedTorrent.Year.ToString() : null;
|
||||
var imdbEntries = await Storage.FindImdbMetadata(parsedTorrent.ParsedTitle, parsedTorrent.IsMovie ? "movies" : "tv", year);
|
||||
int? year = parsedTorrent.Response.Year != 0 ? parsedTorrent.Response.Year : null;
|
||||
var imdbEntry = await Storage.FindImdbMetadata(parsedTorrent.Response.ParsedTitle, parsedTorrent.Response.IsMovie ? "movies" : "tv", year);
|
||||
|
||||
if (imdbEntries.Count == 0)
|
||||
if (imdbEntry is null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
var scoredTitles = await ScoreTitles(parsedTorrent, imdbEntries);
|
||||
await AddToCache(parsedTorrent.Response.ParsedTitle.ToLowerInvariant(), imdbEntry);
|
||||
|
||||
if (!scoredTitles.Success)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", scoredTitles.BestMatch.Value.ImdbId, parsedTorrent.ParsedTitle, scoredTitles.BestMatch.Value.Title, scoredTitles.BestMatch.Score);
|
||||
logger.LogInformation("[{ImdbId}] Found best match for {Title}: {BestMatch} with score {Score}", imdbEntry.ImdbId, parsedTorrent.Response.ParsedTitle, imdbEntry.Title, imdbEntry.Score);
|
||||
|
||||
return MapToTorrent(scoredTitles.BestMatch.Value, bytesElement, hashElement, parsedTorrent);
|
||||
return MapToTorrent(imdbEntry, bytesElement, hashElement, parsedTorrent);
|
||||
}
|
||||
|
||||
private IngestedTorrent MapToTorrent(ImdbEntry result, JsonElement bytesElement, JsonElement hashElement, ParseTorrentTitleResponse parsedTorrent) =>
|
||||
@@ -156,40 +148,22 @@ public partial class DebridMediaManagerCrawler(
|
||||
InfoHash = hashElement.ToString(),
|
||||
Seeders = 0,
|
||||
Leechers = 0,
|
||||
Category = parsedTorrent.IsMovie switch
|
||||
Category = parsedTorrent.Response.IsMovie switch
|
||||
{
|
||||
true => "movies",
|
||||
false => "tv",
|
||||
},
|
||||
RtnResponse = parsedTorrent.Response.ToJson(),
|
||||
};
|
||||
|
||||
private async Task<(bool Success, ExtractedResult<ImdbEntry>? BestMatch)> ScoreTitles(ParseTorrentTitleResponse parsedTorrent, List<ImdbEntry> imdbEntries)
|
||||
{
|
||||
var lowerCaseTitle = parsedTorrent.ParsedTitle.ToLowerInvariant();
|
||||
|
||||
// Scoring directly operates on the List<ImdbEntry>, no need for lookup table.
|
||||
var scoredResults = Process.ExtractAll(new(){Title = lowerCaseTitle}, imdbEntries, x => x.Title?.ToLowerInvariant(), scorer: _lengthAwareRatioScorer, cutoff: 90);
|
||||
|
||||
var best = scoredResults.MaxBy(x => x.Score);
|
||||
|
||||
if (best is null)
|
||||
{
|
||||
return (false, null);
|
||||
}
|
||||
|
||||
await AddToCache(lowerCaseTitle, best);
|
||||
|
||||
return (true, best);
|
||||
}
|
||||
|
||||
private Task AddToCache(string lowerCaseTitle, ExtractedResult<ImdbEntry> best)
|
||||
private Task AddToCache(string lowerCaseTitle, ImdbEntry best)
|
||||
{
|
||||
var cacheOptions = new DistributedCacheEntryOptions
|
||||
{
|
||||
AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1),
|
||||
};
|
||||
|
||||
return cache.SetStringAsync(lowerCaseTitle, JsonSerializer.Serialize(best.Value), cacheOptions);
|
||||
return cache.SetStringAsync(lowerCaseTitle, JsonSerializer.Serialize(best), cacheOptions);
|
||||
}
|
||||
|
||||
private async Task<(bool Success, ImdbEntry? Entry)> CheckIfInCacheAndReturn(string title)
|
||||
|
||||
@@ -1 +1 @@
|
||||
rank-torrent-name==0.1.8
|
||||
rank-torrent-name==0.1.9
|
||||
Reference in New Issue
Block a user