Woke up to see a discussion about torrentio scraping: powered by community

Was a little inspired. Now that we have a self-populating database of IMDb IDs, why shouldn't we be able to scrape any other instance of torrentio or knightcrawler?

Also restructured the producer into vertical slices to make it easier to work with.
There was too much flicking back and forth between Jobs and Crawlers when configuring.
This commit is contained in:
iPromKnight
2024-03-02 18:41:57 +00:00
parent 98115e0cf7
commit 95fa48c851
59 changed files with 733 additions and 261 deletions

View File

@@ -0,0 +1,12 @@
using Literals = Producer.Features.JobSupport.Literals;
namespace Producer.Features.Crawlers.Tpb;
/// <summary>
/// Scheduled job that runs the <see cref="TpbCrawler"/>.
/// Marked <c>[DisallowConcurrentExecution]</c> so overlapping runs of this job are not allowed.
/// </summary>
/// <param name="crawlerProvider">
/// Provider handed to <see cref="BaseJob"/>; presumably used to resolve the crawler
/// named by <see cref="Crawler"/> (confirm against BaseJob).
/// </param>
[DisallowConcurrentExecution]
public class SyncTpbJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    // Job, trigger and crawler all share one name, derived from the crawler type.
    private const string JobName = nameof(TpbCrawler);

    /// <summary>Job key: the crawler name within the "CrawlersJobs" group.</summary>
    public static readonly JobKey Key = new(JobName, nameof(Literals.CrawlersJobs));

    /// <summary>Trigger key: "&lt;crawler name&gt;-trigger" within the "CrawlersJobs" group.</summary>
    public static readonly TriggerKey Trigger = new($"{JobName}-trigger", nameof(Literals.CrawlersJobs));

    /// <summary>Name of the crawler this job executes.</summary>
    protected override string Crawler
    {
        get { return JobName; }
    }
}

View File

@@ -0,0 +1,92 @@
namespace Producer.Features.Crawlers.Tpb;
/// <summary>
/// JSON crawler for the apibay "top 100 recent" feed (source tag "TPB").
/// Only movie and TV-series categories are turned into <see cref="Torrent"/> records;
/// every other category is skipped.
/// </summary>
public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage) : BaseJsonCrawler(httpClientFactory, logger, storage)
{
    /// <summary>Endpoint returning the precompiled list of recent top-100 torrents.</summary>
    protected override string Url => "https://apibay.org/precompiled/data_top100_recent.json";

    /// <summary>Source tag stamped on every torrent produced by this crawler.</summary>
    protected override string Source => "TPB";

    // Reference table of category ids by section. Nothing in this class reads it;
    // it is static so the constant table is not rebuilt for every crawler instance.
    // ReSharper disable once UnusedMember.Local
    private static readonly Dictionary<string, Dictionary<string, int>> TpbCategories = new()
    {
        {"VIDEO", new() {
            {"ALL", 200},
            {"MOVIES", 201},
            {"MOVIES_DVDR", 202},
            {"MUSIC_VIDEOS", 203},
            {"MOVIE_CLIPS", 204},
            {"TV_SHOWS", 205},
            {"HANDHELD", 206},
            {"MOVIES_HD", 207},
            {"TV_SHOWS_HD", 208},
            {"MOVIES_3D", 209},
            {"OTHER", 299},
        }},
        {"PORN", new() {
            {"ALL", 500},
            {"MOVIES", 501},
            {"MOVIES_DVDR", 502},
            {"PICTURES", 503},
            {"GAMES", 504},
            {"MOVIES_HD", 505},
            {"MOVIE_CLIPS", 506},
            {"OTHER", 599},
        }},
    };

    // NOTE: static initializers run in textual order, so the two sets spread into
    // AllowedCategories must stay declared above it.
    private static readonly HashSet<int> TvSeriesCategories = [ 205, 208 ];
    private static readonly HashSet<int> MovieCategories = [ 201, 202, 207, 209 ];

    // Not part of AllowedCategories, so these ids are never ingested.
    private static readonly HashSet<int> PornCategories = [ 500, 501, 502, 505, 506 ];

    private static readonly HashSet<int> AllowedCategories = [ ..MovieCategories, ..TvSeriesCategories ];

    // Built once: ParseTorrent reads the mapping several times per item, and the
    // previous expression-bodied property allocated a new dictionary on every access.
    private static readonly IReadOnlyDictionary<string, string> FieldMappings =
        new Dictionary<string, string>
        {
            [nameof(Torrent.Name)] = "name",
            [nameof(Torrent.Size)] = "size",
            [nameof(Torrent.Seeders)] = "seeders",
            [nameof(Torrent.Leechers)] = "leechers",
            [nameof(Torrent.InfoHash)] = "info_hash",
            [nameof(Torrent.Imdb)] = "imdb",
            [nameof(Torrent.Category)] = "category",
        };

    /// <summary>Maps <see cref="Torrent"/> property names to the JSON field names of the feed.</summary>
    protected override IReadOnlyDictionary<string, string> Mappings => FieldMappings;

    /// <summary>
    /// Converts one JSON item into a <see cref="Torrent"/>.
    /// </summary>
    /// <param name="item">A single element of the feed.</param>
    /// <returns>
    /// The populated torrent, or <c>null</c> when the item's category is not a movie
    /// or TV-series category.
    /// </returns>
    protected override Torrent? ParseTorrent(JsonElement item)
    {
        var incomingCategory = item.GetProperty(Mappings[nameof(Torrent.Category)]).GetInt32();

        if (!AllowedCategories.Contains(incomingCategory))
        {
            return null;
        }

        var torrent = new Torrent
        {
            Source = Source,
            Name = item.GetProperty(Mappings[nameof(Torrent.Name)]).GetString(),
            // Size is read as a 64-bit integer and stored in its string form.
            Size = item.GetProperty(Mappings[nameof(Torrent.Size)]).GetInt64().ToString(),
            Seeders = item.GetProperty(Mappings[nameof(Torrent.Seeders)]).GetInt32(),
            Leechers = item.GetProperty(Mappings[nameof(Torrent.Leechers)]).GetInt32(),
            Imdb = item.GetProperty(Mappings[nameof(Torrent.Imdb)]).GetString(),
        };

        // Base-class helper; presumably fills torrent.InfoHash from the mapped field (confirm in BaseJsonCrawler).
        HandleInfoHash(item, torrent, nameof(Torrent.InfoHash));

        torrent.Category = HandleCategory(incomingCategory);

        return torrent;
    }

    /// <summary>
    /// Translates a numeric category id into the category label stored on the torrent:
    /// "movies", "tv", or "xxx" for anything else.
    /// </summary>
    private static string HandleCategory(int category) =>
        category switch
        {
            _ when MovieCategories.Contains(category) => "movies",
            _ when TvSeriesCategories.Contains(category) => "tv",
            // ParseTorrent filters on AllowedCategories first, so this fallback is
            // not reached from there; kept to preserve the original mapping.
            _ => "xxx",
        };

    /// <summary>Runs the crawl by delegating to the base overload with the collection name "items".</summary>
    public override Task Execute() => Execute("items");
}