mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
Big rewrite - distributed consumers for ingestion/scraping (scalable) - single producer written in C#.
Changed from page scraping to RSS XML scraping. Includes RealDebridManager hashlist decoding (requires a read-only GitHub PAT, as requests must be authenticated); this allows ingestion of 200k+ entries in a few hours. Simplifies a lot of torrentio to deal with the new data.
This commit is contained in:
23
src/producer/Jobs/BaseJob.cs
Normal file
23
src/producer/Jobs/BaseJob.cs
Normal file
@@ -0,0 +1,23 @@
|
||||
namespace Scraper.Jobs;
|
||||
|
||||
/// <summary>
/// Base class for crawler jobs. Looks up the crawler named by <see cref="Crawler"/> through the
/// injected crawler provider and executes it.
/// </summary>
public abstract class BaseJob(ICrawlerProvider crawlerProvider) : IJob
{
    /// <summary>Highest refire count for which the crawler is still executed.</summary>
    private const int MaxRefireCount = 5;

    /// <summary>
    /// Executes the crawler identified by <see cref="Crawler"/>.
    /// </summary>
    /// <param name="context">Execution context; its <c>RefireCount</c> is used to cap retries.</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the context's refire count exceeds <see cref="MaxRefireCount"/>.
    /// </exception>
    /// <exception cref="JobExecutionException">
    /// Thrown when the crawler fails; wraps the original exception and sets
    /// <c>refireImmediately</c> to <c>true</c>.
    /// </exception>
    public async Task Execute(IJobExecutionContext context)
    {
        // Stop the refire loop once the job has already been refired too often.
        if (context.RefireCount > MaxRefireCount)
        {
            throw new InvalidOperationException("Job failed too many times");
        }

        try
        {
            await crawlerProvider.Get(Crawler).Execute();
        }
        catch (Exception ex)
        {
            // Carry the crawler name and attempt number so the failure can be traced from logs;
            // the original exception is preserved as the cause.
            throw new JobExecutionException(
                msg: $"Crawler '{Crawler}' failed (refire count: {context.RefireCount})",
                refireImmediately: true,
                cause: ex);
        }
    }

    /// <summary>Name passed to the crawler provider to look up the crawler to run.</summary>
    protected abstract string Crawler { get; }
}
|
||||
7
src/producer/Jobs/ICrawlerJob.cs
Normal file
7
src/producer/Jobs/ICrawlerJob.cs
Normal file
@@ -0,0 +1,7 @@
|
||||
namespace Scraper.Jobs;
|
||||
|
||||
/// <summary>
/// A job that exposes a crawler of type <typeparamref name="TCrawler"/>.
/// </summary>
/// <typeparam name="TCrawler">
/// The crawler type; must implement <see cref="ICrawler"/>. Declared covariant (<c>out</c>).
/// </typeparam>
public interface ICrawlerJob<out TCrawler> : IJob where TCrawler : ICrawler
{
    /// <summary>Gets the <typeparamref name="TCrawler"/> exposed by this job.</summary>
    TCrawler CrawlerType { get; }
}
|
||||
14
src/producer/Jobs/IPJob.cs
Normal file
14
src/producer/Jobs/IPJob.cs
Normal file
@@ -0,0 +1,14 @@
|
||||
namespace Scraper.Jobs;
|
||||
|
||||
/// <summary>
/// Job that asks the injected <see cref="IIpService"/> for the public IP address.
/// </summary>
[DisallowConcurrentExecution]
public class IPJob(IIpService ipService) : IJob
{
    private const string JobName = nameof(IPJob);

    /// <summary>Key identifying this job (name <c>IPJob</c>, group <c>Jobs</c>).</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Jobs));

    /// <summary>Key identifying this job's trigger (name <c>IPJob-trigger</c>, group <c>Jobs</c>).</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Jobs));

    /// <summary>
    /// Returns the task produced by <c>ipService.GetPublicIpAddress()</c> without awaiting it here.
    /// </summary>
    /// <param name="context">Execution context; not used by this job.</param>
    public Task Execute(IJobExecutionContext context) => ipService.GetPublicIpAddress();
}
|
||||
31
src/producer/Jobs/PublisherJob.cs
Normal file
31
src/producer/Jobs/PublisherJob.cs
Normal file
@@ -0,0 +1,31 @@
|
||||
namespace Scraper.Jobs;
|
||||
|
||||
/// <summary>
/// Job that fetches publishable torrents from storage, publishes them, and then marks them
/// as processed.
/// </summary>
[DisallowConcurrentExecution]
public class PublisherJob(IMessagePublisher publisher, IDataStorage storage, ILogger<PublisherJob> logger) : IJob
{
    private const string JobName = nameof(PublisherJob);

    /// <summary>Key identifying this job (name <c>PublisherJob</c>, group <c>Jobs</c>).</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Jobs));

    /// <summary>Key identifying this job's trigger (name <c>PublisherJob-trigger</c>, group <c>Jobs</c>).</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Jobs));

    /// <summary>
    /// Publishes every torrent returned by <c>GetPublishableTorrents</c> and records them as
    /// processed, logging the outcome of that final update.
    /// </summary>
    /// <param name="context">Execution context; supplies the cancellation token passed to every call.</param>
    public async Task Execute(IJobExecutionContext context)
    {
        var token = context.CancellationToken;
        var pending = await storage.GetPublishableTorrents(token);

        // Nothing to publish this run.
        if (pending.Count == 0)
        {
            return;
        }

        await publisher.PublishAsync(pending, token);
        var outcome = await storage.SetTorrentsProcessed(pending, token);

        if (outcome.Success)
        {
            logger.LogInformation("Successfully set {Count} torrents as processed", outcome.UpdatedCount);
            return;
        }

        logger.LogWarning("Failed to set torrents as processed: [{Error}]", outcome.ErrorMessage);
    }
}
|
||||
10
src/producer/Jobs/SyncDmmJob.cs
Normal file
10
src/producer/Jobs/SyncDmmJob.cs
Normal file
@@ -0,0 +1,10 @@
|
||||
namespace Scraper.Jobs;
|
||||
|
||||
/// <summary>
/// Crawler job that supplies the name <c>DebridMediaManagerCrawler</c> to <see cref="BaseJob"/>.
/// </summary>
[DisallowConcurrentExecution]
public class SyncDmmJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(DebridMediaManagerCrawler);

    /// <summary>Key identifying this job (group <c>Crawlers</c>).</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Crawlers));

    /// <summary>Key identifying this job's trigger (group <c>Crawlers</c>).</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Crawlers));

    /// <summary>Crawler name; the same constant used for the job key.</summary>
    protected override string Crawler => JobName;
}
|
||||
10
src/producer/Jobs/SyncEzTvJob.cs
Normal file
10
src/producer/Jobs/SyncEzTvJob.cs
Normal file
@@ -0,0 +1,10 @@
|
||||
namespace Scraper.Jobs;
|
||||
|
||||
/// <summary>
/// Crawler job that supplies the name <c>EzTvCrawler</c> to <see cref="BaseJob"/>.
/// </summary>
[DisallowConcurrentExecution]
public class SyncEzTvJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(EzTvCrawler);

    /// <summary>Key identifying this job (group <c>Crawlers</c>).</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Crawlers));

    /// <summary>Key identifying this job's trigger (group <c>Crawlers</c>).</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Crawlers));

    /// <summary>Crawler name; the same constant used for the job key.</summary>
    protected override string Crawler => JobName;
}
|
||||
10
src/producer/Jobs/SyncTgxJob.cs
Normal file
10
src/producer/Jobs/SyncTgxJob.cs
Normal file
@@ -0,0 +1,10 @@
|
||||
namespace Scraper.Jobs;
|
||||
|
||||
/// <summary>
/// Crawler job that supplies the name <c>TgxCrawler</c> to <see cref="BaseJob"/>.
/// </summary>
[DisallowConcurrentExecution]
public class SyncTgxJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(TgxCrawler);

    /// <summary>Key identifying this job (group <c>Crawlers</c>).</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Crawlers));

    /// <summary>Key identifying this job's trigger (group <c>Crawlers</c>).</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Crawlers));

    /// <summary>Crawler name; the same constant used for the job key.</summary>
    protected override string Crawler => JobName;
}
|
||||
10
src/producer/Jobs/SyncTpbJob.cs
Normal file
10
src/producer/Jobs/SyncTpbJob.cs
Normal file
@@ -0,0 +1,10 @@
|
||||
namespace Scraper.Jobs;
|
||||
|
||||
/// <summary>
/// Crawler job that supplies the name <c>TpbCrawler</c> to <see cref="BaseJob"/>.
/// </summary>
[DisallowConcurrentExecution]
public class SyncTpbJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(TpbCrawler);

    /// <summary>Key identifying this job (group <c>Crawlers</c>).</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Crawlers));

    /// <summary>Key identifying this job's trigger (group <c>Crawlers</c>).</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Crawlers));

    /// <summary>Crawler name; the same constant used for the job key.</summary>
    protected override string Crawler => JobName;
}
|
||||
10
src/producer/Jobs/SyncYtsJob.cs
Normal file
10
src/producer/Jobs/SyncYtsJob.cs
Normal file
@@ -0,0 +1,10 @@
|
||||
namespace Scraper.Jobs;
|
||||
|
||||
/// <summary>
/// Crawler job that supplies the name <c>YtsCrawler</c> to <see cref="BaseJob"/>.
/// </summary>
[DisallowConcurrentExecution]
public class SyncYtsJob(ICrawlerProvider crawlerProvider) : BaseJob(crawlerProvider)
{
    private const string JobName = nameof(YtsCrawler);

    /// <summary>Key identifying this job (group <c>Crawlers</c>).</summary>
    public static readonly JobKey Key = new JobKey(JobName, nameof(Crawlers));

    /// <summary>Key identifying this job's trigger (group <c>Crawlers</c>).</summary>
    public static readonly TriggerKey Trigger = new TriggerKey(JobName + "-trigger", nameof(Crawlers));

    /// <summary>Crawler name; the same constant used for the job key.</summary>
    protected override string Crawler => JobName;
}
|
||||
Reference in New Issue
Block a user