mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
Big rewrite: distributed consumers for ingestion / scraping (scalable); single producer written in C#.
Changed from page scraping to RSS XML scraping. Includes RealDebridManager hashlist decoding (requires a GitHub read-only PAT, as requests must be authenticated). This allows ingestion of 200k+ entries in a few hours, and simplifies a lot of Torrentio's handling of the new data.
This commit is contained in:
9
src/producer/Models/GithubConfiguration.cs
Normal file
9
src/producer/Models/GithubConfiguration.cs
Normal file
@@ -0,0 +1,9 @@
|
||||
namespace Scraper.Models;

/// <summary>
/// Strongly-typed GitHub settings, bound from the "GithubSettings"
/// configuration section (loaded from github.json).
/// </summary>
public class GithubConfiguration
{
    /// <summary>Name of the configuration section this type binds to.</summary>
    public const string SectionName = "GithubSettings";

    /// <summary>Settings file the section is read from.</summary>
    public const string Filename = "github.json";

    /// <summary>
    /// Personal access token used to authenticate requests to GitHub.
    /// Null when not configured.
    /// </summary>
    public string? PAT { get; set; }
}
|
||||
13
src/producer/Models/RabbitMqConfiguration.cs
Normal file
13
src/producer/Models/RabbitMqConfiguration.cs
Normal file
@@ -0,0 +1,13 @@
|
||||
namespace Scraper.Models;

/// <summary>
/// Strongly-typed RabbitMQ connection settings, bound from the
/// "RabbitMqConfiguration" configuration section (loaded from rabbitmq.json).
/// </summary>
public class RabbitMqConfiguration
{
    /// <summary>Name of the configuration section this type binds to.</summary>
    public const string SectionName = "RabbitMqConfiguration";

    /// <summary>Settings file the section is read from.</summary>
    public const string Filename = "rabbitmq.json";

    /// <summary>Broker host name. Null when not configured.</summary>
    public string? Host { get; set; }

    /// <summary>Credentials used when connecting to the broker.</summary>
    public string? Username { get; set; }

    /// <summary>Credentials used when connecting to the broker.</summary>
    public string? Password { get; set; }

    /// <summary>Name of the queue to publish to / consume from.</summary>
    public string? QueueName { get; set; }

    /// <summary>Whether the queue should be declared as durable.</summary>
    public bool Durable { get; set; }
}
|
||||
5
src/producer/Models/Results.cs
Normal file
5
src/producer/Models/Results.cs
Normal file
@@ -0,0 +1,5 @@
|
||||
namespace Scraper.Models;

/// <summary>Outcome of inserting a batch of torrents into storage.</summary>
public record InsertTorrentResult(bool Success, int InsertedCount = 0, string? ErrorMessage = null);

/// <summary>Outcome of updating a batch of existing torrents.</summary>
public record UpdatedTorrentResult(bool Success, int UpdatedCount = 0, string? ErrorMessage = null);

/// <summary>Outcome of ingesting a single page/feed of scraped results.</summary>
public record PageIngestedResult(bool Success, string? ErrorMessage = null);
|
||||
10
src/producer/Models/ScrapeConfiguration.cs
Normal file
10
src/producer/Models/ScrapeConfiguration.cs
Normal file
@@ -0,0 +1,10 @@
|
||||
namespace Scraper.Models;

/// <summary>
/// Scraper settings, bound from the "ScrapeConfiguration" configuration
/// section (loaded from scrapers.json).
/// </summary>
public class ScrapeConfiguration
{
    /// <summary>Name of the configuration section this type binds to.</summary>
    public const string SectionName = "ScrapeConfiguration";

    /// <summary>Settings file the section is read from.</summary>
    public const string Filename = "scrapers.json";

    /// <summary>Configured scrapers; empty when none are defined.</summary>
    public List<Scraper> Scrapers { get; set; } = new();

    /// <summary>Connection string for the backing storage; empty when not configured.</summary>
    public string StorageConnectionString { get; set; } = string.Empty;
}
|
||||
10
src/producer/Models/Scraper.cs
Normal file
10
src/producer/Models/Scraper.cs
Normal file
@@ -0,0 +1,10 @@
|
||||
namespace Scraper.Models;

/// <summary>
/// A single scraper entry from ScrapeConfiguration. NOTE(review): the type
/// name shadows the root namespace segment ("Scraper"), which can force
/// fully-qualified references elsewhere — consider renaming in a follow-up.
/// </summary>
public class Scraper
{
    /// <summary>Identifier of the scraper; null when not configured.</summary>
    public string? Name { get; set; }

    /// <summary>How often the scraper runs, in seconds (default: 60).</summary>
    public int IntervalSeconds { get; set; } = 60;

    /// <summary>Whether the scraper is active (default: enabled).</summary>
    public bool Enabled { get; set; } = true;
}
|
||||
20
src/producer/Models/Torrent.cs
Normal file
20
src/producer/Models/Torrent.cs
Normal file
@@ -0,0 +1,20 @@
|
||||
namespace Scraper.Models;

/// <summary>
/// A crawled torrent from one of our supported sources.
/// </summary>
public class Torrent
{
    /// <summary>Storage identifier; null before the row is persisted.</summary>
    public long? Id { get; set; }

    /// <summary>Display name/title of the torrent.</summary>
    public string? Name { get; set; }

    /// <summary>Which source/feed the entry was crawled from.</summary>
    public string? Source { get; set; }

    /// <summary>Source-reported category label.</summary>
    public string? Category { get; set; }

    /// <summary>BitTorrent info hash identifying the torrent.</summary>
    public string? InfoHash { get; set; }

    // Kept as a string as reported by the source feed — units/format vary
    // per source. NOTE(review): confirm whether consumers need a parsed byte count.
    public string? Size { get; set; }

    /// <summary>Seeder count at crawl time.</summary>
    public int Seeders { get; set; }

    /// <summary>Leecher count at crawl time.</summary>
    public int Leechers { get; set; }

    /// <summary>Associated IMDB identifier, when the source provides one.</summary>
    public string? Imdb { get; set; }

    // Defaults to false (bool default) — explicit "= false" removed as redundant.
    public bool Processed { get; set; }

    /// <summary>UTC timestamp captured when this instance is constructed.</summary>
    public DateTime CreatedAt { get; set; } = DateTime.UtcNow;

    /// <summary>UTC timestamp captured when this instance is constructed.</summary>
    public DateTime UpdatedAt { get; set; } = DateTime.UtcNow;
}
|
||||
Reference in New Issue
Block a user