Allow configuration of producer urls (#203)

* Allow configuration of urls in scrapers by mounting the scrapers.json file over the one in the container

* version bump
This commit is contained in:
iPromKnight
2024-04-11 18:23:42 +01:00
committed by GitHub
parent 02101ac50a
commit e6a63fd72e
11 changed files with 54 additions and 34 deletions

View File

@@ -94,7 +94,7 @@ services:
condition: service_healthy condition: service_healthy
env_file: stack.env env_file: stack.env
hostname: knightcrawler-addon hostname: knightcrawler-addon
image: gabisonfire/knightcrawler-addon:2.0.22 image: gabisonfire/knightcrawler-addon:2.0.23
labels: labels:
logging: promtail logging: promtail
networks: networks:
@@ -117,7 +117,7 @@ services:
redis: redis:
condition: service_healthy condition: service_healthy
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-consumer:2.0.22 image: gabisonfire/knightcrawler-consumer:2.0.23
labels: labels:
logging: promtail logging: promtail
networks: networks:
@@ -138,7 +138,7 @@ services:
redis: redis:
condition: service_healthy condition: service_healthy
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-debrid-collector:2.0.22 image: gabisonfire/knightcrawler-debrid-collector:2.0.23
labels: labels:
logging: promtail logging: promtail
networks: networks:
@@ -152,7 +152,7 @@ services:
migrator: migrator:
condition: service_completed_successfully condition: service_completed_successfully
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-metadata:2.0.22 image: gabisonfire/knightcrawler-metadata:2.0.23
networks: networks:
- knightcrawler-network - knightcrawler-network
restart: "no" restart: "no"
@@ -163,7 +163,7 @@ services:
postgres: postgres:
condition: service_healthy condition: service_healthy
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-migrator:2.0.22 image: gabisonfire/knightcrawler-migrator:2.0.23
networks: networks:
- knightcrawler-network - knightcrawler-network
restart: "no" restart: "no"
@@ -182,7 +182,7 @@ services:
redis: redis:
condition: service_healthy condition: service_healthy
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-producer:2.0.22 image: gabisonfire/knightcrawler-producer:2.0.23
labels: labels:
logging: promtail logging: promtail
networks: networks:
@@ -207,7 +207,7 @@ services:
deploy: deploy:
replicas: ${QBIT_REPLICAS:-0} replicas: ${QBIT_REPLICAS:-0}
env_file: stack.env env_file: stack.env
image: gabisonfire/knightcrawler-qbit-collector:2.0.22 image: gabisonfire/knightcrawler-qbit-collector:2.0.23
labels: labels:
logging: promtail logging: promtail
networks: networks:

View File

@@ -20,7 +20,7 @@ x-depends: &knightcrawler-app-depends
services: services:
metadata: metadata:
image: gabisonfire/knightcrawler-metadata:2.0.22 image: gabisonfire/knightcrawler-metadata:2.0.23
env_file: ../../.env env_file: ../../.env
networks: networks:
- knightcrawler-network - knightcrawler-network
@@ -30,7 +30,7 @@ services:
condition: service_completed_successfully condition: service_completed_successfully
migrator: migrator:
image: gabisonfire/knightcrawler-migrator:2.0.22 image: gabisonfire/knightcrawler-migrator:2.0.23
env_file: ../../.env env_file: ../../.env
networks: networks:
- knightcrawler-network - knightcrawler-network
@@ -40,7 +40,7 @@ services:
condition: service_healthy condition: service_healthy
addon: addon:
image: gabisonfire/knightcrawler-addon:2.0.22 image: gabisonfire/knightcrawler-addon:2.0.23
<<: [*knightcrawler-app, *knightcrawler-app-depends] <<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped restart: unless-stopped
hostname: knightcrawler-addon hostname: knightcrawler-addon
@@ -48,22 +48,22 @@ services:
- "7000:7000" - "7000:7000"
consumer: consumer:
image: gabisonfire/knightcrawler-consumer:2.0.22 image: gabisonfire/knightcrawler-consumer:2.0.23
<<: [*knightcrawler-app, *knightcrawler-app-depends] <<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped restart: unless-stopped
debridcollector: debridcollector:
image: gabisonfire/knightcrawler-debrid-collector:2.0.22 image: gabisonfire/knightcrawler-debrid-collector:2.0.23
<<: [*knightcrawler-app, *knightcrawler-app-depends] <<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped restart: unless-stopped
producer: producer:
image: gabisonfire/knightcrawler-producer:2.0.22 image: gabisonfire/knightcrawler-producer:2.0.23
<<: [*knightcrawler-app, *knightcrawler-app-depends] <<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped restart: unless-stopped
qbitcollector: qbitcollector:
image: gabisonfire/knightcrawler-qbit-collector:2.0.22 image: gabisonfire/knightcrawler-qbit-collector:2.0.23
<<: [*knightcrawler-app, *knightcrawler-app-depends] <<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped restart: unless-stopped
depends_on: depends_on:

View File

@@ -4,27 +4,34 @@
{ {
"Name": "SyncEzTvJob", "Name": "SyncEzTvJob",
"IntervalSeconds": 60, "IntervalSeconds": 60,
"Enabled": true "Enabled": true,
"Url": "https://eztv1.xyz/ezrss.xml",
"XmlNamespace": "http://xmlns.ezrss.it/0.1/"
}, },
{ {
"Name": "SyncNyaaJob", "Name": "SyncNyaaJob",
"IntervalSeconds": 60, "IntervalSeconds": 60,
"Enabled": true "Enabled": true,
"Url": "https://nyaa.si/?page=rss&c=1_2&f=0",
"XmlNamespace": "https://nyaa.si/xmlns/nyaa"
}, },
{ {
"Name": "SyncTpbJob", "Name": "SyncTpbJob",
"IntervalSeconds": 60, "IntervalSeconds": 60,
"Enabled": true "Enabled": true,
"Url": "https://apibay.org/precompiled/data_top100_recent.json"
}, },
{ {
"Name": "SyncYtsJob", "Name": "SyncYtsJob",
"IntervalSeconds": 60, "IntervalSeconds": 60,
"Enabled": true "Enabled": true,
"Url": "https://yts.am/rss"
}, },
{ {
"Name": "SyncTgxJob", "Name": "SyncTgxJob",
"IntervalSeconds": 60, "IntervalSeconds": 60,
"Enabled": true "Enabled": true,
"Url": "https://tgx.rs/rss"
}, },
{ {
"Name": "SyncDmmJob", "Name": "SyncDmmJob",

View File

@@ -6,6 +6,12 @@ public abstract class BaseJsonCrawler(IHttpClientFactory httpClientFactory, ILog
protected virtual async Task Execute(string collectionName) protected virtual async Task Execute(string collectionName)
{ {
if (string.IsNullOrWhiteSpace(Url))
{
logger.LogWarning("No URL provided for {Source} crawl", Source);
return;
}
logger.LogInformation("Starting {Source} crawl", Source); logger.LogInformation("Starting {Source} crawl", Source);
using var client = httpClientFactory.CreateClient("Scraper"); using var client = httpClientFactory.CreateClient("Scraper");

View File

@@ -4,6 +4,12 @@ public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogg
{ {
public override async Task Execute() public override async Task Execute()
{ {
if (string.IsNullOrWhiteSpace(Url))
{
logger.LogWarning("No URL provided for {Source} crawl", Source);
return;
}
logger.LogInformation("Starting {Source} crawl", Source); logger.LogInformation("Starting {Source} crawl", Source);
using var client = httpClientFactory.CreateClient(Literals.CrawlerClient); using var client = httpClientFactory.CreateClient(Literals.CrawlerClient);

View File

@@ -7,4 +7,8 @@ public class Scraper
public int IntervalSeconds { get; set; } = 60; public int IntervalSeconds { get; set; } = 60;
public bool Enabled { get; set; } = true; public bool Enabled { get; set; } = true;
public string? Url { get; set; }
public string? XmlNamespace { get; set; }
} }

View File

@@ -1,11 +1,10 @@
namespace Producer.Features.Crawlers.EzTv; namespace Producer.Features.Crawlers.EzTv;
public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{ {
protected override string Url => "https://eztv1.xyz/ezrss.xml"; protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncEzTvJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
protected override string Source => "EZTV"; protected override string Source => "EZTV";
// XML namespace used to qualify EZTV-specific feed elements. It is read from the
// "XmlNamespace" value of the "SyncEzTvJob" scraper entry, not from its "Url":
// the feed URL is not a namespace URI, so using it would make namespaced element
// lookups miss. Falls back to an empty string when the entry or value is absent.
private XNamespace XmlNamespace => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncEzTvJob", StringComparison.OrdinalIgnoreCase))?.XmlNamespace ?? string.Empty;
private static readonly XNamespace XmlNamespace = "http://xmlns.ezrss.it/0.1/";
protected override IReadOnlyDictionary<string, string> Mappings => protected override IReadOnlyDictionary<string, string> Mappings =>
new Dictionary<string, string> new Dictionary<string, string>

View File

@@ -1,11 +1,10 @@
namespace Producer.Features.Crawlers.Nyaa; namespace Producer.Features.Crawlers.Nyaa;
public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{ {
protected override string Url => "https://nyaa.si/?page=rss&c=1_2&f=0"; protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncNyaaJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
protected override string Source => "Nyaa"; protected override string Source => "Nyaa";
// XML namespace used to qualify Nyaa-specific feed elements. It is read from the
// "XmlNamespace" value of the "SyncNyaaJob" scraper entry, not from its "Url":
// the feed URL is not a namespace URI, so using it would make namespaced element
// lookups miss. Falls back to an empty string when the entry or value is absent.
private XNamespace XmlNamespace => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncNyaaJob", StringComparison.OrdinalIgnoreCase))?.XmlNamespace ?? string.Empty;
private static readonly XNamespace XmlNamespace = "https://nyaa.si/xmlns/nyaa";
protected override IReadOnlyDictionary<string, string> Mappings => protected override IReadOnlyDictionary<string, string> Mappings =>
new Dictionary<string, string> new Dictionary<string, string>

View File

@@ -1,13 +1,13 @@
namespace Producer.Features.Crawlers.Tgx; namespace Producer.Features.Crawlers.Tgx;
public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{ {
[GeneratedRegex(@"Size:\s+(.+?)\s+Added")] [GeneratedRegex(@"Size:\s+(.+?)\s+Added")]
private static partial Regex SizeStringExtractor(); private static partial Regex SizeStringExtractor();
[GeneratedRegex(@"(?i)\b(\d+(\.\d+)?)\s*([KMGT]?B)\b", RegexOptions.None, "en-GB")] [GeneratedRegex(@"(?i)\b(\d+(\.\d+)?)\s*([KMGT]?B)\b", RegexOptions.None, "en-GB")]
private static partial Regex SizeStringParser(); private static partial Regex SizeStringParser();
protected override string Url => "https://tgx.rs/rss"; protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncTgxJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
protected override string Source => "TorrentGalaxy"; protected override string Source => "TorrentGalaxy";
protected override IReadOnlyDictionary<string, string> Mappings protected override IReadOnlyDictionary<string, string> Mappings

View File

@@ -1,8 +1,8 @@
namespace Producer.Features.Crawlers.Tpb; namespace Producer.Features.Crawlers.Tpb;
public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage) : BaseJsonCrawler(httpClientFactory, logger, storage) public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseJsonCrawler(httpClientFactory, logger, storage)
{ {
protected override string Url => "https://apibay.org/precompiled/data_top100_recent.json"; protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncTpbJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
protected override string Source => "TPB"; protected override string Source => "TPB";

View File

@@ -1,9 +1,8 @@
namespace Producer.Features.Crawlers.Yts; namespace Producer.Features.Crawlers.Yts;
public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
{ {
protected override string Url => "https://yts.am/rss"; protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncYtsJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
protected override string Source => "YTS"; protected override string Source => "YTS";
protected override IReadOnlyDictionary<string, string> Mappings protected override IReadOnlyDictionary<string, string> Mappings
=> new Dictionary<string, string> => new Dictionary<string, string>