Allow configuration of producer urls (#203)

* Allow configuration of URLs in scrapers by mounting the scrapers.json file over the one in the container
* Version bump
2024-12-20 03:29:51 +00:00 · 2024-04-11 18:23:42 +01:00
parent 02101ac50a
commit e6a63fd72e
11 changed files with 54 additions and 34 deletions
--- a/deployment/docker/docker-compose.yaml
+++ b/deployment/docker/docker-compose.yaml
@@ -94,7 +94,7 @@ services:
        condition: service_healthy
    env_file: stack.env
    hostname: knightcrawler-addon
-    image: gabisonfire/knightcrawler-addon:2.0.22
+    image: gabisonfire/knightcrawler-addon:2.0.23
    labels:
      logging: promtail
    networks:
@@ -117,7 +117,7 @@ services:
      redis:
        condition: service_healthy
    env_file: stack.env
-    image: gabisonfire/knightcrawler-consumer:2.0.22
+    image: gabisonfire/knightcrawler-consumer:2.0.23
    labels:
      logging: promtail
    networks:
@@ -138,7 +138,7 @@ services:
      redis:
        condition: service_healthy
    env_file: stack.env
-    image: gabisonfire/knightcrawler-debrid-collector:2.0.22
+    image: gabisonfire/knightcrawler-debrid-collector:2.0.23
    labels:
      logging: promtail
    networks:
@@ -152,7 +152,7 @@ services:
      migrator:
        condition: service_completed_successfully
    env_file: stack.env
-    image: gabisonfire/knightcrawler-metadata:2.0.22
+    image: gabisonfire/knightcrawler-metadata:2.0.23
    networks:
      - knightcrawler-network
    restart: "no"
@@ -163,7 +163,7 @@ services:
      postgres:
        condition: service_healthy
    env_file: stack.env
-    image: gabisonfire/knightcrawler-migrator:2.0.22
+    image: gabisonfire/knightcrawler-migrator:2.0.23
    networks:
      - knightcrawler-network
    restart: "no"
@@ -182,7 +182,7 @@ services:
      redis:
        condition: service_healthy
    env_file: stack.env
-    image: gabisonfire/knightcrawler-producer:2.0.22
+    image: gabisonfire/knightcrawler-producer:2.0.23
    labels:
      logging: promtail
    networks:
@@ -207,7 +207,7 @@ services:
    deploy:
      replicas: ${QBIT_REPLICAS:-0}
    env_file: stack.env
-    image: gabisonfire/knightcrawler-qbit-collector:2.0.22
+    image: gabisonfire/knightcrawler-qbit-collector:2.0.23
    labels:
      logging: promtail
    networks:
--- a/deployment/docker/src/components/knightcrawler.yaml
+++ b/deployment/docker/src/components/knightcrawler.yaml
@@ -20,7 +20,7 @@ x-depends: &knightcrawler-app-depends
 services:
  metadata:
-    image: gabisonfire/knightcrawler-metadata:2.0.22
+    image: gabisonfire/knightcrawler-metadata:2.0.23
    env_file: ../../.env
    networks:
      - knightcrawler-network
@@ -30,7 +30,7 @@ services:
        condition: service_completed_successfully
  migrator:
-    image: gabisonfire/knightcrawler-migrator:2.0.22
+    image: gabisonfire/knightcrawler-migrator:2.0.23
    env_file: ../../.env
    networks:
      - knightcrawler-network
@@ -40,7 +40,7 @@ services:
        condition: service_healthy
  addon:
-    image: gabisonfire/knightcrawler-addon:2.0.22
+    image: gabisonfire/knightcrawler-addon:2.0.23
    <<: [*knightcrawler-app, *knightcrawler-app-depends]
    restart: unless-stopped
    hostname: knightcrawler-addon
@@ -48,22 +48,22 @@ services:
      - "7000:7000"
  consumer:
-    image: gabisonfire/knightcrawler-consumer:2.0.22
+    image: gabisonfire/knightcrawler-consumer:2.0.23
    <<: [*knightcrawler-app, *knightcrawler-app-depends]
    restart: unless-stopped
  debridcollector:
-    image: gabisonfire/knightcrawler-debrid-collector:2.0.22
+    image: gabisonfire/knightcrawler-debrid-collector:2.0.23
    <<: [*knightcrawler-app, *knightcrawler-app-depends]
    restart: unless-stopped
  producer:
-    image: gabisonfire/knightcrawler-producer:2.0.22
+    image: gabisonfire/knightcrawler-producer:2.0.23
    <<: [*knightcrawler-app, *knightcrawler-app-depends]
    restart: unless-stopped
  qbitcollector:
-    image: gabisonfire/knightcrawler-qbit-collector:2.0.22
+    image: gabisonfire/knightcrawler-qbit-collector:2.0.23
    <<: [*knightcrawler-app, *knightcrawler-app-depends]
    restart: unless-stopped
    depends_on:
--- a/src/producer/src/Configuration/scrapers.json
+++ b/src/producer/src/Configuration/scrapers.json
@@ -4,27 +4,34 @@
      {
        "Name": "SyncEzTvJob",
        "IntervalSeconds": 60,
-        "Enabled": true
+        "Enabled": true,
        "Url": "https://eztv1.xyz/ezrss.xml",
        "XmlNamespace": "http://xmlns.ezrss.it/0.1/"
      },
      {
        "Name": "SyncNyaaJob",
        "IntervalSeconds": 60,
-        "Enabled": true
+        "Enabled": true,
        "Url": "https://nyaa.si/?page=rss&c=1_2&f=0",
        "XmlNamespace": "https://nyaa.si/xmlns/nyaa"
      },
      {
        "Name": "SyncTpbJob",
        "IntervalSeconds": 60,
-        "Enabled": true
+        "Enabled": true,
        "Url": "https://apibay.org/precompiled/data_top100_recent.json"
      },
      {
        "Name": "SyncYtsJob",
        "IntervalSeconds": 60,
-        "Enabled": true
+        "Enabled": true,
        "Url": "https://yts.am/rss"
      },
      {
        "Name": "SyncTgxJob",
        "IntervalSeconds": 60,
-        "Enabled": true
+        "Enabled": true,
        "Url": "https://tgx.rs/rss"
      },
      {
        "Name": "SyncDmmJob",
--- a/src/producer/src/Features/CrawlerSupport/BaseJsonCrawler.cs
+++ b/src/producer/src/Features/CrawlerSupport/BaseJsonCrawler.cs
@@ -6,6 +6,12 @@ public abstract class BaseJsonCrawler(IHttpClientFactory httpClientFactory, ILog
    protected virtual async Task Execute(string collectionName)
    {
        if (string.IsNullOrWhiteSpace(Url))
        {
            logger.LogWarning("No URL provided for {Source} crawl", Source);
            return;
        }
        logger.LogInformation("Starting {Source} crawl", Source);
        using var client = httpClientFactory.CreateClient("Scraper");
--- a/src/producer/src/Features/CrawlerSupport/BaseXmlCrawler.cs
+++ b/src/producer/src/Features/CrawlerSupport/BaseXmlCrawler.cs
@@ -4,6 +4,12 @@ public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogg
 {
    public override async Task Execute()
    {
        if (string.IsNullOrWhiteSpace(Url))
        {
            logger.LogWarning("No URL provided for {Source} crawl", Source);
            return;
        }
        logger.LogInformation("Starting {Source} crawl", Source);
        using var client = httpClientFactory.CreateClient(Literals.CrawlerClient);
--- a/src/producer/src/Features/CrawlerSupport/Scraper.cs
+++ b/src/producer/src/Features/CrawlerSupport/Scraper.cs
@@ -7,4 +7,8 @@ public class Scraper
    public int IntervalSeconds { get; set; } = 60;
    public bool Enabled { get; set; } = true;
    public string? Url { get; set; }
    public string? XmlNamespace { get; set; }
 }
--- a/src/producer/src/Features/Crawlers/EzTv/EzTvCrawler.cs
+++ b/src/producer/src/Features/Crawlers/EzTv/EzTvCrawler.cs
@@ -1,11 +1,10 @@
 namespace Producer.Features.Crawlers.EzTv;
-public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
+public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger<EzTvCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
 {
-    protected override string Url => "https://eztv1.xyz/ezrss.xml";
+    protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncEzTvJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
    protected override string Source => "EZTV";
-
+    // XML namespace for the EZTV feed, read from the XmlNamespace setting (not the Url) of the "SyncEzTvJob" scraper entry; empty string when that entry or setting is absent.
+    private XNamespace XmlNamespace => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncEzTvJob", StringComparison.OrdinalIgnoreCase))?.XmlNamespace ?? string.Empty;
-    private static readonly XNamespace XmlNamespace = "http://xmlns.ezrss.it/0.1/";
    protected override IReadOnlyDictionary<string, string> Mappings =>
        new Dictionary<string, string>
--- a/src/producer/src/Features/Crawlers/Nyaa/NyaaCrawler.cs
+++ b/src/producer/src/Features/Crawlers/Nyaa/NyaaCrawler.cs
@@ -1,11 +1,10 @@
 namespace Producer.Features.Crawlers.Nyaa;
-public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
+public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger<NyaaCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
 {
-    protected override string Url => "https://nyaa.si/?page=rss&c=1_2&f=0";
+    protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncNyaaJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
    protected override string Source => "Nyaa";
-
+    // XML namespace for the Nyaa feed, read from the XmlNamespace setting (not the Url) of the "SyncNyaaJob" scraper entry; empty string when that entry or setting is absent.
+    private XNamespace XmlNamespace => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncNyaaJob", StringComparison.OrdinalIgnoreCase))?.XmlNamespace ?? string.Empty;
-    private static readonly XNamespace XmlNamespace = "https://nyaa.si/xmlns/nyaa";
    protected override IReadOnlyDictionary<string, string> Mappings =>
        new Dictionary<string, string>
--- a/src/producer/src/Features/Crawlers/Tgx/TgxCrawler.cs
+++ b/src/producer/src/Features/Crawlers/Tgx/TgxCrawler.cs
@@ -1,13 +1,13 @@
 namespace Producer.Features.Crawlers.Tgx;
-public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
+public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger<TgxCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
 {
    [GeneratedRegex(@"Size:\s+(.+?)\s+Added")]
    private static partial Regex SizeStringExtractor();
    [GeneratedRegex(@"(?i)\b(\d+(\.\d+)?)\s*([KMGT]?B)\b", RegexOptions.None, "en-GB")]
    private static partial Regex SizeStringParser();
-    protected override string Url => "https://tgx.rs/rss";
+    protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncTgxJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
    protected override string Source => "TorrentGalaxy";
    protected override IReadOnlyDictionary<string, string> Mappings
--- a/src/producer/src/Features/Crawlers/Tpb/TpbCrawler.cs
+++ b/src/producer/src/Features/Crawlers/Tpb/TpbCrawler.cs
@@ -1,8 +1,8 @@
 namespace Producer.Features.Crawlers.Tpb;
-public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage) : BaseJsonCrawler(httpClientFactory, logger, storage)
+public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger<TpbCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseJsonCrawler(httpClientFactory, logger, storage)
 {
-    protected override string Url => "https://apibay.org/precompiled/data_top100_recent.json";
+    protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncTpbJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
    protected override string Source => "TPB";
--- a/src/producer/src/Features/Crawlers/Yts/YtsCrawler.cs
+++ b/src/producer/src/Features/Crawlers/Yts/YtsCrawler.cs
@@ -1,9 +1,8 @@
 namespace Producer.Features.Crawlers.Yts;
-public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage)
+public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger<YtsCrawler> logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage)
 {
-    protected override string Url => "https://yts.am/rss";
+    protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncYtsJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty;
    protected override string Source => "YTS";
    protected override IReadOnlyDictionary<string, string> Mappings
        => new Dictionary<string, string>