From e6a63fd72e192a29b63a6d3bdcbc790b72a60141 Mon Sep 17 00:00:00 2001 From: iPromKnight <156901906+iPromKnight@users.noreply.github.com> Date: Thu, 11 Apr 2024 18:23:42 +0100 Subject: [PATCH] Allow configuration of producer urls (#203) * Allow configuration of urls in scrapers by mounting the scrapers.json file over the one in the container * version bump --- deployment/docker/docker-compose.yaml | 14 +++++++------- .../docker/src/components/knightcrawler.yaml | 14 +++++++------- src/producer/src/Configuration/scrapers.json | 17 ++++++++++++----- .../Features/CrawlerSupport/BaseJsonCrawler.cs | 6 ++++++ .../Features/CrawlerSupport/BaseXmlCrawler.cs | 6 ++++++ .../src/Features/CrawlerSupport/Scraper.cs | 4 ++++ .../src/Features/Crawlers/EzTv/EzTvCrawler.cs | 7 +++---- .../src/Features/Crawlers/Nyaa/NyaaCrawler.cs | 7 +++---- .../src/Features/Crawlers/Tgx/TgxCrawler.cs | 4 ++-- .../src/Features/Crawlers/Tpb/TpbCrawler.cs | 4 ++-- .../src/Features/Crawlers/Yts/YtsCrawler.cs | 5 ++--- 11 files changed, 54 insertions(+), 34 deletions(-) diff --git a/deployment/docker/docker-compose.yaml b/deployment/docker/docker-compose.yaml index 29ef4cd..25a7f2c 100644 --- a/deployment/docker/docker-compose.yaml +++ b/deployment/docker/docker-compose.yaml @@ -94,7 +94,7 @@ services: condition: service_healthy env_file: stack.env hostname: knightcrawler-addon - image: gabisonfire/knightcrawler-addon:2.0.22 + image: gabisonfire/knightcrawler-addon:2.0.23 labels: logging: promtail networks: @@ -117,7 +117,7 @@ services: redis: condition: service_healthy env_file: stack.env - image: gabisonfire/knightcrawler-consumer:2.0.22 + image: gabisonfire/knightcrawler-consumer:2.0.23 labels: logging: promtail networks: @@ -138,7 +138,7 @@ services: redis: condition: service_healthy env_file: stack.env - image: gabisonfire/knightcrawler-debrid-collector:2.0.22 + image: gabisonfire/knightcrawler-debrid-collector:2.0.23 labels: logging: promtail networks: @@ -152,7 +152,7 @@ services: migrator: 
condition: service_completed_successfully env_file: stack.env - image: gabisonfire/knightcrawler-metadata:2.0.22 + image: gabisonfire/knightcrawler-metadata:2.0.23 networks: - knightcrawler-network restart: "no" @@ -163,7 +163,7 @@ services: postgres: condition: service_healthy env_file: stack.env - image: gabisonfire/knightcrawler-migrator:2.0.22 + image: gabisonfire/knightcrawler-migrator:2.0.23 networks: - knightcrawler-network restart: "no" @@ -182,7 +182,7 @@ services: redis: condition: service_healthy env_file: stack.env - image: gabisonfire/knightcrawler-producer:2.0.22 + image: gabisonfire/knightcrawler-producer:2.0.23 labels: logging: promtail networks: @@ -207,7 +207,7 @@ services: deploy: replicas: ${QBIT_REPLICAS:-0} env_file: stack.env - image: gabisonfire/knightcrawler-qbit-collector:2.0.22 + image: gabisonfire/knightcrawler-qbit-collector:2.0.23 labels: logging: promtail networks: diff --git a/deployment/docker/src/components/knightcrawler.yaml b/deployment/docker/src/components/knightcrawler.yaml index ff8d71d..6d12bf3 100644 --- a/deployment/docker/src/components/knightcrawler.yaml +++ b/deployment/docker/src/components/knightcrawler.yaml @@ -20,7 +20,7 @@ x-depends: &knightcrawler-app-depends services: metadata: - image: gabisonfire/knightcrawler-metadata:2.0.22 + image: gabisonfire/knightcrawler-metadata:2.0.23 env_file: ../../.env networks: - knightcrawler-network @@ -30,7 +30,7 @@ services: condition: service_completed_successfully migrator: - image: gabisonfire/knightcrawler-migrator:2.0.22 + image: gabisonfire/knightcrawler-migrator:2.0.23 env_file: ../../.env networks: - knightcrawler-network @@ -40,7 +40,7 @@ services: condition: service_healthy addon: - image: gabisonfire/knightcrawler-addon:2.0.22 + image: gabisonfire/knightcrawler-addon:2.0.23 <<: [*knightcrawler-app, *knightcrawler-app-depends] restart: unless-stopped hostname: knightcrawler-addon @@ -48,22 +48,22 @@ services: - "7000:7000" consumer: - image: 
gabisonfire/knightcrawler-consumer:2.0.22 + image: gabisonfire/knightcrawler-consumer:2.0.23 <<: [*knightcrawler-app, *knightcrawler-app-depends] restart: unless-stopped debridcollector: - image: gabisonfire/knightcrawler-debrid-collector:2.0.22 + image: gabisonfire/knightcrawler-debrid-collector:2.0.23 <<: [*knightcrawler-app, *knightcrawler-app-depends] restart: unless-stopped producer: - image: gabisonfire/knightcrawler-producer:2.0.22 + image: gabisonfire/knightcrawler-producer:2.0.23 <<: [*knightcrawler-app, *knightcrawler-app-depends] restart: unless-stopped qbitcollector: - image: gabisonfire/knightcrawler-qbit-collector:2.0.22 + image: gabisonfire/knightcrawler-qbit-collector:2.0.23 <<: [*knightcrawler-app, *knightcrawler-app-depends] restart: unless-stopped depends_on: diff --git a/src/producer/src/Configuration/scrapers.json b/src/producer/src/Configuration/scrapers.json index da4a2dd..d83696b 100644 --- a/src/producer/src/Configuration/scrapers.json +++ b/src/producer/src/Configuration/scrapers.json @@ -4,27 +4,34 @@ { "Name": "SyncEzTvJob", "IntervalSeconds": 60, - "Enabled": true + "Enabled": true, + "Url": "https://eztv1.xyz/ezrss.xml", + "XmlNamespace": "http://xmlns.ezrss.it/0.1/" }, { "Name": "SyncNyaaJob", "IntervalSeconds": 60, - "Enabled": true + "Enabled": true, + "Url": "https://nyaa.si/?page=rss&c=1_2&f=0", + "XmlNamespace": "https://nyaa.si/xmlns/nyaa" }, { "Name": "SyncTpbJob", "IntervalSeconds": 60, - "Enabled": true + "Enabled": true, + "Url": "https://apibay.org/precompiled/data_top100_recent.json" }, { "Name": "SyncYtsJob", "IntervalSeconds": 60, - "Enabled": true + "Enabled": true, + "Url": "https://yts.am/rss" }, { "Name": "SyncTgxJob", "IntervalSeconds": 60, - "Enabled": true + "Enabled": true, + "Url": "https://tgx.rs/rss" }, { "Name": "SyncDmmJob", diff --git a/src/producer/src/Features/CrawlerSupport/BaseJsonCrawler.cs b/src/producer/src/Features/CrawlerSupport/BaseJsonCrawler.cs index e13758b..e005260 100644 --- 
a/src/producer/src/Features/CrawlerSupport/BaseJsonCrawler.cs +++ b/src/producer/src/Features/CrawlerSupport/BaseJsonCrawler.cs @@ -6,6 +6,12 @@ public abstract class BaseJsonCrawler(IHttpClientFactory httpClientFactory, ILog protected virtual async Task Execute(string collectionName) { + if (string.IsNullOrWhiteSpace(Url)) + { + logger.LogWarning("No URL provided for {Source} crawl", Source); + return; + } + logger.LogInformation("Starting {Source} crawl", Source); using var client = httpClientFactory.CreateClient("Scraper"); diff --git a/src/producer/src/Features/CrawlerSupport/BaseXmlCrawler.cs b/src/producer/src/Features/CrawlerSupport/BaseXmlCrawler.cs index 0eae69d..c5f8f68 100644 --- a/src/producer/src/Features/CrawlerSupport/BaseXmlCrawler.cs +++ b/src/producer/src/Features/CrawlerSupport/BaseXmlCrawler.cs @@ -4,6 +4,12 @@ public abstract class BaseXmlCrawler(IHttpClientFactory httpClientFactory, ILogg { public override async Task Execute() { + if (string.IsNullOrWhiteSpace(Url)) + { + logger.LogWarning("No URL provided for {Source} crawl", Source); + return; + } + logger.LogInformation("Starting {Source} crawl", Source); using var client = httpClientFactory.CreateClient(Literals.CrawlerClient); diff --git a/src/producer/src/Features/CrawlerSupport/Scraper.cs b/src/producer/src/Features/CrawlerSupport/Scraper.cs index 24f0591..0d70b00 100644 --- a/src/producer/src/Features/CrawlerSupport/Scraper.cs +++ b/src/producer/src/Features/CrawlerSupport/Scraper.cs @@ -7,4 +7,8 @@ public class Scraper public int IntervalSeconds { get; set; } = 60; public bool Enabled { get; set; } = true; + + public string? Url { get; set; } + + public string? 
XmlNamespace { get; set; } } diff --git a/src/producer/src/Features/Crawlers/EzTv/EzTvCrawler.cs b/src/producer/src/Features/Crawlers/EzTv/EzTvCrawler.cs index 6e80818..86cb0d7 100644 --- a/src/producer/src/Features/Crawlers/EzTv/EzTvCrawler.cs +++ b/src/producer/src/Features/Crawlers/EzTv/EzTvCrawler.cs @@ -1,11 +1,10 @@ namespace Producer.Features.Crawlers.EzTv; -public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) +public class EzTvCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage) { - protected override string Url => "https://eztv1.xyz/ezrss.xml"; + protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncEzTvJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty; protected override string Source => "EZTV"; - - private static readonly XNamespace XmlNamespace = "http://xmlns.ezrss.it/0.1/"; + private XNamespace XmlNamespace => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncEzTvJob", StringComparison.OrdinalIgnoreCase))?.XmlNamespace ?? 
string.Empty; protected override IReadOnlyDictionary Mappings => new Dictionary diff --git a/src/producer/src/Features/Crawlers/Nyaa/NyaaCrawler.cs b/src/producer/src/Features/Crawlers/Nyaa/NyaaCrawler.cs index 6936c09..f67b745 100644 --- a/src/producer/src/Features/Crawlers/Nyaa/NyaaCrawler.cs +++ b/src/producer/src/Features/Crawlers/Nyaa/NyaaCrawler.cs @@ -1,11 +1,10 @@ namespace Producer.Features.Crawlers.Nyaa; -public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) +public class NyaaCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage) { - protected override string Url => "https://nyaa.si/?page=rss&c=1_2&f=0"; + protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncNyaaJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty; protected override string Source => "Nyaa"; - - private static readonly XNamespace XmlNamespace = "https://nyaa.si/xmlns/nyaa"; + private XNamespace XmlNamespace => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncNyaaJob", StringComparison.OrdinalIgnoreCase))?.XmlNamespace ?? 
string.Empty; protected override IReadOnlyDictionary Mappings => new Dictionary diff --git a/src/producer/src/Features/Crawlers/Tgx/TgxCrawler.cs b/src/producer/src/Features/Crawlers/Tgx/TgxCrawler.cs index 996cbfe..681b6c8 100644 --- a/src/producer/src/Features/Crawlers/Tgx/TgxCrawler.cs +++ b/src/producer/src/Features/Crawlers/Tgx/TgxCrawler.cs @@ -1,13 +1,13 @@ namespace Producer.Features.Crawlers.Tgx; -public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) +public partial class TgxCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage) { [GeneratedRegex(@"Size:\s+(.+?)\s+Added")] private static partial Regex SizeStringExtractor(); [GeneratedRegex(@"(?i)\b(\d+(\.\d+)?)\s*([KMGT]?B)\b", RegexOptions.None, "en-GB")] private static partial Regex SizeStringParser(); - protected override string Url => "https://tgx.rs/rss"; + protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncTgxJob", StringComparison.OrdinalIgnoreCase))?.Url ?? 
string.Empty; protected override string Source => "TorrentGalaxy"; protected override IReadOnlyDictionary Mappings diff --git a/src/producer/src/Features/Crawlers/Tpb/TpbCrawler.cs b/src/producer/src/Features/Crawlers/Tpb/TpbCrawler.cs index 2e56ba0..f0ecf7d 100644 --- a/src/producer/src/Features/Crawlers/Tpb/TpbCrawler.cs +++ b/src/producer/src/Features/Crawlers/Tpb/TpbCrawler.cs @@ -1,8 +1,8 @@ namespace Producer.Features.Crawlers.Tpb; -public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseJsonCrawler(httpClientFactory, logger, storage) +public class TpbCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseJsonCrawler(httpClientFactory, logger, storage) { - protected override string Url => "https://apibay.org/precompiled/data_top100_recent.json"; + protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncTpbJob", StringComparison.OrdinalIgnoreCase))?.Url ?? string.Empty; protected override string Source => "TPB"; diff --git a/src/producer/src/Features/Crawlers/Yts/YtsCrawler.cs b/src/producer/src/Features/Crawlers/Yts/YtsCrawler.cs index 9e3bbfc..94d89de 100644 --- a/src/producer/src/Features/Crawlers/Yts/YtsCrawler.cs +++ b/src/producer/src/Features/Crawlers/Yts/YtsCrawler.cs @@ -1,9 +1,8 @@ namespace Producer.Features.Crawlers.Yts; -public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage) : BaseXmlCrawler(httpClientFactory, logger, storage) +public class YtsCrawler(IHttpClientFactory httpClientFactory, ILogger logger, IDataStorage storage, ScrapeConfiguration scrapeConfiguration) : BaseXmlCrawler(httpClientFactory, logger, storage) { - protected override string Url => "https://yts.am/rss"; - + protected override string Url => scrapeConfiguration.Scrapers.FirstOrDefault(x => x.Name.Equals("SyncYtsJob", StringComparison.OrdinalIgnoreCase))?.Url ?? 
string.Empty; protected override string Source => "YTS"; protected override IReadOnlyDictionary Mappings => new Dictionary