mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
First self-hosted release
addon/hosted/scraper.py (new file, 41 lines)
@@ -0,0 +1,41 @@
import os
import threading

from apscheduler.schedulers.background import BackgroundScheduler

from p1337x import process_links, get_links_initial, get_links_and_process
from shared import read_config

print("Scraper starting...")

# A ".init" marker file requests a one-off full sync on first start.
if os.path.isfile(".init"):
    print("Found init file, executing initial sync. Be patient.")
    process_links(get_links_initial())
    os.remove(".init")

sched = BackgroundScheduler(timezone="America/New_York")
sched.start()

# 1337x: schedule a recurring scrape job for each configured page.
PROVIDER = "1337x"
pages = read_config(PROVIDER, "urls_to_scrape")
interval = read_config(PROVIDER, "scrape_interval")
for page in pages:
    j = sched.add_job(
        get_links_and_process,
        'interval',
        days=interval["days"],
        hours=interval["hours"],
        minutes=interval["minutes"],
        seconds=interval["seconds"],
        id=page,
        args=[page],
        max_instances=1)  # never run overlapping scrapes of the same page
    print(f"{page} will next be scraped at {j.next_run_time}.")


# Keep the main thread alive: the scheduler runs in a background
# daemon thread, so join every non-main thread forever.
main_thread = threading.main_thread()
while True:
    threads = threading.enumerate()
    threads.remove(main_thread)  # don't try to join ourselves
    for t in threads:
        t.join()
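Note: shared.read_config and the config file it reads are not part of this commit. Judging from the calls above, the config is keyed by provider name, with "urls_to_scrape" holding a list of page URLs and "scrape_interval" a dict with days/hours/minutes/seconds keys. A minimal sketch of such a helper, assuming a JSON file named config.json (both the filename and the structure are assumptions, not the project's actual helper):

import json

CONFIG_PATH = "config.json"  # assumed filename, not confirmed by this commit

def read_config(provider, key):
    """Return one setting for a provider, e.g. read_config("1337x", "scrape_interval")."""
    with open(CONFIG_PATH) as f:
        return json.load(f)[provider][key]

# Shape implied by scraper.py (URLs are placeholders):
# {
#     "1337x": {
#         "urls_to_scrape": ["https://1337x.example/page-1", "https://1337x.example/page-2"],
#         "scrape_interval": {"days": 0, "hours": 1, "minutes": 0, "seconds": 0}
#     }
# }

As a design note, APScheduler's BlockingScheduler would make the manual keep-alive loop at the bottom of scraper.py unnecessary, since its start() call blocks the calling thread; the BackgroundScheduler plus join loop used here achieves the same effect.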