mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
First self-hosted release
addon/hosted/scraper.py (new file, 41 lines)
@@ -0,0 +1,41 @@
import os
import threading

from apscheduler.schedulers.background import BackgroundScheduler

from p1337x import process_links, get_links_initial, get_links_and_process
from shared import read_config

print("Scraper starting...")

# A ".init" marker file requests a one-off full sync on first start.
if os.path.isfile(".init"):
    print("Found init file, executing initial sync. Be patient.")
    process_links(get_links_initial())
    os.remove(".init")

sched = BackgroundScheduler(timezone="America/New_York")
sched.start()

# 1337x: schedule a recurring scrape job for each configured page.
PROVIDER = "1337x"
pages = read_config(PROVIDER, "urls_to_scrape")
interval = read_config(PROVIDER, "scrape_interval")
for page in pages:
    j = sched.add_job(
        get_links_and_process,
        'interval',
        days=interval["days"],
        hours=interval["hours"],
        minutes=interval["minutes"],
        seconds=interval["seconds"],
        id=page,
        args=[page],
        max_instances=1)  # never run overlapping scrapes of the same page
    print(f"{page} will next be scraped at {j.next_run_time}.")


# Keep the main thread alive: the scheduler runs in a background
# daemon thread, so join every non-main thread forever.
main_thread = threading.main_thread()
while True:
    threads = threading.enumerate()
    threads.remove(main_thread)  # don't try to join ourselves
    for t in threads:
        t.join()
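Note: shared.read_config and the config file it reads are not part of this commit. Judging from the calls above, the config is keyed by provider name, with "urls_to_scrape" holding a list of page URLs and "scrape_interval" a dict with days/hours/minutes/seconds keys. A minimal sketch of such a helper, assuming a JSON file named config.json (both the filename and the structure are assumptions, not the project's actual helper):

import json

CONFIG_PATH = "config.json"  # assumed filename, not confirmed by this commit

def read_config(provider, key):
    """Return one setting for a provider, e.g. read_config("1337x", "scrape_interval")."""
    with open(CONFIG_PATH) as f:
        return json.load(f)[provider][key]

# Shape implied by scraper.py (URLs are placeholders):
# {
#     "1337x": {
#         "urls_to_scrape": ["https://1337x.example/page-1", "https://1337x.example/page-2"],
#         "scrape_interval": {"days": 0, "hours": 1, "minutes": 0, "seconds": 0}
#     }
# }

As a design note, APScheduler's BlockingScheduler would make the manual keep-alive loop at the bottom of scraper.py unnecessary, since its start() call blocks the calling thread; the BackgroundScheduler plus join loop used here achieves the same effect.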