First self-hosted release

Author: Gabisonfire
Date: 2024-01-17 00:27:03 -05:00
Parent: 94ec4a7a52
Commit: a34e40fbc8
16 changed files with 1742 additions and 95 deletions


@@ -1,44 +0,0 @@
name: Deploy Addon
on:
  push:
    branches: master
    paths:
      - 'addon/**'
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - name: Create Docker image
        run: |
          docker build -t torrentio-addon:latest ./addon/
          docker save torrentio-addon:latest > torrentio_addon_latest.tar
      - name: Upload Docker image
        uses: appleboy/scp-action@master
        with:
          host: ${{ secrets.SSH_HOST }}
          username: ${{ secrets.SSH_USERNAME }}
          port: ${{ secrets.SSH_PORT }}
          key: ${{ secrets.SSH_KEY }}
          source: torrentio_addon_latest.tar
          target: /tmp/docker
          overwrite: true
      - name: Deploy Docker image
        uses: appleboy/ssh-action@master
        with:
          host: ${{ secrets.SSH_HOST }}
          username: ${{ secrets.SSH_USERNAME }}
          port: ${{ secrets.SSH_PORT }}
          key: ${{ secrets.SSH_KEY }}
          script: |
            docker load -i /tmp/docker/torrentio_addon_latest.tar
            docker stop torrentio-addon
            docker rm torrentio-addon
            docker run -p 80:7000 -d --name torrentio-addon --restart always -e MONGODB_URI=${{ secrets.MONGODB_URI }} -e DATABASE_URI=${{ secrets.DATABASE_URI }} -e PROXY_HOSTS=${{ secrets.PROXY_HOSTS }} -e PROXY_USERNAME=${{ secrets.PROXY_USERNAME }} -e PROXY_PASSWORD=${{ secrets.PROXY_PASSWORD }} -e METRICS_USER=${{ secrets.METRICS_USER }} -e METRICS_PASSWORD=${{ secrets.METRICS_PASSWORD }} torrentio-addon:latest
            docker image prune -f


@@ -1,44 +0,0 @@
name: Deploy Catalogs
on:
  push:
    branches: master
    paths:
      - 'catalogs/**'
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - name: Create Docker image
        run: |
          docker build -t torrentio-catalogs:latest . -f ./catalogs/Dockerfile
          docker save torrentio-catalogs:latest > torrentio_catalogs_latest.tar
      - name: Upload Docker image
        uses: appleboy/scp-action@master
        with:
          host: ${{ secrets.SCRAPER_SSH_HOST }}
          username: ${{ secrets.SCRAPER_SSH_USERNAME }}
          port: ${{ secrets.SCRAPER_SSH_PORT }}
          key: ${{ secrets.SCRAPER_SSH_KEY }}
          source: torrentio_catalogs_latest.tar
          target: /tmp/docker
          overwrite: true
      - name: Deploy Docker image
        uses: appleboy/ssh-action@master
        with:
          host: ${{ secrets.SCRAPER_SSH_HOST }}
          username: ${{ secrets.SCRAPER_SSH_USERNAME }}
          port: ${{ secrets.SCRAPER_SSH_PORT }}
          key: ${{ secrets.SCRAPER_SSH_KEY }}
          script: |
            docker load -i /tmp/docker/torrentio_catalogs_latest.tar
            docker stop torrentio-catalogs
            docker rm torrentio-catalogs
            docker run -p 80:7000 -d --name torrentio-catalogs --restart always -e MONGODB_URI=${{ secrets.SCRAPER_MONGODB_URI }} -e DATABASE_URI=${{ secrets.DATABASE_URI }} torrentio-catalogs:latest
            docker image prune -f

README.md

@@ -1,3 +1,10 @@
 # Torrentio
 - [torrentio-addon](addon) - the Stremio addon which will query scraped entries and return Stremio stream results.
+
+## Self-hosted quickstart
+```
+cd addon/hosted
+docker-compose up -d
+```
+Then open `http://127.0.0.1:7000` in your browser.
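Once the containers are up, you can verify the addon is serving its manifest (a minimal sketch, assuming the default `7000:7000` port mapping from docker-compose.yml):

```python
import requests

# Stremio addons expose their descriptor at /manifest.json
resp = requests.get("http://127.0.0.1:7000/manifest.json", timeout=10)
resp.raise_for_status()
manifest = resp.json()
print(manifest["id"], manifest["version"])  # e.g. com.stremio.torrentio-sh.addon 0.0.14
```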

addon/hosted/.dockerignore

@@ -0,0 +1,4 @@
*.lock
*/scrape-cache
Dockerfile
docker-compose.yml

addon/hosted/Dockerfile

@@ -0,0 +1,12 @@
FROM python:3.10.13
COPY . .
# Install pipenv and the sqlite3 CLI, create and seed the database from
# torrentio.sql, then drop a .init marker so the scraper runs its initial
# sync exactly once. apt-get needs -y to proceed in a non-interactive build.
RUN pip --no-cache-dir install pipenv && \
    apt-get update && apt-get install -y sqlite3 && \
    apt-get clean autoclean && \
    apt-get autoremove --yes && \
    mkdir /sqlite && \
    sqlite3 /sqlite/torrentio.sqlite "VACUUM;" && \
    sqlite3 /sqlite/torrentio.sqlite < torrentio.sql && \
    pipenv install && touch .init
CMD [ "pipenv", "run", "python", "-u", "scraper.py" ]

addon/hosted/Pipfile

@@ -0,0 +1,16 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
requests = "*"
beautifulsoup4 = "*"
imdbpy = "*"
apscheduler = "*"

[dev-packages]

[requires]
python_version = "3.10"
python_full_version = "3.10.13"

addon/hosted/config.json

@@ -0,0 +1,15 @@
{
  "1337x": {
    "sleep": 10,
    "urls_to_scrape": [
      "https://1337x.to/trending/w/movies/",
      "https://1337x.to/trending/d/movies/"
    ],
    "scrape_interval": {
      "days": 0,
      "hours": 1,
      "minutes": 0,
      "seconds": 0
    }
  }
}
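These keys are consumed via `read_config` in `shared.py`: `sleep` throttles requests to 1337x, `urls_to_scrape` lists the pages polled on a schedule, and `scrape_interval` is passed straight to APScheduler's interval trigger. A quick sketch of the lookups the scraper performs:

```python
from shared import read_config

sleep = read_config("1337x", "sleep")               # seconds to wait between page requests
pages = read_config("1337x", "urls_to_scrape")      # trending pages polled on a schedule
interval = read_config("1337x", "scrape_interval")  # days/hours/minutes/seconds for APScheduler
```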

addon/hosted/docker-compose.yml

@@ -0,0 +1,30 @@
version: '2'
services:
  mongodb:
    image: docker.io/bitnami/mongodb:7.0
    ports:
      - "27017:27017"
    volumes:
      - 'mongodb_data:/bitnami/mongodb'
  scraper:
    build: ./
    volumes:
      - 'sqlite_data:/sqlite'
  torrentio:
    build: ../
    volumes:
      - 'sqlite_data:/sqlite'
    ports:
      - "7000:7000"
    links:
      - "mongodb:mongodb"
    environment:
      - MONGODB_URI=mongodb://mongodb:27017/torrentio
      - DATABASE_URI=sqlite:/sqlite/torrentio.sqlite
volumes:
  mongodb_data:
    driver: local
  sqlite_data:
    driver: local

addon/hosted/p1337x.py

@@ -0,0 +1,137 @@
import os
import re
import math
import requests
import time
from bs4 import BeautifulSoup
from shared import imdb_find, build_and_write, extract_title, read_config

TORRENT_CACHES = ('http://itorrents.org', 'http://torrage.info', 'http://btcache.me')
BASE_URL = 'https://1337x.to'
MOVIE_BASE = 'https://1337x.to/movie-library'
MOVIE_LIBRARY_MAX_PAGE = 301
CACHE_DIR = "./scrape-cache"
PROVIDER = "1337x"
SLEEP_BETWEEN_REQUESTS = read_config(PROVIDER, "sleep")

if not os.path.exists(CACHE_DIR):
    os.makedirs(CACHE_DIR)


def get_links_and_process(url):
    # Scrape one listing page and process every torrent link found on it.
    links = []
    print(f"Requesting movies from: {url}")
    req = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    main = req.text
    soup = BeautifulSoup(main, "html.parser")
    for a in soup.find_all("a", href=True):
        if a.get("href").startswith("/torrent/"):
            links.append((a.get("href"), extract_title(a.text)))
    process_links(links)


def get_links_initial():
    # Walk the whole movie library once, caching pages on disk so the
    # initial sync can resume without re-downloading everything.
    links = []
    for i in range(1, MOVIE_LIBRARY_MAX_PAGE + 1):
        try:
            print(f"Sleeping {SLEEP_BETWEEN_REQUESTS}")
            time.sleep(SLEEP_BETWEEN_REQUESTS)
            main = ""
            if os.path.isfile(f"{CACHE_DIR}/main-{i}.html"):
                print(f"Reading main page({i}) from cache...")
                main = open(f"{CACHE_DIR}/main-{i}.html", "r").read()
            else:
                print(f"Requesting main index: {MOVIE_BASE}/{i}/")
                req = requests.get(f"{MOVIE_BASE}/{i}/", headers={'User-Agent': 'Mozilla/5.0'})
                if req.status_code == 404:
                    print(f"Page does not exist: {MOVIE_BASE}/{i}/. Breaking loop.")
                    break
                main = req.text
                open(f"{CACHE_DIR}/main-{i}.html", "w+").write(main)
            movies = []
            soup = BeautifulSoup(main, "html.parser")
            for h3 in soup.find_all("h3"):
                a = h3.findChildren("a", href=True)[0]
                movie_link = a.get("href")
                movie_title = a.text
                movies.append((movie_title, movie_link))
            for movie in movies:
                if os.path.isfile(f"{CACHE_DIR}{movie[1]}html.html"):
                    print(f"Reading movie page({movie[0]}) from cache...")
                    main = open(f"{CACHE_DIR}{movie[1]}html.html").read()
                else:
                    print(f"Requesting movie releases: {BASE_URL}{movie[1]}")
                    req = requests.get(f"{BASE_URL}{movie[1]}", headers={'User-Agent': 'Mozilla/5.0'})
                    main = req.text
                    if not os.path.exists(f"{CACHE_DIR}{movie[1]}"):
                        os.makedirs(f"{CACHE_DIR}{movie[1]}")
                    open(f"{CACHE_DIR}{movie[1]}html.html", "w+").write(main)
                soup = BeautifulSoup(main, "html.parser")
                for href in soup.find_all("a", href=True):
                    if href.get("href").startswith("/torrent/"):
                        links.append((href.get("href"), movie[0]))
        except Exception as e:
            print(e)
    return links


def process_links(links):
    # Fetch each torrent page and extract the metadata the database needs.
    print(f"Checking links...({len(links)})")
    counter = 1
    for link in links:
        print(f"Processing: {BASE_URL}{link[0]} {counter}/{len(links)}")
        req = requests.get(f"{BASE_URL}{link[0]}", headers={'User-Agent': 'Mozilla/5.0'})
        torrent_html = req.text
        t = {}
        soup = BeautifulSoup(torrent_html, "html.parser")
        t['title'] = soup.find("h1").text.strip()
        t['size'] = 0
        t['magnets'] = []
        t['torrents'] = []
        all_a = soup.find_all("a", href=True)
        for a in all_a:
            if a.get("href").startswith("https://www.imdb.com/title"):
                t['imdbid'] = a.get("href").rstrip("\\").split('/')[-1]
            if a.get("href").startswith("magnet:"):
                t['magnets'].append(a.get("href"))
            if a.get("href").startswith(TORRENT_CACHES):
                t['torrents'].append(a.get("href"))
        all_li = soup.find_all("li")
        for li in all_li:
            if "Total size" in li.text:
                size = li.findChildren("span")[0].text
                mb = "MB" in size
                # Keep the fractional part so e.g. "1.4 GB" is not truncated to 1 GB.
                size = re.sub(r'\s(GB|MB)', '', size).replace(',', '')
                if mb:
                    t['size'] = math.trunc(float(size) * 1048576)  # MB to bytes (2**20); the earlier 107374182 looked like a typo of the GB factor
                else:
                    t['size'] = math.trunc(float(size) * 1073741824)  # GB to bytes (2**30)
        t['seeders'] = soup.find("span", {"class": "seeds"}).text
        all_p = soup.find_all("p")
        for p in all_p:
            if "Infohash :" in p.text:
                t['infoHash'] = p.findChildren("span")[0].text.lower()
        t['files'] = []
        file_div = soup.find("div", {"id": "files"})
        for li in file_div.findChildren("li"):
            f = re.sub(r'\s\(.*\)', '', li.text)
            t["files"].append(f)
        t['trackers'] = []
        tracker_div = soup.find("div", {"id": "tracker-list"})
        for tracker in tracker_div.findChildren("li"):
            t['trackers'].append(tracker.text.strip())
        if 'imdbid' not in t or t['imdbid'] == '':
            # Fall back to a raw regex over the page, then to an IMDb name search.
            found = re.search(r"https://www\.imdb\.com/title/tt\d+", torrent_html)
            if found is not None:
                t['imdbid'] = found.group(0).rstrip("\\").split('/')[-1]
            else:
                new_id = imdb_find(link[1])
                if new_id is not None:
                    t['imdbid'] = f"tt{new_id}"
                else:
                    print(f"{t['title']} has no IMDB Id")
                    continue
        build_and_write(t)
        counter += 1

addon/hosted/scraper.py

@@ -0,0 +1,41 @@
import os
import threading
from p1337x import process_links, get_links_initial, get_links_and_process
from apscheduler.schedulers.background import BackgroundScheduler
from shared import read_config

print("Scraper starting...")
if os.path.isfile(".init"):
    print("Found init file, executing initial sync. Be patient.")
    process_links(get_links_initial())
    os.remove(".init")

sched = BackgroundScheduler(timezone="America/New_York")
sched.start()

# 1337x
PROVIDER = "1337x"
pages = read_config(PROVIDER, "urls_to_scrape")
interval = read_config(PROVIDER, "scrape_interval")
for page in pages:
    j = sched.add_job(
        get_links_and_process,
        'interval',
        days=interval["days"],
        hours=interval["hours"],
        minutes=interval["minutes"],
        seconds=interval["seconds"],
        id=page,
        args=[page],
        max_instances=1)
    print(f"{page} will be scraped {j.next_run_time}.")

# Wait forever
main_thread = threading.main_thread()
while True:
    L = threading.enumerate()
    L.remove(main_thread)  # avoid joining the main thread itself
    for t in L:
        t.join()

addon/hosted/shared.py

@@ -0,0 +1,73 @@
import sqlite3
import re
import os
import json
from imdb import Cinemagoer

SQLITE_PATH = "/sqlite/torrentio.sqlite"
ia = Cinemagoer()
CONFIG = "config.json"


def read_config(provider, key):
    if os.path.isfile(CONFIG):
        with open(CONFIG, "r") as f:
            cfg = json.load(f)
        return cfg[provider][key]


def filter_file(file):
    # Only keep video files the addon can stream.
    allowed_ext = ['.mp4', '.mkv', '.avi', '.mpeg', '.mpg', '.mpv', '.mov']
    return os.path.splitext(file)[1] in allowed_ext


def create_connection(db_file):
    conn = None
    try:
        conn = sqlite3.connect(db_file, check_same_thread=False)
    except Exception as e:
        print(e)
        exit(1)
    return conn


sqlite = create_connection(SQLITE_PATH)


def build_and_write(torrent):
    try:
        print(f"Recording {torrent['title']} in the database")
        q = "INSERT OR REPLACE INTO torrents (infoHash, provider, title, size, type, uploadDate, seeders, trackers) VALUES (?,?,?,?,?,?,?,?)"
        p = (torrent['infoHash'], '1337x', torrent['title'], torrent['size'], 'movie', '1/1/2024', torrent['seeders'], ','.join(torrent['trackers']))
        cursor = sqlite.cursor()
        cursor.execute(q, p)
        for file in torrent['files']:
            if filter_file(file):
                q = "INSERT OR REPLACE INTO files (infoHash, fileIndex, title, size, imdbId) VALUES (?,?,?,?,?)"
                p = (torrent['infoHash'], torrent['files'].index(file), file, torrent['size'], torrent['imdbid'])
                cursor.execute(q, p)
        sqlite.commit()
        cursor.close()
    except sqlite3.Error as error:
        print(error)


def imdb_find(name):
    movies = ia.search_movie(name)
    if len(movies) >= 1:
        return movies[0].movieID
    return None


def extract_title(filename):
    try:
        filename = filename.strip()  # strip() returns a new string; the result must be assigned
        filename = filename.replace('.', ' ')
        res = re.search(r'([^\\]+)\.(avi|mkv|mpeg|mpg|mov|mp4)$', filename)
        if res:
            filename = res.group(1)
        res = re.search(r'(.*?)(dvdrip|xvid| cd[0-9]|dvdscr|brrip|divx|[\{\(\[]?[0-9]{4}).*', filename)
        if res:
            filename = res.group(1)
        res = re.search(r'(.*?)\(.*\)(.*)', filename)
        if res:
            filename = res.group(1)
        return filename
    except Exception:
        return ""
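For reference, a tiny driver showing what `extract_title` yields for typical 1337x link texts (sample inputs invented for illustration; the function returns the leading title fragment, cut at a year or release tag, so trailing whitespace may remain):

```python
from shared import extract_title

# Invented sample inputs; the year "[0-9]{4}" alternative does most of the work.
for name in ("Movie.Name.2023.1080p.BluRay.x264",
             "Some Movie (2019) 720p WEBRip"):
    print(repr(extract_title(name)))
# Expected output along the lines of:
#   'Movie Name '
#   'Some Movie '
```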

addon/hosted/torrentio.sql

@@ -0,0 +1,44 @@
CREATE TABLE "torrents" (
    "infoHash" TEXT,
    "provider" TEXT NOT NULL,
    "torrentId" TEXT,
    "title" TEXT NOT NULL,
    "size" INTEGER,
    "type" TEXT NOT NULL,
    "uploadDate" TEXT NOT NULL,
    "seeders" INTEGER,
    "trackers" TEXT,
    "languages" TEXT,
    "resolution" TEXT,
    "createdAt" TEXT,
    "updatedAt" TEXT,
    PRIMARY KEY("infoHash")
);
CREATE TABLE "files" (
    "id" INTEGER,
    "infoHash" TEXT NOT NULL,
    "fileIndex" INTEGER,
    "title" TEXT,
    "size" INTEGER,
    "imdbId" TEXT,
    "imdbSeason" INTEGER,
    "imdbEpisode" INTEGER,
    "kitsuId" INTEGER,
    "kitsuEpisode" INTEGER,
    "createdAt" TEXT,
    "updatedAt" TEXT,
    FOREIGN KEY("infoHash") REFERENCES "torrents"("infoHash") ON DELETE CASCADE,
    PRIMARY KEY("id" AUTOINCREMENT),
    UNIQUE(infoHash, fileIndex)
);
CREATE TABLE "subtitles" (
    "infoHash" TEXT NOT NULL,
    "fileIndex" INTEGER NOT NULL,
    "fileId" INTEGER,
    "title" TEXT NOT NULL,
    "size" INTEGER NOT NULL,
    FOREIGN KEY("fileId") REFERENCES "files"("id") ON DELETE SET NULL,
    FOREIGN KEY("infoHash") REFERENCES "torrents"("infoHash") ON DELETE CASCADE
);
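Once the scraper has populated the database, a quick sanity check can join the two tables the addon reads from (a sketch; run it inside a container where `/sqlite/torrentio.sqlite` is mounted, or adjust the path to wherever the `sqlite_data` volume lives on your host):

```python
import sqlite3

conn = sqlite3.connect("/sqlite/torrentio.sqlite")
# Newest torrents together with their playable files, as the addon sees them.
rows = conn.execute(
    """
    SELECT t.title, t.seeders, f.title AS file_title
    FROM torrents AS t
    JOIN files AS f ON f.infoHash = t.infoHash
    ORDER BY t.uploadDate DESC
    LIMIT 10
    """
).fetchall()
for title, seeders, file_title in rows:
    print(f"{title} ({seeders} seeders): {file_title}")
conn.close()
```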

addon/lib/manifest.js

@@ -10,14 +10,14 @@ const CatalogMochs = Object.values(MochOptions).filter(moch => moch.catalog);
 export function manifest(config = {}) {
   const overrideManifest = getManifestOverride(config);
   const baseManifest = {
-    id: 'com.stremio.torrentio.addon',
+    id: 'com.stremio.torrentio-sh.addon',
     version: '0.0.14',
     name: getName(overrideManifest, config),
     description: getDescription(config),
     catalogs: getCatalogs(config),
     resources: getResources(config),
     types: [Type.MOVIE, Type.SERIES, Type.ANIME, Type.OTHER],
-    background: `https://i.ibb.co/VtSfFP9/t8wVwcg.jpg`,
+    background: `https://images6.alphacoders.com/134/1344105.png`,
     logo: `https://i.ibb.co/w4BnkC9/GwxAcDV.png`,
     behaviorHints: {
       configurable: true,
@@ -35,7 +35,7 @@ export function dummyManifest() {
 }
 function getName(manifest, config) {
-  const rootName = manifest?.name || 'Torrentio';
+  const rootName = manifest?.name || 'Torrentio-sh';
   const mochSuffix = Object.values(MochOptions)
     .filter(moch => config[moch.key])
     .map(moch => moch.shortName)

addon/lib/streamInfo.js

@@ -4,7 +4,7 @@
 import { mapLanguages } from './languages.js';
 import { enrichStreamSources, getSources } from './magnetHelper.js';
 import { getSubtitles } from './subtitles.js';
-const ADDON_NAME = 'Torrentio';
+const ADDON_NAME = 'Torrentio-sh';
 const SIZE_DELTA = 0.02;
 const UNKNOWN_SIZE = 300000000;
 const CAM_SOURCES = ['CAM', 'TeleSync', 'TeleCine', 'SCR'];

addon/package-lock.json (generated; diff suppressed because it is too large)

addon/package.json

@@ -30,6 +30,7 @@
"request-ip": "^3.3.0",
"router": "^1.3.8",
"sequelize": "^6.31.1",
"sqlite3": "^5.1.7",
"stremio-addon-sdk": "^1.6.10",
"swagger-stats": "^0.99.7",
"ua-parser-js": "^1.0.36",