diff --git a/.gitignore b/.gitignore index 7a1e801..ba60ba0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ /.idea -/node_modules +**/node_modules **.env \ No newline at end of file diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..a7d75b5 --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: node scraper/index.js \ No newline at end of file diff --git a/README.md b/README.md index 9f3c70b..35649bc 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,5 @@ -# Torrentio Scraper +# Torrentio -## Initial dumps - -### The Pirate Bay - -https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA - -https://thepiratebay.org/static/dump/csv/ - -### Kickass - -https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA - -https://web.archive.org/web/20150416071329/http://kickass.to/api - -### Migrating Database - -When migrating database to a new one it is important to alter the `files_id_seq` sequence to the maximum file id value plus 1. - -```sql -ALTER SEQUENCE files_id_seq RESTART WITH ; -``` \ No newline at end of file +Torrentio includes 2 parts: + - [torrentio-scraper](scraper) - a background process continuously scraping the newest torrents from configured torrent providers. + - [torrentio-addon](addon) - the Stremio addon which will query scraped entries and return Stremio stream results. 
\ No newline at end of file diff --git a/package.json b/package.json index 3280f93..2f6c49d 100644 --- a/package.json +++ b/package.json @@ -1,13 +1,13 @@ { "name": "stremio-torrention", "version": "1.0.0", - "main": "index.js", + "main": "scraper/index.js", "engines": { "npm": "6.x", "node": "10.x" }, "scripts": { - "start": "node index.js" + "start": "node scraper/index.js" }, "author": "TheBeastLT ", "license": "MIT", diff --git a/scraper/README.md b/scraper/README.md new file mode 100644 index 0000000..2850312 --- /dev/null +++ b/scraper/README.md @@ -0,0 +1,37 @@ +# Torrentio Scraper + +## Initial dumps + +### The Pirate Bay + +https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA + +https://thepiratebay.org/static/dump/csv/ + +### Kickass + +https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA + +https://web.archive.org/web/20150416071329/http://kickass.to/api + +### RARBG + +Scrape movie and tv catalog using [www.webscraper.io](https://www.webscraper.io/) for available `imdbIds` and use those via the api to search for torrents. + +Movies sitemap +```json +{"_id":"rarbg-movies","startUrl":["https://rarbgmirror.org/catalog/movies/[1-4110]"],"selectors":[{"id":"rarbg-movie-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width]","multiple":true,"regex":"tt[0-9]+","delay":0}]} +``` + +TV sitemap +```json +{"_id":"rarbg-tv","startUrl":["https://rarbgmirror.org/catalog/tv/[1-609]"],"selectors":[{"id":"rarbg-tv-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width='110']","multiple":true,"regex":"tt[0-9]+","delay":0}]} +``` + +### Migrating Database + +When migrating database to a new one it is important to alter the `files_id_seq` sequence to the maximum file id value plus 1. 
+ +```sql +ALTER SEQUENCE files_id_seq RESTART WITH ; +``` \ No newline at end of file diff --git a/index.js b/scraper/index.js similarity index 82% rename from index.js rename to scraper/index.js index a102135..1cb6790 100644 --- a/index.js +++ b/scraper/index.js @@ -18,12 +18,14 @@ const PROVIDERS = [ kickassScraper, leetxScraper ]; -const SCRAPE_CRON = process.env.SCRAPE_CRON || '* 0/4 * * * *'; +const SCRAPE_CRON = process.env.SCRAPE_CRON || '* * 0/4 * * *'; async function scrape() { return PROVIDERS - .reduce((promise, scrapper) => promise - .then(() => scrapper.scrape().catch(() => Promise.resolve())), Promise.resolve()); + .reduce(async (previousPromise, nextProvider) => { + await previousPromise; + return nextProvider.scrape().catch(() => Promise.resolve()); + }, Promise.resolve()); } server.get('/', function (req, res) { diff --git a/lib/cache.js b/scraper/lib/cache.js similarity index 100% rename from lib/cache.js rename to scraper/lib/cache.js diff --git a/lib/metadata.js b/scraper/lib/metadata.js similarity index 100% rename from lib/metadata.js rename to scraper/lib/metadata.js diff --git a/lib/repository.js b/scraper/lib/repository.js similarity index 100% rename from lib/repository.js rename to scraper/lib/repository.js diff --git a/lib/torrent.js b/scraper/lib/torrent.js similarity index 100% rename from lib/torrent.js rename to scraper/lib/torrent.js diff --git a/lib/torrentEntries.js b/scraper/lib/torrentEntries.js similarity index 100% rename from lib/torrentEntries.js rename to scraper/lib/torrentEntries.js diff --git a/lib/torrentFiles.js b/scraper/lib/torrentFiles.js similarity index 100% rename from lib/torrentFiles.js rename to scraper/lib/torrentFiles.js diff --git a/lib/types.js b/scraper/lib/types.js similarity index 100% rename from lib/types.js rename to scraper/lib/types.js diff --git a/manual/manual.js b/scraper/manual/manual.js similarity index 100% rename from manual/manual.js rename to scraper/manual/manual.js diff --git 
a/scrapers/1337x/1337x_api.js b/scraper/scrapers/1337x/1337x_api.js similarity index 100% rename from scrapers/1337x/1337x_api.js rename to scraper/scrapers/1337x/1337x_api.js diff --git a/scrapers/1337x/1337x_scraper.js b/scraper/scrapers/1337x/1337x_scraper.js similarity index 100% rename from scrapers/1337x/1337x_scraper.js rename to scraper/scrapers/1337x/1337x_scraper.js diff --git a/scrapers/horriblesubs/horriblesubs_api.js b/scraper/scrapers/horriblesubs/horriblesubs_api.js similarity index 100% rename from scrapers/horriblesubs/horriblesubs_api.js rename to scraper/scrapers/horriblesubs/horriblesubs_api.js diff --git a/scrapers/horriblesubs/horriblesubs_mapping.json b/scraper/scrapers/horriblesubs/horriblesubs_mapping.json similarity index 100% rename from scrapers/horriblesubs/horriblesubs_mapping.json rename to scraper/scrapers/horriblesubs/horriblesubs_mapping.json diff --git a/scrapers/horriblesubs/horriblesubs_scraper.js b/scraper/scrapers/horriblesubs/horriblesubs_scraper.js similarity index 100% rename from scrapers/horriblesubs/horriblesubs_scraper.js rename to scraper/scrapers/horriblesubs/horriblesubs_scraper.js diff --git a/scrapers/kickass/kickass_api.js b/scraper/scrapers/kickass/kickass_api.js similarity index 100% rename from scrapers/kickass/kickass_api.js rename to scraper/scrapers/kickass/kickass_api.js diff --git a/scrapers/kickass/kickass_dump_scraper.js b/scraper/scrapers/kickass/kickass_dump_scraper.js similarity index 100% rename from scrapers/kickass/kickass_dump_scraper.js rename to scraper/scrapers/kickass/kickass_dump_scraper.js diff --git a/scrapers/kickass/kickass_scraper.js b/scraper/scrapers/kickass/kickass_scraper.js similarity index 100% rename from scrapers/kickass/kickass_scraper.js rename to scraper/scrapers/kickass/kickass_scraper.js diff --git a/scrapers/rarbg/rarbg_dump_scraper.js b/scraper/scrapers/rarbg/rarbg_dump_scraper.js similarity index 100% rename from scrapers/rarbg/rarbg_dump_scraper.js rename to 
scraper/scrapers/rarbg/rarbg_dump_scraper.js diff --git a/scrapers/rarbg/rarbg_scraper.js b/scraper/scrapers/rarbg/rarbg_scraper.js similarity index 100% rename from scrapers/rarbg/rarbg_scraper.js rename to scraper/scrapers/rarbg/rarbg_scraper.js diff --git a/scrapers/rarbg/rargb_movie_imdb_ids_2020-03-09.json b/scraper/scrapers/rarbg/rargb_movie_imdb_ids_2020-03-09.json similarity index 100% rename from scrapers/rarbg/rargb_movie_imdb_ids_2020-03-09.json rename to scraper/scrapers/rarbg/rargb_movie_imdb_ids_2020-03-09.json diff --git a/scrapers/rarbg/rargb_series_imdb_ids_2020-03-09.json b/scraper/scrapers/rarbg/rargb_series_imdb_ids_2020-03-09.json similarity index 100% rename from scrapers/rarbg/rargb_series_imdb_ids_2020-03-09.json rename to scraper/scrapers/rarbg/rargb_series_imdb_ids_2020-03-09.json diff --git a/scrapers/thepiratebay/thepiratebay_api.js b/scraper/scrapers/thepiratebay/thepiratebay_api.js similarity index 100% rename from scrapers/thepiratebay/thepiratebay_api.js rename to scraper/scrapers/thepiratebay/thepiratebay_api.js diff --git a/scrapers/thepiratebay/thepiratebay_dump_scraper.js b/scraper/scrapers/thepiratebay/thepiratebay_dump_scraper.js similarity index 100% rename from scrapers/thepiratebay/thepiratebay_dump_scraper.js rename to scraper/scrapers/thepiratebay/thepiratebay_dump_scraper.js diff --git a/scrapers/thepiratebay/thepiratebay_scraper.js b/scraper/scrapers/thepiratebay/thepiratebay_scraper.js similarity index 100% rename from scrapers/thepiratebay/thepiratebay_scraper.js rename to scraper/scrapers/thepiratebay/thepiratebay_scraper.js diff --git a/scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper.js b/scraper/scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper.js similarity index 100% rename from scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper.js rename to scraper/scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper.js