moves scraper to a subpackage

This commit is contained in:
TheBeastLT
2020-03-10 15:12:18 +01:00
parent dff66d8fb2
commit 58aba322c2
30 changed files with 50 additions and 28 deletions

2
.gitignore vendored
View File

@@ -1,3 +1,3 @@
/.idea
/node_modules
**/node_modules
**.env

1
Procfile Normal file
View File

@@ -0,0 +1 @@
web: node scraper/index.js

View File

@@ -1,23 +1,5 @@
# Torrentio Scraper
# Torrentio
## Initial dumps
### The Pirate Bay
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
https://thepiratebay.org/static/dump/csv/
### Kickass
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
https://web.archive.org/web/20150416071329/http://kickass.to/api
### Migrating Database
When migrating database to a new one it is important to alter the `files_id_seq` sequence to the maximum file id value plus 1.
```sql
ALTER SEQUENCE files_id_seq RESTART WITH <last_file_id + 1>;
```
Torrentio includes 2 parts:
- [torrentio-scraper](scraper) - a background process continuously scraping the newest torrents from configured torrent providers.
- [torrentio-addon](addon) - the Stremio addon which will query scraped entries and return Stremio stream results.

View File

@@ -1,13 +1,13 @@
{
"name": "stremio-torrention",
"version": "1.0.0",
"main": "index.js",
"main": "scraper/index.js",
"engines": {
"npm": "6.x",
"node": "10.x"
},
"scripts": {
"start": "node index.js"
"start": "node scraper/index.js"
},
"author": "TheBeastLT <pauliox@beyond.lt>",
"license": "MIT",

37
scraper/README.md Normal file
View File

@@ -0,0 +1,37 @@
# Torrentio Scraper
## Initial dumps
### The Pirate Bay
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
https://thepiratebay.org/static/dump/csv/
### Kickass
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
https://web.archive.org/web/20150416071329/http://kickass.to/api
### RARBG
Scrape the movie and TV catalogs using [www.webscraper.io](https://www.webscraper.io/) to collect the available `imdbIds`, then use those with the API to search for torrents.
Movies sitemap
```json
{"_id":"rarbg-movies","startUrl":["https://rarbgmirror.org/catalog/movies/[1-4110]"],"selectors":[{"id":"rarbg-movie-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width]","multiple":true,"regex":"tt[0-9]+","delay":0}]}
```
TV sitemap
```json
{"_id":"rarbg-tv","startUrl":["https://rarbgmirror.org/catalog/tv/[1-609]"],"selectors":[{"id":"rarbg-tv-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width='110']","multiple":true,"regex":"tt[0-9]+","delay":0}]}
```
### Migrating Database
When migrating the database to a new one, it is important to alter the `files_id_seq` sequence so that it restarts at the maximum file id value plus 1.
```sql
ALTER SEQUENCE files_id_seq RESTART WITH <last_file_id + 1>;
```

View File

@@ -18,12 +18,14 @@ const PROVIDERS = [
kickassScraper,
leetxScraper
];
// Cron expression, presumably used to schedule scrape() (the scheduling call is
// not visible in this hunk). The SCRAPE_CRON environment variable, when set to a
// non-empty value, overrides the built-in default.
// NOTE(review): both defaults below have six fields. If the scheduler reads them
// as "second minute hour day-of-month month day-of-week", then '* * 0/4 * * *'
// matches every second of every minute inside the selected hours rather than
// once every four hours - confirm against the cron library in use (a
// once-per-four-hours form would look like '0 0 */4 * * *').
const SCRAPE_CRON = process.env.SCRAPE_CRON || '* 0/4 * * * *';
const SCRAPE_CRON = process.env.SCRAPE_CRON || '* * 0/4 * * *';
/**
 * Runs every provider in PROVIDERS one after another, never in parallel.
 *
 * This hunk shows two variants of the sequential chain: the `.then()` reducer
 * and the async reducer. In both, a rejection from a provider's `scrape()` is
 * caught and replaced with a resolved promise, so one failing provider does not
 * stop the providers that come after it.
 *
 * @returns {Promise} the promise produced by the last reduce step; it settles
 *   once the final provider's scrape has finished or failed.
 */
async function scrape() {
return PROVIDERS
.reduce((promise, scrapper) => promise
.then(() => scrapper.scrape().catch(() => Promise.resolve())), Promise.resolve());
.reduce(async (previousPromise, nextProvider) => {
// Wait for the previous provider's run to settle before starting the next one.
await previousPromise;
// A rejection is converted into a resolved promise so the chain keeps going.
return nextProvider.scrape().catch(() => Promise.resolve());
}, Promise.resolve());
}
server.get('/', function (req, res) {