mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
moves scraper to a subpackage
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,3 +1,3 @@
|
|||||||
/.idea
|
/.idea
|
||||||
/node_modules
|
**/node_modules
|
||||||
**.env
|
**.env
|
||||||
26
README.md
26
README.md
@@ -1,23 +1,5 @@
|
|||||||
# Torrentio Scraper
|
# Torrentio
|
||||||
|
|
||||||
## Initial dumps
|
Torrentio includes 2 parts:
|
||||||
|
- [torrentio-scraper](scraper) - a background process continuously scraping the newest torrents from configured torrent providers.
|
||||||
### The Pirate Bay
|
- [torrentio-addon](addon) - the Stremio addon which will query scraped entries and return Stremio stream results.
|
||||||
|
|
||||||
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
|
|
||||||
|
|
||||||
https://thepiratebay.org/static/dump/csv/
|
|
||||||
|
|
||||||
### Kickass
|
|
||||||
|
|
||||||
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
|
|
||||||
|
|
||||||
https://web.archive.org/web/20150416071329/http://kickass.to/api
|
|
||||||
|
|
||||||
### Migrating Database
|
|
||||||
|
|
||||||
When migrating the database to a new one, it is important to alter the `files_id_seq` sequence to the maximum file id value plus 1.
|
|
||||||
|
|
||||||
```sql
|
|
||||||
ALTER SEQUENCE files_id_seq RESTART WITH <last_file_id + 1>;
|
|
||||||
```
|
|
||||||
@@ -1,13 +1,13 @@
|
|||||||
{
|
{
|
||||||
"name": "stremio-torrention",
|
"name": "stremio-torrention",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"main": "index.js",
|
"main": "scraper/index.js",
|
||||||
"engines": {
|
"engines": {
|
||||||
"npm": "6.x",
|
"npm": "6.x",
|
||||||
"node": "10.x"
|
"node": "10.x"
|
||||||
},
|
},
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"start": "node index.js"
|
"start": "node scraper/index.js"
|
||||||
},
|
},
|
||||||
"author": "TheBeastLT <pauliox@beyond.lt>",
|
"author": "TheBeastLT <pauliox@beyond.lt>",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
|||||||
37
scraper/README.md
Normal file
37
scraper/README.md
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
# Torrentio Scraper
|
||||||
|
|
||||||
|
## Initial dumps
|
||||||
|
|
||||||
|
### The Pirate Bay
|
||||||
|
|
||||||
|
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
|
||||||
|
|
||||||
|
https://thepiratebay.org/static/dump/csv/
|
||||||
|
|
||||||
|
### Kickass
|
||||||
|
|
||||||
|
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
|
||||||
|
|
||||||
|
https://web.archive.org/web/20150416071329/http://kickass.to/api
|
||||||
|
|
||||||
|
### RARBG
|
||||||
|
|
||||||
|
Scrape the movie and TV catalogs using [www.webscraper.io](https://www.webscraper.io/) to collect the available `imdbIds`, then use those via the API to search for torrents.
|
||||||
|
|
||||||
|
Movies sitemap
|
||||||
|
```json
|
||||||
|
{"_id":"rarbg-movies","startUrl":["https://rarbgmirror.org/catalog/movies/[1-4110]"],"selectors":[{"id":"rarbg-movie-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width]","multiple":true,"regex":"tt[0-9]+","delay":0}]}
|
||||||
|
```
|
||||||
|
|
||||||
|
TV sitemap
|
||||||
|
```json
|
||||||
|
{"_id":"rarbg-tv","startUrl":["https://rarbgmirror.org/catalog/tv/[1-609]"],"selectors":[{"id":"rarbg-tv-imdb-id","type":"SelectorHTML","parentSelectors":["_root"],"selector":".lista-rounded table td[width='110']","multiple":true,"regex":"tt[0-9]+","delay":0}]}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Migrating Database
|
||||||
|
|
||||||
|
When migrating the database to a new one, it is important to alter the `files_id_seq` sequence to the maximum file id value plus 1.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
ALTER SEQUENCE files_id_seq RESTART WITH <last_file_id + 1>;
|
||||||
|
```
|
||||||
@@ -18,12 +18,14 @@ const PROVIDERS = [
|
|||||||
kickassScraper,
|
kickassScraper,
|
||||||
leetxScraper
|
leetxScraper
|
||||||
];
|
];
|
||||||
const SCRAPE_CRON = process.env.SCRAPE_CRON || '* 0/4 * * * *';
|
const SCRAPE_CRON = process.env.SCRAPE_CRON || '* * 0/4 * * *';
|
||||||
|
|
||||||
async function scrape() {
|
async function scrape() {
|
||||||
return PROVIDERS
|
return PROVIDERS
|
||||||
.reduce((promise, scrapper) => promise
|
.reduce(async (previousPromise, nextProvider) => {
|
||||||
.then(() => scrapper.scrape().catch(() => Promise.resolve())), Promise.resolve());
|
await previousPromise;
|
||||||
|
return nextProvider.scrape().catch(() => Promise.resolve());
|
||||||
|
}, Promise.resolve());
|
||||||
}
|
}
|
||||||
|
|
||||||
server.get('/', function (req, res) {
|
server.get('/', function (req, res) {
|
||||||
Reference in New Issue
Block a user