mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
moves scraper to a subpackage
This commit is contained in:
76
scraper/scrapers/rarbg/rarbg_dump_scraper.js
Normal file
76
scraper/scrapers/rarbg/rarbg_dump_scraper.js
Normal file
@@ -0,0 +1,76 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const rarbg = require('rarbg-api');
|
||||
const decode = require('magnet-uri');
|
||||
const { Type } = require('../../lib/types');
|
||||
const {
|
||||
createTorrentEntry,
|
||||
getStoredTorrentEntry,
|
||||
updateTorrentSeeders
|
||||
} = require('../../lib/torrentEntries');
|
||||
|
||||
// Provider label persisted with each torrent entry and used in log lines.
const NAME = 'RARBG';

// Serializes calls to the RARBG API: one at a time, at least 2.5s apart
// (presumably to stay under RARBG's request rate limit — confirm against API docs).
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
// Caps how many torrent records are processed/persisted concurrently.
const entryLimiter = new Bottleneck({ maxConcurrent: 40 });
|
||||
|
||||
/**
 * One-off dump scrape: walks every imdb id in the bundled 2020-03-09 movie and
 * series dumps, fetches matching torrents (rate-limited) and stores each record.
 * @returns {Promise<void>} resolves once every id has been processed.
 */
async function scrape() {
  console.log(`[${moment()}] starting ${NAME} dump scrape...`);
  const movieImdbIds = require('./rargb_movie_imdb_ids_2020-03-09.json');
  const seriesImdbIds = require('./rargb_series_imdb_ids_2020-03-09.json');
  const allImdbIds = [].concat(movieImdbIds, seriesImdbIds);

  const perIdJobs = allImdbIds.map((imdbId) =>
      limiter.schedule(() => getTorrentsForImdbId(imdbId))
          .then((torrents) => Promise.all(
              torrents.map((torrent) => entryLimiter.schedule(() => processTorrentRecord(torrent))))));

  return Promise.all(perIdJobs)
      .then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
}
|
||||
|
||||
/**
 * Searches RARBG for all torrents matching a single imdb id and normalizes
 * the API payload into the internal record shape.
 * @param {string} imdbId - imdb identifier to search by.
 * @returns {Promise<Array<Object>>} normalized records; empty array on any API failure.
 */
async function getTorrentsForImdbId(imdbId) {
  return rarbg.search(imdbId, { limit: 100, sort: 'seeders', format: 'json_extended', ranked: 0 }, 'imdb')
      .then((torrents) => torrents.map((torrent) => ({
        name: torrent.title,
        infoHash: decode(torrent.download).infoHash,
        magnetLink: torrent.download,
        seeders: torrent.seeders,
        leechers: torrent.leechers,
        category: torrent.category,
        size: torrent.size,
        uploadDate: new Date(torrent.pubdate),
        imdbId: torrent.episode_info && torrent.episode_info.imdb
      })))
      .catch((err) => {
        // Fix: errors were silently swallowed here. Keep the best-effort
        // empty-result contract, but surface the failure for diagnosis.
        console.warn(`[${moment()}] ${NAME} search failed for ${imdbId}:`, err);
        return [];
      });
}
|
||||
|
||||
/**
 * Persists one scraped torrent record. If an entry with the same identity is
 * already stored, only its seeder count is refreshed; otherwise a new torrent
 * entry is created under this provider.
 * @param {Object} record - normalized record produced by getTorrentsForImdbId.
 */
async function processTorrentRecord(record) {
  const alreadyStored = await getStoredTorrentEntry(record);
  if (alreadyStored) {
    return updateTorrentSeeders(record);
  }

  return createTorrentEntry({
    provider: NAME,
    infoHash: record.infoHash,
    title: record.name,
    type: getType(record.category),
    seeders: record.seeders,
    size: record.size,
    uploadDate: record.uploadDate,
    imdbId: record.imdbId
  });
}
|
||||
|
||||
// RARBG category labels that denote episodic content; anything else is a movie.
const seriesCategories = ['TV Episodes', 'Movies/TV-UHD-episodes', 'TV HD Episodes'];

/**
 * Maps a RARBG category label to the internal content type.
 * @param {string} category - category string from the API payload.
 * @returns {string} Type.SERIES for known episode categories, Type.MOVIE otherwise.
 */
function getType(category) {
  return seriesCategories.includes(category) ? Type.SERIES : Type.MOVIE;
}

module.exports = { scrape };
|
||||
103
scraper/scrapers/rarbg/rarbg_scraper.js
Normal file
103
scraper/scrapers/rarbg/rarbg_scraper.js
Normal file
@@ -0,0 +1,103 @@
|
||||
const moment = require('moment');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const rarbg = require('rarbg-api');
|
||||
const decode = require('magnet-uri');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const {
|
||||
createTorrentEntry,
|
||||
getStoredTorrentEntry,
|
||||
updateTorrentSeeders
|
||||
} = require('../../lib/torrentEntries');
|
||||
|
||||
// Provider label used for the repository provider row, stored entries and log lines.
const NAME = 'RARBG';

// Serializes calls to the RARBG API: one at a time, at least 2.5s apart
// (presumably to stay under RARBG's request rate limit — confirm against API docs).
const limiter = new Bottleneck({ maxConcurrent: 1, minTime: 2500 });
// Caps how many torrent records are processed/persisted concurrently.
const entryLimiter = new Bottleneck({ maxConcurrent: 40 });
|
||||
|
||||
/**
 * Incremental scrape: fetches the latest torrents from every allowed RARBG
 * category, stores them, and records scrape progress on the provider row.
 * @returns {Promise<void>} resolves once all records are processed and
 *   provider bookkeeping is persisted.
 */
async function scrape() {
  const scrapeStart = moment();
  const lastScrape = await repository.getProvider({ name: NAME });
  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);

  const latestTorrents = await getLatestTorrents();
  return Promise.all(latestTorrents.map((torrent) => entryLimiter.schedule(() => processTorrentRecord(torrent))))
      .then(() => {
        lastScrape.lastScraped = scrapeStart;
        // Fix: the previous `latestTorrents.length && …` expression wrote the
        // number 0 into lastScrapedId whenever the scrape came back empty,
        // clobbering the previously persisted id. Only update it when there
        // is actually a new batch.
        if (latestTorrents.length > 0) {
          // FIXME(review): records built by getLatestTorrentsForCategory carry
          // no `torrentId` property, so this is currently always undefined —
          // either map an id through or drop this bookkeeping field.
          lastScrape.lastScrapedId = latestTorrents[latestTorrents.length - 1].torrentId;
        }
        return repository.updateProvider(lastScrape);
      })
      .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
}
|
||||
|
||||
/**
 * Lists the newest torrents for every allowed movie/series category,
 * one rate-limited API call per category, flattened into a single array.
 * @returns {Promise<Array<Object>>} normalized records from all categories.
 */
async function getLatestTorrents() {
  const allowedCategories = [
    rarbg.CATEGORY['4K_MOVIES_X264_4k'],
    rarbg.CATEGORY['4K_X265_4k'],
    rarbg.CATEGORY['4k_X264_4k_HDR'],
    rarbg.CATEGORY.MOVIES_XVID,
    rarbg.CATEGORY.MOVIES_XVID_720P,
    rarbg.CATEGORY.MOVIES_X264,
    rarbg.CATEGORY.MOVIES_X264_1080P,
    rarbg.CATEGORY.MOVIES_X264_720P,
    rarbg.CATEGORY.MOVIES_X264_3D,
    rarbg.CATEGORY.MOVIES_FULL_BD,
    rarbg.CATEGORY.MOVIES_BD_REMUX,
    rarbg.CATEGORY.TV_EPISODES,
    rarbg.CATEGORY.TV_UHD_EPISODES,
    rarbg.CATEGORY.TV_HD_EPISODES
  ];

  const categoryJobs = allowedCategories.map((category) =>
      limiter.schedule(() => getLatestTorrentsForCategory(category)));
  return Promise.all(categoryJobs).then((perCategoryResults) => perCategoryResults.flat());
}
|
||||
|
||||
/**
 * Fetches the most recent torrents for one RARBG category and normalizes
 * the API payload into the internal record shape.
 * @param {*} category - a rarbg.CATEGORY value.
 * @returns {Promise<Array<Object>>} normalized records; empty array on any API failure.
 */
async function getLatestTorrentsForCategory(category) {
  return rarbg.list({ category: category, limit: 100, sort: 'last', format: 'json_extended', ranked: 0 })
      .then((torrents) => torrents.map((torrent) => ({
        name: torrent.title,
        infoHash: decode(torrent.download).infoHash,
        magnetLink: torrent.download,
        seeders: torrent.seeders,
        leechers: torrent.leechers,
        category: torrent.category,
        size: torrent.size,
        uploadDate: new Date(torrent.pubdate),
        imdbId: torrent.episode_info && torrent.episode_info.imdb
      })))
      .catch((err) => {
        // Fix: errors were silently swallowed here. Keep the best-effort
        // empty-result contract, but surface the failure for diagnosis.
        console.warn(`[${moment()}] ${NAME} list failed for category ${category}:`, err);
        return [];
      });
}
|
||||
|
||||
/**
 * Persists one scraped torrent record. If an entry with the same identity is
 * already stored, only its seeder count is refreshed; otherwise a new torrent
 * entry is created under this provider.
 * @param {Object} record - normalized record produced by getLatestTorrentsForCategory.
 */
async function processTorrentRecord(record) {
  const alreadyStored = await getStoredTorrentEntry(record);
  if (alreadyStored) {
    return updateTorrentSeeders(record);
  }

  return createTorrentEntry({
    provider: NAME,
    infoHash: record.infoHash,
    title: record.name,
    type: getType(record.category),
    seeders: record.seeders,
    size: record.size,
    uploadDate: record.uploadDate,
    imdbId: record.imdbId
  });
}
|
||||
|
||||
// RARBG category labels that denote episodic content; anything else is a movie.
const seriesCategories = ['TV Episodes', 'Movies/TV-UHD-episodes', 'TV HD Episodes'];

/**
 * Maps a RARBG category label to the internal content type.
 * @param {string} category - category string from the API payload.
 * @returns {string} Type.SERIES for known episode categories, Type.MOVIE otherwise.
 */
function getType(category) {
  return seriesCategories.includes(category) ? Type.SERIES : Type.MOVIE;
}

module.exports = { scrape };
|
||||
41038
scraper/scrapers/rarbg/rargb_movie_imdb_ids_2020-03-09.json
Normal file
41038
scraper/scrapers/rarbg/rargb_movie_imdb_ids_2020-03-09.json
Normal file
File diff suppressed because it is too large
Load Diff
6081
scraper/scrapers/rarbg/rargb_series_imdb_ids_2020-03-09.json
Normal file
6081
scraper/scrapers/rarbg/rargb_series_imdb_ids_2020-03-09.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user