adds rarbg dump scraper based on scraped imdb ids
This commit is contained in:
10
README.md
10
README.md
@@ -12,4 +12,12 @@ https://thepiratebay.org/static/dump/csv/
|
|||||||
|
|
||||||
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
|
https://mega.nz/#F!tktzySBS!ndSEaK3Z-Uc3zvycQYxhJA
|
||||||
|
|
||||||
https://web.archive.org/web/20150416071329/http://kickass.to/api
|
https://web.archive.org/web/20150416071329/http://kickass.to/api
|
||||||
|
|
||||||
|
### Migrating Database
|
||||||
|
|
||||||
|
When migrating the database to a new one, it is important to restart the `files_id_seq` sequence at the maximum file id value plus 1.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
ALTER SEQUENCE files_id_seq RESTART WITH <last_file_id + 1>;
|
||||||
|
```
|
||||||
@@ -15,6 +15,10 @@ async function createTorrentEntry(torrent) {
|
|||||||
torrent.imdbId = await getImdbId(titleInfo, torrent.type)
|
torrent.imdbId = await getImdbId(titleInfo, torrent.type)
|
||||||
.catch(() => undefined);
|
.catch(() => undefined);
|
||||||
}
|
}
|
||||||
|
if (torrent.imdbId && torrent.imdbId.length > 9 && torrent.imdbId.startsWith('tt0')) {
|
||||||
|
// sanitize imdbId from redundant zeros
|
||||||
|
torrent.imdbId = torrent.imdbId.replace(/tt0+([0-9]{7,})$/, 'tt$1');
|
||||||
|
}
|
||||||
if (!torrent.kitsuId && torrent.type === Type.ANIME) {
|
if (!torrent.kitsuId && torrent.type === Type.ANIME) {
|
||||||
torrent.kitsuId = await getKitsuId(titleInfo)
|
torrent.kitsuId = await getKitsuId(titleInfo)
|
||||||
.catch(() => undefined);
|
.catch(() => undefined);
|
||||||
@@ -32,8 +36,8 @@ async function createTorrentEntry(torrent) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
repository.createTorrent(torrent)
|
return repository.createTorrent(torrent)
|
||||||
.then(() => files.forEach(file => repository.createFile(file)))
|
.then(() => Promise.all(files.map(file => repository.createFile(file))))
|
||||||
.then(() => console.log(`Created entry for ${torrent.title}`));
|
.then(() => console.log(`Created entry for ${torrent.title}`));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -153,12 +153,12 @@ async function decomposeEpisodes(torrent, files, metadata = { episodeCount: [] }
|
|||||||
&& sortedEpisodes.every(ep => metadata.episodeCount[div100(ep) - 1] >= mod100(ep))
|
&& sortedEpisodes.every(ep => metadata.episodeCount[div100(ep) - 1] >= mod100(ep))
|
||||||
&& files.every(file => !file.season || file.episodes.every(ep => div100(ep) === file.season))) {
|
&& files.every(file => !file.season || file.episodes.every(ep => div100(ep) === file.season))) {
|
||||||
decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata);
|
decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata);
|
||||||
|
} else if (files.every(file => (!file.season || !metadata.episodeCount[file.season - 1]) && file.date)) {
|
||||||
|
decomposeDateEpisodeFiles(torrent, files, metadata);
|
||||||
} else if (files.filter(file => !file.isMovie && file.episodes).every(file => !file.season && file.episodes) ||
|
} else if (files.filter(file => !file.isMovie && file.episodes).every(file => !file.season && file.episodes) ||
|
||||||
files.some(file => file.season && file.episodes && file.episodes
|
files.some(file => file.season && file.episodes && file.episodes
|
||||||
.every(ep => metadata.episodeCount[file.season - 1] < ep))) {
|
.every(ep => metadata.episodeCount[file.season - 1] < ep))) {
|
||||||
decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
|
decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
|
||||||
} else if (files.every(file => (!file.season || !metadata.episodeCount[file.season - 1]) && file.date)) {
|
|
||||||
decomposeDateEpisodeFiles(torrent, files, metadata);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return files;
|
return files;
|
||||||
|
|||||||
@@ -168,5 +168,5 @@ async function findAllFiles() {
|
|||||||
//addMissingEpisodes().then(() => console.log('Finished'));
|
//addMissingEpisodes().then(() => console.log('Finished'));
|
||||||
//findAllFiles().then(() => console.log('Finished'));
|
//findAllFiles().then(() => console.log('Finished'));
|
||||||
//updateMovieCollections().then(() => console.log('Finished'));
|
//updateMovieCollections().then(() => console.log('Finished'));
|
||||||
reapplyEpisodeDecomposing('83b61caa4191469a9c15ee851aff828184f9a78d', false).then(() => console.log('Finished'));
|
//reapplyEpisodeDecomposing('83b61caa4191469a9c15ee851aff828184f9a78d', false).then(() => console.log('Finished'));
|
||||||
//reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
|
//reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
|
||||||
76
scrapers/rarbg/rarbg_dump_scraper.js
Normal file
76
scrapers/rarbg/rarbg_dump_scraper.js
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
const moment = require('moment');
|
||||||
|
const Bottleneck = require('bottleneck');
|
||||||
|
const rarbg = require('rarbg-api');
|
||||||
|
const decode = require('magnet-uri');
|
||||||
|
const { Type } = require('../../lib/types');
|
||||||
|
const {
|
||||||
|
createTorrentEntry,
|
||||||
|
getStoredTorrentEntry,
|
||||||
|
updateTorrentSeeders
|
||||||
|
} = require('../../lib/torrentEntries');
|
||||||
|
|
||||||
|
// Provider label stored on created torrent entries and used in log messages.
const NAME = 'RARBG';

// Throttles the RARBG search calls issued by scrape(): one job at a time,
// with minTime set to 2500 ms between job launches.
const limiter = new Bottleneck({ minTime: 2500, maxConcurrent: 1 });

// Caps how many torrent records are processed concurrently.
const entryLimiter = new Bottleneck({ maxConcurrent: 40 });
|
||||||
|
|
||||||
|
/**
 * Runs the RARBG dump scrape: loads the bundled movie and series IMDb id
 * lists, searches RARBG for each id through the rate limiter, and processes
 * every returned torrent record through the entry limiter.
 *
 * @returns {Promise<void>} settles once every id and every record has been handled.
 */
async function scrape() {
  console.log(`[${moment()}] starting ${NAME} dump scrape...`);

  // The id lists are loaded lazily so they are only read when a scrape actually runs.
  const movieImdbIds = require('./rargb_movie_imdb_ids_2020-03-09.json');
  const seriesImdbIds = require('./rargb_series_imdb_ids_2020-03-09.json');
  const allImdbIds = [].concat(movieImdbIds, seriesImdbIds);

  // Search one IMDb id, then fan its torrents out to the entry limiter.
  const scrapeImdbId = async (imdbId) => {
    const torrents = await limiter.schedule(() => getTorrentsForImdbId(imdbId));
    const pendingEntries = torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)));
    return Promise.all(pendingEntries);
  };

  await Promise.all(allImdbIds.map(imdbId => scrapeImdbId(imdbId)));
  console.log(`[${moment()}] finished ${NAME} dump scrape`);
}
|
||||||
|
|
||||||
|
/**
 * Searches RARBG for torrents linked to the given IMDb id and maps each hit
 * to the record shape read by processTorrentRecord.
 *
 * @param imdbId - IMDb id passed to rarbg.search in `imdb` mode.
 * @returns {Promise<Object[]>} the mapped records, or an empty array when the
 *   search (or the mapping of its response) fails.
 */
async function getTorrentsForImdbId(imdbId) {
  return rarbg.search(imdbId, { limit: 100, sort: 'seeders', format: 'json_extended', ranked: 0 }, 'imdb')
      .then(torrents => torrents.map(torrent => ({
        name: torrent.title,
        infoHash: decode(torrent.download).infoHash,
        magnetLink: torrent.download,
        seeders: torrent.seeders,
        leechers: torrent.leechers,
        category: torrent.category,
        size: torrent.size,
        uploadDate: new Date(torrent.pubdate),
        imdbId: torrent.episode_info && torrent.episode_info.imdb
      })))
      .catch((err) => {
        // Best-effort: a failed lookup must not abort the whole dump scrape,
        // but it is logged so failures are distinguishable from "no torrents".
        console.warn(`Failed ${NAME} search for ${imdbId}: ${(err && err.message) || err}`);
        return [];
      });
}
|
||||||
|
|
||||||
|
/**
 * Handles one scraped record: when getStoredTorrentEntry finds an existing
 * entry for it, only its seeders are updated; otherwise a new torrent entry
 * is created from the record's fields.
 *
 * @param {Object} record - a record produced by getTorrentsForImdbId.
 * @returns {Promise} the result of updateTorrentSeeders or createTorrentEntry.
 */
async function processTorrentRecord(record) {
  const alreadyStored = await getStoredTorrentEntry(record);
  if (alreadyStored) {
    return updateTorrentSeeders(record);
  }

  const { infoHash, name, category, seeders, size, uploadDate, imdbId } = record;
  return createTorrentEntry({
    provider: NAME,
    infoHash,
    title: name,
    type: getType(category),
    seeders,
    size,
    uploadDate,
    imdbId,
  });
}
|
||||||
|
|
||||||
|
// RARBG category labels that getType maps to Type.SERIES.
const seriesCategories = ['TV Episodes', 'Movies/TV-UHD-episodes', 'TV HD Episodes'];

/**
 * Maps a RARBG category label to a torrent type.
 *
 * @param {string} category - category label taken from a RARBG search result.
 * @returns Type.SERIES for the listed series categories, Type.MOVIE for anything else.
 */
function getType(category) {
  return seriesCategories.includes(category) ? Type.SERIES : Type.MOVIE;
}
|
||||||
|
|
||||||
|
module.exports = { scrape };
|
||||||
@@ -53,7 +53,7 @@ async function getLatestTorrents() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function getLatestTorrentsForCategory(category) {
|
async function getLatestTorrentsForCategory(category) {
|
||||||
return rarbg.list({ category: category, limit: 100, sort: 'last', format: 'json_extended' })
|
return rarbg.list({ category: category, limit: 100, sort: 'last', format: 'json_extended', ranked: 0 })
|
||||||
.then(torrents => torrents.map(torrent => ({
|
.then(torrents => torrents.map(torrent => ({
|
||||||
name: torrent.title,
|
name: torrent.title,
|
||||||
infoHash: decode(torrent.download).infoHash,
|
infoHash: decode(torrent.download).infoHash,
|
||||||
|
|||||||
41038
scrapers/rarbg/rargb_movie_imdb_ids_2020-03-09.json
Normal file
41038
scrapers/rarbg/rargb_movie_imdb_ids_2020-03-09.json
Normal file
File diff suppressed because it is too large
Load Diff
6081
scrapers/rarbg/rargb_series_imdb_ids_2020-03-09.json
Normal file
6081
scrapers/rarbg/rargb_series_imdb_ids_2020-03-09.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user