Mirror of https://github.com/knightcrawler-stremio/knightcrawler.git, synced 2024-12-20 03:29:51 +00:00.

Commit: [scraper] updates scrapers and unique index
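In short, this commit: disables the RARBG live scraper, adds a COALESCE-based unique index over the file table, normalises IMDb IDs scraped via the Bing fallback, hardens the per-category scrape chains so a failed page fetch no longer aborts pagination, switches provider bookkeeping to Sequelize instance save(), consistently maps undefined file indexes to null, refreshes The Pirate Bay proxy list, and records the magnet link and infoHash when parsing TPB search results.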
@@ -14,7 +14,7 @@ const thepiratebayUnofficialDumpScraper = require('./scrapers/thepiratebay/thepi

 const PROVIDERS = [
   // horribleSubsScraper,
-  rarbgScraper,
+  // rarbgScraper,
   thepiratebayScraper,
   kickassScraper,
   leetxScraper
@@ -42,7 +42,7 @@ function enableScheduling() {
 }

 server.get('/', function (req, res) {
-  res.send(200);
+  res.sendStatus(200);
 });

 server.listen(process.env.PORT || 7000, async () => {
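The health endpoint previously returned via `res.send(200)`, which treats the number as a status code only through a deprecated overload. A minimal sketch of the corrected endpoint, assuming the server object is an Express app (consistent with `res.sendStatus`, which is Express API; the hunk alone doesn't show the framework import):

```js
const express = require('express');
const server = express();

// Before: res.send(200) relied on the deprecated number-as-status overload
// and logs "express deprecated res.send(status)" in Express 4.
// After: res.sendStatus(200) sets the status code and sends its standard
// reason phrase ('OK') as the body.
server.get('/', (req, res) => res.sendStatus(200));

server.listen(process.env.PORT || 7000, () => console.log('health endpoint ready'));
```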
@@ -96,10 +96,11 @@ async function getImdbId(info, type) {
     });
   }).catch(() => bing.web(`${name} ${year || ''} ${type} imdb`)
      .then(results => results
-          .map((result) => result.link)
+          .map(result => result.link)
          .find(result => result.includes('imdb.com/title/')))
      .then(result => result && result.match(/imdb\.com\/title\/(tt\d+)/))
-      .then(match => match && match[1])));
+      .then(match => match && match[1])))
+      .then(imdbId => 'tt' + imdbId.replace(/tt0*([1-9][0-9]*)$/, '$1').padStart(7, '0'));
 }

 async function getKitsuId(info) {
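The new final `.then` canonicalises whatever ID the Bing fallback scraped: it strips the `tt` prefix and any leading zeros, then pads the number back to the standard minimum of seven digits. A worked example (the helper name is ours):

```js
// Worked example of the normalisation step added above.
function normalizeImdbId(imdbId) {
  // Strip 'tt' and leading zeros, then left-pad the numeric part
  // back to the canonical minimum of 7 digits.
  return 'tt' + imdbId.replace(/tt0*([1-9][0-9]*)$/, '$1').padStart(7, '0');
}

console.log(normalizeImdbId('tt12345'));    // 'tt0012345'
console.log(normalizeImdbId('tt0012345'));  // 'tt0012345' (already canonical)
console.log(normalizeImdbId('tt10872600')); // 'tt10872600' (8 digits stay as-is)
```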
@@ -1,9 +1,14 @@
-const { Sequelize } = require('sequelize');
+const { Sequelize, fn, col } = require('sequelize');
 const Op = Sequelize.Op;

 const DATABASE_URI = process.env.DATABASE_URI;

-const database = new Sequelize(DATABASE_URI, { logging: false });
+const database = new Sequelize(
+    DATABASE_URI,
+    {
+      logging: false
+    }
+);

 const Provider = database.define('provider', {
   name: { type: Sequelize.STRING(32), primaryKey: true },
@@ -47,6 +52,19 @@ const File = database.define('file',
     },
     {
       indexes: [
+        {
+          unique: true,
+          name: 'files_unique_file_constraint',
+          fields: [
+            col('infoHash'),
+            fn('COALESCE', (col('fileIndex')), -1),
+            fn('COALESCE', (col('imdbId')), 'null'),
+            fn('COALESCE', (col('imdbSeason')), -1),
+            fn('COALESCE', (col('imdbEpisode')), -1),
+            fn('COALESCE', (col('kitsuId')), -1),
+            fn('COALESCE', (col('kitsuEpisode')), -1)
+          ]
+        },
         { unique: false, fields: ['imdbId', 'imdbSeason', 'imdbEpisode'] },
         { unique: false, fields: ['kitsuId', 'kitsuEpisode'] }
       ]
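PostgreSQL treats NULLs as distinct in a unique index, so rows differing only in a NULL `fileIndex`, `imdbId`, season or episode could be inserted repeatedly. Wrapping each nullable column in COALESCE with a sentinel value makes uniqueness apply to those rows too. A hedged sketch of the DDL this Sequelize definition should produce, assuming PostgreSQL and the default pluralised table name `files`:

```js
// Approximate DDL behind the functional unique index above; it could be
// applied manually with: await database.query(FILES_UNIQUE_INDEX);
const FILES_UNIQUE_INDEX = `
  CREATE UNIQUE INDEX "files_unique_file_constraint" ON "files" (
    "infoHash",
    COALESCE("fileIndex", -1),
    COALESCE("imdbId", 'null'),
    COALESCE("imdbSeason", -1),
    COALESCE("imdbEpisode", -1),
    COALESCE("kitsuId", -1),
    COALESCE("kitsuEpisode", -1)
  )`;
// Without the COALESCE wrappers, two rows that differ only in having
// NULL fileIndex/imdbId would both be accepted by the index.
```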
@@ -63,7 +81,11 @@ const FailedImdbTorrent = database.define('failed_imdb_torrent', {
 });

 function connect() {
-  return database.sync({ alter: true });
+  return database.sync({ alter: true })
+      .catch(error => {
+        console.error('Failed syncing database: ', error);
+        throw error;
+      });
 }

 function getProvider(provider) {
@@ -111,7 +111,7 @@ async function filesFromTorrentStream(torrent) {
 function filterVideos(files) {
   return files.filter((file) => {
     const match = file.path.match(/\.(\w{2,4})$/);
-    return match && EXTENSIONS.includes(match[1]);
+    return match && EXTENSIONS.includes(match[1].toLowerCase());
   });
 }

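Extension matching was case-sensitive, so a file like `Movie.MKV` slipped past the filter. A quick illustration (the EXTENSIONS list here is a stand-in):

```js
const EXTENSIONS = ['mkv', 'mp4', 'avi'];

function isVideo(path) {
  const match = path.match(/\.(\w{2,4})$/);
  // Lower-casing the captured extension lets 'Movie.MKV' match 'mkv'.
  return Boolean(match && EXTENSIONS.includes(match[1].toLowerCase()));
}

console.log(isVideo('Movie.MKV')); // true (was false before the fix)
console.log(isVideo('Movie.mkv')); // true
console.log(isVideo('notes.txt')); // false
```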
@@ -62,8 +62,9 @@ async function parseTorrentFiles(torrent) {
 }

 async function getSeriesFiles(torrent, parsedTorrentName) {
-  if ((parsedTorrentName.episode && (!parsedTorrentName.seasons || parsedTorrentName.seasons.length <= 1)) ||
-      (!parsedTorrentName.episodes && parsedTorrentName.date)) {
+  if (!parsedTorrentName.complete && !parsedTorrentName.hasMovies &&
+      ((parsedTorrentName.episode && (!parsedTorrentName.seasons || parsedTorrentName.seasons.length <= 1)) ||
+      (!parsedTorrentName.episodes && parsedTorrentName.date))) {
     return [{
       name: torrent.title,
       path: torrent.title,
@@ -2,6 +2,7 @@ require('dotenv').config();
 const Bottleneck = require('bottleneck');
 const { parse } = require('parse-torrent-title');
 const repository = require('../lib/repository');
+const { getImdbId } = require('../lib/metadata');
 const { parseTorrentFiles } = require('../lib/torrentFiles');
 const { Type } = require('../lib/types');

@@ -15,7 +16,7 @@ async function addMissingEpisodes() {
   const imdbId = Object.values(storedFiles)[0].imdbId;

   torrentFiles
-      .filter((file) => !storedFiles[file.fileIndex])
+      .filter((file) => !storedFiles[file.fileIndex !== undefined ? file.fileIndex : null])
      .map((file) => ({
        infoHash: torrent.infoHash,
        fileIndex: file.fileIndex,
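This `undefined → null` dance recurs throughout the commit. Files stored without a file index come back from the database with `fileIndex: null`, while freshly parsed single-file torrents may carry `fileIndex: undefined`; as object keys these coerce to the different strings `'null'` and `'undefined'`, so lookups silently miss. A small demonstration:

```js
// Object keys are strings, so 'undefined' and 'null' are separate buckets.
const stored = [{ fileIndex: null, title: 'single-file torrent' }];
const map = {};
stored.forEach(f => { map[f.fileIndex] = f; }); // key becomes the string 'null'

const parsed = { fileIndex: undefined };
console.log(map[parsed.fileIndex]); // undefined -- the lookup misses

const fileIndex = parsed.fileIndex !== undefined ? parsed.fileIndex : null;
console.log(map[fileIndex].title);  // 'single-file torrent' -- now it matches
```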
@@ -63,11 +64,22 @@ async function reapplySeriesSeasonsSavedAsMovies() {
       .then(() => console.log('Finished updating multiple torrents'));
 }

+async function reapplyDecomposingToTorrentsOnRegex(regex) {
+  return repository.getTorrentsBasedOnTitle(regex, Type.ANIME)
+      .then(torrents => Promise.all(torrents
+          .map(torrent => limiter.schedule(() => reapplyEpisodeDecomposing(torrent.infoHash, true)))))
+      .then(() => console.log('Finished updating multiple torrents'));
+}
+
 async function reapplyEpisodeDecomposing(infoHash, includeSourceFiles = true) {
   const torrent = await repository.getTorrent({ infoHash });
   const storedFiles = await repository.getFiles({ infoHash });
   const fileIndexMap = storedFiles
-      .reduce((map, next) => (map[next.fileIndex] = (map[next.fileIndex] || []).concat(next), map), {});
+      .reduce((map, next) => {
+        const fileIndex = next.fileIndex !== undefined ? next.fileIndex : null;
+        map[fileIndex] = (map[fileIndex] || []).concat(next);
+        return map;
+      }, {});
   const files = includeSourceFiles && Object.values(fileIndexMap)
      .map(sameIndexFiles => sameIndexFiles[0])
      .map(file => ({
@@ -76,12 +88,14 @@ async function reapplyEpisodeDecomposing(infoHash, includeSourceFiles = true) {
         path: file.title,
         size: file.size
       }));
-  const imdbId = storedFiles[0].imdbId;
+  const imdbId = storedFiles.length && storedFiles[0].imdbId || await getImdbId(parse(torrent.title));

   return parseTorrentFiles({ ...torrent, imdbId, files })
       .then(newFiles => newFiles.map(file => {
-        if (fileIndexMap[file.fileIndex]) {
-          const originalFile = fileIndexMap[file.fileIndex].shift();
+        const fileIndex = file.fileIndex !== undefined ? file.fileIndex : null;
+        const mapping = fileIndexMap[fileIndex];
+        if (mapping) {
+          const originalFile = mapping.shift();
           if (originalFile) {
             if (!originalFile.imdbId) {
               originalFile.imdbId = file.imdbId
@@ -176,5 +190,7 @@ async function findAllFiles() {
 //addMissingEpisodes().then(() => console.log('Finished'));
 //findAllFiles().then(() => console.log('Finished'));
 //updateMovieCollections().then(() => console.log('Finished'));
-reapplyEpisodeDecomposing('d71c4fd1cb9bb9c5365a570b903a3a58774f61a5', true).then(() => console.log('Finished'));
+reapplyEpisodeDecomposing('aec7bcac457ad68924e7119f859cf6fa3878f9f5', false).then(() => console.log('Finished'));
 //reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
+// reapplyDecomposingToTorrentsOnRegex('.*Boku no Hero Academia.*').then(() => console.log('Finished'));
+//reapplyManualHashes().then(() => console.log('Finished'));
@@ -20,7 +20,7 @@ async function scrape() {
   return scrapeLatestTorrents()
     .then(() => {
       lastScrape.lastScraped = scrapeStart;
-      return repository.updateProvider(lastScrape);
+      return lastScrape.save();
     })
     .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
 }
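Here and in the matching hunks below, provider bookkeeping moves from a `repository.updateProvider()` helper to calling `save()` on the Sequelize instance itself. A hedged sketch, assuming `lastScrape` is an instance of the `Provider` model defined earlier and `NAME` is that provider's primary key:

```js
// Sketch of the new persistence path (dependencies passed in for clarity).
async function updateLastScraped(Provider, NAME, scrapeStart) {
  const lastScrape = await Provider.findByPk(NAME); // look up this scraper's row
  lastScrape.lastScraped = scrapeStart;
  // Instance.save() issues an UPDATE for this row's changed attributes,
  // which is why the repository.updateProvider() indirection could be dropped.
  return lastScrape.save();
}
```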
@@ -40,14 +40,15 @@ async function scrapeLatestTorrents() {
 async function scrapeLatestTorrentsForCategory(category, page = 1) {
   console.log(`Scrapping ${NAME} ${category} category page ${page}`);
   return leetx.browse(({ category, page }))
+      .catch(error => {
+        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
+        return Promise.resolve([]);
+      })
      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
      .then(resolved => resolved.length > 0 && page < UNTIL_PAGE
          ? scrapeLatestTorrentsForCategory(category, page + 1)
-          : Promise.resolve())
-      .catch(error => {
-        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
-        return Promise.resolve();
-      });
+          : Promise.resolve());
 }

 async function processTorrentRecord(record) {
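Moving the `.catch` from the tail of the chain to directly after `browse()` changes the failure semantics: a failed page fetch now degrades to an empty result list instead of aborting the whole chain, and errors thrown while processing individual torrents are no longer swallowed by the blanket handler. The same reordering is applied to the kickass and thepiratebay scrapers below. A minimal runnable sketch of the pattern (function names and stub behaviour are stand-ins):

```js
// Stand-in fetcher: pages 1-2 succeed, page 3 fails like a flaky proxy would.
const browse = ({ category, page }) =>
  page <= 2 ? Promise.resolve([`${category}-torrent-${page}`]) : Promise.reject(new Error('HTTP 503'));
const processTorrent = torrent => Promise.resolve(torrent);

function scrapeCategory(category, page = 1) {
  return browse({ category, page })
      .catch(() => [])                          // only the fetch may fail softly now
      .then(torrents => Promise.all(torrents.map(processTorrent)))
      .then(resolved => resolved.length > 0
          ? scrapeCategory(category, page + 1)  // keep paginating while pages have results
          : Promise.resolve());                 // empty (or failed) page ends the recursion
}

scrapeCategory('Movies').then(() => console.log('done'));
```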
@@ -11,7 +11,7 @@ const { getMetadata, getKitsuId } = require('../../lib/metadata');
 const showMappings = require('./horriblesubs_mapping.json');

 const NAME = 'HorribleSubs';
-const NEXT_FULL_SCRAPE_OFFSET = 3 * 24 * 60 * 60; // 3 days;
+const NEXT_FULL_SCRAPE_OFFSET = 5 * 24 * 60 * 60; // 5 days;

 const limiter = new Bottleneck({ maxConcurrent: 5 });
 const entryLimiter = new Bottleneck({ maxConcurrent: 10 });
@@ -26,7 +26,7 @@ async function scrape() {
     return _scrapeAllShows()
       .then(() => {
         lastScrape.lastScraped = scrapeStart;
-        return repository.updateProvider(lastScrape);
+        return lastScrape.save();
       })
       .then(() => console.log(`[${moment()}] finished scrapping all ${NAME} shows`));
   } else {
@@ -140,7 +140,7 @@ async function _parseShowData(showData) {
     return kitsuId;
   };

-  return Promise.all([].concat(showData.singleEpisodes).concat(showData.packEpisodes)
+  return Promise.all([].concat(showData.singleEpisodes || []).concat(showData.packEpisodes || [])
      .map((episodeInfo) => episodeInfo.mirrors
          .filter((mirror) => mirror.magnetLink && mirror.magnetLink.length)
          .map((mirror) => ({
@@ -169,15 +169,23 @@ async function _parseShowData(showData) {
 async function verifyFiles(torrent, files) {
   if (files && files.length) {
     const existingFiles = await repository.getFiles({ infoHash: files[0].infoHash })
-        .then((existing) => existing.reduce((map, file) => (map[file.fileIndex] = file, map), {}))
+        .then((existing) => existing
+            .reduce((map, next) => {
+              const fileIndex = next.fileIndex !== undefined ? next.fileIndex : null;
+              map[fileIndex] = (map[fileIndex] || []).concat(next);
+              return map;
+            }, {}))
         .catch(() => undefined);
     if (existingFiles && Object.keys(existingFiles).length) {
       return files
-          .map(file => ({
-            ...file,
-            id: existingFiles[file.fileIndex] && existingFiles[file.fileIndex].id,
-            size: existingFiles[file.fileIndex] && existingFiles[file.fileIndex].size || file.size
-          }))
+          .map(file => {
+            const mapping = existingFiles[file.fileIndex !== undefined ? file.fileIndex : null];
+            if (mapping) {
+              const originalFile = mapping.shift();
+              return { ...file, id: originalFile.id, size: originalFile.size || file.size };
+            }
+            return file;
+          })
     }
     return files;
 }
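`verifyFiles` now groups existing rows per file index into arrays and `shift()`s one row per incoming file, so several entries sharing a file index each reclaim a distinct database id instead of all colliding on the last row written. Illustration (the `length` guard below also covers the exhausted-bucket case the hunk leaves implicit):

```js
// Why the map holds arrays: shift() hands each new file its own original row.
const existingFiles = { 0: [{ id: 11 }, { id: 12 }] }; // two rows share fileIndex 0
const incoming = [{ fileIndex: 0 }, { fileIndex: 0 }];

const merged = incoming.map(file => {
  const mapping = existingFiles[file.fileIndex !== undefined ? file.fileIndex : null];
  if (mapping && mapping.length) {
    const originalFile = mapping.shift();
    return { ...file, id: originalFile.id };
  }
  return file;
});

console.log(merged.map(f => f.id)); // [ 11, 12 ] -- distinct ids, no collision
```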
@@ -20,7 +20,7 @@ async function scrape() {
   return scrapeLatestTorrents()
     .then(() => {
       lastScrape.lastScraped = scrapeStart;
-      return repository.updateProvider(lastScrape);
+      return lastScrape.save();
     })
     .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
 }
@@ -39,14 +39,14 @@ async function scrapeLatestTorrents() {
 async function scrapeLatestTorrentsForCategory(category, page = 1) {
   console.log(`Scrapping ${NAME} ${category} category page ${page}`);
   return kickass.browse(({ category, page }))
+      .catch(error => {
+        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
+        return Promise.resolve([]);
+      })
      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
      .then(resolved => resolved.length > 0 && page < UNTIL_PAGE
          ? scrapeLatestTorrentsForCategory(category, page + 1)
-          : Promise.resolve())
-      .catch(error => {
-        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
-        return Promise.resolve();
-      });
+          : Promise.resolve());
 }

 async function processTorrentRecord(record) {
@@ -16,12 +16,13 @@ const entryLimiter = new Bottleneck({ maxConcurrent: 40 });

 async function scrape() {
   console.log(`[${moment()}] starting ${NAME} dump scrape...`);
-  const movieImdbIds = require('./rargb_movie_imdb_ids_2020-03-09.json');
-  const seriesImdbIds = require('./rargb_series_imdb_ids_2020-03-09.json');
-  const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);
+  //const movieImdbIds = require('./rargb_movie_imdb_ids_2020-03-09.json');
+  const seriesImdbIds = require('./rargb_series_imdb_ids_2020-03-09.json').slice(800);
+  //const allImdbIds = [].concat(movieImdbIds).concat(seriesImdbIds);

-  return Promise.all(allImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId)
-      .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t))))))))
+  return Promise.all(
+      seriesImdbIds.map(imdbId => limiter.schedule(() => getTorrentsForImdbId(imdbId))
+          .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))))
      .then(() => console.log(`[${moment()}] finished ${NAME} dump scrape`));
 }

@@ -20,7 +20,7 @@ async function scrape() {
   return scrapeLatestTorrents()
     .then(() => {
       lastScrape.lastScraped = scrapeStart;
-      return repository.updateProvider(lastScrape);
+      return lastScrape.save();
     })
     .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
 }
@@ -64,8 +64,8 @@ async function scrapeLatestTorrentsForCategory(category) {
       })))
       .then(torrents => Promise.all(torrents.map(t => entryLimiter.schedule(() => processTorrentRecord(t)))))
       .catch(error => {
-        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
-        return Promise.resolve();
+        console.warn(`Failed ${NAME} scrapping for ${category} due: `, error);
+        return Promise.resolve([]);
       });
 }

@@ -6,9 +6,9 @@ const Promises = require('../../lib/promises');

 const defaultProxies = [
   'https://thepiratebay.org',
-  'https://piratebays.icu',
-  'https://piratebays.cool',
-  'https://piratebays.life'];
+  'https://proxybay.pro',
+  'https://ukpiratebayproxy.com',
+  'https://thepiratebayproxy.info'];
 const dumpUrl = '/static/dump/csv/';
 const defaultTimeout = 10000;

@@ -169,15 +169,18 @@ function parseBody(body) {
   $('table[id=\'searchResult\'] tr').each(function () {
     const name = $(this).find('.detLink').text();
     const sizeMatcher = $(this).find('.detDesc').text().match(/(?:,\s?Size\s)(.+),/);
+    const magnetLink = $(this).find('a[title=\'Download this torrent using magnet\']').attr('href');
     if (!name || !sizeMatcher) {
       return;
     }
     torrents.push({
-      torrentId: $(this).find('.detLink').attr('href').match(/torrent\/([^/]+)/)[1],
       name: name,
+      magnetLink: magnetLink,
+      infoHash: decode(magnetLink).infoHash,
+      torrentId: $(this).find('.detLink').attr('href').match(/torrent\/([^/]+)/)[1],
       seeders: parseInt($(this).find('td[align=\'right\']').eq(0).text(), 10),
       leechers: parseInt($(this).find('td[align=\'right\']').eq(1).text(), 10),
-      magnetLink: $(this).find('a[title=\'Download this torrent using magnet\']').attr('href'),
       category: parseInt($(this).find('a[title=\'More from this category\']').eq(0).attr('href').match(/\d+$/)[0],
           10),
       subcategory: parseInt($(this).find('a[title=\'More from this category\']').eq(1).attr('href').match(/\d+$/)[0],
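The parser now extracts the magnet link once and derives the info hash from it while building the row. `decode` is presumably the `magnet-uri` package's decoder (its require sits outside this hunk); a hedged sketch:

```js
// Assumption: decode comes from the magnet-uri package.
const { decode } = require('magnet-uri');

const magnetLink = 'magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a&dn=sample';
const { infoHash } = decode(magnetLink); // lower-case hex digest of the torrent
console.log(infoHash); // 'c12fe1c06bba254a9dc9f519b335aa7c1367a88a'
```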
@@ -32,7 +32,7 @@ async function scrape() {
   return scrapeLatestTorrents()
     .then(() => {
       lastScrape.lastScraped = scrapeStart;
-      return repository.updateProvider(lastScrape);
+      return lastScrape.save();
     })
     .then(() => console.log(`[${moment()}] finished ${NAME} scrape`));
 }
@@ -45,14 +45,14 @@ async function scrapeLatestTorrents() {
 async function scrapeLatestTorrentsForCategory(category, page = 1) {
   console.log(`Scrapping ${NAME} ${category} category page ${page}`);
   return thepiratebay.browse(({ category, page }))
+      .catch(error => {
+        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
+        return Promise.resolve([]);
+      })
      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)))))
      .then(resolved => resolved.length > 0 && page < UNTIL_PAGE
          ? scrapeLatestTorrentsForCategory(category, page + 1)
-          : Promise.resolve())
-      .catch(error => {
-        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
-        return Promise.resolve();
-      });
+          : Promise.resolve());
 }

 async function processTorrentRecord(record) {