[scraper] get torrent size for horriblesubs
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
const { Sequelize, fn, col } = require('sequelize');
|
const { Sequelize, fn, col, literal } = require('sequelize');
|
||||||
const Op = Sequelize.Op;
|
const Op = Sequelize.Op;
|
||||||
|
|
||||||
const DATABASE_URI = process.env.DATABASE_URI;
|
const DATABASE_URI = process.env.DATABASE_URI;
|
||||||
@@ -115,8 +115,15 @@ function getTorrentsBasedOnTitle(titleQuery, type) {
|
|||||||
return Torrent.findAll({ where: { title: { [Op.regexp]: `${titleQuery}` }, type: type } });
|
return Torrent.findAll({ where: { title: { [Op.regexp]: `${titleQuery}` }, type: type } });
|
||||||
}
|
}
|
||||||
|
|
||||||
function getTorrentsWithoutId(provider) {
|
function getTorrentsWithoutSize() {
|
||||||
return Torrent.findAll({ where: { provider: provider, torrentId: { [Op.is]: null } }, limit: 100 });
|
return Torrent.findAll({
|
||||||
|
where: literal(
|
||||||
|
'exists (select 1 from files where files."infoHash" = torrent."infoHash" and files.size = 300000000)'),
|
||||||
|
order: [
|
||||||
|
['seeders', 'DESC']
|
||||||
|
],
|
||||||
|
limit: 1000
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function getTorrentsUpdatedBetween(provider, startDate, endDate) {
|
function getTorrentsUpdatedBetween(provider, startDate, endDate) {
|
||||||
@@ -185,6 +192,6 @@ module.exports = {
|
|||||||
getSkipTorrent,
|
getSkipTorrent,
|
||||||
createSkipTorrent,
|
createSkipTorrent,
|
||||||
createFailedImdbTorrent,
|
createFailedImdbTorrent,
|
||||||
getTorrentsWithoutId,
|
getTorrentsWithoutSize,
|
||||||
getTorrentsUpdatedBetween
|
getTorrentsUpdatedBetween
|
||||||
};
|
};
|
||||||
@@ -34,6 +34,54 @@ module.exports.updateCurrentSeeders = function (torrent) {
|
|||||||
}).then((seeders) => ({ ...torrent, seeders: torrent.seeders || seeders }));
|
}).then((seeders) => ({ ...torrent, seeders: torrent.seeders || seeders }));
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Looks up the total size of a torrent through a torrent-stream engine and
 * returns a copy of the torrent with that size attached.
 *
 * @param {Object} torrent - torrent entry; must carry `infoHash` or `magnetLink`.
 * @returns {Promise<Object>} resolves with `{ ...torrent, size }` where `size`
 *   is `engine.torrent.length`; rejects when neither identifier is present or
 *   when the engine is not ready within SEEDS_CHECK_TIMEOUT.
 */
module.exports.updateTorrentSize = function (torrent) {
  if (!torrent.infoHash && !torrent.magnetLink) {
    return Promise.reject(new Error("no infoHash or magnetLink"));
  }
  // Prefer the supplied magnet link; otherwise build one from the info hash.
  const magnetLink = torrent.magnetLink || decode.encode({ infoHash: torrent.infoHash });
  return new Promise((resolve, reject) => {
    const engine = new torrentStream(magnetLink, { connections: MAX_PEER_CONNECTIONS });

    // Armed before registering the ready callback so the callback can always
    // cancel it, even if the engine reports ready synchronously.
    const timeoutId = setTimeout(() => {
      engine.destroy();
      reject(new Error('No available connections for torrent!'));
    }, SEEDS_CHECK_TIMEOUT);

    engine.ready(() => {
      // Cancel the pending timeout: otherwise it fires later, destroys the
      // engine a second time and keeps the process alive until it elapses.
      clearTimeout(timeoutId);
      const size = engine.torrent.length;
      engine.destroy();
      // Resolve with the bare number; wrapping it in an object here would make
      // the `.then` below store `{ size: N }` as the torrent size.
      resolve(size);
    });
  }).then((size) => ({ ...torrent, size }));
};
|
||||||
|
|
||||||
|
/**
 * Reads the file list and total size of a torrent through a torrent-stream
 * engine.
 *
 * @param {Object} torrent - torrent entry; must carry `infoHash` or `magnetLink`.
 * @returns {Promise<{files: Object[], size: number}>} resolves with one entry
 *   per file (`fileIndex`, `name`, `path` without its first path segment,
 *   `size`) plus `engine.torrent.length` as the total size; rejects when
 *   neither identifier is present or when the engine is not ready within
 *   20 seconds.
 */
module.exports.sizeAndFiles = function (torrent) {
  if (!torrent.infoHash && !torrent.magnetLink) {
    return Promise.reject(new Error("no infoHash or magnetLink"));
  }
  // The guard above accepts a torrent that only has a magnet link, so fall
  // back to it instead of handing `undefined` to the engine.
  const torrentLink = torrent.infoHash || torrent.magnetLink;
  const readyTimeoutMs = 20000;

  return new Promise((resolve, reject) => {
    const engine = new torrentStream(torrentLink, { connections: MAX_PEER_CONNECTIONS });

    // Armed before registering the ready callback so the callback can always
    // cancel it.
    const timeoutId = setTimeout(() => {
      engine.destroy();
      reject(new Error('No available connections for torrent!'));
    }, readyTimeoutMs);

    engine.ready(() => {
      // Cancel the pending timeout so it neither destroys the engine a second
      // time nor keeps the process alive after success.
      clearTimeout(timeoutId);
      const files = engine.files
          .map((file, fileId) => ({
            fileIndex: fileId,
            name: file.name,
            // Strip the leading path segment (everything up to the first '/').
            path: file.path.replace(/^[^\/]+\//, ''),
            size: file.length
          }));
      const size = engine.torrent.length;

      engine.destroy();
      resolve({ files, size });
    });
  });
};
|
||||||
|
|
||||||
module.exports.torrentFiles = function (torrent) {
|
module.exports.torrentFiles = function (torrent) {
|
||||||
return getFilesFromObject(torrent)
|
return getFilesFromObject(torrent)
|
||||||
.catch(() => filesFromTorrentFile(torrent))
|
.catch(() => filesFromTorrentFile(torrent))
|
||||||
|
|||||||
@@ -62,8 +62,9 @@ async function updateTorrentSeeders(torrent) {
|
|||||||
return repository.getTorrent(torrent)
|
return repository.getTorrent(torrent)
|
||||||
.catch(() => undefined)
|
.catch(() => undefined)
|
||||||
.then(stored => {
|
.then(stored => {
|
||||||
if (stored && stored.seeders !== torrent.seeders) {
|
if (stored) {
|
||||||
stored.seeders = torrent.seeders;
|
stored.seeders = torrent.seeders;
|
||||||
|
stored.changed('updatedAt', true);
|
||||||
return stored.save()
|
return stored.save()
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ const decode = require('magnet-uri');
|
|||||||
const horriblesubs = require('./horriblesubs_api.js');
|
const horriblesubs = require('./horriblesubs_api.js');
|
||||||
const repository = require('../../lib/repository');
|
const repository = require('../../lib/repository');
|
||||||
const { Type } = require('../../lib/types');
|
const { Type } = require('../../lib/types');
|
||||||
const { updateCurrentSeeders } = require('../../lib/torrent');
|
const { updateCurrentSeeders, updateTorrentSize } = require('../../lib/torrent');
|
||||||
const { parseTorrentFiles } = require('../../lib/torrentFiles');
|
const { parseTorrentFiles } = require('../../lib/torrentFiles');
|
||||||
const { getMetadata, getKitsuId } = require('../../lib/metadata');
|
const { getMetadata, getKitsuId } = require('../../lib/metadata');
|
||||||
const showMappings = require('./horriblesubs_mapping.json');
|
const showMappings = require('./horriblesubs_mapping.json');
|
||||||
@@ -153,7 +153,6 @@ async function _parseShowData(showData) {
|
|||||||
infoHash: decode(mirror.magnetLink).infoHash,
|
infoHash: decode(mirror.magnetLink).infoHash,
|
||||||
trackers: decode(mirror.magnetLink).tr.join(','),
|
trackers: decode(mirror.magnetLink).tr.join(','),
|
||||||
title: formatTitle(episodeInfo, mirror),
|
title: formatTitle(episodeInfo, mirror),
|
||||||
size: 300000000,
|
|
||||||
type: Type.ANIME,
|
type: Type.ANIME,
|
||||||
kitsuId: getKitsuId(episodeInfo.episode),
|
kitsuId: getKitsuId(episodeInfo.episode),
|
||||||
uploadDate: episodeInfo.uploadDate,
|
uploadDate: episodeInfo.uploadDate,
|
||||||
@@ -161,12 +160,14 @@ async function _parseShowData(showData) {
|
|||||||
.reduce((a, b) => a.concat(b), [])
|
.reduce((a, b) => a.concat(b), [])
|
||||||
.filter((incompleteTorrent) => incompleteTorrent.kitsuId)
|
.filter((incompleteTorrent) => incompleteTorrent.kitsuId)
|
||||||
.map((incompleteTorrent) => entryLimiter.schedule(() => checkIfExists(incompleteTorrent)
|
.map((incompleteTorrent) => entryLimiter.schedule(() => checkIfExists(incompleteTorrent)
|
||||||
|
.then((torrent) => torrent && updateTorrentSize(torrent))
|
||||||
.then((torrent) => torrent && updateCurrentSeeders(torrent))
|
.then((torrent) => torrent && updateCurrentSeeders(torrent))
|
||||||
.then((torrent) => torrent && parseTorrentFiles(torrent)
|
.then((torrent) => torrent && parseTorrentFiles(torrent)
|
||||||
.then((files) => verifyFiles(torrent, files))
|
.then((files) => verifyFiles(torrent, files))
|
||||||
.then((files) => repository.createTorrent(torrent)
|
.then((files) => repository.createTorrent(torrent)
|
||||||
.then(() => files.forEach(file => repository.createFile(file)))
|
.then(() => files.forEach(file => repository.createFile(file)))
|
||||||
.then(() => console.log(`Created entry for ${torrent.title}`)))))))
|
.then(() => console.log(`Created entry for ${torrent.title}`))))
|
||||||
|
.catch(error => console.warn(`Failed creating entry for ${incompleteTorrent.title}:`, error)))))
|
||||||
.then(() => console.log(`${NAME}: finished scrapping ${showData.title} data`));
|
.then(() => console.log(`${NAME}: finished scrapping ${showData.title} data`));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,14 +6,12 @@ const Promises = require('../../lib/promises');
|
|||||||
|
|
||||||
const defaultProxies = [
|
const defaultProxies = [
|
||||||
// 'https://thepiratebay.org',
|
// 'https://thepiratebay.org',
|
||||||
// 'https://proxybay.pro',
|
'https://proxybay.pro',
|
||||||
// 'https://ukpiratebayproxy.com',
|
'https://ukpiratebayproxy.com',
|
||||||
// 'https://thepiratebayproxy.info',
|
'https://thepiratebayproxy.info',
|
||||||
// 'https://mypiratebay.co',
|
'https://mypiratebay.co',
|
||||||
'https://thepiratebay.asia',
|
// 'https://thepiratebay10.org',
|
||||||
'https://thepiratebay10.org',
|
// 'https://thepiratebay0.org',
|
||||||
'https://thepiratebay0.org',
|
|
||||||
'https://proxtpb.art',
|
|
||||||
];
|
];
|
||||||
const dumpUrl = '/static/dump/csv/';
|
const dumpUrl = '/static/dump/csv/';
|
||||||
const defaultTimeout = 10000;
|
const defaultTimeout = 10000;
|
||||||
|
|||||||
Reference in New Issue
Block a user