[scraper] get torrent size for horriblesubs

This commit is contained in:
TheBeastLT
2020-04-02 20:58:08 +02:00
parent 6b36667b7f
commit db78f5873b
5 changed files with 71 additions and 16 deletions

View File

@@ -1,4 +1,4 @@
const { Sequelize, fn, col } = require('sequelize');
const { Sequelize, fn, col, literal } = require('sequelize');
const Op = Sequelize.Op;
const DATABASE_URI = process.env.DATABASE_URI;
@@ -115,8 +115,15 @@ function getTorrentsBasedOnTitle(titleQuery, type) {
return Torrent.findAll({ where: { title: { [Op.regexp]: `${titleQuery}` }, type: type } });
}
function getTorrentsWithoutId(provider) {
return Torrent.findAll({ where: { provider: provider, torrentId: { [Op.is]: null } }, limit: 100 });
/**
 * Looks up torrents that have at least one file row whose size equals 300000000.
 * NOTE(review): 300000000 looks like a placeholder size written by a scraper
 * when the real size was unknown — confirm against the scrapers.
 *
 * Results are ordered by seeders (highest first) and capped at 1000 rows.
 *
 * @returns {Promise} whatever `Torrent.findAll` returns for this query
 */
function getTorrentsWithoutSize() {
  // Raw SQL condition: a matching row must exist in the files table.
  const hasFileWithMarkerSize = literal(
      'exists (select 1 from files where files."infoHash" = torrent."infoHash" and files.size = 300000000)');
  const mostSeededFirst = [['seeders', 'DESC']];
  const query = {
    where: hasFileWithMarkerSize,
    order: mostSeededFirst,
    limit: 1000
  };

  return Torrent.findAll(query);
}
function getTorrentsUpdatedBetween(provider, startDate, endDate) {
@@ -185,6 +192,6 @@ module.exports = {
getSkipTorrent,
createSkipTorrent,
createFailedImdbTorrent,
getTorrentsWithoutId,
getTorrentsWithoutSize,
getTorrentsUpdatedBetween
};

View File

@@ -34,6 +34,54 @@ module.exports.updateCurrentSeeders = function (torrent) {
}).then((seeders) => ({ ...torrent, seeders: torrent.seeders || seeders }));
};
/**
 * Resolves the total length of a torrent by connecting to its swarm and
 * returns a copy of the torrent with `size` set to that length.
 *
 * @param {Object} torrent - must carry `magnetLink` or `infoHash`; when both
 *   are present the magnet link is used
 * @returns {Promise<Object>} shallow copy of `torrent` with a numeric `size`
 *   taken from `engine.torrent.length`; rejects when neither identifier is
 *   present or when the engine is not ready within SEEDS_CHECK_TIMEOUT ms
 */
module.exports.updateTorrentSize = function (torrent) {
  if (!torrent.infoHash && !torrent.magnetLink) {
    return Promise.reject(new Error("no infoHash or magnetLink"));
  }
  const magnetLink = torrent.magnetLink || decode.encode({ infoHash: torrent.infoHash });
  return new Promise((resolve, reject) => {
    const engine = new torrentStream(magnetLink, { connections: MAX_PEER_CONNECTIONS });
    // Armed before waiting for readiness so the ready callback can always cancel it.
    const timeoutId = setTimeout(() => {
      engine.destroy();
      reject(new Error('No available connections for torrent!'));
    }, SEEDS_CHECK_TIMEOUT);
    engine.ready(() => {
      // Without this the timer would fire later, destroy the engine a second
      // time and keep the event loop alive for the remaining timeout.
      clearTimeout(timeoutId);
      const size = engine.torrent.length;
      engine.destroy();
      // Resolve with the bare number: the continuation below stores the
      // resolved value directly as `size`, so wrapping it in an object would
      // produce `size: { size: N }`.
      resolve(size);
    });
  }).then((size) => ({ ...torrent, size }));
};
/**
 * Connects to a torrent's swarm and reports its total length together with
 * its file listing.
 *
 * @param {Object} torrent - must carry `infoHash` or `magnetLink`; the info
 *   hash is preferred when both are present
 * @returns {Promise<{files: Array<{fileIndex: number, name: string, path: string, size: number}>, size: number}>}
 *   `files` is built from `engine.files` and `size` from `engine.torrent.length`;
 *   rejects when neither identifier is present or when the engine is not
 *   ready within 20 seconds
 */
module.exports.sizeAndFiles = function (torrent) {
  if (!torrent.infoHash && !torrent.magnetLink) {
    return Promise.reject(new Error("no infoHash or magnetLink"));
  }
  const readyTimeoutMs = 20000;
  // The guard above accepts a torrent that only has a magnet link, so fall
  // back to it instead of handing `undefined` to the engine.
  // Note: `torrent.trackers` is not passed to the engine here.
  const torrentIdentifier = torrent.infoHash || torrent.magnetLink;
  return new Promise((resolve, reject) => {
    const engine = new torrentStream(torrentIdentifier, { connections: MAX_PEER_CONNECTIONS });
    // Armed before waiting for readiness so the ready callback can always cancel it.
    const timeoutId = setTimeout(() => {
      engine.destroy();
      reject(new Error('No available connections for torrent!'));
    }, readyTimeoutMs);
    engine.ready(() => {
      // Cancel the pending timeout so it neither destroys the engine a second
      // time nor keeps the event loop alive after a successful lookup.
      clearTimeout(timeoutId);
      const files = engine.files
          .map((file, fileId) => ({
            fileIndex: fileId,
            name: file.name,
            // Drop the leading path segment (everything up to the first '/').
            path: file.path.replace(/^[^\/]+\//, ''),
            size: file.length
          }));
      const size = engine.torrent.length;
      engine.destroy();
      resolve({ files, size });
    });
  });
};
module.exports.torrentFiles = function (torrent) {
return getFilesFromObject(torrent)
.catch(() => filesFromTorrentFile(torrent))

View File

@@ -62,8 +62,9 @@ async function updateTorrentSeeders(torrent) {
return repository.getTorrent(torrent)
.catch(() => undefined)
.then(stored => {
if (stored && stored.seeders !== torrent.seeders) {
if (stored) {
stored.seeders = torrent.seeders;
stored.changed('updatedAt', true);
return stored.save()
}
})

View File

@@ -5,7 +5,7 @@ const decode = require('magnet-uri');
const horriblesubs = require('./horriblesubs_api.js');
const repository = require('../../lib/repository');
const { Type } = require('../../lib/types');
const { updateCurrentSeeders } = require('../../lib/torrent');
const { updateCurrentSeeders, updateTorrentSize } = require('../../lib/torrent');
const { parseTorrentFiles } = require('../../lib/torrentFiles');
const { getMetadata, getKitsuId } = require('../../lib/metadata');
const showMappings = require('./horriblesubs_mapping.json');
@@ -153,7 +153,6 @@ async function _parseShowData(showData) {
infoHash: decode(mirror.magnetLink).infoHash,
trackers: decode(mirror.magnetLink).tr.join(','),
title: formatTitle(episodeInfo, mirror),
size: 300000000,
type: Type.ANIME,
kitsuId: getKitsuId(episodeInfo.episode),
uploadDate: episodeInfo.uploadDate,
@@ -161,12 +160,14 @@ async function _parseShowData(showData) {
.reduce((a, b) => a.concat(b), [])
.filter((incompleteTorrent) => incompleteTorrent.kitsuId)
.map((incompleteTorrent) => entryLimiter.schedule(() => checkIfExists(incompleteTorrent)
.then((torrent) => torrent && updateTorrentSize(torrent))
.then((torrent) => torrent && updateCurrentSeeders(torrent))
.then((torrent) => torrent && parseTorrentFiles(torrent)
.then((files) => verifyFiles(torrent, files))
.then((files) => repository.createTorrent(torrent)
.then(() => files.forEach(file => repository.createFile(file)))
.then(() => console.log(`Created entry for ${torrent.title}`)))))))
.then(() => console.log(`Created entry for ${torrent.title}`))))
.catch(error => console.warn(`Failed creating entry for ${incompleteTorrent.title}:`, error)))))
.then(() => console.log(`${NAME}: finished scrapping ${showData.title} data`));
}

View File

@@ -6,14 +6,12 @@ const Promises = require('../../lib/promises');
// Default list of proxy base URLs. Entries that are commented out are not
// part of the array; they are kept here only for reference.
const defaultProxies = [
// 'https://thepiratebay.org',
// 'https://proxybay.pro',
// 'https://ukpiratebayproxy.com',
// 'https://thepiratebayproxy.info',
// 'https://mypiratebay.co',
'https://thepiratebay.asia',
'https://thepiratebay10.org',
'https://thepiratebay0.org',
'https://proxtpb.art',
'https://proxybay.pro',
'https://ukpiratebayproxy.com',
'https://thepiratebayproxy.info',
'https://mypiratebay.co',
// 'https://thepiratebay10.org',
// 'https://thepiratebay0.org',
];
const dumpUrl = '/static/dump/csv/';
const defaultTimeout = 10000;