[scraper] do not use imdbId for series from tpb

This commit is contained in:
TheBeastLT
2020-03-20 14:25:05 +01:00
parent 25aafc7555
commit 5f429ebdc0
7 changed files with 12 additions and 19 deletions

View File

@@ -1553,8 +1553,8 @@
} }
}, },
"parse-torrent-title": { "parse-torrent-title": {
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#7259b01bfe6e1fbc3879ba68d9c58ebac84029e9", "version": "git://github.com/TheBeastLT/parse-torrent-title.git#afd4a374276420c13c52df8e3d07ae7699c46b60",
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#7259b01bfe6e1fbc3879ba68d9c58ebac84029e9", "from": "git://github.com/TheBeastLT/parse-torrent-title.git#afd4a374276420c13c52df8e3d07ae7699c46b60",
"requires": { "requires": {
"moment": "^2.24.0" "moment": "^2.24.0"
} }

View File

@@ -12,7 +12,7 @@
"cache-manager-mongodb": "^0.2.1", "cache-manager-mongodb": "^0.2.1",
"express-rate-limit": "^5.1.1", "express-rate-limit": "^5.1.1",
"needle": "^2.2.4", "needle": "^2.2.4",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#7259b01bfe6e1fbc3879ba68d9c58ebac84029e9", "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#afd4a374276420c13c52df8e3d07ae7699c46b60",
"pg": "^7.8.2", "pg": "^7.8.2",
"pg-hstore": "^2.3.2", "pg-hstore": "^2.3.2",
"real-debrid-api": "^1.0.1", "real-debrid-api": "^1.0.1",

4
package-lock.json generated
View File

@@ -1385,8 +1385,8 @@
} }
}, },
"parse-torrent-title": { "parse-torrent-title": {
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#7259b01bfe6e1fbc3879ba68d9c58ebac84029e9", "version": "git://github.com/TheBeastLT/parse-torrent-title.git#afd4a374276420c13c52df8e3d07ae7699c46b60",
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#7259b01bfe6e1fbc3879ba68d9c58ebac84029e9", "from": "git://github.com/TheBeastLT/parse-torrent-title.git#afd4a374276420c13c52df8e3d07ae7699c46b60",
"requires": { "requires": {
"moment": "^2.24.0" "moment": "^2.24.0"
} }

View File

@@ -30,7 +30,7 @@
"node-schedule": "^1.3.2", "node-schedule": "^1.3.2",
"nodejs-bing": "^0.1.0", "nodejs-bing": "^0.1.0",
"parse-torrent": "^6.1.2", "parse-torrent": "^6.1.2",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#7259b01bfe6e1fbc3879ba68d9c58ebac84029e9", "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#afd4a374276420c13c52df8e3d07ae7699c46b60",
"peer-search": "^0.6.x", "peer-search": "^0.6.x",
"pg": "^7.8.2", "pg": "^7.8.2",
"pg-hstore": "^2.3.2", "pg-hstore": "^2.3.2",

View File

@@ -128,7 +128,7 @@ function getFiles(torrent) {
} }
function getFilesBasedOnTitle(titleQuery) { function getFilesBasedOnTitle(titleQuery) {
return File.findAll({ where: { title: { [Op.iLike]: `%${titleQuery}%` } } }); return File.findAll({ where: { title: { [Op.regexp]: `${titleQuery}` } } });
} }
function deleteFile(file) { function deleteFile(file) {

View File

@@ -19,7 +19,7 @@ async function scrape() {
const lastDump = { updatedAt: 2147000000 }; const lastDump = { updatedAt: 2147000000 };
//const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate(); //const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
//const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]); //const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
const checkPoint = 611000; const checkPoint = 0;
if (lastDump) { if (lastDump) {
console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`); console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
@@ -55,11 +55,6 @@ async function scrape() {
size: parseInt(row[3], 10) size: parseInt(row[3], 10)
}; };
// if (torrent.uploadDate > checkPoint) {
// entriesProcessed++;
// return;
// }
if (!limiter.empty()) { if (!limiter.empty()) {
lr.pause() lr.pause()
} }
@@ -74,8 +69,6 @@ async function scrape() {
console.log(err); console.log(err);
}); });
lr.on('end', () => { lr.on('end', () => {
fs.unlink(CSV_FILE_PATH, (error) => console.warn(error));
//repository.updateProvider({ name: NAME, lastScraped: lastDump.updatedAt });
console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`); console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`);
}); });
} }
@@ -106,13 +99,13 @@ async function processTorrentRecord(record) {
} }
const torrent = { const torrent = {
infoHash: record.infoHash, infoHash: torrentFound.infoHash,
provider: NAME, provider: NAME,
torrentId: record.torrentId, torrentId: torrentFound.torrentId,
title: torrentFound.name, title: torrentFound.name,
size: torrentFound.size, size: torrentFound.size,
type: seriesCategories.includes(torrentFound.subcategory) ? Type.SERIES : Type.MOVIE, type: seriesCategories.includes(torrentFound.subcategory) ? Type.SERIES : Type.MOVIE,
imdbId: torrentFound.imdbId, imdbId: seriesCategories.includes(torrentFound.subcategory) && torrentFound.imdbId || undefined,
uploadDate: torrentFound.uploadDate || record.uploadDate, uploadDate: torrentFound.uploadDate || record.uploadDate,
seeders: torrentFound.seeders, seeders: torrentFound.seeders,
}; };

View File

@@ -73,7 +73,7 @@ async function processTorrentRecord(record) {
title: torrentFound.name.replace(/\t|\s+/g, ' '), title: torrentFound.name.replace(/\t|\s+/g, ' '),
size: torrentFound.size, size: torrentFound.size,
type: seriesCategories.includes(torrentFound.subcategory) ? Type.SERIES : Type.MOVIE, type: seriesCategories.includes(torrentFound.subcategory) ? Type.SERIES : Type.MOVIE,
imdbId: torrentFound.imdbId, imdbId: seriesCategories.includes(torrentFound.subcategory) && torrentFound.imdbId || undefined,
uploadDate: torrentFound.uploadDate, uploadDate: torrentFound.uploadDate,
seeders: torrentFound.seeders, seeders: torrentFound.seeders,
}; };