[scraper] updates tpb proxies

This commit is contained in:
TheBeastLT
2020-03-16 14:48:57 +01:00
parent b05d4dee33
commit 7e7e46a5f3
9 changed files with 22 additions and 13 deletions

View File

@@ -11,7 +11,7 @@
"cache-manager": "^2.9.0", "cache-manager": "^2.9.0",
"cache-manager-mongodb": "^0.2.1", "cache-manager-mongodb": "^0.2.1",
"express-rate-limit": "^5.1.1", "express-rate-limit": "^5.1.1",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#7c00602bc1c405f5574758eeabb72b133fea81d5", "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#7259b01bfe6e1fbc3879ba68d9c58ebac84029e9",
"pg": "^7.8.2", "pg": "^7.8.2",
"pg-hstore": "^2.3.2", "pg-hstore": "^2.3.2",
"sequelize": "^4.43.0", "sequelize": "^4.43.0",

4
package-lock.json generated
View File

@@ -1385,8 +1385,8 @@
} }
}, },
"parse-torrent-title": { "parse-torrent-title": {
"version": "git://github.com/TheBeastLT/parse-torrent-title.git#7c00602bc1c405f5574758eeabb72b133fea81d5", "version": "git://github.com/TheBeastLT/parse-torrent-title.git#7259b01bfe6e1fbc3879ba68d9c58ebac84029e9",
"from": "git://github.com/TheBeastLT/parse-torrent-title.git#7c00602bc1c405f5574758eeabb72b133fea81d5", "from": "git://github.com/TheBeastLT/parse-torrent-title.git#7259b01bfe6e1fbc3879ba68d9c58ebac84029e9",
"requires": { "requires": {
"moment": "^2.24.0" "moment": "^2.24.0"
} }

View File

@@ -29,7 +29,7 @@
"node-schedule": "^1.3.2", "node-schedule": "^1.3.2",
"nodejs-bing": "^0.1.0", "nodejs-bing": "^0.1.0",
"parse-torrent": "^6.1.2", "parse-torrent": "^6.1.2",
"parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#7c00602bc1c405f5574758eeabb72b133fea81d5", "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#7259b01bfe6e1fbc3879ba68d9c58ebac84029e9",
"peer-search": "^0.6.x", "peer-search": "^0.6.x",
"pg": "^7.8.2", "pg": "^7.8.2",
"pg-hstore": "^2.3.2", "pg-hstore": "^2.3.2",

View File

@@ -13,12 +13,14 @@ const thepiratebayDumpScraper = require('./scrapers/thepiratebay/thepiratebay_du
const thepiratebayUnofficialDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper'); const thepiratebayUnofficialDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper');
const PROVIDERS = [ const PROVIDERS = [
// horribleSubsScraper, horribleSubsScraper,
// rarbgScraper, rarbgScraper,
thepiratebayScraper, thepiratebayScraper,
kickassScraper, kickassScraper,
leetxScraper leetxScraper
// rarbgDumpScraper // rarbgDumpScraper
// thepiratebayDumpScraper
// thepiratebayUnofficialDumpScraper
]; ];
const SCRAPE_CRON = process.env.SCRAPE_CRON || '* * 0/4 * * *'; const SCRAPE_CRON = process.env.SCRAPE_CRON || '* * 0/4 * * *';

View File

@@ -190,7 +190,7 @@ async function findAllFiles() {
//addMissingEpisodes().then(() => console.log('Finished')); //addMissingEpisodes().then(() => console.log('Finished'));
//findAllFiles().then(() => console.log('Finished')); //findAllFiles().then(() => console.log('Finished'));
//updateMovieCollections().then(() => console.log('Finished')); //updateMovieCollections().then(() => console.log('Finished'));
reapplyEpisodeDecomposing('aec7bcac457ad68924e7119f859cf6fa3878f9f5', false).then(() => console.log('Finished')); reapplyEpisodeDecomposing('0b6c0f0692bdb151efb87e3de90e46e3b177444e', false).then(() => console.log('Finished'));
//reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished')); //reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
// reapplyDecomposingToTorrentsOnRegex('.*Boku no Hero Academia.*').then(() => console.log('Finished')); //reapplyDecomposingToTorrentsOnRegex('.*Boku no Hero Academia.*').then(() => console.log('Finished'));
//reapplyManualHashes().then(() => console.log('Finished')); //reapplyManualHashes().then(() => console.log('Finished'));

View File

@@ -7,7 +7,7 @@ const Promises = require('../../lib/promises');
const defaultProxies = [ const defaultProxies = [
'https://katcr.co' 'https://katcr.co'
]; ];
const defaultTimeout = 10000; const defaultTimeout = 30000;
const Categories = { const Categories = {
MOVIE: 'movies', MOVIE: 'movies',

View File

@@ -5,10 +5,12 @@ const decode = require('magnet-uri');
const Promises = require('../../lib/promises'); const Promises = require('../../lib/promises');
const defaultProxies = [ const defaultProxies = [
'https://thepiratebay.org', // 'https://thepiratebay.org',
'https://proxybay.pro', 'https://proxybay.pro',
'https://ukpiratebayproxy.com', 'https://ukpiratebayproxy.com',
'https://thepiratebayproxy.info' 'https://thepiratebayproxy.info',
'https://mypiratebay.co',
'https://thepiratebay.asia',
]; ];
const dumpUrl = '/static/dump/csv/'; const dumpUrl = '/static/dump/csv/';
const defaultTimeout = 10000; const defaultTimeout = 10000;

View File

@@ -7,7 +7,6 @@ const fs = require('fs');
const thepiratebay = require('./thepiratebay_api.js'); const thepiratebay = require('./thepiratebay_api.js');
const bing = require('nodejs-bing'); const bing = require('nodejs-bing');
const { Type } = require('../../lib/types'); const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const { escapeHTML } = require('../../lib/metadata'); const { escapeHTML } = require('../../lib/metadata');
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries'); const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
@@ -20,6 +19,7 @@ async function scrape() {
const lastDump = { updatedAt: 2147000000 }; const lastDump = { updatedAt: 2147000000 };
//const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate(); //const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
//const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]); //const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
const checkPoint = 611000;
if (lastDump) { if (lastDump) {
console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`); console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
@@ -34,6 +34,11 @@ async function scrape() {
if (entriesProcessed % 1000 === 0) { if (entriesProcessed % 1000 === 0) {
console.log(`Processed ${entriesProcessed} entries`); console.log(`Processed ${entriesProcessed} entries`);
} }
if (entriesProcessed <= checkPoint) {
entriesProcessed++;
return;
}
const row = line.match(/(?<=^|;)(".*"|[^;]+)(?=;|$)/g); const row = line.match(/(?<=^|;)(".*"|[^;]+)(?=;|$)/g);
if (row.length !== 4) { if (row.length !== 4) {
console.log(`Invalid row: ${line}`); console.log(`Invalid row: ${line}`);

View File

@@ -12,7 +12,7 @@ const CSV_FILE_PATH = '/tmp/tpb.csv';
const limiter = new Bottleneck({ maxConcurrent: 40 }); const limiter = new Bottleneck({ maxConcurrent: 40 });
async function scrape() { async function scrape() {
// await processTorrentRecord({ torrentId: 35313644, category: 'Video' }); // await processTorrentRecord({ torrentId: 26877339, category: 'Video' });
console.log(`starting to scrape tpb dump...`); console.log(`starting to scrape tpb dump...`);
//const checkPoint = moment('2013-06-16 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate(); //const checkPoint = moment('2013-06-16 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
const checkPoint = 4115000; const checkPoint = 4115000;