[scraper] updates tpb proxies

This commit is contained in:
TheBeastLT
2020-03-16 14:48:57 +01:00
parent b05d4dee33
commit 7e7e46a5f3
9 changed files with 22 additions and 13 deletions

View File

@@ -7,7 +7,7 @@ const Promises = require('../../lib/promises');
const defaultProxies = [
'https://katcr.co'
];
const defaultTimeout = 10000;
const defaultTimeout = 30000;
const Categories = {
MOVIE: 'movies',

View File

@@ -5,10 +5,12 @@ const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const defaultProxies = [
'https://thepiratebay.org',
// 'https://thepiratebay.org',
'https://proxybay.pro',
'https://ukpiratebayproxy.com',
'https://thepiratebayproxy.info'
'https://thepiratebayproxy.info',
'https://mypiratebay.co',
'https://thepiratebay.asia',
];
const dumpUrl = '/static/dump/csv/';
const defaultTimeout = 10000;

View File

@@ -7,7 +7,6 @@ const fs = require('fs');
const thepiratebay = require('./thepiratebay_api.js');
const bing = require('nodejs-bing');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const { escapeHTML } = require('../../lib/metadata');
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
@@ -20,6 +19,7 @@ async function scrape() {
const lastDump = { updatedAt: 2147000000 };
//const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
//const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
const checkPoint = 611000;
if (lastDump) {
console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
@@ -34,6 +34,11 @@ async function scrape() {
if (entriesProcessed % 1000 === 0) {
console.log(`Processed ${entriesProcessed} entries`);
}
if (entriesProcessed <= checkPoint) {
entriesProcessed++;
return;
}
const row = line.match(/(?<=^|;)(".*"|[^;]+)(?=;|$)/g);
if (row.length !== 4) {
console.log(`Invalid row: ${line}`);

View File

@@ -12,7 +12,7 @@ const CSV_FILE_PATH = '/tmp/tpb.csv';
const limiter = new Bottleneck({ maxConcurrent: 40 });
async function scrape() {
// await processTorrentRecord({ torrentId: 35313644, category: 'Video' });
// await processTorrentRecord({ torrentId: 26877339, category: 'Video' });
console.log(`starting to scrape tpb dump...`);
//const checkPoint = moment('2013-06-16 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
const checkPoint = 4115000;