[scraper] updates tpb proxies

Author: TheBeastLT
Date: 2020-03-16 14:48:57 +01:00
parent b05d4dee33
commit 7e7e46a5f3
9 changed files with 22 additions and 13 deletions

View File

@@ -13,12 +13,14 @@ const thepiratebayDumpScraper = require('./scrapers/thepiratebay/thepiratebay_du
const thepiratebayUnofficialDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper');
const PROVIDERS = [
// horribleSubsScraper,
// rarbgScraper,
horribleSubsScraper,
rarbgScraper,
thepiratebayScraper,
kickassScraper,
leetxScraper
// rarbgDumpScraper
// thepiratebayDumpScraper
// thepiratebayUnofficialDumpScraper
];
const SCRAPE_CRON = process.env.SCRAPE_CRON || '* * 0/4 * * *';
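
As context for the provider list above: the enabled scrapers run on the SCRAPE_CRON schedule, apparently a six-field (seconds-first) expression whose `0/4` hour field means hours 0, 4, 8, … in Quartz-style step syntax. A minimal sketch of that loop, assuming each scraper module exposes a scrape() returning a promise; the scheduler wiring itself is not part of this diff:

    // Hedged sketch, not code from this commit: run each enabled provider in
    // sequence on every scheduled tick so the mirrors are not hit all at once.
    async function scrapeAll() {
      for (const provider of PROVIDERS) {
        await provider.scrape()
            .catch((error) => console.warn(`scraper failed: ${error.message}`));
      }
    }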

View File

@@ -190,7 +190,7 @@ async function findAllFiles() {
//addMissingEpisodes().then(() => console.log('Finished'));
//findAllFiles().then(() => console.log('Finished'));
//updateMovieCollections().then(() => console.log('Finished'));
reapplyEpisodeDecomposing('aec7bcac457ad68924e7119f859cf6fa3878f9f5', false).then(() => console.log('Finished'));
reapplyEpisodeDecomposing('0b6c0f0692bdb151efb87e3de90e46e3b177444e', false).then(() => console.log('Finished'));
//reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
// reapplyDecomposingToTorrentsOnRegex('.*Boku no Hero Academia.*').then(() => console.log('Finished'));
//reapplyDecomposingToTorrentsOnRegex('.*Boku no Hero Academia.*').then(() => console.log('Finished'));
//reapplyManualHashes().then(() => console.log('Finished'));

View File

@@ -7,7 +7,7 @@ const Promises = require('../../lib/promises');
const defaultProxies = [
'https://katcr.co'
];
const defaultTimeout = 10000;
const defaultTimeout = 30000;
const Categories = {
MOVIE: 'movies',
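
The tripled defaultTimeout is the per-request budget a proxy gets before the scraper gives up on it. A hedged sketch of that pattern, with axios standing in for whatever HTTP client kickass_api.js actually uses:

    const axios = require('axios'); // illustrative client, not confirmed by this diff

    function singleRequest(requestUrl, timeout = defaultTimeout) {
      // abort a slow mirror after `timeout` ms instead of stalling the whole scrape
      return axios.get(requestUrl, { timeout }).then((response) => response.data);
    }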

View File

@@ -5,10 +5,12 @@ const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const defaultProxies = [
'https://thepiratebay.org',
// 'https://thepiratebay.org',
'https://proxybay.pro',
'https://ukpiratebayproxy.com',
'https://thepiratebayproxy.info'
'https://thepiratebayproxy.info',
'https://mypiratebay.co',
'https://thepiratebay.asia',
];
const dumpUrl = '/static/dump/csv/';
const defaultTimeout = 10000;
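
With several mirrors configured, the usual trick is to fan one query out to every entry in defaultProxies and keep whichever answers first; the repo's Promises helper presumably fills this role, but the sketch below uses Promise.any (Node 15+) so it stays self-contained:

    const axios = require('axios'); // illustrative client, as in the sketch above

    function firstResponse(path, timeout = defaultTimeout) {
      const requests = defaultProxies.map((proxyUrl) =>
          axios.get(`${proxyUrl}${path}`, { timeout }).then((response) => response.data));
      // resolves with the first mirror that answers in time; rejects only if all of them fail
      return Promise.any(requests);
    }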

View File

@@ -7,7 +7,6 @@ const fs = require('fs');
const thepiratebay = require('./thepiratebay_api.js');
const bing = require('nodejs-bing');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const { escapeHTML } = require('../../lib/metadata');
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
@@ -20,6 +19,7 @@ async function scrape() {
const lastDump = { updatedAt: 2147000000 };
//const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
//const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
const checkPoint = 611000;
if (lastDump) {
console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
@@ -34,6 +34,11 @@ async function scrape() {
if (entriesProcessed % 1000 === 0) {
console.log(`Processed ${entriesProcessed} entries`);
}
if (entriesProcessed <= checkPoint) {
entriesProcessed++;
return;
}
const row = line.match(/(?<=^|;)(".*"|[^;]+)(?=;|$)/g);
if (row.length !== 4) {
console.log(`Invalid row: ${line}`);
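
Two notes on the dump loop above: entries up to the hard-coded checkPoint are skipped, which lets a previously interrupted import pick up roughly where it stopped, and the row regex splits a semicolon-delimited dump line while keeping quoted fields intact. A quick illustration on a made-up line (not real dump data):

    // Made-up sample; the real dump's column order and values may differ.
    const sample = '1402986465;"Some.Show.S01E01.720p.WEB";734003200;aec7bcac457ad68924e7119f859cf6fa3878f9f5';
    const fields = sample.match(/(?<=^|;)(".*"|[^;]+)(?=;|$)/g);
    // fields => [ '1402986465', '"Some.Show.S01E01.720p.WEB"', '734003200', 'aec7bcac…f9f5' ]
    // anything that does not split into exactly 4 fields is logged as an invalid row and skipped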

View File

@@ -12,7 +12,7 @@ const CSV_FILE_PATH = '/tmp/tpb.csv';
const limiter = new Bottleneck({ maxConcurrent: 40 });
async function scrape() {
// await processTorrentRecord({ torrentId: 35313644, category: 'Video' });
// await processTorrentRecord({ torrentId: 26877339, category: 'Video' });
console.log(`starting to scrape tpb dump...`);
//const checkPoint = moment('2013-06-16 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
const checkPoint = 4115000;
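
The Bottleneck limiter above caps concurrency while the CSV is streamed; a minimal sketch of the usual pattern, assuming processTorrentRecord (referenced in the commented line) is the per-row worker:

    // Hedged sketch: all calls share the limiter, so at most 40 records are
    // being processed at any moment regardless of how fast the CSV is read.
    function processWithLimit(record) {
      return limiter.schedule(() => processTorrentRecord(record));
    }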