mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
[scraper] updates tpb proxies
This commit is contained in:
@@ -13,12 +13,14 @@ const thepiratebayDumpScraper = require('./scrapers/thepiratebay/thepiratebay_du
|
||||
const thepiratebayUnofficialDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper');
|
||||
|
||||
const PROVIDERS = [
|
||||
// horribleSubsScraper,
|
||||
// rarbgScraper,
|
||||
horribleSubsScraper,
|
||||
rarbgScraper,
|
||||
thepiratebayScraper,
|
||||
kickassScraper,
|
||||
leetxScraper
|
||||
// rarbgDumpScraper
|
||||
// thepiratebayDumpScraper
|
||||
// thepiratebayUnofficialDumpScraper
|
||||
];
|
||||
const SCRAPE_CRON = process.env.SCRAPE_CRON || '* * 0/4 * * *';
|
||||
|
||||
|
||||
@@ -190,7 +190,7 @@ async function findAllFiles() {
|
||||
//addMissingEpisodes().then(() => console.log('Finished'));
|
||||
//findAllFiles().then(() => console.log('Finished'));
|
||||
//updateMovieCollections().then(() => console.log('Finished'));
|
||||
reapplyEpisodeDecomposing('aec7bcac457ad68924e7119f859cf6fa3878f9f5', false).then(() => console.log('Finished'));
|
||||
reapplyEpisodeDecomposing('0b6c0f0692bdb151efb87e3de90e46e3b177444e', false).then(() => console.log('Finished'));
|
||||
//reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
|
||||
// reapplyDecomposingToTorrentsOnRegex('.*Boku no Hero Academia.*').then(() => console.log('Finished'));
|
||||
//reapplyDecomposingToTorrentsOnRegex('.*Boku no Hero Academia.*').then(() => console.log('Finished'));
|
||||
//reapplyManualHashes().then(() => console.log('Finished'));
|
||||
@@ -7,7 +7,7 @@ const Promises = require('../../lib/promises');
|
||||
const defaultProxies = [
|
||||
'https://katcr.co'
|
||||
];
|
||||
const defaultTimeout = 10000;
|
||||
const defaultTimeout = 30000;
|
||||
|
||||
const Categories = {
|
||||
MOVIE: 'movies',
|
||||
|
||||
@@ -5,10 +5,12 @@ const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
|
||||
const defaultProxies = [
|
||||
'https://thepiratebay.org',
|
||||
// 'https://thepiratebay.org',
|
||||
'https://proxybay.pro',
|
||||
'https://ukpiratebayproxy.com',
|
||||
'https://thepiratebayproxy.info'
|
||||
'https://thepiratebayproxy.info',
|
||||
'https://mypiratebay.co',
|
||||
'https://thepiratebay.asia',
|
||||
];
|
||||
const dumpUrl = '/static/dump/csv/';
|
||||
const defaultTimeout = 10000;
|
||||
|
||||
@@ -7,7 +7,6 @@ const fs = require('fs');
|
||||
const thepiratebay = require('./thepiratebay_api.js');
|
||||
const bing = require('nodejs-bing');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
|
||||
|
||||
@@ -20,6 +19,7 @@ async function scrape() {
|
||||
const lastDump = { updatedAt: 2147000000 };
|
||||
//const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
|
||||
//const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
|
||||
const checkPoint = 611000;
|
||||
|
||||
if (lastDump) {
|
||||
console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
|
||||
@@ -34,6 +34,11 @@ async function scrape() {
|
||||
if (entriesProcessed % 1000 === 0) {
|
||||
console.log(`Processed ${entriesProcessed} entries`);
|
||||
}
|
||||
if (entriesProcessed <= checkPoint) {
|
||||
entriesProcessed++;
|
||||
return;
|
||||
}
|
||||
|
||||
const row = line.match(/(?<=^|;)(".*"|[^;]+)(?=;|$)/g);
|
||||
if (row.length !== 4) {
|
||||
console.log(`Invalid row: ${line}`);
|
||||
|
||||
@@ -12,7 +12,7 @@ const CSV_FILE_PATH = '/tmp/tpb.csv';
|
||||
const limiter = new Bottleneck({ maxConcurrent: 40 });
|
||||
|
||||
async function scrape() {
|
||||
// await processTorrentRecord({ torrentId: 35313644, category: 'Video' });
|
||||
// await processTorrentRecord({ torrentId: 26877339, category: 'Video' });
|
||||
console.log(`starting to scrape tpb dump...`);
|
||||
//const checkPoint = moment('2013-06-16 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
|
||||
const checkPoint = 4115000;
|
||||
|
||||
Reference in New Issue
Block a user