[scraper] update tpb fake removal cron and concurrency

This commit is contained in:
TheBeastLT
2021-06-29 16:27:04 +02:00
parent 991065b4b6
commit 446108a9fe
2 changed files with 5 additions and 4 deletions

View File

@@ -18,7 +18,7 @@ module.exports = [
{ scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */1 ? * *' },
{ scraper: rutorScraper, name: rutorScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */2 ? * *' },
{ scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 30 1 ? * * *' },
{ scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 0 */12 ? * *' },
{ scraper: torrentGalaxyScraper, name: torrentGalaxyScraper.NAME, cron: '0 0 */4 ? * *' },
{ scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' },
// { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' },

View File

@@ -1,19 +1,20 @@
const moment = require('moment');
const { Sequelize } = require('sequelize');
const Bottleneck = require('bottleneck');
const thepiratebay = require('./thepiratebay_api.js');
const { Type } = require('../../lib/types');
const repository = require('../../lib/repository');
const Promises = require('../../lib/promises');
// Provider name: used in this job's log messages and as the `provider` filter of the torrent query.
const NAME = 'ThePirateBay';
// All-zero hash constant. NOTE(review): its use is outside this excerpt — presumably a placeholder
// info hash for entries without a valid one; confirm against removeIfFake.
const EMPTY_HASH = '0000000000000000000000000000000000000000';
// Local alias for Sequelize.Op.
const Op = Sequelize.Op;
// Bottleneck limiter configured with maxConcurrent: 10; scrape() schedules each removeIfFake call through it.
const limiter = new Bottleneck({ maxConcurrent: 10 });
async function scrape() {
console.log(`Starting ${NAME} fake removal...`);
const startCreatedAt = moment().subtract(14, 'day');
const endCreatedAt = moment().subtract(1, 'day');
const endCreatedAt = moment();
const whereQuery = {
provider: NAME,
type: Type.MOVIE,
@@ -22,7 +23,7 @@ async function scrape() {
return repository.getTorrentsBasedOnQuery(whereQuery)
.then(torrents => {
console.log(`Checking for ${NAME} fake entries in ${torrents.length} torrents`);
return Promises.sequence(torrents.map(torrent => () => removeIfFake(torrent)))
return Promise.all(torrents.map(torrent => limiter.schedule(() => removeIfFake(torrent))))
})
.then(results => {
const removed = results.filter(result => result);