mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
[scraper] update tpb fake removal cron and concurrency
This commit is contained in:
@@ -18,7 +18,7 @@ module.exports = [
|
|||||||
{ scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */1 ? * *' },
|
{ scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */1 ? * *' },
|
||||||
{ scraper: rutorScraper, name: rutorScraper.NAME, cron: '0 0 */4 ? * *' },
|
{ scraper: rutorScraper, name: rutorScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||||
{ scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */2 ? * *' },
|
{ scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */2 ? * *' },
|
||||||
{ scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 30 1 ? * * *' },
|
{ scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 0 */12 ? * *' },
|
||||||
{ scraper: torrentGalaxyScraper, name: torrentGalaxyScraper.NAME, cron: '0 0 */4 ? * *' },
|
{ scraper: torrentGalaxyScraper, name: torrentGalaxyScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||||
{ scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' },
|
{ scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||||
// { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' },
|
// { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' },
|
||||||
|
|||||||
@@ -1,19 +1,20 @@
|
|||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
const { Sequelize } = require('sequelize');
|
const { Sequelize } = require('sequelize');
|
||||||
|
const Bottleneck = require('bottleneck');
|
||||||
const thepiratebay = require('./thepiratebay_api.js');
|
const thepiratebay = require('./thepiratebay_api.js');
|
||||||
const { Type } = require('../../lib/types');
|
const { Type } = require('../../lib/types');
|
||||||
const repository = require('../../lib/repository');
|
const repository = require('../../lib/repository');
|
||||||
const Promises = require('../../lib/promises');
|
|
||||||
|
|
||||||
const NAME = 'ThePirateBay';
|
const NAME = 'ThePirateBay';
|
||||||
const EMPTY_HASH = '0000000000000000000000000000000000000000';
|
const EMPTY_HASH = '0000000000000000000000000000000000000000';
|
||||||
|
|
||||||
const Op = Sequelize.Op;
|
const Op = Sequelize.Op;
|
||||||
|
const limiter = new Bottleneck({ maxConcurrent: 10 });
|
||||||
|
|
||||||
async function scrape() {
|
async function scrape() {
|
||||||
console.log(`Starting ${NAME} fake removal...`);
|
console.log(`Starting ${NAME} fake removal...`);
|
||||||
const startCreatedAt = moment().subtract(14, 'day');
|
const startCreatedAt = moment().subtract(14, 'day');
|
||||||
const endCreatedAt = moment().subtract(1, 'day');
|
const endCreatedAt = moment();
|
||||||
const whereQuery = {
|
const whereQuery = {
|
||||||
provider: NAME,
|
provider: NAME,
|
||||||
type: Type.MOVIE,
|
type: Type.MOVIE,
|
||||||
@@ -22,7 +23,7 @@ async function scrape() {
|
|||||||
return repository.getTorrentsBasedOnQuery(whereQuery)
|
return repository.getTorrentsBasedOnQuery(whereQuery)
|
||||||
.then(torrents => {
|
.then(torrents => {
|
||||||
console.log(`Checking for ${NAME} fake entries in ${torrents.length} torrents`);
|
console.log(`Checking for ${NAME} fake entries in ${torrents.length} torrents`);
|
||||||
return Promises.sequence(torrents.map(torrent => () => removeIfFake(torrent)))
|
return Promise.all(torrents.map(torrent => limiter.schedule(() => removeIfFake(torrent))))
|
||||||
})
|
})
|
||||||
.then(results => {
|
.then(results => {
|
||||||
const removed = results.filter(result => result);
|
const removed = results.filter(result => result);
|
||||||
|
|||||||
Reference in New Issue
Block a user