diff --git a/scraper/lib/repository.js b/scraper/lib/repository.js
index 05b5c05..7fa4d7c 100644
--- a/scraper/lib/repository.js
+++ b/scraper/lib/repository.js
@@ -170,7 +170,17 @@ function getTorrent(torrent) {
 }
 
 function getTorrentsBasedOnTitle(titleQuery, type) {
-  return Torrent.findAll({ where: { title: { [Op.regexp]: `${titleQuery}` }, type: type } });
+  return getTorrentsBasedOnQuery({ title: { [Op.regexp]: `${titleQuery}` }, type: type });
+}
+
+/**
+ * Finds all torrents matching an arbitrary Sequelize `where` clause.
+ *
+ * @param {object} where - condition object passed unchanged to `Torrent.findAll`.
+ * @returns {Promise} whatever `Torrent.findAll` resolves with.
+ */
+function getTorrentsBasedOnQuery(where) {
+  return Torrent.findAll({ where: where });
 }
 
 function getTorrentsWithoutSize() {
@@ -219,6 +229,16 @@ function setTorrentSeeders(torrent, seeders) {
   );
 }
 
+/**
+ * Deletes every stored torrent whose `infoHash` equals the given torrent's.
+ *
+ * @param {object} torrent - only `torrent.infoHash` is read.
+ * @returns {Promise} whatever `Torrent.destroy` resolves with.
+ */
+function deleteTorrent(torrent) {
+  return Torrent.destroy({ where: { infoHash: torrent.infoHash } });
+}
+
 function createFile(file) {
   if (file.id) {
     return (file.dataValues ? file.save() : File.upsert(file))
@@ -304,6 +324,8 @@ module.exports = {
   setTorrentSeeders,
   getTorrent,
   getTorrentsBasedOnTitle,
+  getTorrentsBasedOnQuery,
+  deleteTorrent,
   getUpdateSeedersTorrents,
   getNoContentsTorrents,
   createFile,
@@ -319,4 +341,4 @@ module.exports = {
   getSkipTorrent,
   createSkipTorrent,
   getTorrentsWithoutSize
-};
\ No newline at end of file
+};
diff --git a/scraper/scheduler/scrapers.js b/scraper/scheduler/scrapers.js
index 88bd79f..1f1254e 100644
--- a/scraper/scheduler/scrapers.js
+++ b/scraper/scheduler/scrapers.js
@@ -1,4 +1,5 @@
 const thepiratebayScraper = require('../scrapers/thepiratebay/thepiratebay_scraper');
+const thepiratebayFakeRemoval = require('../scrapers/thepiratebay/thepiratebay_fakes_removal');
 const ytsScraper = require('../scrapers/yts/yts_scraper');
 const eztvScraper = require('../scrapers/eztv/eztv_scraper');
 const leetxScraper = require('../scrapers/1337x/1337x_scraper');
@@ -17,6 +18,7 @@ module.exports = [
   { scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */1 ? * *' },
   { scraper: rutorScraper, name: rutorScraper.NAME, cron: '0 0 */4 ? * *' },
   { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */2 ? * *' },
+  { scraper: thepiratebayFakeRemoval, name: thepiratebayFakeRemoval.NAME, cron: '0 30 1 ? * *' },
   { scraper: torrentGalaxyScraper, name: torrentGalaxyScraper.NAME, cron: '0 0 */4 ? * *' },
   { scraper: leetxScraper, name: leetxScraper.NAME, cron: '0 0 */4 ? * *' },
   // { scraper: kickassScraper, name: kickassScraper.NAME, cron: '0 0 */4 ? * *' },
@@ -26,4 +28,4 @@ module.exports = [
   // { scraper: require('../scrapers/thepiratebay/thepiratebay_dump_scraper') }
   // { scraper: require('../scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper') }
   // { scraper: require('../scrapers/thepiratebay/thepiratebay_update_size_scraper') }
-];
\ No newline at end of file
+];
diff --git a/scraper/scrapers/thepiratebay/thepiratebay_fakes_removal.js b/scraper/scrapers/thepiratebay/thepiratebay_fakes_removal.js
new file mode 100644
index 0000000..beb9070
--- /dev/null
+++ b/scraper/scrapers/thepiratebay/thepiratebay_fakes_removal.js
@@ -0,0 +1,69 @@
+const moment = require('moment');
+const { Sequelize } = require('sequelize');
+const thepiratebay = require('./thepiratebay_api.js');
+const { Type } = require('../../lib/types');
+const repository = require('../../lib/repository');
+const Promises = require('../../lib/promises');
+
+const NAME = 'ThePirateBay';
+// All-zero info hash; removeIfFake treats a torrent as fake when the API returns it.
+const EMPTY_HASH = '0000000000000000000000000000000000000000';
+
+const Op = Sequelize.Op;
+
+/**
+ * Re-checks stored ThePirateBay movie torrents created between 14 days and
+ * 1 day ago and deletes the ones removeIfFake identifies as fake.
+ * Each check is handed to Promises.sequence as a deferred call (presumably run
+ * one at a time - confirm in lib/promises).
+ *
+ * @returns {Promise} resolves with no value after the summary line is logged.
+ */
+async function scrape() {
+  console.log(`Starting ${NAME} fake removal...`);
+  const startCreatedAt = moment().subtract(14, 'day');
+  const endCreatedAt = moment().subtract(1, 'day');
+  const whereQuery = {
+    provider: NAME,
+    type: Type.MOVIE,
+    createdAt: { [Op.between]: [startCreatedAt, endCreatedAt] }
+  };
+  const torrents = await repository.getTorrentsBasedOnQuery(whereQuery);
+  console.log(`Checking for ${NAME} fake entries in ${torrents.length} torrents`);
+  const results = await Promises.sequence(torrents.map(torrent => () => removeIfFake(torrent)));
+  // Only truthy results count as removals; removeIfFake yields null when nothing was deleted.
+  const removed = results.filter(result => result);
+  console.log(`Finished ${NAME} fake removal with ${removed.length} removals in ${results.length} torrents`);
+}
+
+/**
+ * Looks the torrent up through the ThePirateBay API and deletes it from the
+ * repository when the API reports the all-zero info hash for it.
+ * Best effort: a failed lookup or delete is logged and yields null so that one
+ * bad torrent does not reject the whole run.
+ *
+ * @param {object} torrent - stored torrent; `torrentId`, `infoHash` and `title` are read here.
+ * @returns {Promise} resolves with the value of repository.deleteTorrent when a
+ *   delete was performed, otherwise with null.
+ */
+async function removeIfFake(torrent) {
+  let tpbTorrentInfo;
+  try {
+    tpbTorrentInfo = await thepiratebay.torrent(torrent.torrentId);
+  } catch (error) {
+    console.warn(`Failed ${NAME} lookup of torrent [${torrent.torrentId}]: ${error}`);
+    return null;
+  }
+  if (!tpbTorrentInfo || tpbTorrentInfo.infoHash !== EMPTY_HASH) {
+    return null;
+  }
+  console.log(`Removing ${NAME} fake torrent [${torrent.torrentId}][${torrent.infoHash}] ${torrent.title}`);
+  try {
+    return await repository.deleteTorrent(torrent);
+  } catch (error) {
+    console.warn(`Failed to remove ${NAME} fake torrent [${torrent.torrentId}][${torrent.infoHash}]: ${error}`);
+    return null;
+  }
+}
+
+module.exports = { scrape, NAME };