diff --git a/scraper/lib/repository.js b/scraper/lib/repository.js
index b6daa63..0831e68 100644
--- a/scraper/lib/repository.js
+++ b/scraper/lib/repository.js
@@ -186,7 +186,7 @@ function getTorrentsWithoutSize() {
 function getUpdateSeedersTorrents() {
   const until = moment().subtract(7, 'days').format('YYYY-MM-DD');
   return Torrent.findAll({
-    where: literal(`torrent."updatedAt" < \'${until}\' and torrent."provider" not in (\'NyaaSi\')`),
+    where: literal(`torrent."updatedAt" < \'${until}\'`),
     limit: 100,
     order: [
       ['seeders', 'DESC'],
diff --git a/scraper/scheduler/scrapers.js b/scraper/scheduler/scrapers.js
index a9edfb3..10994da 100644
--- a/scraper/scheduler/scrapers.js
+++ b/scraper/scheduler/scrapers.js
@@ -6,11 +6,13 @@ const leetxScraper = require('../scrapers/1337x/1337x_scraper');
 const kickassScraper = require('../scrapers/kickass/kickass_scraper');
 const rarbgScraper = require('../scrapers/rarbg/rarbg_scraper');
 const nyaaPantsuScraper = require('../scrapers/nyaapantsu/nyaa_pantsu_scraper');
+const nyaaSiScraper = require('../scrapers/nyaasi/nyaa_si_scraper');
 
 module.exports = [
   { scraper: ytsScraper, name: ytsScraper.NAME, cron: '0 0 */4 ? * *' },
   { scraper: eztvScraper, name: eztvScraper.NAME, cron: '0 0 */4 ? * *' },
   { scraper: horribleSubsScraper, name: horribleSubsScraper.NAME, cron: '0 0 */4 ? * *' },
+  { scraper: nyaaSiScraper, name: nyaaSiScraper.NAME, cron: '0 0 */4 ? * *' },
   { scraper: nyaaPantsuScraper, name: nyaaPantsuScraper.NAME, cron: '0 0 */4 ? * *' },
   { scraper: rarbgScraper, name: rarbgScraper.NAME, cron: '0 0 */2 ? * *' },
   { scraper: thepiratebayScraper, name: thepiratebayScraper.NAME, cron: '0 0 */2 ? * *' },
diff --git a/scraper/scrapers/nyaasi/nyaa_si_api.js b/scraper/scrapers/nyaasi/nyaa_si_api.js
new file mode 100644
index 0000000..2bb3be1
--- /dev/null
+++ b/scraper/scrapers/nyaasi/nyaa_si_api.js
@@ -0,0 +1,103 @@
+const { si } = require('nyaapi');
+
+// Category ids accepted by browse(), which hands them to si.list.
+const Categories = {
+  ANIME: {
+    ALL: '1_0',
+    MUSIC_VIDEO: '1_1',
+    ENGLISH: '1_2',
+    NON_ENGLISH: '1_3',
+    RAW: '1_4'
+  },
+  LIVE_ACTION: {
+    ALL: '4_0',
+    ENGLISH: '4_1',
+    PROMOTIONAL_VIDEO: '4_2',
+    NON_ENGLISH: '4_3',
+    RAW: '4_4'
+  }
+};
+
+/**
+ * Fetches a single torrent through si.infoRequest.
+ * @param {string|number} torrentId id handed to si.infoRequest
+ * @returns {Promise<Object>} parsed torrent (see parseTorrent) carrying the requested torrentId;
+ *   rejects when torrentId is falsy
+ */
+function torrent(torrentId) {
+  if (!torrentId) {
+    return Promise.reject(new Error(`Failed ${torrentId} search`));
+  }
+
+  return si.infoRequest(torrentId)
+      .then(result => parseTorrent(result))
+      .then(result => ({ ...result, torrentId }));
+}
+
+/**
+ * Searches through si.search and parses every hit.
+ * @param {string} query search term
+ * @returns {Promise<Object[]>} parsed torrents
+ */
+function search(query) {
+  return si.search(query)
+      .then(results => results.map(torrent => parseTorrent(torrent)));
+}
+
+/**
+ * Lists one page of a category through si.list.
+ * @param {Object} [config]
+ * @param {number} [config.page=1] page to fetch
+ * @param {string} [config.category=Categories.ANIME.ENGLISH] category id
+ * @param {string} [config.sort='id'] sort option passed through to si.list
+ * @returns {Promise<Object[]>} parsed torrents; empty when the response has no results
+ */
+function browse(config = {}) {
+  const page = config.page || 1;
+  const category = config.category || Categories.ANIME.ENGLISH;
+  const sort = config.sort || 'id';
+
+  return si.list(category, page, { sort })
+      .then(response => response.results || [])
+      .then(results => results.map(torrent => parseTorrent(torrent)));
+}
+
+// Maps a raw nyaapi torrent object onto the record shape returned by this module.
+function parseTorrent(torrent) {
+  return {
+    // collapse every whitespace run (tabs included) into a single space
+    title: torrent.name.replace(/\s+/g, ' ').trim(),
+    torrentId: torrent.id,
+    infoHash: torrent.hash.trim().toLowerCase(),
+    magnetLink: torrent.magnet,
+    torrentLink: torrent.torrent,
+    seeders: parseInt(torrent.seeders, 10),
+    size: parseSize(torrent.filesize),
+    uploadDate: new Date(torrent.date),
+    category: torrent.sub_category,
+  };
+}
+
+/**
+ * Converts a size label such as "1.5 GiB" into a byte count.
+ * @param {string} sizeText number followed by a unit; ' and , separators are stripped
+ * @returns {number|undefined} floored byte count, or undefined for empty input
+ */
+function parseSize(sizeText) {
+  if (!sizeText) {
+    return undefined;
+  }
+  let scale = 1;
+  if (sizeText.includes('TiB')) {
+    scale = 1024 * 1024 * 1024 * 1024;
+  } else if (sizeText.includes('GiB')) {
+    scale = 1024 * 1024 * 1024;
+  } else if (sizeText.includes('MiB')) {
+    scale = 1024 * 1024;
+  } else if (sizeText.includes('KiB') || sizeText.includes('kB')) {
+    scale = 1024;
+  }
+  return Math.floor(parseFloat(sizeText.replace(/[',]/g, '')) * scale);
+}
+
+module.exports = { torrent, search, browse, Categories };
diff --git a/scraper/scrapers/nyaasi/nyaa_si_scraper.js b/scraper/scrapers/nyaasi/nyaa_si_scraper.js
new file mode 100644
index 0000000..8ea19f3
--- /dev/null
+++ b/scraper/scrapers/nyaasi/nyaa_si_scraper.js
@@ -0,0 +1,107 @@
+const moment = require('moment');
+const Bottleneck = require('bottleneck');
+const nyaasi = require('./nyaa_si_api');
+const { Type } = require('../../lib/types');
+const Promises = require('../../lib/promises');
+const repository = require('../../lib/repository');
+const { createTorrentEntry, checkAndUpdateTorrent } = require('../../lib/torrentEntries');
+
+const NAME = 'NyaaSi';
+// last browse page visited per category on each scrape
+const UNTIL_PAGE = 5;
+
+const limiter = new Bottleneck({ maxConcurrent: 10 });
+
+/**
+ * Scrapes the latest torrents, then stores the scrape start time on the provider entry.
+ * @returns {Promise<void>}
+ */
+async function scrape() {
+  const scrapeStart = moment();
+  const lastScrape = await repository.getProvider({ name: NAME });
+  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
+
+  await scrapeLatestTorrents();
+  lastScrape.lastScraped = scrapeStart;
+  await lastScrape.save();
+  console.log(`[${moment()}] finished ${NAME} scrape`);
+}
+
+/**
+ * Re-fetches a stored torrent so its seeder count can be refreshed.
+ * @param {Object} torrent stored torrent; only torrentId is read
+ * @returns {Promise<Object[]>} the fetched torrent in a one-element array, or an empty array
+ *   when its seeder count is not an integer
+ */
+async function updateSeeders(torrent) {
+  return limiter.schedule(() => nyaasi.torrent(torrent.torrentId))
+      .then(foundTorrent => {
+        if (Number.isInteger(foundTorrent.seeders)) {
+          return [foundTorrent];
+        }
+        return [];
+      });
+}
+
+// Scrapes every allowed category one after another and flattens the per-category results.
+async function scrapeLatestTorrents() {
+  const allowedCategories = [
+    nyaasi.Categories.ANIME.ENGLISH
+  ];
+
+  return Promises.sequence(allowedCategories.map(category => () => scrapeLatestTorrentsForCategory(category)))
+      .then(entries => entries.reduce((a, b) => a.concat(b), []));
+}
+
+/**
+ * Processes one browse page of a category, then recurses into the next page until a page
+ * comes back empty or page UNTIL_PAGE has been processed.
+ * @param {string} category nyaasi.Categories id to browse
+ * @param {number} [page=1] page to start from
+ * @returns {Promise<void>}
+ */
+async function scrapeLatestTorrentsForCategory(category, page = 1) {
+  console.log(`Scrapping ${NAME} ${category} category page ${page}`);
+  // forward the category as well; with page alone browse() silently used its default category
+  return nyaasi.browse({ page, category })
+      .catch(error => {
+        console.warn(`Failed ${NAME} scrapping for [${page}] ${category} due: `, error);
+        return Promise.resolve([]);
+      })
+      .then(torrents => Promise.all(torrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent)
+          .catch(error => {
+            console.warn(`Failed processing [${torrent.infoHash}] ${torrent.title} due: `, error);
+            return Promise.resolve();
+          })))))
+      .then(resolved => resolved.length > 0 && page < UNTIL_PAGE
+          ? scrapeLatestTorrentsForCategory(category, page + 1)
+          : Promise.resolve());
+}
+
+/**
+ * Creates a torrent entry for a scraped record unless checkAndUpdateTorrent reports it handled.
+ * @param {Object} record parsed torrent from nyaa_si_api
+ * @returns {Promise<Object>} the record itself when it is falsy or checkAndUpdateTorrent
+ *   returned a truthy value, otherwise the newly created torrent
+ */
+async function processTorrentRecord(record) {
+  if (!record || await checkAndUpdateTorrent(record)) {
+    return record;
+  }
+
+  const torrent = {
+    infoHash: record.infoHash,
+    torrentLink: record.torrentLink,
+    provider: NAME,
+    torrentId: record.torrentId,
+    title: record.title,
+    type: Type.ANIME,
+    size: record.size,
+    seeders: record.seeders,
+    uploadDate: record.uploadDate,
+  };
+
+  return createTorrentEntry(torrent).then(() => torrent);
+}
+
+module.exports = { scrape, updateSeeders, NAME };