From 42ac44d1d9192f7d8d64bfec146e2445730dde2f Mon Sep 17 00:00:00 2001 From: TheBeastLT Date: Thu, 13 Feb 2020 09:52:40 +0100 Subject: [PATCH] moves movies ids inside torrent object --- lib/torrentFiles.js | 25 +-- .../horriblesubs/horriblesubs_mapping.json | 200 +++++++++++++++++- scrapers/horriblesubs/horriblesubs_scraper.js | 9 +- scrapers/thepiratebay/thepiratebay_scraper.js | 34 +-- 4 files changed, 231 insertions(+), 37 deletions(-) diff --git a/lib/torrentFiles.js b/lib/torrentFiles.js index 3d28497..75bdf64 100644 --- a/lib/torrentFiles.js +++ b/lib/torrentFiles.js @@ -5,10 +5,11 @@ const { Type } = require('./types'); const MIN_SIZE = 20 * 1024 * 1024; // 20 MB -async function parseTorrentFiles(torrent, imdbId, kitsuId) { +async function parseTorrentFiles(torrent) { const parsedTorrentName = parse(torrent.title); parsedTorrentName.hasMovies = parsedTorrentName.complete || !!torrent.title.match(/movies?(?:\W|$)/); - const metadata = await getMetadata(kitsuId || imdbId, torrent.type || Type.MOVIE).catch(() => undefined); + const metadata = await getMetadata(torrent.kitsuId || torrent.imdbId, torrent.type || Type.MOVIE) + .catch(() => undefined); // if (metadata && metadata.type !== torrent.type && torrent.type !== Type.ANIME) { // throw new Error(`Mismatching entry type for ${torrent.name}: ${torrent.type}!=${metadata.type}`); @@ -37,8 +38,8 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) { infoHash: torrent.infoHash, title: torrent.title, size: torrent.size, - imdbId: imdbId || metadata && metadata.imdb_id, - kitsuId: kitsuId || metadata && metadata.kitsu_id + imdbId: torrent.imdbId || metadata && metadata.imdb_id, + kitsuId: torrent.kitsuId || metadata && metadata.kitsu_id }]; } @@ -49,8 +50,8 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) { .then((files) => decomposeEpisodes(torrent, files, metadata)) .then((files) => assignKitsuOrImdbEpisodes(files, metadata)) .then((files) => Promise.all(files.map(file => file.isMovie - ? mapSeriesMovie(file, torrent.infoHash) - : mapSeriesEpisode(file, torrent.infoHash, imdbId, kitsuId)))) + ? mapSeriesMovie(file, torrent) + : mapSeriesEpisode(file, torrent)))) .then((files) => files.reduce((a, b) => a.concat(b), [])) .catch((error) => { console.log(`Failed getting files for ${torrent.title}`, error.message); @@ -70,27 +71,27 @@ async function getSeriesFiles(torrent, parsedTorrentName) { return torrentFiles(torrent); } -async function mapSeriesEpisode(file, infoHash, imdbId, kitsuId) { +async function mapSeriesEpisode(file, torrent) { if (!file.episodes && !file.kitsuEpisodes) { return Promise.resolve([]); } const episodeIndexes = [...(file.episodes || file.kitsuEpisodes).keys()]; return Promise.resolve(episodeIndexes.map((index) => ({ - infoHash: infoHash, + infoHash: torrent.infoHash, fileIndex: file.fileIndex, title: file.path || file.name, size: file.size, - imdbId: imdbId || file.imdbId, + imdbId: torrent.imdbId || file.imdbId, imdbSeason: file.season, imdbEpisode: file.episodes && file.episodes[index], - kitsuId: kitsuId || file.kitsuId, + kitsuId: torrent.kitsuId || file.kitsuId, kitsuEpisode: file.kitsuEpisodes && file.kitsuEpisodes[index] }))) } -async function mapSeriesMovie(file, infoHash) { +async function mapSeriesMovie(file, torrent) { return findMovieImdbId(file).then((imdbId) => [{ - infoHash: infoHash, + infoHash: torrent.infoHash, fileIndex: file.fileIndex, title: file.name, size: file.size, diff --git a/scrapers/horriblesubs/horriblesubs_mapping.json b/scrapers/horriblesubs/horriblesubs_mapping.json index b0e1d43..c6fb611 100644 --- a/scrapers/horriblesubs/horriblesubs_mapping.json +++ b/scrapers/horriblesubs/horriblesubs_mapping.json @@ -1036,7 +1036,11 @@ }, "Kyoukai no Rinne": { "showId": "258", - "kitsu_id": [ "10018", "11366", "12561" ] + "kitsu_id": [ + "10018", + "11366", + "12561" + ] }, "Kyoukai Senjou no Horizon S2": { "showId": "259", @@ -2380,7 +2384,10 @@ }, "Mobile Suit Gundam Thunderbolt": { "showId": "602", - "kitsu_id": ["11476", "12755"] + "kitsu_id": [ + "11476", + "12755" + ] }, "Durarara!!": { "showId": "603", @@ -3020,7 +3027,11 @@ }, "Nobunaga no Shinobi": { "showId": "773", - "kitsu_id": [ "11871", "13303", "14207" ] + "kitsu_id": [ + "11871", + "13303", + "14207" + ] }, "Zutto Mae kara Suki deshita": { "showId": "774", @@ -3380,7 +3391,12 @@ }, "THE iDOLM@STER CINDERELLA GIRLS Theater (TV)": { "showId": "866", - "kitsu_id": [ "12671","13895", "41377", "42229" ] + "kitsu_id": [ + "12671", + "13895", + "41377", + "42229" + ] }, "THE iDOLM@STER CINDERELLA GIRLS Theater (Web)": { "showId": "866", @@ -3812,7 +3828,10 @@ }, "Yuki Yuna wa Yusha de Aru - Washio Sumi no Shou": { "showId": "976", - "kitsu_id": [ "12678", "14029" ] + "kitsu_id": [ + "12678", + "14029" + ] }, "ID-0": { "showId": "978", @@ -3868,7 +3887,10 @@ }, "Hozuki no Reitetsu S2": { "showId": "991", - "kitsu_id": [ "13226", "14157" ] + "kitsu_id": [ + "13226", + "14157" + ] }, "Cardfight!! Vanguard G Z": { "showId": "992", @@ -4080,7 +4102,10 @@ }, "Beatless": { "showId": "1047", - "kitsu_id": [ "13939", "41407" ] + "kitsu_id": [ + "13939", + "41407" + ] }, "Zoku Touken Ranbu - Hanamaru": { "showId": "1048", @@ -4140,7 +4165,10 @@ }, "Souten no Ken Re-Genesis": { "showId": "1062", - "kitsu_id": [ "13983", "41953" ] + "kitsu_id": [ + "13983", + "41953" + ] }, "Gundam Build Divers": { "showId": "1063", @@ -4864,7 +4892,10 @@ }, "Fairy Gone": { "showId": "1250", - "kitsu_id": [ "42130", "42358" ] + "kitsu_id": [ + "42130", + "42358" + ] }, "Shoumetsu Toshi": { "showId": "1251", @@ -5212,5 +5243,156 @@ }, "Thunderbolt Fantasy - Bewitching Melody of the West": { "showId": "1342" + }, + "A3! Season Spring & Summer": { + "showId": "1378", + "kitsu_id": "42146" + }, + "ARP Backstage Pass": { + "showId": "1379", + "kitsu_id": "42879" + }, + "BanG Dream! S3": { + "showId": "1354", + "kitsu_id": "41290" + }, + "Boku no Tonari ni Ankoku Hakaishin ga Imasu": { + "showId": "1374", + "kitsu_id": "42100" + }, + "Darwin's Game": { + "showId": "1344", + "kitsu_id": "42260" + }, + "Eizouken ni wa Te wo Dasu na!": { + "showId": "1347", + "kitsu_id": "42343" + }, + "Haikyuu!! Riku vs Kuu": { + "showId": "1372", + "kitsu_id": "42502" + }, + "Haikyuu!! S4": { + "showId": "1369", + "kitsu_id": "42059" + }, + "Hatena Illusion": { + "showId": "1360", + "kitsu_id": "13704" + }, + "Heya Camp": { + "showId": "1351", + "kitsu_id": "41978" + }, + "Housekishou Richard-shi no Nazo Kantei": { + "showId": "1366", + "kitsu_id": "42488" + }, + "ID INVADED": { + "showId": "1348", + "kitsu_id": "42436" + }, + "Infinite Dendrogram": { + "showId": "1362", + "kitsu_id": "42131" + }, + "Isekai Quartet S2": { + "showId": "1380", + "kitsu_id": "42410" + }, + "Ishuzoku Reviewers": { + "showId": "1375", + "kitsu_id": "42744" + }, + "Itai no wa Iya nano de Bougyoryoku ni Kyokufuri Shitai to Omoimasu": { + "showId": "1357", + "kitsu_id": "42043" + }, + "Jibaku Shounen Hanako-kun": { + "showId": "1365", + "kitsu_id": "42322" + }, + "Koisuru Asteroid": { + "showId": "1345", + "kitsu_id": "42470" + }, + "Kyochuu Rettou Movie": { + "showId": "1371", + "kitsu_id": "42941" + }, + "Kyokou Suiri": { + "showId": "1377", + "kitsu_id": "42117" + }, + "Magia Record": { + "showId": "1346", + "kitsu_id": "42016" + }, + "Majutsushi Orphen Hagure Tabi": { + "showId": "1355", + "kitsu_id": "42329" + }, + "Murenase! Seton Gakuen": { + "showId": "1352", + "kitsu_id": "42601" + }, + "Nanabun no Nijyuuni": { + "showId": "1376", + "kitsu_id": "42456" + }, + "Nekopara": { + "showId": "1359", + "kitsu_id": "13121" + }, + "number24": { + "showId": "1356", + "kitsu_id": "42209" + }, + "Oda Cinnamon Nobunaga": { + "showId": "1373", + "kitsu_id": "42911" + }, + "Oshi ga Budoukan Ittekuretara Shinu": { + "showId": "1363", + "kitsu_id": "41309" + }, + "Pet": { + "showId": "1350", + "kitsu_id": "41089" + }, + "Plunderer": { + "showId": "1358", + "kitsu_id": "40600" + }, + "Re Zero kara Hajimeru Isekai Seikatsu - Director's Cut": { + "showId": "660" + }, + "Rikei ga Koi ni Ochita no de Shoumei shitemita": { + "showId": "1368", + "kitsu_id": "42297" + }, + "Runway de Waratte": { + "showId": "1370", + "kitsu_id": "42552" + }, + "Show By Rock!! Mashumairesh!!": { + "showId": "1361", + "kitsu_id": "42885" + }, + "Somali to Mori no Kamisama": { + "showId": "1349", + "kitsu_id": "42201" + }, + "Toaru Kagaku no Railgun T": { + "showId": "1367", + "kitsu_id": "41979" + }, + "Uchi Tama - Uchi no Tama Shirimasen ka": { + "showId": "1364", + "kitsu_id": "42397" + }, + "Yatogame-chan Kansatsu Nikki S2": { + "showId": "1353", + "kitsu_id": "42398" } } \ No newline at end of file diff --git a/scrapers/horriblesubs/horriblesubs_scraper.js b/scrapers/horriblesubs/horriblesubs_scraper.js index 17506ee..879c77e 100644 --- a/scrapers/horriblesubs/horriblesubs_scraper.js +++ b/scrapers/horriblesubs/horriblesubs_scraper.js @@ -63,10 +63,10 @@ async function enrichShow(show) { return { showId: showId, + kitsu_id: metadata.kitsuId, ...show, - kitsu_id: metadata.kitsu_id, - kitsuTitle: metadata.name, - imdb_id: metadata.imdb_id + kitsuTitle: metadata.title, + imdb_id: metadata.imdbId } } @@ -89,12 +89,13 @@ async function _parseShowData(showData) { title: `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`, size: 300000000, type: Type.ANIME, + kitsuId: kitsuId, uploadDate: episodeInfo.uploadDate, }))) .reduce((a, b) => a.concat(b), []) .map((incompleteTorrent) => entryLimiter.schedule(() => checkIfExists(incompleteTorrent) .then((torrent) => torrent && updateCurrentSeeders(torrent)) - .then((torrent) => torrent && parseTorrentFiles(torrent, undefined, kitsuId) + .then((torrent) => torrent && parseTorrentFiles(torrent) .then((files) => verifyFiles(torrent, files)) .then((files) => repository.createTorrent(torrent) .then(() => files.forEach(file => repository.createFile(file))) diff --git a/scrapers/thepiratebay/thepiratebay_scraper.js b/scrapers/thepiratebay/thepiratebay_scraper.js index 367cd5a..365514e 100644 --- a/scrapers/thepiratebay/thepiratebay_scraper.js +++ b/scrapers/thepiratebay/thepiratebay_scraper.js @@ -15,7 +15,7 @@ const { parseTorrentFiles } = require('../../lib/torrentFiles'); const NAME = 'ThePirateBay'; const CSV_FILE_PATH = '/tmp/tpb_dump.csv'; -const limiter = new Bottleneck({maxConcurrent: 40}); +const limiter = new Bottleneck({ maxConcurrent: 40 }); async function scrape() { const lastScraped = await repository.getProvider({ name: NAME }); @@ -73,15 +73,16 @@ async function scrape() { .then(() => entriesProcessed++); }); lr.on('error', (err) => { - console.log(err); + console.log(err); }); lr.on('end', () => { - fs.unlink(CSV_FILE_PATH); - updateProvider({ name: NAME, lastScraped: lastDump.updatedAt }); - console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`); + fs.unlink(CSV_FILE_PATH); + updateProvider({ name: NAME, lastScraped: lastDump.updatedAt }); + console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`); }); } } + const allowedCategories = [ thepiratebay.Categories.VIDEO.MOVIES, thepiratebay.Categories.VIDEO.MOVIES_HD, @@ -94,12 +95,13 @@ const seriesCategories = [ thepiratebay.Categories.VIDEO.TV_SHOWS, thepiratebay.Categories.VIDEO.TV_SHOWS_HD ]; + async function processTorrentRecord(record) { const alreadyExists = await repository.getSkipTorrent(record) .catch(() => repository.getTorrent(record)) .catch(() => undefined); if (alreadyExists) { - return; + return; } const torrentFound = await findTorrent(record); @@ -128,17 +130,18 @@ async function processTorrentRecord(record) { title: torrentFound.name, size: record.size, type: type, + imdbId: imdbId, uploadDate: record.uploadDate, seeders: torrentFound.seeders, }; - if (!imdbId && !titleInfo.complete) { + if (!torrent.imdbId && !titleInfo.complete) { console.log(`imdbId not found: ${torrentFound.name}`); repository.createFailedImdbTorrent(torrent); return; } - const files = await parseTorrentFiles(torrent, imdbId); + const files = await parseTorrentFiles(torrent); if (!files || !files.length) { console.log(`no video files found: ${torrentFound.name}`); return; @@ -171,7 +174,8 @@ async function findTorrentInSource(record) { async function findTorrentViaBing(record) { return bing.web(`${record.infoHash}`) .then((results) => results - .find(result => result.description.includes('Direct download via magnet link') || result.description.includes('Get this torrent'))) + .find(result => result.description.includes('Direct download via magnet link') || result.description.includes( + 'Get this torrent'))) .then((result) => { if (!result) { throw new Error(`Failed to find torrent ${record.title}`); @@ -187,15 +191,21 @@ function downloadDump(dump) { console.log('dump file already exist...'); return; } - } catch(err) { + } catch (err) { console.error(err) } console.log('downloading dump file...'); return needle('get', dump.url, { open_timeout: 2000, output: '/tmp/tpb_dump.gz' }) .then((response) => response.body) - .then((body) => { console.log('unzipping dump file...'); return ungzip(body); }) - .then((unzipped) => { console.log('writing dump file...'); return fs.promises.writeFile(CSV_FILE_PATH, unzipped); }) + .then((body) => { + console.log('unzipping dump file...'); + return ungzip(body); + }) + .then((unzipped) => { + console.log('writing dump file...'); + return fs.promises.writeFile(CSV_FILE_PATH, unzipped); + }) } module.exports = { scrape }; \ No newline at end of file