moves movies ids inside torrent object

This commit is contained in:
TheBeastLT
2020-02-13 09:52:40 +01:00
parent 30419f3c64
commit 42ac44d1d9
4 changed files with 231 additions and 37 deletions

View File

@@ -5,10 +5,11 @@ const { Type } = require('./types');
const MIN_SIZE = 20 * 1024 * 1024; // 20 MB const MIN_SIZE = 20 * 1024 * 1024; // 20 MB
async function parseTorrentFiles(torrent, imdbId, kitsuId) { async function parseTorrentFiles(torrent) {
const parsedTorrentName = parse(torrent.title); const parsedTorrentName = parse(torrent.title);
parsedTorrentName.hasMovies = parsedTorrentName.complete || !!torrent.title.match(/movies?(?:\W|$)/); parsedTorrentName.hasMovies = parsedTorrentName.complete || !!torrent.title.match(/movies?(?:\W|$)/);
const metadata = await getMetadata(kitsuId || imdbId, torrent.type || Type.MOVIE).catch(() => undefined); const metadata = await getMetadata(torrent.kitsuId || torrent.imdbId, torrent.type || Type.MOVIE)
.catch(() => undefined);
// if (metadata && metadata.type !== torrent.type && torrent.type !== Type.ANIME) { // if (metadata && metadata.type !== torrent.type && torrent.type !== Type.ANIME) {
// throw new Error(`Mismatching entry type for ${torrent.name}: ${torrent.type}!=${metadata.type}`); // throw new Error(`Mismatching entry type for ${torrent.name}: ${torrent.type}!=${metadata.type}`);
@@ -37,8 +38,8 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) {
infoHash: torrent.infoHash, infoHash: torrent.infoHash,
title: torrent.title, title: torrent.title,
size: torrent.size, size: torrent.size,
imdbId: imdbId || metadata && metadata.imdb_id, imdbId: torrent.imdbId || metadata && metadata.imdb_id,
kitsuId: kitsuId || metadata && metadata.kitsu_id kitsuId: torrent.kitsuId || metadata && metadata.kitsu_id
}]; }];
} }
@@ -49,8 +50,8 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) {
.then((files) => decomposeEpisodes(torrent, files, metadata)) .then((files) => decomposeEpisodes(torrent, files, metadata))
.then((files) => assignKitsuOrImdbEpisodes(files, metadata)) .then((files) => assignKitsuOrImdbEpisodes(files, metadata))
.then((files) => Promise.all(files.map(file => file.isMovie .then((files) => Promise.all(files.map(file => file.isMovie
? mapSeriesMovie(file, torrent.infoHash) ? mapSeriesMovie(file, torrent)
: mapSeriesEpisode(file, torrent.infoHash, imdbId, kitsuId)))) : mapSeriesEpisode(file, torrent))))
.then((files) => files.reduce((a, b) => a.concat(b), [])) .then((files) => files.reduce((a, b) => a.concat(b), []))
.catch((error) => { .catch((error) => {
console.log(`Failed getting files for ${torrent.title}`, error.message); console.log(`Failed getting files for ${torrent.title}`, error.message);
@@ -70,27 +71,27 @@ async function getSeriesFiles(torrent, parsedTorrentName) {
return torrentFiles(torrent); return torrentFiles(torrent);
} }
async function mapSeriesEpisode(file, infoHash, imdbId, kitsuId) { async function mapSeriesEpisode(file, torrent) {
if (!file.episodes && !file.kitsuEpisodes) { if (!file.episodes && !file.kitsuEpisodes) {
return Promise.resolve([]); return Promise.resolve([]);
} }
const episodeIndexes = [...(file.episodes || file.kitsuEpisodes).keys()]; const episodeIndexes = [...(file.episodes || file.kitsuEpisodes).keys()];
return Promise.resolve(episodeIndexes.map((index) => ({ return Promise.resolve(episodeIndexes.map((index) => ({
infoHash: infoHash, infoHash: torrent.infoHash,
fileIndex: file.fileIndex, fileIndex: file.fileIndex,
title: file.path || file.name, title: file.path || file.name,
size: file.size, size: file.size,
imdbId: imdbId || file.imdbId, imdbId: torrent.imdbId || file.imdbId,
imdbSeason: file.season, imdbSeason: file.season,
imdbEpisode: file.episodes && file.episodes[index], imdbEpisode: file.episodes && file.episodes[index],
kitsuId: kitsuId || file.kitsuId, kitsuId: torrent.kitsuId || file.kitsuId,
kitsuEpisode: file.kitsuEpisodes && file.kitsuEpisodes[index] kitsuEpisode: file.kitsuEpisodes && file.kitsuEpisodes[index]
}))) })))
} }
async function mapSeriesMovie(file, infoHash) { async function mapSeriesMovie(file, torrent) {
return findMovieImdbId(file).then((imdbId) => [{ return findMovieImdbId(file).then((imdbId) => [{
infoHash: infoHash, infoHash: torrent.infoHash,
fileIndex: file.fileIndex, fileIndex: file.fileIndex,
title: file.name, title: file.name,
size: file.size, size: file.size,

View File

@@ -1036,7 +1036,11 @@
}, },
"Kyoukai no Rinne": { "Kyoukai no Rinne": {
"showId": "258", "showId": "258",
"kitsu_id": [ "10018", "11366", "12561" ] "kitsu_id": [
"10018",
"11366",
"12561"
]
}, },
"Kyoukai Senjou no Horizon S2": { "Kyoukai Senjou no Horizon S2": {
"showId": "259", "showId": "259",
@@ -2380,7 +2384,10 @@
}, },
"Mobile Suit Gundam Thunderbolt": { "Mobile Suit Gundam Thunderbolt": {
"showId": "602", "showId": "602",
"kitsu_id": ["11476", "12755"] "kitsu_id": [
"11476",
"12755"
]
}, },
"Durarara!!": { "Durarara!!": {
"showId": "603", "showId": "603",
@@ -3020,7 +3027,11 @@
}, },
"Nobunaga no Shinobi": { "Nobunaga no Shinobi": {
"showId": "773", "showId": "773",
"kitsu_id": [ "11871", "13303", "14207" ] "kitsu_id": [
"11871",
"13303",
"14207"
]
}, },
"Zutto Mae kara Suki deshita": { "Zutto Mae kara Suki deshita": {
"showId": "774", "showId": "774",
@@ -3380,7 +3391,12 @@
}, },
"THE iDOLM@STER CINDERELLA GIRLS Theater (TV)": { "THE iDOLM@STER CINDERELLA GIRLS Theater (TV)": {
"showId": "866", "showId": "866",
"kitsu_id": [ "12671","13895", "41377", "42229" ] "kitsu_id": [
"12671",
"13895",
"41377",
"42229"
]
}, },
"THE iDOLM@STER CINDERELLA GIRLS Theater (Web)": { "THE iDOLM@STER CINDERELLA GIRLS Theater (Web)": {
"showId": "866", "showId": "866",
@@ -3812,7 +3828,10 @@
}, },
"Yuki Yuna wa Yusha de Aru - Washio Sumi no Shou": { "Yuki Yuna wa Yusha de Aru - Washio Sumi no Shou": {
"showId": "976", "showId": "976",
"kitsu_id": [ "12678", "14029" ] "kitsu_id": [
"12678",
"14029"
]
}, },
"ID-0": { "ID-0": {
"showId": "978", "showId": "978",
@@ -3868,7 +3887,10 @@
}, },
"Hozuki no Reitetsu S2": { "Hozuki no Reitetsu S2": {
"showId": "991", "showId": "991",
"kitsu_id": [ "13226", "14157" ] "kitsu_id": [
"13226",
"14157"
]
}, },
"Cardfight!! Vanguard G Z": { "Cardfight!! Vanguard G Z": {
"showId": "992", "showId": "992",
@@ -4080,7 +4102,10 @@
}, },
"Beatless": { "Beatless": {
"showId": "1047", "showId": "1047",
"kitsu_id": [ "13939", "41407" ] "kitsu_id": [
"13939",
"41407"
]
}, },
"Zoku Touken Ranbu - Hanamaru": { "Zoku Touken Ranbu - Hanamaru": {
"showId": "1048", "showId": "1048",
@@ -4140,7 +4165,10 @@
}, },
"Souten no Ken Re-Genesis": { "Souten no Ken Re-Genesis": {
"showId": "1062", "showId": "1062",
"kitsu_id": [ "13983", "41953" ] "kitsu_id": [
"13983",
"41953"
]
}, },
"Gundam Build Divers": { "Gundam Build Divers": {
"showId": "1063", "showId": "1063",
@@ -4864,7 +4892,10 @@
}, },
"Fairy Gone": { "Fairy Gone": {
"showId": "1250", "showId": "1250",
"kitsu_id": [ "42130", "42358" ] "kitsu_id": [
"42130",
"42358"
]
}, },
"Shoumetsu Toshi": { "Shoumetsu Toshi": {
"showId": "1251", "showId": "1251",
@@ -5212,5 +5243,156 @@
}, },
"Thunderbolt Fantasy - Bewitching Melody of the West": { "Thunderbolt Fantasy - Bewitching Melody of the West": {
"showId": "1342" "showId": "1342"
},
"A3! Season Spring & Summer": {
"showId": "1378",
"kitsu_id": "42146"
},
"ARP Backstage Pass": {
"showId": "1379",
"kitsu_id": "42879"
},
"BanG Dream! S3": {
"showId": "1354",
"kitsu_id": "41290"
},
"Boku no Tonari ni Ankoku Hakaishin ga Imasu": {
"showId": "1374",
"kitsu_id": "42100"
},
"Darwin's Game": {
"showId": "1344",
"kitsu_id": "42260"
},
"Eizouken ni wa Te wo Dasu na!": {
"showId": "1347",
"kitsu_id": "42343"
},
"Haikyuu!! Riku vs Kuu": {
"showId": "1372",
"kitsu_id": "42502"
},
"Haikyuu!! S4": {
"showId": "1369",
"kitsu_id": "42059"
},
"Hatena Illusion": {
"showId": "1360",
"kitsu_id": "13704"
},
"Heya Camp": {
"showId": "1351",
"kitsu_id": "41978"
},
"Housekishou Richard-shi no Nazo Kantei": {
"showId": "1366",
"kitsu_id": "42488"
},
"ID INVADED": {
"showId": "1348",
"kitsu_id": "42436"
},
"Infinite Dendrogram": {
"showId": "1362",
"kitsu_id": "42131"
},
"Isekai Quartet S2": {
"showId": "1380",
"kitsu_id": "42410"
},
"Ishuzoku Reviewers": {
"showId": "1375",
"kitsu_id": "42744"
},
"Itai no wa Iya nano de Bougyoryoku ni Kyokufuri Shitai to Omoimasu": {
"showId": "1357",
"kitsu_id": "42043"
},
"Jibaku Shounen Hanako-kun": {
"showId": "1365",
"kitsu_id": "42322"
},
"Koisuru Asteroid": {
"showId": "1345",
"kitsu_id": "42470"
},
"Kyochuu Rettou Movie": {
"showId": "1371",
"kitsu_id": "42941"
},
"Kyokou Suiri": {
"showId": "1377",
"kitsu_id": "42117"
},
"Magia Record": {
"showId": "1346",
"kitsu_id": "42016"
},
"Majutsushi Orphen Hagure Tabi": {
"showId": "1355",
"kitsu_id": "42329"
},
"Murenase! Seton Gakuen": {
"showId": "1352",
"kitsu_id": "42601"
},
"Nanabun no Nijyuuni": {
"showId": "1376",
"kitsu_id": "42456"
},
"Nekopara": {
"showId": "1359",
"kitsu_id": "13121"
},
"number24": {
"showId": "1356",
"kitsu_id": "42209"
},
"Oda Cinnamon Nobunaga": {
"showId": "1373",
"kitsu_id": "42911"
},
"Oshi ga Budoukan Ittekuretara Shinu": {
"showId": "1363",
"kitsu_id": "41309"
},
"Pet": {
"showId": "1350",
"kitsu_id": "41089"
},
"Plunderer": {
"showId": "1358",
"kitsu_id": "40600"
},
"Re Zero kara Hajimeru Isekai Seikatsu - Director's Cut": {
"showId": "660"
},
"Rikei ga Koi ni Ochita no de Shoumei shitemita": {
"showId": "1368",
"kitsu_id": "42297"
},
"Runway de Waratte": {
"showId": "1370",
"kitsu_id": "42552"
},
"Show By Rock!! Mashumairesh!!": {
"showId": "1361",
"kitsu_id": "42885"
},
"Somali to Mori no Kamisama": {
"showId": "1349",
"kitsu_id": "42201"
},
"Toaru Kagaku no Railgun T": {
"showId": "1367",
"kitsu_id": "41979"
},
"Uchi Tama - Uchi no Tama Shirimasen ka": {
"showId": "1364",
"kitsu_id": "42397"
},
"Yatogame-chan Kansatsu Nikki S2": {
"showId": "1353",
"kitsu_id": "42398"
} }
} }

View File

@@ -63,10 +63,10 @@ async function enrichShow(show) {
return { return {
showId: showId, showId: showId,
kitsu_id: metadata.kitsuId,
...show, ...show,
kitsu_id: metadata.kitsu_id, kitsuTitle: metadata.title,
kitsuTitle: metadata.name, imdb_id: metadata.imdbId
imdb_id: metadata.imdb_id
} }
} }
@@ -89,12 +89,13 @@ async function _parseShowData(showData) {
title: `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`, title: `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`,
size: 300000000, size: 300000000,
type: Type.ANIME, type: Type.ANIME,
kitsuId: kitsuId,
uploadDate: episodeInfo.uploadDate, uploadDate: episodeInfo.uploadDate,
}))) })))
.reduce((a, b) => a.concat(b), []) .reduce((a, b) => a.concat(b), [])
.map((incompleteTorrent) => entryLimiter.schedule(() => checkIfExists(incompleteTorrent) .map((incompleteTorrent) => entryLimiter.schedule(() => checkIfExists(incompleteTorrent)
.then((torrent) => torrent && updateCurrentSeeders(torrent)) .then((torrent) => torrent && updateCurrentSeeders(torrent))
.then((torrent) => torrent && parseTorrentFiles(torrent, undefined, kitsuId) .then((torrent) => torrent && parseTorrentFiles(torrent)
.then((files) => verifyFiles(torrent, files)) .then((files) => verifyFiles(torrent, files))
.then((files) => repository.createTorrent(torrent) .then((files) => repository.createTorrent(torrent)
.then(() => files.forEach(file => repository.createFile(file))) .then(() => files.forEach(file => repository.createFile(file)))

View File

@@ -15,7 +15,7 @@ const { parseTorrentFiles } = require('../../lib/torrentFiles');
const NAME = 'ThePirateBay'; const NAME = 'ThePirateBay';
const CSV_FILE_PATH = '/tmp/tpb_dump.csv'; const CSV_FILE_PATH = '/tmp/tpb_dump.csv';
const limiter = new Bottleneck({maxConcurrent: 40}); const limiter = new Bottleneck({ maxConcurrent: 40 });
async function scrape() { async function scrape() {
const lastScraped = await repository.getProvider({ name: NAME }); const lastScraped = await repository.getProvider({ name: NAME });
@@ -73,15 +73,16 @@ async function scrape() {
.then(() => entriesProcessed++); .then(() => entriesProcessed++);
}); });
lr.on('error', (err) => { lr.on('error', (err) => {
console.log(err); console.log(err);
}); });
lr.on('end', () => { lr.on('end', () => {
fs.unlink(CSV_FILE_PATH); fs.unlink(CSV_FILE_PATH);
updateProvider({ name: NAME, lastScraped: lastDump.updatedAt }); updateProvider({ name: NAME, lastScraped: lastDump.updatedAt });
console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`); console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`);
}); });
} }
} }
const allowedCategories = [ const allowedCategories = [
thepiratebay.Categories.VIDEO.MOVIES, thepiratebay.Categories.VIDEO.MOVIES,
thepiratebay.Categories.VIDEO.MOVIES_HD, thepiratebay.Categories.VIDEO.MOVIES_HD,
@@ -94,12 +95,13 @@ const seriesCategories = [
thepiratebay.Categories.VIDEO.TV_SHOWS, thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD thepiratebay.Categories.VIDEO.TV_SHOWS_HD
]; ];
async function processTorrentRecord(record) { async function processTorrentRecord(record) {
const alreadyExists = await repository.getSkipTorrent(record) const alreadyExists = await repository.getSkipTorrent(record)
.catch(() => repository.getTorrent(record)) .catch(() => repository.getTorrent(record))
.catch(() => undefined); .catch(() => undefined);
if (alreadyExists) { if (alreadyExists) {
return; return;
} }
const torrentFound = await findTorrent(record); const torrentFound = await findTorrent(record);
@@ -128,17 +130,18 @@ async function processTorrentRecord(record) {
title: torrentFound.name, title: torrentFound.name,
size: record.size, size: record.size,
type: type, type: type,
imdbId: imdbId,
uploadDate: record.uploadDate, uploadDate: record.uploadDate,
seeders: torrentFound.seeders, seeders: torrentFound.seeders,
}; };
if (!imdbId && !titleInfo.complete) { if (!torrent.imdbId && !titleInfo.complete) {
console.log(`imdbId not found: ${torrentFound.name}`); console.log(`imdbId not found: ${torrentFound.name}`);
repository.createFailedImdbTorrent(torrent); repository.createFailedImdbTorrent(torrent);
return; return;
} }
const files = await parseTorrentFiles(torrent, imdbId); const files = await parseTorrentFiles(torrent);
if (!files || !files.length) { if (!files || !files.length) {
console.log(`no video files found: ${torrentFound.name}`); console.log(`no video files found: ${torrentFound.name}`);
return; return;
@@ -171,7 +174,8 @@ async function findTorrentInSource(record) {
async function findTorrentViaBing(record) { async function findTorrentViaBing(record) {
return bing.web(`${record.infoHash}`) return bing.web(`${record.infoHash}`)
.then((results) => results .then((results) => results
.find(result => result.description.includes('Direct download via magnet link') || result.description.includes('Get this torrent'))) .find(result => result.description.includes('Direct download via magnet link') || result.description.includes(
'Get this torrent')))
.then((result) => { .then((result) => {
if (!result) { if (!result) {
throw new Error(`Failed to find torrent ${record.title}`); throw new Error(`Failed to find torrent ${record.title}`);
@@ -187,15 +191,21 @@ function downloadDump(dump) {
console.log('dump file already exist...'); console.log('dump file already exist...');
return; return;
} }
} catch(err) { } catch (err) {
console.error(err) console.error(err)
} }
console.log('downloading dump file...'); console.log('downloading dump file...');
return needle('get', dump.url, { open_timeout: 2000, output: '/tmp/tpb_dump.gz' }) return needle('get', dump.url, { open_timeout: 2000, output: '/tmp/tpb_dump.gz' })
.then((response) => response.body) .then((response) => response.body)
.then((body) => { console.log('unzipping dump file...'); return ungzip(body); }) .then((body) => {
.then((unzipped) => { console.log('writing dump file...'); return fs.promises.writeFile(CSV_FILE_PATH, unzipped); }) console.log('unzipping dump file...');
return ungzip(body);
})
.then((unzipped) => {
console.log('writing dump file...');
return fs.promises.writeFile(CSV_FILE_PATH, unzipped);
})
} }
module.exports = { scrape }; module.exports = { scrape };