Move movie ids inside the torrent object

This commit is contained in:
TheBeastLT
2020-02-13 09:52:40 +01:00
parent 30419f3c64
commit 42ac44d1d9
4 changed files with 231 additions and 37 deletions

View File

@@ -5,10 +5,11 @@ const { Type } = require('./types');
const MIN_SIZE = 20 * 1024 * 1024; // 20 MB
async function parseTorrentFiles(torrent, imdbId, kitsuId) {
async function parseTorrentFiles(torrent) {
const parsedTorrentName = parse(torrent.title);
parsedTorrentName.hasMovies = parsedTorrentName.complete || !!torrent.title.match(/movies?(?:\W|$)/);
const metadata = await getMetadata(kitsuId || imdbId, torrent.type || Type.MOVIE).catch(() => undefined);
const metadata = await getMetadata(torrent.kitsuId || torrent.imdbId, torrent.type || Type.MOVIE)
.catch(() => undefined);
// if (metadata && metadata.type !== torrent.type && torrent.type !== Type.ANIME) {
// throw new Error(`Mismatching entry type for ${torrent.name}: ${torrent.type}!=${metadata.type}`);
@@ -37,8 +38,8 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) {
infoHash: torrent.infoHash,
title: torrent.title,
size: torrent.size,
imdbId: imdbId || metadata && metadata.imdb_id,
kitsuId: kitsuId || metadata && metadata.kitsu_id
imdbId: torrent.imdbId || metadata && metadata.imdb_id,
kitsuId: torrent.kitsuId || metadata && metadata.kitsu_id
}];
}
@@ -49,8 +50,8 @@ async function parseTorrentFiles(torrent, imdbId, kitsuId) {
.then((files) => decomposeEpisodes(torrent, files, metadata))
.then((files) => assignKitsuOrImdbEpisodes(files, metadata))
.then((files) => Promise.all(files.map(file => file.isMovie
? mapSeriesMovie(file, torrent.infoHash)
: mapSeriesEpisode(file, torrent.infoHash, imdbId, kitsuId))))
? mapSeriesMovie(file, torrent)
: mapSeriesEpisode(file, torrent))))
.then((files) => files.reduce((a, b) => a.concat(b), []))
.catch((error) => {
console.log(`Failed getting files for ${torrent.title}`, error.message);
@@ -70,27 +71,27 @@ async function getSeriesFiles(torrent, parsedTorrentName) {
return torrentFiles(torrent);
}
/**
 * Expands a single series file into one record per episode position it covers.
 *
 * NOTE(review): this span is a diff rendering, so every changed line appears
 * twice: the first copy reads the ids from separate arguments, the second copy
 * reads them from the torrent object. It is not runnable exactly as shown.
 *
 * @param file    parsed file entry; the code reads fileIndex, path, name, size,
 *                season, episodes, kitsuEpisodes, imdbId and kitsuId from it.
 * @param torrent torrent entry; the code reads infoHash, imdbId and kitsuId from it.
 * @returns a promise of an array of records; the array is empty when the file
 *          has neither `episodes` nor `kitsuEpisodes`.
 */
async function mapSeriesEpisode(file, infoHash, imdbId, kitsuId) {
async function mapSeriesEpisode(file, torrent) {
// Nothing to map when no episode information was assigned to the file.
if (!file.episodes && !file.kitsuEpisodes) {
return Promise.resolve([]);
}
// Positions 0..n-1 of whichever episode list exists (`episodes` wins when both do);
// presumably both lists are arrays of equal length - TODO confirm against the caller.
const episodeIndexes = [...(file.episodes || file.kitsuEpisodes).keys()];
return Promise.resolve(episodeIndexes.map((index) => ({
infoHash: infoHash,
infoHash: torrent.infoHash,
fileIndex: file.fileIndex,
// The file path is used as the title when present, the bare name otherwise.
title: file.path || file.name,
size: file.size,
// An id supplied with the torrent takes precedence over the one found on the file.
imdbId: imdbId || file.imdbId,
imdbId: torrent.imdbId || file.imdbId,
imdbSeason: file.season,
imdbEpisode: file.episodes && file.episodes[index],
kitsuId: kitsuId || file.kitsuId,
kitsuId: torrent.kitsuId || file.kitsuId,
kitsuEpisode: file.kitsuEpisodes && file.kitsuEpisodes[index]
})))
}
async function mapSeriesMovie(file, infoHash) {
async function mapSeriesMovie(file, torrent) {
return findMovieImdbId(file).then((imdbId) => [{
infoHash: infoHash,
infoHash: torrent.infoHash,
fileIndex: file.fileIndex,
title: file.name,
size: file.size,

View File

@@ -1036,7 +1036,11 @@
},
"Kyoukai no Rinne": {
"showId": "258",
"kitsu_id": [ "10018", "11366", "12561" ]
"kitsu_id": [
"10018",
"11366",
"12561"
]
},
"Kyoukai Senjou no Horizon S2": {
"showId": "259",
@@ -2380,7 +2384,10 @@
},
"Mobile Suit Gundam Thunderbolt": {
"showId": "602",
"kitsu_id": ["11476", "12755"]
"kitsu_id": [
"11476",
"12755"
]
},
"Durarara!!": {
"showId": "603",
@@ -3020,7 +3027,11 @@
},
"Nobunaga no Shinobi": {
"showId": "773",
"kitsu_id": [ "11871", "13303", "14207" ]
"kitsu_id": [
"11871",
"13303",
"14207"
]
},
"Zutto Mae kara Suki deshita": {
"showId": "774",
@@ -3380,7 +3391,12 @@
},
"THE iDOLM@STER CINDERELLA GIRLS Theater (TV)": {
"showId": "866",
"kitsu_id": [ "12671","13895", "41377", "42229" ]
"kitsu_id": [
"12671",
"13895",
"41377",
"42229"
]
},
"THE iDOLM@STER CINDERELLA GIRLS Theater (Web)": {
"showId": "866",
@@ -3812,7 +3828,10 @@
},
"Yuki Yuna wa Yusha de Aru - Washio Sumi no Shou": {
"showId": "976",
"kitsu_id": [ "12678", "14029" ]
"kitsu_id": [
"12678",
"14029"
]
},
"ID-0": {
"showId": "978",
@@ -3868,7 +3887,10 @@
},
"Hozuki no Reitetsu S2": {
"showId": "991",
"kitsu_id": [ "13226", "14157" ]
"kitsu_id": [
"13226",
"14157"
]
},
"Cardfight!! Vanguard G Z": {
"showId": "992",
@@ -4080,7 +4102,10 @@
},
"Beatless": {
"showId": "1047",
"kitsu_id": [ "13939", "41407" ]
"kitsu_id": [
"13939",
"41407"
]
},
"Zoku Touken Ranbu - Hanamaru": {
"showId": "1048",
@@ -4140,7 +4165,10 @@
},
"Souten no Ken Re-Genesis": {
"showId": "1062",
"kitsu_id": [ "13983", "41953" ]
"kitsu_id": [
"13983",
"41953"
]
},
"Gundam Build Divers": {
"showId": "1063",
@@ -4864,7 +4892,10 @@
},
"Fairy Gone": {
"showId": "1250",
"kitsu_id": [ "42130", "42358" ]
"kitsu_id": [
"42130",
"42358"
]
},
"Shoumetsu Toshi": {
"showId": "1251",
@@ -5212,5 +5243,156 @@
},
"Thunderbolt Fantasy - Bewitching Melody of the West": {
"showId": "1342"
},
"A3! Season Spring & Summer": {
"showId": "1378",
"kitsu_id": "42146"
},
"ARP Backstage Pass": {
"showId": "1379",
"kitsu_id": "42879"
},
"BanG Dream! S3": {
"showId": "1354",
"kitsu_id": "41290"
},
"Boku no Tonari ni Ankoku Hakaishin ga Imasu": {
"showId": "1374",
"kitsu_id": "42100"
},
"Darwin's Game": {
"showId": "1344",
"kitsu_id": "42260"
},
"Eizouken ni wa Te wo Dasu na!": {
"showId": "1347",
"kitsu_id": "42343"
},
"Haikyuu!! Riku vs Kuu": {
"showId": "1372",
"kitsu_id": "42502"
},
"Haikyuu!! S4": {
"showId": "1369",
"kitsu_id": "42059"
},
"Hatena Illusion": {
"showId": "1360",
"kitsu_id": "13704"
},
"Heya Camp": {
"showId": "1351",
"kitsu_id": "41978"
},
"Housekishou Richard-shi no Nazo Kantei": {
"showId": "1366",
"kitsu_id": "42488"
},
"ID INVADED": {
"showId": "1348",
"kitsu_id": "42436"
},
"Infinite Dendrogram": {
"showId": "1362",
"kitsu_id": "42131"
},
"Isekai Quartet S2": {
"showId": "1380",
"kitsu_id": "42410"
},
"Ishuzoku Reviewers": {
"showId": "1375",
"kitsu_id": "42744"
},
"Itai no wa Iya nano de Bougyoryoku ni Kyokufuri Shitai to Omoimasu": {
"showId": "1357",
"kitsu_id": "42043"
},
"Jibaku Shounen Hanako-kun": {
"showId": "1365",
"kitsu_id": "42322"
},
"Koisuru Asteroid": {
"showId": "1345",
"kitsu_id": "42470"
},
"Kyochuu Rettou Movie": {
"showId": "1371",
"kitsu_id": "42941"
},
"Kyokou Suiri": {
"showId": "1377",
"kitsu_id": "42117"
},
"Magia Record": {
"showId": "1346",
"kitsu_id": "42016"
},
"Majutsushi Orphen Hagure Tabi": {
"showId": "1355",
"kitsu_id": "42329"
},
"Murenase! Seton Gakuen": {
"showId": "1352",
"kitsu_id": "42601"
},
"Nanabun no Nijyuuni": {
"showId": "1376",
"kitsu_id": "42456"
},
"Nekopara": {
"showId": "1359",
"kitsu_id": "13121"
},
"number24": {
"showId": "1356",
"kitsu_id": "42209"
},
"Oda Cinnamon Nobunaga": {
"showId": "1373",
"kitsu_id": "42911"
},
"Oshi ga Budoukan Ittekuretara Shinu": {
"showId": "1363",
"kitsu_id": "41309"
},
"Pet": {
"showId": "1350",
"kitsu_id": "41089"
},
"Plunderer": {
"showId": "1358",
"kitsu_id": "40600"
},
"Re Zero kara Hajimeru Isekai Seikatsu - Director's Cut": {
"showId": "660"
},
"Rikei ga Koi ni Ochita no de Shoumei shitemita": {
"showId": "1368",
"kitsu_id": "42297"
},
"Runway de Waratte": {
"showId": "1370",
"kitsu_id": "42552"
},
"Show By Rock!! Mashumairesh!!": {
"showId": "1361",
"kitsu_id": "42885"
},
"Somali to Mori no Kamisama": {
"showId": "1349",
"kitsu_id": "42201"
},
"Toaru Kagaku no Railgun T": {
"showId": "1367",
"kitsu_id": "41979"
},
"Uchi Tama - Uchi no Tama Shirimasen ka": {
"showId": "1364",
"kitsu_id": "42397"
},
"Yatogame-chan Kansatsu Nikki S2": {
"showId": "1353",
"kitsu_id": "42398"
}
}

View File

@@ -63,10 +63,10 @@ async function enrichShow(show) {
return {
showId: showId,
kitsu_id: metadata.kitsuId,
...show,
kitsu_id: metadata.kitsu_id,
kitsuTitle: metadata.name,
imdb_id: metadata.imdb_id
kitsuTitle: metadata.title,
imdb_id: metadata.imdbId
}
}
@@ -89,12 +89,13 @@ async function _parseShowData(showData) {
title: `${episodeInfo.title} - ${episodeInfo.episode} [${mirror.resolution}]`,
size: 300000000,
type: Type.ANIME,
kitsuId: kitsuId,
uploadDate: episodeInfo.uploadDate,
})))
.reduce((a, b) => a.concat(b), [])
.map((incompleteTorrent) => entryLimiter.schedule(() => checkIfExists(incompleteTorrent)
.then((torrent) => torrent && updateCurrentSeeders(torrent))
.then((torrent) => torrent && parseTorrentFiles(torrent, undefined, kitsuId)
.then((torrent) => torrent && parseTorrentFiles(torrent)
.then((files) => verifyFiles(torrent, files))
.then((files) => repository.createTorrent(torrent)
.then(() => files.forEach(file => repository.createFile(file)))

View File

@@ -15,7 +15,7 @@ const { parseTorrentFiles } = require('../../lib/torrentFiles');
const NAME = 'ThePirateBay';
const CSV_FILE_PATH = '/tmp/tpb_dump.csv';
const limiter = new Bottleneck({maxConcurrent: 40});
const limiter = new Bottleneck({ maxConcurrent: 40 });
async function scrape() {
const lastScraped = await repository.getProvider({ name: NAME });
@@ -73,15 +73,16 @@ async function scrape() {
.then(() => entriesProcessed++);
});
lr.on('error', (err) => {
console.log(err);
console.log(err);
});
lr.on('end', () => {
fs.unlink(CSV_FILE_PATH);
updateProvider({ name: NAME, lastScraped: lastDump.updatedAt });
console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`);
fs.unlink(CSV_FILE_PATH);
updateProvider({ name: NAME, lastScraped: lastDump.updatedAt });
console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`);
});
}
}
const allowedCategories = [
thepiratebay.Categories.VIDEO.MOVIES,
thepiratebay.Categories.VIDEO.MOVIES_HD,
@@ -94,12 +95,13 @@ const seriesCategories = [
thepiratebay.Categories.VIDEO.TV_SHOWS,
thepiratebay.Categories.VIDEO.TV_SHOWS_HD
];
async function processTorrentRecord(record) {
const alreadyExists = await repository.getSkipTorrent(record)
.catch(() => repository.getTorrent(record))
.catch(() => undefined);
if (alreadyExists) {
return;
return;
}
const torrentFound = await findTorrent(record);
@@ -128,17 +130,18 @@ async function processTorrentRecord(record) {
title: torrentFound.name,
size: record.size,
type: type,
imdbId: imdbId,
uploadDate: record.uploadDate,
seeders: torrentFound.seeders,
};
if (!imdbId && !titleInfo.complete) {
if (!torrent.imdbId && !titleInfo.complete) {
console.log(`imdbId not found: ${torrentFound.name}`);
repository.createFailedImdbTorrent(torrent);
return;
}
const files = await parseTorrentFiles(torrent, imdbId);
const files = await parseTorrentFiles(torrent);
if (!files || !files.length) {
console.log(`no video files found: ${torrentFound.name}`);
return;
@@ -171,7 +174,8 @@ async function findTorrentInSource(record) {
async function findTorrentViaBing(record) {
return bing.web(`${record.infoHash}`)
.then((results) => results
.find(result => result.description.includes('Direct download via magnet link') || result.description.includes('Get this torrent')))
.find(result => result.description.includes('Direct download via magnet link') || result.description.includes(
'Get this torrent')))
.then((result) => {
if (!result) {
throw new Error(`Failed to find torrent ${record.title}`);
@@ -187,15 +191,21 @@ function downloadDump(dump) {
console.log('dump file already exist...');
return;
}
} catch(err) {
} catch (err) {
console.error(err)
}
console.log('downloading dump file...');
return needle('get', dump.url, { open_timeout: 2000, output: '/tmp/tpb_dump.gz' })
.then((response) => response.body)
.then((body) => { console.log('unzipping dump file...'); return ungzip(body); })
.then((unzipped) => { console.log('writing dump file...'); return fs.promises.writeFile(CSV_FILE_PATH, unzipped); })
.then((body) => {
console.log('unzipping dump file...');
return ungzip(body);
})
.then((unzipped) => {
console.log('writing dump file...');
return fs.promises.writeFile(CSV_FILE_PATH, unzipped);
})
}
module.exports = { scrape };