[scraper] assigns subtitle indexes directly to file table as json array

This commit is contained in:
TheBeastLT
2020-05-04 21:46:39 +02:00
parent 46b75f0466
commit d47cf4304e
4 changed files with 71 additions and 88 deletions

View File

@@ -1,6 +1,6 @@
const moment = require('moment'); const moment = require('moment');
const Promises = require('./promises') const Promises = require('./promises')
const { Sequelize, fn, col, literal } = require('sequelize'); const { Sequelize, DataTypes, fn, col, literal } = require('sequelize');
const Op = Sequelize.Op; const Op = Sequelize.Op;
const DATABASE_URI = process.env.DATABASE_URI; const DATABASE_URI = process.env.DATABASE_URI;
@@ -13,46 +13,47 @@ const database = new Sequelize(
); );
// Model for the `provider` table, keyed by provider name.
// NOTE(review): lastScraped / lastScrapedId presumably record the most recent scrape
// of that provider - confirm against the scrapers that write them.
const Provider = database.define('provider', {
  name: { type: DataTypes.STRING(32), primaryKey: true },
  lastScraped: { type: DataTypes.DATE },
  lastScrapedId: { type: DataTypes.STRING(128) }
});
// Model for the `torrent` table, keyed by info hash.
// provider, title, type and uploadDate are mandatory; the two boolean flags
// (`reviewed`, `opened`) are mandatory as well but default to false.
const Torrent = database.define('torrent',
    {
      infoHash: { type: DataTypes.STRING(64), primaryKey: true },
      provider: { type: DataTypes.STRING(32), allowNull: false },
      torrentId: { type: DataTypes.STRING(512) },
      title: { type: DataTypes.STRING(512), allowNull: false },
      size: { type: DataTypes.BIGINT },
      type: { type: DataTypes.STRING(16), allowNull: false },
      uploadDate: { type: DataTypes.DATE, allowNull: false },
      seeders: { type: DataTypes.SMALLINT },
      trackers: { type: DataTypes.STRING(4096) },
      languages: { type: DataTypes.STRING(256) },
      resolution: { type: DataTypes.STRING(16) },
      reviewed: { type: DataTypes.BOOLEAN, allowNull: false, defaultValue: false },
      opened: { type: DataTypes.BOOLEAN, allowNull: false, defaultValue: false }
    }
);
const File = database.define('file', const File = database.define('file',
{ {
id: { type: Sequelize.BIGINT, autoIncrement: true, primaryKey: true }, id: { type: DataTypes.BIGINT, autoIncrement: true, primaryKey: true },
infoHash: { infoHash: {
type: Sequelize.STRING(64), type: DataTypes.STRING(64),
allowNull: false, allowNull: false,
references: { model: Torrent, key: 'infoHash' }, references: { model: Torrent, key: 'infoHash' },
onDelete: 'CASCADE' onDelete: 'CASCADE'
}, },
fileIndex: { type: Sequelize.INTEGER }, fileIndex: { type: DataTypes.INTEGER },
title: { type: Sequelize.STRING(512), allowNull: false }, subtitleIndexes: { type: DataTypes.JSON },
size: { type: Sequelize.BIGINT }, title: { type: DataTypes.STRING(512), allowNull: false },
imdbId: { type: Sequelize.STRING(32) }, size: { type: DataTypes.BIGINT },
imdbSeason: { type: Sequelize.INTEGER }, imdbId: { type: DataTypes.STRING(32) },
imdbEpisode: { type: Sequelize.INTEGER }, imdbSeason: { type: DataTypes.INTEGER },
kitsuId: { type: Sequelize.INTEGER }, imdbEpisode: { type: DataTypes.INTEGER },
kitsuEpisode: { type: Sequelize.INTEGER } kitsuId: { type: DataTypes.INTEGER },
kitsuEpisode: { type: DataTypes.INTEGER }
}, },
{ {
indexes: [ indexes: [
@@ -75,27 +76,27 @@ const File = database.define('file',
} }
); );
const Subtitle = database.define('subtitle', const UnassignedSubtitle = database.define('subtitle',
{ {
infoHash: { infoHash: {
type: Sequelize.STRING(64), type: DataTypes.STRING(64),
primaryKey: true, primaryKey: true,
allowNull: false, allowNull: false,
references: { model: Torrent, key: 'infoHash' }, references: { model: Torrent, key: 'infoHash' },
onDelete: 'CASCADE' onDelete: 'CASCADE'
}, },
fileIndex: { fileIndex: {
type: Sequelize.INTEGER, type: DataTypes.INTEGER,
primaryKey: true, primaryKey: true,
allowNull: false allowNull: false
}, },
fileId: { fileId: {
type: Sequelize.BIGINT, type: DataTypes.BIGINT,
allowNull: true, allowNull: true,
references: { model: File, key: 'id' }, references: { model: File, key: 'id' },
onDelete: 'SET NULL' onDelete: 'SET NULL'
}, },
title: { type: Sequelize.STRING(512), allowNull: false }, title: { type: DataTypes.STRING(512), allowNull: false },
}, },
{ {
timestamps: false, timestamps: false,
@@ -108,19 +109,19 @@ const Subtitle = database.define('subtitle',
const Content = database.define('content', const Content = database.define('content',
{ {
infoHash: { infoHash: {
type: Sequelize.STRING(64), type: DataTypes.STRING(64),
primaryKey: true, primaryKey: true,
allowNull: false, allowNull: false,
references: { model: Torrent, key: 'infoHash' }, references: { model: Torrent, key: 'infoHash' },
onDelete: 'CASCADE' onDelete: 'CASCADE'
}, },
fileIndex: { fileIndex: {
type: Sequelize.INTEGER, type: DataTypes.INTEGER,
primaryKey: true, primaryKey: true,
allowNull: false allowNull: false
}, },
path: { type: Sequelize.STRING(512), allowNull: false }, path: { type: DataTypes.STRING(512), allowNull: false },
size: { type: Sequelize.BIGINT }, size: { type: DataTypes.BIGINT },
}, },
{ {
timestamps: false, timestamps: false,
@@ -128,15 +129,13 @@ const Content = database.define('content',
); );
// Model for the `skip_torrent` table: a bare list of info hashes.
// NOTE(review): presumably torrents the scraper should not process again - confirm with callers.
const SkipTorrent = database.define('skip_torrent', {
  infoHash: { type: DataTypes.STRING(64), primaryKey: true },
});
// Torrent <-> File and Torrent <-> Content are linked through `infoHash`.
// `constraints: false` keeps these associations from adding their own foreign-key constraints.
Torrent.hasMany(File, { foreignKey: 'infoHash', constraints: false });
File.belongsTo(Torrent, { foreignKey: 'infoHash', constraints: false });
Torrent.hasMany(Content, { foreignKey: 'infoHash', constraints: false });
Content.belongsTo(Torrent, { foreignKey: 'infoHash', constraints: false });
// File <-> Subtitle association through `fileId`.
// NOTE(review): these two lines appear only once in this side-by-side listing, which
// suggests they belong to the removed side of the commit - confirm before relying on them.
File.hasMany(Subtitle, { foreignKey: 'fileId', constraints: false });
Subtitle.belongsTo(File, { foreignKey: 'fileId', constraints: false });
function connect() { function connect() {
if (process.env.ENABLE_SYNC) { if (process.env.ENABLE_SYNC) {
@@ -194,7 +193,7 @@ function getUpdateSeedersTorrents() {
/**
 * Upserts a torrent and then stores the rows that hang off it.
 *
 * The three steps run strictly one after another: the torrent row first, then its
 * contents, then the subtitles that are not assigned to any video file.
 *
 * @param {Object} torrent - torrent values; `infoHash`, `contents` and `subtitles` are read here.
 * @returns {Promise} resolves with whatever `createUnassignedSubtitles` resolves with.
 */
async function createTorrent(torrent) {
  await Torrent.upsert(torrent);
  await createContents(torrent.infoHash, torrent.contents);
  return createUnassignedSubtitles(torrent.infoHash, torrent.subtitles);
}
function setTorrentSeeders(infoHash, seeders) { function setTorrentSeeders(infoHash, seeders) {
@@ -205,13 +204,12 @@ function setTorrentSeeders(infoHash, seeders) {
} }
/**
 * Upserts a file row, first folding the indexes of its assigned subtitles into
 * `file.subtitleIndexes`.
 *
 * `file.subtitles` may contain plain integer indexes or subtitle objects carrying a
 * `fileIndex`. New indexes are appended to any indexes already on the file and
 * de-duplicated; an empty result is stored as `undefined` rather than `[]`.
 * The passed `file` object is updated in place and then handed to `File.upsert`.
 *
 * @param {Object} file - file values to persist.
 * @returns {Promise} the promise returned by `File.upsert`.
 */
function createFile(file) {
  if (file.subtitles) {
    const newSubtitleIndexes = file.subtitles
        .map(sub => Number.isInteger(sub) ? sub : sub && sub.fileIndex)
        // An entry without a usable fileIndex would otherwise be serialized as `null`
        // inside the stored JSON array.
        .filter(index => Number.isInteger(index));
    const subtitleIndexes = (file.subtitleIndexes || []).concat(newSubtitleIndexes);
    file.subtitleIndexes = subtitleIndexes.length ? [...new Set(subtitleIndexes)] : undefined;
  }
  return File.upsert(file);
}
function getFiles(torrent) { function getFiles(torrent) {
@@ -226,39 +224,21 @@ function deleteFile(file) {
return File.destroy({ where: { id: file.id } }) return File.destroy({ where: { id: file.id } })
} }
function createSubtitles(infoHash, subtitles) { function createUnassignedSubtitles(infoHash, subtitles) {
if (subtitles && subtitles.length) { if (subtitles && subtitles.length) {
return Subtitle.bulkCreate(subtitles.map(subtitle => ({ infoHash, title: subtitle.path, ...subtitle }))); return UnassignedSubtitle.bulkCreate(subtitles.map(subtitle => ({ infoHash, title: subtitle.path, ...subtitle })));
} }
return Promise.resolve(); return Promise.resolve();
} }
/**
 * Links the given subtitles to a file and persists them.
 *
 * Every subtitle object is modified in place: `fileId` is set to `file.id`, and
 * `infoHash` / `title` are filled from `file.infoHash` / `subtitle.path` when missing.
 * Entries exposing `dataValues` are stored through their own `save()`, all others
 * through `Subtitle.upsert`. The writes are wrapped as thunks and handed to
 * `Promises.sequence` (project helper; presumably runs them one after another - confirm).
 *
 * Resolves immediately, without writing, when `file.id` is falsy or there are no subtitles.
 * The first argument must be a file object: it is read for both `id` and `infoHash`.
 *
 * NOTE(review): this function appears only once in this side-by-side listing, which
 * suggests it is on the removed side of the commit - confirm.
 */
function upsertSubtitles(file, subtitles) {
if (file.id && subtitles && subtitles.length) {
return Promises.sequence(subtitles
.map(subtitle => {
subtitle.fileId = file.id;
subtitle.infoHash = subtitle.infoHash || file.infoHash;
subtitle.title = subtitle.title || subtitle.path;
return subtitle;
})
// one deferred write per subtitle
.map(subtitle => () => subtitle.dataValues ? subtitle.save() : Subtitle.upsert(subtitle)));
}
return Promise.resolve();
}
/**
 * Fetches every subtitle row whose `infoHash` equals `torrent.infoHash`.
 *
 * NOTE(review): this function appears only once in this side-by-side listing, which
 * suggests it is on the removed side of the commit - confirm.
 */
function getSubtitles(torrent) {
return Subtitle.findAll({ where: { infoHash: torrent.infoHash } });
}
/**
 * Fetches every row of the unassigned-subtitle model, unfiltered.
 *
 * @returns {Promise} the promise returned by `UnassignedSubtitle.findAll()`.
 */
function getUnassignedSubtitles() {
  return UnassignedSubtitle.findAll();
}
/**
 * Bulk-inserts the content entries of a torrent and then flags the torrent as opened.
 *
 * Each row receives the torrent's `infoHash`; because the content is spread last, an
 * `infoHash` already present on it wins.
 *
 * @param {string} infoHash - info hash of the owning torrent.
 * @param {Array<Object>} [contents] - content entries; nothing is written when missing or empty.
 * @returns {Promise} result of the `Torrent.update` call, or a promise resolved with
 *   `undefined` when there is nothing to store.
 */
function createContents(infoHash, contents) {
  if (!contents || !contents.length) {
    return Promise.resolve();
  }
  const rows = contents.map(content => ({ infoHash, ...content }));
  // `silent: true` is a Sequelize update option; per its documentation the update
  // then leaves the updatedAt timestamp alone.
  return Content.bulkCreate(rows)
      .then(() => Torrent.update({ opened: true }, { where: { infoHash: infoHash }, silent: true }));
}
@@ -293,9 +273,7 @@ module.exports = {
getFiles, getFiles,
getFilesBasedOnTitle, getFilesBasedOnTitle,
deleteFile, deleteFile,
createSubtitles, createUnassignedSubtitles,
upsertSubtitles,
getSubtitles,
getUnassignedSubtitles, getUnassignedSubtitles,
createContents, createContents,
getContents, getContents,

View File

@@ -1,5 +1,6 @@
const { parse } = require('parse-torrent-title'); const { parse } = require('parse-torrent-title');
const { Type } = require('./types'); const { Type } = require('./types');
const Promises = require('./promises');
const repository = require('./repository'); const repository = require('./repository');
const { getImdbId, getKitsuId } = require('./metadata'); const { getImdbId, getKitsuId } = require('./metadata');
const { parseTorrentFiles } = require('./torrentFiles'); const { parseTorrentFiles } = require('./torrentFiles');
@@ -47,7 +48,7 @@ async function createTorrentEntry(torrent, overwrite = false) {
} }
return repository.createTorrent({ ...torrent, contents, subtitles }) return repository.createTorrent({ ...torrent, contents, subtitles })
.then(() => Promise.all(videos.map(video => repository.createFile(video)))) .then(() => Promises.sequence(videos.map(video => () => repository.createFile(video))))
.then(() => console.log(`Created ${torrent.provider} entry for [${torrent.infoHash}] ${torrent.title}`)); .then(() => console.log(`Created ${torrent.provider} entry for [${torrent.infoHash}] ${torrent.title}`));
} }

View File

@@ -10,15 +10,15 @@ function assignSubtitles({ contents, videos, subtitles }) {
const parsedVideos = videos const parsedVideos = videos
.map(video => _parseVideo(video)); .map(video => _parseVideo(video));
const assignedSubs = subtitles const assignedSubs = subtitles
.map(subtitle => ({ subtitle, video: _mostProbableSubtitleVideo(subtitle, parsedVideos) })); .map(subtitle => ({ subtitle, videos: _mostProbableSubtitleVideos(subtitle, parsedVideos) }));
const unassignedSubs = assignedSubs const unassignedSubs = assignedSubs
.filter(assignedSub => !assignedSub.video) .filter(assignedSub => !assignedSub.videos)
.map(assignedSub => assignedSub.subtitle); .map(assignedSub => assignedSub.subtitle);
assignedSubs assignedSubs
.filter(assignedSub => assignedSub.video) .filter(assignedSub => assignedSub.videos)
.forEach(assignedSub => .forEach(assignedSub => assignedSub.videos
assignedSub.video.subtitles = (assignedSub.video.subtitles || []).concat(assignedSub.subtitle)) .forEach(video => video.subtitles = (video.subtitles || []).concat(assignedSub.subtitle)));
return { contents, videos, subtitles: unassignedSubs }; return { contents, videos, subtitles: unassignedSubs };
} }
return { contents, videos, subtitles }; return { contents, videos, subtitles };
@@ -35,33 +35,37 @@ function _parseVideo(video) {
}; };
} }
function _mostProbableSubtitleVideo(subtitle, parsedVideos) { function _mostProbableSubtitleVideos(subtitle, parsedVideos) {
const subTitle = subtitle.title || subtitle.path; const subTitle = subtitle.title || subtitle.path;
const parsedSub = parse(subTitle.replace(/\.(\w{2,4})$/, '')); const parsedSub = parse(subTitle.replace(/\.(\w{2,4})$/, ''));
const byFileName = parsedVideos.filter(video => subTitle.includes(video.fileName)); const byFileName = parsedVideos.filter(video => subTitle.includes(video.fileName));
if (byFileName.length === 1) { if (byFileName.length === 1) {
return byFileName[0].videoFile; return byFileName.map(v => v.videoFile);
} }
const byTitleSeasonEpisode = parsedVideos.filter(video => video.title === parsedSub.title const byTitleSeasonEpisode = parsedVideos.filter(video => video.title === parsedSub.title
&& video.seasons === parsedSub.seasons && video.seasons === parsedSub.seasons
&& JSON.stringify(video.episodes) === JSON.stringify(parsedSub.episodes)); && JSON.stringify(video.episodes) === JSON.stringify(parsedSub.episodes));
if (byTitleSeasonEpisode.length === 1) { if (singleVideoFile(byTitleSeasonEpisode)) {
return byTitleSeasonEpisode[0].videoFile; return byTitleSeasonEpisode.map(v => v.videoFile);
} }
const bySeasonEpisode = parsedVideos.filter(video => video.seasons === parsedSub.seasons const bySeasonEpisode = parsedVideos.filter(video => video.seasons === parsedSub.seasons
&& video.episodes === parsedSub.episodes); && video.episodes === parsedSub.episodes);
if (bySeasonEpisode.length === 1) { if (singleVideoFile(bySeasonEpisode)) {
return bySeasonEpisode[0].videoFile; return bySeasonEpisode.map(v => v.videoFile);
} }
const byTitle = parsedVideos.filter(video => video.title && video.title === parsedSub.title); const byTitle = parsedVideos.filter(video => video.title && video.title === parsedSub.title);
if (byTitle.length === 1) { if (singleVideoFile(byTitle)) {
return byTitle[0].videoFile; return byTitle.map(v => v.videoFile);
} }
const byEpisode = parsedVideos.filter(video => JSON.stringify(video.episodes) === JSON.stringify(parsedSub.episodes)); const byEpisode = parsedVideos.filter(video => JSON.stringify(video.episodes) === JSON.stringify(parsedSub.episodes));
if (byEpisode.length === 1) { if (singleVideoFile(byEpisode)) {
return byEpisode[0].videoFile; return byEpisode.map(v => v.videoFile);
} }
return undefined; return undefined;
} }
/**
 * Tells whether all given parsed videos point to one and the same video file.
 *
 * @param {Array<Object>} videos - entries carrying a `videoFile.fileIndex`.
 * @returns {boolean} true when exactly one distinct file index is present; false for an
 *   empty list or when several different indexes occur.
 */
function singleVideoFile(videos) {
  const distinctIndexes = new Set();
  for (const entry of videos) {
    distinctIndexes.add(entry.videoFile.fileIndex);
  }
  return distinctIndexes.size === 1;
}
module.exports = { assignSubtitles } module.exports = { assignSubtitles }

View File

@@ -106,7 +106,7 @@ async function assignSubs() {
const assignedContents = assignSubtitles({ videos, subtitles }); const assignedContents = assignSubtitles({ videos, subtitles });
return Promise.all(assignedContents.videos return Promise.all(assignedContents.videos
.filter(video => video.subtitles) .filter(video => video.subtitles)
.map(video => repository.upsertSubtitles(video, video.subtitles))); .map(video => repository.createFile(video)));
})); }));
} }