moves scraper to a subpackage

2024-12-20 03:29:51 +00:00 · 2020-03-10 15:12:18 +01:00
parent dff66d8fb2
commit 58aba322c2
30 changed files with 50 additions and 28 deletions
--- a/scraper/lib/cache.js
+++ b/scraper/lib/cache.js
@@ -0,0 +1,81 @@
+const cacheManager = require('cache-manager');
+const mangodbStore = require('cache-manager-mongodb');
+
+// Key prefixes used to namespace entries inside the caches below.
+const GLOBAL_KEY_PREFIX = 'stremio-torrentio';
+const IMDB_ID_PREFIX = `${GLOBAL_KEY_PREFIX}|imdb_id`;
+const METADATA_PREFIX = `${GLOBAL_KEY_PREFIX}|metadata`;
+// NOTE(review): this prefix does not share GLOBAL_KEY_PREFIX — presumably it matches keys written by another service; confirm.
+const TORRENT_FILES_KEY_PREFIX = `stremio-tpb|files`;
+
+// TTLs in seconds. NOTE(review): both read the same METADATA_TTL env variable, so setting it
+// makes the two TTLs equal; the value is also a string when it comes from the environment.
+const GLOBAL_TTL = process.env.METADATA_TTL || 7 * 24 * 60 * 60; // 7 days
+const MEMORY_TTL = process.env.METADATA_TTL || 2 * 60 * 60; // 2 hours
+
+const MONGO_URI = process.env.MONGODB_URI;
+
+// Cache instances are created once at module load; torrentFilesCache is undefined when MONGODB_URI is not set.
+const memoryCache = initiateMemoryCache();
+const remoteCache = initiateRemoteCache();
+const torrentFilesCache = initiateTorrentFilesCache();
+
+function initiateTorrentFilesCache() {
+  if (MONGO_URI) {
+    return cacheManager.caching({
+      store: mangodbStore,
+      uri: MONGO_URI,
+      options: {
+        collection: 'cacheManager',
+      },
+      ttl: GLOBAL_TTL,
+      ignoreCacheErrors: true
+    });
+  }
+}
+
+function initiateRemoteCache() {
+  if (MONGO_URI) {
+    return cacheManager.caching({
+      store: mangodbStore,
+      uri: MONGO_URI,
+      options: {
+        collection: 'torrentio_scraper_collection',
+      },
+      ttl: GLOBAL_TTL,
+      ignoreCacheErrors: true
+    });
+  } else {
+    return cacheManager.caching({
+      store: 'memory',
+      ttl: GLOBAL_TTL
+    });
+  }
+}
+
+function initiateMemoryCache() {
+  return cacheManager.caching({
+    store: 'memory',
+    ttl: MEMORY_TTL
+  });
+}
+
+function retrieveTorrentFiles(infoHash) {
+  return torrentFilesCache.get(`${TORRENT_FILES_KEY_PREFIX}:${infoHash}`)
+      .then((results) => {
+        if (!results) {
+          throw new Error('No cached files found');
+        }
+        return results;
+      });
+}
+
+/**
+ * Delegates to `cache.wrap(key, method, options)` on the given cache instance.
+ */
+function cacheWrap(cache, key, method, options) {
+  return cache.wrap(key, method, options);
+}
+
+/**
+ * Wraps `method` with the remote cache under the imdb-id key prefix, using GLOBAL_TTL.
+ *
+ * @param key    cache key suffix
+ * @param method function passed to the cache's wrap call
+ */
+function cacheWrapImdbId(key, method) {
+  return cacheWrap(remoteCache, `${IMDB_ID_PREFIX}:${key}`, method, { ttl: GLOBAL_TTL });
+}
+
+function cacheWrapMetadata(id, method) {
+  return cacheWrap(memoryCache, `${METADATA_PREFIX}:${id}`, method, { ttl: GLOBAL_TTL });
+}
+
+module.exports = { cacheWrapImdbId, cacheWrapMetadata, retrieveTorrentFiles };
+
--- a/scraper/lib/metadata.js
+++ b/scraper/lib/metadata.js
@@ -0,0 +1,119 @@
+const needle = require('needle');
+const nameToImdb = require('name-to-imdb');
+const bing = require('nodejs-bing');
+const { cacheWrapImdbId, cacheWrapMetadata } = require('./cache');
+const { Type } = require('./types');
+
+// Base URLs of the two metadata services queried by this module.
+const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
+const KITSU_URL = 'https://anime-kitsu.now.sh';
+// Passed as needle's open_timeout (milliseconds) for metadata requests.
+const TIMEOUT = 20000;
+
+function getMetadata(id, type = Type.SERIES) {
+  if (!id) {
+    return Promise.reject("no valid id provided");
+  }
+
+  const key = id.match(/^\d+$/) ? `kitsu:${id}` : id;
+  const metaType = type === Type.MOVIE ? Type.MOVIE : Type.SERIES;
+  return cacheWrapMetadata(key,
+      () => _requestMetadata(`${KITSU_URL}/meta/${metaType}/${key}.json`)
+          .catch(() => _requestMetadata(`${CINEMETA_URL}/meta/${metaType}/${key}.json`))
+          .catch((error) => {
+            throw new Error(`failed metadata query ${kitsuId} due: ${error.message}`);
+          }));
+}
+
+function _requestMetadata(url) {
+  return needle('get', url, { open_timeout: TIMEOUT })
+      .then((response) => {
+        const body = response.body;
+        if (body && body.meta && body.meta.id) {
+          return {
+            kitsuId: body.meta.kitsu_id,
+            imdbId: body.meta.imdb_id,
+            title: body.meta.name,
+            year: body.meta.year,
+            country: body.meta.country,
+            genres: body.meta.genres,
+            videos: (body.meta.videos || [])
+                .map((video) => video.imdbSeason
+                    ? {
+                      season: video.season,
+                      episode: video.episode,
+                      imdbSeason: video.imdbSeason,
+                      imdbEpisode: video.imdbEpisode
+                    }
+                    : {
+                      season: video.season,
+                      episode: video.episode,
+                      kitsuId: video.kitsu_id,
+                      kitsuEpisode: video.kitsuEpisode,
+                      released: video.released
+                    }
+                ),
+            episodeCount: Object.values((body.meta.videos || [])
+                .filter((entry) => entry.season !== 0)
+                .sort((a, b) => a.season - b.season)
+                .reduce((map, next) => {
+                  map[next.season] = map[next.season] + 1 || 1;
+                  return map;
+                }, {})),
+            totalCount: body.meta.videos && body.meta.videos
+                .filter((entry) => entry.season !== 0).length
+          };
+        } else {
+          throw new Error('No search results');
+        }
+      });
+}
+
+function escapeTitle(title) {
+  return title.toLowerCase()
+      .normalize('NFKD') // normalize non-ASCII characters
+      .replace(/[\u0300-\u036F]/g, '')
+      .replace(/&/g, 'and')
+      .replace(/[;, ~\-]+/g, ' ') // replace dots, commas or underscores with spaces
+      .replace(/[^\w ()+#@!']+/g, '') // remove all non-alphanumeric chars
+      .trim();
+}
+
+async function getImdbId(info, type) {
+  const name = escapeTitle(info.title);
+  const year = info.year || info.date && info.date.slice(0, 4);
+  const key = `${name}_${year}_${type}`;
+
+  return cacheWrapImdbId(key,
+      () => new Promise((resolve, reject) => {
+        nameToImdb({ name, year, type }, function (err, res) {
+          if (res) {
+            resolve(res);
+          } else {
+            reject(err || new Error('failed imdbId search'));
+          }
+        });
+      }).catch(() => bing.web(`${name} ${year || ''} ${type} imdb`)
+          .then(results => results
+              .map((result) => result.link)
+              .find(result => result.includes('imdb.com/title/')))
+          .then(result => result && result.match(/imdb\.com\/title\/(tt\d+)/))
+          .then(match => match && match[1])));
+}
+
+async function getKitsuId(info) {
+  const title = escapeTitle(info.title);
+  const season = info.season > 1 ? ` S${info.season}` : '';
+  const query = `${title}${season}`;
+
+  return cacheWrapImdbId(query,
+      () => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 })
+          .then((response) => {
+            const body = response.body;
+            if (body && body.metas && body.metas.length) {
+              return body.metas[0].id.replace('kitsu:', '');
+            } else {
+              throw new Error('No search results');
+            }
+          }));
+}
+
+module.exports = { getMetadata, getImdbId, getKitsuId };
--- a/scraper/lib/repository.js
+++ b/scraper/lib/repository.js
@@ -0,0 +1,151 @@
+const { Sequelize } = require('sequelize');
+const Op = Sequelize.Op;
+
+// Connection string is taken from the environment.
+const DATABASE_URI = process.env.DATABASE_URI;
+
+// Shared Sequelize instance for all models below; query logging is disabled.
+const database = new Sequelize(DATABASE_URI, { logging: false });
+
+// Provider model: one row per provider name, with the last scrape time and last scraped id.
+const Provider = database.define('provider', {
+  name: { type: Sequelize.STRING(32), primaryKey: true },
+  lastScraped: { type: Sequelize.DATE },
+  lastScrapedId: { type: Sequelize.STRING(128) }
+});
+
+// Torrent model, keyed by infoHash.
+const Torrent = database.define('torrent', {
+  infoHash: { type: Sequelize.STRING(64), primaryKey: true },
+  provider: { type: Sequelize.STRING(32), allowNull: false },
+  torrentId: { type: Sequelize.STRING(128) },
+  title: { type: Sequelize.STRING(256), allowNull: false },
+  size: { type: Sequelize.BIGINT },
+  type: { type: Sequelize.STRING(16), allowNull: false },
+  uploadDate: { type: Sequelize.DATE, allowNull: false },
+  // NOTE(review): SMALLINT caps the stored value; confirm seeder counts cannot exceed its range
+  seeders: { type: Sequelize.SMALLINT },
+  trackers: { type: Sequelize.STRING(4096) }
+});
+
+// File model: entries belonging to a torrent, optionally mapped to an imdb and/or kitsu episode.
+const File = database.define('file',
+    {
+      id: { type: Sequelize.BIGINT, autoIncrement: true, primaryKey: true },
+      // references the owning torrent; rows are removed together with it (CASCADE)
+      infoHash: {
+        type: Sequelize.STRING(64),
+        allowNull: false,
+        references: { model: Torrent, key: 'infoHash' },
+        onDelete: 'CASCADE'
+      },
+      fileIndex: { type: Sequelize.INTEGER },
+      title: { type: Sequelize.STRING(256), allowNull: false },
+      size: { type: Sequelize.BIGINT },
+      imdbId: { type: Sequelize.STRING(32) },
+      imdbSeason: { type: Sequelize.INTEGER },
+      imdbEpisode: { type: Sequelize.INTEGER },
+      kitsuId: { type: Sequelize.INTEGER },
+      kitsuEpisode: { type: Sequelize.INTEGER }
+    },
+    {
+      indexes: [
+        // partial unique index: at most one row per torrent where fileIndex is null
+        { unique: true, fields: ['infoHash'], where: { fileIndex: { [Op.eq]: null } } },
+        { unique: true, fields: ['infoHash', 'fileIndex', 'imdbSeason', 'imdbEpisode'] },
+        // non-unique lookup indexes for imdb and kitsu episode queries
+        { unique: false, fields: ['imdbId', 'imdbSeason', 'imdbEpisode'] },
+        { unique: false, fields: ['kitsuId', 'kitsuEpisode'] }
+      ]
+    }
+);
+
+// SkipTorrent model: stores only the infoHash of torrents recorded via createSkipTorrent.
+const SkipTorrent = database.define('skip_torrent', {
+  infoHash: { type: Sequelize.STRING(64), primaryKey: true },
+});
+
+// FailedImdbTorrent model: infoHash and title of torrents recorded via createFailedImdbTorrent.
+const FailedImdbTorrent = database.define('failed_imdb_torrent', {
+  infoHash: { type: Sequelize.STRING(64), primaryKey: true },
+  title: { type: Sequelize.STRING(256), allowNull: false }
+});
+
+/**
+ * Synchronizes all defined models with the database (`alter: true`).
+ *
+ * @returns the promise returned by `database.sync`
+ */
+function connect() {
+  return database.sync({ alter: true });
+}
+
+/**
+ * Finds the provider row by `provider.name`, creating it from `provider` when missing.
+ *
+ * @returns the result of `Provider.findOrCreate`
+ */
+function getProvider(provider) {
+  return Provider.findOrCreate({ where: { name: { [Op.eq]: provider.name } }, defaults: provider });
+}
+
+/**
+ * Updates the provider row whose name equals `provider.name` with the given values.
+ */
+function updateProvider(provider) {
+  return Provider.update(provider, { where: { name: { [Op.eq]: provider.name } } });
+}
+
+function getTorrent(torrent) {
+  return Torrent.findByPk(torrent.infoHash)
+      .then((result) => {
+        if (!result) {
+          throw new Error(`torrent not found: ${torrent.infoHash}`);
+        }
+        return result.dataValues;
+      })
+}
+
+/**
+ * Finds torrents of the given type whose title matches `titleQuery` as a regular expression.
+ * NOTE(review): `titleQuery` is used as the pattern verbatim — callers presumably pass trusted patterns; confirm.
+ */
+function getTorrentsBasedOnTitle(titleQuery, type) {
+  return Torrent.findAll({ where: { title: { [Op.regexp]: `${titleQuery}` }, type: type } });
+}
+
+/**
+ * Inserts or updates the torrent row (`Torrent.upsert`).
+ */
+function createTorrent(torrent) {
+  return Torrent.upsert(torrent);
+}
+
+/**
+ * Inserts or updates the file row (`File.upsert`).
+ */
+function createFile(file) {
+  return File.upsert(file);
+}
+
+/**
+ * Returns all file rows whose infoHash equals `torrent.infoHash`.
+ */
+function getFiles(torrent) {
+  return File.findAll({ where: { infoHash: torrent.infoHash } });
+}
+
+/**
+ * Returns file rows whose title contains `titleQuery` (case-insensitive `iLike` match).
+ * NOTE(review): `%` and `_` inside `titleQuery` are not escaped and act as wildcards.
+ */
+function getFilesBasedOnTitle(titleQuery) {
+  return File.findAll({ where: { title: { [Op.iLike]: `%${titleQuery}%` } } });
+}
+
+/**
+ * Deletes the file row with id `file.id`.
+ */
+function deleteFile(file) {
+  return File.destroy({ where: { id: file.id } })
+}
+
+function getSkipTorrent(torrent) {
+  return SkipTorrent.findByPk(torrent.infoHash)
+      .then((result) => {
+        if (!result) {
+          return getFailedImdbTorrent(torrent);
+        }
+        return result.dataValues;
+      })
+}
+
+/**
+ * Inserts or updates a skip entry holding only `torrent.infoHash`.
+ */
+function createSkipTorrent(torrent) {
+  return SkipTorrent.upsert({ infoHash: torrent.infoHash });
+}
+
+function getFailedImdbTorrent(torrent) {
+  return FailedImdbTorrent.findByPk(torrent.infoHash)
+      .then((result) => {
+        if (!result) {
+          throw new Error(`torrent not found: ${torrent.infoHash}`);
+        }
+        return result.dataValues;
+      })
+}
+
+/**
+ * Inserts or updates a failed-imdb entry from the given torrent object (`FailedImdbTorrent.upsert`).
+ */
+function createFailedImdbTorrent(torrent) {
+  return FailedImdbTorrent.upsert(torrent);
+}
+
+module.exports = {
+  connect,
+  getProvider,
+  updateProvider,
+  createTorrent,
+  getTorrent,
+  getTorrentsBasedOnTitle,
+  createFile,
+  getFiles,
+  getFilesBasedOnTitle,
+  deleteFile,
+  getSkipTorrent,
+  createSkipTorrent,
+  createFailedImdbTorrent
+};
--- a/scraper/lib/torrent.js
+++ b/scraper/lib/torrent.js
@@ -0,0 +1,144 @@
+const torrentStream = require('torrent-stream');
+const needle = require('needle');
+const parseTorrent = require('parse-torrent');
+const async = require('async');
+const decode = require('magnet-uri');
+const { retrieveTorrentFiles } = require('./cache');
+
+// Passed as the `connections` option when creating a torrent-stream engine.
+const MAX_PEER_CONNECTIONS = process.env.MAX_PEER_CONNECTIONS || 20;
+// Upper bound (ms) for waiting on tracker responses in updateCurrentSeeders.
+const SEEDS_CHECK_TIMEOUT = process.env.SEEDS_CHECK_TIMEOUT || 10 * 1000; // 10 secs
+// File extensions accepted by filterVideos (lower case).
+const EXTENSIONS = ["3g2", "3gp", "avi", "flv", "mkv", "mov", "mp2", "mp4", "mpe", "mpeg", "mpg", "mpv", "webm", "wmv"];
+
+/**
+ * Queries the trackers listed in `torrent.magnetLink` for seeder counts.
+ *
+ * Resolves to a copy of `torrent` whose `seeders` is the existing `torrent.seeders` when truthy,
+ * otherwise the highest count reported by any tracker (0 without a magnet link or responses).
+ * Resolution happens when all trackers reported or after SEEDS_CHECK_TIMEOUT, whichever is first.
+ */
+module.exports.updateCurrentSeeders = function (torrent) {
+  return new Promise((resolve) => {
+    if (!torrent.magnetLink) {
+      return resolve(0);
+    }
+    const Tracker = require("peer-search/tracker");
+
+    // tracker url -> [seeders, leechers]
+    const seeders = {};
+    const decodedMagnetLink = decode(torrent.magnetLink);
+    const trackers = decodedMagnetLink && decodedMagnetLink.tr;
+    // concat(0) guarantees Math.max has at least one argument
+    const callback = () => resolve(Math.max(...Object.values(seeders).map(values => values[0]).concat(0)));
+    // NOTE(review): this timer is never cleared, so it still fires after an early resolve
+    setTimeout(callback, SEEDS_CHECK_TIMEOUT);
+
+    async.each(trackers, function (tracker, ready) {
+      const t = new Tracker(tracker, {}, torrent.infoHash);
+      // NOTE(review): this replaces console.error for the whole process and is never restored
+      console.error = () => 0; // do nothing
+      t.run();
+      t.on("info", function (inf) {
+        seeders[tracker] = [inf.seeders, inf.leechers];
+        ready();
+      });
+    }, callback);
+  }).then((seeders) => ({ ...torrent, seeders: torrent.seeders || seeders }));
+};
+
+/**
+ * Resolves the video files of a torrent.
+ *
+ * Sources are tried in order, each one only when the previous rejected: files already on the
+ * torrent object, the .torrent file, a torrent-stream engine, and finally the files cache.
+ * The result is then filtered down to videos, with samples and extras removed.
+ */
+module.exports.torrentFiles = function (torrent) {
+  return getFilesFromObject(torrent)
+      .catch(() => filesFromTorrentFile(torrent))
+      .catch(() => filesFromTorrentStream(torrent))
+      .catch(() => filesFromCache(torrent.infoHash))
+      .then((files) => filterVideos(files))
+      .then((files) => filterSamples(files))
+      .then((files) => filterExtras(files));
+};
+
+function getFilesFromObject(torrent) {
+  if (torrent.files && torrent.files.length) {
+    return Promise.resolve(torrent.files);
+  }
+  return Promise.reject("No files in the object");
+}
+
+function filesFromCache(infoHash) {
+  return retrieveTorrentFiles(infoHash)
+      .then((files) => files.map((file) => ({
+        fileIndex: parseInt(file.match(/^(\d+)@@/)[1]),
+        name: file.replace(/.+\/|^\d+@@/, ''),
+        path: file.replace(/^\d+@@/, ''),
+        size: 300000000
+      })));
+}
+
+async function filesFromTorrentFile(torrent) {
+  if (!torrent.torrentLink) {
+    return Promise.reject(new Error("no torrentLink"));
+  }
+
+  return needle('get', torrent.torrentLink, { open_timeout: 10000 })
+      .then((response) => {
+        if (!response.body || response.statusCode !== 200) {
+          throw new Error('torrent not found')
+        }
+        return response.body
+      })
+      .then((body) => parseTorrent(body))
+      .then((info) => info.files.map((file, fileId) => ({
+        fileIndex: fileId,
+        name: file.name,
+        path: file.path.replace(/^[^\/]+\//, ''),
+        size: file.length
+      })));
+}
+
+async function filesFromTorrentStream(torrent) {
+  if (!torrent.infoHash && !torrent.magnetLink) {
+    return Promise.reject(new Error("no infoHash or magnetLink"));
+  }
+  return new Promise((resolve, rejected) => {
+    const engine = new torrentStream(torrent.magnetLink || torrent.infoHash, { connections: MAX_PEER_CONNECTIONS });
+
+    engine.ready(() => {
+      const files = engine.files
+          .map((file, fileId) => ({
+            fileIndex: fileId,
+            name: file.name,
+            path: file.path.replace(/^[^\/]+\//, ''),
+            size: file.length
+          }));
+
+      engine.destroy();
+      resolve(files);
+    });
+    setTimeout(() => {
+      engine.destroy();
+      rejected(new Error('No available connections for torrent!'));
+    }, 60000);
+  });
+}
+
+function filterVideos(files) {
+  return files.filter((file) => {
+    const match = file.path.match(/\.(\w{2,4})$/);
+    return match && EXTENSIONS.includes(match[1]);
+  });
+}
+
+function filterSamples(files) {
+  const maxSize = Math.max(...files.map(file => file.size));
+  const isSample = file => file.name.match(/sample/i) && maxSize / file.size < 10;
+  return files.filter(file => !isSample(file));
+}
+
+function filterExtras(files) {
+  const isExtra = file => file.path.match(/extras?\//i);
+  return files.filter(file => !isExtra(file));
+}
+
+function dynamicTimeout(torrent) {
+  if (torrent.seeders < 5) {
+    return 5000;
+  } else if (torrent.seeders < 10) {
+    return 7000;
+  } else if (torrent.seeders < 20) {
+    return 10000;
+  } else if (torrent.seeders < 30) {
+    return 15000;
+  } else if (torrent.seeders < 50) {
+    return 20000;
+  } else {
+    return 30000;
+  }
+}
+
--- a/scraper/lib/torrentEntries.js
+++ b/scraper/lib/torrentEntries.js
@@ -0,0 +1,66 @@
+const { parse } = require('parse-torrent-title');
+const { Type } = require('./types');
+const repository = require('./repository');
+const { getImdbId, getKitsuId } = require('./metadata');
+const { parseTorrentFiles } = require('./torrentFiles');
+
+async function createTorrentEntry(torrent) {
+  const titleInfo = parse(torrent.title);
+
+  if (titleInfo.seasons && torrent.type === Type.MOVIE) {
+    // sometimes series torrent might be put into movies category
+    torrent.type = Type.SERIES;
+  }
+  if (!torrent.imdbId && torrent.type !== Type.ANIME) {
+    torrent.imdbId = await getImdbId(titleInfo, torrent.type)
+        .catch(() => undefined);
+  }
+  if (torrent.imdbId && torrent.imdbId.length > 9 && torrent.imdbId.startsWith('tt0')) {
+    // sanitize imdbId from redundant zeros
+    torrent.imdbId = torrent.imdbId.replace(/tt0+([0-9]{7,})$/, 'tt$1');
+  }
+  if (!torrent.kitsuId && torrent.type === Type.ANIME) {
+    torrent.kitsuId = await getKitsuId(titleInfo)
+        .catch(() => undefined);
+  }
+
+  if (!torrent.imdbId && !torrent.kitsuId && !titleInfo.complete) {
+    console.log(`imdbId or kitsuId not found: ${torrent.title}`);
+    repository.createFailedImdbTorrent(torrent);
+    return;
+  }
+
+  const files = await parseTorrentFiles(torrent);
+  if (!files || !files.length) {
+    console.log(`no video files found: ${torrent.title}`);
+    return;
+  }
+
+  return repository.createTorrent(torrent)
+      .then(() => Promise.all(files.map(file => repository.createFile(file))))
+      .then(() => console.log(`Created entry for ${torrent.title}`));
+}
+
+/**
+ * Records the torrent in the skip table via the repository.
+ */
+async function createSkipTorrentEntry(torrent) {
+  return repository.createSkipTorrent(torrent);
+}
+
+/**
+ * Returns the stored entry for a torrent: the skip entry when one exists, otherwise the stored
+ * torrent, otherwise undefined (lookup errors are swallowed).
+ */
+async function getStoredTorrentEntry(torrent) {
+  return repository.getSkipTorrent(torrent)
+      .catch(() => repository.getTorrent(torrent))
+      .catch(() => undefined);
+}
+
+/**
+ * Persists a new seeders count for an already stored torrent.
+ *
+ * Does nothing when `torrent.seeders` is undefined; a missing torrent or a storage error is
+ * ignored (best effort), in which case the promise resolves to undefined.
+ */
+async function updateTorrentSeeders(torrent) {
+  if (torrent.seeders === undefined) {
+    return;
+  }
+
+  return repository.getTorrent(torrent)
+      .then(stored => {
+        // repository.getTorrent resolves to the row's plain values, which have no save();
+        // write those back through an upsert. A model instance is still saved directly.
+        if (typeof stored.save === 'function') {
+          stored.seeders = torrent.seeders;
+          return stored.save();
+        }
+        return repository.createTorrent({ ...stored, seeders: torrent.seeders });
+      }).catch(() => undefined);
+}
+
+module.exports = { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry, updateTorrentSeeders };
--- a/scraper/lib/torrentFiles.js
+++ b/scraper/lib/torrentFiles.js
@@ -0,0 +1,297 @@
+const moment = require('moment');
+const { parse } = require('parse-torrent-title');
+const { torrentFiles } = require('../lib/torrent');
+const { escapeTitle, getMetadata, getImdbId } = require('../lib/metadata');
+const { Type } = require('./types');
+
+const MIN_SIZE = 10 * 1024 * 1024; // 10 MB
+
+async function parseTorrentFiles(torrent) {
+  const parsedTorrentName = parse(torrent.title);
+  parsedTorrentName.hasMovies = parsedTorrentName.complete || !!torrent.title.match(/movies?(?:\W|$)/);
+  const metadata = await getMetadata(torrent.kitsuId || torrent.imdbId, torrent.type || Type.MOVIE)
+      .catch(() => undefined);
+
+  // if (metadata && metadata.type !== torrent.type && torrent.type !== Type.ANIME) {
+  //   throw new Error(`Mismatching entry type for ${torrent.name}: ${torrent.type}!=${metadata.type}`);
+  // }
+
+  if (torrent.type === Type.MOVIE && !parsedTorrentName.seasons) {
+    if (parsedTorrentName.complete) {
+      return torrentFiles(torrent)
+          .then(files => files.filter(file => file.size > MIN_SIZE))
+          .then(files => Promise.all(files
+              .map((file) => findMovieImdbId(file.name)
+                  .then((newImdbId) => ({
+                    infoHash: torrent.infoHash,
+                    fileIndex: file.fileIndex,
+                    title: file.name,
+                    size: file.size,
+                    imdbId: newImdbId,
+                  })))))
+          .catch(error => {
+            console.log(`Failed getting files for ${torrent.title}`, error.message);
+            return [];
+          });
+    }
+
+    return [{
+      infoHash: torrent.infoHash,
+      title: torrent.title,
+      size: torrent.size,
+      imdbId: torrent.imdbId || metadata && metadata.imdb_id,
+      kitsuId: torrent.kitsuId || metadata && metadata.kitsu_id
+    }];
+  }
+
+  return getSeriesFiles(torrent, parsedTorrentName)
+      .then((files) => files
+          .filter((file) => file.size > MIN_SIZE)
+          .map((file) => parseSeriesFile(file, parsedTorrentName)))
+      .then((files) => decomposeEpisodes(torrent, files, metadata))
+      .then((files) => assignKitsuOrImdbEpisodes(files, metadata))
+      .then((files) => Promise.all(files.map(file => file.isMovie
+          ? mapSeriesMovie(file, torrent)
+          : mapSeriesEpisode(file, torrent, files))))
+      .then((files) => files.reduce((a, b) => a.concat(b), []))
+      .catch((error) => {
+        console.log(`Failed getting files for ${torrent.title}`, error.message);
+        return [];
+      });
+}
+
+async function getSeriesFiles(torrent, parsedTorrentName) {
+  if (parsedTorrentName.episode || (!parsedTorrentName.episodes && parsedTorrentName.date)) {
+    return [{
+      name: torrent.title,
+      path: torrent.title,
+      size: torrent.size
+    }];
+  }
+
+  return torrentFiles(torrent);
+}
+
+async function mapSeriesEpisode(file, torrent, files) {
+  if (!file.episodes && !file.kitsuEpisodes) {
+    if (files.some(otherFile => otherFile.episodes || otherFile.kitsuEpisodes) || parse(torrent.title).seasons) {
+      return Promise.resolve({
+        infoHash: torrent.infoHash,
+        fileIndex: file.fileIndex,
+        title: file.path || file.name,
+        size: file.size,
+        imdbId: torrent.imdbId || file.imdbId,
+      });
+    }
+    return Promise.resolve([]);
+  }
+  const episodeIndexes = [...(file.episodes || file.kitsuEpisodes).keys()];
+  return Promise.resolve(episodeIndexes.map((index) => ({
+    infoHash: torrent.infoHash,
+    fileIndex: file.fileIndex,
+    title: file.path || file.name,
+    size: file.size,
+    imdbId: torrent.imdbId || file.imdbId,
+    imdbSeason: file.season,
+    imdbEpisode: file.episodes && file.episodes[index],
+    kitsuId: torrent.kitsuId || file.kitsuId,
+    kitsuEpisode: file.kitsuEpisodes && file.kitsuEpisodes[index]
+  })))
+}
+
+async function mapSeriesMovie(file, torrent) {
+  return findMovieImdbId(file).then((imdbId) => [{
+    infoHash: torrent.infoHash,
+    fileIndex: file.fileIndex,
+    title: file.name,
+    size: file.size,
+    imdbId: imdbId
+  }])
+}
+
+/**
+ * Parses a file name into series info (season, episodes, isMovie) and merges it into the file.
+ *
+ * @param file              file object; reads `name` and `path`
+ * @param parsedTorrentName parsed torrent title; reads `season` and `hasMovies`
+ * @returns a new object combining the file's fields with the parsed info
+ */
+function parseSeriesFile(file, parsedTorrentName) {
+  const fileInfo = parse(file.name);
+  // the episode may be in a folder containing season number
+  if (!fileInfo.season && parsedTorrentName.season) {
+    fileInfo.season = parsedTorrentName.season;
+  } else if (!fileInfo.season && file.path.includes('/')) {
+    const folders = file.path.split('/');
+    // the last path segment is the file itself, so the parent folder is second to last
+    const pathInfo = parse(folders[folders.length - 2]);
+    fileInfo.season = pathInfo.season;
+  }
+  // force episode to any found number if it was not parsed
+  if (!fileInfo.episodes) {
+    // a standalone 1-4 digit number (optionally suffixed a/b/vN) that is not adjacent to "movie"/"film"
+    const epMatcher = fileInfo.title.match(/(?<!movie\W*|film\W*)(?:^|\W)(\d{1,4})(?:a|b|v\d)?(?:\W|$)(?!movie|film)/i);
+    fileInfo.episodes = epMatcher && [parseInt(epMatcher[1], 10)];
+    fileInfo.episode = fileInfo.episodes && fileInfo.episodes[0];
+  }
+  // a file counts as a movie when the torrent contains movies and the file has no season and
+  // either no episode or a year, or when its name carries a "<n> movie" / "movie <n>" marker
+  fileInfo.isMovie = (parsedTorrentName.hasMovies && !fileInfo.season && (!fileInfo.episodes || !!fileInfo.year))
+      || (!fileInfo.season && !!file.name.match(/\b(?:\d+[ .]movie|movie[ .]\d+)\b/i));
+
+  return { ...file, ...fileInfo };
+}
+
+/**
+ * Normalizes season/episode numbering of parsed files, updating them in place.
+ *
+ * Picks at most one strategy: concatenated season+episode numbers (e.g. 101), date-based
+ * episodes, or absolute episode numbers. Anime files with episodes are always assigned season 1.
+ *
+ * @param torrent  reads `type`
+ * @param files    parsed files; mutated
+ * @param metadata optional metadata; `episodeCount` is read as a list of per-season episode counts
+ * @returns promise of the same `files` array
+ */
+async function decomposeEpisodes(torrent, files, metadata = { episodeCount: [] }) {
+  // nothing to decompose when no file has episodes or a date
+  if (files.every(file => !file.episodes && !file.date)) {
+    return files;
+  }
+  // for anime type episodes are always absolute and for a single season
+  if (torrent.type === Type.ANIME) {
+    files
+        .filter(file => file.episodes)
+        .forEach(file => file.season = 1);
+    return files;
+  }
+
+  // all episode numbers of non-movie files, ascending
+  const sortedEpisodes = files
+      .map(file => !file.isMovie && file.episodes || [])
+      .reduce((a, b) => a.concat(b), [])
+      .sort((a, b) => a - b);
+
+  // concatenated form: every number is above 100, there is a gap of more than 10 somewhere,
+  // each number fits into its season's episode count and agrees with any parsed season
+  // NOTE(review): `file.episodes.every` below throws for a file that has a season but no episodes
+  if (sortedEpisodes.every(ep => ep > 100)
+      && sortedEpisodes.slice(1).some((ep, index) => ep - sortedEpisodes[index] > 10)
+      && sortedEpisodes.every(ep => metadata.episodeCount[div100(ep) - 1] >= mod100(ep))
+      && files.every(file => !file.season || file.episodes.every(ep => div100(ep) === file.season))) {
+    decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata);
+  } else if (files.every(file => (!file.season || !metadata.episodeCount[file.season - 1]) && file.date)) {
+    // every file has a date and no season known to the metadata
+    decomposeDateEpisodeFiles(torrent, files, metadata);
+  } else if (files.filter(file => !file.isMovie && file.episodes).every(file => !file.season && file.episodes) ||
+      files.some(file => file.season && file.episodes && file.episodes
+          .every(ep => metadata.episodeCount[file.season - 1] < ep))) {
+    // no episode file has a season, or some file's episodes all exceed its season's episode count
+    decomposeAbsoluteEpisodeFiles(torrent, files, metadata);
+  }
+
+  return files;
+}
+
+/**
+ * Splits concatenated season+episode numbers (e.g. 101 -> season 1, episode 1) in place.
+ */
+function decomposeConcatSeasonAndEpisodeFiles(torrent, files, metadata) {
+  // decompose concat season and episode files (ex. 101=S01E01) in case:
+  // 1. file has a season, but individual files are concatenated with that season (ex. path Season 5/511 - Prize
+  // Fighters.avi)
+  // 2. file does not have a season and the episode does not go out of range for the concat season
+  // episode count
+  files
+      .filter(file => file.episodes && file.episodes.every(ep => ep > 100))
+      // skip seasons that really have 100 or more episodes
+      .filter(file => metadata.episodeCount[(file.season || div100(file.episodes[0])) - 1] < 100)
+      .filter(file => file.season && file.episodes.every(ep => div100(ep) === file.season) || !file.season)
+      .forEach(file => {
+        file.season = div100(file.episodes[0]);
+        file.episodes = file.episodes.map(ep => mod100(ep))
+      });
+
+}
+
+/**
+ * Converts absolute episode numbers into season-relative ones, in place, using the per-season
+ * episode counts from the metadata. Without any counts, season-less episode files get season 1.
+ */
+function decomposeAbsoluteEpisodeFiles(torrent, files, metadata) {
+  if (metadata.episodeCount.length === 0) {
+    files
+        .filter(file => !file.season && file.episodes && !file.isMovie)
+        .forEach(file => {
+          file.season = 1;
+        });
+    return;
+  }
+  files
+      .filter(file => file.episodes && !file.isMovie)
+      .forEach(file => {
+        // index of the first season whose cumulative episode count reaches the file's first episode;
+        // falls back to the last season when the episode is beyond the known total
+        const seasonIdx = ([...metadata.episodeCount.keys()]
+            .find((i) => metadata.episodeCount.slice(0, i + 1).reduce((a, b) => a + b) >= file.episodes[0])
+            + 1 || metadata.episodeCount.length) - 1;
+
+        file.season = seasonIdx + 1;
+        // subtract the episodes of all preceding seasons
+        file.episodes = file.episodes
+            .map(ep => ep - metadata.episodeCount.slice(0, seasonIdx).reduce((a, b) => a + b, 0))
+      });
+}
+
+/**
+ * Assigns season and episode, in place, to files identified by date: the file's `date` is
+ * matched against the release date (YYYY-MM-DD) of the metadata videos. Does nothing when the
+ * metadata has no videos.
+ */
+function decomposeDateEpisodeFiles(torrent, files, metadata) {
+  if (!metadata || !metadata.videos || !metadata.videos.length) {
+    return;
+  }
+
+  const timeZoneOffset = getTimeZoneOffset(metadata.country);
+  // release date (after applying the country's offset) -> video; later videos win on the same date
+  const offsetVideos = metadata.videos
+      .reduce((map, video) => {
+        const releaseDate = moment(video.released).utcOffset(timeZoneOffset).format('YYYY-MM-DD');
+        map[releaseDate] = video;
+        return map;
+      }, {});
+
+  files
+      .filter(file => file.date)
+      .forEach(file => {
+        const video = offsetVideos[file.date];
+        if (video) {
+          file.season = video.season;
+          file.episodes = [video.episode];
+        }
+      });
+}
+
+function getTimeZoneOffset(country) {
+  switch (country) {
+    case 'USA':
+      return '-08:00';
+    default:
+      return '00:00';
+  }
+}
+
+/**
+ * Cross-maps episode numbering between kitsu and imdb using the metadata videos, in place.
+ *
+ * @param files    parsed files with `season` / `episodes`; mutated
+ * @param metadata optional metadata; without videos the files are returned untouched
+ * @returns the same `files` array
+ */
+function assignKitsuOrImdbEpisodes(files, metadata) {
+  if (!metadata || !metadata.videos || !metadata.videos.length) {
+    return files;
+  }
+
+  // season -> episode -> video
+  const seriesMapping = metadata.videos
+      .reduce((map, video) => {
+        const episodeMap = map[video.season] || {};
+        episodeMap[video.episode] = video;
+        map[video.season] = episodeMap;
+        return map;
+      }, {});
+
+  if (metadata.videos.some(video => video.imdbSeason) || !metadata.imdbId) {
+    // kitsu episode info is the base
+    // NOTE(review): `.map` is used only for its side effects here; the mapped array is discarded
+    files
+        .filter(file => file.season && file.episodes)
+        .map(file => {
+          const seasonMapping = seriesMapping[file.season];
+          file.kitsuEpisodes = file.episodes;
+          if (seasonMapping && seasonMapping[file.episodes[0]] && seasonMapping[file.episodes[0]].imdbSeason) {
+            file.imdbId = metadata.imdbId;
+            file.season = seasonMapping[file.episodes[0]].imdbSeason;
+            file.episodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].imdbEpisode);
+          } else {
+            // no imdb mapping available for episode
+            file.season = undefined;
+            file.episodes = undefined;
+          }
+        })
+  } else if (metadata.videos.some(video => video.kitsuEpisode)) {
+    // imdb episode info is base
+    files
+        .filter(file => file.season && file.episodes)
+        .forEach(file => {
+          const seasonMapping = seriesMapping[file.season];
+          if (seasonMapping && seasonMapping[file.episodes[0]] && seasonMapping[file.episodes[0]].kitsuId) {
+            file.kitsuId = seasonMapping[file.episodes[0]].kitsuId;
+            file.kitsuEpisodes = file.episodes.map(ep => seasonMapping[ep] && seasonMapping[ep].kitsuEpisode);
+          }
+        })
+  }
+  return files;
+}
+
+/**
+ * Looks up the imdb id of a movie.
+ *
+ * @param title a raw title string (parsed first) or an already parsed title object
+ * @returns promise of the imdb id, or undefined when the lookup fails
+ */
+function findMovieImdbId(title) {
+  const parsedTitle = typeof title === 'string' ? parse(title) : title;
+  return getImdbId(parsedTitle, Type.MOVIE).catch(() => undefined);
+}
+
+function div100(episode) {
+  return (episode / 100 >> 0); // floor to nearest int
+}
+
+/**
+ * Remainder of `episode / 100`, i.e. the episode part of a concatenated season+episode number.
+ */
+function mod100(episode) {
+  return episode % 100;
+}
+
+module.exports = { parseTorrentFiles };
--- a/scraper/lib/types.js
+++ b/scraper/lib/types.js
@@ -0,0 +1,5 @@
+// Entry types used across the scraper to classify torrents.
+exports.Type = {
+  MOVIE: 'movie',
+  SERIES: 'series',
+  ANIME: 'anime'
+};