Updates IMDb/Kitsu id search input arguments (getImdbId and getKitsuId now take the parsed title info)

2024-12-20 03:29:51 +00:00 · 2020-03-03 20:36:10 +01:00
parent 30d1a60d00
commit cbfcea65f8
8 changed files with 30 additions and 33 deletions
--- a/index.js
+++ b/index.js
@@ -6,7 +6,7 @@ const thepiratebayScraper = require('./scrapers/thepiratebay/thepiratebay_dump_s
 const horribleSubsScraper = require('./scrapers/horriblesubs/horriblesubs_scraper');
 const thepiratebayDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper');

-const providers = [thepiratebayDumpScraper];
+const providers = [thepiratebayScraper];

 async function scrape() {
  providers.forEach((provider) => provider.scrape());
--- a/lib/metadata.js
+++ b/lib/metadata.js
@@ -77,19 +77,21 @@ function escapeTitle(title, hyphenEscape = true) {
      .trim();
 }

-async function getImdbId(info) {
-  const key = `${info.name}_${info.year}_${info.type}`;
+async function getImdbId(info, type) {
+  const name = escapeTitle(info.title).toLowerCase();
+  const year = info.year || info.date && info.date.slice(0, 4);
+  const key = `${name}_${year}_${type}`;

  return cacheWrapImdbId(key,
      () => new Promise((resolve, reject) => {
-        nameToImdb(info, function (err, res) {
+        nameToImdb({ name, year, type }, function (err, res) {
          if (res) {
            resolve(res);
          } else {
            reject(err || new Error('failed imdbId search'));
          }
        });
-      }).catch(() => bing.web(`${info.name} ${info.year || ''} ${info.type} imdb`)
+      }).catch(() => bing.web(`${name} ${year || ''} ${type} imdb`)
          .then(results => results
              .map((result) => result.link)
              .find(result => result.includes('imdb.com/title/')))
@@ -98,8 +100,10 @@ async function getImdbId(info) {
 }

 async function getKitsuId(info) {
-  const title = info.season > 1 ? `${info.name} S${info.season}` : info.name;
-  const query = title.replace(/[;]+/g, ' ').replace(/[,%']+/g, '');
+  const title = escapeTitle(info.title).toLowerCase().replace(/[;]+/g, ' ').replace(/[,%']+/g, '');
+  const season = info.season > 1 ? ` S${info.season}` : '';
+  const query = `${title}${season}`;
+
  return cacheWrapImdbId(query,
      () => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 })
          .then((response) => {
@@ -112,4 +116,4 @@ async function getKitsuId(info) {
          }));
 }

-module.exports = { escapeTitle, getMetadata, getImdbId, getKitsuId };
+module.exports = { getMetadata, getImdbId, getKitsuId };
--- a/lib/torrentEntries.js
+++ b/lib/torrentEntries.js
@@ -1,23 +1,22 @@
 const { parse } = require('parse-torrent-title');
 const { Type } = require('./types');
 const repository = require('./repository');
-const { getImdbId, getKitsuId, escapeTitle } = require('./metadata');
+const { getImdbId, getKitsuId } = require('./metadata');
 const { parseTorrentFiles } = require('./torrentFiles');

 async function createTorrentEntry(torrent) {
  const titleInfo = parse(torrent.title);
-  const searchTitle = escapeTitle(titleInfo.title).toLowerCase();

  if (titleInfo.seasons && torrent.type === Type.MOVIE) {
    // sometimes series torrent might be put into movies category
    torrent.type = Type.SERIES;
  }
  if (!torrent.imdbId && torrent.type !== Type.ANIME) {
-    torrent.imdbId = await getImdbId({ name: searchTitle, year: titleInfo.year, type: torrent.type })
+    torrent.imdbId = await getImdbId(titleInfo, torrent.type)
        .catch(() => undefined);
  }
  if (!torrent.kitsuId && torrent.type === Type.ANIME) {
-    torrent.kitsuId = await getKitsuId({ name: searchTitle, season: titleInfo.season })
+    torrent.kitsuId = await getKitsuId(titleInfo)
        .catch(() => undefined);
  }

--- a/lib/torrentFiles.js
+++ b/lib/torrentFiles.js
@@ -283,12 +283,7 @@ function assignKitsuOrImdbEpisodes(files, metadata) {

 function findMovieImdbId(title) {
  const parsedTitle = typeof title === 'string' ? parse(title) : title;
-  const searchQuery = {
-    name: escapeTitle(parsedTitle.title).toLowerCase(),
-    year: parsedTitle.year,
-    type: Type.MOVIE
-  };
-  return getImdbId(searchQuery).catch((error) => undefined);
+  return getImdbId(parsedTitle, Type.MOVIE).catch(() => undefined);
 }

 function div100(episode) {
--- a/manual/manual.js
+++ b/manual/manual.js
@@ -168,5 +168,5 @@ async function findAllFiles() {
 //addMissingEpisodes().then(() => console.log('Finished'));
 //findAllFiles().then(() => console.log('Finished'));
 //updateMovieCollections().then(() => console.log('Finished'));
-reapplyEpisodeDecomposing('87e7354028f2aaab56dfd0dabbab679a1b54c3c0', false).then(() => console.log('Finished'));
+reapplyEpisodeDecomposing('83b61caa4191469a9c15ee851aff828184f9a78d', false).then(() => console.log('Finished'));
 //reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
--- a/scrapers/horriblesubs/horriblesubs_scraper.js
+++ b/scrapers/horriblesubs/horriblesubs_scraper.js
@@ -1,6 +1,5 @@
 const fs = require('fs');
 const Bottleneck = require('bottleneck');
-const { parse } = require('parse-torrent-title');
 const decode = require('magnet-uri');
 const horriblesubs = require('./horriblesubs_api.js');
 const repository = require('../../lib/repository');
@@ -54,8 +53,8 @@ async function initMapping() {
 async function enrichShow(show) {
  console.log(`${NAME}: getting show info for ${show.title}...`);
  const showId = await horriblesubs._getShowId(show.url)
-      .catch((error) => show.title);
-  const metadata = await getKitsuId({ name: show.title })
+      .catch(() => show.title);
+  const metadata = await getKitsuId({ title: show.title })
      .then((kitsuId) => getMetadata(kitsuId))
      .catch((error) => {
        console.log(`Failed getting kitsu meta: ${error.message}`);
--- a/scrapers/thepiratebay/thepiratebay_dump_scraper.js
+++ b/scrapers/thepiratebay/thepiratebay_dump_scraper.js
@@ -18,12 +18,12 @@ const limiter = new Bottleneck({ maxConcurrent: 40 });
 async function scrape() {
  const lastScraped = await repository.getProvider({ name: NAME });
  const lastDump = { updatedAt: 2147000000 };
-  const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
+  //const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
  //const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);

  if (!lastScraped.lastScraped || lastScraped.lastScraped < lastDump.updatedAt) {
    console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
-    //await downloadDump(lastDump);
+    await downloadDump(lastDump);

    let entriesProcessed = 0;
    const lr = new LineByLineReader(CSV_FILE_PATH);
@@ -51,10 +51,10 @@ async function scrape() {
        size: parseInt(row[3], 10)
      };

-      if (torrent.uploadDate > checkPoint) {
-        entriesProcessed++;
-        return;
-      }
+      // if (torrent.uploadDate > checkPoint) {
+      //   entriesProcessed++;
+      //   return;
+      // }

      if (lastScraped.lastScraped && lastScraped.lastScraped > torrent.uploadDate) {
        // torrent was already scraped previously, skipping
@@ -75,8 +75,8 @@ async function scrape() {
      console.log(err);
    });
    lr.on('end', () => {
-      fs.unlink(CSV_FILE_PATH);
-      repository.updateProvider({ name: NAME, lastScraped: lastDump.updatedAt });
+      fs.unlink(CSV_FILE_PATH, (error) => console.warn(error));
+      //repository.updateProvider({ name: NAME, lastScraped: lastDump.updatedAt });
      console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`);
    });
  }
@@ -144,8 +144,8 @@ async function findTorrentInSource(record) {
 async function findTorrentViaBing(record) {
  return bing.web(`${record.infoHash}`)
      .then((results) => results
-          .find(result => result.description.includes('Direct download via magnet link') || result.description.includes(
-              'Get this torrent')))
+          .find(result => result.description.includes('Direct download via magnet link') ||
+              result.description.includes('Get this torrent')))
      .then((result) => {
        if (!result) {
          throw new Error(`Failed to find torrent ${record.title}`);
--- a/scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper.js
+++ b/scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper.js
@@ -15,7 +15,7 @@ const limiter = new Bottleneck({ maxConcurrent: 40 });
 async function scrape() {
  console.log(`starting to scrape tpb dump...`);
  //const checkPoint = moment('2013-06-16 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
-  const checkPoint = 951000;
+  const checkPoint = 4115000;

  let entriesProcessed = 0;
  const lr = new LineByLineReader(CSV_FILE_PATH);