mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
updates id search input arguments
This commit is contained in:
2
index.js
2
index.js
@@ -6,7 +6,7 @@ const thepiratebayScraper = require('./scrapers/thepiratebay/thepiratebay_dump_s
|
|||||||
const horribleSubsScraper = require('./scrapers/horriblesubs/horriblesubs_scraper');
|
const horribleSubsScraper = require('./scrapers/horriblesubs/horriblesubs_scraper');
|
||||||
const thepiratebayDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper');
|
const thepiratebayDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper');
|
||||||
|
|
||||||
const providers = [thepiratebayDumpScraper];
|
const providers = [thepiratebayScraper];
|
||||||
|
|
||||||
async function scrape() {
|
async function scrape() {
|
||||||
providers.forEach((provider) => provider.scrape());
|
providers.forEach((provider) => provider.scrape());
|
||||||
|
|||||||
@@ -77,19 +77,21 @@ function escapeTitle(title, hyphenEscape = true) {
|
|||||||
.trim();
|
.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getImdbId(info) {
|
async function getImdbId(info, type) {
|
||||||
const key = `${info.name}_${info.year}_${info.type}`;
|
const name = escapeTitle(info.title).toLowerCase();
|
||||||
|
const year = info.year || info.date && info.date.slice(0, 4);
|
||||||
|
const key = `${name}_${year}_${type}`;
|
||||||
|
|
||||||
return cacheWrapImdbId(key,
|
return cacheWrapImdbId(key,
|
||||||
() => new Promise((resolve, reject) => {
|
() => new Promise((resolve, reject) => {
|
||||||
nameToImdb(info, function (err, res) {
|
nameToImdb({ name, year, type }, function (err, res) {
|
||||||
if (res) {
|
if (res) {
|
||||||
resolve(res);
|
resolve(res);
|
||||||
} else {
|
} else {
|
||||||
reject(err || new Error('failed imdbId search'));
|
reject(err || new Error('failed imdbId search'));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}).catch(() => bing.web(`${info.name} ${info.year || ''} ${info.type} imdb`)
|
}).catch(() => bing.web(`${name} ${year || ''} ${type} imdb`)
|
||||||
.then(results => results
|
.then(results => results
|
||||||
.map((result) => result.link)
|
.map((result) => result.link)
|
||||||
.find(result => result.includes('imdb.com/title/')))
|
.find(result => result.includes('imdb.com/title/')))
|
||||||
@@ -98,8 +100,10 @@ async function getImdbId(info) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function getKitsuId(info) {
|
async function getKitsuId(info) {
|
||||||
const title = info.season > 1 ? `${info.name} S${info.season}` : info.name;
|
const title = escapeTitle(info.title).toLowerCase().replace(/[;]+/g, ' ').replace(/[,%']+/g, '');
|
||||||
const query = title.replace(/[;]+/g, ' ').replace(/[,%']+/g, '');
|
const season = info.season > 1 ? ` S${info.season}` : '';
|
||||||
|
const query = `${title}${season}`;
|
||||||
|
|
||||||
return cacheWrapImdbId(query,
|
return cacheWrapImdbId(query,
|
||||||
() => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 })
|
() => needle('get', `${KITSU_URL}/catalog/series/kitsu-anime-list/search=${query}.json`, { open_timeout: 60000 })
|
||||||
.then((response) => {
|
.then((response) => {
|
||||||
@@ -112,4 +116,4 @@ async function getKitsuId(info) {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { escapeTitle, getMetadata, getImdbId, getKitsuId };
|
module.exports = { getMetadata, getImdbId, getKitsuId };
|
||||||
|
|||||||
@@ -1,23 +1,22 @@
|
|||||||
const { parse } = require('parse-torrent-title');
|
const { parse } = require('parse-torrent-title');
|
||||||
const { Type } = require('./types');
|
const { Type } = require('./types');
|
||||||
const repository = require('./repository');
|
const repository = require('./repository');
|
||||||
const { getImdbId, getKitsuId, escapeTitle } = require('./metadata');
|
const { getImdbId, getKitsuId } = require('./metadata');
|
||||||
const { parseTorrentFiles } = require('./torrentFiles');
|
const { parseTorrentFiles } = require('./torrentFiles');
|
||||||
|
|
||||||
async function createTorrentEntry(torrent) {
|
async function createTorrentEntry(torrent) {
|
||||||
const titleInfo = parse(torrent.title);
|
const titleInfo = parse(torrent.title);
|
||||||
const searchTitle = escapeTitle(titleInfo.title).toLowerCase();
|
|
||||||
|
|
||||||
if (titleInfo.seasons && torrent.type === Type.MOVIE) {
|
if (titleInfo.seasons && torrent.type === Type.MOVIE) {
|
||||||
// sometimes series torrent might be put into movies category
|
// sometimes series torrent might be put into movies category
|
||||||
torrent.type = Type.SERIES;
|
torrent.type = Type.SERIES;
|
||||||
}
|
}
|
||||||
if (!torrent.imdbId && torrent.type !== Type.ANIME) {
|
if (!torrent.imdbId && torrent.type !== Type.ANIME) {
|
||||||
torrent.imdbId = await getImdbId({ name: searchTitle, year: titleInfo.year, type: torrent.type })
|
torrent.imdbId = await getImdbId(titleInfo, torrent.type)
|
||||||
.catch(() => undefined);
|
.catch(() => undefined);
|
||||||
}
|
}
|
||||||
if (!torrent.kitsuId && torrent.type === Type.ANIME) {
|
if (!torrent.kitsuId && torrent.type === Type.ANIME) {
|
||||||
torrent.kitsuId = await getKitsuId({ name: searchTitle, season: titleInfo.season })
|
torrent.kitsuId = await getKitsuId(titleInfo)
|
||||||
.catch(() => undefined);
|
.catch(() => undefined);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -283,12 +283,7 @@ function assignKitsuOrImdbEpisodes(files, metadata) {
|
|||||||
|
|
||||||
function findMovieImdbId(title) {
|
function findMovieImdbId(title) {
|
||||||
const parsedTitle = typeof title === 'string' ? parse(title) : title;
|
const parsedTitle = typeof title === 'string' ? parse(title) : title;
|
||||||
const searchQuery = {
|
return getImdbId(parsedTitle, Type.MOVIE).catch(() => undefined);
|
||||||
name: escapeTitle(parsedTitle.title).toLowerCase(),
|
|
||||||
year: parsedTitle.year,
|
|
||||||
type: Type.MOVIE
|
|
||||||
};
|
|
||||||
return getImdbId(searchQuery).catch((error) => undefined);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function div100(episode) {
|
function div100(episode) {
|
||||||
|
|||||||
@@ -168,5 +168,5 @@ async function findAllFiles() {
|
|||||||
//addMissingEpisodes().then(() => console.log('Finished'));
|
//addMissingEpisodes().then(() => console.log('Finished'));
|
||||||
//findAllFiles().then(() => console.log('Finished'));
|
//findAllFiles().then(() => console.log('Finished'));
|
||||||
//updateMovieCollections().then(() => console.log('Finished'));
|
//updateMovieCollections().then(() => console.log('Finished'));
|
||||||
reapplyEpisodeDecomposing('87e7354028f2aaab56dfd0dabbab679a1b54c3c0', false).then(() => console.log('Finished'));
|
reapplyEpisodeDecomposing('83b61caa4191469a9c15ee851aff828184f9a78d', false).then(() => console.log('Finished'));
|
||||||
//reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
|
//reapplySeriesSeasonsSavedAsMovies().then(() => console.log('Finished'));
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const Bottleneck = require('bottleneck');
|
const Bottleneck = require('bottleneck');
|
||||||
const { parse } = require('parse-torrent-title');
|
|
||||||
const decode = require('magnet-uri');
|
const decode = require('magnet-uri');
|
||||||
const horriblesubs = require('./horriblesubs_api.js');
|
const horriblesubs = require('./horriblesubs_api.js');
|
||||||
const repository = require('../../lib/repository');
|
const repository = require('../../lib/repository');
|
||||||
@@ -54,8 +53,8 @@ async function initMapping() {
|
|||||||
async function enrichShow(show) {
|
async function enrichShow(show) {
|
||||||
console.log(`${NAME}: getting show info for ${show.title}...`);
|
console.log(`${NAME}: getting show info for ${show.title}...`);
|
||||||
const showId = await horriblesubs._getShowId(show.url)
|
const showId = await horriblesubs._getShowId(show.url)
|
||||||
.catch((error) => show.title);
|
.catch(() => show.title);
|
||||||
const metadata = await getKitsuId({ name: show.title })
|
const metadata = await getKitsuId({ title: show.title })
|
||||||
.then((kitsuId) => getMetadata(kitsuId))
|
.then((kitsuId) => getMetadata(kitsuId))
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
console.log(`Failed getting kitsu meta: ${error.message}`);
|
console.log(`Failed getting kitsu meta: ${error.message}`);
|
||||||
|
|||||||
@@ -18,12 +18,12 @@ const limiter = new Bottleneck({ maxConcurrent: 40 });
|
|||||||
async function scrape() {
|
async function scrape() {
|
||||||
const lastScraped = await repository.getProvider({ name: NAME });
|
const lastScraped = await repository.getProvider({ name: NAME });
|
||||||
const lastDump = { updatedAt: 2147000000 };
|
const lastDump = { updatedAt: 2147000000 };
|
||||||
const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
|
//const checkPoint = moment('2016-06-17 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
|
||||||
//const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
|
//const lastDump = await thepiratebay.dumps().then((dumps) => dumps.sort((a, b) => b.updatedAt - a.updatedAt)[0]);
|
||||||
|
|
||||||
if (!lastScraped.lastScraped || lastScraped.lastScraped < lastDump.updatedAt) {
|
if (!lastScraped.lastScraped || lastScraped.lastScraped < lastDump.updatedAt) {
|
||||||
console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
|
console.log(`starting to scrape tpb dump: ${JSON.stringify(lastDump)}`);
|
||||||
//await downloadDump(lastDump);
|
await downloadDump(lastDump);
|
||||||
|
|
||||||
let entriesProcessed = 0;
|
let entriesProcessed = 0;
|
||||||
const lr = new LineByLineReader(CSV_FILE_PATH);
|
const lr = new LineByLineReader(CSV_FILE_PATH);
|
||||||
@@ -51,10 +51,10 @@ async function scrape() {
|
|||||||
size: parseInt(row[3], 10)
|
size: parseInt(row[3], 10)
|
||||||
};
|
};
|
||||||
|
|
||||||
if (torrent.uploadDate > checkPoint) {
|
// if (torrent.uploadDate > checkPoint) {
|
||||||
entriesProcessed++;
|
// entriesProcessed++;
|
||||||
return;
|
// return;
|
||||||
}
|
// }
|
||||||
|
|
||||||
if (lastScraped.lastScraped && lastScraped.lastScraped > torrent.uploadDate) {
|
if (lastScraped.lastScraped && lastScraped.lastScraped > torrent.uploadDate) {
|
||||||
// torrent was already scraped previously, skipping
|
// torrent was already scraped previously, skipping
|
||||||
@@ -75,8 +75,8 @@ async function scrape() {
|
|||||||
console.log(err);
|
console.log(err);
|
||||||
});
|
});
|
||||||
lr.on('end', () => {
|
lr.on('end', () => {
|
||||||
fs.unlink(CSV_FILE_PATH);
|
fs.unlink(CSV_FILE_PATH, (error) => console.warn(error));
|
||||||
repository.updateProvider({ name: NAME, lastScraped: lastDump.updatedAt });
|
//repository.updateProvider({ name: NAME, lastScraped: lastDump.updatedAt });
|
||||||
console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`);
|
console.log(`finished to scrape tpb dump: ${JSON.stringify(lastDump)}!`);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -144,8 +144,8 @@ async function findTorrentInSource(record) {
|
|||||||
async function findTorrentViaBing(record) {
|
async function findTorrentViaBing(record) {
|
||||||
return bing.web(`${record.infoHash}`)
|
return bing.web(`${record.infoHash}`)
|
||||||
.then((results) => results
|
.then((results) => results
|
||||||
.find(result => result.description.includes('Direct download via magnet link') || result.description.includes(
|
.find(result => result.description.includes('Direct download via magnet link') ||
|
||||||
'Get this torrent')))
|
result.description.includes('Get this torrent')))
|
||||||
.then((result) => {
|
.then((result) => {
|
||||||
if (!result) {
|
if (!result) {
|
||||||
throw new Error(`Failed to find torrent ${record.title}`);
|
throw new Error(`Failed to find torrent ${record.title}`);
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ const limiter = new Bottleneck({ maxConcurrent: 40 });
|
|||||||
async function scrape() {
|
async function scrape() {
|
||||||
console.log(`starting to scrape tpb dump...`);
|
console.log(`starting to scrape tpb dump...`);
|
||||||
//const checkPoint = moment('2013-06-16 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
|
//const checkPoint = moment('2013-06-16 00:00:00', 'YYYY-MMM-DD HH:mm:ss').toDate();
|
||||||
const checkPoint = 951000;
|
const checkPoint = 4115000;
|
||||||
|
|
||||||
let entriesProcessed = 0;
|
let entriesProcessed = 0;
|
||||||
const lr = new LineByLineReader(CSV_FILE_PATH);
|
const lr = new LineByLineReader(CSV_FILE_PATH);
|
||||||
|
|||||||
Reference in New Issue
Block a user