mirror of
https://github.com/knightcrawler-stremio/knightcrawler.git
synced 2024-12-20 03:29:51 +00:00
[scraper] adds helper class for proxy and user agent
This commit is contained in:
42
package-lock.json
generated
42
package-lock.json
generated
@@ -598,6 +598,16 @@
|
||||
"resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz",
|
||||
"integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA="
|
||||
},
|
||||
"detect-indent": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/detect-indent/-/detect-indent-6.0.0.tgz",
|
||||
"integrity": "sha512-oSyFlqaTHCItVRGK5RmrmjB+CmaMOW7IaNA/kdxqhoa6d17j/5ce9O9eWXmV/KEdRwqpQA+Vqe8a8Bsybu4YnA=="
|
||||
},
|
||||
"docopt": {
|
||||
"version": "0.6.2",
|
||||
"resolved": "https://registry.npmjs.org/docopt/-/docopt-0.6.2.tgz",
|
||||
"integrity": "sha1-so6eIiDaXsSffqW7JKR3h0Be6xE="
|
||||
},
|
||||
"dom-serializer": {
|
||||
"version": "0.1.1",
|
||||
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.1.tgz",
|
||||
@@ -629,6 +639,16 @@
|
||||
"domelementtype": "1"
|
||||
}
|
||||
},
|
||||
"dot-json": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/dot-json/-/dot-json-1.2.0.tgz",
|
||||
"integrity": "sha512-4bEM7KHFl/U9gAI5nIvU0/fwVzNnE713K339vcxAMtxd2D9mZP6o65UwlcXigJL4rfk90UM0J+D7IPIFYZMQ8Q==",
|
||||
"requires": {
|
||||
"detect-indent": "~6.0.0",
|
||||
"docopt": "~0.6.2",
|
||||
"underscore-keypath": "~0.0.22"
|
||||
}
|
||||
},
|
||||
"dotenv": {
|
||||
"version": "8.2.0",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-8.2.0.tgz",
|
||||
@@ -1173,6 +1193,11 @@
|
||||
"resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz",
|
||||
"integrity": "sha1-soqmKIorn8ZRA1x3EfZathkDMaY="
|
||||
},
|
||||
"lodash.clonedeep": {
|
||||
"version": "4.5.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
|
||||
"integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8="
|
||||
},
|
||||
"lodash.defaults": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||
@@ -2607,6 +2632,14 @@
|
||||
"resolved": "https://registry.npmjs.org/underscore/-/underscore-1.9.2.tgz",
|
||||
"integrity": "sha512-D39qtimx0c1fI3ya1Lnhk3E9nONswSKhnffBI0gME9C99fYOkNi04xs8K6pePLhvl1frbDemkaBQ5ikWllR2HQ=="
|
||||
},
|
||||
"underscore-keypath": {
|
||||
"version": "0.0.22",
|
||||
"resolved": "https://registry.npmjs.org/underscore-keypath/-/underscore-keypath-0.0.22.tgz",
|
||||
"integrity": "sha1-SKUoOSu278QkvhyqVtpLX6zPJk0=",
|
||||
"requires": {
|
||||
"underscore": "*"
|
||||
}
|
||||
},
|
||||
"uniq": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/uniq/-/uniq-1.0.1.tgz",
|
||||
@@ -2633,6 +2666,15 @@
|
||||
"iconv-lite": "~0.4.11"
|
||||
}
|
||||
},
|
||||
"user-agents": {
|
||||
"version": "1.0.559",
|
||||
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.0.559.tgz",
|
||||
"integrity": "sha512-HdAlNS3vDxOGMRwmv8or05xL96MV3CEwQhUSFTCRoOvTOEnWhTEBPAHRry/xZpVTTOtx77UHMal8YKcx6fs7Lg==",
|
||||
"requires": {
|
||||
"dot-json": "^1.2.0",
|
||||
"lodash.clonedeep": "^4.5.0"
|
||||
}
|
||||
},
|
||||
"util-deprecate": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
"real-debrid-api": "^1.0.1",
|
||||
"sequelize": "^5.21.5",
|
||||
"sugar-date": "^2.0.6",
|
||||
"torrent-stream": "^1.1.0"
|
||||
"torrent-stream": "^1.1.0",
|
||||
"user-agents": "^1.0.559"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ const bing = require('nodejs-bing');
|
||||
const he = require('he');
|
||||
const { cacheWrapImdbId, cacheWrapKitsuId, cacheWrapMetadata } = require('./cache');
|
||||
const { Type } = require('./types');
|
||||
const { getRandomUserAgent } = require('./request_helper');
|
||||
|
||||
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
|
||||
const KITSU_URL = 'https://anime-kitsu.strem.fun';
|
||||
@@ -107,7 +108,7 @@ async function getImdbId(info, type) {
|
||||
reject(err || new Error('failed imdbId search'));
|
||||
}
|
||||
});
|
||||
}).catch(() => googleIt({ query, disableConsole: true })
|
||||
}).catch(() => googleIt({ query, userAgent: getRandomUserAgent(), disableConsole: true })
|
||||
.catch(() => bing.web(query))
|
||||
.then(results => results
|
||||
.map(result => result.link)
|
||||
|
||||
26
scraper/lib/request_helper.js
Normal file
26
scraper/lib/request_helper.js
Normal file
@@ -0,0 +1,26 @@
|
||||
const UserAgent = require('user-agents');
|
||||
|
||||
/**
 * Turns the raw comma-separated PROXY_HOST value into a clean list of hosts.
 * Entries are trimmed and empty ones are dropped, so values such as
 * "a, b," do not yield hosts with stray whitespace or empty strings.
 *
 * @param {string|undefined} rawHosts raw value of the PROXY_HOST variable
 * @returns {string[]} usable host entries; empty when nothing is configured
 */
function parseProxyHosts(rawHosts) {
  if (!rawHosts) {
    return [];
  }
  return rawHosts
      .split(',')
      .map(host => host.trim())
      .filter(host => host.length > 0);
}

// PROXY_HOST may carry several comma-separated hosts.
const PROXY_HOSTS = parseProxyHosts(process.env.PROXY_HOST);
const PROXY_USERNAME = process.env.PROXY_USERNAME;
const PROXY_PASSWORD = process.env.PROXY_PASSWORD;
|
||||
// One module-wide UserAgent instance; getRandomUserAgent() calls random() on it.
const userAgent = new UserAgent();

/**
 * Asks the shared UserAgent instance for a random agent and returns it
 * in its string form.
 *
 * @returns {string} result of toString() on the randomly picked agent
 */
function getRandomUserAgent() {
  const randomAgent = userAgent.random();
  return randomAgent.toString();
}
|
||||
|
||||
/**
 * Lists the configured proxy hosts with surrounding whitespace removed and
 * blank entries dropped. Tolerates PROXY_HOSTS being undefined.
 *
 * @returns {string[]} usable proxy hosts, possibly empty
 */
function listProxyHosts() {
  return (PROXY_HOSTS || [])
      .map(host => host.trim())
      .filter(host => host.length > 0);
}

/**
 * Builds the authenticated proxy url for the host chosen by `pickHost`.
 *
 * @param {function(string[]): string} pickHost selects one host from a non-empty list
 * @returns {string|undefined} proxy url, or undefined when hosts, username
 *   or password are not configured
 */
function buildProxyUrl(pickHost) {
  const hosts = listProxyHosts();
  if (!hosts.length || !PROXY_USERNAME || !PROXY_PASSWORD) {
    return undefined;
  }
  return `http://${PROXY_USERNAME}:${PROXY_PASSWORD}@${pickHost(hosts)}`;
}

/**
 * Returns a proxy url pointing at a randomly chosen configured host.
 *
 * @returns {string|undefined} proxy url, or undefined when the proxy is not fully configured
 */
function getRandomProxy() {
  return buildProxyUrl(hosts => hosts[Math.floor(Math.random() * hosts.length)]);
}

/**
 * Returns a proxy url pointing at the first configured host.
 *
 * @returns {string|undefined} proxy url, or undefined when the proxy is not fully configured
 */
function getProxy() {
  return buildProxyUrl(hosts => hosts[0]);
}
|
||||
|
||||
module.exports = { getRandomUserAgent, getRandomProxy, getProxy };
|
||||
@@ -2,10 +2,7 @@ const { encode } = require('magnet-uri');
|
||||
const RealDebridClient = require('real-debrid-api');
|
||||
const namedQueue = require('named-queue');
|
||||
const { cacheWrapResolvedUrl } = require('../lib/cache');
|
||||
|
||||
const PROXY_HOST = process.env.PROXY_HOST;
|
||||
const PROXY_USERNAME = process.env.PROXY_USERNAME;
|
||||
const PROXY_PASSWORD = process.env.PROXY_PASSWORD;
|
||||
const { getProxy } = require('../lib/request_helper');
|
||||
|
||||
const unrestrictQueue = new namedQueue((task, callback) => task.method()
|
||||
.then(result => callback(false, result))
|
||||
@@ -70,11 +67,4 @@ async function _unrestrictLink(RD, link) {
|
||||
// });
|
||||
}
|
||||
|
||||
function getProxy() {
|
||||
if (PROXY_HOST && PROXY_USERNAME && PROXY_PASSWORD) {
|
||||
return `http://${PROXY_USERNAME}:${PROXY_PASSWORD}@${PROXY_HOST}`;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
module.exports = { resolve };
|
||||
@@ -3,6 +3,7 @@ const needle = require('needle');
|
||||
const moment = require('moment');
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { getRandomProxy, getRandomUserAgent } = require('./../../lib/request_helper');
|
||||
|
||||
const defaultProxies = [
|
||||
'https://katcr.co'
|
||||
@@ -64,8 +65,9 @@ function browse(config = {}, retries = 2) {
|
||||
|
||||
function singleRequest(requestUrl, config = {}) {
|
||||
const timeout = config.timeout || defaultTimeout;
|
||||
const options = { userAgent: getRandomUserAgent(), proxy: getRandomProxy(), open_timeout: timeout, follow: 2 };
|
||||
|
||||
return needle('get', requestUrl, { open_timeout: timeout, follow: 2 })
|
||||
return needle('get', requestUrl, options)
|
||||
.then((response) => {
|
||||
const body = response.body;
|
||||
if (!body) {
|
||||
|
||||
Reference in New Issue
Block a user