[scraper] adds helper class for proxy and user agent

This commit is contained in:
TheBeastLT
2020-04-02 22:47:52 +02:00
parent db78f5873b
commit 2833c9d3a2
6 changed files with 76 additions and 14 deletions

42
package-lock.json generated
View File

@@ -598,6 +598,16 @@
"resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz",
"integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA="
},
"detect-indent": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/detect-indent/-/detect-indent-6.0.0.tgz",
"integrity": "sha512-oSyFlqaTHCItVRGK5RmrmjB+CmaMOW7IaNA/kdxqhoa6d17j/5ce9O9eWXmV/KEdRwqpQA+Vqe8a8Bsybu4YnA=="
},
"docopt": {
"version": "0.6.2",
"resolved": "https://registry.npmjs.org/docopt/-/docopt-0.6.2.tgz",
"integrity": "sha1-so6eIiDaXsSffqW7JKR3h0Be6xE="
},
"dom-serializer": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.1.tgz",
@@ -629,6 +639,16 @@
"domelementtype": "1"
}
},
"dot-json": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/dot-json/-/dot-json-1.2.0.tgz",
"integrity": "sha512-4bEM7KHFl/U9gAI5nIvU0/fwVzNnE713K339vcxAMtxd2D9mZP6o65UwlcXigJL4rfk90UM0J+D7IPIFYZMQ8Q==",
"requires": {
"detect-indent": "~6.0.0",
"docopt": "~0.6.2",
"underscore-keypath": "~0.0.22"
}
},
"dotenv": {
"version": "8.2.0",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-8.2.0.tgz",
@@ -1173,6 +1193,11 @@
"resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz",
"integrity": "sha1-soqmKIorn8ZRA1x3EfZathkDMaY="
},
"lodash.clonedeep": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
"integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8="
},
"lodash.defaults": {
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
@@ -2607,6 +2632,14 @@
"resolved": "https://registry.npmjs.org/underscore/-/underscore-1.9.2.tgz",
"integrity": "sha512-D39qtimx0c1fI3ya1Lnhk3E9nONswSKhnffBI0gME9C99fYOkNi04xs8K6pePLhvl1frbDemkaBQ5ikWllR2HQ=="
},
"underscore-keypath": {
"version": "0.0.22",
"resolved": "https://registry.npmjs.org/underscore-keypath/-/underscore-keypath-0.0.22.tgz",
"integrity": "sha1-SKUoOSu278QkvhyqVtpLX6zPJk0=",
"requires": {
"underscore": "*"
}
},
"uniq": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/uniq/-/uniq-1.0.1.tgz",
@@ -2633,6 +2666,15 @@
"iconv-lite": "~0.4.11"
}
},
"user-agents": {
"version": "1.0.559",
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.0.559.tgz",
"integrity": "sha512-HdAlNS3vDxOGMRwmv8or05xL96MV3CEwQhUSFTCRoOvTOEnWhTEBPAHRry/xZpVTTOtx77UHMal8YKcx6fs7Lg==",
"requires": {
"dot-json": "^1.2.0",
"lodash.clonedeep": "^4.5.0"
}
},
"util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",

View File

@@ -40,6 +40,7 @@
"real-debrid-api": "^1.0.1",
"sequelize": "^5.21.5",
"sugar-date": "^2.0.6",
"torrent-stream": "^1.1.0"
"torrent-stream": "^1.1.0",
"user-agents": "^1.0.559"
}
}

View File

@@ -5,6 +5,7 @@ const bing = require('nodejs-bing');
const he = require('he');
const { cacheWrapImdbId, cacheWrapKitsuId, cacheWrapMetadata } = require('./cache');
const { Type } = require('./types');
const { getRandomUserAgent } = require('./request_helper');
const CINEMETA_URL = 'https://v3-cinemeta.strem.io';
const KITSU_URL = 'https://anime-kitsu.strem.fun';
@@ -107,7 +108,7 @@ async function getImdbId(info, type) {
reject(err || new Error('failed imdbId search'));
}
});
}).catch(() => googleIt({ query, disableConsole: true })
}).catch(() => googleIt({ query, userAgent: getRandomUserAgent(), disableConsole: true })
.catch(() => bing.web(query))
.then(results => results
.map(result => result.link)

View File

@@ -0,0 +1,26 @@
const UserAgent = require('user-agents');
/**
 * Splits a comma-separated proxy host list into clean entries.
 *
 * Whitespace around each entry is removed and empty entries (caused by
 * doubled or trailing commas) are dropped, so a value such as "a:80, b:80,"
 * never yields a host with a leading space or an empty host.
 *
 * @param {string|undefined} rawHosts - raw value of the PROXY_HOST variable.
 * @returns {string[]} the usable hosts; empty when none are configured.
 */
function parseProxyHosts(rawHosts) {
  if (!rawHosts) {
    return [];
  }
  return rawHosts
      .split(',')
      .map((host) => host.trim())
      .filter((host) => host.length > 0);
}

// PROXY_HOST may hold a single host or several hosts separated by commas.
const PROXY_HOSTS = parseProxyHosts(process.env.PROXY_HOST);
// Credentials interpolated into every proxy URL built by this module.
const PROXY_USERNAME = process.env.PROXY_USERNAME;
const PROXY_PASSWORD = process.env.PROXY_PASSWORD;
// Module-wide UserAgent instance; getRandomUserAgent() calls random() on it
// for every lookup instead of constructing a new instance each time.
const userAgent = new UserAgent();
/**
 * Produces a user agent string for an outgoing request.
 *
 * A new random entry is requested from the shared `userAgent` instance on
 * every call and converted to its string form.
 *
 * @returns {string} the string form of the randomly picked user agent.
 */
function getRandomUserAgent() {
  const randomAgent = userAgent.random();
  return randomAgent.toString();
}
/**
 * Builds a proxy URL that points at a randomly chosen configured host.
 *
 * @returns {string|undefined} `http://<username>:<password>@<host>`, or
 *   undefined when the host list, the username or the password is missing.
 */
function getRandomProxy() {
  const isConfigured = Boolean(PROXY_HOSTS && PROXY_HOSTS.length && PROXY_USERNAME && PROXY_PASSWORD);
  if (!isConfigured) {
    return undefined;
  }
  // Uniform pick over the configured hosts.
  const hostIndex = Math.floor(Math.random() * PROXY_HOSTS.length);
  const host = PROXY_HOSTS[hostIndex];
  return `http://${PROXY_USERNAME}:${PROXY_PASSWORD}@${host}`;
}
/**
 * Builds a proxy URL for the first configured host.
 *
 * Unlike getRandomProxy, the result is stable across calls because the host
 * at index 0 is always used.
 *
 * @returns {string|undefined} `http://<username>:<password>@<host>`, or
 *   undefined when the host list, the username or the password is missing.
 */
function getProxy() {
  const hasHosts = PROXY_HOSTS && PROXY_HOSTS.length;
  if (!hasHosts || !PROXY_USERNAME || !PROXY_PASSWORD) {
    return undefined;
  }
  const firstHost = PROXY_HOSTS[0];
  return `http://${PROXY_USERNAME}:${PROXY_PASSWORD}@${firstHost}`;
}
module.exports = { getRandomUserAgent, getRandomProxy, getProxy };

View File

@@ -2,10 +2,7 @@ const { encode } = require('magnet-uri');
const RealDebridClient = require('real-debrid-api');
const namedQueue = require('named-queue');
const { cacheWrapResolvedUrl } = require('../lib/cache');
const PROXY_HOST = process.env.PROXY_HOST;
const PROXY_USERNAME = process.env.PROXY_USERNAME;
const PROXY_PASSWORD = process.env.PROXY_PASSWORD;
const { getProxy } = require('../lib/request_helper');
const unrestrictQueue = new namedQueue((task, callback) => task.method()
.then(result => callback(false, result))
@@ -70,11 +67,4 @@ async function _unrestrictLink(RD, link) {
// });
}
/**
 * Builds the proxy URL from the PROXY_HOST, PROXY_USERNAME and
 * PROXY_PASSWORD module constants.
 *
 * @returns {string|undefined} `http://<username>:<password>@<host>`, or
 *   undefined when any of the three values is missing.
 */
function getProxy() {
  const isConfigured = Boolean(PROXY_HOST && PROXY_USERNAME && PROXY_PASSWORD);
  if (!isConfigured) {
    return undefined;
  }
  return `http://${PROXY_USERNAME}:${PROXY_PASSWORD}@${PROXY_HOST}`;
}
module.exports = { resolve };

View File

@@ -3,6 +3,7 @@ const needle = require('needle');
const moment = require('moment');
const decode = require('magnet-uri');
const Promises = require('../../lib/promises');
const { getRandomProxy, getRandomUserAgent } = require('./../../lib/request_helper');
const defaultProxies = [
'https://katcr.co'
@@ -64,8 +65,9 @@ function browse(config = {}, retries = 2) {
function singleRequest(requestUrl, config = {}) {
const timeout = config.timeout || defaultTimeout;
const options = { userAgent: getRandomUserAgent(), proxy: getRandomProxy(), open_timeout: timeout, follow: 2 };
return needle('get', requestUrl, { open_timeout: timeout, follow: 2 })
return needle('get', requestUrl, options)
.then((response) => {
const body = response.body;
if (!body) {