From 2833c9d3a285118d82531708a0c0d255c42eba31 Mon Sep 17 00:00:00 2001 From: TheBeastLT Date: Thu, 2 Apr 2020 22:47:52 +0200 Subject: [PATCH] [scraper] adds helper class for proxy and user agent --- package-lock.json | 42 +++++++++++++++++++++++++ package.json | 3 +- scraper/lib/metadata.js | 3 +- scraper/lib/request_helper.js | 26 +++++++++++++++ scraper/moch/realdebrid.js | 12 +------ scraper/scrapers/kickass/kickass_api.js | 4 ++- 6 files changed, 76 insertions(+), 14 deletions(-) create mode 100644 scraper/lib/request_helper.js diff --git a/package-lock.json b/package-lock.json index 9367c5d..05a2f90 100644 --- a/package-lock.json +++ b/package-lock.json @@ -598,6 +598,16 @@ "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz", "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=" }, + "detect-indent": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/detect-indent/-/detect-indent-6.0.0.tgz", + "integrity": "sha512-oSyFlqaTHCItVRGK5RmrmjB+CmaMOW7IaNA/kdxqhoa6d17j/5ce9O9eWXmV/KEdRwqpQA+Vqe8a8Bsybu4YnA==" + }, + "docopt": { + "version": "0.6.2", + "resolved": "https://registry.npmjs.org/docopt/-/docopt-0.6.2.tgz", + "integrity": "sha1-so6eIiDaXsSffqW7JKR3h0Be6xE=" + }, "dom-serializer": { "version": "0.1.1", "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.1.tgz", @@ -629,6 +639,16 @@ "domelementtype": "1" } }, + "dot-json": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/dot-json/-/dot-json-1.2.0.tgz", + "integrity": "sha512-4bEM7KHFl/U9gAI5nIvU0/fwVzNnE713K339vcxAMtxd2D9mZP6o65UwlcXigJL4rfk90UM0J+D7IPIFYZMQ8Q==", + "requires": { + "detect-indent": "~6.0.0", + "docopt": "~0.6.2", + "underscore-keypath": "~0.0.22" + } + }, "dotenv": { "version": "8.2.0", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-8.2.0.tgz", @@ -1173,6 +1193,11 @@ "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz", "integrity": "sha1-soqmKIorn8ZRA1x3EfZathkDMaY=" }, + 
"lodash.clonedeep": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", + "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=" + }, "lodash.defaults": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz", @@ -2607,6 +2632,14 @@ "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.9.2.tgz", "integrity": "sha512-D39qtimx0c1fI3ya1Lnhk3E9nONswSKhnffBI0gME9C99fYOkNi04xs8K6pePLhvl1frbDemkaBQ5ikWllR2HQ==" }, + "underscore-keypath": { + "version": "0.0.22", + "resolved": "https://registry.npmjs.org/underscore-keypath/-/underscore-keypath-0.0.22.tgz", + "integrity": "sha1-SKUoOSu278QkvhyqVtpLX6zPJk0=", + "requires": { + "underscore": "*" + } + }, "uniq": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/uniq/-/uniq-1.0.1.tgz", @@ -2633,6 +2666,15 @@ "iconv-lite": "~0.4.11" } }, + "user-agents": { + "version": "1.0.559", + "resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.0.559.tgz", + "integrity": "sha512-HdAlNS3vDxOGMRwmv8or05xL96MV3CEwQhUSFTCRoOvTOEnWhTEBPAHRry/xZpVTTOtx77UHMal8YKcx6fs7Lg==", + "requires": { + "dot-json": "^1.2.0", + "lodash.clonedeep": "^4.5.0" + } + }, "util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", diff --git a/package.json b/package.json index da75343..f727e23 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,7 @@ "real-debrid-api": "^1.0.1", "sequelize": "^5.21.5", "sugar-date": "^2.0.6", - "torrent-stream": "^1.1.0" + "torrent-stream": "^1.1.0", + "user-agents": "^1.0.559" } } diff --git a/scraper/lib/metadata.js b/scraper/lib/metadata.js index 5cf5330..21d4ced 100644 --- a/scraper/lib/metadata.js +++ b/scraper/lib/metadata.js @@ -5,6 +5,7 @@ const bing = require('nodejs-bing'); const he = require('he'); const { cacheWrapImdbId, cacheWrapKitsuId, cacheWrapMetadata } = require('./cache'); const { Type } = 
require('./types'); +const { getRandomUserAgent } = require('./request_helper'); const CINEMETA_URL = 'https://v3-cinemeta.strem.io'; const KITSU_URL = 'https://anime-kitsu.strem.fun'; @@ -107,7 +108,7 @@ async function getImdbId(info, type) { reject(err || new Error('failed imdbId search')); } }); - }).catch(() => googleIt({ query, disableConsole: true }) + }).catch(() => googleIt({ query, userAgent: getRandomUserAgent(), disableConsole: true }) .catch(() => bing.web(query)) .then(results => results .map(result => result.link) diff --git a/scraper/lib/request_helper.js b/scraper/lib/request_helper.js new file mode 100644 index 0000000..ce241d7 --- /dev/null +++ b/scraper/lib/request_helper.js @@ -0,0 +1,26 @@ +const UserAgent = require('user-agents'); + +const PROXY_HOSTS = process.env.PROXY_HOST && process.env.PROXY_HOST.split(',').map(host => host.trim()).filter(Boolean); /* PROXY_HOST is a comma-separated host list; padding and blank entries are dropped so they never end up in a proxy URL */ +const PROXY_USERNAME = process.env.PROXY_USERNAME; +const PROXY_PASSWORD = process.env.PROXY_PASSWORD; +const userAgent = new UserAgent(); + +function getRandomUserAgent() { /* string form of a randomly picked user agent */ + return userAgent.random().toString(); +} + +function getRandomProxy() { /* proxy URL for a randomly chosen configured host; undefined when hosts or credentials are missing */ + if (PROXY_HOSTS && PROXY_HOSTS.length && PROXY_USERNAME && PROXY_PASSWORD) { + return `http://${PROXY_USERNAME}:${PROXY_PASSWORD}@${PROXY_HOSTS[Math.floor(Math.random() * PROXY_HOSTS.length)]}`; + } + return undefined; +} + +function getProxy() { /* proxy URL for the first configured host; undefined when hosts or credentials are missing */ + if (PROXY_HOSTS && PROXY_HOSTS.length && PROXY_USERNAME && PROXY_PASSWORD) { + return `http://${PROXY_USERNAME}:${PROXY_PASSWORD}@${PROXY_HOSTS[0]}`; + } + return undefined; +} + +module.exports = { getRandomUserAgent, getRandomProxy, getProxy }; \ No newline at end of file diff --git a/scraper/moch/realdebrid.js b/scraper/moch/realdebrid.js index 32b5334..ad35771 100644 --- a/scraper/moch/realdebrid.js +++ b/scraper/moch/realdebrid.js @@ -2,10 +2,7 @@ const { encode } = require('magnet-uri'); const RealDebridClient = require('real-debrid-api'); const namedQueue = require('named-queue'); const { cacheWrapResolvedUrl } = require('../lib/cache');
- -const PROXY_HOST = process.env.PROXY_HOST; -const PROXY_USERNAME = process.env.PROXY_USERNAME; -const PROXY_PASSWORD = process.env.PROXY_PASSWORD; +const { getProxy } = require('../lib/request_helper'); const unrestrictQueue = new namedQueue((task, callback) => task.method() .then(result => callback(false, result)) @@ -70,11 +67,4 @@ async function _unrestrictLink(RD, link) { // }); } -function getProxy() { - if (PROXY_HOST && PROXY_USERNAME && PROXY_PASSWORD) { - return `http://${PROXY_USERNAME}:${PROXY_PASSWORD}@${PROXY_HOST}`; - } - return undefined; -} - module.exports = { resolve }; \ No newline at end of file diff --git a/scraper/scrapers/kickass/kickass_api.js b/scraper/scrapers/kickass/kickass_api.js index 5ce6952..0170705 100644 --- a/scraper/scrapers/kickass/kickass_api.js +++ b/scraper/scrapers/kickass/kickass_api.js @@ -3,6 +3,7 @@ const needle = require('needle'); const moment = require('moment'); const decode = require('magnet-uri'); const Promises = require('../../lib/promises'); +const { getRandomProxy, getRandomUserAgent } = require('./../../lib/request_helper'); const defaultProxies = [ 'https://katcr.co' @@ -64,8 +65,9 @@ function browse(config = {}, retries = 2) { function singleRequest(requestUrl, config = {}) { const timeout = config.timeout || defaultTimeout; + const options = { user_agent: getRandomUserAgent(), proxy: getRandomProxy(), open_timeout: timeout, follow: 2 }; /* needle option names are snake_case; a camelCase userAgent key would be ignored */ - return needle('get', requestUrl, { open_timeout: timeout, follow: 2 }) + return needle('get', requestUrl, options) .then((response) => { const body = response.body; if (!body) {