From 853c21472ab0eb72c55f4ed170af1b0d9f4babde Mon Sep 17 00:00:00 2001 From: TheBeastLT Date: Sun, 8 Mar 2020 00:58:06 +0100 Subject: [PATCH] adds 1337x and tpb latest scrapers --- index.js | 8 +- lib/repository.js | 7 +- package-lock.json | 509 ++++-------------- package.json | 9 +- scrapers/1337x/1337x_api.js | 174 ++++++ scrapers/1337x/1337x_scraper.js | 75 +++ scrapers/horriblesubs/horriblesubs_scraper.js | 2 +- scrapers/thepiratebay/thepiratebay_api.js | 31 +- scrapers/thepiratebay/thepiratebay_scraper.js | 74 +++ 9 files changed, 457 insertions(+), 432 deletions(-) create mode 100644 scrapers/1337x/1337x_api.js create mode 100644 scrapers/1337x/1337x_scraper.js create mode 100644 scrapers/thepiratebay/thepiratebay_scraper.js diff --git a/index.js b/index.js index 44892bc..369cba6 100644 --- a/index.js +++ b/index.js @@ -2,11 +2,13 @@ require('dotenv').config(); const express = require("express"); const server = express(); const { connect } = require('./lib/repository'); -const thepiratebayScraper = require('./scrapers/thepiratebay/thepiratebay_dump_scraper'); +const thepiratebayScraper = require('./scrapers/thepiratebay/thepiratebay_scraper'); const horribleSubsScraper = require('./scrapers/horriblesubs/horriblesubs_scraper'); -const thepiratebayDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper'); +const leetxScraper = require('./scrapers/1337x/1337x_scraper'); +const thepiratebayDumpScraper = require('./scrapers/thepiratebay/thepiratebay_dump_scraper'); +const thepiratebayUnofficialDumpScraper = require('./scrapers/thepiratebay/thepiratebay_unofficial_dump_scraper'); -const providers = [horribleSubsScraper]; +const providers = [thepiratebayScraper]; async function scrape() { providers.forEach((provider) => provider.scrape()); diff --git a/lib/repository.js b/lib/repository.js index f1724ce..0142efa 100644 --- a/lib/repository.js +++ b/lib/repository.js @@ -1,13 +1,14 @@ const { Sequelize } = require('sequelize'); const Op = 
Sequelize.Op; -const POSTGRES_URI = process.env.POSTGRES_URI || 'postgres://torrentio:postgres@localhost:5432/torrentio'; +const DATABASE_URI = process.env.DATABASE_URI; -const database = new Sequelize(POSTGRES_URI, { logging: false }); +const database = new Sequelize(DATABASE_URI, { logging: false }); const Provider = database.define('provider', { name: { type: Sequelize.STRING(32), primaryKey: true }, - lastScraped: { type: Sequelize.DATE } + lastScraped: { type: Sequelize.DATE }, + lastScrapedId: { type: Sequelize.STRING(128) } }); const Torrent = database.define('torrent', { diff --git a/package-lock.json b/package-lock.json index 1f7a7c5..af7e436 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,30 +14,6 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-11.11.0.tgz", "integrity": "sha512-D5Rt+HXgEywr3RQJcGlZUCTCx1qVbCZpVk3/tOOA6spLNZdGm8BU+zRgdRYDoF1pO3RuXLxADzMrF903JlQXqg==" }, - "CSSselect": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/CSSselect/-/CSSselect-0.4.1.tgz", - "integrity": "sha1-+Kt+H4QYzmPNput713ioXX7EkrI=", - "requires": { - "CSSwhat": "0.4", - "domutils": "1.4" - }, - "dependencies": { - "domutils": { - "version": "1.4.3", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.4.3.tgz", - "integrity": "sha1-CGVRN5bGswYDGFDhdVFrr4C3Km8=", - "requires": { - "domelementtype": "1" - } - } - } - }, - "CSSwhat": { - "version": "0.4.7", - "resolved": "https://registry.npmjs.org/CSSwhat/-/CSSwhat-0.4.7.tgz", - "integrity": "sha1-hn2g/zn3eGEyQsRM/qg/CqTr35s=" - }, "accepts": { "version": "1.3.5", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.5.tgz", @@ -269,15 +245,6 @@ "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", "integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24=" }, - "boom": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/boom/-/boom-0.4.2.tgz", - "integrity": "sha1-emNune1O/O+xnO9JR6PGffrukRs=", - "optional": true, - "requires": { - 
"hoek": "0.9.x" - } - }, "bottleneck": { "version": "2.17.1", "resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.17.1.tgz", @@ -408,15 +375,6 @@ "lodash.some": "^4.4.0" } }, - "cloudscraper": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/cloudscraper/-/cloudscraper-3.0.0.tgz", - "integrity": "sha512-dJ7KHUo1YvvOWTIjQ3bOphflvsCyEoZkZzO3vu45Zf/lh4mZBjoVp9X/FVs8QJ+0BPkd0UZgit25kDbRjl9gjA==", - "requires": { - "request": "^2.88.0", - "request-promise": "^4.2.4" - } - }, "cls-bluebird": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/cls-bluebird/-/cls-bluebird-2.1.0.tgz", @@ -472,13 +430,23 @@ "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" }, - "cryptiles": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/cryptiles/-/cryptiles-0.2.2.tgz", - "integrity": "sha1-7ZH/HxetE9N0gohZT4pIoNJvMlw=", - "optional": true, + "cron-parser": { + "version": "2.13.0", + "resolved": "https://registry.npmjs.org/cron-parser/-/cron-parser-2.13.0.tgz", + "integrity": "sha512-UWeIpnRb0eyoWPVk+pD3TDpNx3KCFQeezO224oJIkktBrcW6RoAPOx5zIKprZGfk6vcYSmA8yQXItejSaDBhbQ==", "requires": { - "boom": "0.4.x" + "is-nan": "^1.2.1", + "moment-timezone": "^0.5.25" + }, + "dependencies": { + "moment-timezone": { + "version": "0.5.28", + "resolved": "https://registry.npmjs.org/moment-timezone/-/moment-timezone-0.5.28.tgz", + "integrity": "sha512-TDJkZvAyKIVWg5EtVqRzU97w0Rb0YVbfpqyjgu6GwXCAohVRqwZjf4fOzDE6p1Ch98Sro/8hQQi65WDXW5STPw==", + "requires": { + "moment": ">= 2.9.0" + } + } } }, "css-select": { @@ -497,12 +465,6 @@ "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz", "integrity": "sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg==" }, - "ctype": { - "version": "0.5.3", - "resolved": "https://registry.npmjs.org/ctype/-/ctype-0.5.3.tgz", - "integrity": "sha1-gsGMJGH3QRTvFsE1IkrQuRRMoS8=", - 
"optional": true - }, "cyclist": { "version": "0.1.1", "resolved": "https://registry.npmjs.org/cyclist/-/cyclist-0.1.1.tgz", @@ -516,15 +478,6 @@ "assert-plus": "^1.0.0" } }, - "ddg-scraper": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/ddg-scraper/-/ddg-scraper-1.0.2.tgz", - "integrity": "sha1-YJ+aj3VFvTylll6pBxIh/zn6cCA=", - "requires": { - "cheerio": "^0.22.0", - "request": "^2.80.0" - } - }, "debug": { "version": "3.2.6", "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.6.tgz", @@ -541,6 +494,14 @@ "mimic-response": "^1.0.0" } }, + "define-properties": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.3.tgz", + "integrity": "sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==", + "requires": { + "object-keys": "^1.0.12" + } + }, "delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", @@ -839,187 +800,6 @@ "path-is-absolute": "^1.0.0" } }, - "google-search-scraper": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/google-search-scraper/-/google-search-scraper-0.1.0.tgz", - "integrity": "sha1-KZKPKJtK0goAz4DBDDVOBPv718k=", - "requires": { - "cheerio": "~0.13.1", - "request": "~2.33.0" - }, - "dependencies": { - "asn1": { - "version": "0.1.11", - "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.1.11.tgz", - "integrity": "sha1-VZvhg3bQik7E2+gId9J4GGObLfc=", - "optional": true - }, - "assert-plus": { - "version": "0.1.5", - "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-0.1.5.tgz", - "integrity": "sha1-7nQAlBMALYTOxyGcasgRgS5yMWA=", - "optional": true - }, - "aws-sign2": { - "version": "0.5.0", - "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.5.0.tgz", - "integrity": "sha1-xXED96F/wDfwLXwuZLYC6iI/fWM=", - "optional": true - }, - "cheerio": { - "version": "0.13.1", - "resolved": 
"https://registry.npmjs.org/cheerio/-/cheerio-0.13.1.tgz", - "integrity": "sha1-SK8RNFYbNSf4PZFWxPmo69grBuw=", - "requires": { - "CSSselect": "~0.4.0", - "entities": "0.x", - "htmlparser2": "~3.4.0", - "underscore": "~1.5" - } - }, - "combined-stream": { - "version": "0.0.7", - "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-0.0.7.tgz", - "integrity": "sha1-ATfmV7qlp1QcV6w3rF/AfXO03B8=", - "optional": true, - "requires": { - "delayed-stream": "0.0.5" - } - }, - "delayed-stream": { - "version": "0.0.5", - "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-0.0.5.tgz", - "integrity": "sha1-1LH0OpPoKW3+AmlPRoC8N6MTxz8=", - "optional": true - }, - "domhandler": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.2.1.tgz", - "integrity": "sha1-Wd+dzSJ+gIs2Wuc+H2aErD2Ub8I=", - "requires": { - "domelementtype": "1" - } - }, - "domutils": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.3.0.tgz", - "integrity": "sha1-mtTVm1r2ymhMYv5tdo7xcOcN8ZI=", - "requires": { - "domelementtype": "1" - } - }, - "entities": { - "version": "0.5.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-0.5.0.tgz", - "integrity": "sha1-9hHLWuIhBQ4AEsZpeVA/164ZzEk=" - }, - "forever-agent": { - "version": "0.5.2", - "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.5.2.tgz", - "integrity": "sha1-bQ4JxJIflKJ/Y9O0nF/v8epMUTA=" - }, - "form-data": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-0.1.4.tgz", - "integrity": "sha1-kavXiKupcCsaq/qLwBAxoqyeOxI=", - "optional": true, - "requires": { - "async": "~0.9.0", - "combined-stream": "~0.0.4", - "mime": "~1.2.11" - } - }, - "htmlparser2": { - "version": "3.4.0", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.4.0.tgz", - "integrity": "sha1-oc1l9YI60oXhnWOwha1yLQpR6uc=", - "requires": { - "domelementtype": "1", - "domhandler": "2.2", 
- "domutils": "1.3", - "readable-stream": "1.1" - } - }, - "http-signature": { - "version": "0.10.1", - "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-0.10.1.tgz", - "integrity": "sha1-T72sEyVZqoMjEh5UB3nAoBKyfmY=", - "optional": true, - "requires": { - "asn1": "0.1.11", - "assert-plus": "^0.1.5", - "ctype": "0.5.3" - } - }, - "mime": { - "version": "1.2.11", - "resolved": "https://registry.npmjs.org/mime/-/mime-1.2.11.tgz", - "integrity": "sha1-WCA+7Ybjpe8XrtK32evUfwpg3RA=" - }, - "node-uuid": { - "version": "1.4.8", - "resolved": "https://registry.npmjs.org/node-uuid/-/node-uuid-1.4.8.tgz", - "integrity": "sha1-sEDrCSOWivq/jTL7HxfxFn/auQc=" - }, - "oauth-sign": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.3.0.tgz", - "integrity": "sha1-y1QPk7srIqfVlBaRoojWDo6pOG4=", - "optional": true - }, - "qs": { - "version": "0.6.6", - "resolved": "https://registry.npmjs.org/qs/-/qs-0.6.6.tgz", - "integrity": "sha1-bgFQmP9RlouKPIGQAdXyyJvEsQc=" - }, - "readable-stream": { - "version": "1.1.14", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz", - "integrity": "sha1-fPTFTvZI44EwhMY23SB54WbAgdk=", - "requires": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.1", - "isarray": "0.0.1", - "string_decoder": "~0.10.x" - } - }, - "request": { - "version": "2.33.0", - "resolved": "https://registry.npmjs.org/request/-/request-2.33.0.tgz", - "integrity": "sha1-UWeHgTFyYHDsYzdS6iMKI3ncZf8=", - "requires": { - "aws-sign2": "~0.5.0", - "forever-agent": "~0.5.0", - "form-data": "~0.1.0", - "hawk": "~1.0.0", - "http-signature": "~0.10.0", - "json-stringify-safe": "~5.0.0", - "mime": "~1.2.9", - "node-uuid": "~1.4.0", - "oauth-sign": "~0.3.0", - "qs": "~0.6.0", - "tough-cookie": ">=0.12.0", - "tunnel-agent": "~0.3.0" - } - }, - "string_decoder": { - "version": "0.10.31", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", - "integrity": 
"sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=" - }, - "tunnel-agent": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.3.0.tgz", - "integrity": "sha1-rWgbaPUyGtKCfEz7G31d8s/pQu4=", - "optional": true - }, - "underscore": { - "version": "1.5.2", - "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.5.2.tgz", - "integrity": "sha1-EzXF5PXm0zu7SwBrqMhqAPVW3gg=" - } - } - }, "har-schema": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz", @@ -1039,24 +819,6 @@ "resolved": "https://registry.npmjs.org/hat/-/hat-0.0.3.tgz", "integrity": "sha1-uwFKnmSzeIrtgAWRdBPU/z1QLYo=" }, - "hawk": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/hawk/-/hawk-1.0.0.tgz", - "integrity": "sha1-uQuxaYByhUEdp//LjdJZhQLTtS0=", - "optional": true, - "requires": { - "boom": "0.4.x", - "cryptiles": "0.2.x", - "hoek": "0.9.x", - "sntp": "0.2.x" - } - }, - "hoek": { - "version": "0.9.1", - "resolved": "https://registry.npmjs.org/hoek/-/hoek-0.9.1.tgz", - "integrity": "sha1-PTIkYrrfB3Fup+uFuviAec3c5QU=", - "optional": true - }, "htmlparser2": { "version": "3.10.1", "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.1.tgz", @@ -1099,110 +861,6 @@ "safer-buffer": ">= 2.1.2 < 3" } }, - "imdb": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/imdb/-/imdb-1.1.0.tgz", - "integrity": "sha1-6X0syyARjvFAGQBRiai5FcZQjDI=", - "requires": { - "cheerio": "^0.19.0", - "request": "^2.65.0" - }, - "dependencies": { - "cheerio": { - "version": "0.19.0", - "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-0.19.0.tgz", - "integrity": "sha1-dy5wFfLuKZZQltcepBdbdas1SSU=", - "requires": { - "css-select": "~1.0.0", - "dom-serializer": "~0.1.0", - "entities": "~1.1.1", - "htmlparser2": "~3.8.1", - "lodash": "^3.2.0" - } - }, - "css-select": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.0.0.tgz", - "integrity": 
"sha1-sRIcpRhI3SZOIkTQWM7iVN7rRLA=", - "requires": { - "boolbase": "~1.0.0", - "css-what": "1.0", - "domutils": "1.4", - "nth-check": "~1.0.0" - } - }, - "css-what": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/css-what/-/css-what-1.0.0.tgz", - "integrity": "sha1-18wt9FGAZm+Z0rFEYmOUaeAPc2w=" - }, - "domhandler": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.3.0.tgz", - "integrity": "sha1-LeWaCCLVAn+r/28DLCsloqir5zg=", - "requires": { - "domelementtype": "1" - } - }, - "domutils": { - "version": "1.4.3", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.4.3.tgz", - "integrity": "sha1-CGVRN5bGswYDGFDhdVFrr4C3Km8=", - "requires": { - "domelementtype": "1" - } - }, - "htmlparser2": { - "version": "3.8.3", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.8.3.tgz", - "integrity": "sha1-mWwosZFRaovoZQGn15dX5ccMEGg=", - "requires": { - "domelementtype": "1", - "domhandler": "2.3", - "domutils": "1.5", - "entities": "1.0", - "readable-stream": "1.1" - }, - "dependencies": { - "domutils": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz", - "integrity": "sha1-3NhIiib1Y9YQeeSMn3t+Mjc2gs8=", - "requires": { - "dom-serializer": "0", - "domelementtype": "1" - } - }, - "entities": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-1.0.0.tgz", - "integrity": "sha1-sph6o4ITR/zeZCsk/fyeT7cSvyY=" - } - } - }, - "lodash": { - "version": "3.10.1", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-3.10.1.tgz", - "integrity": "sha1-W/Rejkm6QYnhfUgnid/RW9FAt7Y=" - }, - "readable-stream": { - "version": "1.1.14", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz", - "integrity": "sha1-fPTFTvZI44EwhMY23SB54WbAgdk=", - "requires": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.1", - "isarray": "0.0.1", - "string_decoder": "~0.10.x" - } - }, - "string_decoder": { 
- "version": "0.10.31", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", - "integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=" - } - } - }, "immediate-chunk-store": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/immediate-chunk-store/-/immediate-chunk-store-1.0.8.tgz", @@ -1250,19 +908,19 @@ "resolved": "https://registry.npmjs.org/is-bluebird/-/is-bluebird-1.0.2.tgz", "integrity": "sha1-CWQ5Bg9KpBGr7hkUOoTWpVNG1uI=" }, + "is-nan": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/is-nan/-/is-nan-1.3.0.tgz", + "integrity": "sha512-z7bbREymOqt2CCaZVly8aC4ML3Xhfi0ekuOnjO2L8vKdl+CttdVoGZQhd4adMFAsxQ5VeRVwORs4tU8RH+HFtQ==", + "requires": { + "define-properties": "^1.1.3" + } + }, "is-typedarray": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz", "integrity": "sha1-5HnICFjfDBsR3dppQPlgEfzaSpo=" }, - "is-video": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/is-video/-/is-video-1.0.1.tgz", - "integrity": "sha1-e8Z3YLAQPUArwfELtULxajaHdzs=", - "requires": { - "video-extensions": "^1.0.0" - } - }, "isarray": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", @@ -1424,6 +1082,11 @@ "resolved": "https://registry.npmjs.org/lodash.some/-/lodash.some-4.6.0.tgz", "integrity": "sha1-G7nzFO9ri63tE7VJFpsqlF62jk0=" }, + "long-timeout": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/long-timeout/-/long-timeout-0.1.1.tgz", + "integrity": "sha1-lyHXiLR+C8taJMLivuGg2lXatRQ=" + }, "lru": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/lru/-/lru-2.0.1.tgz", @@ -1620,6 +1283,16 @@ "resolved": "https://registry.npmjs.org/node-gzip/-/node-gzip-1.1.2.tgz", "integrity": "sha512-ZB6zWpfZHGtxZnPMrJSKHVPrRjURoUzaDbLFj3VO70mpLTW5np96vXyHwft4Id0o+PYIzgDkBUjIzaNHhQ8srw==" }, + "node-schedule": { + "version": "1.3.2", + "resolved": 
"https://registry.npmjs.org/node-schedule/-/node-schedule-1.3.2.tgz", + "integrity": "sha512-GIND2pHMHiReSZSvS6dpZcDH7pGPGFfWBIEud6S00Q8zEIzAs9ommdyRK1ZbQt8y1LyZsJYZgPnyi7gpU2lcdw==", + "requires": { + "cron-parser": "^2.7.3", + "long-timeout": "0.1.1", + "sorted-array-functions": "^1.0.0" + } + }, "nodejs-bing": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/nodejs-bing/-/nodejs-bing-0.1.0.tgz", @@ -1644,6 +1317,11 @@ "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", "integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==" }, + "object-keys": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", + "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==" + }, "on-finished": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz", @@ -1715,7 +1393,10 @@ }, "parse-torrent-title": { "version": "git://github.com/TheBeastLT/parse-torrent-title.git#ddd5037820289d35e600baec9d8e730935d261af", - "from": "git://github.com/TheBeastLT/parse-torrent-title.git#master" + "from": "git://github.com/TheBeastLT/parse-torrent-title.git#master", + "requires": { + "moment": "^2.24.0" + } }, "parseurl": { "version": "1.3.2", @@ -1833,15 +1514,16 @@ "integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=" }, "pg": { - "version": "7.8.2", - "resolved": "https://registry.npmjs.org/pg/-/pg-7.8.2.tgz", - "integrity": "sha512-5U4fjV43DnQxelkhyPdU3YfUbYVa21bNmreXRCM/gFFw09YxWaitWWITm/u0twUNF5EYOSDhkgyEAocgtpP9JQ==", + "version": "7.18.2", + "resolved": "https://registry.npmjs.org/pg/-/pg-7.18.2.tgz", + "integrity": "sha512-Mvt0dGYMwvEADNKy5PMQGlzPudKcKKzJds/VbOeZJpb6f/pI3mmoXX0JksPgI3l3JPP/2Apq7F36O63J7mgveA==", "requires": { "buffer-writer": "2.0.0", "packet-reader": "1.0.0", "pg-connection-string": "0.1.3", - "pg-pool": "^2.0.4", - "pg-types": "~2.0.0", + 
"pg-packet-stream": "^1.1.0", + "pg-pool": "^2.0.10", + "pg-types": "^2.1.0", "pgpass": "1.x", "semver": "4.3.2" }, @@ -1859,9 +1541,9 @@ "integrity": "sha1-2hhHsglA5C7hSSvq9l1J2RskXfc=" }, "pg-hstore": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/pg-hstore/-/pg-hstore-2.3.2.tgz", - "integrity": "sha1-9+8FPnubiSrphq8vfL6GQy388k8=", + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/pg-hstore/-/pg-hstore-2.3.3.tgz", + "integrity": "sha512-qpeTpdkguFgfdoidtfeTho1Q1zPVPbtMHgs8eQ+Aan05iLmIs3Z3oo5DOZRclPGoQ4i68I1kCtQSJSa7i0ZVYg==", "requires": { "underscore": "^1.7.0" } @@ -1871,20 +1553,25 @@ "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", "integrity": "sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==" }, + "pg-packet-stream": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/pg-packet-stream/-/pg-packet-stream-1.1.0.tgz", + "integrity": "sha512-kRBH0tDIW/8lfnnOyTwKD23ygJ/kexQVXZs7gEyBljw4FYqimZFxnMMx50ndZ8In77QgfGuItS5LLclC2TtjYg==" + }, "pg-pool": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/pg-pool/-/pg-pool-2.0.6.tgz", - "integrity": "sha512-hod2zYQxM8Gt482q+qONGTYcg/qVcV32VHVPtktbBJs0us3Dj7xibISw0BAAXVMCzt8A/jhfJvpZaxUlqtqs0g==" + "version": "2.0.10", + "resolved": "https://registry.npmjs.org/pg-pool/-/pg-pool-2.0.10.tgz", + "integrity": "sha512-qdwzY92bHf3nwzIUcj+zJ0Qo5lpG/YxchahxIN8+ZVmXqkahKXsnl2aiJPHLYN9o5mB/leG+Xh6XKxtP7e0sjg==" }, "pg-types": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.0.0.tgz", - "integrity": "sha512-THUD7gQll5tys+5eQ8Rvs7DjHiIC3bLqixk3gMN9Hu8UrCBAOjf35FoI39rTGGc3lM2HU/R+Knpxvd11mCwOMA==", + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", + "integrity": "sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==", "requires": { "pg-int8": "1.0.1", "postgres-array": "~2.0.0", 
"postgres-bytea": "~1.0.0", - "postgres-date": "~1.0.0", + "postgres-date": "~1.0.4", "postgres-interval": "^1.1.0" } }, @@ -1907,9 +1594,9 @@ "integrity": "sha1-AntTPAqokOJtFy1Hz5zOzFIazTU=" }, "postgres-date": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-1.0.3.tgz", - "integrity": "sha1-4tiXAu/bJY/52c7g/pG9BpdSV6g=" + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-1.0.4.tgz", + "integrity": "sha512-bESRvKVuTrjoBluEcpv2346+6kgB7UlnqWZsnbnCccTNq/pqfj1j6oBaN5+b/NrDXepYUT/HKadqv3iS9lJuVA==" }, "postgres-interval": { "version": "1.2.0", @@ -2412,14 +2099,10 @@ } } }, - "sntp": { - "version": "0.2.4", - "resolved": "https://registry.npmjs.org/sntp/-/sntp-0.2.4.tgz", - "integrity": "sha1-+4hfGLDzqtGJ+CSGJTa87ux1CQA=", - "optional": true, - "requires": { - "hoek": "0.9.x" - } + "sorted-array-functions": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/sorted-array-functions/-/sorted-array-functions-1.2.0.tgz", + "integrity": "sha512-sWpjPhIZJtqO77GN+LD8dDsDKcWZ9GCOJNqKzi1tvtjGIzwfoyuRH8S0psunmc6Z5P+qfDqztSbwYR5X/e1UTg==" }, "sparse-bitfield": { "version": "3.0.3", @@ -2486,6 +2169,19 @@ "safe-buffer": "~5.1.0" } }, + "sugar-core": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/sugar-core/-/sugar-core-2.0.6.tgz", + "integrity": "sha512-YmLFysR3Si6RImqL1+aB6JH81EXxvXn5iXhPf2PsjfoUYEwCxFDYCQY+zC3WqviuGWzxFaSkkJvkUE05Y03L5Q==" + }, + "sugar-date": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/sugar-date/-/sugar-date-2.0.6.tgz", + "integrity": "sha512-5aPXcTl9pIgae3j8wOieRZOEbaowHHpL+MPgZwHHjXdhZz3FjzpacjzM+Aq7rZTjDsWyWuKHzkIALx2uUhnmyg==", + "requires": { + "sugar-core": "^2.0.0" + } + }, "terraformer": { "version": "1.0.9", "resolved": "https://registry.npmjs.org/terraformer/-/terraformer-1.0.9.tgz", @@ -2648,9 +2344,9 @@ "integrity": 
"sha512-UIEXBNeYmKptWH6z8ZnqTeS8fV74zG0/eRU9VGkpzz+LIJNs8W/zM/L+7ctCkRrgbNnnR0xxw4bKOr0cW0N0Og==" }, "underscore": { - "version": "1.9.1", - "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.9.1.tgz", - "integrity": "sha512-5/4etnCkd9c8gwgowi5/om/mYO5ajCaOgdzj/oW+0eQV9WxKBDZw5+ycmKmeaTXjInS/W0BzpGLo2xR2aBwZdg==" + "version": "1.9.2", + "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.9.2.tgz", + "integrity": "sha512-D39qtimx0c1fI3ya1Lnhk3E9nONswSKhnffBI0gME9C99fYOkNi04xs8K6pePLhvl1frbDemkaBQ5ikWllR2HQ==" }, "uniq": { "version": "1.0.1", @@ -2721,11 +2417,6 @@ "extsprintf": "^1.2.0" } }, - "video-extensions": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/video-extensions/-/video-extensions-1.1.0.tgz", - "integrity": "sha1-6qhrRfKahTwrhz6djiO1E3Epl9Y=" - }, "wkx": { "version": "0.4.6", "resolved": "https://registry.npmjs.org/wkx/-/wkx-0.4.6.tgz", diff --git a/package.json b/package.json index ee89e85..f626a22 100644 --- a/package.json +++ b/package.json @@ -12,31 +12,28 @@ "author": "TheBeastLT ", "license": "MIT", "dependencies": { + "async": "0.9.2", "bottleneck": "^2.16.2", "cache-manager": "^2.9.0", "cache-manager-mongodb": "^0.2.1", "cheerio": "^0.22.0", - "cloudscraper": "^3.0.0", - "ddg-scraper": "^1.0.2", "dotenv": "^8.2.0", "express": "^4.16.4", - "google-search-scraper": "^0.1.0", - "imdb": "^1.1.0", - "is-video": "^1.0.1", "line-by-line": "^0.1.6", "magnet-uri": "^5.1.7", "moment": "^2.24.0", "name-to-imdb": "^2.3.0", "needle": "^2.2.4", "node-gzip": "^1.1.2", + "node-schedule": "^1.3.2", "nodejs-bing": "^0.1.0", "parse-torrent": "^6.1.2", "parse-torrent-title": "git://github.com/TheBeastLT/parse-torrent-title.git#master", "peer-search": "^0.6.x", - "async": "0.9.2", "pg": "^7.8.2", "pg-hstore": "^2.3.2", "sequelize": "^4.43.0", + "sugar-date": "^2.0.6", "torrent-stream": "^1.1.0" } } diff --git a/scrapers/1337x/1337x_api.js b/scrapers/1337x/1337x_api.js new file mode 100644 index 0000000..2930c37 --- 
/dev/null +++ b/scrapers/1337x/1337x_api.js @@ -0,0 +1,174 @@ +const cheerio = require('cheerio'); +const needle = require('needle'); +const Sugar = require('sugar-date'); +const decode = require('magnet-uri'); + +const defaultProxies = [ + 'https://1337x.to' +]; +const defaultTimeout = 30000; + +const Categories = { + MOVIE: 'Movies', + TV: 'TV', + ANIME: 'Anime', + DOCUMENTARIES: 'Documentaries', + APPS: 'Apps', + GAMES: 'Games', + MUSIC: 'Music', + PORN: 'XXX', + OTHER: 'Other', +}; + +function torrent(torrentSlug, config = {}, retries = 2) { + if (!torrentSlug || retries === 0) { + return Promise.reject(new Error(`Failed ${torrentSlug} query`)); + } + const proxyList = config.proxyList || defaultProxies; + const slug = torrentSlug.startsWith('/torrent/') ? torrentSlug.replace('/torrent/', '') : torrentSlug; + + return raceFirstSuccessful(proxyList + .map((proxyUrl) => singleRequest(`${proxyUrl}/torrent/${slug}`, config))) + .then((body) => parseTorrentPage(body)) + .then((torrent) => ({ torrentId: slug, ...torrent })) + .catch((err) => torrent(slug, config, retries - 1)); +} + +function search(keyword, config = {}, retries = 2) { + if (!keyword || retries === 0) { + return Promise.reject(new Error(`Failed ${keyword} search`)); + } + const proxyList = config.proxyList || defaultProxies; + const page = config.page || 1; + + return raceFirstSuccessful(proxyList + .map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/`, config))) + .then((body) => parseTableBody(body)) + .catch((err) => search(keyword, config, retries - 1)); +} + +function browse(config = {}, retries = 2) { + if (retries === 0) { + return Promise.reject(new Error(`Failed browse request`)); + } + const proxyList = config.proxyList || defaultProxies; + const page = config.page || 1; + const category = config.category || 0; + + return raceFirstSuccessful(proxyList + .map((proxyUrl) => singleRequest(`${proxyUrl}/cat/${category}/${page}/`, config))) + .then((body) => 
parseTableBody(body)) + .catch((err) => browse(config, retries - 1)); +} + +function singleRequest(requestUrl, config = {}) { + const timeout = config.timeout || defaultTimeout; + + return needle('get', requestUrl, { open_timeout: timeout, follow: 2 }) + .then((response) => { + const body = response.body; + if (!body) { + throw new Error(`No body: ${requestUrl}`); + } else if (body.includes('502: Bad gateway') || + body.includes('403 Forbidden') || + !(body.includes('1337x'))) { + throw new Error(`Invalid body contents: ${requestUrl}`); + } + return body; + }); +} + +function parseTableBody(body) { + return new Promise((resolve, reject) => { + const $ = cheerio.load(body); + + if (!$) { + reject(new Error('Failed loading body')); + } + + const torrents = []; + + $('.table > tbody > tr').each((i, element) => { + const row = $(element); + torrents.push({ + name: row.find('a').eq(1).text(), + slug: row.find('a').eq(1).attr('href').replace('/torrent/', ''), + seeders: parseInt(row.children('td.coll-2').text()), + leechers: parseInt(row.children('td.coll-3').text()), + size: parseSize(row.children('td.coll-4').text()) + }); + }); + + resolve(torrents); + }); +} + +function parseTorrentPage(body) { + return new Promise((resolve, reject) => { + const $ = cheerio.load(body); + + if (!$) { + reject(new Error('Failed loading body')); + } + + const details = $('.torrent-detail-page'); + const magnetLink = details.find('a:contains(\'Magnet Download\')').attr('href'); + const imdbIdMatch = details.find('div[id=\'description\']').html().match(/imdb\.com\/title\/tt(\d+)/i); + + const torrent = { + name: decode(magnetLink).dn, + magnetLink: magnetLink, + infoHash: details.find('strong:contains(\'Infohash\')').next().text(), + seeders: parseInt(details.find('strong:contains(\'Seeders\')').next().text(), 10), + leechers: parseInt(details.find('strong:contains(\'Leechers\')').next().text(), 10), + category: details.find('strong:contains(\'Category\')').next().text(), + language: 
details.find('strong:contains(\'Language\')').next().text(),
+      size: parseSize(details.find('strong:contains(\'Total size\')').next().text()),
+      uploadDate: Sugar.Date.create(details.find('strong:contains(\'Date uploaded\')').next().text()),
+      imdbId: imdbIdMatch && `tt${imdbIdMatch[1].padStart(7, '0')}`,
+      files: details.find('div[id=\'files\']').first().find('li')
+          .map((i, elem) => $(elem).text())
+          .map((i, text) => ({
+            fileIndex: i,
+            name: text.match(/^(.+)\s\(.+\)$/)[1].replace(/^.+\//g, ''),
+            path: text.match(/^(.+)\s\(.+\)$/)[1],
+            size: parseSize(text.match(/^.+\s\((.+)\)$/)[1])
+          })).get()
+    };
+    resolve(torrent);
+  });
+}
+
+function parseSize(sizeText) {
+  if (!sizeText) {
+    return undefined;
+  }
+  let scale = 1;
+  if (sizeText.includes('GB')) {
+    scale = 1024 * 1024 * 1024;
+  } else if (sizeText.includes('MB')) {
+    scale = 1024 * 1024;
+  } else if (sizeText.includes('KB')) {
+    scale = 1024;
+  }
+  return Math.floor(parseFloat(sizeText) * scale);
+}
+
+function raceFirstSuccessful(promises) {
+  return Promise.all(promises.map((p) => {
+    // If a request fails, count that as a resolution so it will keep
+    // waiting for other possible successes. If a request succeeds,
+    // treat it as a rejection so Promise.all immediately bails out.
+    return p.then(
+        (val) => Promise.reject(val),
+        (err) => Promise.resolve(err)
+    );
+  })).then(
+      // If '.all' resolved, we've just got an array of errors.
+      (errors) => Promise.reject(errors),
+      // If '.all' rejected, we've got the result we wanted.
+      (val) => Promise.resolve(val)
+  );
+}
+
+module.exports = { torrent, search, browse, Categories };
diff --git a/scrapers/1337x/1337x_scraper.js b/scrapers/1337x/1337x_scraper.js
new file mode 100644
index 0000000..f6f61ce
--- /dev/null
+++ b/scrapers/1337x/1337x_scraper.js
@@ -0,0 +1,94 @@
+const moment = require('moment');
+const Bottleneck = require('bottleneck');
+const leetx = require('./1337x_api');
+const { Type } = require('../../lib/types');
+const repository = require('../../lib/repository');
+const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
+
+const NAME = '1337x';
+// Last listing page (inclusive) requested per category; paging starts at 1.
+const UNTIL_PAGE = 1;
+// Maps the category of a fetched torrent to the stored entry type.
+const TYPE_MAPPING = {
+  'Movies': Type.MOVIE,
+  'Documentaries': Type.MOVIE,
+  'TV': Type.SERIES,
+  'Anime': Type.ANIME
+};
+
+const limiter = new Bottleneck({ maxConcurrent: 40 });
+
+/**
+ * Fetches the latest listings, processes each record through the limiter
+ * and then saves the scrape start time and last record id on the provider.
+ */
+async function scrape() {
+  const scrapeStart = moment();
+  const lastScrape = await repository.getProvider({ name: NAME });
+  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
+
+  const latestTorrents = await getLatestTorrents();
+  return Promise.all(latestTorrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))))
+      .then(() => {
+        lastScrape.lastScraped = scrapeStart;
+        lastScrape.lastScrapedId = latestTorrents.length && latestTorrents[latestTorrents.length - 1].torrentId;
+        return lastScrape.save();
+      });
+}
+
+/**
+ * Browses the movie, TV, anime and documentary categories one after another
+ * and returns their records as a single array.
+ */
+async function getLatestTorrents() {
+  const movies = await getLatestTorrentsForCategory(leetx.Categories.MOVIE);
+  const series = await getLatestTorrentsForCategory(leetx.Categories.TV);
+  const anime = await getLatestTorrentsForCategory(leetx.Categories.ANIME);
+  const docs = await getLatestTorrentsForCategory(leetx.Categories.DOCUMENTARIES);
+  return movies.concat(series).concat(anime).concat(docs);
+}
+
+/**
+ * Browses one category page by page, up to and including UNTIL_PAGE.
+ * A failed browse request resolves to an empty array instead of rejecting.
+ */
+async function getLatestTorrentsForCategory(category, page = 1) {
+  return leetx.browse(({ category: category, page: page }))
+      // Recurse into this same function so the next page stays within the category.
+      .then(torrents => torrents.length && page < UNTIL_PAGE
+          ? getLatestTorrentsForCategory(category, page + 1).then(nextTorrents => torrents.concat(nextTorrents))
+          : torrents)
+      .catch(() => []);
+}
+
+/**
+ * Does nothing for records that already have a stored entry. Otherwise fetches the
+ * torrent page; a failed fetch or unmapped category goes to createSkipTorrentEntry.
+ */
+async function processTorrentRecord(record) {
+  if (await getStoredTorrentEntry(record)) {
+    return;
+  }
+
+  const torrentFound = await leetx.torrent(record.slug).catch(() => undefined);
+
+  if (!torrentFound || !TYPE_MAPPING[torrentFound.category]) {
+    return createSkipTorrentEntry(record);
+  }
+
+  const torrent = {
+    infoHash: torrentFound.infoHash,
+    provider: NAME,
+    torrentId: torrentFound.torrentId,
+    title: torrentFound.name.replace(/\t|\s+/g, ' '),
+    seeders: torrentFound.seeders,
+    size: torrentFound.size,
+    type: TYPE_MAPPING[torrentFound.category],
+    uploadDate: torrentFound.uploadDate,
+    imdbId: torrentFound.imdbId,
+  };
+
+  return createTorrentEntry(torrent);
+}
+
+module.exports = { scrape };
\ No newline at end of file
diff --git a/scrapers/horriblesubs/horriblesubs_scraper.js b/scrapers/horriblesubs/horriblesubs_scraper.js
index a67f220..3cf691c 100644
--- a/scrapers/horriblesubs/horriblesubs_scraper.js
+++ b/scrapers/horriblesubs/horriblesubs_scraper.js
@@ -19,7 +19,7 @@ async function scrape() {
 
   if (!lastScraped.lastScraped) {
     console.log(`${NAME}: no previous scrapping exist`);
-    await _scrapeAllShows()
+    //await _scrapeAllShows()
   }
 }
 
diff --git a/scrapers/thepiratebay/thepiratebay_api.js b/scrapers/thepiratebay/thepiratebay_api.js
index 42f937d..638b209 100644
--- a/scrapers/thepiratebay/thepiratebay_api.js
+++ b/scrapers/thepiratebay/thepiratebay_api.js
@@ -1,6 +1,7 @@
 const cheerio = require('cheerio');
 const needle = require('needle');
 const moment = require('moment');
+const decode = require('magnet-uri');
 
 const defaultProxies = [
   'https://thepiratebay.org',
@@ -10,12 +11,7 @@ const defaultProxies = [
 const dumpUrl = '/static/dump/csv/';
 const defaultTimeout = 30000;
 
-const errors = {
-  REQUEST_ERROR: { code: 'REQUEST_ERROR' },
-  PARSER_ERROR: { code: 'PARSER_ERROR' }
-};
-
-Categories = {
+const Categories = {
   AUDIO: {
     ALL: 100,
     MUSIC: 101,
@@ -99,7 +95,7 @@ function search(keyword, config = {}, retries = 2) {
   }
   const proxyList = config.proxyList || defaultProxies;
   const page = config.page || 0;
-  const category = config.cat || 0;
+  const category = config.category || 0;
 
   return raceFirstSuccessful(proxyList
       .map((proxyUrl) => singleRequest(`${proxyUrl}/search/${keyword}/${page}/99/${category}`, config)))
@@ -107,6 +103,20 @@ function search(keyword, config = {}, retries = 2) {
       .catch((err) => search(keyword, config, retries - 1));
 }
 
+function browse(config = {}, retries = 2) {
+  if (retries === 0) {
+    return Promise.reject(new Error(`Failed browse request`));
+  }
+  const proxyList = config.proxyList || defaultProxies;
+  const page = config.page || 0;
+  const category = config.category || 0;
+
+  return raceFirstSuccessful(proxyList
+      .map((proxyUrl) => singleRequest(`${proxyUrl}/browse/${category}/${page}`, config)))
+      .then((body) => parseBody(body))
+      .catch((err) => browse(config, retries - 1));
+}
+
 function dumps(config = {}, retries = 2) {
   if (retries === 0) {
     return Promise.reject(new Error(`Failed dump search`));
@@ -150,7 +160,7 @@ function parseBody(body) {
     const $ = cheerio.load(body);
 
     if (!$) {
-      reject(new Error(errors.PARSER_ERROR));
+      reject(new Error('Failed loading body'));
     }
 
     const torrents = [];
@@ -183,7 +193,7 @@ function parseTorrentPage(body) {
     const $ = cheerio.load(body);
 
     if (!$) {
-      reject(new Error(errors.PARSER_ERROR));
+      reject(new Error('Failed loading body'));
     }
     const details = $('div[id=\'details\']');
     const col1 = details.find('dl[class=\'col1\']');
@@ -194,6 +204,7 @@ function parseTorrentPage(body) {
       seeders: parseInt(details.find('dt:contains(\'Seeders:\')').next().text(), 10),
       leechers: parseInt(details.find('dt:contains(\'Leechers:\')').next().text(), 10),
       magnetLink: details.find('a[title=\'Get this torrent\']').attr('href'),
+      infoHash: decode(details.find('a[title=\'Get this torrent\']').attr('href')).infoHash,
       category: Categories.VIDEO.ALL,
       subcategory: parseInt(col1.find('a[title=\'More from this category\']').eq(0).attr('href').match(/\d+$/)[0], 10),
       size: parseSize(details.find('dt:contains(\'Size:\')').next().text().match(/(\d+)(?:.?Bytes)/)[1]),
@@ -237,4 +248,4 @@ function raceFirstSuccessful(promises) {
   );
 }
 
-module.exports = { torrent, search, dumps, Categories };
+module.exports = { torrent, search, browse, dumps, Categories };
diff --git a/scrapers/thepiratebay/thepiratebay_scraper.js b/scrapers/thepiratebay/thepiratebay_scraper.js
new file mode 100644
index 0000000..c857b03
--- /dev/null
+++ b/scrapers/thepiratebay/thepiratebay_scraper.js
@@ -0,0 +1,89 @@
+const moment = require('moment');
+const Bottleneck = require('bottleneck');
+const thepiratebay = require('./thepiratebay_api.js');
+const { Type } = require('../../lib/types');
+const repository = require('../../lib/repository');
+const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
+
+const NAME = 'ThePirateBay';
+// Last listing page (inclusive) requested; paging starts at 0.
+const UNTIL_PAGE = 1;
+
+const limiter = new Bottleneck({ maxConcurrent: 40 });
+
+// Subcategories that get a torrent entry; any other subcategory is skipped.
+const allowedCategories = [
+  thepiratebay.Categories.VIDEO.MOVIES,
+  thepiratebay.Categories.VIDEO.MOVIES_HD,
+  thepiratebay.Categories.VIDEO.MOVIES_DVDR,
+  thepiratebay.Categories.VIDEO.MOVIES_3D,
+  thepiratebay.Categories.VIDEO.TV_SHOWS,
+  thepiratebay.Categories.VIDEO.TV_SHOWS_HD
+];
+// Subcategories typed as Type.SERIES; the other allowed ones are Type.MOVIE.
+const seriesCategories = [
+  thepiratebay.Categories.VIDEO.TV_SHOWS,
+  thepiratebay.Categories.VIDEO.TV_SHOWS_HD
+];
+
+/**
+ * Fetches the latest listings, processes each record through the limiter
+ * and then saves the scrape start time and last record id on the provider.
+ */
+async function scrape() {
+  const scrapeStart = moment();
+  const lastScrape = await repository.getProvider({ name: NAME });
+  console.log(`[${scrapeStart}] starting ${NAME} scrape...`);
+
+  const latestTorrents = await getLatestTorrents();
+  return Promise.all(latestTorrents.map(torrent => limiter.schedule(() => processTorrentRecord(torrent))))
+      .then(() => {
+        lastScrape.lastScraped = scrapeStart;
+        lastScrape.lastScrapedId = latestTorrents.length && latestTorrents[latestTorrents.length - 1].torrentId;
+        return lastScrape.save();
+      });
+}
+
+/**
+ * Browses the video category page by page, up to and including UNTIL_PAGE.
+ * A failed browse request resolves to an empty array instead of rejecting.
+ */
+async function getLatestTorrents(page = 0) {
+  return thepiratebay.browse(({ category: thepiratebay.Categories.VIDEO.ALL, page: page }))
+      .then(torrents => torrents.length && page < UNTIL_PAGE
+          ? getLatestTorrents(page + 1).then(nextTorrents => torrents.concat(nextTorrents))
+          : torrents)
+      .catch(() => []);
+}
+
+/**
+ * Does nothing for records that already have a stored entry. Otherwise fetches the
+ * torrent page; a failed fetch or disallowed subcategory goes to createSkipTorrentEntry.
+ */
+async function processTorrentRecord(record) {
+  if (await getStoredTorrentEntry(record)) {
+    return;
+  }
+
+  const torrentFound = await thepiratebay.torrent(record.torrentId).catch(() => undefined);
+
+  if (!torrentFound || !allowedCategories.includes(torrentFound.subcategory)) {
+    return createSkipTorrentEntry(record);
+  }
+
+  const torrent = {
+    infoHash: torrentFound.infoHash,
+    provider: NAME,
+    torrentId: torrentFound.torrentId,
+    title: torrentFound.name.replace(/\t|\s+/g, ' '),
+    size: torrentFound.size,
+    type: seriesCategories.includes(torrentFound.subcategory) ? Type.SERIES : Type.MOVIE,
+    imdbId: torrentFound.imdbId,
+    uploadDate: torrentFound.uploadDate,
+    seeders: torrentFound.seeders,
+  };
+
+  return createTorrentEntry(torrent);
+}
+
+module.exports = { scrape };
\ No newline at end of file