/*
 * Helpers for normalising Portuguese-language release data.
 *
 * Provenance: scraper/scrapers/scraperHelper.js, introduced by commit
 * a0fae89b81df1851d45396fcfda0de40d5f1c7b1 ("add scraper helper",
 * TheBeastLT, Tue, 14 Sep 2021 15:08:45 +0200).
 */

// Keywords looked for in the lower-cased name by isPtDubbed().
const PT_DUBBED_PATTERN = /dublado|dual|nacional|multi/;

// Last upper-case "<3-4 digits>P" token that is not followed by another
// "<3-4 digits>P/p" token later in the string; used to move it to the end.
const RESOLUTION_PATTERN = /(.*)\b(\d{3,4}P)\b(?!.*\d{3,4}[Pp])(.*)/;

// Leading site watermark such as "WWW.SITE.COM - " or "[ACESSE SITE.COM] ".
const WATERMARK_PATTERN = /^[[{]?(?:ACESSE.*|WWW\.)?[A-Z]+\.(COM|ORG|TO|TV)\b\s?[-\]}]+[\s.]*/i;

// Leading run of upper-case metadata tokens, each terminated by '-' or '.'.
const METADATA_PREFIX_PATTERN = /^(\d*(?:\.\d)?(?:[4A-Z-]{3,}|P)[-.]+)+/;

// Trailing "S<digits>" marker (for example "S01") at the very end of a name.
const TRAILING_SEASON_PATTERN = /S\d+$/;

// Two consecutive U+FFFD replacement characters. Written as escapes so the
// pattern survives re-encoding of this file.
// NOTE(review): presumably these stand for a mis-decoded "ê" in the source
// text (e.g. "Português") — confirm against the scraped pages.
const BROKEN_E_CIRCUMFLEX_PATTERN = /\uFFFD\uFFFD/g;

/**
 * Checks whether a name contains one of the keywords "dublado", "dual",
 * "nacional" or "multi" (case-insensitively).
 *
 * @param {string} name - Name to inspect.
 * @returns {RegExpMatchArray|null} The match against the lower-cased name
 *   (truthy) when a keyword is present, otherwise `null`.
 */
function isPtDubbed(name) {
  return name.toLowerCase().match(PT_DUBBED_PATTERN);
}

/**
 * Cleans up a name by applying, in order:
 *   1. moving an upper-case resolution token (e.g. "1080P") to the end,
 *   2. removing a leading site watermark,
 *   3. removing leading upper-case metadata prefixes,
 *   4. trimming surrounding whitespace.
 *
 * @param {string} name - Raw name.
 * @returns {string} The sanitized name.
 */
function sanitizePtName(name) {
  return name
    .replace(RESOLUTION_PATTERN, '$1$3 $2') // add resolution to the end if missing
    .replace(WATERMARK_PATTERN, '') // replace watermarks
    .replace(METADATA_PREFIX_PATTERN, '') // replace metadata prefixes
    .trim();
}

/**
 * Trims a name and removes a trailing "S<digits>" marker.
 *
 * The result is trimmed again after the marker is removed so that an input
 * like "Show S01" yields "Show" rather than "Show " with a dangling space.
 *
 * @param {string} name - Raw original name.
 * @returns {string} The name without surrounding whitespace or trailing marker.
 */
function sanitizePtOriginalName(name) {
  return name
    .trim() // trim first so trailing whitespace cannot hide the end-anchored marker
    .replace(TRAILING_SEASON_PATTERN, '')
    .trim();
}

/**
 * Normalises a languages string: replaces each pair of U+FFFD replacement
 * characters with "ê", removes every space character and trims any other
 * surrounding whitespace.
 *
 * @param {string} languages - Raw languages string.
 * @returns {string} The normalised languages string.
 */
function sanitizePtLanguages(languages) {
  return languages
    .replace(BROKEN_E_CIRCUMFLEX_PATTERN, 'ê')
    .replace(/ /g, '')
    .trim();
}

// CommonJS export; guarded so that evaluating this file where `module` is
// not defined does not throw.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages };
}