Added back original scrapers, integrated with PGSQL

This commit is contained in:
Gabisonfire
2024-01-17 16:20:00 -05:00
parent 364d1545b1
commit cab7f38c66
79 changed files with 65832 additions and 1702 deletions

View File

@@ -0,0 +1,26 @@
/**
 * Looks for one of the keywords `dublado`, `dual`, `nacional` or `multi`
 * anywhere in the given name, ignoring letter case.
 *
 * The keywords are matched as plain substrings (no word boundaries).
 *
 * @param {string} name - Name to inspect.
 * @returns {RegExpMatchArray|null} The match for the first keyword found
 *   (truthy), or `null` when none is present.
 */
function isPtDubbed(name) {
  const dubbedKeyword = /dublado|dual|nacional|multi/;
  const lowered = name.toLowerCase();
  return lowered.match(dubbedKeyword);
}
/**
 * Cleans up a name by applying a fixed sequence of regex replacements and
 * trimming the result.
 *
 * The steps run in order, each on the output of the previous one:
 *   1. move the last upper-case resolution token (e.g. `1080P`) to the end,
 *   2. strip a leading site watermark (e.g. `WWW.SITE.COM - `),
 *   3. strip leading upper-case metadata prefixes (e.g. `BLURAY-DUAL.`),
 *   4. strip a site watermark again, in case step 3 exposed one,
 *   5. strip a dangling site ending (e.g. `COM - `) left at the start.
 *
 * @param {string} name - Raw name to clean.
 * @returns {string} The cleaned, trimmed name.
 */
function sanitizePtName(name) {
  const siteWatermark = /^[\[{]?(?:ACESSE.*|WWW\.)?[A-Z]+\.(COM|NET|ORG|TO|TV|ME)\b\s*[-\]}]+[\s.]*/i;

  // Order matters: later patterns are anchored at the start of the string and
  // rely on earlier steps having removed whatever preceded their target.
  const steps = [
    [/(.*)\b(\d{3,4}P)\b(?!.*\d{3,4}[Pp])(.*)/, '$1$3 $2'],
    [siteWatermark, ''],
    [/^(\d*(?:\.\d{1,2})?(?:[4A-Z-]{3,}|P)[-.]+)+/, ''],
    [siteWatermark, ''],
    [/^(COM|NET|ORG|TO|TV|ME)\b\s*-+[\s.]*/, ''],
  ];

  let cleaned = name;
  for (const [pattern, replacement] of steps) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned.trim();
}
/**
 * Removes a season marker from a name and trims surrounding whitespace.
 *
 * Two marker shapes are removed (first occurrence only):
 *   - a trailing `S<digits>` at the very end of the trimmed name (e.g. `S01`),
 *   - a digit, any single character, an optional whitespace and the word
 *     `Temporada`/`temporada` (e.g. `2ª Temporada`).
 *
 * @param {string} name - Name that may carry a season marker.
 * @returns {string} The name without the marker and without leading or
 *   trailing whitespace.
 */
function sanitizePtOriginalName(name) {
  return name
    .trim() // required first so the `$`-anchored `S<digits>` form can match
    .replace(/S\d+$|\d.\s?[Tt]emporada/, '')
    .trim(); // removing the marker usually leaves a dangling separator space
}
/**
 * Normalizes a languages string: repairs a mis-decoded 'ê', removes every
 * space character and trims any remaining surrounding whitespace.
 *
 * NOTE(review): the pattern previously matched only the literal ASCII text
 * `<2F><>`, which looks like a mis-encoded rendering of two U+FFFD
 * replacement characters (what a wrongly decoded two-byte 'ê' turns into).
 * Both forms are accepted here so existing behavior is kept while real
 * mis-decoded input is repaired too — confirm against the upstream file.
 *
 * @param {string} languages - Raw languages text.
 * @returns {string} The text with 'ê' restored and spaces removed.
 */
function sanitizePtLanguages(languages) {
  const brokenCircumflexE = /<2F><>|\uFFFD\uFFFD/g;
  return languages
    .replace(brokenCircumflexE, 'ê')
    .replace(/ /g, '')
    .trim();
}
module.exports = { isPtDubbed, sanitizePtName, sanitizePtOriginalName, sanitizePtLanguages }