[scraper] add html entities escape
This commit is contained in:
@@ -80,6 +80,14 @@ function escapeTitle(title) {
|
||||
.trim();
|
||||
}
|
||||
|
||||
/**
 * Decodes a small set of HTML character entities found in scraped titles
 * back into their literal characters.
 *
 * Despite the name, this function un-escapes: it turns entity references for
 * the apostrophe, ampersand, exclamation mark and percent sign into the
 * characters themselves. Everything else is left untouched.
 *
 * NOTE(review): the entity spellings below (`&#39;`/`&apos;`, `&amp;`/`&#38;`,
 * `&#33;`, `&#37;`) are reconstructed from the replacement characters; confirm
 * them against the scraped pages this is meant to clean up.
 *
 * @param {string} title - Raw title that may contain HTML entities.
 * @returns {string} The title with the supported entities decoded.
 */
function escapeHTML(title) {
  // Keyed by entity name or by decimal code point (without leading zeros).
  const entityChars = {
    'apos': '\'',
    'amp': '&',
    '39': '\'',
    '38': '&',
    '33': '!',
    '37': '%'
  };
  // A single pass guarantees each entity is decoded exactly once, so input
  // such as `&amp;#33;` yields `&#33;` instead of being decoded twice to `!`.
  return title.replace(
      /&(?:#0*(39|38|33|37)|(apos|amp));/g,
      (match, code, name) => entityChars[code || name]
  );
}
|
||||
|
||||
async function getImdbId(info, type) {
|
||||
const name = escapeTitle(info.title);
|
||||
const year = info.year || info.date && info.date.slice(0, 4);
|
||||
@@ -121,4 +129,4 @@ async function getKitsuId(info) {
|
||||
}));
|
||||
}
|
||||
|
||||
module.exports = { getMetadata, getImdbId, getKitsuId };
|
||||
module.exports = { getMetadata, getImdbId, getKitsuId, escapeHTML };
|
||||
|
||||
@@ -3,6 +3,7 @@ const needle = require('needle');
|
||||
const Sugar = require('sugar-date');
|
||||
const decode = require('magnet-uri');
|
||||
const Promises = require('../../lib/promises');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
|
||||
const defaultProxies = [
|
||||
'https://1337x.to'
|
||||
@@ -117,7 +118,7 @@ function parseTorrentPage(body) {
|
||||
const imdbIdMatch = details.find('div[id=\'description\']').html().match(/imdb\.com\/title\/(tt\d+)/i);
|
||||
|
||||
const torrent = {
|
||||
name: decode(magnetLink).name.replace(/\+/g, ' '),
|
||||
name: escapeHTML(decode(magnetLink).name.replace(/\+/g, ' ')),
|
||||
infoHash: decode(magnetLink).infoHash,
|
||||
magnetLink: magnetLink,
|
||||
seeders: parseInt(details.find('strong:contains(\'Seeders\')').next().text(), 10),
|
||||
|
||||
@@ -8,6 +8,7 @@ const thepiratebay = require('./thepiratebay_api.js');
|
||||
const bing = require('nodejs-bing');
|
||||
const { Type } = require('../../lib/types');
|
||||
const repository = require('../../lib/repository');
|
||||
const { escapeHTML } = require('../../lib/metadata');
|
||||
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
|
||||
|
||||
const NAME = 'ThePirateBay';
|
||||
@@ -42,10 +43,9 @@ async function scrape() {
|
||||
const torrent = {
|
||||
uploadDate: moment(row[0], 'YYYY-MMM-DD HH:mm:ss').toDate(),
|
||||
infoHash: Buffer.from(row[1], 'base64').toString('hex'),
|
||||
title: row[2]
|
||||
title: escapeHTML(row[2])
|
||||
.replace(/^"|"$/g, '')
|
||||
.replace(/&/g, '&')
|
||||
.replace(/&\w{2,6};/g, ' ')
|
||||
.replace(/&#?\w{2,6};/g, ' ')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim(),
|
||||
size: parseInt(row[3], 10)
|
||||
|
||||
Reference in New Issue
Block a user