[scraper] add html entities escape
This commit is contained in:
@@ -80,6 +80,14 @@ function escapeTitle(title) {
|
|||||||
.trim();
|
.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Decodes a small, fixed set of HTML character references found in scraped
 * torrent titles back into their literal characters.
 *
 * Despite the name, this function *unescapes*: it turns entity text into the
 * plain character, not the other way around. Only the references listed below
 * are handled; any other entity is left in the string untouched.
 *
 *   &#39; / &apos;  ->  '
 *   &#38; / &amp;   ->  &
 *   &#33;           ->  !
 *   &#37;           ->  %
 *
 * @param {string} title - Raw title that may contain HTML character references.
 * @returns {string} The title with the supported references decoded.
 */
function escapeHTML(title) {
  return title
      .replace(/&#39;|&apos;/g, '\'')
      // The ampersand is decoded before "!" and "%", so a double-encoded
      // reference such as "&amp;#33;" is fully resolved to "!" in one call.
      .replace(/&#38;|&amp;/g, '&')
      .replace(/&#33;/g, '!')
      .replace(/&#37;/g, '%');
}
|
||||||
|
|
||||||
async function getImdbId(info, type) {
|
async function getImdbId(info, type) {
|
||||||
const name = escapeTitle(info.title);
|
const name = escapeTitle(info.title);
|
||||||
const year = info.year || info.date && info.date.slice(0, 4);
|
const year = info.year || info.date && info.date.slice(0, 4);
|
||||||
@@ -121,4 +129,4 @@ async function getKitsuId(info) {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { getMetadata, getImdbId, getKitsuId };
|
module.exports = { getMetadata, getImdbId, getKitsuId, escapeHTML };
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ const needle = require('needle');
|
|||||||
const Sugar = require('sugar-date');
|
const Sugar = require('sugar-date');
|
||||||
const decode = require('magnet-uri');
|
const decode = require('magnet-uri');
|
||||||
const Promises = require('../../lib/promises');
|
const Promises = require('../../lib/promises');
|
||||||
|
const { escapeHTML } = require('../../lib/metadata');
|
||||||
|
|
||||||
const defaultProxies = [
|
const defaultProxies = [
|
||||||
'https://1337x.to'
|
'https://1337x.to'
|
||||||
@@ -117,7 +118,7 @@ function parseTorrentPage(body) {
|
|||||||
const imdbIdMatch = details.find('div[id=\'description\']').html().match(/imdb\.com\/title\/(tt\d+)/i);
|
const imdbIdMatch = details.find('div[id=\'description\']').html().match(/imdb\.com\/title\/(tt\d+)/i);
|
||||||
|
|
||||||
const torrent = {
|
const torrent = {
|
||||||
name: decode(magnetLink).name.replace(/\+/g, ' '),
|
name: escapeHTML(decode(magnetLink).name.replace(/\+/g, ' ')),
|
||||||
infoHash: decode(magnetLink).infoHash,
|
infoHash: decode(magnetLink).infoHash,
|
||||||
magnetLink: magnetLink,
|
magnetLink: magnetLink,
|
||||||
seeders: parseInt(details.find('strong:contains(\'Seeders\')').next().text(), 10),
|
seeders: parseInt(details.find('strong:contains(\'Seeders\')').next().text(), 10),
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ const thepiratebay = require('./thepiratebay_api.js');
|
|||||||
const bing = require('nodejs-bing');
|
const bing = require('nodejs-bing');
|
||||||
const { Type } = require('../../lib/types');
|
const { Type } = require('../../lib/types');
|
||||||
const repository = require('../../lib/repository');
|
const repository = require('../../lib/repository');
|
||||||
|
const { escapeHTML } = require('../../lib/metadata');
|
||||||
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
|
const { createTorrentEntry, createSkipTorrentEntry, getStoredTorrentEntry } = require('../../lib/torrentEntries');
|
||||||
|
|
||||||
const NAME = 'ThePirateBay';
|
const NAME = 'ThePirateBay';
|
||||||
@@ -42,10 +43,9 @@ async function scrape() {
|
|||||||
const torrent = {
|
const torrent = {
|
||||||
uploadDate: moment(row[0], 'YYYY-MMM-DD HH:mm:ss').toDate(),
|
uploadDate: moment(row[0], 'YYYY-MMM-DD HH:mm:ss').toDate(),
|
||||||
infoHash: Buffer.from(row[1], 'base64').toString('hex'),
|
infoHash: Buffer.from(row[1], 'base64').toString('hex'),
|
||||||
title: row[2]
|
title: escapeHTML(row[2])
|
||||||
.replace(/^"|"$/g, '')
|
.replace(/^"|"$/g, '')
|
||||||
.replace(/&amp;/g, '&')
|
.replace(/&#?\w{2,6};/g, ' ')
|
||||||
.replace(/&\w{2,6};/g, ' ')
|
|
||||||
.replace(/\s+/g, ' ')
|
.replace(/\s+/g, ' ')
|
||||||
.trim(),
|
.trim(),
|
||||||
size: parseInt(row[3], 10)
|
size: parseInt(row[3], 10)
|
||||||
|
|||||||
Reference in New Issue
Block a user