Skip to content

Commit

Permalink
Merge pull request #275 from vejja/html-parser
Browse files Browse the repository at this point in the history
feat(csp): use cheerio parser
  • Loading branch information
Baroshem authored Nov 1, 2023
2 parents 76841ce + 442993b commit 04e0c45
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 48 deletions.
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,18 @@
"dependencies": {
"@nuxt/kit": "^3.8.0",
"basic-auth": "^2.0.1",
"cheerio": "^1.0.0-rc.12",
"defu": "^6.1.1",
"nuxt-csurf": "^1.3.1",
"pathe": "^1.0.0",
"unplugin-remove": "^0.1.3",
"xss": "^1.0.14"
},
"devDependencies": {
"@nuxt/eslint-config": "^0.2.0",
"@nuxt/module-builder": "^0.5.2",
"@nuxt/schema": "^3.8.0",
"@nuxt/test-utils": "^3.8.0",
"@nuxt/eslint-config": "^0.2.0",
"@types/node": "^18.18.1",
"eslint": "^8.50.0",
"nuxt": "^3.8.0",
Expand Down
51 changes: 26 additions & 25 deletions src/runtime/nitro/plugins/02-cspSsg.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,27 @@ import path from 'node:path'
import crypto from 'node:crypto'
import type { H3Event } from 'h3'
import defu from 'defu'
import type {
ModuleOptions
} from '../../../types'
import type {
ContentSecurityPolicyValue
} from '../../../types/headers'
import { defineNitroPlugin, useRuntimeConfig, getRouteRules } from '#imports'
import { useNitro } from '@nuxt/kit'
import * as cheerio from 'cheerio'

const moduleOptions = useRuntimeConfig().security

export default defineNitroPlugin((nitroApp) => {
nitroApp.hooks.hook('render:html', (html, { event }) => {
// Content Security Policy

if (!isContentSecurityPolicyEnabled(event, moduleOptions)) {
if (!isContentSecurityPolicyEnabled(event)) {
return
}

if (!moduleOptions.headers) {
return
}

// Detect both inline scripts and inline styles
const inlineScriptPattern = /<script[^>]*>(.*?)<\/script>/gs
const inlineStylePattern = /<style>(.*?)<\/style>/gs
// Whitelist external scripts based on integrity attribute
const externalScriptPattern = /<script .*?integrity="(.*?)".*?(\/>|>.*?<\/script>)/gs
const scriptHashes: string[] = []
const styleHashes: string[] = []
const hashAlgorithm = 'sha256'
Expand All @@ -39,22 +32,31 @@ export default defineNitroPlugin((nitroApp) => {
const htmlRecords = html as unknown as Record<string, string[]>
const elements = htmlRecords[section]
for (const element of elements) {
let match
while ((match = inlineScriptPattern.exec(element)) !== null) {
if (match[1]) {
scriptHashes.push(generateHash(match[1], hashAlgorithm))
}
}
while ((match = inlineStylePattern.exec(element)) !== null) {
if (match[1]) {
styleHashes.push(generateHash(match[1], hashAlgorithm))
const $ = cheerio.load(element, null, false)

// Parse all script tags
$('script').each((i, script) => {
const scriptText = $(script).text()
const scriptAttrs = $(script).attr()
const src = scriptAttrs?.src
const integrity = scriptAttrs?.integrity
if (!src && scriptText) {
// Hash inline scripts with content
scriptHashes.push(generateHash(scriptText, hashAlgorithm))
} else if (src && integrity) {
// Whitelist external scripts with integrity
scriptHashes.push(`'${integrity}'`)
}
}
while ((match = externalScriptPattern.exec(element)) !== null) {
if (match[1]) {
scriptHashes.push(`'${match[1]}'`)
})

// Parse all style tags
$('style').each((i, style) => {
const styleText = $(style).text()
if (styleText) {
// Hash inline styles with content
styleHashes.push(generateHash(styleText, hashAlgorithm))
}
}
})
}
}

Expand All @@ -68,7 +70,6 @@ export default defineNitroPlugin((nitroApp) => {
updateRouteRules(event, content)
}


})

// Insert hashes in the CSP meta tag for both the script-src and the style-src policies
Expand Down Expand Up @@ -143,7 +144,7 @@ export default defineNitroPlugin((nitroApp) => {
* @param options ModuleOptions
* @returns boolean
*/
function isContentSecurityPolicyEnabled (event: H3Event, options: ModuleOptions): boolean {
function isContentSecurityPolicyEnabled (event: H3Event): boolean {
const nitroPrerenderHeader = 'x-nitro-prerender'
const nitroPrerenderHeaderValue = event.node.req.headers[nitroPrerenderHeader]

Expand Down
37 changes: 16 additions & 21 deletions src/runtime/nitro/plugins/99-cspNonce.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
import { defineNitroPlugin } from '#imports'
import type { H3Event } from 'h3'
import * as cheerio from 'cheerio'

// To prevent the nonce attribute from being added to literal strings,
// we need to make sure that the tag is not preceded by a single or double quote.
// This is done by using a negative lookbehind assertion. See https://www.regular-expressions.info/lookaround.html
// See https://regex101.com/r/DBE57j/1 for some examples.
const tagNotPrecededByQuotes = (tag: string) => new RegExp(`(?<!['|"])<${tag}`, 'g')

export default defineNitroPlugin((nitroApp) => {
nitroApp.hooks.hook('render:html', (html, { event }) => {
if (isPrerendering(event)) {
// In SSG mode, do not inject nonces in html
// However first make sure we erase nonce placeholders from CSP meta
html.head = html.head.map((meta) => {
if (!meta.startsWith('<meta http-equiv="Content-Security-Policy"')) { return meta }
return meta.replaceAll("'nonce-{{nonce}}'", '')
})
return
}
const nonce = parseNonce(`${event.node.res.getHeader('Content-Security-Policy')}`)
Expand All @@ -28,17 +19,21 @@ export default defineNitroPlugin((nitroApp) => {
return meta.replaceAll('{{nonce}}', nonce)
})

// Add nonce attribute to all link tags
html.head = html.head.map(link => link.replaceAll(tagNotPrecededByQuotes('link'), `<link nonce="${nonce}"`))
html.bodyAppend = html.bodyAppend.map(link => link.replaceAll(tagNotPrecededByQuotes('link'), `<link nonce="${nonce}"`))

// Add nonce attribute to all script tags
html.head = html.head.map(script => script.replaceAll(tagNotPrecededByQuotes('script'), `<script nonce="${nonce}"`))
html.bodyAppend = html.bodyAppend.map(script => script.replaceAll(tagNotPrecededByQuotes('script'), `<script nonce="${nonce}"`))

// Add nonce attribute to all style tags
html.head = html.head.map(style => style.replaceAll(tagNotPrecededByQuotes('style'), `<style nonce="${nonce}"`))
html.bodyAppend = html.bodyAppend.map(style => style.replaceAll(tagNotPrecededByQuotes('style'), `<style nonce="${nonce}"`))
// Scan all relevant sections of the NuxtRenderHtmlContext
for (const section of ['body', 'bodyAppend', 'bodyPrepend', 'head']) {
const htmlRecords = html as unknown as Record<string, string[]>

htmlRecords[section] = htmlRecords[section].map(element => {
const $ = cheerio.load(element, null, false)
// Add nonce to all link tags
$('link').attr('nonce', nonce)
// Add nonce to all script tags
$('script').attr('nonce', nonce)
// Add nonce to all style tags
$('style').attr('nonce', nonce)
return $.html()
})
}
})

function parseNonce (content: string) {
Expand Down
52 changes: 51 additions & 1 deletion yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1861,6 +1861,31 @@ check-error@^1.0.2:
resolved "https://registry.npmjs.org/check-error/-/check-error-1.0.2.tgz"
integrity sha512-BrgHpW9NURQgzoNyjfq0Wu6VFO6D7IZEmJNdtgNqpzGG8RuNFHt2jQxWlAs4HMe119chBnv+34syEZtc6IhLtA==

cheerio-select@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-2.1.0.tgz#4d8673286b8126ca2a8e42740d5e3c4884ae21b4"
integrity sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==
dependencies:
boolbase "^1.0.0"
css-select "^5.1.0"
css-what "^6.1.0"
domelementtype "^2.3.0"
domhandler "^5.0.3"
domutils "^3.0.1"

cheerio@^1.0.0-rc.12:
version "1.0.0-rc.12"
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683"
integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==
dependencies:
cheerio-select "^2.1.0"
dom-serializer "^2.0.0"
domhandler "^5.0.3"
domutils "^3.0.1"
htmlparser2 "^8.0.1"
parse5 "^7.0.0"
parse5-htmlparser2-tree-adapter "^7.0.0"

chokidar@^3.5.1, chokidar@^3.5.3:
version "3.5.3"
resolved "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz"
Expand Down Expand Up @@ -2398,7 +2423,7 @@ enhanced-resolve@^5.14.1:
graceful-fs "^4.2.4"
tapable "^2.2.0"

entities@^4.2.0:
entities@^4.2.0, entities@^4.4.0:
version "4.5.0"
resolved "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz"
integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==
Expand Down Expand Up @@ -3195,6 +3220,16 @@ html-tags@^3.3.1:
resolved "https://registry.npmjs.org/html-tags/-/html-tags-3.3.1.tgz"
integrity sha512-ztqyC3kLto0e9WbNp0aeP+M3kTt+nbaIveGmUxAtZa+8iFgKLUOD4YKM5j+f3QD89bra7UeumolZHKuOXnTmeQ==

htmlparser2@^8.0.1:
version "8.0.2"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.2.tgz#f002151705b383e62433b5cf466f5b716edaec21"
integrity sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==
dependencies:
domelementtype "^2.3.0"
domhandler "^5.0.3"
domutils "^3.0.1"
entities "^4.4.0"

http-cache-semantics@^4.1.1:
version "4.1.1"
resolved "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz"
Expand Down Expand Up @@ -4611,6 +4646,21 @@ parse-url@^8.1.0:
dependencies:
parse-path "^7.0.0"

parse5-htmlparser2-tree-adapter@^7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz#23c2cc233bcf09bb7beba8b8a69d46b08c62c2f1"
integrity sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==
dependencies:
domhandler "^5.0.2"
parse5 "^7.0.0"

parse5@^7.0.0:
version "7.1.2"
resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.1.2.tgz#0736bebbfd77793823240a23b7fc5e010b7f8e32"
integrity sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==
dependencies:
entities "^4.4.0"

parseurl@~1.3.3:
version "1.3.3"
resolved "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz"
Expand Down

0 comments on commit 04e0c45

Please sign in to comment.