Skip to content

Commit

Permalink
fix: properly parse non-url encoded file specs
Browse files Browse the repository at this point in the history
Properly creates file package args that contain characters that need to be url encoded.

There is also a refactor/cleanup in here
 - Removed the magic windows global for testing, fixing the tests to mock process.platform instead.
 - Moved inline regexes up to where the others are defined
 - Renamed a few variables to be more correct (i.e. isFilename to isFileType)
 - Refactored Result to be a proper Class instead of a function w/ prototypes

Closes: #193
  • Loading branch information
wraithgar committed Dec 18, 2024
1 parent d45dabc commit 6e6f8e9
Show file tree
Hide file tree
Showing 3 changed files with 220 additions and 111 deletions.
265 changes: 168 additions & 97 deletions lib/npa.js
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
'use strict'
module.exports = npa
module.exports.resolve = resolve
module.exports.toPurl = toPurl
module.exports.Result = Result

const { URL } = require('url')
const isWindows = process.platform === 'win32'

const { URL } = require('node:url')
const path = isWindows ? require('node:path').win32 : require('node:path')
const { homedir } = require('node:os')
const HostedGit = require('hosted-git-info')
const semver = require('semver')
const path = global.FAKE_WINDOWS ? require('path').win32 : require('path')
const validatePackageName = require('validate-npm-package-name')
const { homedir } = require('os')
const { log } = require('proc-log')

const isWindows = process.platform === 'win32' || global.FAKE_WINDOWS
const hasSlashes = isWindows ? /\\|[/]/ : /[/]/
const isURL = /^(?:git[+])?[a-z]+:/i
const isGit = /^[^@]+@[^:.]+\.[^:]+:.+$/i
const isFilename = /[.](?:tgz|tar.gz|tar)$/i
const isFileType = /[.](?:tgz|tar.gz|tar)$/i
const isPortNumber = /:[0-9]+(\/|$)/i
const isWindowsFile = /^(?:[.]|~[/]|[/\\]|[a-zA-Z]:)/
const isPosixFile = /^(?:[.]|~[/]|[/]|[a-zA-Z]:)/
const defaultRegistry = 'https://registry.npmjs.org'

function npa (arg, where) {
let name
Expand All @@ -31,13 +31,14 @@ function npa (arg, where) {
return npa(arg.raw, where || arg.where)
}
}
const nameEndsAt = arg[0] === '@' ? arg.slice(1).indexOf('@') + 1 : arg.indexOf('@')
const nameEndsAt = arg.indexOf('@', 1) // Skip possible leading @
const namePart = nameEndsAt > 0 ? arg.slice(0, nameEndsAt) : arg
if (isURL.test(arg)) {
spec = arg
} else if (isGit.test(arg)) {
spec = `git+ssh://${arg}`
} else if (namePart[0] !== '@' && (hasSlashes.test(namePart) || isFilename.test(namePart))) {
// eslint-disable-next-line max-len
} else if (!namePart.startsWith('@') && (hasSlashes.test(namePart) || isFileType.test(namePart))) {
spec = arg
} else if (nameEndsAt > 0) {
name = namePart
Expand All @@ -54,7 +55,25 @@ function npa (arg, where) {
return resolve(name, spec, where, arg)
}

const isFilespec = isWindows ? /^(?:[.]|~[/]|[/\\]|[a-zA-Z]:)/ : /^(?:[.]|~[/]|[/]|[a-zA-Z]:)/
function isFileSpec (spec) {
if (!spec) {
return false
}
if (spec.toLowerCase().startsWith('file:')) {
return true
}
if (isWindows) {
return isWindowsFile.test(spec)
}
return isPosixFile.test(spec)
}

function isAliasSpec (spec) {
if (!spec) {
return false
}
return spec.toLowerCase().startsWith('npm:')
}

function resolve (name, spec, where, arg) {
const res = new Result({
Expand All @@ -65,12 +84,16 @@ function resolve (name, spec, where, arg) {
})

if (name) {
res.setName(name)
res.name = name
}

if (spec && (isFilespec.test(spec) || /^file:/i.test(spec))) {
if (!where) {
where = process.cwd()
}

if (isFileSpec(spec)) {
return fromFile(res, where)
} else if (spec && /^npm:/i.test(spec)) {
} else if (isAliasSpec(spec)) {
return fromAlias(res, where)
}

Expand All @@ -82,15 +105,13 @@ function resolve (name, spec, where, arg) {
return fromHostedGit(res, hosted)
} else if (spec && isURL.test(spec)) {
return fromURL(res)
} else if (spec && (hasSlashes.test(spec) || isFilename.test(spec))) {
} else if (spec && (hasSlashes.test(spec) || isFileType.test(spec))) {
return fromFile(res, where)
} else {
return fromRegistry(res)
}
}

const defaultRegistry = 'https://registry.npmjs.org'

function toPurl (arg, reg = defaultRegistry) {
const res = npa(arg)

Expand Down Expand Up @@ -128,60 +149,59 @@ function invalidPurlType (type, raw) {
return err
}

function Result (opts) {
this.type = opts.type
this.registry = opts.registry
this.where = opts.where
if (opts.raw == null) {
this.raw = opts.name ? opts.name + '@' + opts.rawSpec : opts.rawSpec
} else {
this.raw = opts.raw
class Result {
constructor (opts) {
this.type = opts.type
this.registry = opts.registry
this.where = opts.where
if (opts.raw == null) {
this.raw = opts.name ? `${opts.name}@${opts.rawSpec}` : opts.rawSpec
} else {
this.raw = opts.raw
}
this.rawSpec = opts.rawSpec || ''
this.saveSpec = opts.saveSpec
this.fetchSpec = opts.fetchSpec
if (opts.name) {
this.setName(opts.name)
}
this.gitRange = opts.gitRange
this.gitCommittish = opts.gitCommittish
this.gitSubdir = opts.gitSubdir
this.hosted = opts.hosted
}

this.name = undefined
this.escapedName = undefined
this.scope = undefined
this.rawSpec = opts.rawSpec || ''
this.saveSpec = opts.saveSpec
this.fetchSpec = opts.fetchSpec
if (opts.name) {
this.setName(opts.name)
}
this.gitRange = opts.gitRange
this.gitCommittish = opts.gitCommittish
this.gitSubdir = opts.gitSubdir
this.hosted = opts.hosted
}
// TODO move this to a getter/setter in a semver major
setName (name) {
const valid = validatePackageName(name)
if (!valid.validForOldPackages) {
throw invalidPackageName(name, valid, this.raw)
}

Result.prototype.setName = function (name) {
const valid = validatePackageName(name)
if (!valid.validForOldPackages) {
throw invalidPackageName(name, valid, this.raw)
this.name = name
this.scope = name[0] === '@' ? name.slice(0, name.indexOf('/')) : undefined
// scoped packages in couch must have slash url-encoded, e.g. @foo%2Fbar
this.escapedName = name.replace('/', '%2f')

Check failure

Code scanning / CodeQL

Incomplete string escaping or encoding High

This replaces only the first occurrence of '/'.
return this
}

this.name = name
this.scope = name[0] === '@' ? name.slice(0, name.indexOf('/')) : undefined
// scoped packages in couch must have slash url-encoded, e.g. @foo%2Fbar
this.escapedName = name.replace('/', '%2f')
return this
}

Result.prototype.toString = function () {
const full = []
if (this.name != null && this.name !== '') {
full.push(this.name)
}
const spec = this.saveSpec || this.fetchSpec || this.rawSpec
if (spec != null && spec !== '') {
full.push(spec)
toString () {
const full = []
if (this.name != null && this.name !== '') {
full.push(this.name)
}
const spec = this.saveSpec || this.fetchSpec || this.rawSpec
if (spec != null && spec !== '') {
full.push(spec)
}
return full.length ? full.join('@') : this.raw
}
return full.length ? full.join('@') : this.raw
}

Result.prototype.toJSON = function () {
const result = Object.assign({}, this)
delete result.hosted
return result
toJSON () {
const result = Object.assign({}, this)
delete result.hosted
return result
}
}

// sets res.gitCommittish, res.gitRange, and res.gitSubdir
Expand Down Expand Up @@ -228,25 +248,89 @@ function setGitAttrs (res, committish) {
}
}

function fromFile (res, where) {
if (!where) {
where = process.cwd()
// Taken from: EncodePathChars and lookup_table in src/node_url.cc
// url.pathToFileURL only returns absolute references. We can't use it to encode paths.
// encodeURI mangles windows paths. We can't use it to encode paths.
// Under the hood, url.pathToFileURL does a limited set of encoding, with an extra windows step, and then calls path.resolve.
// The encoding node does without path.resolve is not available outside of the source, so we are recreating it here.
const encodedPathChars = new Map([
['\0', '%00'],
['\t', '%09'],
['\n', '%0A'],
['\r', '%0D'],
[' ', '%20'],
['"', '%22'],
['#', '%23'],
['%', '%25'],
['?', '%3F'],
['[', '%5B'],
['\\', isWindows ? '/' : '%5C'],
[']', '%5D'],
['^', '%5E'],
['|', '%7C'],
['~', '%7E'],
])

function pathToFileURL (str) {
let result = ''
for (let i = 0; i < str.length; i++) {
result = `${result}${encodedPathChars.get(str[i]) ?? str[i]}`
}
res.type = isFilename.test(res.rawSpec) ? 'file' : 'directory'
return `file:${result}`
}

/* parse file package args:
*
* /posix/path
* ./posix/path
* .dotfile
* .dot/path
* filename
* filename.with.ext
* C:\windows\path
* path/with/no/leading/separator
*
* translates to relative ./path
* - file:{path}
*
* translates to absolute /path
* - file:/{path}
* - file://{path} (this is not RFC compliant, but is supported for backward compatibility)
* - file:///{path}
*
* file: specs are url encoded, bare paths are not
*
*/
function fromFile (res, where) {
res.type = isFileType.test(res.rawSpec) ? 'file' : 'directory'
res.where = where

// always put the '/' on where when resolving urls, or else
// file:foo from /path/to/bar goes to /path/to/foo, when we want
// it to be /path/to/bar/foo
let rawSpec = res.rawSpec
if (!rawSpec.startsWith('file:')) {
rawSpec = pathToFileURL(rawSpec)
}

if (rawSpec.startsWith('file:/')) {
// XXX backwards compatibility lack of compliance with RFC 8089

// turn file://path into file:/path
if (/^file:\/\/[^/]/.test(rawSpec)) {
rawSpec = `file:/${rawSpec.slice(5)}`
}

// turn file:/../path into file:../path
// for 1 or 3 leading slashes (2 is already ruled out from handling file:// explicitly above)
if (/^\/{1,3}\.\.?(\/|$)/.test(rawSpec.slice(5))) {
rawSpec = rawSpec.replace(/^file:\/{1,3}/, 'file:')
}
}

let specUrl
let resolvedUrl
const prefix = (!/^file:/.test(res.rawSpec) ? 'file:' : '')
const rawWithPrefix = prefix + res.rawSpec
let rawNoPrefix = rawWithPrefix.replace(/^file:/, '')
let specUrl
try {
resolvedUrl = new URL(rawWithPrefix, `file://${path.resolve(where)}/`)
specUrl = new URL(rawWithPrefix)
// always put the '/' on "where", or else file:foo from /path/to/bar goes to /path/to/foo, when we want it to be /path/to/bar/foo
resolvedUrl = new URL(rawSpec, `${pathToFileURL(path.resolve(where))}/`)
specUrl = new URL(rawSpec)
} catch (originalError) {
const er = new Error('Invalid file: URL, must comply with RFC 8089')
throw Object.assign(er, {
Expand All @@ -257,24 +341,6 @@ function fromFile (res, where) {
})
}

// XXX backwards compatibility lack of compliance with RFC 8089
if (resolvedUrl.host && resolvedUrl.host !== 'localhost') {
const rawSpec = res.rawSpec.replace(/^file:\/\//, 'file:///')
resolvedUrl = new URL(rawSpec, `file://${path.resolve(where)}/`)
specUrl = new URL(rawSpec)
rawNoPrefix = rawSpec.replace(/^file:/, '')
}
// turn file:/../foo into file:../foo
// for 1, 2 or 3 leading slashes since we attempted
// in the previous step to make it a file protocol url with a leading slash
if (/^\/{1,3}\.\.?(\/|$)/.test(rawNoPrefix)) {
const rawSpec = res.rawSpec.replace(/^file:\/{1,3}/, 'file:')
resolvedUrl = new URL(rawSpec, `file://${path.resolve(where)}/`)
specUrl = new URL(rawSpec)
rawNoPrefix = rawSpec.replace(/^file:/, '')
}
// XXX end RFC 8089 violation backwards compatibility section

// turn /C:/blah into just C:/blah on windows
let specPath = decodeURIComponent(specUrl.pathname)
let resolvedPath = decodeURIComponent(resolvedUrl.pathname)
Expand All @@ -288,7 +354,7 @@ function fromFile (res, where) {
if (/^\/~(\/|$)/.test(specPath)) {
res.saveSpec = `file:${specPath.substr(1)}`
resolvedPath = path.resolve(homedir(), specPath.substr(3))
} else if (!path.isAbsolute(rawNoPrefix)) {
} else if (!path.isAbsolute(rawSpec.slice(5))) {
res.saveSpec = `file:${path.relative(where, resolvedPath)}`
} else {
res.saveSpec = `file:${path.resolve(resolvedPath)}`
Expand Down Expand Up @@ -416,3 +482,8 @@ function fromRegistry (res) {
}
return res
}

module.exports = npa
module.exports.resolve = resolve
module.exports.toPurl = toPurl
module.exports.Result = Result
Loading

0 comments on commit 6e6f8e9

Please sign in to comment.