From 6e6f8e92d825160b8629a3318d06077aa17f4e68 Mon Sep 17 00:00:00 2001 From: Gar Date: Wed, 18 Dec 2024 09:30:33 -0800 Subject: [PATCH] fix: properly parse non-url encoded file specs Properly creates file package args that contain characters that need to be url encoded. There is also a refactor/cleanup in here - Removed the magic windows global for testing, fixing the tests to mock process.platform instead. - Moved inline regexes up to where the others are defined - Renamed a few variables to be more correct (i.e. isFilename to isFileType) - Refactored Result to be a proper Class instead of a function w/ prototypes Closes: https://github.com/npm/npm-package-arg/issues/193 --- lib/npa.js | 265 ++++++++++++++++++++++++++++++------------------ test/basic.js | 47 +++++++-- test/windows.js | 19 ++-- 3 files changed, 220 insertions(+), 111 deletions(-) diff --git a/lib/npa.js b/lib/npa.js index e926058..bde6e7f 100644 --- a/lib/npa.js +++ b/lib/npa.js @@ -1,23 +1,23 @@ 'use strict' -module.exports = npa -module.exports.resolve = resolve -module.exports.toPurl = toPurl -module.exports.Result = Result -const { URL } = require('url') +const isWindows = process.platform === 'win32' + +const { URL } = require('node:url') +const path = isWindows ? require('node:path').win32 : require('node:path') +const { homedir } = require('node:os') const HostedGit = require('hosted-git-info') const semver = require('semver') -const path = global.FAKE_WINDOWS ? require('path').win32 : require('path') const validatePackageName = require('validate-npm-package-name') -const { homedir } = require('os') const { log } = require('proc-log') -const isWindows = process.platform === 'win32' || global.FAKE_WINDOWS const hasSlashes = isWindows ? /\\|[/]/ : /[/]/ const isURL = /^(?:git[+])?[a-z]+:/i const isGit = /^[^@]+@[^:.]+\.[^:]+:.+$/i -const isFilename = /[.](?:tgz|tar.gz|tar)$/i +const isFileType = /[.](?:tgz|tar.gz|tar)$/i const isPortNumber = /:[0-9]+(\/|$)/i +const isWindowsFile = /^(?:[.]|~[/]|[/\\]|[a-zA-Z]:)/ +const isPosixFile = /^(?:[.]|~[/]|[/]|[a-zA-Z]:)/ +const defaultRegistry = 'https://registry.npmjs.org' function npa (arg, where) { let name @@ -31,13 +31,14 @@ function npa (arg, where) { return npa(arg.raw, where || arg.where) } } - const nameEndsAt = arg[0] === '@' ? arg.slice(1).indexOf('@') + 1 : arg.indexOf('@') + const nameEndsAt = arg.indexOf('@', 1) // Skip possible leading @ const namePart = nameEndsAt > 0 ? arg.slice(0, nameEndsAt) : arg if (isURL.test(arg)) { spec = arg } else if (isGit.test(arg)) { spec = `git+ssh://${arg}` - } else if (namePart[0] !== '@' && (hasSlashes.test(namePart) || isFilename.test(namePart))) { + // eslint-disable-next-line max-len + } else if (!namePart.startsWith('@') && (hasSlashes.test(namePart) || isFileType.test(namePart))) { spec = arg } else if (nameEndsAt > 0) { name = namePart @@ -54,7 +55,25 @@ function npa (arg, where) { return resolve(name, spec, where, arg) } -const isFilespec = isWindows ? /^(?:[.]|~[/]|[/\\]|[a-zA-Z]:)/ : /^(?:[.]|~[/]|[/]|[a-zA-Z]:)/ +function isFileSpec (spec) { + if (!spec) { + return false + } + if (spec.toLowerCase().startsWith('file:')) { + return true + } + if (isWindows) { + return isWindowsFile.test(spec) + } + return isPosixFile.test(spec) +} + +function isAliasSpec (spec) { + if (!spec) { + return false + } + return spec.toLowerCase().startsWith('npm:') +} function resolve (name, spec, where, arg) { const res = new Result({ @@ -65,12 +84,16 @@ function resolve (name, spec, where, arg) { }) if (name) { - res.setName(name) + res.name = name } - if (spec && (isFilespec.test(spec) || /^file:/i.test(spec))) { + if (!where) { + where = process.cwd() + } + + if (isFileSpec(spec)) { return fromFile(res, where) - } else if (spec && /^npm:/i.test(spec)) { + } else if (isAliasSpec(spec)) { return fromAlias(res, where) } @@ -82,15 +105,13 @@ function resolve (name, spec, where, arg) { return fromHostedGit(res, hosted) } else if (spec && isURL.test(spec)) { return fromURL(res) - } else if (spec && (hasSlashes.test(spec) || isFilename.test(spec))) { + } else if (spec && (hasSlashes.test(spec) || isFileType.test(spec))) { return fromFile(res, where) } else { return fromRegistry(res) } } -const defaultRegistry = 'https://registry.npmjs.org' - function toPurl (arg, reg = defaultRegistry) { const res = npa(arg) @@ -128,60 +149,59 @@ function invalidPurlType (type, raw) { return err } -function Result (opts) { - this.type = opts.type - this.registry = opts.registry - this.where = opts.where - if (opts.raw == null) { - this.raw = opts.name ? opts.name + '@' + opts.rawSpec : opts.rawSpec - } else { - this.raw = opts.raw +class Result { + constructor (opts) { + this.type = opts.type + this.registry = opts.registry + this.where = opts.where + if (opts.raw == null) { + this.raw = opts.name ? `${opts.name}@${opts.rawSpec}` : opts.rawSpec + } else { + this.raw = opts.raw + } + this.rawSpec = opts.rawSpec || '' + this.saveSpec = opts.saveSpec + this.fetchSpec = opts.fetchSpec + if (opts.name) { + this.setName(opts.name) + } + this.gitRange = opts.gitRange + this.gitCommittish = opts.gitCommittish + this.gitSubdir = opts.gitSubdir + this.hosted = opts.hosted } - this.name = undefined - this.escapedName = undefined - this.scope = undefined - this.rawSpec = opts.rawSpec || '' - this.saveSpec = opts.saveSpec - this.fetchSpec = opts.fetchSpec - if (opts.name) { - this.setName(opts.name) - } - this.gitRange = opts.gitRange - this.gitCommittish = opts.gitCommittish - this.gitSubdir = opts.gitSubdir - this.hosted = opts.hosted -} + // TODO move this to a getter/setter in a semver major + setName (name) { + const valid = validatePackageName(name) + if (!valid.validForOldPackages) { + throw invalidPackageName(name, valid, this.raw) + } -Result.prototype.setName = function (name) { - const valid = validatePackageName(name) - if (!valid.validForOldPackages) { - throw invalidPackageName(name, valid, this.raw) + this.name = name + this.scope = name[0] === '@' ? name.slice(0, name.indexOf('/')) : undefined + // scoped packages in couch must have slash url-encoded, e.g. @foo%2Fbar + this.escapedName = name.replace('/', '%2f') + return this } - this.name = name - this.scope = name[0] === '@' ? name.slice(0, name.indexOf('/')) : undefined - // scoped packages in couch must have slash url-encoded, e.g. @foo%2Fbar - this.escapedName = name.replace('/', '%2f') - return this -} - -Result.prototype.toString = function () { - const full = [] - if (this.name != null && this.name !== '') { - full.push(this.name) - } - const spec = this.saveSpec || this.fetchSpec || this.rawSpec - if (spec != null && spec !== '') { - full.push(spec) + toString () { + const full = [] + if (this.name != null && this.name !== '') { + full.push(this.name) + } + const spec = this.saveSpec || this.fetchSpec || this.rawSpec + if (spec != null && spec !== '') { + full.push(spec) + } + return full.length ? full.join('@') : this.raw } - return full.length ? full.join('@') : this.raw -} -Result.prototype.toJSON = function () { - const result = Object.assign({}, this) - delete result.hosted - return result + toJSON () { + const result = Object.assign({}, this) + delete result.hosted + return result + } } // sets res.gitCommittish, res.gitRange, and res.gitSubdir @@ -228,25 +248,89 @@ function setGitAttrs (res, committish) { } } -function fromFile (res, where) { - if (!where) { - where = process.cwd() +// Taken from: EncodePathChars and lookup_table in src/node_url.cc +// url.pathToFileURL only returns absolute references. We can't use it to encode paths. +// encodeURI mangles windows paths. We can't use it to encode paths. +// Under the hood, url.pathToFileURL does a limited set of encoding, with an extra windows step, and then calls path.resolve. +// The encoding node does without path.resolve is not available outside of the source, so we are recreating it here. +const encodedPathChars = new Map([ + ['\0', '%00'], + ['\t', '%09'], + ['\n', '%0A'], + ['\r', '%0D'], + [' ', '%20'], + ['"', '%22'], + ['#', '%23'], + ['%', '%25'], + ['?', '%3F'], + ['[', '%5B'], + ['\\', isWindows ? '/' : '%5C'], + [']', '%5D'], + ['^', '%5E'], + ['|', '%7C'], + ['~', '%7E'], +]) + +function pathToFileURL (str) { + let result = '' + for (let i = 0; i < str.length; i++) { + result = `${result}${encodedPathChars.get(str[i]) ?? str[i]}` } - res.type = isFilename.test(res.rawSpec) ? 'file' : 'directory' + return `file:${result}` +} + +/* parse file package args: + * + * /posix/path + * ./posix/path + * .dotfile + * .dot/path + * filename + * filename.with.ext + * C:\windows\path + * path/with/no/leading/separator + * + * translates to relative ./path + * - file:{path} + * + * translates to absolute /path + * - file:/{path} + * - file://{path} (this is not RFC compliant, but is supported for backward compatibility) + * - file:///{path} + * + * file: specs are url encoded, bare paths are not + * + */ +function fromFile (res, where) { + res.type = isFileType.test(res.rawSpec) ? 'file' : 'directory' res.where = where - // always put the '/' on where when resolving urls, or else - // file:foo from /path/to/bar goes to /path/to/foo, when we want - // it to be /path/to/bar/foo + let rawSpec = res.rawSpec + if (!rawSpec.startsWith('file:')) { + rawSpec = pathToFileURL(rawSpec) + } + + if (rawSpec.startsWith('file:/')) { + // XXX backwards compatibility lack of compliance with RFC 8089 + + // turn file://path into file:/path + if (/^file:\/\/[^/]/.test(rawSpec)) { + rawSpec = `file:/${rawSpec.slice(5)}` + } + + // turn file:/../path into file:../path + // for 1 or 3 leading slashes (2 is already ruled out from handling file:// explicitly above) + if (/^\/{1,3}\.\.?(\/|$)/.test(rawSpec.slice(5))) { + rawSpec = rawSpec.replace(/^file:\/{1,3}/, 'file:') + } + } - let specUrl let resolvedUrl - const prefix = (!/^file:/.test(res.rawSpec) ? 'file:' : '') - const rawWithPrefix = prefix + res.rawSpec - let rawNoPrefix = rawWithPrefix.replace(/^file:/, '') + let specUrl try { - resolvedUrl = new URL(rawWithPrefix, `file://${path.resolve(where)}/`) - specUrl = new URL(rawWithPrefix) + // always put the '/' on "where", or else file:foo from /path/to/bar goes to /path/to/foo, when we want it to be /path/to/bar/foo + resolvedUrl = new URL(rawSpec, `${pathToFileURL(path.resolve(where))}/`) + specUrl = new URL(rawSpec) } catch (originalError) { const er = new Error('Invalid file: URL, must comply with RFC 8089') throw Object.assign(er, { @@ -257,24 +341,6 @@ function fromFile (res, where) { }) } - // XXX backwards compatibility lack of compliance with RFC 8089 - if (resolvedUrl.host && resolvedUrl.host !== 'localhost') { - const rawSpec = res.rawSpec.replace(/^file:\/\//, 'file:///') - resolvedUrl = new URL(rawSpec, `file://${path.resolve(where)}/`) - specUrl = new URL(rawSpec) - rawNoPrefix = rawSpec.replace(/^file:/, '') - } - // turn file:/../foo into file:../foo - // for 1, 2 or 3 leading slashes since we attempted - // in the previous step to make it a file protocol url with a leading slash - if (/^\/{1,3}\.\.?(\/|$)/.test(rawNoPrefix)) { - const rawSpec = res.rawSpec.replace(/^file:\/{1,3}/, 'file:') - resolvedUrl = new URL(rawSpec, `file://${path.resolve(where)}/`) - specUrl = new URL(rawSpec) - rawNoPrefix = rawSpec.replace(/^file:/, '') - } - // XXX end RFC 8089 violation backwards compatibility section - // turn /C:/blah into just C:/blah on windows let specPath = decodeURIComponent(specUrl.pathname) let resolvedPath = decodeURIComponent(resolvedUrl.pathname) @@ -288,7 +354,7 @@ function fromFile (res, where) { if (/^\/~(\/|$)/.test(specPath)) { res.saveSpec = `file:${specPath.substr(1)}` resolvedPath = path.resolve(homedir(), specPath.substr(3)) - } else if (!path.isAbsolute(rawNoPrefix)) { + } else if (!path.isAbsolute(rawSpec.slice(5))) { res.saveSpec = `file:${path.relative(where, resolvedPath)}` } else { res.saveSpec = `file:${path.resolve(resolvedPath)}` @@ -416,3 +482,8 @@ function fromRegistry (res) { } return res } + +module.exports = npa +module.exports.resolve = resolve +module.exports.toPurl = toPurl +module.exports.Result = Result diff --git a/test/basic.js b/test/basic.js index 9cabdb4..90d8403 100644 --- a/test/basic.js +++ b/test/basic.js @@ -1,5 +1,5 @@ -const path = require('path').posix -const os = require('os') +const path = require('node:path').posix +const os = require('node:os') const normalizePath = p => p && p.replace(/^[a-zA-Z]:/, '').replace(/\\/g, '/') @@ -13,7 +13,6 @@ const normalizePaths = spec => { const t = require('tap') const npa = t.mock('..', { path }) -t.on('bailout', () => process.exit(1)) t.test('basic', function (t) { t.setMaxListeners(999) @@ -635,10 +634,13 @@ t.test('basic', function (t) { } Object.keys(tests).forEach(function (arg) { - const res = normalizePaths(npa(arg, '/test/a/b')) - t.ok(res instanceof npa.Result, arg + ' is a result') - Object.keys(tests[arg]).forEach(function (key) { - t.match(res[key], tests[arg][key], arg + ' [' + key + ']') + t.test(arg, t => { + const res = normalizePaths(npa(arg, '/test/a/b')) + t.ok(res instanceof npa.Result, arg + ' is a result') + Object.keys(tests[arg]).forEach(function (key) { + t.match(res[key], tests[arg][key], arg + ' [' + key + ']') + }) + t.end() }) }) @@ -735,9 +737,38 @@ t.test('basic', function (t) { t.end() }) +t.test('directory with non URI compatible components', t => { + t.has(npa('/test%dir'), { + type: 'directory', + name: null, + rawSpec: '/test%dir', + fetchSpec: '/test%dir', + saveSpec: 'file:/test%dir', + }) + t.end() +}) + +t.test('directory cwd has non URI compatible components', t => { + // eslint-disable-next-line max-len + const where = '/tmp/ !"$%&\'()*+,-.0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~' + const originalCwd = process.cwd + t.teardown(() => { + process.cwd = originalCwd + }) + process.cwd = () => where + t.has(npa('./'), { + type: 'directory', + where, + name: null, + rawSpec: './', + fetchSpec: where, + }) + t.end() +}) + t.test('invalid url', t => { const broken = t.mock('..', { - url: { + 'node:url': { URL: class { constructor () { throw new Error('something went wrong') diff --git a/test/windows.js b/test/windows.js index 4406026..ba7c7a7 100644 --- a/test/windows.js +++ b/test/windows.js @@ -1,9 +1,13 @@ -global.FAKE_WINDOWS = true +// redefine process.platform before any requires so that we don't cache a require that got the non-redefined value +const { platform } = process +Object.defineProperty(process, 'platform', { value: 'win32' }) -const npa = require('..') const t = require('tap') +const npa = require('..') -t.on('bailout', () => process.exit(1)) +t.teardown(() => { + Object.defineProperty(process, 'platform', { value: platform }) +}) const cases = { 'C:\\x\\y\\z': { @@ -95,9 +99,12 @@ const cases = { t.test('parse a windows path', function (t) { Object.keys(cases).forEach(function (c) { - const expect = cases[c] - const actual = npa(c, 'C:\\test\\path') - t.has(actual, expect, c) + t.test(c, t => { + const expect = cases[c] + const actual = npa(c, 'C:\\test\\path') + t.has(actual, expect, c) + t.end() + }) }) t.end() })