diff --git a/composer.json b/composer.json index 96a8cc44..56892e23 100644 --- a/composer.json +++ b/composer.json @@ -68,7 +68,7 @@ "benchmark": "phpbench run --report=default", "phpcs": "PHP_CS_FIXER_IGNORE_ENV=1 php-cs-fixer fix -v --diff --dry-run --allow-risky=yes --ansi", "phpcs:fix": "php-cs-fixer fix -vvv --allow-risky=yes --ansi", - "phpstan": "phpstan analyse -l max -c phpstan.neon --ansi --memory-limit=256M", + "phpstan": "phpstan analyse -l max -c phpstan.neon --ansi --memory-limit=512M", "phpunit": "XDEBUG_MODE=coverage phpunit --coverage-text", "phpunit:min": "phpunit --no-coverage", "test": [ diff --git a/docs/interfaces/7.0/uri-parser-builder.md b/docs/interfaces/7.0/uri-parser-builder.md index e1255bcc..af691ef0 100644 --- a/docs/interfaces/7.0/uri-parser-builder.md +++ b/docs/interfaces/7.0/uri-parser-builder.md @@ -15,6 +15,7 @@ The class act as a drop-in replacement for PHP's `parse_url` feature. ## URI parsing ~~~php +UriString::resolve(string $uri, ?string $baseUri = null): array UriString::parse(string $uri): array UriString::parseAuthority(string $autority): array ~~~ @@ -67,6 +68,25 @@ var_export(UriString::parse('http:www.example.com'));

This invalid HTTP URI is successfully parsed.

The class also exposes a UriString::parseAuthority you can use to parse an authority string.

+If you need to resolve your URI in the context of a Base URI, the `resolve` public static method will let you +do just that. The method expects either a full URI as its single parameter or a relative URI followed by +a base URI which must be absolute; the URI will then be resolved using the base URI. + +```php +$components = UriString::resolve("/foo", "https://example.com"); +//returns the following array +//array( +// 'scheme' => 'https', +// 'user' => null, +// 'pass' => null, +// 'host' => 'example.com', +// 'port' => null, +// 'path' => '/foo', +// 'query' => null, +// 'fragment' => null, +//); +``` + ## URI Building ~~~php @@ -99,3 +119,39 @@ echo UriString::build($components); //displays http://hello:world@foo.com?@bar.c The `build` method provides similar functionality to the `http_build_url()` function from v1.x of the [`pecl_http`](https://pecl.php.net/package/pecl_http) PECL extension.

The class also exposes a UriString::buildAuthority you can use to build an authority from its hash representation.

+ +## URI Normalization + +It is possible to normalize a URI against the RFC3986 rules using the `UriString::normalize` method. +The method expects a string and will return the same array as `UriString::parse` but each component will +have been normalized. + +```php +use League\Uri\UriString; + +$parsed = UriString::parse("https://EXAMPLE.COM/foo/../bar"); +//returns the following array +//array( +// 'scheme' => 'https', +// 'user' => null, +// 'pass' => null, +// 'host' => 'EXAMPLE.COM', +// 'port' => null, +// 'path' => '/foo/../bar', +// 'query' => null, +// 'fragment' => null, +//); + +$normalized = UriString::normalize("https://EXAMPLE.COM/foo/../bar"); +//returns the following array +//array( +// 'scheme' => 'https', +// 'user' => null, +// 'pass' => null, +// 'host' => 'example.com', +// 'port' => null, +// 'path' => '/bar', +// 'query' => null, +// 'fragment' => null, +//); +``` diff --git a/docs/uri/7.0/index.md b/docs/uri/7.0/index.md index 440082ad..0c26dc4c 100644 --- a/docs/uri/7.0/index.md +++ b/docs/uri/7.0/index.md @@ -49,9 +49,6 @@ install the `fileinfo` extension otherwise an exception will be thrown. To convert a URI into an HTML anchor tag you need to have the `ext-dom` extension installed in your system. -To enable URI normalization, the `ext-mbstring` extension or a polyfill -like `symfony/polyfill-mbstring` must be present in your system. 
- Installation -------- diff --git a/interfaces/CHANGELOG.md b/interfaces/CHANGELOG.md index f17069df..0953748f 100644 --- a/interfaces/CHANGELOG.md +++ b/interfaces/CHANGELOG.md @@ -15,6 +15,9 @@ All Notable changes to `League\Uri\Interfaces` will be documented in this file - `UriInterface::toNormalizedString` - `UriInterface::getUser` - `League\Uri\IPv6\Converter::isIpv6` +- `UriString::resolve` +- `UriString::removeDotSegments` +- `UriString::normalize` ### Fixed diff --git a/interfaces/Encoder.php b/interfaces/Encoder.php index 4324e03c..94a7a242 100644 --- a/interfaces/Encoder.php +++ b/interfaces/Encoder.php @@ -41,6 +41,13 @@ final class Encoder private const REGEXP_PART_UNRESERVED = 'A-Za-z\d_\-.~'; private const REGEXP_PART_ENCODED = '%(?![A-Fa-f\d]{2})'; + /** + * Unreserved characters. + * + * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-2.3 + */ + private const REGEXP_UNRESERVED_CHARACTERS = ',%(2[1-9A-Fa-f]|[3-7][0-9A-Fa-f]|61|62|64|65|66|7[AB]|5F),'; + /** * Encode User. * @@ -173,4 +180,17 @@ private static function decode(Stringable|string|int|null $component, Closure $d default => $component, }; } + + public static function decodeUnreservedCharacters(?string $str): ?string + { + return match (true) { + null === $str, + '' === $str => $str, + default => preg_replace_callback( + self::REGEXP_UNRESERVED_CHARACTERS, + static fn (array $matches): string => rawurldecode($matches[0]), + $str + ) ?? 
'', + }; + } } diff --git a/interfaces/Idna/Converter.php b/interfaces/Idna/Converter.php index b993e9e0..6bd62a30 100644 --- a/interfaces/Idna/Converter.php +++ b/interfaces/Idna/Converter.php @@ -141,7 +141,7 @@ public static function toUnicode(Stringable|string $domain, Option|int|null $opt $domain = rawurldecode((string) $domain); if (false === stripos($domain, 'xn--')) { - return Result::fromIntl(['result' => $domain, 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]); + return Result::fromIntl(['result' => strtolower($domain), 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]); } FeatureDetection::supportsIdn(); diff --git a/interfaces/UriString.php b/interfaces/UriString.php index a7918498..8c4c934e 100644 --- a/interfaces/UriString.php +++ b/interfaces/UriString.php @@ -17,11 +17,17 @@ use League\Uri\Exceptions\MissingFeature; use League\Uri\Exceptions\SyntaxError; use League\Uri\Idna\Converter; +use League\Uri\Idna\Converter as IdnaConverter; +use League\Uri\IPv6\Converter as IPv6Converter; use Stringable; use function array_merge; +use function array_pop; +use function array_reduce; +use function end; use function explode; use function filter_var; +use function implode; use function inet_pton; use function preg_match; use function rawurldecode; @@ -40,8 +46,8 @@ * @author Ignace Nyamagana Butera * @since 6.0.0 * - * @phpstan-type AuthorityMap array{user:?string, pass:?string, host:?string, port:?int} - * @phpstan-type ComponentMap array{scheme:?string, user:?string, pass:?string, host:?string, port:?int, path:string, query:?string, fragment:?string} + * @phpstan-type AuthorityMap array{user: ?string, pass: ?string, host: ?string, port: ?int} + * @phpstan-type ComponentMap array{scheme: ?string, user: ?string, pass: ?string, host: ?string, port: ?int, path: string, query: ?string, fragment: ?string} * @phpstan-type InputComponentMap array{scheme? : ?string, user? : ?string, pass? : ?string, host? : ?string, port? 
: ?int, path? : ?string, query? : ?string, fragment? : ?string} */ final class UriString @@ -159,6 +165,9 @@ final class UriString */ private const REGEXP_IDN_PATTERN = '/[^\x20-\x7f]/'; + /** @var array */ + private const DOT_SEGMENTS = ['.' => 1, '..' => 1]; + /** * Only the address block fe80::/10 can have a Zone ID attach to * let's detect the link local significant 10 bits. @@ -262,6 +271,159 @@ public static function buildAuthority(array $components): ?string return $components['user'].':'.$components['pass'].$authority; } + /** + * Parses and normalizes the URI following RFC3986 destructive and non-destructive constraints. + * + * @throws SyntaxError if the URI is not parsable + * + * @return ComponentMap + */ + public static function normalize(Stringable|string $uri): array + { + $components = UriString::parse($uri); + if (null !== $components['scheme']) { + $components['scheme'] = strtolower($components['scheme']); + } + + if (null !== $components['host']) { + $components['host'] = IdnaConverter::toUnicode((string)IPv6Converter::compress($components['host']))->domain(); + } + + $path = $components['path']; + if ('/' === ($path[0] ?? '') || '' !== $components['scheme'].self::buildAuthority($components)) { + $path = self::removeDotSegments($path); + } + + $path = Encoder::decodeUnreservedCharacters($path); + if (null !== self::buildAuthority($components) && ('' === $path || null === $path)) { + $path = '/'; + } + + $components['path'] = (string) $path; + $components['query'] = Encoder::decodeUnreservedCharacters($components['query']); + $components['fragment'] = Encoder::decodeUnreservedCharacters($components['fragment']); + $components['user'] = Encoder::decodeUnreservedCharacters($components['user']); + $components['pass'] = Encoder::decodeUnreservedCharacters($components['pass']); + + return $components; + } + + /** + * Resolves a URI against a base URI using RFC3986 rules. 
+ * + * This method MUST retain the state of the submitted URI instance, and return + * a URI instance of the same type that contains the applied modifications. + * + * This method MUST be transparent when dealing with error and exceptions. + * It MUST not alter or silence them apart from validating its own parameters. + * + * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-5 + * + * @throws SyntaxError if the BaseUri is not absolute or in absence of a BaseUri if the uri is not absolute + * + * @return ComponentMap + */ + public static function resolve(Stringable|string $uri, Stringable|string|null $baseUri = null): array + { + $uri = self::parse($uri); + $baseUri = null !== $baseUri ? self::parse($baseUri) : $uri; + if (null === $baseUri['scheme']) { + throw new SyntaxError('The base URI must be an absolute URI or null; If the base URI is null the URI must be an absolute URI.'); + } + + if (null !== $uri['scheme'] && '' !== $uri['scheme']) { + $uri['path'] = self::removeDotSegments($uri['path']); + + return $uri; + } + + if (null !== self::buildAuthority($uri)) { + $uri['scheme'] = $baseUri['scheme']; + $uri['path'] = self::removeDotSegments($uri['path']); + + return $uri; + } + + [$path, $query] = self::resolvePathAndQuery($uri, $baseUri); + $path = UriString::removeDotSegments($path); + if ('' !== $path && '/' !== $path[0] && null !== self::buildAuthority($baseUri)) { + $path = '/'.$path; + } + + $baseUri['path'] = $path; + $baseUri['query'] = $query; + $baseUri['fragment'] = $uri['fragment']; + + return $baseUri; + } + + /** + * Remove dot segments from the URI path as per RFC specification. + */ + public static function removeDotSegments(Stringable|string $path): string + { + $path = (string) $path; + if (!str_contains($path, '.')) { + return $path; + } + + $reducer = function (array $carry, string $segment): array { + if ('..' 
=== $segment) { + array_pop($carry); + + return $carry; + } + + if (!isset(self::DOT_SEGMENTS[$segment])) { + $carry[] = $segment; + } + + return $carry; + }; + + $oldSegments = explode('/', $path); + $newPath = implode('/', array_reduce($oldSegments, $reducer(...), [])); + if (isset(self::DOT_SEGMENTS[end($oldSegments)])) { + $newPath .= '/'; + } + + return $newPath; + } + + /** + * Resolves an URI path and query component. + * + * @param ComponentMap $uri + * @param ComponentMap $baseUri + * + * @return array{0:string, 1:string|null} + */ + private static function resolvePathAndQuery(array $uri, array $baseUri): array + { + if (str_starts_with($uri['path'], '/')) { + return [$uri['path'], $uri['query']]; + } + + if ('' === $uri['path']) { + return [$baseUri['path'], $uri['query'] ?? $baseUri['query']]; + } + + $targetPath = $uri['path']; + if (null !== self::buildAuthority($baseUri) && '' === $baseUri['path']) { + $targetPath = '/'.$targetPath; + } + + if ('' !== $baseUri['path']) { + $segments = explode('/', $baseUri['path']); + array_pop($segments); + if ([] !== $segments) { + $targetPath = implode('/', $segments).'/'.$targetPath; + } + } + + return [$targetPath, $uri['query']]; + } + /** * Parse a URI string into its components. 
* @@ -309,7 +471,7 @@ public static function parse(Stringable|string|int $uri): array $uri = (string) $uri; if (isset(self::URI_SHORTCUTS[$uri])) { /** @var ComponentMap $components */ - $components = array_merge(self::URI_COMPONENTS, self::URI_SHORTCUTS[$uri]); + $components = [...self::URI_COMPONENTS, ...self::URI_SHORTCUTS[$uri]]; return $components; } diff --git a/interfaces/UriStringTest.php b/interfaces/UriStringTest.php index 87fce8b5..1825d537 100644 --- a/interfaces/UriStringTest.php +++ b/interfaces/UriStringTest.php @@ -20,6 +20,8 @@ final class UriStringTest extends TestCase { + private const BASE_URI = 'http://a/b/c/d;p?q'; + #[DataProvider('validUriProvider')] public function testParseSucced(Stringable|string|int $uri, array $expected): void { @@ -974,4 +976,55 @@ public static function buildUriProvider(): array ], ]; } + + #[DataProvider('resolveProvider')] + public function testCreateResolve(string $baseUri, string $uri, string $expected): void + { + self::assertSame($expected, UriString::build(UriString::resolve($uri, $baseUri))); + } + + public static function resolveProvider(): array + { + return [ + 'base uri' => [self::BASE_URI, '', self::BASE_URI], + 'scheme' => [self::BASE_URI, 'http://d/e/f', 'http://d/e/f'], + 'path 1' => [self::BASE_URI, 'g', 'http://a/b/c/g'], + 'path 2' => [self::BASE_URI, './g', 'http://a/b/c/g'], + 'path 3' => [self::BASE_URI, 'g/', 'http://a/b/c/g/'], + 'path 4' => [self::BASE_URI, '/g', 'http://a/g'], + 'authority' => [self::BASE_URI, '//g', 'http://g'], + 'query' => [self::BASE_URI, '?y', 'http://a/b/c/d;p?y'], + 'path + query' => [self::BASE_URI, 'g?y', 'http://a/b/c/g?y'], + 'fragment' => [self::BASE_URI, '#s', 'http://a/b/c/d;p?q#s'], + 'path + fragment' => [self::BASE_URI, 'g#s', 'http://a/b/c/g#s'], + 'path + query + fragment' => [self::BASE_URI, 'g?y#s', 'http://a/b/c/g?y#s'], + 'single dot 1' => [self::BASE_URI, '.', 'http://a/b/c/'], + 'single dot 2' => [self::BASE_URI, './', 'http://a/b/c/'], + 'single dot 
3' => [self::BASE_URI, './g/.', 'http://a/b/c/g/'], + 'single dot 4' => [self::BASE_URI, 'g/./h', 'http://a/b/c/g/h'], + 'double dot 1' => [self::BASE_URI, '..', 'http://a/b/'], + 'double dot 2' => [self::BASE_URI, '../', 'http://a/b/'], + 'double dot 3' => [self::BASE_URI, '../g', 'http://a/b/g'], + 'double dot 4' => [self::BASE_URI, '../..', 'http://a/'], + 'double dot 5' => [self::BASE_URI, '../../', 'http://a/'], + 'double dot 6' => [self::BASE_URI, '../../g', 'http://a/g'], + 'double dot 7' => [self::BASE_URI, '../../../g', 'http://a/g'], + 'double dot 8' => [self::BASE_URI, '../../../../g', 'http://a/g'], + 'double dot 9' => [self::BASE_URI, 'g/../h' , 'http://a/b/c/h'], + 'mulitple slashes' => [self::BASE_URI, 'foo////g', 'http://a/b/c/foo////g'], + 'complex path 1' => [self::BASE_URI, ';x', 'http://a/b/c/;x'], + 'complex path 2' => [self::BASE_URI, 'g;x', 'http://a/b/c/g;x'], + 'complex path 3' => [self::BASE_URI, 'g;x?y#s', 'http://a/b/c/g;x?y#s'], + 'complex path 4' => [self::BASE_URI, 'g;x=1/./y', 'http://a/b/c/g;x=1/y'], + 'complex path 5' => [self::BASE_URI, 'g;x=1/../y', 'http://a/b/c/y'], + 'dot segments presence 1' => [self::BASE_URI, '/./g', 'http://a/g'], + 'dot segments presence 2' => [self::BASE_URI, '/../g', 'http://a/g'], + 'dot segments presence 3' => [self::BASE_URI, 'g.', 'http://a/b/c/g.'], + 'dot segments presence 4' => [self::BASE_URI, '.g', 'http://a/b/c/.g'], + 'dot segments presence 5' => [self::BASE_URI, 'g..', 'http://a/b/c/g..'], + 'dot segments presence 6' => [self::BASE_URI, '..g', 'http://a/b/c/..g'], + 'origin uri without path' => ['http://h:b@a', 'b/../y', 'http://h:b@a/y'], + 'not same origin' => [self::BASE_URI, 'ftp://a/b/c/d', 'ftp://a/b/c/d'], + ]; + } } diff --git a/uri/Uri.php b/uri/Uri.php index e7c9baab..3d94a13f 100644 --- a/uri/Uri.php +++ b/uri/Uri.php @@ -43,7 +43,6 @@ use function array_keys; use function array_map; use function array_pop; -use function array_reduce; use function base64_decode; use function 
base64_encode; use function count; @@ -64,7 +63,6 @@ use function ltrim; use function preg_match; use function preg_replace_callback; -use function preg_split; use function rawurldecode; use function rawurlencode; use function restore_error_handler; @@ -74,13 +72,11 @@ use function str_repeat; use function str_replace; use function str_starts_with; -use function strcmp; use function strlen; use function strpos; use function strspn; use function strtolower; use function substr; -use function uksort; use const FILEINFO_MIME; use const FILEINFO_MIME_TYPE; @@ -91,7 +87,6 @@ use const FILTER_VALIDATE_IP; use const JSON_PRESERVE_ZERO_FRACTION; use const PHP_ROUND_HALF_EVEN; -use const PREG_SPLIT_NO_EMPTY; /** * @phpstan-import-type ComponentMap from UriString @@ -244,9 +239,6 @@ final class Uri implements Conditionable, UriInterface, UriRenderer, UriInspecto /** @var array */ private const WHATWG_SPECIAL_SCHEMES = ['ftp' => 1, 'http' => 1, 'https' => 1, 'ws' => 1, 'wss' => 1]; - /** @var array */ - private const DOT_SEGMENTS = ['.' => 1, '..' => 1]; - private readonly ?string $scheme; private readonly ?string $user; private readonly ?string $pass; @@ -1686,9 +1678,10 @@ public function equals(UriInterface|Stringable|string $uri, bool $excludeFragmen public function normalize(): UriInterface { return $this + ->withUserInfo($this->decodeUnreservedCharacters($this->user), $this->decodeUnreservedCharacters($this->pass)) ->withHost($this->normalizeHost()) ->withPath($this->normalizePath()) - ->withQuery($this->decodeUnreservedCharacters($this->sortQuery($this->query))) + ->withQuery($this->decodeUnreservedCharacters($this->query)) ->withFragment($this->decodeUnreservedCharacters($this->fragment)); } @@ -1696,7 +1689,7 @@ private function normalizePath(): string { $path = $this->path; if ('/' === ($path[0] ?? 
'') || '' !== $this->scheme.$this->authority) { - $path = self::removeDotSegments($path); + $path = UriString::removeDotSegments($path); } $path = (string) $this->decodeUnreservedCharacters($path); @@ -1720,71 +1713,6 @@ private function decodeUnreservedCharacters(?string $str): ?string }; } - private function sortQuery(?string $query): ?string - { - $codepoints = fn (?string $str): string => in_array($str, ['', null], true) ? '' : implode('.', array_map( - mb_ord(...), /* @phpstan-ignore-line */ - (array) preg_split(pattern:'//u', subject: $str, flags: PREG_SPLIT_NO_EMPTY) - )); - - $compare = fn (string $name1, string $name2): int => match (1) { - preg_match('/[^\x20-\x7f]/', $name1.$name2) => strcmp($codepoints($name1), $codepoints($name2)), - default => strcmp($name1, $name2), - }; - - $pairs = QueryString::parseFromValue($query); - $parameters = array_reduce($pairs, function (array $carry, array $pair) { - $carry[$pair[0]] ??= []; - $carry[$pair[0]][] = $pair[1]; - - return $carry; - }, []); - - uksort($parameters, $compare); - - $newPairs = []; - foreach ($parameters as $key => $values) { - $newPairs = [...$newPairs, ...array_map(fn ($value) => [$key, $value], $values)]; - } - - return match ($newPairs) { - $pairs => $query, - default => QueryString::buildFromPairs($newPairs), - }; - } - - /** - * Remove dot segments from the URI path as per RFC specification. - */ - private static function removeDotSegments(string $path): string - { - if (!str_contains($path, '.')) { - return $path; - } - - $reducer = function (array $carry, string $segment): array { - if ('..' 
=== $segment) { - array_pop($carry); - - return $carry; - } - - if (!isset(static::DOT_SEGMENTS[$segment])) { - $carry[] = $segment; - } - - return $carry; - }; - - $oldSegments = explode('/', $path); - $newPath = implode('/', array_reduce($oldSegments, $reducer(...), [])); - if (isset(static::DOT_SEGMENTS[end($oldSegments)])) { - $newPath .= '/'; - } - - return $newPath; - } - /** * Resolves a URI against a base URI using RFC3986 rules. * @@ -1804,17 +1732,17 @@ public function resolve(Stringable|string $uri): UriInterface if (null !== $uri->getScheme()) { return $uri - ->withPath(self::removeDotSegments($uri->getPath())); + ->withPath(UriString::removeDotSegments($uri->getPath())); } if (null !== $uri->getAuthority()) { return $uri - ->withPath(self::removeDotSegments($uri->getPath())) + ->withPath(UriString::removeDotSegments($uri->getPath())) ->withScheme($this->scheme); } [$path, $query] = $this->resolvePathAndQuery($uri); - $path = self::removeDotSegments($path); + $path = UriString::removeDotSegments($path); if ('' !== $path && '/' !== $path[0] && null !== $this->getAuthority()) { $path = '/'.$path; } diff --git a/uri/UriTest.php b/uri/UriTest.php index 3dbbf615..93df2bf6 100644 --- a/uri/UriTest.php +++ b/uri/UriTest.php @@ -529,8 +529,6 @@ public static function resolveProvider(): array ]; } - - public function testRelativizeIsNotMade(): void { $uri = '//path#fragment'; diff --git a/uri/composer.json b/uri/composer.json index 62162a1a..d88b6343 100644 --- a/uri/composer.json +++ b/uri/composer.json @@ -64,8 +64,7 @@ "jeremykendall/php-domain-parser": "to resolve Public Suffix and Top Level Domain", "league/uri-components" : "Needed to easily manipulate URI objects components", "php-64bit": "to improve IPV4 host parsing", - "symfony/polyfill-intl-idn": "to handle IDN host via the Symfony polyfill if ext-intl is not present", - "symfony/polyfill-mbstring": "to handle URI normalization if the ext-mbstring is not present" + "symfony/polyfill-intl-idn": "to 
handle IDN host via the Symfony polyfill if ext-intl is not present" }, "extra": { "branch-alias": {