From 758c3c2d9edd29a3da1b26fb4747e8e4ae02a9bb Mon Sep 17 00:00:00 2001 From: Martin Rademacher Date: Mon, 3 Jun 2024 20:24:55 +1200 Subject: [PATCH] Re-implement `TokenScanner` using nikic/php-parser (#1597) --- composer.json | 1 + phpstan-baseline.neon | 2 +- src/Analysers/TokenScanner.php | 392 ++++-------------- tests/Analysers/TokenScannerTest.php | 40 +- tests/Fixtures/PHP/namespaces3.php | 8 + tests/Fixtures/Scratch/ExclusiveMinMax.php | 2 + .../Scratch/ExclusiveMinMax3.0.0.yaml | 2 +- .../Scratch/ExclusiveMinMax3.1.0.yaml | 2 +- tests/Fixtures/Scratch/NullRef.php | 2 + tests/Fixtures/Scratch/ParameterContent.php | 2 + .../Fixtures/Scratch/ThirdPartyAnnotation.php | 6 +- .../Scratch/ThirdPartyAnnotation3.0.0.yaml | 2 +- .../Scratch/ThirdPartyAnnotation3.1.0.yaml | 2 +- 13 files changed, 145 insertions(+), 318 deletions(-) create mode 100644 tests/Fixtures/PHP/namespaces3.php diff --git a/composer.json b/composer.json index 0a4912b2..6db5331b 100644 --- a/composer.json +++ b/composer.json @@ -46,6 +46,7 @@ "require": { "php": ">=7.4", "ext-json": "*", + "nikic/php-parser": "^4.19", "psr/log": "^1.1 || ^2.0 || ^3.0", "symfony/deprecation-contracts": "^2 || ^3", "symfony/finder": "^5.0 || ^6.0 || ^7.0", diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index abd69448..7120882d 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -31,7 +31,7 @@ parameters: path: Examples/using-links-php81/User.php - - message: "#^Strict comparison using \\=\\=\\= between array\\|string and false will always evaluate to false\\.$#" + message: "#^Call to function array_key_exists\\(\\) with string and array\\{\\} will always evaluate to false\\.$#" count: 1 path: src/Analysers/TokenScanner.php diff --git a/src/Analysers/TokenScanner.php b/src/Analysers/TokenScanner.php index 8ba9ce9e..2367d260 100644 --- a/src/Analysers/TokenScanner.php +++ b/src/Analysers/TokenScanner.php @@ -6,6 +6,16 @@ namespace OpenApi\Analysers; +use PhpParser\Error; +use PhpParser\Node\Stmt\Class_; +use PhpParser\Node\Stmt\ClassLike; +use PhpParser\Node\Stmt\Enum_; +use PhpParser\Node\Stmt\Interface_; +use PhpParser\Node\Stmt\Namespace_; +use PhpParser\Node\Stmt\Trait_; +use PhpParser\Node\Stmt\Use_; +use PhpParser\ParserFactory; + /** * High level, PHP token based, scanner. */ @@ -18,27 +28,37 @@ class TokenScanner */ public function scanFile(string $filename): array { - return $this->scanTokens(token_get_all(file_get_contents($filename))); + $parser = (new ParserFactory())->createForNewestSupportedVersion(); + try { + $stmts = $parser->parse(file_get_contents($filename)); + } catch (Error $e) { + throw new \RuntimeException($e->getMessage(), $e->getCode(), $e); + } + + $result = []; + $result += $this->collect_stmts($stmts, ''); + foreach ($stmts as $stmt) { + if ($stmt instanceof Namespace_) { + $namespace = (string) $stmt->name; + + $result += $this->collect_stmts($stmt->stmts, $namespace); + } + } + + return $result; } - /** - * Scan file for all classes, interfaces and traits. - * - * @return array> File details - */ - protected function scanTokens(array $tokens): array + protected function collect_stmts(array $stmts, string $namespace): array { - $units = []; $uses = []; - $isInterface = false; - $isAbstractFunction = false; - $namespace = ''; - $currentName = null; - $unitLevel = 0; - $lastToken = null; - $stack = []; + $resolve = function (string $name) use ($namespace, &$uses) { + if (array_key_exists($name, $uses)) { + return $uses[$name]; + } - $initUnit = function ($uses): array { + return $namespace . '\\' . $name; + }; + $details = function () use (&$uses) { return [ 'uses' => $uses, 'interfaces' => [], @@ -48,334 +68,86 @@ protected function scanTokens(array $tokens): array 'properties' => [], ]; }; - - while (false !== ($token = $this->nextToken($tokens))) { - // named arguments - $nextToken = $this->nextToken($tokens); - if (($token !== '}' && $nextToken === ':') || $nextToken === false) { - continue; - } - do { - $prevToken = prev($tokens); - } while ($token !== $prevToken); - - if (!is_array($token)) { - switch ($token) { - case '{': - $stack[] = $token; - break; - case '}': - array_pop($stack); - if (count($stack) === $unitLevel) { - $currentName = null; - } - break; - } - continue; - } - - switch ($token[0]) { - case T_ABSTRACT: - if ($stack !== []) { - $isAbstractFunction = true; - } - break; - - case T_CURLY_OPEN: - case T_DOLLAR_OPEN_CURLY_BRACES: - $stack[] = $token[1]; - break; - - case T_NAMESPACE: - $namespace = $this->nextWord($tokens); - break; - - case T_USE: - if (!$stack) { - $uses = array_merge($uses, $this->parseFQNStatement($tokens, $token)); - } elseif ($currentName) { - $traits = $this->resolveFQN($this->parseFQNStatement($tokens, $token), $namespace, $uses); - $units[$currentName]['traits'] = array_merge($units[$currentName]['traits'], $traits); - } - break; - - case T_CLASS: - if ($currentName) { - break; - } - - if ($lastToken && is_array($lastToken) && $lastToken[0] === T_DOUBLE_COLON) { - // ::class - break; - } - - // class name - $token = $this->nextToken($tokens); - - // unless ... - if (is_string($token) && ($token === '(' || $token === '{')) { - // new class[()] { ... } - if ('{' === $token) { - prev($tokens); - } - break; - } elseif (is_array($token) && in_array($token[1], ['extends', 'implements'])) { - // new class[()] extends { ... } - break; - } - - $isInterface = false; - $currentName = $namespace . '\\' . $token[1]; - $unitLevel = count($stack); - $units[$currentName] = $initUnit($uses); - break; - - case T_INTERFACE: - if ($currentName) { - break; - } - - $isInterface = true; - $token = $this->nextToken($tokens); - $currentName = $namespace . '\\' . $token[1]; - $unitLevel = count($stack); - $units[$currentName] = $initUnit($uses); - break; - - case T_EXTENDS: - $fqns = $this->parseFQNStatement($tokens, $token); - if ($isInterface && $currentName) { - $units[$currentName]['interfaces'] = $this->resolveFQN($fqns, $namespace, $uses); - } - if (!is_array($token) || T_IMPLEMENTS !== $token[0]) { - break; - } - // no break - case T_IMPLEMENTS: - $fqns = $this->parseFQNStatement($tokens, $token); - if ($currentName) { - $units[$currentName]['interfaces'] = $this->resolveFQN($fqns, $namespace, $uses); - } + $result = []; + foreach ($stmts as $stmt) { + switch (get_class($stmt)) { + case Use_::class: + $uses += $this->collect_uses($stmt); break; - - case T_FUNCTION: - $token = $this->nextToken($tokens); - if ((!is_array($token) && '&' == $token) - || (defined('T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG') && T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG == $token[0])) { - $token = $this->nextToken($tokens); - } - - if (($unitLevel + 1) == count($stack) && $currentName) { - $units[$currentName]['methods'][] = $token[1]; - if (!$isInterface && !$isAbstractFunction) { - // more nesting - $units[$currentName]['properties'] = array_merge( - $units[$currentName]['properties'], - $this->parsePromotedProperties($tokens) - ); - $this->skipTo($tokens, '{', true); - } else { - // no function body - $this->skipTo($tokens, ';'); - $isAbstractFunction = false; - } - } + case Class_::class: + $result += $this->collect_class($stmt, $details(), $resolve); break; - - case T_VARIABLE: - if (($unitLevel + 1) == count($stack) && $currentName) { - $units[$currentName]['properties'][] = substr($token[1], 1); - } + case Interface_::class: + $result += $this->collect_interface($stmt, $details(), $resolve); break; - default: - // handle trait here too to avoid duplication - if (T_TRAIT === $token[0] || (defined('T_ENUM') && T_ENUM === $token[0])) { - if ($currentName) { - break; - } - - $isInterface = false; - $token = $this->nextToken($tokens); - $currentName = $namespace . '\\' . $token[1]; - $unitLevel = count($stack); - $this->skipTo($tokens, '{', true); - $units[$currentName] = $initUnit($uses); - } + case Trait_::class: + case Enum_::class: + $result += $this->collect_classlike($stmt, $details(), $resolve); break; } - $lastToken = $token; } - return $units; + return $result; } - /** - * Get the next token that is not whitespace or comment. - * - * @return string|array|false Next token - */ - protected function nextToken(array &$tokens) + protected function collect_uses(Use_ $stmt): array { - $token = true; - while ($token) { - $token = next($tokens); - if (is_array($token)) { - if (in_array($token[0], [T_WHITESPACE, T_COMMENT])) { - continue; - } - } + $uses = []; - return $token; + foreach ($stmt->uses as $use) { + $uses[(string) $use->getAlias()] = (string) $use->name; } - return $token; + return $uses; } - /** - * @return array - */ - protected function resolveFQN(array $names, string $namespace, array $uses): array + protected function collect_classlike(ClassLike $stmt, array $details, callable $resolve): array { - $resolve = function ($name) use ($namespace, $uses) { - if ('\\' == $name[0]) { - return substr($name, 1); + foreach ($stmt->getProperties() as $properties) { + foreach ($properties->props as $prop) { + $details['properties'][] = (string) $prop->name; } + } - if (array_key_exists($name, $uses)) { - return $uses[$name]; - } - - return $namespace . '\\' . $name; - }; - - return array_values(array_map($resolve, $names)); - } - - protected function skipTo(array &$tokens, string $char, bool $prev = false): void - { - while (false !== ($token = next($tokens))) { - if (is_string($token) && $token === $char) { - if ($prev) { - prev($tokens); - } - - break; - } + foreach ($stmt->getMethods() as $method) { + $details['methods'][] = (string) $method->name; } - } - /** - * Read next word. - * - * Skips leading whitespace. - */ - protected function nextWord(array &$tokens): string - { - $word = ''; - while (false !== ($token = next($tokens))) { - if (is_array($token)) { - if ($token[0] === T_WHITESPACE) { - if ($word) { - break; - } - continue; - } - $word .= $token[1]; + foreach ($stmt->getTraitUses() as $traitUse) { + foreach ($traitUse->traits as $trait) { + $details['traits'][] = $resolve((string) $trait); } } - return $word; + return [ + $resolve($stmt->name->name) => $details, + ]; } - /** - * Parse a use statement. - */ - protected function parseFQNStatement(array &$tokens, array &$token): array + protected function collect_class(Class_ $stmt, array $details, callable $resolve): array { - $normalizeAlias = function ($alias): string { - $alias = ltrim($alias, '\\'); - $elements = explode('\\', $alias); - - return array_pop($elements); - }; + foreach ($stmt->implements as $implement) { + $details['interfaces'][] = $resolve((string) $implement); + } - $class = ''; - $alias = ''; - $statements = []; - $explicitAlias = false; - $php8NSToken = defined('T_NAME_QUALIFIED') ? [T_NAME_QUALIFIED, T_NAME_FULLY_QUALIFIED] : []; - $nsToken = array_merge([T_STRING, T_NS_SEPARATOR], $php8NSToken); - while ($token !== false) { - $token = $this->nextToken($tokens); - $isNameToken = in_array($token[0], $nsToken); - if (!$explicitAlias && $isNameToken) { - $class .= $token[1]; - $alias = $token[1]; - } elseif ($explicitAlias && $isNameToken) { - $alias .= $token[1]; - } elseif ($token[0] === T_AS) { - $explicitAlias = true; - $alias = ''; - } elseif ($token[0] === T_IMPLEMENTS) { - $statements[$normalizeAlias($alias)] = $class; - break; - } elseif ($token === ',') { - $statements[$normalizeAlias($alias)] = $class; - $class = ''; - $alias = ''; - $explicitAlias = false; - } elseif ($token === ';') { - $statements[$normalizeAlias($alias)] = $class; - break; - } elseif ($token === '{') { - $statements[$normalizeAlias($alias)] = $class; - prev($tokens); - break; - } else { - break; + // promoted properties + if ($ctor = $stmt->getMethod('__construct')) { + foreach ($ctor->getParams() as $param) { + if ($param->flags) { + $details['properties'][] = $param->var->name; + } } } - return $statements; + return $this->collect_classlike($stmt, $details, $resolve); } - protected function parsePromotedProperties(array &$tokens): array + protected function collect_interface(Interface_ $stmt, array $details, callable $resolve): array { - $properties = []; - - $this->skipTo($tokens, '('); - $round = 1; - $promoted = false; - while (false !== ($token = $this->nextToken($tokens))) { - if (is_string($token)) { - switch ($token) { - case '(': - ++$round; - break; - case ')': - --$round; - if (0 == $round) { - return $properties; - } - } - } - if (is_array($token)) { - switch ($token[0]) { - case T_PUBLIC: - case T_PROTECTED: - case T_PRIVATE: - $promoted = true; - break; - case T_VARIABLE: - if ($promoted) { - $properties[] = ltrim($token[1], '$'); - $promoted = false; - } - break; - } - } + foreach ($stmt->extends as $extend) { + $details['interfaces'][] = $resolve((string) $extend); } - return $properties; + return $this->collect_classlike($stmt, $details, $resolve); } } diff --git a/tests/Analysers/TokenScannerTest.php b/tests/Analysers/TokenScannerTest.php index b58feec4..b62b51fa 100644 --- a/tests/Analysers/TokenScannerTest.php +++ b/tests/Analysers/TokenScannerTest.php @@ -48,7 +48,7 @@ public static function scanCases(): iterable 'traits' => ['OpenApi\\Tests\\Fixtures\\Apis\\DocBlocks\\NameTrait'], 'enums' => [], 'methods' => ['__construct'], - 'properties' => ['quantity', 'brand', 'colour', 'id', 'releasedAt'], + 'properties' => ['releasedAt', 'quantity', 'brand', 'colour', 'id'], ], 'OpenApi\\Tests\\Fixtures\\Apis\\DocBlocks\\ProductController' => [ 'uses' => ['OA' => 'OpenApi\\Annotations'], @@ -195,7 +195,10 @@ public static function scanCases(): iterable 'OpenApi\\Tests\\Fixtures\\Parser\\AllTraits' => [ 'uses' => [], 'interfaces' => [], - 'traits' => ['OpenApi\\Tests\\Fixtures\\Parser\\AsTrait', 'OpenApi\\Tests\\Fixtures\\Parser\\HelloTrait'], + 'traits' => [ + 'OpenApi\\Tests\\Fixtures\\Parser\\AsTrait', + 'OpenApi\\Tests\\Fixtures\\Parser\\HelloTrait', + ], 'enums' => [], 'methods' => [], 'properties' => [], @@ -220,6 +223,25 @@ public static function scanCases(): iterable ], ]; + yield 'HelloTrait' => [ + 'Parser/HelloTrait.php', + [ + 'OpenApi\\Tests\\Fixtures\\Parser\\HelloTrait' => [ + 'uses' => [ + 'Aliased' => 'OpenApi\\Tests\\Fixtures\\Parser\\AsTrait', + ], + 'interfaces' => [], + 'traits' => [ + 'OpenApi\\Tests\\Fixtures\\Parser\\OtherTrait', + 'OpenApi\\Tests\\Fixtures\\Parser\\AsTrait', + ], + 'enums' => [], + 'methods' => [], + 'properties' => ['greet'], + ], + ], + ]; + yield 'Php8PromotedProperties' => [ 'PHP/Php8PromotedProperties.php', [ @@ -360,6 +382,20 @@ public static function scanCases(): iterable ], ]; + yield 'namespaces3' => [ + 'PHP/namespaces3.php', + [ + '\\BarClass' => [ + 'uses' => [], + 'interfaces' => [], + 'traits' => [], + 'enums' => [], + 'methods' => [], + 'properties' => [], + ], + ], + ]; + if (\PHP_VERSION_ID >= 80100) { yield 'enum' => [ 'PHP/Enums/StatusEnum.php', diff --git a/tests/Fixtures/PHP/namespaces3.php b/tests/Fixtures/PHP/namespaces3.php new file mode 100644 index 00000000..b647b797 --- /dev/null +++ b/tests/Fixtures/PHP/namespaces3.php @@ -0,0 +1,8 @@ +