From 6241c4e23428e5c44fa43731e284ab220a8af6a1 Mon Sep 17 00:00:00 2001 From: Dovid Levine Date: Wed, 7 Aug 2024 13:14:25 +0300 Subject: [PATCH] fix: prevent fatal errors with DOMHelpers methods by improving typesafety and returning early (#257) * chore: update Composer dev-deps to latest * chore: cleanup EOL whitespace [phpcbf] * chore: update phpstan.neon config * fix: `Block::get_attribute_type()` param type and usage. * fix: improve typesafety in DOMHelpers --- .changeset/few-cooks-reflect.md | 12 ++ includes/Blocks/Block.php | 31 +++-- includes/Field/BlockSupports/Anchor.php | 2 +- includes/Utilities/DOMHelpers.php | 173 +++++++++++++++++++++--- tests/unit/DOMHelpersTest.php | 53 ++++---- 5 files changed, 212 insertions(+), 59 deletions(-) create mode 100644 .changeset/few-cooks-reflect.md diff --git a/.changeset/few-cooks-reflect.md b/.changeset/few-cooks-reflect.md new file mode 100644 index 00000000..1b3c07b7 --- /dev/null +++ b/.changeset/few-cooks-reflect.md @@ -0,0 +1,12 @@ +--- +"@wpengine/wp-graphql-content-blocks": minor +--- + +fix: prevent fatal errors by improving type-safety and returning early when parsing HTML. +The following methods have been deprecated for their stricter-typed counterparts: + - `DOMHelpers::parseAttribute()` => `::parse_attribute()` + - `DOMHelpers::parseFirstNodeAttribute()` => `::parse_first_node_attribute()` + - `DOMHelpers::parseHTML()` => `::parse_html()` + - `DOMHelpers::getElementsFromHTML()` => `::get_elements_from_html()` + - `DOMHelpers::parseText()` => `::parse_text()` + - `DOMHelpers::findNodes()`=> `::find_nodes()` diff --git a/includes/Blocks/Block.php b/includes/Blocks/Block.php index 56ce4098..19d3c92c 100644 --- a/includes/Blocks/Block.php +++ b/includes/Blocks/Block.php @@ -373,6 +373,9 @@ private function register_type(): void { private function resolve_block_attributes_recursive( $attributes, string $html, array $config ): array { $result = []; + // Clean up the html. + $html = trim( $html ); + foreach ( $config as $key => $value ) { // Get default value. $default = $value['default'] ?? null; @@ -433,20 +436,18 @@ private function resolve_block_attributes_recursive( $attributes, string $html, * * @param string $html The html value * @param string $source The source type - * - * @return string|null */ - private function parse_single_source( $html, $source ) { - $value = null; + private function parse_single_source( string $html, $source ): ?string { if ( empty( $html ) ) { - return $value; + return null; } + switch ( $source ) { case 'html': - $value = DOMHelpers::findNodes( $html )->innerHTML(); - break; + return DOMHelpers::find_nodes( $html )->innerHTML(); } - return $value; + + return null; } /** @@ -458,15 +459,15 @@ private function parse_single_source( $html, $source ) { * @param array $value The value configuration. */ private function parse_html_source( string $html, $value ): ?string { - if ( ! isset( $value['selector'] ) ) { + if ( empty( $html ) || ! isset( $value['selector'] ) ) { return null; } - $result = DOMHelpers::parseHTML( $html, $value['selector'] ); + $result = DOMHelpers::parse_html( $html, $value['selector'] ); // Multiline values are located somewhere else. if ( isset( $value['multiline'] ) && ! empty( $result ) ) { - $result = DOMHelpers::getElementsFromHTML( $result, $value['multiline'] ); + $result = DOMHelpers::get_elements_from_html( $result, $value['multiline'] ); } return $result; @@ -479,11 +480,11 @@ private function parse_html_source( string $html, $value ): ?string { * @param array $value The value configuration. */ private function parse_attribute_source( string $html, $value ): ?string { - if ( ! isset( $value['selector'] ) || ! isset( $value['attribute'] ) ) { + if ( empty( $html ) || ! isset( $value['selector'] ) || ! isset( $value['attribute'] ) ) { return null; } - return DOMHelpers::parseAttribute( $html, $value['selector'], $value['attribute'] ); + return DOMHelpers::parse_attribute( $html, $value['selector'], $value['attribute'] ); } /** @@ -497,7 +498,7 @@ private function parse_text_source( string $html, $value ): ?string { return null; } - return DOMHelpers::parseText( $html, $value['selector'] ); + return DOMHelpers::parse_text( $html, $value['selector'] ); } /** @@ -514,7 +515,7 @@ private function parse_query_source( string $html, $value, $attributes ): ?array return null; } - $nodes = DOMHelpers::findNodes( $html, $value['selector'] ); + $nodes = DOMHelpers::find_nodes( $html, $value['selector'] ); // Coerce nodes to an array if it's not already. if ( ! is_array( $nodes ) ) { diff --git a/includes/Field/BlockSupports/Anchor.php b/includes/Field/BlockSupports/Anchor.php index 7480ee32..4ca1ba02 100644 --- a/includes/Field/BlockSupports/Anchor.php +++ b/includes/Field/BlockSupports/Anchor.php @@ -31,7 +31,7 @@ public static function register(): void { if ( empty( $rendered_block ) ) { return null; } - return DOMHelpers::parseFirstNodeAttribute( $rendered_block, 'id' ); + return DOMHelpers::parse_first_node_attribute( $rendered_block, 'id' ); }, ], ], diff --git a/includes/Utilities/DOMHelpers.php b/includes/Utilities/DOMHelpers.php index c7ccb607..a92cc96b 100644 --- a/includes/Utilities/DOMHelpers.php +++ b/includes/Utilities/DOMHelpers.php @@ -21,25 +21,32 @@ final class DOMHelpers { * @param string $attribute The attribute to extract. * @param mixed $default_value The default value to return if the selector is not found. * - * @return string|null extracted attribute + * @return ?string extracted attribute */ - public static function parseAttribute( $html, $selector, $attribute, $default_value = null ): ?string { + public static function parse_attribute( string $html, string $selector, string $attribute, $default_value = null ): ?string { + // Bail early if there's no html to parse. + if ( empty( trim( $html ) ) ) { + return null; + } + $doc = new Document(); $doc->loadHtml( $html ); + if ( '*' === $selector ) { $selector = '*[' . $attribute . ']'; - } - - if ( empty( $selector ) ) { + } elseif ( empty( $selector ) ) { $selector = '*'; } + $nodes = $doc->find( $selector ); $default_value = isset( $default_value ) ? $default_value : null; + foreach ( $nodes as $node ) { if ( $node->hasAttribute( $attribute ) ) { return $node->getAttribute( $attribute ); } } + return $default_value; } @@ -51,14 +58,14 @@ public static function parseAttribute( $html, $selector, $attribute, $default_va * * @return string|null extracted attribute */ - public static function parseFirstNodeAttribute( $html, $attribute ) { - $value = null; - + public static function parse_first_node_attribute( string $html, string $attribute ) { // Bail early if there's no html to parse. if ( empty( trim( $html ) ) ) { - return $value; + return null; } + $value = null; + $doc = new Document( $html ); // $html $elem = $doc->find( '*' )[2]; @@ -72,13 +79,18 @@ public static function parseFirstNodeAttribute( $html, $attribute ) { /** * Parses the given HTML string to extract the innerHTML contents. * - * @param string $html The HTML string to parse. - * @param string $selector The selector to use. - * @param mixed $default_value The default value to return if the selector is not found. + * @param string $html The HTML string to parse. + * @param string $selector The selector to use. + * @param ?string $default_value The default value to return if the selector is not found. * - * @return string extracted innerHTML of selector + * @return ?string extracted innerHTML of selector */ - public static function parseHTML( $html, $selector, $default_value = null ) { + public static function parse_html( string $html, string $selector, ?string $default_value = null ): ?string { + // Bail early if there's no html to parse. + if ( empty( trim( $html ) ) ) { + return $default_value; + } + $doc = new Document(); $doc->loadHtml( $html ); @@ -99,9 +111,14 @@ public static function parseHTML( $html, $selector, $default_value = null ) { * @param string $html The HTML string to parse. * @param string $selector The element (selector) to extract. * - * @return string the HTML string of the extracted elements + * @return ?string the HTML string of the extracted elements */ - public static function getElementsFromHTML( $html, $selector ) { + public static function get_elements_from_html( string $html, string $selector ): ?string { + // Bail early if there's no html to parse. + if ( empty( trim( $html ) ) ) { + return null; + } + $doc = new Document(); $doc->loadHtml( $html ); @@ -126,7 +143,12 @@ public static function getElementsFromHTML( $html, $selector ) { * * @return ?string The text content of the selector if found. */ - public static function parseText( $html, $selector ) { + public static function parse_text( string $html, string $selector ): ?string { + // Bail early if there's no html to parse. + if ( empty( trim( $html ) ) ) { + return null; + } + $doc = new Document(); $doc->loadHtml( $html ); @@ -150,7 +172,7 @@ public static function parseText( $html, $selector ) { * * @return \DiDom\Element[]|\DiDom\Element */ - public static function findNodes( $html, $selector = null ) { + public static function find_nodes( string $html, ?string $selector = null ) { // Bail early if there's no html to parse. if ( empty( trim( $html ) ) ) { return []; @@ -170,4 +192,119 @@ public static function findNodes( $html, $selector = null ) { return $elem; } + + + /** + * Deprecated methods. + */ + + /** + * Parses the given HTML string to extract the specified attribute selector. + * + * Deprecated in favor of ::parse_attribute(). + * + * @param string $html The HTML string to parse. + * @param string $selector The selector to use. + * @param string $attribute The attribute to extract. + * @param mixed $default_value The default value to return if the selector is not found. + * + * @deprecated @todo Use parse_attribute instead. + */ + public static function parseAttribute( $html, $selector, $attribute, $default_value = null ): ?string { + _deprecated_function( __METHOD__, '@todo', self::class . '::parse_attribute' ); + + return self::parse_attribute( $html, $selector, $attribute, $default_value ); + } + + /** + * Parses the given HTML string to extract the specified attribute selector of the first node. + * + * Deprecated in favor of ::parse_first_node_attribute(). + * + * @param string $html The HTML string to parse. + * @param string $attribute The attribute to extract of the first node. + * + * @return string|null extracted attribute + * + * @deprecated @todo Use parse_first_node_attribute instead. + */ + public static function parseFirstNodeAttribute( $html, $attribute ) { + _deprecated_function( __METHOD__, '@todo', self::class . '::parse_first_node_attribute' ); + + return self::parse_first_node_attribute( $html, $attribute ); + } + + /** + * Parses the given HTML string to extract the innerHTML contents. + * + * Deprecated in favor of ::parse_html(). + * + * @param string $html The HTML string to parse. + * @param string $selector The selector to use. + * @param mixed $default_value The default value to return if the selector is not found. + * + * @return string extracted innerHTML of selector + * + * @deprecated @todo Use parse_html instead. + */ + public static function parseHTML( $html, $selector, $default_value = null ) { + _deprecated_function( __METHOD__, '@todo', self::class . '::parse_html' ); + + return (string) self::parse_html( $html, $selector, $default_value ); + } + + /** + * Parses the given HTML string and extracts the specified elements. + * + * Deprecated in favor of ::get_elements_from_html(). + * + * @param string $html The HTML string to parse. + * @param string $selector The element (selector) to extract. + * + * @return string the HTML string of the extracted elements + * + * @deprecated @todo Use get_elements_from_html instead. + */ + public static function getElementsFromHTML( $html, $selector ) { + _deprecated_function( __METHOD__, '@todo', self::class . '::get_elements_from_html' ); + + return (string) self::get_elements_from_html( $html, $selector ); + } + + /** + * Gets the text content of a given selector. If multiple selectors exist, + * the first result will be used. + * + * Deprecated in favor of ::parse_text(). + * + * @param string $html The HTML string to parse. + * @param string $selector The selector to get the text content from. + * + * @return ?string The text content of the selector if found. + * + * @deprecated @todo Use parse_text instead. + */ + public static function parseText( $html, $selector ) { + _deprecated_function( __METHOD__, '@todo', self::class . '::parse_text' ); + + return self::parse_text( $html, $selector ); + } + + /** + * Parses the html into DOMElement and searches the DOM tree for a given XPath expression or CSS selector. + * + * Deprecated in favor of ::find_nodes(). + * + * @param string $html The HTML string to parse. + * @param string|null $selector The selector to use. + * + * @return \DiDom\Element[]|\DiDom\Element + * + * @deprecated @todo Use find_nodes instead. + */ + public static function findNodes( $html, $selector = null ) { + _deprecated_function( __METHOD__, '@todo', self::class . '::find_nodes' ); + + return self::find_nodes( $html, $selector ); + } } diff --git a/tests/unit/DOMHelpersTest.php b/tests/unit/DOMHelpersTest.php index 1868ae08..f507de07 100644 --- a/tests/unit/DOMHelpersTest.php +++ b/tests/unit/DOMHelpersTest.php @@ -19,27 +19,28 @@ public function testParseAttribute(): void { $id_attribute = 'id'; // $html - $this->assertNull( DOMHelpers::parseAttribute( $html, $no_existent_selector, $data_attribute ) ); - $this->assertEquals( DOMHelpers::parseAttribute( $html, $no_existent_selector, $data_attribute, 'Bar' ), 'Bar' ); - $this->assertEquals( DOMHelpers::parseAttribute( $html, $id_selector, $data_attribute ), 'foo-data' ); - $this->assertEquals( DOMHelpers::parseAttribute( $html, $id_selector, $class_attribute ), 'foo-class' ); - $this->assertEquals( DOMHelpers::parseAttribute( $html, $id_selector, $id_attribute ), 'foo-id' ); - $this->assertEquals( DOMHelpers::parseAttribute( $html, $class_selector, $data_attribute ), 'foo-data' ); - $this->assertEquals( DOMHelpers::parseAttribute( $html, $class_selector, $class_attribute ), 'foo-class' ); - $this->assertEquals( DOMHelpers::parseAttribute( $html, $class_selector, $id_attribute ), 'foo-id' ); - $this->assertEquals( DOMHelpers::parseAttribute( $html, $element_selector, $data_attribute ), 'foo-data' ); - $this->assertEquals( DOMHelpers::parseAttribute( $html, $element_selector, $class_attribute ), 'foo-class' ); - $this->assertEquals( DOMHelpers::parseAttribute( $html, $element_selector, $id_attribute ), 'foo-id' ); + $this->assertNull( DOMHelpers::parse_attribute( '', $no_existent_selector, $data_attribute ) ); + $this->assertNull( DOMHelpers::parse_attribute( $html, $no_existent_selector, $data_attribute ) ); + $this->assertEquals( DOMHelpers::parse_attribute( $html, $no_existent_selector, $data_attribute, 'Bar' ), 'Bar' ); + $this->assertEquals( DOMHelpers::parse_attribute( $html, $id_selector, $data_attribute ), 'foo-data' ); + $this->assertEquals( DOMHelpers::parse_attribute( $html, $id_selector, $class_attribute ), 'foo-class' ); + $this->assertEquals( DOMHelpers::parse_attribute( $html, $id_selector, $id_attribute ), 'foo-id' ); + $this->assertEquals( DOMHelpers::parse_attribute( $html, $class_selector, $data_attribute ), 'foo-data' ); + $this->assertEquals( DOMHelpers::parse_attribute( $html, $class_selector, $class_attribute ), 'foo-class' ); + $this->assertEquals( DOMHelpers::parse_attribute( $html, $class_selector, $id_attribute ), 'foo-id' ); + $this->assertEquals( DOMHelpers::parse_attribute( $html, $element_selector, $data_attribute ), 'foo-data' ); + $this->assertEquals( DOMHelpers::parse_attribute( $html, $element_selector, $class_attribute ), 'foo-class' ); + $this->assertEquals( DOMHelpers::parse_attribute( $html, $element_selector, $id_attribute ), 'foo-id' ); // $html2 - $this->assertEquals( 'center', DOMHelpers::parseAttribute( $html2, '*', 'data-align' ) ); - $this->assertEquals( 'right', DOMHelpers::parseAttribute( $html2, '.has-text-align-right', 'data-align' ) ); - $this->assertNull( DOMHelpers::parseAttribute( $html2, '.non-existent-class', 'data-align' ) ); - $this->assertEquals( 'default', DOMHelpers::parseAttribute( $html2, '.non-existent-class', 'data-align', 'default' ) ); + $this->assertEquals( 'center', DOMHelpers::parse_attribute( $html2, '*', 'data-align' ) ); + $this->assertEquals( 'right', DOMHelpers::parse_attribute( $html2, '.has-text-align-right', 'data-align' ) ); + $this->assertNull( DOMHelpers::parse_attribute( $html2, '.non-existent-class', 'data-align' ) ); + $this->assertEquals( 'default', DOMHelpers::parse_attribute( $html2, '.non-existent-class', 'data-align', 'default' ) ); // $htm3 - $this->assertEquals( 'left', DOMHelpers::parseAttribute( $html3, 'span', 'data-align' ) ); - $this->assertEquals( 'left', DOMHelpers::parseAttribute( $html3, '*', 'data-align' ) ); + $this->assertEquals( 'left', DOMHelpers::parse_attribute( $html3, 'span', 'data-align' ) ); + $this->assertEquals( 'left', DOMHelpers::parse_attribute( $html3, '*', 'data-align' ) ); } public function testParseHTML(): void { @@ -49,11 +50,12 @@ public function testParseHTML(): void { $class_selector = '.foo-class'; $element_selector = 'p'; - $this->assertEmpty( DOMHelpers::parseHTML( $html, $no_existent_selector ) ); - $this->assertEquals( DOMHelpers::parseHTML( $html, $no_existent_selector, 'Bar' ), 'Bar' ); - $this->assertEquals( DOMHelpers::parseHTML( $html, $id_selector ), 'Bar' ); - $this->assertEquals( DOMHelpers::parseHTML( $html, $class_selector ), 'Bar' ); - $this->assertEquals( DOMHelpers::parseHTML( $html, $element_selector ), 'Bar' ); + $this->assertNull( DOMHelpers::parse_html( '', $no_existent_selector ) ); + $this->assertEmpty( DOMHelpers::parse_html( $html, $no_existent_selector ) ); + $this->assertEquals( DOMHelpers::parse_html( $html, $no_existent_selector, 'Bar' ), 'Bar' ); + $this->assertEquals( DOMHelpers::parse_html( $html, $id_selector ), 'Bar' ); + $this->assertEquals( DOMHelpers::parse_html( $html, $class_selector ), 'Bar' ); + $this->assertEquals( DOMHelpers::parse_html( $html, $element_selector ), 'Bar' ); } public function testGetElementsFromHTML(): void { @@ -61,8 +63,9 @@ public function testGetElementsFromHTML(): void { $element_selector = 'p'; $no_existent_selector = 'span'; - $this->assertEquals( DOMHelpers::getElementsFromHTML( $html, $element_selector ), '

First paragraph

Second paragraph

' ); - $this->assertEmpty( DOMHelpers::getElementsFromHTML( $html, $no_existent_selector ) ); + $this->assertNull( DOMHelpers::get_elements_from_html( '', $no_existent_selector ) ); + $this->assertEquals( DOMHelpers::get_elements_from_html( $html, $element_selector ), '

First paragraph

Second paragraph

' ); + $this->assertEmpty( DOMHelpers::get_elements_from_html( $html, $no_existent_selector ) ); } public function getTextFromSelector(): void { @@ -79,6 +82,6 @@ public function getTextFromSelector(): void { $this->assertEquals( DOMHelpers::getTextFromSelector( $html, $p_element ), 'First paragraph' ); $this->assertEquals( DOMHelpers::getTextFromSelector( $html, $div_element ), 'My div' ); - $this->assertEmpty( DOMHelpers::getElementsFromHTML( $html, $no_existent_selector ) ); + $this->assertEmpty( DOMHelpers::get_elements_from_html( $html, $no_existent_selector ) ); } }