Skip to content

Commit

Permalink
fix: prevent fatal errors with DOMHelpers methods by improving typesa…
Browse files Browse the repository at this point in the history
…fety and returning early (#257)

* chore: update Composer dev-deps to latest

* chore: cleanup EOL whitespace [phpcbf]

* chore: update phpstan.neon config

* fix: `Block::get_attribute_type()` param type and usage.

* fix: improve typesafety in DOMHelpers
  • Loading branch information
justlevine authored Aug 7, 2024
1 parent 205da8c commit 6241c4e
Show file tree
Hide file tree
Showing 5 changed files with 212 additions and 59 deletions.
12 changes: 12 additions & 0 deletions .changeset/few-cooks-reflect.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
"@wpengine/wp-graphql-content-blocks": minor
---

fix: prevent fatal errors by improving type-safety and returning early when parsing HTML.
The following methods have been deprecated for their stricter-typed counterparts:
- `DOMHelpers::parseAttribute()` => `::parse_attribute()`
- `DOMHelpers::parseFirstNodeAttribute()` => `::parse_first_node_attribute()`
- `DOMHelpers::parseHTML()` => `::parse_html()`
- `DOMHelpers::getElementsFromHTML()` => `::get_elements_from_html()`
- `DOMHelpers::parseText()` => `::parse_text()`
- `DOMHelpers::findNodes()`=> `::find_nodes()`
31 changes: 16 additions & 15 deletions includes/Blocks/Block.php
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,9 @@ private function register_type(): void {
private function resolve_block_attributes_recursive( $attributes, string $html, array $config ): array {
$result = [];

// Clean up the html.
$html = trim( $html );

foreach ( $config as $key => $value ) {
// Get default value.
$default = $value['default'] ?? null;
Expand Down Expand Up @@ -433,20 +436,18 @@ private function resolve_block_attributes_recursive( $attributes, string $html,
*
* @param string $html The html value
* @param string $source The source type
*
* @return string|null
*/
private function parse_single_source( $html, $source ) {
$value = null;
private function parse_single_source( string $html, $source ): ?string {
if ( empty( $html ) ) {
return $value;
return null;
}

switch ( $source ) {
case 'html':
$value = DOMHelpers::findNodes( $html )->innerHTML();
break;
return DOMHelpers::find_nodes( $html )->innerHTML();
}
return $value;

return null;
}

/**
Expand All @@ -458,15 +459,15 @@ private function parse_single_source( $html, $source ) {
* @param array<string,mixed> $value The value configuration.
*/
private function parse_html_source( string $html, $value ): ?string {
if ( ! isset( $value['selector'] ) ) {
if ( empty( $html ) || ! isset( $value['selector'] ) ) {
return null;
}

$result = DOMHelpers::parseHTML( $html, $value['selector'] );
$result = DOMHelpers::parse_html( $html, $value['selector'] );

// Multiline values are located somewhere else.
if ( isset( $value['multiline'] ) && ! empty( $result ) ) {
$result = DOMHelpers::getElementsFromHTML( $result, $value['multiline'] );
$result = DOMHelpers::get_elements_from_html( $result, $value['multiline'] );
}

return $result;
Expand All @@ -479,11 +480,11 @@ private function parse_html_source( string $html, $value ): ?string {
* @param array<string,mixed> $value The value configuration.
*/
private function parse_attribute_source( string $html, $value ): ?string {
if ( ! isset( $value['selector'] ) || ! isset( $value['attribute'] ) ) {
if ( empty( $html ) || ! isset( $value['selector'] ) || ! isset( $value['attribute'] ) ) {
return null;
}

return DOMHelpers::parseAttribute( $html, $value['selector'], $value['attribute'] );
return DOMHelpers::parse_attribute( $html, $value['selector'], $value['attribute'] );
}

/**
Expand All @@ -497,7 +498,7 @@ private function parse_text_source( string $html, $value ): ?string {
return null;
}

return DOMHelpers::parseText( $html, $value['selector'] );
return DOMHelpers::parse_text( $html, $value['selector'] );
}

/**
Expand All @@ -514,7 +515,7 @@ private function parse_query_source( string $html, $value, $attributes ): ?array
return null;
}

$nodes = DOMHelpers::findNodes( $html, $value['selector'] );
$nodes = DOMHelpers::find_nodes( $html, $value['selector'] );

// Coerce nodes to an array if it's not already.
if ( ! is_array( $nodes ) ) {
Expand Down
2 changes: 1 addition & 1 deletion includes/Field/BlockSupports/Anchor.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public static function register(): void {
if ( empty( $rendered_block ) ) {
return null;
}
return DOMHelpers::parseFirstNodeAttribute( $rendered_block, 'id' );
return DOMHelpers::parse_first_node_attribute( $rendered_block, 'id' );
},
],
],
Expand Down
173 changes: 155 additions & 18 deletions includes/Utilities/DOMHelpers.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,32 @@ final class DOMHelpers {
* @param string $attribute The attribute to extract.
* @param mixed $default_value The default value to return if the selector is not found.
*
* @return string|null extracted attribute
* @return ?string extracted attribute
*/
public static function parseAttribute( $html, $selector, $attribute, $default_value = null ): ?string {
public static function parse_attribute( string $html, string $selector, string $attribute, $default_value = null ): ?string {
// Bail early if there's no html to parse.
if ( empty( trim( $html ) ) ) {
return null;
}

$doc = new Document();
$doc->loadHtml( $html );

if ( '*' === $selector ) {
$selector = '*[' . $attribute . ']';
}

if ( empty( $selector ) ) {
} elseif ( empty( $selector ) ) {
$selector = '*';
}

$nodes = $doc->find( $selector );
$default_value = isset( $default_value ) ? $default_value : null;

foreach ( $nodes as $node ) {
if ( $node->hasAttribute( $attribute ) ) {
return $node->getAttribute( $attribute );
}
}

return $default_value;
}

Expand All @@ -51,14 +58,14 @@ public static function parseAttribute( $html, $selector, $attribute, $default_va
*
* @return string|null extracted attribute
*/
public static function parseFirstNodeAttribute( $html, $attribute ) {
$value = null;

public static function parse_first_node_attribute( string $html, string $attribute ) {
// Bail early if there's no html to parse.
if ( empty( trim( $html ) ) ) {
return $value;
return null;
}

$value = null;

$doc = new Document( $html );
// <html><body>$html</body></html>
$elem = $doc->find( '*' )[2];
Expand All @@ -72,13 +79,18 @@ public static function parseFirstNodeAttribute( $html, $attribute ) {
/**
* Parses the given HTML string to extract the innerHTML contents.
*
* @param string $html The HTML string to parse.
* @param string $selector The selector to use.
* @param mixed $default_value The default value to return if the selector is not found.
* @param string $html The HTML string to parse.
* @param string $selector The selector to use.
* @param ?string $default_value The default value to return if the selector is not found.
*
* @return string extracted innerHTML of selector
* @return ?string extracted innerHTML of selector
*/
public static function parseHTML( $html, $selector, $default_value = null ) {
public static function parse_html( string $html, string $selector, ?string $default_value = null ): ?string {
// Bail early if there's no html to parse.
if ( empty( trim( $html ) ) ) {
return $default_value;
}

$doc = new Document();
$doc->loadHtml( $html );

Expand All @@ -99,9 +111,14 @@ public static function parseHTML( $html, $selector, $default_value = null ) {
* @param string $html The HTML string to parse.
* @param string $selector The element (selector) to extract.
*
* @return string the HTML string of the extracted elements
* @return ?string the HTML string of the extracted elements
*/
public static function getElementsFromHTML( $html, $selector ) {
public static function get_elements_from_html( string $html, string $selector ): ?string {
// Bail early if there's no html to parse.
if ( empty( trim( $html ) ) ) {
return null;
}

$doc = new Document();
$doc->loadHtml( $html );

Expand All @@ -126,7 +143,12 @@ public static function getElementsFromHTML( $html, $selector ) {
*
* @return ?string The text content of the selector if found.
*/
public static function parseText( $html, $selector ) {
public static function parse_text( string $html, string $selector ): ?string {
// Bail early if there's no html to parse.
if ( empty( trim( $html ) ) ) {
return null;
}

$doc = new Document();
$doc->loadHtml( $html );

Expand All @@ -150,7 +172,7 @@ public static function parseText( $html, $selector ) {
*
* @return \DiDom\Element[]|\DiDom\Element
*/
public static function findNodes( $html, $selector = null ) {
public static function find_nodes( string $html, ?string $selector = null ) {
// Bail early if there's no html to parse.
if ( empty( trim( $html ) ) ) {
return [];
Expand All @@ -170,4 +192,119 @@ public static function findNodes( $html, $selector = null ) {

return $elem;
}


/**
* Deprecated methods.
*/

/**
* Parses the given HTML string to extract the specified attribute selector.
*
* Deprecated in favor of ::parse_attribute().
*
* @param string $html The HTML string to parse.
* @param string $selector The selector to use.
* @param string $attribute The attribute to extract.
* @param mixed $default_value The default value to return if the selector is not found.
*
* @deprecated @todo Use parse_attribute instead.
*/
public static function parseAttribute( $html, $selector, $attribute, $default_value = null ): ?string {
_deprecated_function( __METHOD__, '@todo', self::class . '::parse_attribute' );

return self::parse_attribute( $html, $selector, $attribute, $default_value );
}

/**
* Parses the given HTML string to extract the specified attribute selector of the first node.
*
* Deprecated in favor of ::parse_first_node_attribute().
*
* @param string $html The HTML string to parse.
* @param string $attribute The attribute to extract of the first node.
*
* @return string|null extracted attribute
*
* @deprecated @todo Use parse_first_node_attribute instead.
*/
public static function parseFirstNodeAttribute( $html, $attribute ) {
_deprecated_function( __METHOD__, '@todo', self::class . '::parse_first_node_attribute' );

return self::parse_first_node_attribute( $html, $attribute );
}

/**
* Parses the given HTML string to extract the innerHTML contents.
*
* Deprecated in favor of ::parse_html().
*
* @param string $html The HTML string to parse.
* @param string $selector The selector to use.
* @param mixed $default_value The default value to return if the selector is not found.
*
* @return string extracted innerHTML of selector
*
* @deprecated @todo Use parse_html instead.
*/
public static function parseHTML( $html, $selector, $default_value = null ) {
_deprecated_function( __METHOD__, '@todo', self::class . '::parse_html' );

return (string) self::parse_html( $html, $selector, $default_value );
}

/**
* Parses the given HTML string and extracts the specified elements.
*
* Deprecated in favor of ::get_elements_from_html().
*
* @param string $html The HTML string to parse.
* @param string $selector The element (selector) to extract.
*
* @return string the HTML string of the extracted elements
*
* @deprecated @todo Use get_elements_from_html instead.
*/
public static function getElementsFromHTML( $html, $selector ) {
_deprecated_function( __METHOD__, '@todo', self::class . '::get_elements_from_html' );

return (string) self::get_elements_from_html( $html, $selector );
}

/**
* Gets the text content of a given selector. If multiple selectors exist,
* the first result will be used.
*
* Deprecated in favor of ::parse_text().
*
* @param string $html The HTML string to parse.
* @param string $selector The selector to get the text content from.
*
* @return ?string The text content of the selector if found.
*
* @deprecated @todo Use parse_text instead.
*/
public static function parseText( $html, $selector ) {
_deprecated_function( __METHOD__, '@todo', self::class . '::parse_text' );

return self::parse_text( $html, $selector );
}

/**
* Parses the html into DOMElement and searches the DOM tree for a given XPath expression or CSS selector.
*
* Deprecated in favor of ::find_nodes().
*
* @param string $html The HTML string to parse.
* @param string|null $selector The selector to use.
*
* @return \DiDom\Element[]|\DiDom\Element
*
* @deprecated @todo Use find_nodes instead.
*/
public static function findNodes( $html, $selector = null ) {
_deprecated_function( __METHOD__, '@todo', self::class . '::find_nodes' );

return self::find_nodes( $html, $selector );
}
}
Loading

0 comments on commit 6241c4e

Please sign in to comment.