Automattic\WooCommerce\EmailEditor\Integrations\Utils

Html_Processing_Helper::sanitize_caption_html() public static WC 1.0

Sanitize caption HTML to allow only specific tags and attributes.

Method of the class: Html_Processing_Helper{}

No Hooks.

Returns

String. Sanitized caption HTML.

Usage

$result = Html_Processing_Helper::sanitize_caption_html( $caption_html ): string;
$caption_html (string) (required)
Raw caption HTML.

Html_Processing_Helper::sanitize_caption_html() code WC 10.4.3

/**
 * Sanitize caption HTML to allow only specific tags and attributes.
 *
 * Steps, in order:
 *  1. Input without any `<` character is returned unchanged.
 *  2. Paired script/style/iframe/object/embed/form/input/button elements are
 *     removed together with their content.
 *  3. Every attribute found on an allow-listed tag is handed to
 *     self::validate_caption_attribute().
 *  4. Every remaining element tag (opening, closing or self-closing) whose name
 *     is not on the allow-list is stripped; the text around it is kept.
 *
 * If a regular-expression step fails, the result is an empty string rather
 * than unsanitized markup.
 *
 * @param string $caption_html Raw caption HTML.
 * @return string Sanitized caption HTML.
 */
public static function sanitize_caption_html( string $caption_html ): string {
	// If no HTML tags, return as-is.
	if ( false === strpos( $caption_html, '<' ) ) {
		return $caption_html;
	}

	// Remove dangerous content: script, style, and other executable elements (including their content).
	$result       = preg_replace( '/<(script|style|iframe|object|embed|form|input|button)\b[^>]*>.*?<\/\1>/is', '', $caption_html );
	$caption_html = null === $result ? '' : $result;

	// Lower-case names of the only tags that may remain in a caption.
	$allowed_tags = array( 'strong', 'em', 'a', 'mark', 'kbd', 's', 'sub', 'sup', 'span', 'br' );

	$html = new \WP_HTML_Tag_Processor( $caption_html );

	// First pass: validate attributes on allowed tags.
	while ( $html->next_tag() ) {
		// get_tag() reports the name in upper case, so normalize it before
		// comparing against the lower-case allow-list. Without this the strict
		// comparison never matches and no attribute is ever validated.
		$tag_name = strtolower( (string) $html->get_tag() );

		// Disallowed tags are removed wholesale in the second pass; their attributes are irrelevant.
		if ( ! in_array( $tag_name, $allowed_tags, true ) ) {
			continue;
		}

		$attributes = $html->get_attribute_names_with_prefix( '' );
		if ( ! is_array( $attributes ) ) {
			continue;
		}

		foreach ( $attributes as $attr_name ) {
			// Validate and sanitize each attribute individually.
			self::validate_caption_attribute( $html, $attr_name );
		}
	}

	$final_html = $html->get_updated_html();

	// Second pass: strip every tag that is not on the allow-list, keeping the
	// surrounding text. Matching each tag on its own (instead of open/close
	// pairs) also removes void or unclosed tags such as <img ...> and never
	// swallows the closing tag of an allowed element. The name is captured in
	// full, so e.g. <a-b> or <spanx> is not mistaken for an allowed tag.
	$result = preg_replace_callback(
		'/<\/?([a-z][a-z0-9:-]*)[^>]*>/i',
		static function ( array $matches ) use ( $allowed_tags ): string {
			return in_array( strtolower( $matches[1] ), $allowed_tags, true ) ? $matches[0] : '';
		},
		$final_html
	);

	// Fail closed if the regex engine reported an error.
	return null === $result ? '' : $result;
}