Automattic\WooCommerce\EmailEditor\Integrations\Utils
Html_Processing_Helper::sanitize_caption_html
Sanitize caption HTML to allow only specific tags and attributes.
Method of the class: Html_Processing_Helper{}
No Hooks.
Returns
String. Sanitized caption HTML.
Usage
$result = Html_Processing_Helper::sanitize_caption_html( $caption_html ): string;
- $caption_html(string) (required)
- Raw caption HTML.
Html_Processing_Helper::sanitize_caption_html() Html Processing Helper::sanitize caption html code WC 10.4.3
public static function sanitize_caption_html( string $caption_html ): string {
// If no HTML tags, return as-is.
if ( false === strpos( $caption_html, '<' ) ) {
return $caption_html;
}
// Remove dangerous content: script, style, and other executable elements.
$result = preg_replace( '/<(script|style|iframe|object|embed|form|input|button)\b[^>]*>.*?<\/\1>/is', '', $caption_html );
if ( null === $result ) {
$caption_html = '';
} else {
$caption_html = $result;
}
// Use a more conservative approach - only validate attributes, don't modify tags.
$allowed_tags = array( 'strong', 'em', 'a', 'mark', 'kbd', 's', 'sub', 'sup', 'span', 'br' );
$html = new \WP_HTML_Tag_Processor( $caption_html );
// First pass: Process attributes for allowed tags only.
while ( $html->next_tag() ) {
$tag_name = $html->get_tag();
// Skip processing for disallowed tags.
if ( ! in_array( $tag_name, $allowed_tags, true ) ) {
continue;
}
// Only process attributes for allowed tags.
$attributes = $html->get_attribute_names_with_prefix( '' );
if ( is_array( $attributes ) ) {
foreach ( $attributes as $attr_name ) {
// Validate and sanitize each attribute individually.
self::validate_caption_attribute( $html, $attr_name );
}
}
}
// Second pass: Remove disallowed tags using a simple regex approach.
$final_html = $html->get_updated_html();
// Create a regex pattern to match disallowed tags.
$allowed_tags_pattern = implode( '|', array_map( 'preg_quote', $allowed_tags ) );
// Remove disallowed opening and closing tags, keeping only their content.
$result = preg_replace( '/<(?!(?:' . $allowed_tags_pattern . ')\b)[^>]*>(.*?)<\/(?!(?:' . $allowed_tags_pattern . ')\b)[^>]*>/s', '$1', $final_html );
if ( null === $result ) {
$final_html = '';
} else {
$final_html = $result;
}
// Remove disallowed self-closing tags.
$result = preg_replace( '/<(?!(?:' . $allowed_tags_pattern . ')\b)[^>]*\/>/s', '', $final_html );
if ( null === $result ) {
$final_html = '';
} else {
$final_html = $result;
}
return $final_html;
}