Automattic\WooCommerce\EmailEditor\Engine\Renderer
Html2Text::iterate_over_node │ private static │ WC 1.0
Iterate over a DOM node and convert to text
Method of the class: Html2Text{}
No Hooks.
Returns
String. The converted text.
Usage
$result = Html2Text::iterate_over_node( $node, $prev_name, $in_pre, $is_office_document, $options ): string;
- $node(DOMNode) (required)
- The DOM node.
- $prev_name(string|null) (required)
- Lowercased node name of the previous visible sibling, or null when there is none.
- $in_pre(true|false) (required)
- Whether we're in a pre block.
- $is_office_document(true|false) (required)
- Whether this is an Office document.
- $options(array) (required)
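- Conversion options. This method reads the `drop_links` key; when it is truthy, link URLs are omitted and only the link text is kept.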
Html2Text::iterate_over_node() code WC 10.7.0
/**
 * Recursively convert a DOM node and its descendants to plain text.
 *
 * Text nodes have their whitespace normalised (or, inside a `pre` block, their
 * newlines armored as "\r"). Element nodes get tag-specific leading and trailing
 * whitespace or markup wrapped around the concatenated text of their children.
 *
 * Note: child nodes are removed from `$node` while they are processed, so the
 * subtree below `$node` is consumed by this call.
 *
 * @param \DOMNode    $node               The DOM node.
 * @param string|null $prev_name          Lowercased node name of the previous visible sibling, or null when there is none.
 * @param bool        $in_pre             Whether we're in a pre block.
 * @param bool        $is_office_document Whether this is an Office document.
 * @param array       $options            Conversion options; only the `drop_links` key is read here.
 * @return string The converted text.
 */
private static function iterate_over_node( \DOMNode $node, ?string $prev_name, bool $in_pre, bool $is_office_document, array $options ): string {
	if ( $node instanceof \DOMText ) {
		if ( $in_pre ) {
			// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
			$text = "\n" . trim( self::render_text( $node->wholeText ), "\n\r\t " ) . "\n";
			// Remove trailing whitespace only; fall back to the input if the regex fails.
			$text = preg_replace( "/[ \t]*\n/im", "\n", $text ) ?? $text;
			// Armor newlines with \r.
			return str_replace( "\n", "\r", $text );
		}

		// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
		$text = self::render_text( $node->wholeText );
		// Replace runs of whitespace characters with a single space (equivalent to \s).
		$text = preg_replace( "/[\\t\\n\\f\\r ]+/im", ' ', $text ) ?? $text;

		// Visible text directly after a paragraph or div starts on its own line.
		if ( ! self::is_whitespace( $text ) && ( 'p' === $prev_name || 'div' === $prev_name ) ) {
			return "\n" . $text;
		}

		return $text;
	}

	if ( $node instanceof \DOMDocumentType || $node instanceof \DOMProcessingInstruction ) {
		// Ignore.
		return '';
	}

	// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
	$name      = strtolower( $node->nodeName );
	$next_name = self::next_child_name( $node );

	// Start whitespace.
	switch ( $name ) {
		case 'hr':
			$prefix = '';
			if ( null !== $prev_name ) {
				$prefix = "\n";
			}
			return $prefix . "---------------------------------------------------------------\n";

		case 'style':
		case 'head':
		case 'title':
		case 'meta':
		case 'script':
			// Ignore these tags.
			return '';

		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
		case 'ol':
		case 'ul':
		case 'pre':
			// Add two newlines.
			$output = "\n\n";
			break;

		case 'td':
		case 'th':
			// Add tab char to separate table fields.
			$output = "\t";
			break;

		case 'p':
			// Microsoft exchange emails often include HTML which, when passed through
			// html2text, results in lots of double line returns everywhere.
			//
			// To fix this, for any p element with a className of `MsoNormal` (the standard
			// classname in any Microsoft export or outlook for a paragraph that behaves
			// like a line return) we skip the first line returns and set the name to br.
			if ( $is_office_document && $node instanceof \DOMElement && 'MsoNormal' === $node->getAttribute( 'class' ) ) {
				$output = '';
				$name   = 'br';
				break;
			}
			// Add two lines.
			$output = "\n\n";
			break;

		case 'tr':
			// Add one line.
			$output = "\n";
			break;

		case 'div':
			$output = '';
			if ( null !== $prev_name ) {
				// Add one line.
				$output .= "\n";
			}
			break;

		case 'li':
			$output = '- ';
			break;

		default:
			// Print out contents of unknown tags.
			$output = '';
			break;
	}

	// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
	if ( $node->childNodes->length > 0 ) {
		// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
		$n                      = $node->childNodes->item( 0 );
		$previous_sibling_names = array();
		$previous_sibling_name  = null;
		$parts                  = array();
		$trailing_whitespace    = 0;

		while ( null !== $n ) {
			$text = self::iterate_over_node( $n, $previous_sibling_name, $in_pre || 'pre' === $name, $is_office_document, $options );

			// Pass current node name to next child, as previousSibling does not appear to get populated.
			if ( $n instanceof \DOMDocumentType
				|| $n instanceof \DOMProcessingInstruction
				|| ( $n instanceof \DOMText && self::is_whitespace( $text ) ) ) {
				// Keep current previousSiblingName, these are invisible.
				++$trailing_whitespace;
			} else {
				// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
				$previous_sibling_name    = strtolower( $n->nodeName );
				$previous_sibling_names[] = $previous_sibling_name;
				$trailing_whitespace      = 0;
			}

			// Detach the processed child so that item( 0 ) yields the next one.
			$node->removeChild( $n );
			// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
			$n       = $node->childNodes->item( 0 );
			$parts[] = $text;
		}

		// Remove trailing whitespace, important for the br check below.
		while ( $trailing_whitespace-- > 0 ) {
			array_pop( $parts );
		}

		// Suppress last br tag inside a node list if follows text.
		$last_name = array_pop( $previous_sibling_names );
		if ( 'br' === $last_name ) {
			$last_name = array_pop( $previous_sibling_names );
			if ( '#text' === $last_name ) {
				array_pop( $parts );
			}
		}

		$output .= implode( '', $parts );
	}

	// End whitespace.
	switch ( $name ) {
		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
		case 'pre':
		case 'p':
			// Add two lines.
			$output .= "\n\n";
			break;

		case 'br':
			// Add one line.
			$output .= "\n";
			break;

		case 'div':
			break;

		case 'a':
			// Links are returned in [text](link) format.
			$href = $node instanceof \DOMElement ? $node->getAttribute( 'href' ) : '';
			// Read the option defensively so a missing key does not raise a warning.
			$drop_links = ! empty( $options['drop_links'] );
			$output     = trim( $output );

			// Remove double [[ ]] s from linking images.
			if ( '[' === substr( $output, 0, 1 ) && ']' === substr( $output, -1 ) ) {
				$output = substr( $output, 1, strlen( $output ) - 2 );
				// For linking images, the title of the <a> overrides the title of the <img>.
				if ( $node instanceof \DOMElement && $node->getAttribute( 'title' ) ) {
					$output = $node->getAttribute( 'title' );
				}
			}

			// If there is no link text, but a title attr.
			// Compare against '' instead of relying on truthiness so that a link
			// whose text is "0" keeps its text.
			if ( '' === $output && $node instanceof \DOMElement && $node->getAttribute( 'title' ) ) {
				$output = $node->getAttribute( 'title' );
			}

			$is_same_address = $href === $output
				|| "mailto:$output" === $href
				|| "http://$output" === $href
				|| "https://$output" === $href;

			if ( '' === $href ) {
				// It doesn't link anywhere; a named anchor is shown in brackets unless links are dropped.
				if ( ! $drop_links && $node instanceof \DOMElement && $node->getAttribute( 'name' ) ) {
					$output = "[$output]";
				}
			} elseif ( ! $is_same_address ) {
				// A link to the same address as its text is left as the bare text.
				if ( '' === $output ) {
					// No link text at all: fall back to the address.
					$output = $href;
				} elseif ( ! $drop_links ) {
					// Replace it.
					$output = "[$output]($href)";
				}
			}

			// Does the next node require additional whitespace?
			switch ( $next_name ) {
				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
					$output .= "\n";
					break;
			}
			break;

		case 'img':
			// Images are rendered as [title], falling back to [alt], else nothing.
			if ( $node instanceof \DOMElement && $node->getAttribute( 'title' ) ) {
				$output = '[' . $node->getAttribute( 'title' ) . ']';
			} elseif ( $node instanceof \DOMElement && $node->getAttribute( 'alt' ) ) {
				$output = '[' . $node->getAttribute( 'alt' ) . ']';
			} else {
				$output = '';
			}
			break;

		case 'li':
			$output .= "\n";
			break;

		case 'blockquote':
			// Process quoted text for whitespace/newlines.
			$output = self::process_whitespace_newlines( $output );
			// Add leading newline.
			$output = "\n" . $output;
			// Prepend '> ' at the beginning of all lines.
			$output = preg_replace( "/\n/im", "\n> ", $output ) ?? $output;
			// Replace leading '> >' with '>>'.
			$output = preg_replace( "/\n> >/im", "\n>>", $output ) ?? $output;
			// Add another leading newline and trailing newlines.
			$output = "\n" . $output . "\n\n";
			break;

		default:
			// Do nothing.
			break;
	}

	return $output;
}