Automattic\WooCommerce\EmailEditor\Engine\Renderer

Html2Text::iterate_over_node() private static WC 1.0

Iterate over a DOM node and convert to text

Method of the class: Html2Text{}

No Hooks.

Returns

String. The converted text.

Usage

$result = Html2Text::iterate_over_node( $node, $prev_name, $in_pre, $is_office_document, $options );
$node(DOMNode) (required)
The DOM node.
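$prev_name(string|null) (required)
Lowercased node name of the previous sibling that was not skipped as invisible (doctype, processing instruction, or whitespace-only text), or null if there is none.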
$in_pre(true|false) (required)
Whether we're in a pre block.
$is_office_document(true|false) (required)
Whether this is an Office document.
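$options(array) (required)
Conversion options. This method reads the drop_links key: when it is truthy, links are rendered as their text only, without the target address.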

Html2Text::iterate_over_node() code WC 10.7.0

/**
 * Recursively convert a DOM node and its descendants to plain text.
 *
 * Text nodes are rendered directly. Element nodes contribute tag-specific leading
 * whitespace, the concatenated text of their children, and tag-specific trailing
 * formatting (links, images, list items, block quotes, ...).
 *
 * Note: child nodes are detached from $node while they are processed, so the
 * subtree below $node is consumed by this call.
 *
 * @param \DOMNode    $node               The DOM node.
 * @param string|null $prev_name          Lowercased node name of the previous sibling that was not skipped as invisible, or null when there is none.
 * @param bool        $in_pre             Whether we're in a pre block.
 * @param bool        $is_office_document Whether this is an Office document.
 * @param array       $options            Conversion options; only the `drop_links` key is read here.
 * @return string The converted text.
 */
private static function iterate_over_node( \DOMNode $node, ?string $prev_name, bool $in_pre, bool $is_office_document, array $options ): string {
	if ( $node instanceof \DOMText ) {
		if ( $in_pre ) {
			// Inside a pre block the inner whitespace is preserved; only the outer edges are trimmed.
			// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
			$text = "\n" . trim( self::render_text( $node->wholeText ), "\n\r\t " ) . "\n";

			// Remove trailing whitespace only.
			$result = preg_replace( "/[ \t]*\n/im", "\n", $text );
			$text   = null !== $result ? $result : $text;

			// Armor newlines with \r.
			return str_replace( "\n", "\r", $text );
		}

		// Replace whitespace characters with a space (equivalent to \s).
		// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
		$text   = self::render_text( $node->wholeText );
		$result = preg_replace( "/[\\t\\n\\f\\r ]+/im", ' ', $text );
		$text   = null !== $result ? $result : $text;

		// Visible text directly following a p or div starts on its own line.
		if ( ! self::is_whitespace( $text ) && ( 'p' === $prev_name || 'div' === $prev_name ) ) {
			return "\n" . $text;
		}
		return $text;
	}

	if ( $node instanceof \DOMDocumentType || $node instanceof \DOMProcessingInstruction ) {
		// Ignore.
		return '';
	}

	// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
	$name      = strtolower( $node->nodeName );
	$next_name = self::next_child_name( $node );

	// Start whitespace.
	switch ( $name ) {
		case 'hr':
			$prefix = '';
			if ( null !== $prev_name ) {
				$prefix = "\n";
			}
			return $prefix . "---------------------------------------------------------------\n";

		case 'style':
		case 'head':
		case 'title':
		case 'meta':
		case 'script':
			// Ignore these tags.
			return '';

		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
		case 'ol':
		case 'ul':
		case 'pre':
			// Add two newlines.
			$output = "\n\n";
			break;

		case 'td':
		case 'th':
			// Add tab char to separate table fields.
			$output = "\t";
			break;

		case 'p':
			// Microsoft exchange emails often include HTML which, when passed through
			// html2text, results in lots of double line returns everywhere.
			//
			// To fix this, for any p element with a className of `MsoNormal` (the standard
			// classname in any Microsoft export or outlook for a paragraph that behaves
			// like a line return) we skip the first line returns and set the name to br.
			if ( $is_office_document && $node instanceof \DOMElement && 'MsoNormal' === $node->getAttribute( 'class' ) ) {
				$output = '';
				$name   = 'br';
				break;
			}

			// Add two lines.
			$output = "\n\n";
			break;

		case 'tr':
			// Add one line.
			$output = "\n";
			break;

		case 'div':
			$output = '';
			if ( null !== $prev_name ) {
				// Add one line.
				$output .= "\n";
			}
			break;

		case 'li':
			$output = '- ';
			break;

		default:
			// Print out contents of unknown tags.
			$output = '';
			break;
	}

	// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
	if ( $node->childNodes->length > 0 ) {

		// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
		$n                      = $node->childNodes->item( 0 );
		$previous_sibling_names = array();
		$previous_sibling_name  = null;
		$parts                  = array();
		$trailing_whitespace    = 0;

		while ( null !== $n ) {

			$text = self::iterate_over_node( $n, $previous_sibling_name, $in_pre || 'pre' === $name, $is_office_document, $options );

			// Pass current node name to next child, as previousSibling does not appear to get populated.
			if ( $n instanceof \DOMDocumentType
				|| $n instanceof \DOMProcessingInstruction
				|| ( $n instanceof \DOMText && self::is_whitespace( $text ) ) ) {
				// Keep current previousSiblingName, these are invisible.
				++$trailing_whitespace;
			} else {
				// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
				$previous_sibling_name    = strtolower( $n->nodeName );
				$previous_sibling_names[] = $previous_sibling_name;
				$trailing_whitespace      = 0;
			}

			// Detach the processed child so that item( 0 ) yields the next unprocessed node.
			$node->removeChild( $n );
			// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
			$n = $node->childNodes->item( 0 );

			$parts[] = $text;
		}

		// Remove trailing whitespace, important for the br check below.
		while ( $trailing_whitespace-- > 0 ) {
			array_pop( $parts );
		}

		// Suppress last br tag inside a node list if follows text.
		$last_name = array_pop( $previous_sibling_names );
		if ( 'br' === $last_name ) {
			$last_name = array_pop( $previous_sibling_names );
			if ( '#text' === $last_name ) {
				array_pop( $parts );
			}
		}

		$output .= implode( '', $parts );
	}

	// End whitespace.
	switch ( $name ) {
		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
		case 'pre':
		case 'p':
			// Add two lines.
			$output .= "\n\n";
			break;

		case 'br':
			// Add one line.
			$output .= "\n";
			break;

		case 'div':
			break;

		case 'a':
			// Links are returned in [text](link) format.
			$href  = $node instanceof \DOMElement ? $node->getAttribute( 'href' ) : '';
			$title = $node instanceof \DOMElement ? $node->getAttribute( 'title' ) : '';

			// Read the option defensively so a missing key does not raise a warning.
			$drop_links = ! empty( $options['drop_links'] );

			$output = trim( $output );

			// Remove double [[ ]] s from linking images.
			if ( '[' === substr( $output, 0, 1 ) && ']' === substr( $output, -1 ) ) {
				$output = substr( $output, 1, strlen( $output ) - 2 );

				// For linking images, the title of the <a> overrides the title of the <img>.
				if ( $title ) {
					$output = $title;
				}
			}

			// If there is no link text, but a title attr.
			// Compare against '' explicitly: the text "0" is falsy in PHP but is real link text.
			if ( '' === $output && $title ) {
				$output = $title;
			}

			$is_same_address = $href === $output
				|| "mailto:$output" === $href
				|| "http://$output" === $href
				|| "https://$output" === $href;

			if ( ! $href ) {
				// It doesn't link anywhere: a named anchor is bracketed unless links are dropped.
				if ( ! $drop_links && $node instanceof \DOMElement && $node->getAttribute( 'name' ) ) {
					$output = "[$output]";
				}
			} elseif ( ! $is_same_address ) {
				// When the text already is the address, it is kept untouched (branch not entered).
				if ( '' === $output ) {
					// Empty string: fall back to the bare address.
					$output = $href;
				} elseif ( ! $drop_links ) {
					// Replace it.
					$output = "[$output]($href)";
				}
			}

			// Does the next node require additional whitespace?
			switch ( $next_name ) {
				case 'h1':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6':
					$output .= "\n";
					break;
			}
			break;

		case 'img':
			// Images are represented by their title, or failing that their alt text, in brackets.
			if ( $node instanceof \DOMElement && $node->getAttribute( 'title' ) ) {
				$output = '[' . $node->getAttribute( 'title' ) . ']';
			} elseif ( $node instanceof \DOMElement && $node->getAttribute( 'alt' ) ) {
				$output = '[' . $node->getAttribute( 'alt' ) . ']';
			} else {
				$output = '';
			}
			break;

		case 'li':
			$output .= "\n";
			break;

		case 'blockquote':
			// Process quoted text for whitespace/newlines.
			$output = self::process_whitespace_newlines( $output );

			// Add leading newline.
			$output = "\n" . $output;

			// Prepend '> ' at the beginning of all lines.
			$result = preg_replace( "/\n/im", "\n> ", $output );
			$output = null !== $result ? $result : $output;

			// Replace leading '> >' with '>>'.
			$result = preg_replace( "/\n> >/im", "\n>>", $output );
			$output = null !== $result ? $result : $output;

			// Add another leading newline and trailing newlines.
			$output = "\n" . $output . "\n\n";
			break;

		default:
			// Do nothing.
			break;
	}

	return $output;
}