WP_HTML_Processor::step_in_body() — private — WP 6.4.0

Parses next element in the 'in body' insertion mode.

This internal function performs the 'in body' insertion mode logic for the generalized WP_HTML_Processor::step() function.

Method of the class: WP_HTML_Processor{}

No Hooks.

Return

true|false. Whether an element was found.

Usage

// private - for code of main (parent) class only
$result = $this->step_in_body();

Notes

Changelog

Since 6.4.0 Introduced.

WP_HTML_Processor::step_in_body() code WP 6.6.2

/**
 * Parses next element in the 'in body' insertion mode.
 *
 * This internal function performs the 'in body' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * The current token is turned into an "operation" string: tags are prefixed
 * with `+` (opener) or `-` (closer) followed by the tag name, while non-tag
 * tokens use their token name as-is (e.g. `#text`, `#comment`). The first
 * switch dispatches the implemented rules, the second switch rejects tags
 * whose rules are not implemented, and whatever remains is handled by the
 * "any other start tag" / "any other end tag" rules.
 *
 * @since 6.4.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
 *
 * @return bool Whether an element was found.
 */
private function step_in_body() {
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();
	$op_sigil   = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$token_name}";

	switch ( $op ) {
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_html_element( $this->state->current_token );
			return true;

		case '#text':
			$current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ];

			/*
			 * > A character token that is U+0000 NULL
			 *
			 * Any successive sequence of NULL bytes is ignored and won't
			 * trigger active format reconstruction. Therefore, if the text
			 * only comprises NULL bytes then the token should be ignored
			 * here, but if there are any other characters in the stream
			 * the active formats should be reconstructed.
			 *
			 * This check must run before reconstructing the active formatting
			 * elements so that an ignored token has no side effects.
			 */
			if (
				1 <= $current_token->length &&
				"\x00" === $this->html[ $current_token->start ] &&
				strspn( $this->html, "\x00", $current_token->start, $current_token->length ) === $current_token->length
			) {
				// Parse error: ignore the token.
				return $this->step();
			}

			$this->reconstruct_active_formatting_elements();

			/*
			 * Whitespace-only text does not affect the frameset-ok flag.
			 * It is probably inter-element whitespace, but it may also
			 * contain character references which decode only to whitespace.
			 */
			$text = $this->get_modifiable_text();
			if ( strlen( $text ) !== strspn( $text, " \t\n\f\r" ) ) {
				$this->state->frameset_ok = false;
			}

			$this->insert_html_element( $this->state->current_token );
			return true;

		case 'html':
			/*
			 * > A DOCTYPE token
			 * > Parse error. Ignore the token.
			 */
			return $this->step();

		/*
		 * > A start tag whose tag name is "button"
		 */
		case '+BUTTON':
			if ( $this->state->stack_of_open_elements->has_element_in_scope( 'BUTTON' ) ) {
				// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
				$this->generate_implied_end_tags();
				$this->state->stack_of_open_elements->pop_until( 'BUTTON' );
			}

			$this->reconstruct_active_formatting_elements();
			$this->insert_html_element( $this->state->current_token );
			$this->state->frameset_ok = false;

			return true;

		/*
		 * > A start tag whose tag name is one of: "address", "article", "aside",
		 * > "blockquote", "center", "details", "dialog", "dir", "div", "dl",
		 * > "fieldset", "figcaption", "figure", "footer", "header", "hgroup",
		 * > "main", "menu", "nav", "ol", "p", "search", "section", "summary", "ul"
		 */
		case '+ADDRESS':
		case '+ARTICLE':
		case '+ASIDE':
		case '+BLOCKQUOTE':
		case '+CENTER':
		case '+DETAILS':
		case '+DIALOG':
		case '+DIR':
		case '+DIV':
		case '+DL':
		case '+FIELDSET':
		case '+FIGCAPTION':
		case '+FIGURE':
		case '+FOOTER':
		case '+HEADER':
		case '+HGROUP':
		case '+MAIN':
		case '+MENU':
		case '+NAV':
		case '+OL':
		case '+P':
		case '+SEARCH':
		case '+SECTION':
		case '+SUMMARY':
		case '+UL':
			if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
				$this->close_a_p_element();
			}

			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > An end tag whose tag name is one of: "address", "article", "aside", "blockquote",
		 * > "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset",
		 * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main",
		 * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul"
		 */
		case '-ADDRESS':
		case '-ARTICLE':
		case '-ASIDE':
		case '-BLOCKQUOTE':
		case '-BUTTON':
		case '-CENTER':
		case '-DETAILS':
		case '-DIALOG':
		case '-DIR':
		case '-DIV':
		case '-DL':
		case '-FIELDSET':
		case '-FIGCAPTION':
		case '-FIGURE':
		case '-FOOTER':
		case '-HEADER':
		case '-HGROUP':
		case '-LISTING':
		case '-MAIN':
		case '-MENU':
		case '-NAV':
		case '-OL':
		case '-PRE':
		case '-SEARCH':
		case '-SECTION':
		case '-SUMMARY':
		case '-UL':
			if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) {
				// @todo Report parse error.
				// Ignore the token.
				return $this->step();
			}

			$this->generate_implied_end_tags();
			if ( $this->state->stack_of_open_elements->current_node()->node_name !== $token_name ) {
				// @todo Record parse error: this error doesn't impact parsing.
			}
			$this->state->stack_of_open_elements->pop_until( $token_name );
			return true;

		/*
		 * > A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
		 */
		case '+H1':
		case '+H2':
		case '+H3':
		case '+H4':
		case '+H5':
		case '+H6':
			if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
				$this->close_a_p_element();
			}

			// A heading directly inside another heading closes the outer one.
			if (
				in_array(
					$this->state->stack_of_open_elements->current_node()->node_name,
					array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ),
					true
				)
			) {
				// @todo Indicate a parse error once it's possible.
				$this->state->stack_of_open_elements->pop();
			}

			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A start tag whose tag name is one of: "pre", "listing"
		 */
		case '+PRE':
		case '+LISTING':
			if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
				$this->close_a_p_element();
			}
			$this->insert_html_element( $this->state->current_token );
			$this->state->frameset_ok = false;
			return true;

		/*
		 * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
		 *
		 * Any heading closer closes any open heading, which is why the scope
		 * check and the pop use the internal "H1 through H6" pseudo-name
		 * instead of the token's own tag name.
		 */
		case '-H1':
		case '-H2':
		case '-H3':
		case '-H4':
		case '-H5':
		case '-H6':
			if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) {
				/*
				 * This is a parse error; ignore the token.
				 *
				 * @todo Indicate a parse error once it's possible.
				 */
				return $this->step();
			}

			$this->generate_implied_end_tags();

			if ( $this->state->stack_of_open_elements->current_node()->node_name !== $token_name ) {
				// @todo Record parse error: this error doesn't impact parsing.
			}

			$this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
			return true;

		/*
		 * > A start tag whose tag name is "li"
		 * > A start tag whose tag name is one of: "dd", "dt"
		 */
		case '+DD':
		case '+DT':
		case '+LI':
			$this->state->frameset_ok = false;
			$node                     = $this->state->stack_of_open_elements->current_node();
			$is_li                    = 'LI' === $token_name;

			// The labels below mirror the "loop" and "done" steps named in the specification.
			in_body_list_loop:
			/*
			 * The logic for LI and DT/DD is the same except for one point: LI elements _only_
			 * close other LI elements, but a DT or DD element closes _any_ open DT or DD element.
			 */
			if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) {
				$node_name = $is_li ? 'LI' : $node->node_name;
				$this->generate_implied_end_tags( $node_name );
				if ( $node_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
					// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
				}

				$this->state->stack_of_open_elements->pop_until( $node_name );
				goto in_body_list_done;
			}

			if (
				'ADDRESS' !== $node->node_name &&
				'DIV' !== $node->node_name &&
				'P' !== $node->node_name &&
				$this->is_special( $node->node_name )
			) {
				/*
				 * > If node is in the special category, but is not an address, div,
				 * > or p element, then jump to the step labeled done below.
				 */
				goto in_body_list_done;
			} else {
				/*
				 * > Otherwise, set node to the previous entry in the stack of open elements
				 * > and return to the step labeled loop.
				 */
				foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
					// Only the first item yielded is needed: it becomes the next node to examine.
					$node = $item;
					break;
				}
				goto in_body_list_loop;
			}

			in_body_list_done:
			if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
				$this->close_a_p_element();
			}

			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > An end tag whose tag name is "li"
		 * > An end tag whose tag name is one of: "dd", "dt"
		 */
		case '-DD':
		case '-DT':
		case '-LI':
			if (
				/*
				 * An end tag whose tag name is "li":
				 * If the stack of open elements does not have an li element in list item scope,
				 * then this is a parse error; ignore the token.
				 */
				(
					'LI' === $token_name &&
					! $this->state->stack_of_open_elements->has_element_in_list_item_scope( 'LI' )
				) ||
				/*
				 * An end tag whose tag name is one of: "dd", "dt":
				 * If the stack of open elements does not have an element in scope that is an
				 * HTML element with the same tag name as that of the token, then this is a
				 * parse error; ignore the token.
				 */
				(
					'LI' !== $token_name &&
					! $this->state->stack_of_open_elements->has_element_in_scope( $token_name )
				)
			) {
				/*
				 * This is a parse error, ignore the token.
				 *
				 * @todo Indicate a parse error once it's possible.
				 */
				return $this->step();
			}

			$this->generate_implied_end_tags( $token_name );

			if ( $token_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
				// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
			}

			$this->state->stack_of_open_elements->pop_until( $token_name );
			return true;

		/*
		 * > An end tag whose tag name is "p"
		 */
		case '-P':
			/*
			 * With no P element in button scope an element is inserted first
			 * so that there is something for "close a p element" to close.
			 *
			 * NOTE(review): the specification inserts a "p" start tag token with no
			 * attributes here, whereas this passes the current (closing) token — confirm
			 * that this is the intended representation.
			 */
			if ( ! $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
				$this->insert_html_element( $this->state->current_token );
			}

			$this->close_a_p_element();
			return true;

		// > A start tag whose tag name is "a"
		case '+A':
			/*
			 * Only the active formatting elements between the end of the list and
			 * the last marker are examined, and at most one A element is handled.
			 * `break 2` is required to leave the `foreach`; a plain `break` would
			 * only leave the `switch` and the walk would continue past the marker
			 * (or continue over a list that was just modified).
			 */
			foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
				switch ( $item->node_name ) {
					case 'marker':
						break 2;

					case 'A':
						$this->run_adoption_agency_algorithm();
						$this->state->active_formatting_elements->remove_node( $item );
						$this->state->stack_of_open_elements->remove_node( $item );
						break 2;
				}
			}

			$this->reconstruct_active_formatting_elements();
			$this->insert_html_element( $this->state->current_token );
			$this->state->active_formatting_elements->push( $this->state->current_token );
			return true;

		/*
		 * > A start tag whose tag name is one of: "b", "big", "code", "em", "font", "i",
		 * > "s", "small", "strike", "strong", "tt", "u"
		 */
		case '+B':
		case '+BIG':
		case '+CODE':
		case '+EM':
		case '+FONT':
		case '+I':
		case '+S':
		case '+SMALL':
		case '+STRIKE':
		case '+STRONG':
		case '+TT':
		case '+U':
			$this->reconstruct_active_formatting_elements();
			$this->insert_html_element( $this->state->current_token );
			$this->state->active_formatting_elements->push( $this->state->current_token );
			return true;

		/*
		 * > An end tag whose tag name is one of: "a", "b", "big", "code", "em", "font", "i",
		 * > "nobr", "s", "small", "strike", "strong", "tt", "u"
		 *
		 * NOBR is absent from the cases below; it is rejected as unsupported further down.
		 */
		case '-A':
		case '-B':
		case '-BIG':
		case '-CODE':
		case '-EM':
		case '-FONT':
		case '-I':
		case '-S':
		case '-SMALL':
		case '-STRIKE':
		case '-STRONG':
		case '-TT':
		case '-U':
			$this->run_adoption_agency_algorithm();
			return true;

		/*
		 * > An end tag whose tag name is "br"
		 * >   Parse error. Drop the attributes from the token, and act as described in the next
		 * >   entry; i.e. act as if this was a "br" start tag token with no attributes, rather
		 * >   than the end tag token that it actually is.
		 */
		case '-BR':
			$this->last_error = self::ERROR_UNSUPPORTED;
			throw new WP_HTML_Unsupported_Exception( 'Closing BR tags require unimplemented special handling.' );

		/*
		 * > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
		 */
		case '+AREA':
		case '+BR':
		case '+EMBED':
		case '+IMG':
		case '+KEYGEN':
		case '+WBR':
			$this->reconstruct_active_formatting_elements();
			$this->insert_html_element( $this->state->current_token );
			$this->state->frameset_ok = false;
			return true;

		/*
		 * > A start tag whose tag name is "input"
		 */
		case '+INPUT':
			$this->reconstruct_active_formatting_elements();
			$this->insert_html_element( $this->state->current_token );
			$type_attribute = $this->get_attribute( 'type' );
			/*
			 * > If the token does not have an attribute with the name "type", or if it does,
			 * > but that attribute's value is not an ASCII case-insensitive match for the
			 * > string "hidden", then: set the frameset-ok flag to "not ok".
			 */
			if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) {
				$this->state->frameset_ok = false;
			}
			return true;

		/*
		 * > A start tag whose tag name is "hr"
		 */
		case '+HR':
			if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
				$this->close_a_p_element();
			}
			$this->insert_html_element( $this->state->current_token );
			$this->state->frameset_ok = false;
			return true;

		/*
		 * > A start tag whose tag name is one of: "param", "source", "track"
		 */
		case '+PARAM':
		case '+SOURCE':
		case '+TRACK':
			$this->insert_html_element( $this->state->current_token );
			return true;
	}

	/*
	 * These tags require special handling in the 'in body' insertion mode
	 * but that handling hasn't yet been implemented.
	 *
	 * As the rules for each tag are implemented, the corresponding tag
	 * name should be removed from this list. An accompanying test should
	 * help ensure this list is maintained.
	 *
	 * @see Tests_HtmlApi_WpHtmlProcessor::test_step_in_body_fails_on_unsupported_tags
	 *
	 * Since this switch structure throws a WP_HTML_Unsupported_Exception, it's
	 * possible to handle "any other start tag" and "any other end tag" below,
	 * as that guarantees execution doesn't proceed for the unimplemented tags.
	 *
	 * Note that this switch matches on the bare tag name, so both the opening
	 * and the closing form of each listed tag are rejected.
	 *
	 * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
	 */
	switch ( $token_name ) {
		case 'APPLET':
		case 'BASE':
		case 'BASEFONT':
		case 'BGSOUND':
		case 'BODY':
		case 'CAPTION':
		case 'COL':
		case 'COLGROUP':
		case 'FORM':
		case 'FRAME':
		case 'FRAMESET':
		case 'HEAD':
		case 'HTML':
		case 'IFRAME':
		case 'LINK':
		case 'MARQUEE':
		case 'MATH':
		case 'META':
		case 'NOBR':
		case 'NOEMBED':
		case 'NOFRAMES':
		case 'NOSCRIPT':
		case 'OBJECT':
		case 'OPTGROUP':
		case 'OPTION':
		case 'PLAINTEXT':
		case 'RB':
		case 'RP':
		case 'RT':
		case 'RTC':
		case 'SARCASM':
		case 'SCRIPT':
		case 'SELECT':
		case 'STYLE':
		case 'SVG':
		case 'TABLE':
		case 'TBODY':
		case 'TD':
		case 'TEMPLATE':
		case 'TEXTAREA':
		case 'TFOOT':
		case 'TH':
		case 'THEAD':
		case 'TITLE':
		case 'TR':
		case 'XMP':
			$this->last_error = self::ERROR_UNSUPPORTED;
			throw new WP_HTML_Unsupported_Exception( "Cannot process {$token_name} element." );
	}

	if ( ! parent::is_tag_closer() ) {
		/*
		 * > Any other start tag
		 */
		$this->reconstruct_active_formatting_elements();
		$this->insert_html_element( $this->state->current_token );
		return true;
	} else {
		/*
		 * > Any other end tag
		 */

		/*
		 * Find the corresponding tag opener in the stack of open elements, if
		 * it exists before reaching a special element, which provides a kind
		 * of boundary in the stack. For example, a `</custom-tag>` should not
		 * close anything beyond its containing `P` or `DIV` element.
		 */
		foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
			if ( $token_name === $node->node_name ) {
				break;
			}

			if ( self::is_special( $node->node_name ) ) {
				// This is a parse error, ignore the token.
				return $this->step();
			}
		}

		// From here on `$node` is the matching opener found by the loop above.
		$this->generate_implied_end_tags( $token_name );
		if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
			// @todo Record parse error: this error doesn't impact parsing.
		}

		// Pop elements off the stack up to and including the matching opener.
		foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
			$this->state->stack_of_open_elements->pop();
			if ( $node === $item ) {
				return true;
			}
		}
	}
}