WP_HTML_Tag_Processor::skip_script_data() · private · WP 6.2.0

Skips contents of script tags.

Method of the class: WP_HTML_Tag_Processor{}

No Hooks.

Return

true|false. Whether the script tag was closed before the end of the document.

Usage

// Private method: it can only be called from inside WP_HTML_Tag_Processor itself.
$result = $this->skip_script_data();

Changelog

Since 6.2.0 Introduced.

WP_HTML_Tag_Processor::skip_script_data() code WP 6.6.2

/**
 * Skips contents of script tags.
 *
 * Scans forward from the current parse position while tracking the three
 * script-data modes ("unescaped", "escaped" and "double-escaped") so that
 * only a `</script` closer which really ends the element is accepted.
 * On success the parse position is left just past the closer's ">".
 *
 * @since 6.2.0
 *
 * @return bool Whether the script tag was closed before the end of the document.
 */
private function skip_script_data() {
	$state      = 'unescaped';
	$html       = $this->html;
	$doc_length = strlen( $html );
	$at         = $this->bytes_already_parsed;

	while ( false !== $at && $at < $doc_length ) {
		// Jump to the next byte that could begin a token of interest.
		$at += strcspn( $html, '-<', $at );

		/*
		 * For all script states a "-->"  transitions
		 * back into the normal unescaped script mode,
		 * even if that's the current state.
		 */
		if (
			$at + 2 < $doc_length &&
			'-' === $html[ $at ] &&
			'-' === $html[ $at + 1 ] &&
			'>' === $html[ $at + 2 ]
		) {
			$at   += 3;
			$state = 'unescaped';
			continue;
		}

		/*
		 * Every remaining token needs at least one byte after the
		 * current one. If none is left the script cannot be closed,
		 * so stop scanning. Leaving the loop here (instead of
		 * continuing without moving forward) guarantees termination
		 * when the document ends in a lone "-" or "<".
		 */
		if ( $at + 1 >= $doc_length ) {
			break;
		}

		// Everything of interest past here starts with "<".
		if ( '<' !== $html[ $at++ ] ) {
			continue;
		}

		/*
		 * Unlike with "-->", the "<!--" only transitions
		 * into the escaped mode if not already there.
		 *
		 * Inside the escaped modes it will be ignored; and
		 * should never break out of the double-escaped
		 * mode and back into the escaped mode.
		 *
		 * Only the "!" is stepped over: the two dashes stay
		 * available so that "<!-->" and "<!--->" are still
		 * recognized as containing a "-->" on the next pass.
		 */
		if (
			$at + 2 < $doc_length &&
			'!' === $html[ $at ] &&
			'-' === $html[ $at + 1 ] &&
			'-' === $html[ $at + 2 ]
		) {
			++$at;
			$state = 'unescaped' === $state ? 'escaped' : $state;
			continue;
		}

		// Remember where a potential "</script" begins: at its "<".
		$closer_potentially_starts_at = $at - 1;
		$is_closing                   = '/' === $html[ $at ];
		if ( $is_closing ) {
			++$at;
		}

		/*
		 * At this point the only remaining state-changes occur with the
		 * <script> and </script> tags; unless one of these appears next,
		 * proceed scanning to the next potential token in the text.
		 *
		 * The position is intentionally not advanced on a mismatch: the
		 * byte that broke the match may itself be a "<" or "-" starting
		 * the next token. Progress is still guaranteed because the "<"
		 * examined in this pass has already been stepped over.
		 */
		if ( ! (
			$at + 6 < $doc_length &&
			( 's' === $html[ $at ] || 'S' === $html[ $at ] ) &&
			( 'c' === $html[ $at + 1 ] || 'C' === $html[ $at + 1 ] ) &&
			( 'r' === $html[ $at + 2 ] || 'R' === $html[ $at + 2 ] ) &&
			( 'i' === $html[ $at + 3 ] || 'I' === $html[ $at + 3 ] ) &&
			( 'p' === $html[ $at + 4 ] || 'P' === $html[ $at + 4 ] ) &&
			( 't' === $html[ $at + 5 ] || 'T' === $html[ $at + 5 ] )
		) ) {
			continue;
		}

		/*
		 * Ensure that the script tag terminates to avoid matching on
		 * substrings of a non-match. For example, the sequence
		 * "<script123" should not end a script region even though
		 * "<script" is found within the text.
		 *
		 * The check above already proved that the byte following the
		 * tag name exists. Form feed is included because HTML treats
		 * it like the other whitespace bytes that end a tag name.
		 */
		$at += 6;
		$c   = $html[ $at ];
		if (
			' ' !== $c &&
			"\t" !== $c &&
			"\f" !== $c &&
			"\r" !== $c &&
			"\n" !== $c &&
			'/' !== $c &&
			'>' !== $c
		) {
			// Re-scan from this byte; it may begin "<..." or "-->".
			continue;
		}

		// An opening "<script" inside an escaped region nests one level deeper.
		if ( 'escaped' === $state && ! $is_closing ) {
			$state = 'double-escaped';
			continue;
		}

		// A "</script" inside a double-escaped region only undoes that nesting.
		if ( 'double-escaped' === $state && $is_closing ) {
			$state = 'escaped';
			continue;
		}

		if ( $is_closing ) {
			$this->bytes_already_parsed = $closer_potentially_starts_at;
			$this->tag_name_starts_at   = $closer_potentially_starts_at;
			if ( $this->bytes_already_parsed >= $doc_length ) {
				return false;
			}

			// Run the attribute parser until it reports nothing further to parse.
			while ( $this->parse_next_attribute() ) {
				continue;
			}

			// The document ended inside the closing tag.
			if ( $this->bytes_already_parsed >= $doc_length ) {
				$this->parser_state = self::STATE_INCOMPLETE_INPUT;

				return false;
			}

			if ( '>' === $html[ $this->bytes_already_parsed ] ) {
				++$this->bytes_already_parsed;
				return true;
			}
		}

		++$at;
	}

	return false;
}