WpOrg\Requests

Iri::replace_invalid_with_pct_encoding()protectedWP 1.0

Replaces invalid characters with their percent-encoded form.

Method of the class: Iri{}

No Hooks.

Return

String.

Usage

// protected - for code of main (parent) or child class
$result = $this->replace_invalid_with_pct_encoding( $text, $extra_chars, $iprivate );
$text(string) (required)
Input string
$extra_chars(string) (required)
Valid characters not in iunreserved or iprivate (this is ASCII-only)
$iprivate(bool)
Whether code points in the iprivate (private-use) ranges may be left unencoded
Default: false

Iri::replace_invalid_with_pct_encoding() code WP 6.6.2

/**
 * Replace invalid characters with their percent-encoded form.
 *
 * The input is scanned byte by byte. Runs of bytes listed in $extra_chars
 * (plus the ASCII unreserved set and '%') are skipped untouched. Everything
 * else is decoded as UTF-8; a sequence is left as-is only when it is
 * well-formed, in shortest form, and its code point falls in the ucschar
 * ranges (or, when $iprivate is true, the iprivate ranges). All other bytes
 * are rewritten as uppercase %XX triplets.
 *
 * A multi-byte sequence that is cut short by the end of the string is
 * handled like any other malformed sequence: only the lead byte and the
 * continuation bytes actually present are encoded, and the following byte
 * (if any) is re-examined on its own.
 *
 * @param string $text        Input string
 * @param string $extra_chars Valid characters not in iunreserved or
 *                            iprivate (this is ASCII-only)
 * @param bool   $iprivate    Allow iprivate code points to stay unencoded
 * @return string
 */
protected function replace_invalid_with_pct_encoding($text, $extra_chars, $iprivate = false) {
	// Normalize as many pct-encoded sections as possible
	$text = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $text);

	// Replace invalid percent characters
	$text = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $text);

	// Add unreserved and % to $extra_chars (the latter is safe because all
	// pct-encoded sections are now valid).
	$extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

	// Now replace any bytes that aren't allowed with their pct-encoded versions
	$position = 0;
	$strlen = strlen($text);
	while (($position += strspn($text, $extra_chars, $position)) < $strlen) {
		$value = ord($text[$position]);

		// Start position
		$start = $position;

		// By default we are valid
		$valid = true;

		// Decoded code point; only meaningful while $valid is true
		$character = 0;

		// No one byte sequences are valid due to the while.
		// Two byte sequence:
		if (($value & 0xE0) === 0xC0) {
			$character = ($value & 0x1F) << 6;
			$length = 2;
			$remaining = 1;
		}
		// Three byte sequence:
		elseif (($value & 0xF0) === 0xE0) {
			$character = ($value & 0x0F) << 12;
			$length = 3;
			$remaining = 2;
		}
		// Four byte sequence:
		elseif (($value & 0xF8) === 0xF0) {
			$character = ($value & 0x07) << 18;
			$length = 4;
			$remaining = 3;
		}
		// Invalid byte:
		else {
			$valid = false;
			$length = 1;
			$remaining = 0;
		}

		if ($remaining) {
			for ($position++; $remaining; $position++) {
				// The string ended before the sequence was complete. Step back
				// onto the last byte that belongs to it so that only those
				// bytes are encoded below.
				if ($position >= $strlen) {
					$valid = false;
					$position--;
					break;
				}

				$value = ord($text[$position]);

				// Check that the byte is valid, then add it to the character:
				if (($value & 0xC0) === 0x80) {
					$character |= ($value & 0x3F) << (--$remaining * 6);
				}
				// If it is invalid, count the sequence as invalid and reprocess the current byte:
				else {
					$valid = false;
					$position--;
					break;
				}
			}
		}

		// Percent encode anything invalid or not in ucschar
		if (
			// Invalid sequences
			!$valid
			// Non-shortest form sequences are invalid
			|| $length > 1 && $character <= 0x7F
			|| $length > 2 && $character <= 0x7FF
			|| $length > 3 && $character <= 0xFFFF
			// Outside of range of ucschar codepoints
			// Noncharacters
			|| ($character & 0xFFFE) === 0xFFFE
			|| $character >= 0xFDD0 && $character <= 0xFDEF
			|| (
				// Everything else not in ucschar
				   $character > 0xD7FF && $character < 0xF900
				|| $character < 0xA0
				|| $character > 0xEFFFD
			)
			&& (
				// Everything not in iprivate, if it applies
				   !$iprivate
				|| $character < 0xE000
				|| $character > 0x10FFFD
			)
		) {
			// If we were a character, pretend we weren't, but rather an error.
			// (After a fully consumed sequence $position is one past its last
			// byte; bring it back onto that byte.)
			if ($valid) {
				$position--;
			}

			// Encode bytes $start..$position and splice them in with a single
			// replacement. Each byte grows from one to three characters.
			$run_length = $position - $start + 1;
			$encoded = '';
			for ($j = $start; $j <= $position; $j++) {
				$encoded .= sprintf('%%%02X', ord($text[$j]));
			}
			$text = substr_replace($text, $encoded, $start, $run_length);

			// Leave $position on the final hex digit of the run; the next
			// strspn() call skips over it.
			$position += 2 * $run_length;
			$strlen += 2 * $run_length;
		}
	}

	return $text;
}