wp_kses_hair()WP 1.0.0

Builds an attribute list from string containing attributes.

This function does a lot of work. It parses an attribute list into an array with attribute data, and tries to do the right thing even if it gets weird input. It will add quotes around attribute values that don't have any quotes or apostrophes around them, to make it easier to produce HTML code that will conform to W3C's HTML specification. It will also remove bad URL protocols from attribute values. It also reduces duplicate attributes by using the attribute defined first (foo='bar' foo='baz' will result in foo='bar').

1 time — 0.000145 sec (fast) | 50000 times — 2.85 sec (fast)

No Hooks.

Return

Array[]. Array of attribute information after parsing.

Usage

wp_kses_hair( $attr, $allowed_protocols );
$attr(string) (required)
Attribute list from HTML element to closing HTML element tag.
$allowed_protocols(string[]) (required)
Array of allowed URL protocols.

Examples

0

#1 Demonstration of work

$attrs = wp_kses_hair(' src="http://example.com/jpg.jpg"   alt="aaaaa"   foo=bar', 'http');

/*
$attrs will contain:

Array
(
	[src] => Array
		(
			[name] => src
			[value] => http://example.com/jpg.jpg
			[whole] => src="http://example.com/jpg.jpg"
			[vless] => n
		)

	[alt] => Array
		(
			[name] => alt
			[value] => aaaaa
			[whole] => alt="aaaaa"
			[vless] => n
		)

	[foo] => Array
		(
			[name] => foo
			[value] => bar
			[whole] => foo="bar"
			[vless] => n
		)

)
*/
0

#2 Protocol, which is different from the value of the attribute

If you specify a protocol that will be different, the function simply removes the protocol leaving the URL relative:

$attrs = wp_kses_hair('src=http://example.com/jpg.jpg', 'https');

/*
We get it:

Array
(
	[src] => Array
		(
			[name] => src
			[value] => //example.com/jpg.jpg
			[whole] => src="//example.com/jpg.jpg"
			[vless] => n
		)

)

*/

Changelog

Since 1.0.0 Introduced.

wp_kses_hair() code WP 6.4.3

function wp_kses_hair( $attr, $allowed_protocols ) {
	$attrarr  = array();
	$mode     = 0;
	$attrname = '';
	$uris     = wp_kses_uri_attributes();

	// Loop through the whole attribute list.

	while ( strlen( $attr ) !== 0 ) {
		$working = 0; // Was the last operation successful?

		switch ( $mode ) {
			case 0:
				if ( preg_match( '/^([_a-zA-Z][-_a-zA-Z0-9:.]*)/', $attr, $match ) ) {
					$attrname = $match[1];
					$working  = 1;
					$mode     = 1;
					$attr     = preg_replace( '/^[_a-zA-Z][-_a-zA-Z0-9:.]*/', '', $attr );
				}

				break;

			case 1:
				if ( preg_match( '/^\s*=\s*/', $attr ) ) { // Equals sign.
					$working = 1;
					$mode    = 2;
					$attr    = preg_replace( '/^\s*=\s*/', '', $attr );
					break;
				}

				if ( preg_match( '/^\s+/', $attr ) ) { // Valueless.
					$working = 1;
					$mode    = 0;

					if ( false === array_key_exists( $attrname, $attrarr ) ) {
						$attrarr[ $attrname ] = array(
							'name'  => $attrname,
							'value' => '',
							'whole' => $attrname,
							'vless' => 'y',
						);
					}

					$attr = preg_replace( '/^\s+/', '', $attr );
				}

				break;

			case 2:
				if ( preg_match( '%^"([^"]*)"(\s+|/?$)%', $attr, $match ) ) {
					// "value"
					$thisval = $match[1];
					if ( in_array( strtolower( $attrname ), $uris, true ) ) {
						$thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols );
					}

					if ( false === array_key_exists( $attrname, $attrarr ) ) {
						$attrarr[ $attrname ] = array(
							'name'  => $attrname,
							'value' => $thisval,
							'whole' => "$attrname=\"$thisval\"",
							'vless' => 'n',
						);
					}

					$working = 1;
					$mode    = 0;
					$attr    = preg_replace( '/^"[^"]*"(\s+|$)/', '', $attr );
					break;
				}

				if ( preg_match( "%^'([^']*)'(\s+|/?$)%", $attr, $match ) ) {
					// 'value'
					$thisval = $match[1];
					if ( in_array( strtolower( $attrname ), $uris, true ) ) {
						$thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols );
					}

					if ( false === array_key_exists( $attrname, $attrarr ) ) {
						$attrarr[ $attrname ] = array(
							'name'  => $attrname,
							'value' => $thisval,
							'whole' => "$attrname='$thisval'",
							'vless' => 'n',
						);
					}

					$working = 1;
					$mode    = 0;
					$attr    = preg_replace( "/^'[^']*'(\s+|$)/", '', $attr );
					break;
				}

				if ( preg_match( "%^([^\s\"']+)(\s+|/?$)%", $attr, $match ) ) {
					// value
					$thisval = $match[1];
					if ( in_array( strtolower( $attrname ), $uris, true ) ) {
						$thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols );
					}

					if ( false === array_key_exists( $attrname, $attrarr ) ) {
						$attrarr[ $attrname ] = array(
							'name'  => $attrname,
							'value' => $thisval,
							'whole' => "$attrname=\"$thisval\"",
							'vless' => 'n',
						);
					}

					// We add quotes to conform to W3C's HTML spec.
					$working = 1;
					$mode    = 0;
					$attr    = preg_replace( "%^[^\s\"']+(\s+|$)%", '', $attr );
				}

				break;
		} // End switch.

		if ( 0 === $working ) { // Not well-formed, remove and try again.
			$attr = wp_kses_html_error( $attr );
			$mode = 0;
		}
	} // End while.

	if ( 1 === $mode && false === array_key_exists( $attrname, $attrarr ) ) {
		/*
		 * Special case, for when the attribute list ends with a valueless
		 * attribute like "selected".
		 */
		$attrarr[ $attrname ] = array(
			'name'  => $attrname,
			'value' => '',
			'whole' => $attrname,
			'vless' => 'y',
		);
	}

	return $attrarr;
}