Html::openElement() micro-optimisations

* Use isset() instead of in_array()

In dropDefaults():
* Remove strtolower(), since the performance cost of running
  strtolower() on all attribute names far outweighs the performance
  benefit of dropping default values for attributes with non-canonical
  capitalisation. Attribute names with non-canonical case are extremely
  rare in the MediaWiki ecosystem. The change is harmless in the sense
  that a default attribute is semantically equivalent to an omitted
  attribute.
* Handle common cases first.
* Avoid value normalization when the attribute name is unknown.
* For class attributes, check if the value is empty using strict
  comparison rather than normalization.

In expandAttributes():
* If the value is an array, explode each element and add the parts to
  an array rather than doing implode() then explode().
* If the value is an array, check for spaces in the loop rather than
  calling array_diff().
* Replace $quote with a literal double quote.
* Skip the check for boolean attributes if we already know it is a
  space-separated attribute.
* Inline Sanitizer::encodeAttribute().

Measuring instruction count per iteration with perf stat, averaged over
10M iterations, PS1. Test case:
Html::openElement('a', [ 'class' => [ 'foo', 'bar' ] ] )

* Baseline: 11160.7265433
* in_array(): 10390.3837233
* dropDefaults() changes: 9674.1248824
* expandAttributes() misc: 9248.1947500
* implode/explode and space check: 8318.9800417
* Sanitizer inline: 8021.7371794

Change-Id: I7680b8939da50430316d77d42ebc1987752e99dc
This commit is contained in:
Tim Starling 2021-07-29 21:19:14 +10:00 committed by Krinkle
parent e82c5e52d5
commit 5deb6a2a45

View file

@ -47,59 +47,57 @@ use MediaWiki\MediaWikiServices;
* @since 1.16
*/
class Html {
/** @var string[] List of void elements from HTML5, section 8.1.2 as of 2016-09-19 */
/** @var bool[] List of void elements from HTML5, section 8.1.2 as of 2016-09-19 */
private static $voidElements = [
'area',
'base',
'br',
'col',
'embed',
'hr',
'img',
'input',
'keygen',
'link',
'meta',
'param',
'source',
'track',
'wbr',
'area' => true,
'base' => true,
'br' => true,
'col' => true,
'embed' => true,
'hr' => true,
'img' => true,
'input' => true,
'keygen' => true,
'link' => true,
'meta' => true,
'param' => true,
'source' => true,
'track' => true,
'wbr' => true,
];
/**
* Boolean attributes, which may have the value omitted entirely. Manually
* collected from the HTML5 spec as of 2011-08-12.
* @var string[]
* @var bool[]
*/
private static $boolAttribs = [
'async',
'autofocus',
'autoplay',
'checked',
'controls',
'default',
'defer',
'disabled',
'formnovalidate',
'hidden',
'ismap',
'itemscope',
'loop',
'multiple',
'muted',
'novalidate',
'open',
'pubdate',
'readonly',
'required',
'reversed',
'scoped',
'seamless',
'selected',
'truespeed',
'typemustmatch',
// HTML5 Microdata
'itemscope',
'async' => true,
'autofocus' => true,
'autoplay' => true,
'checked' => true,
'controls' => true,
'default' => true,
'defer' => true,
'disabled' => true,
'formnovalidate' => true,
'hidden' => true,
'ismap' => true,
'itemscope' => true,
'loop' => true,
'multiple' => true,
'muted' => true,
'novalidate' => true,
'open' => true,
'pubdate' => true,
'readonly' => true,
'required' => true,
'reversed' => true,
'scoped' => true,
'seamless' => true,
'selected' => true,
'truespeed' => true,
'typemustmatch' => true,
];
/**
@ -211,7 +209,7 @@ class Html {
*/
public static function rawElement( $element, $attribs = [], $contents = '' ) {
$start = self::openElement( $element, $attribs );
if ( in_array( $element, self::$voidElements ) ) {
if ( isset( self::$voidElements[$element] ) ) {
// Silly XML.
return substr( $start, 0, -1 ) . '/>';
} else {
@ -266,33 +264,33 @@ class Html {
// Remove invalid input types
if ( $element == 'input' ) {
$validTypes = [
'hidden',
'text',
'password',
'checkbox',
'radio',
'file',
'submit',
'image',
'reset',
'button',
'hidden' => true,
'text' => true,
'password' => true,
'checkbox' => true,
'radio' => true,
'file' => true,
'submit' => true,
'image' => true,
'reset' => true,
'button' => true,
// HTML input types
'datetime',
'datetime-local',
'date',
'month',
'time',
'week',
'number',
'range',
'email',
'url',
'search',
'tel',
'color',
'datetime' => true,
'datetime-local' => true,
'date' => true,
'month' => true,
'time' => true,
'week' => true,
'number' => true,
'range' => true,
'email' => true,
'url' => true,
'search' => true,
'tel' => true,
'color' => true,
];
if ( isset( $attribs['type'] ) && !in_array( $attribs['type'], $validTypes ) ) {
if ( isset( $attribs['type'] ) && !isset( $validTypes[$attribs['type']] ) ) {
unset( $attribs['type'] );
}
}
@ -371,25 +369,20 @@ class Html {
'textarea' => [ 'wrap' => 'soft' ],
];
$element = strtolower( $element );
foreach ( $attribs as $attrib => $value ) {
$lcattrib = strtolower( $attrib );
if ( is_array( $value ) ) {
$value = implode( ' ', $value );
} else {
$value = strval( $value );
}
// Simple checks using $attribDefaults
if ( isset( $attribDefaults[$element][$lcattrib] )
&& $attribDefaults[$element][$lcattrib] == $value
) {
unset( $attribs[$attrib] );
}
if ( $lcattrib == 'class' && $value == '' ) {
unset( $attribs[$attrib] );
if ( $attrib === 'class' ) {
if ( $value === '' || $value === [] || $value === [ '' ] ) {
unset( $attribs[$attrib] );
}
} elseif ( isset( $attribDefaults[$element][$attrib] ) ) {
if ( is_array( $value ) ) {
$value = implode( ' ', $value );
} else {
$value = strval( $value );
}
if ( $attribDefaults[$element][$attrib] == $value ) {
unset( $attribs[$attrib] );
}
}
}
@ -490,7 +483,7 @@ class Html {
// For boolean attributes, support [ 'foo' ] instead of
// requiring [ 'foo' => 'meaningless' ].
if ( is_int( $key ) && in_array( strtolower( $value ), self::$boolAttribs ) ) {
if ( is_int( $key ) && isset( self::$boolAttribs[strtolower( $value )] ) ) {
$key = $value;
}
@ -501,23 +494,23 @@ class Html {
// https://www.w3.org/TR/html401/index/attributes.html ("space-separated")
// https://www.w3.org/TR/html5/index.html#attributes-1 ("space-separated")
$spaceSeparatedListAttributes = [
'class', // html4, html5
'accesskey', // as of html5, multiple space-separated values allowed
'class' => true, // html4, html5
'accesskey' => true, // as of html5, multiple space-separated values allowed
// html4-spec doesn't document rel= as space-separated
// but has been used like that and is now documented as such
// in the html5-spec.
'rel',
'rel' => true,
];
// Specific features for attributes that allow a list of space-separated values
if ( in_array( $key, $spaceSeparatedListAttributes ) ) {
if ( isset( $spaceSeparatedListAttributes[$key] ) ) {
// Apply some normalization and remove duplicates
// Convert into correct array. Array can contain space-separated
// values. Implode/explode to get those into the main array as well.
if ( is_array( $value ) ) {
// If input wasn't an array, we can skip this step
$newValue = [];
$arrayValue = [];
foreach ( $value as $k => $v ) {
if ( is_string( $v ) ) {
// String values should be normal `[ 'foo' ]`
@ -526,34 +519,55 @@ class Html {
// As a special case don't set 'foo' if a
// separate 'foo' => true/false exists in the array
// keys should be authoritative
$newValue[] = $v;
foreach ( explode( ' ', $v ) as $part ) {
// Normalize spacing by fixing up cases where people used
// more than 1 space and/or a trailing/leading space
if ( $part !== '' && $part !== ' ' ) {
$arrayValue[] = $part;
}
}
}
} elseif ( $v ) {
// If the value is truthy but not a string this is likely
// an [ 'foo' => true ], falsy values don't add strings
$newValue[] = $k;
$arrayValue[] = $k;
}
}
$value = implode( ' ', $newValue );
} else {
$arrayValue = explode( ' ', $value );
// Normalize spacing by fixing up cases where people used
// more than 1 space and/or a trailing/leading space
$arrayValue = array_diff( $arrayValue, [ '', ' ' ] );
}
$value = explode( ' ', $value );
// Normalize spacing by fixing up cases where people used
// more than 1 space and/or a trailing/leading space
$value = array_diff( $value, [ '', ' ' ] );
// Remove duplicates and create the string
$value = implode( ' ', array_unique( $value ) );
$value = implode( ' ', array_unique( $arrayValue ) );
// Optimization: Skip below boolAttribs check and jump straight
// to its `else` block. The current $spaceSeparatedListAttributes
// block is mutually exclusive with $boolAttribs.
// phpcs:ignore Generic.PHP.DiscourageGoto
goto not_bool; // NOSONAR
} elseif ( is_array( $value ) ) {
throw new MWException( "HTML attribute $key can not contain a list of values" );
}
$quote = '"';
if ( in_array( $key, self::$boolAttribs ) ) {
if ( isset( self::$boolAttribs[$key] ) ) {
$ret .= " $key=\"\"";
} else {
$ret .= " $key=$quote" . Sanitizer::encodeAttribute( $value ) . $quote;
// phpcs:ignore Generic.PHP.DiscourageGoto
not_bool:
// Inlined from Sanitizer::encodeAttribute() for improved performance
$encValue = htmlspecialchars( $value, ENT_QUOTES );
// Whitespace is normalized during attribute decoding,
// so if we've been passed non-spaces we must encode them
// ahead of time or they won't be preserved.
$encValue = strtr( $encValue, [
"\n" => '&#10;',
"\r" => '&#13;',
"\t" => '&#9;',
] );
$ret .= " $key=\"$encValue\"";
}
}
return $ret;
@ -671,10 +685,22 @@ class Html {
$attribs['type'] = $type;
$attribs['value'] = $value;
$attribs['name'] = $name;
if ( in_array( $type, [ 'text', 'search', 'email', 'password', 'number' ] ) ) {
$textInputAttributes = [
'text' => true,
'search' => true,
'email' => true,
'password' => true,
'number' => true
];
if ( isset( $textInputAttributes[$type] ) ) {
$attribs = self::getTextInputAttributes( $attribs );
}
if ( in_array( $type, [ 'button', 'reset', 'submit' ] ) ) {
$buttonAttributes = [
'button' => true,
'reset' => true,
'submit' => true
];
if ( isset( $buttonAttributes[$type] ) ) {
$attribs = self::buttonAttributes( $attribs );
}
return self::element( 'input', $attribs );