Hide <style> tags from Tidy

Some versions of html-tidy (e.g. the one currently in use on WMF wikis)
will try to move all <style> tags in the body into the head, effectively
removing them for our purposes. We need to avoid that for TemplateStyles.

Bug: T167349
Change-Id: I133776d16f366cad73ed30af0e5a665fdf9f5ed9
This commit is contained in:
Brad Jorsch 2017-06-13 12:03:56 -04:00
parent e250f70742
commit 83b798bbab
3 changed files with 27 additions and 1 deletions

View file

@ -48,6 +48,12 @@ class RaggettWrapper {
// Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so
// we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config
$wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext );
// Similarly for inline <style> tags, but those aren't empty elements, so both the opening and closing tags are renamed.
$wrappedtext = preg_replace_callback( '!<style([^>]*)>(.*?)</style>!s', function ( $m ) {
return '<html-style' . $m[1] . '>'
. $this->replaceCallback( [ $m[2] ] )
. '</html-style>';
}, $wrappedtext );
// Preserve empty li elements (T49673) by abusing Tidy's datafld hack
// The whitespace class is as in TY_(InitMap)
@ -78,8 +84,9 @@ class RaggettWrapper {
* @return string
*/
public function postprocess( $text ) {
// Revert <html-{link,meta}> back to <{link,meta}>
// Revert <html-{link,meta,style}> back to <{link,meta,style}>
$text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text );
$text = preg_replace( '!<(/?)html-(style)([^>]*)>!', '<$1$2$3>', $text );
// Remove datafld
$text = str_replace( '<li datafld=""', '<li', $text );

View file

@ -20,3 +20,5 @@ fix-uri: no
# html-{meta,link} is a hack we use to prevent Tidy from stripping <meta> and <link> used in the body for Microdata
new-empty-tags: html-meta, html-link, wbr, source, track
new-inline-tags: video, audio, bdi, data, time, mark
# html-style is a hack we use to prevent pre-HTML5 versions of Tidy from moving <style> used in the body for TemplateStyles into the head (effectively stripping it)
new-blocklevel-tags: html-style

View file

@ -28315,3 +28315,20 @@ unclosed internal link XSS (T137264)
!! html/parsoid
<p>[[#%3Cscript%3Ealert(1)%3C/script%3E|</p>
!! end
!! test
Validating that <style> isn't eaten by tidy (T167349)
!! options
# Use $wgRawHtml to inject a <style> tag, since you normally can't in wikitext
wgRawHtml=1
!! wikitext
<div class="foo">
<html><style>.foo::before { content: "<foo>"; }</style></html>
<html><style data-mw-foobar="baz">.foo::after { content: "<bar>"; }</style></html>
</div>
!! html+tidy
<div class="foo">
<style>.foo::before { content: "<foo>"; }</style>
<style data-mw-foobar="baz">.foo::after { content: "<bar>"; }</style>
</div>
!! end