Hide <style> tags from Tidy
Some versions of html-tidy (e.g. the one currently in use on WMF wikis) will try to move all <style> tags in the body into the head, effectively removing them for our purposes. We need to avoid that for TemplateStyles. Bug: T167349 Change-Id: I133776d16f366cad73ed30af0e5a665fdf9f5ed9
This commit is contained in:
parent
e250f70742
commit
83b798bbab
3 changed files with 27 additions and 1 deletions
|
|
@@ -48,6 +48,12 @@ class RaggettWrapper {
|
|||
// Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so
|
||||
// we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config
|
||||
$wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext );
|
||||
// Similar for inline <style> tags, but those aren't empty.
|
||||
$wrappedtext = preg_replace_callback( '!<style([^>]*)>(.*?)</style>!s', function ( $m ) {
|
||||
return '<html-style' . $m[1] . '>'
|
||||
. $this->replaceCallback( [ $m[2] ] )
|
||||
. '</html-style>';
|
||||
}, $wrappedtext );
|
||||
|
||||
// Preserve empty li elements (T49673) by abusing Tidy's datafld hack
|
||||
// The whitespace class is as in TY_(InitMap)
|
||||
|
|
@@ -78,8 +84,9 @@ class RaggettWrapper {
|
|||
* @return string
|
||||
*/
|
||||
public function postprocess( $text ) {
|
||||
// Revert <html-{link,meta}> back to <{link,meta}>
|
||||
// Revert <html-{link,meta,style}> back to <{link,meta,style}>
|
||||
$text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text );
|
||||
$text = preg_replace( '!<(/?)html-(style)([^>]*)>!', '<$1$2$3>', $text );
|
||||
|
||||
// Remove datafld
|
||||
$text = str_replace( '<li datafld=""', '<li', $text );
|
||||
|
|
|
|||
|
|
@@ -20,3 +20,5 @@ fix-uri: no
|
|||
# html-{meta,link} is a hack we use to prevent Tidy from stripping <meta> and <link> used in the body for Microdata
|
||||
new-empty-tags: html-meta, html-link, wbr, source, track
|
||||
new-inline-tags: video, audio, bdi, data, time, mark
|
||||
# html-style is a hack we use to prevent pre-HTML5 versions of Tidy from stripping <style> used in the body for TemplateStyles
|
||||
new-blocklevel-tags: html-style
|
||||
|
|
|
|||
|
|
@@ -28315,3 +28315,20 @@ unclosed internal link XSS (T137264)
|
|||
!! html/parsoid
|
||||
<p>[[#%3Cscript%3Ealert(1)%3C/script%3E|</p>
|
||||
!! end
|
||||
|
||||
!! test
|
||||
Validating that <style> isn't eaten by tidy (T167349)
|
||||
!! options
|
||||
# Use $wgRawHtml to inject a <style> tag, since you normally can't in wikitext
|
||||
wgRawHtml=1
|
||||
!! wikitext
|
||||
<div class="foo">
|
||||
<html><style>.foo::before { content: "<foo>"; }</style></html>
|
||||
<html><style data-mw-foobar="baz">.foo::after { content: "<bar>"; }</style></html>
|
||||
</div>
|
||||
!! html+tidy
|
||||
<div class="foo">
|
||||
<style>.foo::before { content: "<foo>"; }</style>
|
||||
<style data-mw-foobar="baz">.foo::after { content: "<bar>"; }</style>
|
||||
</div>
|
||||
!! end
|
||||
|
|
|
|||
Loading…
Reference in a new issue