true,
		'bgsound' => true,
		'frame' => true,
		'keygen' => true
	];

	/** HTML5 elements with raw (unescaped) content */
	private static $hasRawContent = [
		'style' => true,
		'script' => true,
		'xmp' => true,
		'iframe' => true,
		'noembed' => true,
		'noframes' => true,
		'plaintext' => true,
		'noscript' => true
	];

	/**
	 * Elements that strip leading newlines
	 * http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#html-fragment-serialization-algorithm
	 * @namespace
	 * @private
	 */
	private static $newlineStrippingElements = [
		'pre' => true,
		'textarea' => true,
		'listing' => true
	];

	/**
	 * Map from a raw character to the entity reference that replaces it.
	 * Keys are single characters so that callers can select a subset of
	 * them by passing a string of characters to encodeHtmlEntities().
	 */
	private static $entityEncodings = [
		'<' => '&lt;',
		'&' => '&amp;',
		'"' => '&quot;',
		"'" => '&apos;',
	];

	/**
	 * HTML entity encoder helper. Replaces calls to the entities npm module.
	 * Only supports the few entities we'll actually need: <&'"
	 *
	 * Characters listed in $encodeChars that have no entry in
	 * self::$entityEncodings are ignored (left as-is in the output).
	 *
	 * @param string $raw Input string
	 * @param string $encodeChars String with the characters that should be encoded
	 * @return string $raw with every selected character replaced by its entity
	 */
	private static function encodeHtmlEntities( string $raw, string $encodeChars ): string {
		// Nothing to encode. Returning early also avoids depending on what
		// str_split( '' ) yields, which differs between PHP versions.
		if ( $raw === '' || $encodeChars === '' ) {
			return $raw;
		}
		// Keep only the encodings whose key is one of the requested characters.
		$encodings = array_intersect_key(
			self::$entityEncodings,
			array_flip( str_split( $encodeChars ) )
		);
		// strtr() replaces in a single pass, so the '&' introduced by one
		// replacement is never re-encoded by another.
		return strtr( $raw, $encodings );
	}

	/**
	 * Serialize an HTML DOM3 node to XHTML. The XHTML and associated information will be fed
	 * step-by-step to the callback given in $accum.
	 * @param DOMNode $node
	 * @param array $options See {@link XMLSerializer::serialize()}
	 * @param callable $accum function( $bit, $node, $flag )
	 *  - $bit: (string) piece of HTML code
	 *  - $node: (DOMNode) ??
	 *  - $flag: (string|null) 'start' or 'end' (??)
	 * @return void
	 */
	private static function serializeToString( DOMNode $node, array $options, callable $accum ): void {
		$child = null;
		if ( !empty( $options['tunnelFosteredContent'] ) &&
			isset( WikitextConstants::$HTML['FosterablePosition'][$node->nodeName] )
		) {
			// Tunnel fosterable metas as comments.
			// This is analogous to what is done when treebuilding.
$ownerDoc = $node->ownerDocument; $allowedTags = WikitextConstants::$HTML['TableContentModels'][$node->nodeName]; $child = $node->firstChild; while ( $child ) { $next = $child->nextSibling; if ( DOMUtils::isText( $child ) ) { Assert::invariant( DOMUtils::isIEW( $child ), 'Only expecting whitespace!' ); } elseif ( $child instanceof DOMElement && !in_array( $child->nodeName, $allowedTags, true ) ) { Assert::invariant( $child->nodeName === 'meta', 'Only fosterable metas expected!' ); $as = []; foreach ( DOMCompat::attributes( $child ) as $attr ) { $as[] = [ $attr->name, $attr->value ]; } $comment = WTUtils::fosterCommentData( $child->getAttribute( 'typeof' ), $as, true ); $node->replaceChild( $ownerDoc->createComment( $comment ), $child ); } $child = $next; } } switch ( $node->nodeType ) { case XML_ELEMENT_NODE: DOMUtils::assertElt( $node ); $child = $node->firstChild; $nodeName = $node->tagName; $localName = $node->localName; $accum( '<' . $localName, $node ); foreach ( DOMCompat::attributes( $node ) as $attr ) { if ( $options['smartQuote'] // More double quotes than single quotes in value? && substr_count( $attr->value, '"' ) > substr_count( $attr->value, "'" ) ) { // use single quotes $accum( ' ' . $attr->name . "='" . self::encodeHtmlEntities( $attr->value, "<&'" ) . "'", $node ); } else { // use double quotes $accum( ' ' . $attr->name . '="' . self::encodeHtmlEntities( $attr->value, '<&"' ) . '"', $node ); } } if ( $child || ( !isset( WikitextConstants::$HTML['VoidTags'][$nodeName] ) && !isset( self::$alsoSerializeAsVoid[$nodeName] ) ) ) { $accum( '>', $node, 'start' ); // if is cdata child node if ( isset( self::$hasRawContent[$nodeName] ) ) { // TODO: perform context-sensitive escaping? // Currently this content is not normally part of our DOM, so // no problem. If it was, we'd probably have to do some // tag-specific escaping. Examples: // * < to \u003c in