nodeName === 'a' && DOMUtils::treeHasElement( $fragment, $targetNode->nodeName );
	}

	/**
	 * Zero out the DSR width of every element in $fragment from the first
	 * A-tag onwards and flag those elements as misnested.
	 *
	 * Does nothing unless $targetNode is itself an A-tag. Span wrappers are
	 * added around the fragment's children up front; wrappers that turn out to
	 * precede the first A-tag are stripped again during the walk.
	 *
	 * @param DOMNode $targetNode Node the fragment is about to be unpacked into
	 * @param DOMNode $fragment Container holding the unpacked fragment content
	 * @param Env $env
	 */
	private static function fixUpMisnestedTagDSR( DOMNode $targetNode, DOMNode $fragment, Env $env ): void {
		// Currently, this only deals with A-tags
		if ( $targetNode->nodeName !== 'a' ) {
			return;
		}

		// Walk the fragment till you find an 'A' tag and
		// zero out DSR width for all tags from that point on.
		// This also requires adding span wrappers around
		// bare text from that point on.

		// QUICK FIX: Add wrappers unconditionally and strip unneeded ones
		// Since this scenario should be rare in practice, I am going to
		// go with this simple solution.
		PipelineUtils::addSpanWrappers( $fragment->childNodes );

		// Both variables are shared (by reference) across handler invocations:
		// $resetDSR flips to true at the first A-tag, $currOffset remembers the
		// last known start offset for elements that carry no DSR of their own.
		$resetDSR = false;
		$currOffset = 0;
		$dsrFixer = new DOMTraverser();
		$fixHandler = function ( DOMNode $node ) use ( &$resetDSR, &$currOffset ) {
			if ( $node instanceof DOMElement ) {
				$dp = DOMDataUtils::getDataParsoid( $node );
				if ( $node->nodeName === 'a' ) {
					$resetDSR = true;
				}
				if ( $resetDSR ) {
					if ( isset( $dp->dsr->start ) ) {
						// Collapse the range to zero width at its start offset
						$currOffset = $dp->dsr->end = $dp->dsr->start;
					} else {
						$dp->dsr = new DomSourceRange( $currOffset, $currOffset, null, null );
					}
					$dp->misnested = true;
				} elseif ( !empty( $dp->tmp->wrapper ) ) {
					// Unnecessary wrapper added above -- strip it.
					// Compute the continuation point before the wrapper is removed.
					$next = $node->firstChild ?: $node->nextSibling;
					DOMUtils::migrateChildren( $node, $node->parentNode, $node );
					$node->parentNode->removeChild( $node );
					return $next;
				}
			}
			return true;
		};
		$dsrFixer->addHandler( null, $fixHandler );
		$dsrFixer->traverse( $env, $fragment->firstChild );
		// The traversal starts at the first child, so run the handler on the
		// fragment container itself as well.
		$fixHandler( $fragment );
	}

	/**
	 * Add $delta to dsr->start and dsr->end of every element in the subtree
	 * below $node. $node itself is not touched.
	 *
	 * @param DOMNode $node Root of the subtree; its own DSR is assumed to be
	 *   up to date already
	 * @param int $delta Offset to add to each integer DSR value
	 */
	public static function addDeltaToDSR( DOMNode $node, int $delta ): void {
		// Add 'delta' to dsr->start and dsr->end for nodes in the subtree
		// node's dsr has already been updated
		$child = $node->firstChild;
		while ( $child ) {
			if ( $child instanceof DOMElement ) {
				$dp = DOMDataUtils::getDataParsoid( $child );
				if ( !empty( $dp->dsr ) ) {
					// SSS FIXME: We've exploited partial DSR information
					// in propagating DSR values across the DOM. But, worth
					// revisiting at some point to see if we want to change this
					// so that either both or no value is present to eliminate these
					// kind of checks.
					//
					// Currently, it can happen that one or the other
					// value can be null. So, we should try to update
					// the dsr value in such a scenario.
					if ( is_int( $dp->dsr->start ) ) {
						$dp->dsr->start += $delta;
					}
					if ( is_int( $dp->dsr->end ) ) {
						$dp->dsr->end += $delta;
					}
				}
				self::addDeltaToDSR( $child, $delta );
			}
			$child = $child->nextSibling;
		}
	}

	/**
	 * Replace every `about` attribute in the subtree below $node with a fresh
	 * about id obtained from $env. Elements that shared an about id before
	 * the rewrite share the same new id afterwards.
	 *
	 * @param Env $env Source of new about ids
	 * @param DOMNode $node Root of the subtree to rewrite (its descendants are
	 *   visited, the node itself is not)
	 * @param array &$aboutIdMap Old about id => new about id, accumulated
	 *   across the recursive walk
	 */
	private static function fixAbouts( Env $env, DOMNode $node, array &$aboutIdMap = [] ): void {
		$c = $node->firstChild;
		while ( $c ) {
			if ( $c instanceof DOMElement ) {
				if ( $c->hasAttribute( 'about' ) ) {
					$cAbout = $c->getAttribute( 'about' );
					// Update about
					$newAbout = $aboutIdMap[$cAbout] ?? null;
					if ( !$newAbout ) {
						$newAbout = $env->newAboutId();
						$aboutIdMap[$cAbout] = $newAbout;
					}
					$c->setAttribute( 'about', $newAbout );
				}
				self::fixAbouts( $env, $c, $aboutIdMap );
			}
			$c = $c->nextSibling;
		}
	}

	/**
	 * Span-wrap the children of $node and set the given about id on every
	 * resulting child.
	 *
	 * @param DOMNode $node Parent whose children are wrapped and annotated
	 * @param string $about About id to set on each child
	 */
	private static function makeChildrenEncapWrappers( DOMNode $node, string $about ): void {
		PipelineUtils::addSpanWrappers( $node->childNodes );

		$c = $node->firstChild;
		while ( $c ) {
			/**
			 * We just span wrapped the child nodes, so it's safe to assume
			 * they're all DOMElements.
			 *
			 * @var DOMElement $c
			 */
			'@phan-var DOMElement $c';
			// FIXME: This unconditionally sets about on children
			// This is currently safe since all of them are nested
			// inside a transclusion, but do we need future-proofing?
			$c->setAttribute( 'about', $about );
			$c = $c->nextSibling;
		}
	}

	/**
	 * DOMTraverser handler that unpacks DOM fragments which were injected in the
	 * token pipeline.
	 *
	 * Nodes that are not elements, or that do not have the `mw:DOMFragment`
	 * type, are left alone. Otherwise the fragment content named by the
	 * wrapper's data-parsoid `html` key is materialized, annotated
	 * (transclusion info, DSR, about ids) and spliced into the wrapper's
	 * parent in place of the wrapper.
	 *
	 * @param DOMNode $node
	 * @param Env $env
	 * @return bool|DOMNode true to continue the traversal normally, otherwise
	 *   the node the traversal should continue from
	 */
	public static function handler( DOMNode $node, Env $env ) {
		if ( !$node instanceof DOMElement ) {
			return true;
		}

		// sealed fragments shouldn't make it past this point
		if ( !DOMUtils::hasTypeOf( $node, 'mw:DOMFragment' ) ) {
			return true;
		}

		$dp = DOMDataUtils::getDataParsoid( $node );

		// Replace this node and possibly a sibling with node.dp.html
		$fragmentParent = $node->parentNode;
		$dummyNode = $node->ownerDocument->createElement( $fragmentParent->nodeName );

		// Compare against 1 so that a strict boolean is passed to the assertion.
		Assert::invariant(
			preg_match( '/^mwf/', $dp->html ) === 1,
			'Expected data-parsoid html of a mw:DOMFragment wrapper to start with "mwf"'
		);

		$nodes = $env->getDOMFragment( $dp->html );

		if ( !empty( $dp->tmp->isHtmlExt ) ) {
			// FIXME: This is a silly workaround for foundationwiki which has the
			// "html" extension tag which lets through arbitrary content and
			// often does so in a way that doesn't consider that we'd like to
			// encapsulate it. For example, it closes the tag in the middle
			// of style tag content to insert a template and then closes the style
			// tag in another "html" extension tag. The balance proposal isn't
			// its friend.
			//
			// This works because importNode does attribute error checking, whereas
			// parsing does not. A better fix would be to use one ownerDocument
			// for the entire parse, so no adoption is needed. See T179082
			$nodeStrings = array_map( function ( $n ) {
				$str = ContentUtils::ppToXML( $n );
				// Put $n back in canonical form.
				// Since $nodes isn't used beyond this point, we can also
				// set $nodes to null but this is an uncommon use case so
				// simpler to do this here.
				DOMDataUtils::visitAndLoadDataAttribs( $n );
				return $str;
			}, $nodes );
			$html = implode( '', $nodeStrings );
			ContentUtils::ppToDOM( $env, $html, [ 'node' => $dummyNode ] );
		} else {
			array_walk( $nodes, function ( $n ) use ( &$dummyNode ) {
				// Dump $n's node data from the data-bag onto the node attribute
				DOMDataUtils::visitAndStoreDataAttribs( $n );
				$imp = $dummyNode->ownerDocument->importNode( $n, true );
				$dummyNode->appendChild( $imp );
			} );
			DOMDataUtils::visitAndLoadDataAttribs( $dummyNode );
		}

		$contentNode = $dummyNode->firstChild;

		if ( DOMUtils::hasTypeOf( $node, 'mw:Transclusion' ) ) {
			// Ensure our `firstChild` is an element to add annotation. At present,
			// we're unlikely to end up with transclusion annotations on fragments
			// where span wrapping hasn't occurred (ie. link contents, since that's
			// placed on the anchor itself) but in the future, nowiki spans may be
			// omitted or new uses for dom fragments found. For now, the test case
			// necessitating this is an edgy link-in-link scenario:
			//   [[Test|{{1x|[[Hmm|Something strange]]}}]]
			PipelineUtils::addSpanWrappers( $dummyNode->childNodes );
			// Reset `contentNode`, since the `firstChild` may have changed in
			// span wrapping.
			$contentNode = $dummyNode->firstChild;
			DOMUtils::assertElt( $contentNode );
			// Transfer typeof, data-mw, and param info
			// about attributes are transferred below.
			DOMDataUtils::setDataMw( $contentNode, Utils::clone( DOMDataUtils::getDataMw( $node ) ) );
			DOMUtils::addTypeOf( $contentNode, 'mw:Transclusion' );
			DOMDataUtils::getDataParsoid( $contentNode )->pi = $dp->pi ?? null;
		}

		// Update DSR:
		//
		// - Only update DSR for content that came from cache.
		// - For new DOM fragments from this pipeline,
		//   previously-computed DSR is valid.
		// - EXCEPTION: fostered content from tables get their DSR reset
		//   to zero-width.
		// - FIXME: We seem to also be doing this for new extension content,
		//   which is the only place still using `setDSR`.
		//
		// There is currently no DSR for DOMFragments nested inside
		// transclusion / extension content (extension inside template
		// content etc).
		// TODO: Make sure that is the only reason for not having a DSR here.
		$dsr = $dp->dsr ?? null;
		if ( $dsr &&
			!( empty( $dp->tmp->setDSR ) && empty( $dp->tmp->fromCache ) && empty( $dp->fostered ) )
		) {
			DOMUtils::assertElt( $contentNode );
			$cnDP = DOMDataUtils::getDataParsoid( $contentNode );
			if ( DOMUtils::hasTypeOf( $contentNode, 'mw:Transclusion' ) ) {
				// FIXME: An old comment from c28f137 said we just use dsr->start and
				// dsr->end since tag-widths will be incorrect for reuse of template
				// expansions. The comment was removed in ca9e760.
				$cnDP->dsr = new DomSourceRange( $dsr->start, $dsr->end, null, null );
			} elseif (
				DOMUtils::matchTypeOf( $contentNode, '/^mw:(Nowiki|Extension(\/[^\s]+))$/' ) !== null
			) {
				$cnDP->dsr = $dsr;
			} else {
				// non-transcluded images
				$cnDP->dsr = new DomSourceRange( $dsr->start, $dsr->end, 2, 2 );
				// Reused image -- update dsr by tsrDelta on all
				// descendants of 'firstChild' which is the figure tag
				// (NOTE(review): element name reconstructed from context -- confirm)
				$tsrDelta = $dp->tmp->tsrDelta ?? 0;
				if ( $tsrDelta ) {
					self::addDeltaToDSR( $contentNode, $tsrDelta );
				}
			}
		}

		if ( !empty( $dp->tmp->fromCache ) ) {
			// Replace old about-id with new about-id that is
			// unique to the global page environment object.
			//
			// Figures are reused from cache. Note that figure captions
			// can contain multiple independent transclusions. Each one
			// of those individual transclusions should get a new unique
			// about id. Hence a need for an aboutIdMap and the need to
			// walk the entire tree.
			self::fixAbouts( $env, $dummyNode );
		}

		// If the fragment wrapper has an about id, it came from template
		// annotating (the wrapper was an about sibling) and should be transferred
		// to top-level nodes after span wrapping. This should happen regardless
		// of whether we're coming `fromCache` or not.
		// FIXME: Presumably we have a nesting issue here if this is a cached
		// transclusion.
		$about = $node->getAttribute( 'about' );
		if ( $about !== '' ) {
			// Span wrapping may not have happened for the transclusion above if
			// the fragment is not the first encapsulation wrapper node.
			PipelineUtils::addSpanWrappers( $dummyNode->childNodes );
			$n = $dummyNode->firstChild;
			while ( $n ) {
				DOMUtils::assertElt( $n );
				$n->setAttribute( 'about', $about );
				$n = $n->nextSibling;
			}
		}

		$nextNode = $node->nextSibling;

		if ( self::hasBadNesting( $fragmentParent, $dummyNode ) ) {
			DOMUtils::assertElt( $fragmentParent );
			/* -----------------------------------------------------------------------
			 * If fragmentParent is an A element and the fragment contains another
			 * A element, we have an invalid nesting of A elements and needs fixing up
			 *
			 * doc1: ... fragmentParent -> [... dummyNode=mw:DOMFragment, ...] ...
			 *
			 * 1. Change doc1:fragmentParent -> [... "#unique-hash-code", ...] by replacing
			 *    node with the "#unique-hash-code" text string
			 *
			 * 2. str = parentHTML.replace(#unique-hash-code, dummyHTML)
			 *    We now have a HTML string with the bad nesting. We will now use the HTML5
			 *    parser to parse this HTML string and give us the fixed up DOM
			 *
			 * 3. ParseHTML(str) to get
			 *    doc2: [BODY -> [[fragmentParent -> [...], nested-A-tag-from-dummyNode, ...]]]
			 *
			 * 4. Replace doc1:fragmentParent with doc2:body.childNodes
			 * ----------------------------------------------------------------------- */
			// The placeholder must not occur anywhere else in the serialized
			// parent, because str_replace() below substitutes every occurrence.
			// A bare epoch timestamp is just a short digit run that can
			// legitimately show up in text, URLs or serialized attributes, so
			// use a random token instead. It consists only of [a-z0-9-], so it
			// needs no escaping when serialized.
			$placeholder = 'mw-dom-fragment-placeholder-' . bin2hex( random_bytes( 16 ) );
			$fragmentParent->replaceChild( $node->ownerDocument->createTextNode( $placeholder ), $node );

			// If fragmentParent has an about, it presumably is nested inside a template
			// Post fixup, its children will surface to the encapsulation wrapper level.
			// So, we have to fix them up so they dont break the encapsulation.
			//
			// Ex: {{1x|[http://foo.com This is [[bad]], very bad]}}
			//
			// In this example, the A-tag corresponding to the external link is
			// fragmentParent and has an about.
			// dummyNode is the DOM corresponding to "This is [[bad]], very bad". Post-fixup
			// "[[bad]], very bad" are at encapsulation level and need about ids.
			$about = $fragmentParent->getAttribute( 'about' );
			if ( $about !== '' ) {
				self::makeChildrenEncapWrappers( $dummyNode, $about );
			}

			// Set zero-dsr width on all elements that will get split
			// in dummyNode's tree to prevent selser-based corruption
			// on edits to a page that contains badly nested tags.
			self::fixUpMisnestedTagDSR( $fragmentParent, $dummyNode, $env );

			$dummyHTML = ContentUtils::ppToXML( $dummyNode, [
				'innerXML' => true,
				// We just added some span wrappers and we need to keep
				// that tmp info so the unnecessary ones get stripped.
				// Should be fine since tmp was stripped before packing.
				'keepTmp' => true
			] );
			$parentHTML = ContentUtils::ppToXML( $fragmentParent );

			// Remember where the re-parsed nodes will start being inserted
			$p = $fragmentParent->previousSibling;

			// We rely on HTML5 parser to fixup the bad nesting (see big comment above)
			$newDoc = DOMUtils::parseHTML( str_replace( $placeholder, $dummyHTML, $parentHTML ) );
			$body = DOMCompat::getBody( $newDoc );
			DOMUtils::migrateChildrenBetweenDocs( $body, $fragmentParent->parentNode, $fragmentParent );

			if ( !$p ) {
				$p = $fragmentParent->parentNode->firstChild;
			} else {
				$p = $p->nextSibling;
			}

			// Load data attributes on every node that was just migrated in
			// (everything between the remembered position and fragmentParent)
			while ( $p !== $fragmentParent ) {
				DOMDataUtils::visitAndLoadDataAttribs( $p );
				$p = $p->nextSibling;
			}

			// Set nextNode to the previous-sibling of former fragmentParent (which will get deleted)
			// This will ensure that all nodes will get handled
			$nextNode = $fragmentParent->previousSibling;

			// fragmentParent itself is useless now
			$fragmentParent->parentNode->removeChild( $fragmentParent );
		} else {
			// Move the content nodes over and delete the placeholder node
			DOMUtils::migrateChildren( $dummyNode, $fragmentParent, $node );
			$node->parentNode->removeChild( $node );
		}

		return $nextNode;
	}
}