true, 'tr' => true ];

	/**
	 * Try to widen a range to its enclosing paragraph so that no wrapper
	 * span is needed for leading text content.
	 *
	 * The range is only widened when its content may start with text and the
	 * parent of `$range->start` is a non-literal-HTML `<p>` whose first and
	 * last children are exactly the range's start and end marker elements.
	 * On success `$range->start` and `$range->end` are both set to that
	 * paragraph.
	 *
	 * @param stdClass $range
	 * @param bool|null $startsWithText Pass null to have this computed from
	 *   `$range->start` (skipping over a leading template marker meta).
	 * @return bool True if the range was expanded to the parent paragraph.
	 */
	private static function expandRangeToAvoidSpanWrapping(
		stdClass $range, ?bool $startsWithText = null
	): bool {
		// SSS FIXME: Later on, if safe, we could consider expanding the
		// range unconditionally rather than only if a span is required.

		$mightAddSpan = $startsWithText;
		if ( $startsWithText === null ) {
			$n = $range->start;
			if ( WTUtils::isTplMarkerMeta( $n ) ) {
				$n = $n->nextSibling;
			}
			$mightAddSpan = DOMUtils::isText( $n );
		}

		$expandable = false;
		if ( $mightAddSpan ) {
			// See if we can expand the range to the parent node.
			// Eliminates useless spanning of wikitext of the form: {{1x|foo}}
			// where the entire template content is contained in a paragraph.
			$contentParent = $range->start->parentNode;
			$expandable = $contentParent->nodeName === 'p' &&
				!WTUtils::isLiteralHTMLNode( $contentParent ) &&
				$contentParent->firstChild === $range->startElem &&
				$contentParent->lastChild === $range->endElem &&
				$contentParent === $range->end->parentNode;

			if ( $expandable ) {
				$range->start = $contentParent;
				$range->end = $contentParent;
			}
		}

		return $expandable;
	}

	/**
	 * Transfer DSR information from `$source` onto `$target`.
	 *
	 * If both nodes have an end offset and the target's is larger, only the
	 * start offset is taken from the source. Otherwise the source's dsr is
	 * copied wholesale (null if the source has none) along with its `src`.
	 *
	 * @param DOMElement $target
	 * @param DOMElement $source
	 */
	private static function updateDSRForFirstTplNode( DOMElement $target, DOMElement $source ): void {
		$srcDP = DOMDataUtils::getDataParsoid( $source );
		$tgtDP = DOMDataUtils::getDataParsoid( $target );

		// Since TSRs on template content tokens are cleared by the
		// template handler, all computed dsr values for template content
		// is always inferred from top-level content values and is safe.
		// So, do not overwrite a bigger end-dsr value.
		if ( isset( $srcDP->dsr->end ) && isset( $tgtDP->dsr->end ) &&
			$tgtDP->dsr->end > $srcDP->dsr->end
		) {
			$tgtDP->dsr->start = $srcDP->dsr->start ?? null;
		} else {
			// The source may not carry a dsr at all (the isset() test above
			// allows for that); cloning null would throw an Error.
			$tgtDP->dsr = isset( $srcDP->dsr ) ? clone $srcDP->dsr : null;
			$tgtDP->src = $srcDP->src ?? null;
		}
	}

	/**
	 * Get the DSR of the node that ends a range.
	 *
	 * For an element end node this is simply its data-parsoid dsr (or null).
	 * For a text/comment end node, the DSR is extrapolated from the closest
	 * preceding element sibling by adding up the lengths of the intervening
	 * text and comment nodes.
	 *
	 * @param stdClass $range
	 * @return DomSourceRange|null
	 */
	private static function getRangeEndDSR( stdClass $range ): ?DomSourceRange {
		$endNode = $range->end;
		if ( $endNode instanceof DOMElement ) {
			return DOMDataUtils::getDataParsoid( $endNode )->dsr ?? null;
		} else {
			// In the rare scenario where the last element of a range is not an ELEMENT,
			// extrapolate based on DSR of first leftmost sibling that is an ELEMENT.
			// We don't try any harder than this for now.
			$offset = 0;
			$n = $endNode->previousSibling;
			while ( $n && !( $n instanceof DOMElement ) ) {
				if ( $n instanceof DOMText ) {
					$offset += strlen( $n->nodeValue );
				} else {
					// A comment
					$offset += WTUtils::decodedCommentLength( $n );
				}
				$n = $n->previousSibling;
			}

			$dsr = null;
			if ( $n ) {
				/**
				 * The point of the above loop is to ensure we're working
				 * with a DOMElement if there is an $n.
				 *
				 * @var DOMElement $n
				 */
				'@phan-var DOMElement $n';
				$dsr = DOMDataUtils::getDataParsoid( $n )->dsr ?? null;
			}

			if ( $dsr && is_int( $dsr->end ?? null ) ) {
				// Width of the end node itself, so the new range covers it.
				$len = $endNode instanceof DOMText
					? strlen( $endNode->nodeValue )
					: WTUtils::decodedCommentLength( $endNode );
				$dsr = new DomSourceRange(
					$dsr->end + $offset,
					$dsr->end + $offset + $len,
					null,
					null
				);
			}

			return $dsr;
		}
	}

	/**
	 * Find the common DOM ancestor of two DOM nodes.
* @param Frame $frame
	 * @param DOMDocument $doc
	 * @param DOMElement $startElem
	 * @param DOMElement $endMeta
	 * @param DOMElement $endElem
	 * @return object
	 */
	private static function getDOMRange(
		Frame $frame, DOMDocument $doc, DOMElement $startElem, DOMElement $endMeta, DOMElement $endElem
	) {
		$env = $frame->getEnv();
		$range = (object)[
			'startElem' => $startElem,
			'endElem' => $endMeta,
			'id' => Utils::stripParsoidIdPrefix( $startElem->getAttribute( 'about' ) ),
			'startOffset' => DOMDataUtils::getDataParsoid( $startElem )->tsr->start,
			'flipped' => false
		];

		// Find common ancestor of startElem and endElem
		$startAncestors = DOMUtils::pathToRoot( $startElem );
		$elem = $endElem;
		$parentNode = $endElem->parentNode;
		while ( $parentNode && $parentNode->nodeType !== XML_DOCUMENT_NODE ) {
			// array_search() yields false when $parentNode is not an ancestor
			// of $startElem; neither branch below matches in that case.
			$i = array_search( $parentNode, $startAncestors, true );
			if ( $i === 0 ) {
				// widen the scope to include the full subtree
				$range->root = $startElem;
				$range->start = $startElem->firstChild;
				$range->end = $startElem->lastChild;
				break;
			} elseif ( $i > 0 ) {
				$range->root = $parentNode;
				$range->start = $startAncestors[$i - 1];
				$range->end = $elem;
				break;
			}
			$elem = $parentNode;
			$parentNode = $elem->parentNode;
		}

		// Detect empty content in unfosterable positions and
		// wrap them in spans.
		if ( $startElem->nodeName === 'meta' &&
			$startElem->nextSibling === $endElem &&
			!DOMUtils::isFosterablePosition( $startElem )
		) {
			$emptySpan = $doc->createElement( 'span' );
			$startElem->parentNode->insertBefore( $emptySpan, $endElem );
		}

		// Handle unwrappable content in fosterable positions
		// and expand template range, if required.
		if ( DOMUtils::isFosterablePosition( $range->start ) &&
			(
				!DOMUtils::isElt( $range->start ) ||
				// NOTE: These template marker meta tags are translated from comments
				// *after* the DOM has been built which is why they can show up in
				// fosterable positions in the DOM.
				(
					WTUtils::isTplMarkerMeta( $range->start ) &&
					WTUtils::isTplMarkerMeta( $range->start->nextSibling )
				) ||
				(
					WTUtils::isTplMarkerMeta( $range->start ) &&
					!DOMUtils::isElt( $range->start->nextSibling )
				)
			)
		) {
			$rangeStartParent = $range->start->parentNode;

			// 1. If we are in a table in a foster-element position, then all non-element
			//    nodes will be white-space and comments. Skip over all of them and find
			//    the first table content node
			$newStart = $range->start;
			while ( $newStart && !( $newStart instanceof DOMElement ) ) {
				$newStart = $newStart->nextSibling;
			}

			// 2. Push leading comments and whitespace into the element node
			//    as long as it is a tr/tbody -- pushing whitespace into the
			//    other (th/td/caption) can change display semantics.
			if ( $newStart && isset( self::MAP_TBODY_TR[$newStart->nodeName] ) ) {
				/**
				 * The point of the above loop is to ensure we're working
				 * with a DOMElement if there is an $newStart.
				 *
				 * @var DOMElement $newStart
				 */
				'@phan-var DOMElement $newStart';
				$insertPosition = $newStart->firstChild;
				$n = $range->start;
				while ( $n !== $newStart ) {
					$next = $n->nextSibling;
					$newStart->insertBefore( $n, $insertPosition );
					$n = $next;
				}
				$range->start = $newStart;
				// Update dsr to point to original start
				self::updateDSRForFirstTplNode( $range->start, $startElem );
			} else {
				$range->start = $rangeStartParent;
				$range->end = $rangeStartParent;
			}
		}

		// Ensure range.start is an element node since we want to
		// add/update the data-parsoid attribute to it.
		if ( !DOMUtils::isElt( $range->start ) &&
			!self::expandRangeToAvoidSpanWrapping( $range, true )
		) {
			$span = $doc->createElement( 'span' );
			$range->start->parentNode->insertBefore( $span, $range->start );
			$span->appendChild( $range->start );
			self::updateDSRForFirstTplNode( $span, $startElem );
			$range->start = $span;
		}

		if ( $range->start->nodeName === 'table' ) {
			// If we have any fostered content, include it as well.
			for (
				$rangeStartPreviousSibling = $range->start->previousSibling;
				$rangeStartPreviousSibling instanceof DOMElement &&
					!empty( DOMDataUtils::getDataParsoid( $rangeStartPreviousSibling )->fostered );
				$rangeStartPreviousSibling = $range->start->previousSibling
			) {
				$range->start = $rangeStartPreviousSibling;
			}
		}

		$rangeStartNextSibling = $range->start->nextSibling;
		if ( $range->start === $startElem && $rangeStartNextSibling instanceof DOMElement ) {
			// HACK!
			// The strip-double-tds pass has a HACK that requires DSR and src
			// information being set on this element node. So, this HACK here
			// is supporting that HACK there.
			//
			// (The parser test for T52603 will fail without this fix)
			self::updateDSRForFirstTplNode( $rangeStartNextSibling, $startElem );
		}

		// Use the negative test since it doesn't mark the range as flipped
		// if range.start === range.end
		if ( !DOMUtils::inSiblingOrder( $range->start, $range->end ) ) {
			// In foster-parenting situations, the end-meta tag (and hence range.end)
			// can show up before the range.start which would be the table itself.
			// So, we record this info for later analysis.
			$range->flipped = true;
		}

		$env->log( 'trace/tplwrap/findranges', function () use ( &$range ) {
			$msg = '';
			$dp1 = DOMDataUtils::getDataParsoid( $range->start );
			$dp2 = DOMDataUtils::getDataParsoid( $range->end );
			// Temporarily detach ->tmp so it is left out of the JSON dumps
			// below; it is restored before returning.
			$tmp1 = $dp1->tmp;
			$tmp2 = $dp2->tmp;
			$dp1->tmp = null;
			$dp2->tmp = null;
			$msg .= "\n----------------------------------------------";
			$msg .= "\nFound range : " . $range->id . '; flipped? ' . $range->flipped .
				'; offset: ' . $range->startOffset;
			$msg .= "\nstart-elem : " . DOMCompat::getOuterHTML( $range->startElem ) . '; DP: ' .
				PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $range->startElem ) );
			$msg .= "\nend-elem : " . DOMCompat::getOuterHTML( $range->endElem ) . '; DP: ' .
				PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $range->endElem ) );
			$msg .= "\nstart : [TAG_ID " . $tmp1->tagId . ']: ' .
				DOMCompat::getOuterHTML( $range->start ) . '; DP: ' .
				PHPUtils::jsonEncode( $dp1 );
			$msg .= "\nend : [TAG_ID " . $tmp2->tagId . ']: ' .
				DOMCompat::getOuterHTML( $range->end ) . '; DP: ' .
				PHPUtils::jsonEncode( $dp2 );
			$msg .= "\n----------------------------------------------";
			$dp1->tmp = $tmp1;
			$dp2->tmp = $tmp2;
			return $msg;
		} );

		return $range;
	}

	/**
	 * Neutralize a template start marker: a `<meta>` is removed from the
	 * DOM, while any other element only has its mw:* types stripped from
	 * its `typeof` attribute.
	 *
	 * @param DOMElement $meta
	 */
	private static function stripStartMeta( DOMElement $meta ): void {
		if ( $meta->nodeName === 'meta' ) {
			$meta->parentNode->removeChild( $meta );
		} else {
			// Remove mw:* from the typeof.
			$type = $meta->getAttribute( 'typeof' );
			$type = preg_replace( '/(?:^|\s)mw:[^\/]*(\/[^\s]+|(?=$|\s))/D', '', $type );
			$meta->setAttribute( 'typeof', $type );
		}
	}

	/**
	 * Follow `$nestingInfo` (range id => id of the range it is nested in)
	 * starting at `$startId` until reaching an id with no entry, and return
	 * that id.
	 *
	 * @param array $nestingInfo
	 * @param string|null $startId
	 * @return string|null
	 * @throws Error If an id is visited twice while walking up.
	 */
	private static function findToplevelEnclosingRange(
		array $nestingInfo, ?string $startId
	): ?string {
		// Walk up the implicit nesting tree to find the
		// top-level range within which rId is nested.
		// No cycles can exist since they have been suppressed.
		$visited = [];
		$rId = $startId;
		while ( isset( $nestingInfo[$rId] ) ) {
			if ( isset( $visited[$rId] ) ) {
				throw new Error( "Found a cycle in tpl-range nesting where there shouldn't have been one." );
			}
			$visited[$rId] = true;
			$rId = $nestingInfo[$rId];
		}
		return $rId;
	}

	/**
	 * Append the info for template range `$tpl` to the compound template
	 * entry `$compoundTplId`, preceded by any wikitext found between it and
	 * the previously recorded template and by the start element's
	 * `unwrappedWT`, if any.
	 *
	 * @param Frame $frame
	 * @param array &$compoundTpls
	 * @param string $compoundTplId
	 * @param stdClass $tpl
	 * @param stdClass $argInfo
	 */
	private static function recordTemplateInfo(
		Frame $frame, array &$compoundTpls, string $compoundTplId, stdClass $tpl, stdClass $argInfo
	): void {
		if ( !isset( $compoundTpls[$compoundTplId] ) ) {
			$compoundTpls[$compoundTplId] = [];
		}

		// Record template args info along with any intervening wikitext
		// between templates that are part of the same compound structure.
		/** @var array $tplArray */
		$tplArray = &$compoundTpls[$compoundTplId];
		$dp = DOMDataUtils::getDataParsoid( $tpl->startElem );
		$dsr = $dp->dsr;

		if ( count( $tplArray ) > 0 ) {
			$prevTplInfo = PHPUtils::lastItem( $tplArray );
			if ( $prevTplInfo->dsr->end < $dsr->start ) {
				$width = $dsr->start - $prevTplInfo->dsr->end;
				$tplArray[] = (object)[
					'wt' => PHPUtils::safeSubstr( $frame->getSrcText(), $prevTplInfo->dsr->end, $width ),
				];
			}
		}

		if ( !empty( $dp->unwrappedWT ) ) {
			$tplArray[] = (object)[ 'wt' => $dp->unwrappedWT ];
		}

		// Get rid of src-offsets since they aren't needed anymore.
		// The entries are accessed as objects, so iterating by value still
		// mutates them and avoids leaving a dangling foreach reference.
		foreach ( $argInfo->paramInfos as $pi ) {
			unset( $pi->srcOffsets );
		}

		$tplArray[] = (object)[
			'dsr' => $dsr,
			'args' => $argInfo->dict,
			'paramInfos' => $argInfo->paramInfos
		];
	}

	/**
	 * Nesting cycles with multiple ranges can show up because of foster
	 * parenting scenarios if they are not detected and suppressed.
	 *
	 * Follows the `$nestingInfo` chain starting from `$end` and reports
	 * whether `$start` is reached along the way.
	 *
	 * @param string $start
	 * @param string $end
	 * @param array $nestingInfo
	 * @return bool
	 */
	private static function introducesCycle( string $start, string $end, array $nestingInfo ): bool {
		$visited = [ $start => true ];
		$elt = $nestingInfo[$end] ?? null;
		while ( $elt ) {
			if ( !empty( $visited[$elt] ) ) {
				return true;
			}
			$elt = $nestingInfo[$elt] ?? null;
		}
		return false;
	}

	/**
	 * The `inSiblingOrder` check here is sufficient to determine overlaps
	 * because the algorithm in `findWrappableTemplateRanges` will put the
	 * start/end elements for intersecting ranges on the same plane and prev/
	 * curr are in textual order (which hopefully translates to dom order).
	 *
	 * @param stdClass $prev
	 * @param stdClass $curr
	 * @return bool
	 */
	private static function rangesOverlap( stdClass $prev, stdClass $curr ): bool {
		// For flipped ranges, start and end trade places.
		$prevEnd = ( !$prev->flipped ) ? $prev->end : $prev->start;
		$currStart = ( !$curr->flipped ) ? $curr->start : $curr->end;
		return DOMUtils::inSiblingOrder( $currStart, $prevEnd );
	}

	/**
	 * Reduce the given template ranges to the top-level, non-overlapping
	 * ones: nested ranges are folded into their outermost enclosing range
	 * and overlapping ranges are merged into the preceding range. The
	 * marker elements of folded/merged ranges are removed or stripped from
	 * the DOM, and template arg info is collected per surviving range.
	 *
	 * @param DOMDocument $document
	 * @param Frame $frame
	 * @param DOMNode $docRoot
	 * @param array $tplRanges
	 * @return stdClass [ 'ranges' => $newRanges, 'tplArrays' => $compoundTpls ]
	 */
	public static function findTopLevelNonOverlappingRanges(
		DOMDocument $document, Frame $frame, DOMNode $docRoot, array $tplRanges
	): stdClass {
		$env = $frame->getEnv();
		$numRanges = count( $tplRanges );

		// For each node, assign an attribute that is a record of all
		// tpl ranges it belongs to at the top-level.
		//
		// FIXME: Ideally we would have used a hash-table external to the
		// DOM, but we have no way of computing a hash-code on a dom-node
		// right now.  So, this is the next best solution (=hack) to use
		// node.data as hash-table storage.
		for ( $i = 0; $i < $numRanges; $i++ ) {
			$r = $tplRanges[$i];
			$n = !$r->flipped ? $r->start : $r->end;
			$e = !$r->flipped ? $r->end : $r->start;

			while ( $n ) {
				if ( $n instanceof DOMElement ) {
					// Initialize tplRanges, if necessary.
					$dp = DOMDataUtils::getDataParsoid( $n );
					if ( !isset( $dp->tmp->tplRanges ) ) {
						$dp->tmp->tplRanges = [];
					}

					// Record 'r'
					$dp->tmp->tplRanges[$r->id] = $r;

					// Done
					if ( $n === $e ) {
						break;
					}
				}

				$n = $n->nextSibling;
			}
		}

		// In the first pass over `numRanges` below, `subsumedRanges` is used to
		// record purely the nested ranges.  However, in the second pass, we also
		// add the relationships between overlapping ranges so that
		// `findToplevelEnclosingRange` can use that information to add `argInfo`
		// to the right `compoundTpls`.  This scenario can come up when you have
		// three ranges, 1 intersecting with 2 but not 3, and 3 nested in 2.
		$subsumedRanges = [];

		// For each range r:(s, e), walk up from s --> docRoot and if any of
		// these nodes have tpl-ranges (besides r itself) assigned to them,
		// then r is nested in those other templates and can be ignored.
		for ( $k = 0; $k < $numRanges; $k++ ) {
			$r = $tplRanges[$k];
			$n = $r->start;

			while ( $n !== $docRoot ) {
				$ranges = DOMDataUtils::getDataParsoid( $n )->tmp->tplRanges ?? null;
				if ( $ranges ) {
					if ( $n !== $r->start ) {
						// console.warn(" -> nested; n_tpls: " + Object.keys(ranges));

						// 'r' is nested for sure
						// Record the outermost range in which 'r' is nested.
						$rangeIds = array_keys( $ranges );
						$findOutermostRange = function ( $previous, $next ) use ( &$ranges ) {
							return ( $ranges[$next]->startOffset < $ranges[$previous]->startOffset )
								? $next
								: $previous;
						};
						$subsumedRanges[$r->id] = (string)array_reduce(
							$rangeIds, $findOutermostRange, $rangeIds[0]
						);
						break;
					} else {
						// n === r.start
						//
						// We have to make sure this is not an overlap scenario.
						// Find the ranges that r.start and r.end belong to and
						// compute their intersection. If this intersection has
						// another tpl range besides r itself, we have a winner!
						//
						// The code below does the above check efficiently.
						$sTpls = $ranges;
						$eTpls = DOMDataUtils::getDataParsoid( $r->end )->tmp->tplRanges ?? null;
						$sKeys = array_keys( $sTpls );
						$foundNesting = false;

						$sKeysCount = count( $sKeys );
						for ( $j = 0; $j < $sKeysCount; $j++ ) {
							// - Don't record nesting cycles.
							// - Record the outermost range in which 'r' is nested in.
							$otherId = (string)$sKeys[$j];
							$other = $sTpls[$otherId];
							if ( $otherId !== $r->id &&
								!empty( $eTpls[$otherId] ) &&
								// When we have identical ranges, pick the range with
								// the larger offset to be subsumed.
								(
									$r->start !== $other->start ||
									$r->end !== $other->end ||
									$other->startOffset < $r->startOffset
								) &&
								!self::introducesCycle( $r->id, $otherId, $subsumedRanges )
							) {
								$foundNesting = true;
								if ( !isset( $subsumedRanges[$r->id] ) ||
									$other->startOffset < $sTpls[$subsumedRanges[$r->id]]->startOffset
								) {
									$subsumedRanges[$r->id] = $otherId;
								}
							}
						}

						if ( $foundNesting ) {
							// 'r' is nested
							// console.warn(" -> nested: sTpls: " + Object.keys(sTpls) +
							// "; eTpls: " + Object.keys(eTpls) +
							// "; set to: " + subsumedRanges.get(r.id));
							break;
						}
					}
				}

				// Move up
				$n = $n->parentNode;
			}
		}

		// Sort by start offset in source wikitext
		usort( $tplRanges, function ( $r1, $r2 ) {
			return $r1->startOffset <=> $r2->startOffset;
		} );

		// Since the tpl ranges are sorted in textual order (by start offset),
		// it is sufficient to only look at the most recent template to see
		// if the current one overlaps with the previous one.
		//
		// This works because we've already identified nested ranges and can ignore them.

		$newRanges = [];
		$prev = null;
		$compoundTpls = [];

		for ( $l = 0; $l < $numRanges; $l++ ) {
			$endTagToRemove = null;
			$startTagToStrip = null;

			$r = $tplRanges[$l];

			// Extract argInfo
			$tmp = DOMDataUtils::getDataParsoid( $r->startElem )->tmp;
			$argInfo = $tmp->tplarginfo ?? null;
			if ( $argInfo ) {
				/** @var stdClass $argInfo */
				$argInfo = json_decode( $argInfo );
			} else {
				// An assertion here is probably an indication that we're
				// mistakenly doing template wrapping in a nested context.
				Assert::invariant( isset( $tmp->fromFoster ), 'Template range without arginfo.' );
			}

			$env->log( 'trace/tplwrap/merge', function () use ( &$r ) {
				$msg = '';
				$dp1 = DOMDataUtils::getDataParsoid( $r->start );
				$dp2 = DOMDataUtils::getDataParsoid( $r->end );
				// Temporarily detach ->tmp so it is left out of the JSON
				// dumps below; it is restored before returning.
				$tmp1 = $dp1->tmp;
				$tmp2 = $dp2->tmp;
				$dp1->tmp = null;
				$dp2->tmp = null;
				$msg .= "\n##############################################";
				$msg .= "\nrange " . $r->id . '; r-start-elem: ' .
					DOMCompat::getOuterHTML( $r->startElem ) . '; DP: ' .
					PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $r->startElem ) );
				$msg .= "\nrange " . $r->id . '; r-end-elem: ' .
					DOMCompat::getOuterHTML( $r->endElem ) . '; DP: ' .
					PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $r->endElem ) );
				$msg .= "\nrange " . $r->id . '; r-start: [TAG_ID ' . $tmp1->tagId . ']: ' .
					DOMCompat::getOuterHTML( $r->start ) . '; DP: ' .
					PHPUtils::jsonEncode( $dp1 );
				$msg .= "\nrange " . $r->id . '; r-end: [TAG_ID ' . $tmp2->tagId . ']: ' .
					DOMCompat::getOuterHTML( $r->end ) . '; DP: ' .
					PHPUtils::jsonEncode( $dp2 );
				$msg .= "\n----------------------------------------------";
				$dp1->tmp = $tmp1;
				$dp2->tmp = $tmp2;
				return $msg;
			} );

			$enclosingRangeId = self::findToplevelEnclosingRange(
				$subsumedRanges,
				$subsumedRanges[$r->id] ?? null
			);
			if ( $enclosingRangeId ) {
				$env->log( 'trace/tplwrap/merge', '--nested in ', $enclosingRangeId, '--' );

				// Nested -- ignore r
				$startTagToStrip = $r->startElem;
				$endTagToRemove = $r->endElem;
				if ( $argInfo ) {
					// 'r' is nested in 'enclosingRange' at the top-level
					// So, enclosingRange gets r's argInfo
					self::recordTemplateInfo( $frame, $compoundTpls, $enclosingRangeId, $r, $argInfo );
				}
			} elseif ( $prev && self::rangesOverlap( $prev, $r ) ) {
				// In the common case, in overlapping scenarios, r.start is
				// identical to prev.end. However, in fostered content scenarios,
				// there can be true overlap of the ranges.
				$env->log( 'trace/tplwrap/merge', '--overlapped--' );

				// See comment above, where `subsumedRanges` is defined.
				$subsumedRanges[$r->id] = $prev->id;

				// Overlapping ranges.
				// r is the regular kind
				// Merge r with prev

				// Note that if a table comes from a template, a foster box isn't
				// emitted so the enclosure isn't guaranteed.  In pathological
				// cases, like where the table end tag isn't emitted, we can still
				// end up with flipped ranges if the template end marker gets into
				// a fosterable position (which can still happen despite being
				// emitted as a comment).
				Assert::invariant( !$r->flipped, 'Flipped range should have been enclosed.' );

				$startTagToStrip = $r->startElem;
				$endTagToRemove = $prev->endElem;

				$prev->end = $r->end;
				$prev->endElem = $r->endElem;

				// Update compoundTplInfo
				if ( $argInfo ) {
					self::recordTemplateInfo( $frame, $compoundTpls, $prev->id, $r, $argInfo );
				}
			} else {
				$env->log( 'trace/tplwrap/merge', '--normal--' );

				// Default -- no overlap
				// Emit the merged range
				$newRanges[] = $r;
				$prev = $r;

				// Update compoundTpls
				if ( $argInfo ) {
					self::recordTemplateInfo( $frame, $compoundTpls, $r->id, $r, $argInfo );
				}
			}

			if ( $endTagToRemove ) {
				// Remove start and end meta-tags
				// Not necessary to remove the start tag, but good to cleanup
				$endTagToRemove->parentNode->removeChild( $endTagToRemove );
				self::stripStartMeta( $startTagToStrip );
			}
		}

		return (object)[ 'ranges' => $newRanges, 'tplArrays' => $compoundTpls ];
	}

	/**
	 * Name the first templated node of a range: its upper-cased node name,
	 * suffixed with `_<stx>` when the node's data-parsoid has a non-empty
	 * `stx`. A leading template marker meta and fostered nodes are skipped
	 * first; null is returned if the resulting node is a `<meta>`.
	 *
	 * @param stdClass $range
	 * @return string|null
	 */
	private static function findFirstTemplatedNode( stdClass $range ): ?string {
		$firstNode = $range->start;

		// Skip tpl marker meta
		if ( WTUtils::isTplMarkerMeta( $firstNode ) ) {
			$firstNode = $firstNode->nextSibling;
		}

		// Walk past fostered nodes since they came from within a table
		// Note that this is not foolproof because in some scenarios,
		// fostered content is not marked up. Ex: when a table is templated,
		// and content from the table is fostered.
		$dp = DOMDataUtils::getDataParsoid( $firstNode );
		while ( !empty( $dp->fostered ) ) {
			$firstNode = $firstNode->nextSibling;
			/** @var DOMElement $firstNode */
			DOMUtils::assertElt( $firstNode );
			$dp = DOMDataUtils::getDataParsoid( $firstNode );
		}

		// FIXME: It is harder to use META as a node name since this is a generic
		// placeholder for a whole bunch of things each of which has its own
		// newline constraint requirements. So, for now, I am skipping that
		// can of worms to prevent confusing the serializer with an overloaded
		// tag name.
		if ( $firstNode->nodeName === 'meta' ) {
			return null;
		}

		// FIXME spec-compliant values would be upper-case, this is just a workaround
		// for current PHP DOM implementation and could be removed in the future
		$nodeName = mb_strtoupper( $firstNode->nodeName );

		return !empty( $dp->stx ) ? $nodeName . '_' . $dp->stx : $nodeName;
	}

	/**
	 * @param DOMDocument $doc
	 * @param Frame $frame
	 * @param array $tplRanges
	 * @param array $tplArrays
	 */
	private static function encapsulateTemplates(
		DOMDocument $doc, Frame $frame, array $tplRanges, array $tplArrays
	): void {
		$env = $frame->getEnv();
		$numRanges = count( $tplRanges );
		for ( $i = 0; $i < $numRanges; $i++ ) {
			$range = $tplRanges[$i];

			// We should never have flipped overlapping ranges, and indeed that's
			// asserted in `findTopLevelNonOverlappingRanges`.  Flipping results
			// in either completely nested ranges, or non-intersecting ranges.
			//
			// If the table causing the fostering is not transcluded, we emit a
			// foster box and wrap the whole table+fb in metas, producing nested
			// ranges.  For ex,
			//
			//