foo * ``` * the Parsoid parser, pre-post processing generates something like * ``` *
  • foo
  • * ``` * This visitor deletes such spurious `
  • `s to match the output of * the PHP parser. * * However, note that the wikitext `
  • `, any preceding wikitext * asterisk `*` absent, should indeed expand into two nodes in the * DOM. * @param DOMElement $node * @param Env $env * @param array $options * @return bool */ public static function handleLIHack( DOMElement $node, Env $env, array $options ): bool { $prevNode = $node->previousSibling; if ( WTUtils::isLiteralHTMLNode( $node ) && $prevNode instanceof DOMElement && $prevNode->nodeName === 'li' && !WTUtils::isLiteralHTMLNode( $prevNode ) && DOMUtils::nodeEssentiallyEmpty( $prevNode ) ) { $dp = DOMDataUtils::getDataParsoid( $node ); $liHackSrc = WTUtils::getWTSource( $options['frame'], $prevNode ); if ( DOMUtils::hasTypeOf( $node, 'mw:Transclusion' ) ) { $dataMW = DOMDataUtils::getDataMw( $node ); if ( isset( $dataMW->parts ) ) { array_unshift( $dataMW->parts, $liHackSrc ); } } else { // We have to store the extra information in order to // reconstruct the original source for roundtripping. $dp->liHackSrc = $liHackSrc; } // Update the dsr. Since we are coalescing the first // node with the second (or, more precisely, deleting // the first node), we have to update the second DSR's // starting point and start tag width. $nodeDSR = $dp->dsr ?? null; $prevNodeDSR = DOMDataUtils::getDataParsoid( $prevNode )->dsr ?? null; if ( $nodeDSR !== null && $prevNodeDSR !== null ) { $dp->dsr = new DomSourceRange( $prevNodeDSR->start, $nodeDSR->end, $nodeDSR->openWidth + $prevNodeDSR->length(), $nodeDSR->closeWidth ); } // Delete the duplicated
  • node. $prevNode->parentNode->removeChild( $prevNode ); } return true; } /** * @param DOMNode $c * @return array */ private static function getMigrationInfo( DOMNode $c ): array { $tplRoot = WTUtils::findFirstEncapsulationWrapperNode( $c ); if ( $tplRoot !== null ) { // Check if everything between tplRoot and c is migratable. $prev = $tplRoot->previousSibling; while ( $c !== $prev ) { if ( !WTUtils::isCategoryLink( $c ) && !( $c->nodeName === 'span' && preg_match( '/^\s*$/D', $c->textContent ) ) ) { return [ 'tplRoot' => $tplRoot, 'migratable' => false ]; } $c = $c->previousSibling; } } return [ 'tplRoot' => $tplRoot, 'migratable' => true ]; } /** * @param DOMNode $li * @return DOMNode|null */ private static function findLastMigratableNode( DOMNode $li ): ?DOMNode { $sentinel = null; $c = DOMUtils::lastNonSepChild( $li ); // c is known to be a category link. // fail fast in parser tests if something changes. Assert::invariant( WTUtils::isCategoryLink( $c ), 'c is known to be a category link' ); while ( $c ) { // Handle template units first $info = self::getMigrationInfo( $c ); if ( !$info['migratable'] ) { break; } elseif ( $info['tplRoot'] !== null ) { $c = $info['tplRoot']; } if ( $c instanceof DOMText ) { // Update sentinel if we hit a newline. // We want to migrate these newlines and // everything following them out of 'li'. if ( preg_match( '/\n\s*$/D', $c->nodeValue ) ) { $sentinel = $c; } // If we didn't hit pure whitespace, we are done! if ( !preg_match( '/^\s*$/D', $c->nodeValue ) ) { break; } } elseif ( $c instanceof DOMComment ) { $sentinel = $c; } elseif ( !WTUtils::isCategoryLink( $c ) ) { // We are done if we hit anything but text // or category links. break; } $c = $c->previousSibling; } return $sentinel; } /** * Earlier in the parsing pipeline, we suppress all newlines * and other whitespace before categories which causes category * links to be swallowed into preceding paragraphs and list items. * * However, with wikitext like this: `*a\n\n[[Category:Foo]]`, this * could prevent proper roundtripping (because we suppress newlines * when serializing list items). This needs addressing because * this pattern is extremely common (some list at the end of the page * followed by a list of categories for the page). * @param DOMElement $li * @param Env $env * @param array $options * @param bool $atTopLevel * @param stdClass|null $tplInfo * @return bool */ public static function migrateTrailingCategories( DOMElement $li, Env $env, array $options, bool $atTopLevel = false, ?stdClass $tplInfo = null ): bool { // * Don't bother fixing up template content when processing the full page if ( $tplInfo ) { return true; } // If there is migratable content inside a list item // (categories preceded by newlines), // * migrate it out of the outermost list // * and fix up the DSR of list items and list along the rightmost path. if ( $li->nextSibling === null && DOMUtils::isList( $li->parentNode ) && WTUtils::isCategoryLink( DOMUtils::lastNonSepChild( $li ) ) ) { // Find the outermost list -- content will be moved after it $outerList = $li->parentNode; while ( DOMUtils::isListItem( $outerList->parentNode ) ) { $p = $outerList->parentNode; // Bail if we find ourself on a path that is not the rightmost path. if ( $p->nextSibling !== null ) { return true; } $outerList = $p->parentNode; } // Find last migratable node $sentinel = self::findLastMigratableNode( $li ); if ( !$sentinel ) { return true; } // Migrate (and update DSR) $c = $li->lastChild; $liDsr = DOMDataUtils::getDataParsoid( $li )->dsr ?? null; $newEndDsr = -1; // dummy to eliminate useless null checks while ( true ) { if ( $c instanceof DOMElement ) { $dsr = DOMDataUtils::getDataParsoid( $c )->dsr ?? null; $newEndDsr = $dsr->start ?? -1; $outerList->parentNode->insertBefore( $c, $outerList->nextSibling ); } elseif ( $c instanceof DOMText ) { if ( preg_match( '/^\s*$/D', $c->nodeValue ) ) { $newEndDsr -= strlen( $c->nodeValue ); $outerList->parentNode->insertBefore( $c, $outerList->nextSibling ); } else { // Split off the newlines into its own node and migrate it $nls = $c->nodeValue; $c->nodeValue = preg_replace( '/\s+$/D', '', $c->nodeValue, 1 ); $nls = substr( $nls, strlen( $c->nodeValue ) ); $nlNode = $c->ownerDocument->createTextNode( $nls ); $outerList->parentNode->insertBefore( $nlNode, $outerList->nextSibling ); $newEndDsr -= strlen( $nls ); } } elseif ( $c instanceof DOMComment ) { $newEndDsr -= WTUtils::decodedCommentLength( $c ); $outerList->parentNode->insertBefore( $c, $outerList->nextSibling ); } if ( $c === $sentinel ) { break; } $c = $li->lastChild; } // Update DSR of all listitem & list nodes till // we hit the outermost list we started with. $delta = null; if ( $liDsr && $newEndDsr >= 0 ) { $delta = $liDsr->end - $newEndDsr; } // If there is no delta to adjust dsr by, we are done if ( !$delta ) { return true; } // Fix DSR along the rightmost path to outerList $list = null; while ( $outerList !== $list ) { $list = $li->parentNode; DOMUtils::assertElt( $list ); $liDp = DOMDataUtils::getDataParsoid( $li ); if ( !empty( $liDp->dsr ) ) { $liDp->dsr->end -= $delta; } $listDp = DOMDataUtils::getDataParsoid( $list ); if ( !empty( $listDp->dsr ) ) { $listDp->dsr->end -= $delta; } $li = $list->parentNode; } } return true; } }