false, 'suppressIdAttribute' => false, 'errorCallback' => null, ]; $this->errorCallback = $options['errorCallback']; $this->suppressHtmlNamespace = $options['suppressHtmlNamespace']; $this->suppressIdAttribute = $options['suppressIdAttribute']; }

	/**
	 * Get the constructed document or document fragment. In the fragment case,
	 * a DOMElement is returned, and the caller is expected to extract its
	 * inner contents, ignoring the wrapping element. This convention is
	 * convenient because the wrapping element gives libxml somewhere to put
	 * its namespace declarations. If we copied the children into a
	 * DOMDocumentFragment, libxml would invent new prefixes for the orphaned
	 * namespaces.
	 *
	 * @return \DOMNode
	 */
	public function getFragment() {
		return $this->isFragment ? $this->doc->documentElement : $this->doc;
	}

	/**
	 * Returns true if the document was coerced due to libxml limitations. We
	 * follow HTML 5.1 § 8.2.7 "Coercing an HTML DOM into an infoset".
	 *
	 * @return bool
	 */
	public function isCoerced() {
		return $this->coerced;
	}

	/**
	 * Begin a new document. Fragment mode is enabled when a fragment
	 * namespace is supplied; the fragment name is not used here.
	 *
	 * @param string|null $fragmentNamespace
	 * @param string|null $fragmentName
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		$this->isFragment = $fragmentNamespace !== null;
		$this->doc = $this->createDocument();
	}

	/**
	 * Create an empty UTF-8 DOMDocument, optionally carrying a doctype.
	 *
	 * An empty-string doctype name cannot be represented, so the document is
	 * created without a doctype and the builder is flagged as coerced.
	 *
	 * @param string|null $doctypeName
	 * @param string|null $public
	 * @param string|null $system
	 * @return \DOMDocument
	 * @suppress PhanTypeMismatchArgumentInternalProbablyReal
	 *   Null args to DOMImplementation::createDocument
	 */
	protected function createDocument(
		?string $doctypeName = null,
		?string $public = null,
		?string $system = null
	) {
		$impl = new \DOMImplementation;
		// An empty qualified name ('') asks for a document with no root
		// element. Passing '' explicitly (rather than null) avoids the PHP 8.1
		// deprecation for null passed to non-nullable internal parameters.
		if ( $doctypeName === '' ) {
			$this->coerced = true;
			$doc = $impl->createDocument( null, '' );
		} elseif ( $doctypeName === null ) {
			$doc = $impl->createDocument( null, '' );
		} else {
			$doctype = $impl->createDocumentType( $doctypeName, $public ?? '', $system ?? '' );
			$doc = $impl->createDocument( null, '', $doctype );
		}
		$doc->encoding = 'UTF-8';
		return $doc;
	}

	/**
	 * Nothing to finalise at the end of the document.
	 *
	 * @param int $pos
	 */
	public function endDocument( $pos ) {
	}

	/**
	 * Translate a TreeBuilder preposition and reference element into the
	 * arguments needed for DOMNode::insertBefore().
	 *
	 * @param int $preposition TreeBuilder::ROOT, TreeBuilder::BEFORE, or any
	 *   other value, which is treated as "append under the reference element"
	 * @param Element|null $refElement Not read when $preposition is ROOT
	 * @return array Two-element list: the parent node, and the node to insert
	 *   before (null to append at the end of the parent)
	 */
	private function resolveInsertionPoint( $preposition, $refElement ) {
		if ( $preposition === TreeBuilder::ROOT ) {
			return [ $this->doc, null ];
		}
		$refNode = $refElement->userData;
		if ( $preposition === TreeBuilder::BEFORE ) {
			return [ $refNode->parentNode, $refNode ];
		}
		return [ $refNode, null ];
	}

	/**
	 * Insert a DOM node at the location described by the preposition and
	 * reference element.
	 *
	 * @param int $preposition
	 * @param Element|null $refElement
	 * @param \DOMNode $node
	 */
	private function insertNode( $preposition, $refElement, $node ) {
		[ $parent, $refNode ] = $this->resolveInsertionPoint( $preposition, $refElement );
		$parent->insertBefore( $node, $refNode );
	}

	/**
	 * Replace unsupported characters with a code of the form U123456.
	 * Sets the coerced flag when the name had to be changed.
	 *
	 * @param string $name
	 * @return string
	 */
	private function coerceName( $name ) {
		$coercedName = DOMUtils::coerceName( $name );
		if ( $name !== $coercedName ) {
			$this->coerced = true;
		}
		return $coercedName;
	}

	/**
	 * Create the DOM element for a TreeBuilder Element, copy its attributes
	 * onto it, and remember the node in $element->userData.
	 *
	 * Names rejected by the DOM extension are retried with coerceName().
	 *
	 * @param Element $element
	 * @return \DOMElement
	 */
	private function createNode( Element $element ) {
		$noNS = $this->suppressHtmlNamespace && $element->namespace === HTMLData::NS_HTML;
		try {
			if ( $noNS ) {
				$node = $this->doc->createElement( $element->name );
			} else {
				$node = $this->doc->createElementNS( $element->namespace, $element->name );
			}
		} catch ( \DOMException $e ) {
			// Attempt to escape the name so that it is more acceptable
			if ( $noNS ) {
				$node = $this->doc->createElement( $this->coerceName( $element->name ) );
			} else {
				$node = $this->doc->createElementNS(
					$element->namespace,
					$this->coerceName( $element->name )
				);
			}
		}

		foreach ( $element->attrs->getObjects() as $attr ) {
			if ( $attr->namespaceURI === null && strpos( $attr->localName, ':' ) !== false ) {
				// FIXME: this apparently works to create a prefixed localName
				// in the null namespace, but this is probably taking advantage
				// of a bug in PHP's DOM library, and screws up in various
				// interesting ways. For example, attributes created in this
				// way can't be discovered via hasAttribute() or hasAttributeNS().
				try {
					// createAttribute() is inside the try block because it is
					// the call that rejects names containing invalid
					// characters; left outside, such a name would escape the
					// coercion fallback below.
					$attrNode = $this->doc->createAttribute( $attr->localName );
					$attrNode->value = $attr->value;
					$node->setAttributeNodeNS( $attrNode );
				} catch ( \DOMException $e ) {
					$node->setAttributeNS(
						$attr->namespaceURI,
						$this->coerceName( $attr->qualifiedName ),
						$attr->value
					);
				}
			} else {
				try {
					$node->setAttributeNS(
						$attr->namespaceURI,
						$attr->qualifiedName,
						$attr->value
					);
				} catch ( \DOMException $e ) {
					$node->setAttributeNS(
						$attr->namespaceURI,
						$this->coerceName( $attr->qualifiedName ),
						$attr->value
					);
				}
			}
		}

		if ( ( !$this->suppressIdAttribute ) && $node->hasAttribute( 'id' ) ) {
			// This is a call to a non-standard DOM method required by PHP in
			// order to implement DOMDocument::getElementById() efficiently.
			$node->setIdAttribute( 'id', true );
		}

		$element->userData = $node;
		return $node;
	}

	/**
	 * Insert character data, merging it into an adjacent preceding text node
	 * where there is one.
	 *
	 * @param int $preposition
	 * @param Element|null $refElement
	 * @param string $text Buffer containing the characters
	 * @param int $start Byte offset of the data within $text
	 * @param int $length Byte length of the data
	 * @param int $sourceStart Unused here
	 * @param int $sourceLength Unused here
	 */
	public function characters( $preposition, $refElement, $text, $start, $length,
		$sourceStart, $sourceLength
	) {
		// Parse $preposition and $refElement as in self::insertNode()
		[ $parent, $refNode ] = $this->resolveInsertionPoint( $preposition, $refElement );

		// https://html.spec.whatwg.org/#insert-a-character
		// If the adjusted insertion location is in a Document node, then
		// return.
		if ( $parent === $this->doc ) {
			return;
		}

		$data = substr( $text, $start, $length );

		// If there is a Text node immediately before the adjusted insertion
		// location, then append data to that Text node's data.
		if ( $refNode === null ) {
			$prev = $parent->lastChild;
		} else {
			/** @var \DOMNode $refNode */
			$prev = $refNode->previousSibling;
		}

		if ( $prev !== null && $prev->nodeType === XML_TEXT_NODE ) {
			'@phan-var \DOMCharacterData $prev'; /** @var \DOMCharacterData $prev */
			$prev->appendData( $data );
		} else {
			$node = $this->doc->createTextNode( $data );
			$parent->insertBefore( $node, $refNode );
		}
	}

	/**
	 * Insert an element, creating its DOM node first unless one is already
	 * stored in $element->userData.
	 *
	 * @param int $preposition
	 * @param Element|null $refElement
	 * @param Element $element
	 * @param bool $void Unused here
	 * @param int $sourceStart Unused here
	 * @param int $sourceLength Unused here
	 */
	public function insertElement( $preposition, $refElement, Element $element, $void,
		$sourceStart, $sourceLength
	) {
		if ( $element->userData ) {
			$node = $element->userData;
		} else {
			$node = $this->createNode( $element );
		}
		$this->insertNode( $preposition, $refElement, $node );
	}

	/**
	 * End tags need no DOM work.
	 *
	 * @param Element $element
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function endTag( Element $element, $sourceStart, $sourceLength ) {
	}

	/**
	 * Record the doctype. While the document is still empty it is recreated
	 * so that it carries the doctype; otherwise only the properties are set.
	 *
	 * @param string|null $name
	 * @param string|null $public
	 * @param string|null $system
	 * @param int $quirks
	 * @param int $sourceStart Unused here
	 * @param int $sourceLength Unused here
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		if ( !$this->doc->firstChild ) {
			$this->doc = $this->createDocument( $name, $public, $system );
		}
		$this->doctypeName = $name;
		$this->public = $public;
		$this->system = $system;
		$this->quirks = $quirks;
	}

	/**
	 * Insert a comment node.
	 *
	 * @param int $preposition
	 * @param Element|null $refElement
	 * @param string $text
	 * @param int $sourceStart Unused here
	 * @param int $sourceLength Unused here
	 */
	public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
		$node = $this->doc->createComment( $text );
		$this->insertNode( $preposition, $refElement, $node );
	}

	/**
	 * Forward a parse error to the configured error callback, if any.
	 *
	 * @param string $text
	 * @param int $pos
	 */
	public function error( $text, $pos ) {
		if ( $this->errorCallback ) {
			( $this->errorCallback )( $text, $pos );
		}
	}

	/**
	 * Add attributes to an existing element without overwriting any
	 * attribute it already has.
	 *
	 * @param Element $element
	 * @param Attributes $attrs
	 * @param int $sourceStart Unused here
	 */
	public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
		$node = $element->userData;
		'@phan-var \DOMElement $node'; /** @var \DOMElement $node */
		foreach ( $attrs->getObjects() as $attr ) {
			if ( $attr->namespaceURI === null && strpos( $attr->localName, ':' ) !== false ) {
				// Attributes with a colon in a null-namespace name can't be
				// found with hasAttribute(), as the FIXME in createNode()
				// explains. However, we can use the return value of
				// setAttributeNodeNS() instead.
				try {
					// createAttribute() is inside the try block so that a name
					// it rejects reaches the coercion fallback.
					$attrNode = $this->doc->createAttribute( $attr->localName );
					$attrNode->value = $attr->value;
					$replaced = $node->setAttributeNodeNS( $attrNode );
				} catch ( \DOMException $e ) {
					$attrNode = $this->doc->createAttribute(
						$this->coerceName( $attr->localName )
					);
					$attrNode->value = $attr->value;
					$replaced = $node->setAttributeNodeNS( $attrNode );
				}
				if ( $replaced ) {
					// Put it back how it was
					$node->setAttributeNodeNS( $replaced );
				}
			} elseif ( $attr->namespaceURI === null ) {
				try {
					if ( !$node->hasAttribute( $attr->localName ) ) {
						$node->setAttribute( $attr->localName, $attr->value );
					}
				} catch ( \DOMException $e ) {
					$coerced = $this->coerceName( $attr->localName );
					if ( !$node->hasAttribute( $coerced ) ) {
						$node->setAttribute( $coerced, $attr->value );
					}
				}
			} else {
				// NOTE(review): this branch writes the attribute using
				// localName, whereas createNode() uses qualifiedName for
				// namespaced attributes -- confirm whether the prefix should
				// be preserved here too.
				try {
					if ( !$node->hasAttributeNS( $attr->namespaceURI, $attr->localName ) ) {
						$node->setAttributeNS(
							$attr->namespaceURI,
							$attr->localName,
							$attr->value
						);
					}
				} catch ( \DOMException $e ) {
					$coerced = $this->coerceName( $attr->localName );
					if ( !$node->hasAttributeNS( $attr->namespaceURI, $coerced ) ) {
						$node->setAttributeNS( $attr->namespaceURI, $coerced, $attr->value );
					}
				}
			}
		}
	}

	/**
	 * Detach an element's DOM node from its parent.
	 *
	 * @param Element $element
	 * @param int $sourceStart Unused here
	 */
	public function removeNode( Element $element, $sourceStart ) {
		$node = $element->userData;
		$node->parentNode->removeChild( $node );
	}

	/**
	 * Insert $newParent as the last child of $element, then move every
	 * earlier child of $element under $newParent, preserving order.
	 *
	 * @param Element $element
	 * @param Element $newParent
	 * @param int $sourceStart
	 */
	public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
		$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
		$node = $element->userData;
		/** @var \DOMElement $newParentNode */
		$newParentNode = $newParent->userData;
		// $newParentNode is now the last child, so the loop ends once all of
		// the original children have been moved in front of it.
		while ( $node->firstChild !== $newParentNode ) {
			$newParentNode->appendChild( $node->firstChild );
		}
	}
}