💎一站式轻松地调用各大LLM模型接口,支持GPT4、智谱、星火、月之暗面及文生图 广告
```
DOMElement extends DOMNode {

    /* Properties */

    // Not implemented yet, always returns NULL
    public readonly bool $schemaTypeInfo ;

    // The element name
    public readonly string $tagName ;

    /* Methods */

    // Creates a new DOMElement object
    public __construct ( string $name [, string $value [, string $namespaceURI ]] )

    // Returns the value of an attribute
    public getAttribute ( string $name ) : string

    // Returns the attribute node
    public getAttributeNode ( string $name ) : DOMAttr

    // Returns the attribute node
    public getAttributeNodeNS ( string $namespaceURI , string $localName ) : DOMAttr

    // Returns the value of an attribute
    public getAttributeNS ( string $namespaceURI , string $localName ) : string

    // Gets elements by tag name
    public getElementsByTagName ( string $name ) : DOMNodeList

    // Gets elements by namespaceURI and localName
    public getElementsByTagNameNS ( string $namespaceURI , string $localName ) : DOMNodeList

    // Checks whether the attribute exists
    public hasAttribute ( string $name ) : bool

    // Checks whether the attribute exists
    public hasAttributeNS ( string $namespaceURI , string $localName ) : bool

    // Removes the attribute
    public removeAttribute ( string $name ) : bool

    // Removes the attribute
    public removeAttributeNode ( DOMAttr $oldnode ) : bool

    // Removes the attribute
    public removeAttributeNS ( string $namespaceURI , string $localName ) : bool

    // Adds a new attribute
    public setAttribute ( string $name , string $value ) : DOMAttr

    // Adds a new attribute node to the element
    public setAttributeNode ( DOMAttr $attr ) : DOMAttr

    // Adds a new attribute node to the element
    public setAttributeNodeNS ( DOMAttr $attr ) : DOMAttr

    // Adds a new attribute
    public setAttributeNS ( string $namespaceURI , string $qualifiedName , string $value ) : void

    // Declares the attribute specified by name to be of type ID
    public setIdAttribute ( string $name , bool $isId ) : void

    // Declares the attribute specified by node to be of type ID
    public setIdAttributeNode ( DOMAttr $attr , bool $isId ) : void

    // Declares the attribute specified by local name and namespace URI to be of type ID
    public setIdAttributeNS ( string $namespaceURI , string $localName , bool $isId ) : void
}
```

要获取DOMElement的值,只需获取nodeValue公共参数(它是从DOMNode继承的)

```
echo $domElement->nodeValue;
```

结合所有注释,获取节点内部HTML的最简单方法是使用此函数

```
/**
 * Returns the markup of every child of $node, concatenated in document order.
 *
 * Each child is serialized with saveXML() on the child's own owner document,
 * so the result uses XML serialization rules.
 *
 * @param DOMNode $node Node whose children are serialized.
 * @return string Concatenated markup of the children; '' when there are none.
 */
function get_inner_html( $node ) {
    $fragments = [];

    foreach ($node->childNodes as $childNode) {
        $fragments[] = $childNode->ownerDocument->saveXML($childNode);
    }

    return implode('', $fragments);
}
```
拥有将文档/节点/元素转换为字符串的函数会很好。 无论如何,我使用下面的代码片段来获取DOMNode的innerHTML值:

```
/**
 * Returns the inner HTML of $Node: the HTML serialization of each of its
 * children, concatenated in document order.
 *
 * The children of the node that was passed in are serialized, rather than a
 * fixed position in the owning document, so the function works for any node.
 *
 * @param DOMNode $Node Node whose children are serialized.
 * @return string Inner HTML of $Node; '' when it has no children.
 */
function getInnerHTML($Node)
{
    // A DOMDocument has no ownerDocument, so it serializes its own children.
    $Document = $Node instanceof DOMDocument ? $Node : $Node->ownerDocument;

    $InnerHTML = '';
    foreach ($Node->childNodes as $Child) {
        $InnerHTML .= $Document->saveHTML($Child);
    }

    return $InnerHTML;
}
```

重命名元素并保留属性:

```
/**
 * Changes the name of element $element to $newName.
 *
 * A new element is created in the same document and inserted before the
 * original; the original's children and attributes are moved onto it and the
 * original is then removed from its parent.
 *
 * @param DOMElement $element Element to rename; must have a parent node.
 * @param string     $newName Tag name for the replacement element.
 * @return DOMElement The replacement element now in the document.
 */
function renameElement($element, $newName)
{
    $newElement = $element->ownerDocument->createElement($newName);
    $parentElement = $element->parentNode;
    $parentElement->insertBefore($newElement, $element);

    // appendChild() moves the node, so the live list shrinks on every pass.
    $childNodes = $element->childNodes;
    while ($childNodes->length > 0) {
        $newElement->appendChild($childNodes->item(0));
    }

    // setAttributeNode() likewise moves the attribute off the old element.
    $attributes = $element->attributes;
    while ($attributes->length > 0) {
        $attribute = $attributes->item(0);
        if ($attribute->namespaceURI !== null) {
            // Declare the attribute's prefix on the new element first.
            $newElement->setAttributeNS(
                'http://www.w3.org/2000/xmlns/',
                'xmlns:' . $attribute->prefix,
                $attribute->namespaceURI
            );
        }
        $newElement->setAttributeNode($attribute);
    }

    $parentElement->removeChild($element);

    return $newElement;
}

/**
 * Echoes the document as indented XML, HTML-escaped inside a <pre> block.
 *
 * @param DOMDocument $d Document to print; its formatOutput flag is set to true.
 * @return void
 */
function prettyPrint($d)
{
    $d->formatOutput = true;
    echo '<pre>' . htmlspecialchars($d->saveXML()) . '</pre>';
}

$d = new DOMDocument( '1.0' );
$d->loadXML('<?xml version="1.0"?> <library> <data a:foo="1" x="bar" xmlns:a="http://example.com/a"> <invite> <username>jmansa</username> <userid>1</userid> </invite> <update>1</update> </data> </library>');

$xpath = new DOMXPath($d);
$elements = $xpath->query('/library/data');
if ($elements->length == 1) {
    $element = $elements->item(0);
    renameElement($element, 'invites');
}

prettyPrint($d);
```

尽管使用dom操作元素可能更可取,但有时从文档元素实际获取innerHTML很有用(例如,加载到客户端编辑器中)。 要获取特定html文件($filepath)中特定元素($elem\_id)的innerHTML:

```
$innerHTML = '';
$doc = new DOMDocument();
$doc->loadHTMLFile($filepath);
$elem = $doc->getElementById($elem_id);

// loop through all childNodes, getting html
$children = $elem->childNodes;
foreach ($children as $child) {
    $tmp_doc = new DOMDocument();
    $tmp_doc->appendChild($tmp_doc->importNode($child,true));
    $innerHTML .=
$tmp_doc->saveHTML();
}
```

以下代码显示可以从文档中提取纯文本内容

```
/**
 * Appends the text found under $Node to $Text and returns the result.
 *
 * Element nodes are walked recursively, child by child in document order;
 * any other node contributes its textContent directly.
 *
 * @param DOMNode|null $Node Node to read; null contributes nothing.
 * @param string       $Text Text accumulated so far.
 * @return string $Text followed by the text gathered from $Node.
 */
function getTextFromNode($Node, $Text = "")
{
    // A missing node (e.g. the documentElement of an empty document) adds nothing.
    if ($Node === null) {
        return $Text;
    }

    // Only elements are descended into; this avoids reading tagName on
    // node types that do not define it.
    if (!($Node instanceof DOMElement)) {
        return $Text . $Node->textContent;
    }

    // The loop condition also covers elements without children, where
    // firstChild is null.
    for ($Child = $Node->firstChild; $Child !== null; $Child = $Child->nextSibling) {
        $Text = getTextFromNode($Child, $Text);
    }

    return $Text;
}

/**
 * Returns the text gathered from the document's root element.
 *
 * @param DOMDocument $DOMDoc Document to read.
 * @return string Text collected by getTextFromNode() from the document element.
 */
function getTextFromDocument($DOMDoc)
{
    return getTextFromNode($DOMDoc->documentElement);
}

$Doc = new DOMDocument();
$Doc->loadHTMLFile("Test.html");
echo getTextFromDocument($Doc)."\n";
```