PHP DOMDocument：递归格式化并缩进 HTML 文档

时间:2023-12-21 11:46:50
/**
 * Recursively re-indents the DOM subtree rooted at $node, in place.
 *
 * Every child of a formatted node is placed on its own line (CRLF followed by
 * one tab per nesting level), and the closing tag of the node is moved onto
 * its own line one level shallower. Text children that consist solely of
 * whitespace (optionally mixed with UTF-8 non-breaking spaces) are dropped;
 * other text children are trimmed with trim().
 *
 * The contents of <title>, <p>, <span> and <li> are never touched.
 *
 * @param \DOMNode $node      Node whose descendants are re-indented. When a
 *                            DOMDocument is given, its root element is used.
 * @param int      $treeIndex Nesting depth of $node itself (0 for the root).
 */
function format(\DOMNode $node, int $treeIndex = 0): void
{
    // A DOMDocument has no ownerDocument to create text nodes from, so hand
    // the work to its root element instead of failing on a null document.
    if ($node instanceof \DOMDocument) {
        if ($node->documentElement !== null) {
            format($node->documentElement, $treeIndex);
        }
        return;
    }

    // Tags whose content must not be reformatted.
    if (in_array($node->nodeName, ['title', 'p', 'span', 'li'], true)) {
        return;
    }
    if (!$node->hasChildNodes()) {
        return;
    }

    $treeIndex++;
    $tabStart = "\r\n" . str_repeat("\t", $treeIndex);
    $tabEnd = "\r\n" . str_repeat("\t", $treeIndex - 1);
    $document = $node->ownerDocument;

    // Iterate over a snapshot: the loop removes and inserts siblings, which
    // would otherwise shift positions in the live child list under us.
    foreach (iterator_to_array($node->childNodes, false) as $childNode) {
        if ($childNode->nodeType === XML_TEXT_NODE) {
            // Non-breaking spaces ("\xc2\xa0") are ignored when deciding
            // whether the text node is pure whitespace. A node that becomes
            // empty after stripping them is kept, because ctype_space('') is
            // false.
            if (ctype_space(str_replace("\xc2\xa0", '', $childNode->nodeValue))) {
                $node->removeChild($childNode);
                continue;
            }
            $childNode->nodeValue = trim($childNode->nodeValue);
        }

        $node->insertBefore($document->createTextNode($tabStart), $childNode);
        // Plain function recursion: this is not a method, so there is no $this.
        format($childNode, $treeIndex);
    }

    // Put the closing tag on its own line, aligned with the opening tag.
    $node->appendChild($document->createTextNode($tabEnd));
}
// Usage example: parse a minimal HTML5 skeleton, indent it with format(),
// and print the resulting markup.
$doc = new \DOMDocument();
// DOMDocument::$formatOutput is deliberately left disabled. The original
// author notes (possibly due to a misunderstanding on their side) that its
// built-in formatting did not give a satisfactory result.
$html = '<!DOCTYPE html><html><head><meta charset="utf-8"><title></title></head><body></body></html>';
$doc->loadHTML($html);

format($doc->documentElement);

print $doc->saveHTML();