<?php
/**
* 读取的xml的格式
* <urlset>
* <url>
* <loc>http://www.51buy.com/0.html</loc>
* <priority>1.0</priority>
* <lastmod>Wed, 12 Jun 2013 21:37:52 +0800</lastmod>
* <changefreq>Always</changefreq>
* </url>
* <url>
* <loc>http://www.baidu.com/1.html</loc>
* <priority>1.0</priority>
* <lastmod>Tue, 11 Jun 2013 15:39:17 +0800</lastmod>
* <changefreq>Always</changefreq>
* </url>
* <url>
* <loc>http://www.jd.com/2.html</loc>
* <priority>1.0</priority>
* <lastmod>Tue, 11 Jun 2013 01:21:46 +0800</lastmod>
* <changefreq>Always</changefreq>
* </url>
* </urlset>
*/
// Step 1: read the source sitemap and collect its <url> entries into $array.
//
// Result shape: a list of associative arrays with the string keys
// 'loc', 'priority', 'lastmod' and 'changefreq' (in that order).
header("Content-type: text/html; charset=utf-8");

// Parse the remote sitemap. load() returns false when the document cannot be
// fetched or parsed; stop here so that a failed download is not silently
// turned into an empty result.
$xml = new DOMDocument();
if (!$xml->load("http://www.baidu.com/sitemap.xml")) {
    exit('XML文件加载失败');
}

// Child elements copied from every <url> entry.
$fields = array('loc', 'priority', 'lastmod', 'changefreq');

// Host names that are rewritten to "www.baidu.com/nihao" (remove the
// preg_replace() call below if no rewriting is needed). The look-behind and
// look-ahead make sure only a complete host name is matched: a plain
// substring search for "w.baidu.com" also hits the tail of "www.baidu.com"
// and would mangle those URLs into "wwwww.baidu.com/nihao".
$hostPattern = '/(?<![A-Za-z0-9.-])(?:w|book|iworld)\.baidu\.com(?![A-Za-z0-9-])/';

$array = array();
foreach ($xml->getElementsByTagName("url") as $post) {
    $entry = array();
    foreach ($fields as $field) {
        // item(0) is null when the child element is absent; fall back to an
        // empty string instead of dereferencing null.
        $node = $post->getElementsByTagName($field)->item(0);
        $entry[$field] = ($node === null) ? '' : $node->nodeValue;
    }

    // An entry without a <loc> carries no URL, so there is nothing to keep.
    if ($entry['loc'] === '') {
        continue;
    }

    $entry['loc'] = preg_replace($hostPattern, 'www.baidu.com/nihao', $entry['loc']);
    $array[] = $entry;
}
//echo "<pre>";
//var_dump($array);
//print_r($array);
//删除XML文件
/*
$file = "2222.html";//此处
if (!unlink($file))
{
echo ("Error deleting $file");
}
else
{
echo ("Deleted $file");
}
*/ /**
* Shape of the array that is written out as XML:
* Array
(
[0] => Array
(
[loc] => http://www.51buy.com/0.html
[priority] => 1.0
[lastmod] => Wed, 12 Jun 2013 21:37:52 +0800
[changefreq] => Always
)
[1] => Array
(
[loc] => http://www.51buy.com/0.html
[priority] => 1.0
[lastmod] => Tue, 11 Jun 2013 15:39:17 +0800
[changefreq] => Always
)
[2] => Array
(
[loc] => http://www.51buy.com/0.html
[priority] => 1.0
[lastmod] => Tue, 11 Jun 2013 01:21:46 +0800
[changefreq] => Always
)
)
*/
// Step 2: write the entries held in $array to a new XML file.
//
// Every entry becomes one <url> element under the <urlset> root, with the
// children <loc>, <priority>, <lastmod> and <changefreq> in that order.
$dom = new DOMDocument('1.0', 'UTF-8');
$dom->formatOutput = true;
$rootelement = $dom->createElement("urlset");

foreach ($array as $value) {
    $article = $dom->createElement("url");

    foreach (array('loc', 'priority', 'lastmod', 'changefreq') as $tag) {
        // Tolerate entries that lack a key or hold null for it.
        $text = isset($value[$tag]) ? (string) $value[$tag] : '';

        // The value argument of createElement() is not escaped, so a URL
        // containing "&" would raise an "unterminated entity reference"
        // warning and lose content. A text node escapes it correctly.
        $child = $dom->createElement($tag);
        if ($text !== '') {
            $child->appendChild($dom->createTextNode($text));
        }
        $article->appendChild($child);
    }

    $rootelement->appendChild($article);
}

$dom->appendChild($rootelement);

$filename = "./test.xml";

// save() returns the number of bytes written, or false on failure.
$bytes = $dom->save($filename);
if ($bytes === false) {
    echo 'XML文件保存失败';
} else {
    echo 'XML文件大小' . $bytes . '字节';
}