libxml2的使用总结

时间:2023-02-15 20:15:32
    libxml是一个实现了读,创建和操作xml数据等功能的c语言库,对于其具体的API,可参考官方文档,这里只介绍一些常用的用法。

    libxml在操作xml数据时,定义了几种节点类型:

/*
 * Node kinds that libxml2 distinguishes when it works on XML data.
 * The kind of a node can be read from its `type` field (curnode->type).
 *
 * XML_ELEMENT_NODE, XML_TEXT_NODE and XML_ATTRIBUTE_NODE (element, text and
 * attribute nodes) are the ones used most often.
 */
typedef enum {
    XML_ELEMENT_NODE = 1,        /* element node, e.g. <story> */
    XML_ATTRIBUTE_NODE = 2,      /* attribute node */
    XML_TEXT_NODE = 3,           /* text node, e.g. the text inside <author> */
    XML_CDATA_SECTION_NODE = 4,
    XML_ENTITY_REF_NODE = 5,
    XML_ENTITY_NODE = 6,
    XML_PI_NODE = 7,
    XML_COMMENT_NODE = 8,
    XML_DOCUMENT_NODE = 9,
    XML_DOCUMENT_TYPE_NODE = 10,
    XML_DOCUMENT_FRAG_NODE = 11,
    XML_NOTATION_NODE = 12,
    XML_HTML_DOCUMENT_NODE = 13,
    XML_DTD_NODE = 14,
    XML_ELEMENT_DECL = 15,
    XML_ATTRIBUTE_DECL = 16,
    XML_ENTITY_DECL = 17,
    XML_NAMESPACE_DECL = 18,
    XML_XINCLUDE_START = 19,
    XML_XINCLUDE_END = 20,
    XML_DOCB_DOCUMENT_NODE = 21
} xmlElementType;
    比较常用的是XML_ELEMENT_NODE,XML_TEXT_NODE和XML_ATTRIBUTE_NODE,可称为元素节点,文本节点和属性节点;它们都是xmlNode结构体类型的,且可以通过curnode->type来获得类型。这几种节点分别对应xml的不同数据,以下面xml文档来说明,story和storyinfo是元素节点,而John Fleck就是文本节点。在结构上John Fleck文本节点是<author>元素节点的子节点。

    测试xml:

<?xml version="1.0"?>
<story>
<storyinfo>
<author>John Fleck</author>
<datewritten>June 2, 2002</datewritten>
<keyword>example keyword</keyword>
</storyinfo>
<body>
<headline>This is the headline</headline>
<para>This is the body text.</para>
</body>
</story>
    1. 解析文档:

        /* Parse the file named by docname and validate its root element. */
        xmlDocPtr doc;
xmlNodePtr cur;

doc = xmlParseFile(docname);

/* A NULL document means parsing failed; report it and stop. */
if (doc == NULL ) {
fprintf(stderr,"Document not parsed successfully. \n");
return;
}

/* Fetch the root element of the parsed document. */
cur = xmlDocGetRootElement(doc);

/* No root element: report it, release the document and stop. */
if (cur == NULL) {
fprintf(stderr,"empty document\n");
xmlFreeDoc(doc);
return;
}

/* xmlStrcmp is non-zero here when the root element is not named "story". */
/* NOTE(review): unlike the messages above, this one has no trailing "\n". */
if (xmlStrcmp(cur->name, (const xmlChar *) "story")) {
fprintf(stderr,"document of the wrong type, root node != story");
xmlFreeDoc(doc);
return;
}

    2. 检索节点:

/*
 * Print the text of every direct child of `cur` that is named "keyword".
 *
 * doc: document the nodes belong to, passed on to xmlNodeListGetString.
 * cur: parent node whose children are scanned.
 *
 * Each match is written to stdout as "keyword: <text>" and the string
 * obtained from xmlNodeListGetString is released with xmlFree.
 */
void parseStory (xmlDocPtr doc, xmlNodePtr cur) {
    xmlNodePtr child;

    for (child = cur->xmlChildrenNode; child != NULL; child = child->next) {
        xmlChar *text;

        /* Skip every child that is not a <keyword> element. */
        if (xmlStrcmp(child->name, (const xmlChar *)"keyword") != 0) {
            continue;
        }

        text = xmlNodeListGetString(doc, child->xmlChildrenNode, 1);
        printf("keyword: %s\n", text);
        xmlFree(text);
    }
}

        3. XPATH方式检索文档:
    除了遍历文档树来寻找某个节点外,libxml2还支持使用XPATH表达式来寻找符合指定搜索规则的节点集。xpath的内容可参见:http://www.w3schools.com/xpath/xpath_operators.asp 和 http://www.w3.org/TR/xpath/ 。测试可参考:http://www.xpathtester.com/test 。这里是几个简单的用法,"/Information/CameraSet"取的是从根节点开始的绝对路径,"//CameraItem"取的是相对路径(匹配文档中任意层级的CameraItem节点),数字可直接用等号比较,文本则要用text()取其内容进行比较。

/* Buffer that receives the formatted XPath expression. */
char expr[128];
/*
 * Two example expressions: the first selects a CameraItem by the text of its
 * IP child, the second by the numeric value of its Index child.
 * As written, the second sprintf overwrites the first, so only the second
 * expression is passed to getnodeset below.
 * NOTE(review): sprintf does not bound its output; a long pItem->sIP could
 * overflow the 128-byte buffer -- consider snprintf.
 */
sprintf(expr, "/Information/CameraSet/CameraItem[IP[text()='%s']]", pItem->sIP);
sprintf(expr, "/Information/CameraSet/CameraItem[Index=%d]/VarTrafficLine", iIndex);
/* Evaluate the expression against doc and keep the returned result object. */
xpObjPtr = getnodeset(doc, (xmlChar*)expr);

 4. 添加节点和获取文本:
    设置元素节点的文本可使用xmlNodeSetContent(cur, (const xmlChar*)"sssss");也有其他方式,如下面代码所示。获取文本可使用xmlNodeGetContent(cur)和xmlNodeListGetString(doc, nodelist, inLine):第一个方法获取的是cur的子孙节点中类型是TEXT或者ENTITY_REF的节点的内容拼接的字符串;第二个方法只取当前节点链表中类型是TEXT或者ENTITY_REF的节点的内容。两个方法返回的字符串都需要调用xmlFree()释放。

/* Create a new element named "NewNode" and attach it as a child of cur. */
newnode = xmlNewNode(NULL, (const xmlChar*)"NewNode");
xmlAddChild(cur, newnode);
/* Give the new element two child elements, "Code" and "Name", each with text content. */
xmlNewTextChild(newnode, NULL, (const xmlChar*)"Code", (const xmlChar*)"1001");
xmlNewTextChild(newnode, NULL, (const xmlChar*)"Name", (const xmlChar*)"anewnode");
textnode = xmlNewText((const xmlChar*)"abcdefg"); // append text to an already existing element node
xmlAddChild(cur, textnode);

    5. 删除节点:

/* Detach cur from its tree first, then free it. */
xmlUnlinkNode(cur);
/* cur has been freed here and must not be used afterwards. */
xmlFreeNode(cur);
    这样删除指定节点后,会自动形成新的节点树结构,而不会打乱原来的结构,如:删除<author>节点后,<storyinfo>的第一个子节点就变成<datewritten>,<datewritten>的前驱节点是NULL,而不是删除前的<author>了。

    代码:

#include <libxml/parser.h>
#include <libxml/xpath.h>
/*
 * Parse the XML file named by `docname`.
 *
 * Returns the document returned by xmlParseFile, or NULL after writing an
 * error message to stderr when parsing fails.
 */
xmlDocPtr getdoc (char *docname) {
    xmlDocPtr parsed = xmlParseFile(docname);

    if (parsed != NULL) {
        return parsed;
    }

    fprintf(stderr,"Document not parsed successfully. \n");
    return NULL;
}

/*
 * Evaluate the XPath expression `xpath` against `doc`.
 *
 * Returns the XPath result object when the expression selects at least one
 * node. Returns NULL, after printing a message to stdout, when the context
 * cannot be created, the evaluation fails, or the node set is empty.
 * A non-NULL result is handed to the caller, which releases it with
 * xmlXPathFreeObject.
 */
xmlXPathObjectPtr getnodeset (xmlDocPtr doc, xmlChar *xpath){
    xmlXPathObjectPtr found;
    xmlXPathContextPtr ctx = xmlXPathNewContext(doc);

    if (!ctx) {
        printf("Error in xmlXPathNewContext\n");
        return NULL;
    }

    /* The context is only needed for the evaluation itself. */
    found = xmlXPathEvalExpression(xpath, ctx);
    xmlXPathFreeContext(ctx);

    if (!found) {
        printf("Error in xmlXPathEvalExpression\n");
        return NULL;
    }

    if (!xmlXPathNodeSetIsEmpty(found->nodesetval)) {
        return found;
    }

    /* An empty node set is reported as "no result" and not returned. */
    xmlXPathFreeObject(found);
    printf("No result\n");
    return NULL;
}

/*
 * Example driver: parse the XML file named by argv[1], select every node
 * matching the XPath expression "//root" and print the text of each match
 * as "keyword1: <text>".
 *
 * Returns 0 on success (including when the expression matches nothing) and
 * 1 when no file name is given or the document cannot be parsed.
 */
int main(int argc, char **argv) {
    xmlChar *xpath = (xmlChar*) "//root";
    xmlXPathObjectPtr result;
    xmlDocPtr doc;
    int i;

    if (argc <= 1) {
        printf("Usage: %s docname\n", argv[0]);
        return 1;
    }

    /* getdoc has already reported the failure; do not query a NULL document. */
    doc = getdoc(argv[1]);
    if (doc == NULL) {
        xmlCleanupParser();
        return 1;
    }

    result = getnodeset(doc, xpath);
    if (result) {
        xmlNodeSetPtr nodeset = result->nodesetval;

        for (i = 0; i < nodeset->nodeNr; i++) {
            xmlChar *keyword = xmlNodeListGetString(doc, nodeset->nodeTab[i]->xmlChildrenNode, 1);

            /* A node without text yields NULL, which must not be passed to %s. */
            printf("keyword1: %s\n", keyword != NULL ? (const char *)keyword : "");
            xmlFree(keyword);
        }
        xmlXPathFreeObject(result);
    }

    xmlFreeDoc(doc);
    xmlCleanupParser();
    return 0;
}