xml的解析与创建——bing到youdao导入文件的转换

时间:2024-01-14 18:37:44

本文要解决的问题是:如何将必应词典单词本中记录的单词导入到有道词典中。必应词典可以将单词本导出为xml文件,但有道词典无法直接解析该文件。因此需要先解析必应导出的xml,再按照有道词典能识别的格式创建一个新的xml文件,这就涉及到xml的解析和创建。

代码如下:

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
import java.util.Date;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Converts a word list exported by Bing Dictionary (an XML file) into
 * "youdao.xml", an XML file laid out as
 * {@code <wordbook><item><word/><trans/><phonetic/><tags/><progress/></item>...</wordbook>}.
 *
 * <p>Only entries whose {@code Date} text is lexicographically greater than or
 * equal to the filtering date are converted.
 */
public class bing2youdao {
    private String bingPath; // input file path
    private String time;     // filtering date, compared as a plain string (default: today, "yyyy-MM-dd")
    private String tag = null; // optional tag written into every <tags> element
    final static String youdao = "youdao.xml"; // output file

    /**
     * Creates a converter that filters with today's date.
     *
     * @param filename path of the Bing export file
     */
    public bing2youdao(String filename) {
        bingPath = filename;
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        time = sdf.format(new Date());
    }

    /**
     * Creates a converter with an explicit filtering date.
     *
     * @param filename path of the Bing export file
     * @param d        filtering date; entries whose Date text compares
     *                 greater than or equal to this string are converted
     */
    public bing2youdao(String filename, String d) {
        bingPath = filename;
        time = d;
    }

    /**
     * Creates a converter with an explicit filtering date and a tag.
     *
     * @param filename path of the Bing export file
     * @param d        filtering date (see the two-argument constructor)
     * @param t        text stored in the {@code <tags>} element of every item
     */
    public bing2youdao(String filename, String d, String t) {
        this(filename, d);
        tag = t;
    }

    /**
     * Parses the input file, builds the output document and writes it to
     * {@link #youdao}. Any failure is reported by printing the exception
     * message to standard output; nothing is thrown to the caller.
     */
    public void run() {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();

            // Output document with its root node.
            Document document = builder.newDocument();
            Element wordbook = document.createElement("wordbook");
            document.appendChild(wordbook);

            // Input document. The word entries are the element children of the
            // root's first element child. Looking elements up by node type
            // (instead of by fixed index) keeps this working whether or not
            // the export contains whitespace between tags.
            Document dm = builder.parse(new File(bingPath));
            Element wordContainer = firstElementChild(dm.getDocumentElement());
            if (wordContainer != null) {
                NodeList wordList = wordContainer.getChildNodes();
                for (int i = 0; i < wordList.getLength(); i++) {
                    Node word = wordList.item(i);
                    if (word.getNodeType() != Node.ELEMENT_NODE) {
                        continue; // whitespace, comments, ...
                    }

                    // Declared per entry so a missing field never inherits the
                    // value of the previous word.
                    String danci = null;
                    String pronunciation = null;
                    String definition = null;
                    String data = null;

                    NodeList nodeDetail = word.getChildNodes();
                    for (int j = 0; j < nodeDetail.getLength(); j++) {
                        Node detail = nodeDetail.item(j);
                        String name = detail.getNodeName();
                        if ("Eng".equals(name)) {
                            danci = detail.getTextContent();
                        } else if ("Phonetic".equals(name)) {
                            pronunciation = detail.getTextContent();
                        } else if ("Defi".equals(name)) {
                            definition = detail.getTextContent();
                        } else if ("Date".equals(name)) {
                            data = detail.getTextContent();
                        }
                    }

                    // Skip entries without a word, and entries recorded before
                    // the filtering date (or carrying no date at all).
                    if (danci == null || !timeDecision(data)) {
                        continue;
                    }

                    Element item = document.createElement("item");
                    appendTextElement(document, item, "word", danci);
                    appendTextElement(document, item, "trans", definition);
                    appendTextElement(document, item, "phonetic", pronunciation);
                    appendTextElement(document, item, "tags", tag);
                    // Review progress: defaults to 1; use -1 to keep the word
                    // out of the review plan.
                    appendTextElement(document, item, "progress", 1 + "");
                    wordbook.appendChild(item);
                }
            }

            writeDocument(document);
            System.out.println("【必应词典】转换为【有道词典】成功!");
        } catch (Exception ex) {
            System.out.println(ex.getMessage());
        }
    }

    /** Returns the first child of {@code parent} that is an element, or null if there is none. */
    private static Element firstElementChild(Node parent) {
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                return (Element) child;
            }
        }
        return null;
    }

    /** Appends {@code <name>text</name>} to {@code parent}; the element is left empty when text is null. */
    private static void appendTextElement(Document document, Element parent, String name, String text) {
        Element element = document.createElement(name);
        if (text != null) {
            element.appendChild(document.createTextNode(text));
        }
        parent.appendChild(element);
    }

    /** Serializes {@code document} to the output file as indented UTF-8 XML and closes the file. */
    private static void writeDocument(Document document)
            throws TransformerException, java.io.IOException {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        // Hand the transformer a byte stream, not a Writer: a Writer built on
        // the platform default charset could produce bytes that contradict the
        // UTF-8 encoding declared in the XML header.
        FileOutputStream out = new FileOutputStream(youdao);
        try {
            transformer.transform(new DOMSource(document), new StreamResult(out));
        } finally {
            out.close();
        }
    }

    /**
     * Filtering function.
     *
     * @param data the entry's Date text, may be null
     * @return true when {@code data} is not null and compares greater than or
     *         equal to the filtering date (plain string comparison)
     */
    private boolean timeDecision(String data) {
        return data != null && data.compareTo(time) >= 0;
    }
}

使用说明:

(1)将必应词典导出到本地文件“bing.xml”,放入工程根目录下。

(2)定义 bing2youdao对象,可以使用三种构造函数:

     new bing2youdao("bing.xml");
      new bing2youdao("bing.xml","yyyy-MM-dd");//仅转换该日期(含)之后记录的单词
      new bing2youdao("bing.xml","yyyy-MM-dd","newClass");//同上,并将转换的单词统一归为"newClass"类

(3)调用run()生成“youdao.xml”,并将其导入到有道词典单词本即可。本例默认的复习进度为1,若希望单词不加入复习计划,将其修改为“-1”即可。

如:

	 public static void main(String args[]) {
    // Convert the words saved in "bing.xml" on or after 2015-10-21 and tag them all as "CC".
    new bing2youdao("bing.xml","2015-10-21","CC").run();
}

上例仅转换2015年10月21日(含)之后保存到必应词典中的单词,并将其统一归为“CC”类。