package read.document; import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.List; import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Range; import pers.mysql.DBUtil;
import pers.mysql.MysqlDao;
// NOTE(review): the next line packs together the last import, the WordReading
// class header, the whole of main(), and the opening of readOnWord().
//
// main(String[] args): calls readOnWord with a hard-coded path string.
//
// readOnWord(String filePath): when filePath ends with ".doc", opens it with a
// FileInputStream, wraps it in an HWPFDocument, takes one section of the
// document range and walks its paragraphs. The font size of a character run
// decides whether a paragraph is stored as the word or appended to the
// explanation; once both flags are set the pair is passed to
// MysqlDaoImp.addData and the state is reset. Any other file name only prints
// a format error.
//
// NOTE(review): no close() call on the stream or the document appears in this
// block.
// NOTE(review): numeric literals (initial values, indexes, font-size
// thresholds) are absent throughout this listing - presumably lost during
// extraction; recover them from the original file before compiling.
import pers.mysql.MysqlDaoImp; public class WordReading { public static void main(String[] args) { String filePath = "*****.doc"; readOnWord(filePath); } public static void readOnWord(String filePath) { if (filePath.endsWith(".doc")) { // input stream - base class
InputStream is = null;
try {
is = new FileInputStream(filePath);
} catch (FileNotFoundException e) {
e.printStackTrace();
System.out.println("文件打开失败。");
} // load the .doc document
// NOTE(review): when the open above failed, the catch block only reports the
// error and falls through, so `is` is still null when it is passed on below.
// NOTE(review): on the next line the block-comment opener sits behind a line
// comment, so as the text stands the two lines after it are not inside a
// comment - presumably a line break was lost; confirm against the original.
try { HWPFDocument doc = new HWPFDocument(is); Range text = doc.getRange();// the whole document /*
* 分解word:文本 ->小节 ->段落 ->characterRun(理解为小单元)
* section -小节; paragraph - 段落
*/ // 1. pick out the content section
// NOTE(review): getSection is shown without an index argument; the trailing
// comment suggests the third section (index 2) - confirm.
Range hotWord = text.getSection();// 0 - cover, 1 - table of contents, 2 - body text; the 3rd section // 2. paragraph handling
/*
* Maintain two variables
*
* How a hot word differs from its explanation: font size - word: 26, explaining: 18
*
*/
String word = "";
String explaining = "";
// NOTE(review): on the lines below, the declaration of count and the header
// of the for loop follow a line-comment marker, so as written they are
// comment text rather than code - presumably lost line breaks; confirm.
int wordOK = ;
int explainOK = ;// whether the current word & explanation can be written to the database int count = ;// number of records to read into the database
int begin = ;// paragraph read position for (int i = ; i < count;) {
// Inspect the paragraph at position `begin` and read the font size of one of
// its character runs to classify it.
Range para = hotWord.getParagraph(begin);
CharacterRun field = para.getCharacterRun();
int fontSize = field.getFontSize();
if (fontSize == ) {
word = para.text();
wordOK = ;
begin++;
} else {
// Append consecutive paragraphs to the explanation while the font-size test
// holds. NOTE(review): begin is not checked against the number of paragraphs
// inside this loop.
while (fontSize < ) {
explaining += para.text();
begin++;
para = hotWord.getParagraph(begin);
field = para.getCharacterRun();
fontSize = field.getFontSize();
}
explainOK = ;
}
// check whether word & explanation can be written to the database
if (wordOK == && explainOK == ) {
MysqlDaoImp.addData(word, explaining);
// i only advances here, i.e. once per pair handed to addData
i++;
// after writing to the database, reset everything to "0"
wordOK = ;
explainOK = ;
word="";
explaining="";
}
}
// NOTE(review): two lines down, the closing brace of the try block and the
// catch header follow a line-comment marker, so as written they are comment
// text - presumably a lost line break; confirm.
// output test
// System.out.println("读取:" + "head:"); } catch (IOException e) {
e.printStackTrace();
System.out.println("IO错误。");
} } else {
System.out.println("文件格式 error:not .doc");
} }