利用POI读取Word文档实例

package read.document;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.IOException;

import java.io.InputStream;

import java.sql.Connection;

import java.util.ArrayList;

import java.util.List;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.usermodel.CharacterRun;

import org.apache.poi.hwpf.usermodel.Range;

import pers.mysql.DBUtil;

import pers.mysql.MysqlDao;

import pers.mysql.MysqlDaoImp;

public class WordReading {

    public static void main(String[] args) {

        String filePath = "*****.doc";

        readOnWord(filePath);

    }

    public static void readOnWord(String filePath) {

        if (filePath.endsWith(".doc")) {

            // 输入流-基类

            InputStream is = null;

            try {

                is = new FileInputStream(filePath);

            } catch (FileNotFoundException e) {

                e.printStackTrace();

                System.out.println("文件打开失败。");

            }

            // 加载doc文档

            try {

                HWPFDocument doc = new HWPFDocument(is);

                Range text = doc.getRange();// 整个文档

                /*

                 * 分解word：文本 ->小节 ->段落 ->characterRun(理解为小单元）

                 * section -小节; paragraph - 段落

                 */

                //1分出内容节点

                Range hotWord = text.getSection();// 0-封面，1-目录，2-文本；第3小节

                //2段落处理

                /*

                 * 维护两个变量

                 *

                 * 热词和解释区别 ：大小-word:26,explaining:18

                 *

                 */

                String word = "";

                String explaining = "";

                int wordOK = ;

                int explainOK = ;// 判断当前word&explain是否可以填入数据库

                int count = ;// 读取几条数据到数据库

                int begin = ;// 段落读取位置

                for (int i = ; i < count;) {

                    Range para = hotWord.getParagraph(begin);

                    CharacterRun field = para.getCharacterRun();

                    int fontSize = field.getFontSize();

                    if (fontSize == ) {

                        word = para.text();

                        wordOK = ;

                        begin++;

                    } else {

                        while (fontSize < ) {

                            explaining += para.text();

                            begin++;

                            para = hotWord.getParagraph(begin);

                            field = para.getCharacterRun();

                            fontSize = field.getFontSize();

                        }

                        explainOK = ;

                    }

                    // 判断word&explain是否可以填入数据库

                    if (wordOK ==  && explainOK == ) {

                        MysqlDaoImp.addData(word, explaining);

                        i++;

                        //填入数据库后，一切归"0"

                        wordOK = ;

                        explainOK = ;

                        word="";

                        explaining="";

                    }

                }

                // 输出测试

                // System.out.println("读取：" + "head:");

            } catch (IOException e) {

                e.printStackTrace();

                System.out.println("IO错误。");

            }

        } else {

            System.out.println("文件格式 error:not .doc");

        }

    }
秒客网

利用POI读取Word文档实例

相关文章