Java PDF 转 Word:支持将图片转换到 Word(最大程度地还原原 PDF)

时间:2025-04-27 10:26:23

依赖

        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.4</version>
        </dependency>
        <dependency>
            <groupId>net.coobird</groupId>
            <artifactId>thumbnailator</artifactId>
            <version>0.4.8</version>
        </dependency>

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.9</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.9</version>
        </dependency>

处理图片的工具-代码

package .Pdf2wordNew;

import .;
import ;
import ;
import ;
import ;
import .;
import ..CTPositiveSize2D;
import .;

import ;

/**
 * {@link XWPFDocument} subclass that adds {@link #createPicture}, a helper
 * that builds the inline DrawingML markup for a picture already stored in
 * the document.
 *
 * NOTE(review): several statements in this copy of the file have lost their
 * receiver and/or method name (for example {@code (0);}), so the class does
 * not compile as shown. The comments below describe only what is still
 * visible; restore the missing calls from the original source.
 *
 * @program: pdf
 * @author: xlk
 * @create: 2022-11-21 10:30
 */
public class MyXWPFDocument extends XWPFDocument {
    /**
     * Creates a document by delegating to the superclass constructor that
     * takes an input stream.
     *
     * @param in stream passed unchanged to {@code super(in)}
     * @throws Exception declared by this constructor for failures of the superclass constructor
     */
    public MyXWPFDocument(InputStream in) throws Exception {
        super(in);
    }

    /** Creates a document via the superclass no-argument constructor. */
    public MyXWPFDocument() {
        super();
    }

    /**
     * Creates a document by delegating to the superclass constructor that
     * takes an OPC package.
     *
     * @param pkg package passed unchanged to {@code super(pkg)}
     * @throws Exception declared by this constructor for failures of the superclass constructor
     */
    public MyXWPFDocument(OPCPackage pkg) throws Exception {
        super(pkg);
    }

    /**
     * Picture helper: builds the {@code a:graphic} XML for one picture and
     * attaches it to an inline drawing.
     *
     * @param id        index into {@code getAllPictures()}; also written into
     *                  the generated XML as the {@code pic:cNvPr} id
     * @param width     width; multiplied by 9525 before use (presumably
     *                  pixels converted to EMU - confirm)
     * @param height    height; multiplied by 9525 before use
     * @param paragraph paragraph; not referenced by any statement that is
     *                  still visible in this copy (presumably the receiver
     *                  missing at the {@code CTInline} creation - confirm)
     */
    public void createPicture(int id, int width, int height, XWPFParagraph paragraph) {
        // Scale both dimensions by the EMU factor; the scaled values are used
        // in the XML below and in the extent calls near the end.
        final int EMU = 9525;
        width *= EMU;
        height *= EMU;
        // Relationship id of the picture at index `id`; embedded as r:embed below.
        String blipId = getAllPictures().get(id).getPackageRelationship().getId();
        // NOTE(review): the receiver before `()` is missing here - presumably a
        // run created on `paragraph`; confirm against the original source.
        CTInline inline = ().getCTR().addNewDrawing().addNewInline();
        // Picture markup with id, blipId, width and height spliced in.
        // NOTE(review): the namespace URIs start with "/" and appear to have
        // lost their scheme/host prefix - confirm before relying on this XML.
        String picXml = ""
                + "<a:graphic xmlns:a=\"/drawingml/2006/main\">"
                + "   <a:graphicData uri=\"/drawingml/2006/picture\">"
                + "      <pic:pic xmlns:pic=\"/drawingml/2006/picture\">"
                + "         <pic:nvPicPr>" + "            <pic:cNvPr id=\""
                + id
                + "\" name=\"Generated\"/>"
                + "            <pic:cNvPicPr/>"
                + "         </pic:nvPicPr>"
                + "         <pic:blipFill>"
                + "            <a:blip r:embed=\""
                + blipId
                + "\" xmlns:r=\"/officeDocument/2006/relationships\"/>"
                + "            <a:stretch>"
                + "               <a:fillRect/>"
                + "            </a:stretch>"
                + "         </pic:blipFill>"
                + "         <pic:spPr>"
                + "            <a:xfrm>"
                + "               <a:off x=\"0\" y=\"0\"/>"
                + "               <a:ext cx=\""
                + width
                + "\" cy=\""
                + height
                + "\"/>"
                + "            </a:xfrm>"
                + "            <a:prstGeom prst=\"rect\">"
                + "               <a:avLst/>"
                + "            </a:prstGeom>"
                + "         </pic:spPr>"
                + "      </pic:pic>"
                + "   </a:graphicData>" + "</a:graphic>";

        // NOTE(review): receiver missing (presumably `inline`) - confirm.
        ().addNewGraphicData();
        // picXml is parsed into an XmlToken; if an XmlException is caught,
        // xmlToken is still null when it is passed on after the try block.
        XmlToken xmlToken = null;
        try {
            xmlToken = (picXml);
        } catch (XmlException xe) {
            ();
        }
        (xmlToken);

        // Four calls taking 0; receivers and method names are missing
        // (presumably the inline's distance/margin setters - confirm).
        (0);
        (0);
        (0);
        (0);

        // Extent object receives the scaled width and height.
        CTPositiveSize2D extent = ();
        (width);
        (height);

        // Non-visual drawing properties receive the id and two fixed strings
        // (presumably name and description - confirm).
        CTNonVisualDrawingProps docPr = ();
        (id);
        ("图片名称");
        ("描述信息");
    }
}

开始转换

package .Pdf2wordNew;

import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;

import ;
import ;
import .*;
import ;
import ;
import ;

/**
 * Command-line driver that walks the pages of a PDF, copies each page's
 * image XObjects and text into a {@link MyXWPFDocument}, and writes the
 * result next to the source file.
 *
 * NOTE(review): most calls in this copy of the file have lost their receiver
 * and/or method name (for example {@code ();}), so the class does not compile
 * as shown. The comments describe only what is still visible; restore the
 * missing calls from the original source.
 */
public class Pdf2wordNew {

    /**
     * Runs the conversion for a single hard-coded PDF path.
     *
     * @param args not referenced in the visible code
     * @throws Exception declared by the signature; only IOException is caught inside
     */
    public static void main(String[] args) throws Exception {

        try { 
            
            // Input path is hard-coded to a local Windows desktop folder.
            String pdfFileName = "C:\\Users\\11949\\Desktop\\新建文件夹 (2)\\面试题.pdf";

            PDDocument pdf = (new File(pdfFileName));
            int pageNumber = ();

            // Output name ends in ".doc"; the prefix expression has lost its
            // receiver (presumably the PDF path up to its last "." - confirm).
            // NOTE(review): the document class is XWPF-based, which is normally
            // saved as .docx - confirm the intended extension.
            String docFileName = (0, (".")) + ".doc";

            File file = new File(docFileName);
            if (!()) {
                ();
            }
            MyXWPFDocument document = new MyXWPFDocument();
            FileOutputStream fos = new FileOutputStream(docFileName);

            // Extract each page's images and text and add them to the Word document
            for (int i = 0; i < pageNumber; i++) {
                PDPage page = (i);
                PDResources resources = ();
                Iterable<COSName> names = ();
                Iterator<COSName> iterator = ();
                while (()) {
                    COSName cosName = ();
                    if ((cosName)) {
                        PDImageXObject imageXObject = (PDImageXObject) (cosName);
                        // Temporary image file in the same hard-coded folder, ".jpg" suffix;
                        // the middle part of the name has lost its call (confirm).
                        File outImgFile = new File("C:\\Users\\11949\\Desktop\\新建文件夹 (2)\\"
                                + () + ".jpg");
                        (()).scale(1).rotate(0).toFile(outImgFile);
                        BufferedImage bufferedImage = (outImgFile);
                        int width = ();
                        int height = ();
                        // Images wider than 600 are scaled so the width becomes about 550,
                        // with the height divided by the same ratio.
                        // NOTE(review): threshold (600) and target (550) differ - confirm intent.
                        if (width > 600) {
                            double ratio = ((double) width / 550.0);
                            ("缩放比ratio:" + ratio);
                            width = (int) (width / ratio);
                            height = (int) (height / ratio);
                        }
                        ("width: " + width + ",  height: " + height);
                        // NOTE(review): `in` is not visibly closed within its scope - confirm.
                        FileInputStream in = new FileInputStream(outImgFile);
                        byte[] ba = new byte[()];
                        (ba);
                        ByteArrayInputStream byteInputStream = new ByteArrayInputStream(ba);
                        XWPFParagraph picture = ();
                        // Add the image data (as JPEG)
                        (byteInputStream, MyXWPFDocument.PICTURE_TYPE_JPEG);
                        // Image size and position; first argument is (some size) - 1,
                        // presumably the index of the picture just added - confirm.
                        (().size() - 1, width, height, picture);
                    }
                }
                // A new text stripper is created for every page.
                // NOTE(review): the three configuration calls below have lost their
                // names; if the two taking `i` are start/end page setters, check
                // whether the library expects 1-based page numbers (i starts at 0).
                PDFTextStripper stripper = new PDFTextStripper();
                (true);
                (i);
                (i);
                // Text of the current page
                String text = (pdf);
                ("  ==========  " + text);
                XWPFParagraph textParagraph = ();
                XWPFRun textRun = ();
                // Handle line breaks: the split-and-append loop only runs when the
                // condition involving "\r\n" holds; no other visible statement
                // handles the text when it does not (see commented-out line below).
                if (("\r\n")) {
                    String[] split = ("\r\n");
                    List<String> strsToList1 = (split);
                    for (String str : strsToList1) {
                        (str);
                        (str);
                        ();
                    }
                }
//                (text);
                // Fixed font name and size 10 (presumably applied to textRun - confirm).
                ("仿宋");
                (10);
                // Line break
                // Insert a line-break marker
                (true);
            }
            // Presumably writes the document to fos and closes resources - confirm.
            (fos);
            ();
            ();
            ("pdf转换解析结束!!----");
        } catch (IOException e) {
            // Only IOException is handled here; other exceptions propagate from main.
            ();
        }
    }
}