依赖
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.4</version>
</dependency>
<dependency>
<groupId>net.coobird</groupId>
<artifactId>thumbnailator</artifactId>
<version>0.4.8</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.9</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.9</version>
</dependency>
处理图片的工具-代码
package .Pdf2wordNew;
import .;
import ;
import ;
import ;
import ;
import .;
import ..CTPositiveSize2D;
import .;
import ;
/**
 * {@code XWPFDocument} subclass that adds {@code createPicture}, a helper that
 * builds the inline-drawing markup for a picture already stored in the document.
 *
 * NOTE(review): in this listing many statements have lost their receiver or
 * method name (for example {@code (xmlToken);}), so the class does not compile
 * as shown. The comments below describe only what is still visible and mark
 * everything else as an assumption to confirm against the original source.
 *
 * @program: pdf
 * @author: xlk
 * @create: 2022-11-21 10:30
 */
public class MyXWPFDocument extends XWPFDocument {
/**
 * Creates the document from a stream by delegating to the superclass.
 *
 * @param in stream passed straight to the superclass constructor
 * @throws Exception declared by the signature; nothing is caught here
 */
public MyXWPFDocument(InputStream in) throws Exception {
super(in);
}
/** Creates an empty document by delegating to the superclass. */
public MyXWPFDocument() {
super();
}
/**
 * Creates the document from a package by delegating to the superclass.
 *
 * @param pkg package passed straight to the superclass constructor
 * @throws Exception declared by the signature; nothing is caught here
 */
public MyXWPFDocument(OPCPackage pkg) throws Exception {
super(pkg);
}
/**
 * Picture helper: builds a DrawingML fragment that references a stored picture
 * and carries its scaled size, then applies size, id, name and description
 * values (the receivers of those calls are missing in this listing).
 *
 * @param id index into {@code getAllPictures()} of the picture to place; also
 *        written as the {@code pic:cNvPr} id in the generated markup
 * @param width picture width; multiplied by 9525 before use (presumably pixels
 *        converted to EMUs - TODO confirm)
 * @param height picture height; scaled the same way as {@code width}
 * @param paragraph paragraph; not referenced by any visible statement -
 *        presumably the receiver of the run-creating call whose text is
 *        missing below (TODO confirm)
 */
public void createPicture(int id, int width, int height, XWPFParagraph paragraph) {
// Factor applied to both dimensions; presumably EMUs per pixel - TODO confirm.
final int EMU = 9525;
width *= EMU;
height *= EMU;
// Relationship id of the picture at index id; embedded below as r:embed.
String blipId = getAllPictures().get(id).getPackageRelationship().getId();
// Adds a drawing with an inline container to a run. The receiver is missing
// here; presumably a run created on the paragraph argument - TODO confirm.
CTInline inline = ().getCTR().addNewDrawing().addNewInline();
// DrawingML fragment for the picture: uses id, blipId and the scaled
// width/height computed above.
String picXml = ""
+ "<a:graphic xmlns:a=\"/drawingml/2006/main\">"
+ " <a:graphicData uri=\"/drawingml/2006/picture\">"
+ " <pic:pic xmlns:pic=\"/drawingml/2006/picture\">"
+ " <pic:nvPicPr>" + " <pic:cNvPr id=\""
+ id
+ "\" name=\"Generated\"/>"
+ " <pic:cNvPicPr/>"
+ " </pic:nvPicPr>"
+ " <pic:blipFill>"
+ " <a:blip r:embed=\""
+ blipId
+ "\" xmlns:r=\"/officeDocument/2006/relationships\"/>"
+ " <a:stretch>"
+ " <a:fillRect/>"
+ " </a:stretch>"
+ " </pic:blipFill>"
+ " <pic:spPr>"
+ " <a:xfrm>"
+ " <a:off x=\"0\" y=\"0\"/>"
+ " <a:ext cx=\""
+ width
+ "\" cy=\""
+ height
+ "\"/>"
+ " </a:xfrm>"
+ " <a:prstGeom prst=\"rect\">"
+ " <a:avLst/>"
+ " </a:prstGeom>"
+ " </pic:spPr>"
+ " </pic:pic>"
+ " </a:graphicData>" + "</a:graphic>";
// Receiver missing; presumably creates the graphic/graphicData container on
// the inline element - TODO confirm.
().addNewGraphicData();
// Parse picXml into an XmlToken. The parsing call and the exception handler
// have both lost their text in this listing.
XmlToken xmlToken = null;
try {
xmlToken = (picXml);
} catch (XmlException xe) {
();
}
// NOTE(review): xmlToken is still null here when parsing failed above.
(xmlToken);
// Four calls taking 0 whose names are missing; presumably the inline's
// distance-from-text setters - TODO confirm.
(0);
(0);
(0);
(0);
// The extent object receives the scaled width and height (setter names missing).
CTPositiveSize2D extent = ();
(width);
(height);
// The non-visual properties object receives id plus two fixed strings,
// presumably the picture name and description - TODO confirm.
CTNonVisualDrawingProps docPr = ();
(id);
("图片名称");
("描述信息");
}
}
开始转换
package .Pdf2wordNew;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import .*;
import ;
import ;
import ;
/**
 * Entry point that walks a PDF page by page and copies each page's images and
 * text into a {@code MyXWPFDocument}, which is then written to disk.
 *
 * NOTE(review): in this listing most calls have lost their receiver or method
 * name (for example {@code (fos);}), so the class does not compile as shown.
 * Comments describe only what is visible; guesses are marked as such.
 */
public class Pdf2wordNew {
/**
 * Runs the conversion for one hard-coded input file.
 *
 * @param args not used
 * @throws Exception declared by the signature; only IOException is caught inside
 */
public static void main(String[] args) throws Exception {
try {
// Input path is hard-coded to a file on the author's desktop.
String pdfFileName = "C:\\Users\\11949\\Desktop\\新建文件夹 (2)\\面试题.pdf";
// Presumably loads the PDF and reads its page count - TODO confirm.
PDDocument pdf = (new File(pdfFileName));
int pageNumber = ();
// Output name: a substring ending at a "." position, plus ".doc"; presumably
// the input path with its extension replaced.
// NOTE(review): the document is an XWPFDocument subclass, which writes the
// OOXML (.docx) format - confirm that a ".doc" extension is intended.
String docFileName = (0, (".")) + ".doc";
File file = new File(docFileName);
// Presumably creates the output file when it does not exist yet.
if (!()) {
();
}
MyXWPFDocument document = new MyXWPFDocument();
// NOTE(review): opened without try-with-resources; the calls after the page
// loop are presumably the only cleanup, and are skipped on an exception.
FileOutputStream fos = new FileOutputStream(docFileName);
// Extract the images and text of every page and add them to the Word document.
for (int i = 0; i < pageNumber; i++) {
PDPage page = (i);
PDResources resources = ();
// Iterate over COSName entries of the page resources; the test below
// presumably keeps only names that refer to image XObjects.
Iterable<COSName> names = ();
Iterator<COSName> iterator = ();
while (()) {
COSName cosName = ();
if ((cosName)) {
PDImageXObject imageXObject = (PDImageXObject) (cosName);
// The image is written to a ".jpg" file in a hard-coded folder; the
// expression supplying the file name is missing in this listing.
File outImgFile = new File("C:\\Users\\11949\\Desktop\\新建文件夹 (2)\\"
+ () + ".jpg");
// Writes the image to outImgFile with scale 1 and rotation 0.
(()).scale(1).rotate(0).toFile(outImgFile);
// Read the file back to obtain its dimensions.
BufferedImage bufferedImage = (outImgFile);
int width = ();
int height = ();
// Images wider than 600 are shrunk so the width becomes about 550,
// with the height divided by the same ratio.
if (width > 600) {
double ratio = ((double) width / 550.0);
("缩放比ratio:" + ratio);
width = (int) (width / ratio);
height = (int) (height / ratio);
}
("width: " + width + ", height: " + height);
// Copy the image file into a byte array wrapped in an in-memory stream.
// NOTE(review): no close of `in` is visible; the array length expression
// and the read call have lost their text - confirm the whole file is read.
FileInputStream in = new FileInputStream(outImgFile);
byte[] ba = new byte[()];
(ba);
ByteArrayInputStream byteInputStream = new ByteArrayInputStream(ba);
XWPFParagraph picture = ();
// Add the picture
(byteInputStream, MyXWPFDocument.PICTURE_TYPE_JPEG);
// Picture size and position; the first argument is a collection size minus
// one, presumably the index of the picture just added.
(().size() - 1, width, height, picture);
}
}
PDFTextStripper stripper = new PDFTextStripper();
(true);
// NOTE(review): these two calls are presumably setStartPage(i)/setEndPage(i);
// PDFTextStripper page numbers are 1-based while i starts at 0 - confirm.
(i);
(i);
// Text of the current page
String text = (pdf);
(" ========== " + text);
XWPFParagraph textParagraph = ();
XWPFRun textRun = ();
// Handle line breaks: the text is split on "\r\n" and each piece is passed to
// two calls followed by a third call, presumably print, set text, add break.
// NOTE(review): the fallback call for the whole text is commented out below,
// so text without "\r\n" takes no visible path into the document - confirm.
if (("\r\n")) {
String[] split = ("\r\n");
List<String> strsToList1 = (split);
for (String str : strsToList1) {
(str);
(str);
();
}
}
// (text);
// Presumably the font family and font size of the text run - TODO confirm.
("仿宋");
(10);
// Line break
// Insert a line-break character (the setter name is missing in this listing)
(true);
}
// Presumably writes the document to fos and then closes the stream and the
// PDF - TODO confirm which objects the two following calls belong to.
(fos);
();
();
("pdf转换解析结束!!----");
} catch (IOException e) {
// Only IOException is handled; the handler call has lost its text.
();
}
}
}