java调用openoffice将office系列文档转换为PDF的示例方法

时间:2022-06-18 09:17:54

前导:

开发过程中经常需要使用java将office系列文档转换为PDF,一般都使用Apache提供的openoffice+jodconverter来实现文档转换。

openoffice既有windows版本也有linux版。不用担心生产环境是linux系统。

1、openoffice依赖jar,以maven为例:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
<!-- JODConverter 2.2.1: the converter library used by the conversion code in this article -->
<dependency>
      <groupId>com.artofsolving</groupId>
      <artifactId>jodconverter</artifactId>
      <version>2.2.1</version>
    </dependency>
    <!-- OpenOffice 3.0.1 artifacts (jurt, ridl, juh, unoil) declared alongside JODConverter -->
    <dependency>
      <groupId>org.openoffice</groupId>
      <artifactId>jurt</artifactId>
      <version>3.0.1</version>
    </dependency>
    <dependency>
      <groupId>org.openoffice</groupId>
      <artifactId>ridl</artifactId>
      <version>3.0.1</version>
    </dependency>
    <dependency>
      <groupId>org.openoffice</groupId>
      <artifactId>juh</artifactId>
      <version>3.0.1</version>
    </dependency>
    <dependency>
      <groupId>org.openoffice</groupId>
      <artifactId>unoil</artifactId>
      <version>3.0.1</version>
    </dependency>
 
    <!-- jodconverter 2.2.1 must be paired with slf4j-jdk14 at exactly this version;
         per the article author, the logging calls in its source fail otherwise. -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-jdk14</artifactId>
      <version>1.4.3</version>
    </dependency>

2、直接上转换代码,只需保证openoffice服务已经启动并监听8100端口即可。

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
public void convert(File sourceFile, File targetFile) {
 
  try {
    // 1: 打开连接
    OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100);
    connection.connect();
 
    DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
    // 2:获取Format
    DocumentFormatRegistry factory = new BasicDocumentFormatRegistry();
    DocumentFormat inputDocumentFormat = factory
        .getFormatByFileExtension(getExtensionName(sourceFile.getAbsolutePath()));
    DocumentFormat outputDocumentFormat = factory
        .getFormatByFileExtension(getExtensionName(targetFile.getAbsolutePath()));
    // 3:执行转换
    converter.convert(sourceFile, inputDocumentFormat, targetFile, outputDocumentFormat);
  } catch (ConnectException e) {
    log.info("文档转换PDF失败");
  }
}

3、需注意:jodconverter 在转换2007及以后版本的xxx.docx文档时会报错。原因大家都明白:03版本的后缀名是xxx.doc,07以后版本的后缀名是xxx.docx。

查看jodconverter源码发现,documentFormat不支持xxx.docx格式:BasicDocumentFormatRegistry中的public DocumentFormat getFormatByFileExtension(String extension)方法默认只支持doc格式。

BasicDocumentFormatRegistry类源码

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;
 
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
 
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {
 
  private List/*<DocumentFormat>*/ documentFormats = new ArrayList();
 
  public void addDocumentFormat(DocumentFormat documentFormat) {
    documentFormats.add(documentFormat);
  }
 
  protected List/*<DocumentFormat>*/ getDocumentFormats() {
    return documentFormats;
  }
 
  /**
   * @param extension the file extension
   * @return the DocumentFormat for this extension, or null if the extension is not mapped
   */
  public DocumentFormat getFormatByFileExtension(String extension) {
    if (extension == null) {
      return null;
    }
    String lowerExtension = extension.toLowerCase();
    for (Iterator it = documentFormats.iterator(); it.hasNext();) {
      DocumentFormat format = (DocumentFormat) it.next();   
      if (format.getFileExtension().equals(lowerExtension)) {
        return format;
      }
    }
    return null;
  }
 
  public DocumentFormat getFormatByMimeType(String mimeType) {
    for (Iterator it = documentFormats.iterator(); it.hasNext();) {
      DocumentFormat format = (DocumentFormat) it.next();   
      if (format.getMimeType().equals(mimeType)) {
        return format;
      }
    }
    return null;
  }
}

BasicDocumentFormatRegistry的默认实现类DefaultDocumentFormatRegistry  中支持的文件格式如下

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;
 
public class DefaultDocumentFormatRegistry extends BasicDocumentFormatRegistry {
 
  public DefaultDocumentFormatRegistry() {
    final DocumentFormat pdf = new DocumentFormat("Portable Document Format", "application/pdf", "pdf");
    pdf.setExportFilter(DocumentFamily.DRAWING, "draw_pdf_Export");
    pdf.setExportFilter(DocumentFamily.PRESENTATION, "impress_pdf_Export");
    pdf.setExportFilter(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
    pdf.setExportFilter(DocumentFamily.TEXT, "writer_pdf_Export");
    addDocumentFormat(pdf);
     
    final DocumentFormat swf = new DocumentFormat("Macromedia Flash", "application/x-shockwave-flash", "swf");
    swf.setExportFilter(DocumentFamily.DRAWING, "draw_flash_Export");
    swf.setExportFilter(DocumentFamily.PRESENTATION, "impress_flash_Export");
    addDocumentFormat(swf);
     
    final DocumentFormat xhtml = new DocumentFormat("XHTML", "application/xhtml+xml", "xhtml");
    xhtml.setExportFilter(DocumentFamily.PRESENTATION, "XHTML Impress File");
    xhtml.setExportFilter(DocumentFamily.SPREADSHEET, "XHTML Calc File");
    xhtml.setExportFilter(DocumentFamily.TEXT, "XHTML Writer File");
    addDocumentFormat(xhtml);
 
    // HTML is treated as Text when supplied as input, but as an output it is also
    // available for exporting Spreadsheet and Presentation formats
    final DocumentFormat html = new DocumentFormat("HTML", DocumentFamily.TEXT, "text/html", "html");
    html.setExportFilter(DocumentFamily.PRESENTATION, "impress_html_Export");
    html.setExportFilter(DocumentFamily.SPREADSHEET, "HTML (StarCalc)");
    html.setExportFilter(DocumentFamily.TEXT, "HTML (StarWriter)");
    addDocumentFormat(html);
     
    final DocumentFormat odt = new DocumentFormat("OpenDocument Text", DocumentFamily.TEXT, "application/vnd.oasis.opendocument.text", "odt");
    odt.setExportFilter(DocumentFamily.TEXT, "writer8");
    addDocumentFormat(odt);
 
    final DocumentFormat sxw = new DocumentFormat("OpenOffice.org 1.0 Text Document", DocumentFamily.TEXT, "application/vnd.sun.xml.writer", "sxw");
    sxw.setExportFilter(DocumentFamily.TEXT, "StarOffice XML (Writer)");
    addDocumentFormat(sxw);
 
    final DocumentFormat doc = new DocumentFormat("Microsoft Word", DocumentFamily.TEXT, "application/msword", "doc");
    doc.setExportFilter(DocumentFamily.TEXT, "MS Word 97");
    addDocumentFormat(doc);
 
    final DocumentFormat rtf = new DocumentFormat("Rich Text Format", DocumentFamily.TEXT, "text/rtf", "rtf");
    rtf.setExportFilter(DocumentFamily.TEXT, "Rich Text Format");
    addDocumentFormat(rtf);
 
    final DocumentFormat wpd = new DocumentFormat("WordPerfect", DocumentFamily.TEXT, "application/wordperfect", "wpd");
    addDocumentFormat(wpd);
 
    final DocumentFormat txt = new DocumentFormat("Plain Text", DocumentFamily.TEXT, "text/plain", "txt");
    // set FilterName to "Text" to prevent OOo from tryign to display the "ASCII Filter Options" dialog
    // alternatively FilterName could be "Text (encoded)" and FilterOptions used to set encoding if needed
    txt.setImportOption("FilterName", "Text");
    txt.setExportFilter(DocumentFamily.TEXT, "Text");
    addDocumentFormat(txt);
 
    final DocumentFormat wikitext = new DocumentFormat("MediaWiki wikitext", "text/x-wiki", "wiki");
    wikitext.setExportFilter(DocumentFamily.TEXT, "MediaWiki");
    addDocumentFormat(wikitext);
     
    final DocumentFormat ods = new DocumentFormat("OpenDocument Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.oasis.opendocument.spreadsheet", "ods");
    ods.setExportFilter(DocumentFamily.SPREADSHEET, "calc8");
    addDocumentFormat(ods);
 
    final DocumentFormat sxc = new DocumentFormat("OpenOffice.org 1.0 Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.sun.xml.calc", "sxc");
    sxc.setExportFilter(DocumentFamily.SPREADSHEET, "StarOffice XML (Calc)");
    addDocumentFormat(sxc);
 
    final DocumentFormat xls = new DocumentFormat("Microsoft Excel", DocumentFamily.SPREADSHEET, "application/vnd.ms-excel", "xls");
    xls.setExportFilter(DocumentFamily.SPREADSHEET, "MS Excel 97");
    addDocumentFormat(xls);
 
    final DocumentFormat csv = new DocumentFormat("CSV", DocumentFamily.SPREADSHEET, "text/csv", "csv");
    csv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");
    csv.setImportOption("FilterOptions", "44,34,0"); // Field Separator: ','; Text Delimiter: '"' 
    csv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");
    csv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "44,34,0");
    addDocumentFormat(csv);
 
    final DocumentFormat tsv = new DocumentFormat("Tab-separated Values", DocumentFamily.SPREADSHEET, "text/tab-separated-values", "tsv");
    tsv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");
    tsv.setImportOption("FilterOptions", "9,34,0"); // Field Separator: '\t'; Text Delimiter: '"'
    tsv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");
    tsv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "9,34,0");
    addDocumentFormat(tsv);
 
    final DocumentFormat odp = new DocumentFormat("OpenDocument Presentation", DocumentFamily.PRESENTATION, "application/vnd.oasis.opendocument.presentation", "odp");
    odp.setExportFilter(DocumentFamily.PRESENTATION, "impress8");
    addDocumentFormat(odp);
 
    final DocumentFormat sxi = new DocumentFormat("OpenOffice.org 1.0 Presentation", DocumentFamily.PRESENTATION, "application/vnd.sun.xml.impress", "sxi");
    sxi.setExportFilter(DocumentFamily.PRESENTATION, "StarOffice XML (Impress)");
    addDocumentFormat(sxi);
 
    final DocumentFormat ppt = new DocumentFormat("Microsoft PowerPoint", DocumentFamily.PRESENTATION, "application/vnd.ms-powerpoint", "ppt");
    ppt.setExportFilter(DocumentFamily.PRESENTATION, "MS PowerPoint 97");
    addDocumentFormat(ppt);
     
    final DocumentFormat odg = new DocumentFormat("OpenDocument Drawing", DocumentFamily.DRAWING, "application/vnd.oasis.opendocument.graphics", "odg");
    odg.setExportFilter(DocumentFamily.DRAWING, "draw8");
    addDocumentFormat(odg);
     
    final DocumentFormat svg = new DocumentFormat("Scalable Vector Graphics", "image/svg+xml", "svg");
    svg.setExportFilter(DocumentFamily.DRAWING, "draw_svg_Export");
    addDocumentFormat(svg);
  }
}

 解决方法:重写BasicDocumentFormatRegistry类中public DocumentFormat getFormatByFileExtension(String extension)方法,只要是后缀名包含doc则使用doc的documentFormat文档格式

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;
 
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
 
/**
 * 重写 BasicDocumentFormatRegistry 文档格式
 * @author HuGuangJun
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {
 
  private List/* <DocumentFormat> */ documentFormats = new ArrayList();
 
  public void addDocumentFormat(DocumentFormat documentFormat) {
    documentFormats.add(documentFormat);
  }
 
  protected List/* <DocumentFormat> */ getDocumentFormats() {
    return documentFormats;
  }
 
  /**
   * @param extension
   *      the file extension
   * @return the DocumentFormat for this extension, or null if the extension
   *     is not mapped
   */
  public DocumentFormat getFormatByFileExtension(String extension) {
    if (extension == null) {
      return null;
    }
    //将文件名后缀统一转化
    if (extension.indexOf("doc") >= 0) {
      extension = "doc";
    }
    if (extension.indexOf("ppt") >= 0) {
      extension = "ppt";
    }
    if (extension.indexOf("xls") >= 0) {
      extension = "xls";
    }
    String lowerExtension = extension.toLowerCase();
    for (Iterator it = documentFormats.iterator(); it.hasNext();) {
      DocumentFormat format = (DocumentFormat) it.next();
      if (format.getFileExtension().equals(lowerExtension)) {
        return format;
      }
    }
    return null;
  }
 
  public DocumentFormat getFormatByMimeType(String mimeType) {
    for (Iterator it = documentFormats.iterator(); it.hasNext();) {
      DocumentFormat format = (DocumentFormat) it.next();
      if (format.getMimeType().equals(mimeType)) {
        return format;
      }
    }
    return null;
  }
}

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。

原文链接:http://blog.csdn.net/make_a_difference/article/details/53771136