使用阿里云的图片识别成表格ocr(将图片表格转换成excel)

时间:2022-09-05 23:44:56

为了简便财务总是要对照着别人发来的表格图片制作成自己的表格

效果图如下

使用阿里云的图片识别成表格ocr(将图片表格转换成excel)

整合的代码
package com.xai.wuye.controller.api;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import com.xai.wuye.common.JsonResult;
import com.xai.wuye.exception.ResultException;
import com.xai.wuye.model.AParam
import com.xai.wuye.service.CarService;
import com.xai.wuye.util.HttpUtils;
import org.apache.http.HttpResponse;
import org.apache.http.util.EntityUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.FileSystemResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile; import java.io.*;
import java.util.Date;
import java.util.HashMap;
import java.util.Map; import static org.apache.tomcat.util.codec.binary.Base64.encodeBase64; @Controller
@EnableAsync
@RequestMapping("/api/ocr")
public class AliOCRImages { @Autowired
CarService carService;    private String OcrPath = "/home/runApp/car/orc/"; @ResponseBody
@RequestMapping("table")
public JsonResult getFirstLicence(@RequestParam(value = "file", required = false) MultipartFile file) {
if (file == null || file.isEmpty()||file.getSize() > 1204*1204*3)
throw new ResultException(0,"文件为null,且不能大于3M"); String filename = file.getOriginalFilename();
String filepath = OcrPath+"temp/"+filename;
File newFile = new File(filepath);
try {
file.transferTo(newFile); String host = "https://form.market.alicloudapi.com";
String path = "/api/predict/ocr_table_parse";            // 输入阿里的code
           String appcode = "4926a667ee6c41329c278361*****";
String imgFile = "图片路径";
Boolean is_old_format = false;//如果文档的输入中含有inputs字段,设置为True, 否则设置为False
//请根据线上文档修改configure字段
JSONObject configObj = new JSONObject();
configObj.put("format", "xlsx");
configObj.put("finance", false);
configObj.put("dir_assure", false);
String config_str = configObj.toString();
// configObj.put("min_size", 5);
//String config_str = ""; String method = "POST";
Map<String, String> headers = new HashMap<String, String>();
//最后在header中的格式(中间是英文空格)为Authorization:APPCODE 83359fd73fe94948385f570e3c139105
headers.put("Authorization", "APPCODE " + appcode); Map<String, String> querys = new HashMap<String, String>(); // 对图像进行base64编码
String imgBase64 = "";
try { byte[] content = new byte[(int) newFile.length()];
FileInputStream finputstream = new FileInputStream(newFile);
finputstream.read(content);
finputstream.close();
imgBase64 = new String(encodeBase64(content));
} catch (IOException e) {
e.printStackTrace();
return null;
}
// 拼装请求body的json字符串
JSONObject requestObj = new JSONObject();
try {
if(is_old_format) {
JSONObject obj = new JSONObject();
obj.put("image", getParam(50, imgBase64));
if(config_str.length() > 0) {
obj.put("configure", getParam(50, config_str));
}
JSONArray inputArray = new JSONArray();
inputArray.add(obj);
requestObj.put("inputs", inputArray);
}else{
requestObj.put("image", imgBase64);
if(config_str.length() > 0) {
requestObj.put("configure", config_str);
}
}
} catch (JSONException e) {
e.printStackTrace();
}
String bodys = requestObj.toString(); try {
/**
* 重要提示如下:
* HttpUtils请从
* https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/src/main/java/com/aliyun/api/gateway/demo/util/HttpUtils.java
* 下载
*
* 相应的依赖请参照
* https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/pom.xml
*/
HttpResponse response = HttpUtils.doPost(host, path, method, headers, querys, bodys);
int stat = response.getStatusLine().getStatusCode();
if(stat != 200){
System.out.println("Http code: " + stat);
System.out.println("http header error msg: "+ response.getFirstHeader("X-Ca-Error-Message"));
System.out.println("Http body error msg:" + EntityUtils.toString(response.getEntity()));
return null;
} String res = EntityUtils.toString(response.getEntity());
JSONObject res_obj = JSON.parseObject(res);
Long fileName = System.currentTimeMillis();
if(is_old_format) { JSONArray outputArray = res_obj.getJSONArray("outputs");
String output = outputArray.getJSONObject(0).getJSONObject("outputValue").getString("dataValue");
JSONObject out = JSON.parseObject(output);
System.out.println(out.toJSONString()); }else{ String tmp_base64path = OcrPath + fileName;
File tmp_base64file = new File(tmp_base64path);
if(!tmp_base64file.exists()){
tmp_base64file.getParentFile().mkdirs();
}
tmp_base64file.createNewFile(); // write
FileWriter fw = new FileWriter(tmp_base64file, true);
BufferedWriter bw = new BufferedWriter(fw);
bw.write(res_obj.getString("tables"));
bw.flush();
bw.close();
fw.close(); String exelFilePath = OcrPath + fileName + "_1.xlsx";
Runtime.getRuntime().exec("touch "+exelFilePath).destroy();
Process exec = Runtime.getRuntime().exec("sed -i -e 's/\\\\n/\\n/g' " + tmp_base64path);
exec.waitFor();
exec.destroy(); Process exec1 = null;
String[] cmd = { "/bin/sh", "-c", "base64 -d " + tmp_base64path + " > " + exelFilePath };
exec1 = Runtime.getRuntime().exec(cmd);
exec1.waitFor();
exec1.destroy(); return JsonResult.success(fileName);
}
} catch (Exception e) {
e.printStackTrace();
} } catch (IOException e) {
e.printStackTrace();
} return null;
} @ResponseBody
@RequestMapping("getId")
public ResponseEntity<FileSystemResource> getFirstLicence(String id) {
String exelFilePath = OcrPath + id + "_1.xlsx";
return export(new File(exelFilePath));
} public ResponseEntity<FileSystemResource> export(File file) {
if (file == null) {
return null;
}
HttpHeaders headers = new HttpHeaders();
headers.add("Cache-Control", "no-cache, no-store, must-revalidate");
headers.add("Content-Disposition", "attachment; filename=" + System.currentTimeMillis() + ".xls");
headers.add("Pragma", "no-cache");
headers.add("Expires", "0");
headers.add("Last-Modified", new Date().toString());
headers.add("ETag", String.valueOf(System.currentTimeMillis())); return ResponseEntity
.ok()
.headers(headers)
.contentLength(file.length())
.contentType(MediaType.parseMediaType("application/octet-stream"))
.body(new FileSystemResource(file));
} public static JSONObject getParam(int type, String dataValue) {
JSONObject obj = new JSONObject();
try {
obj.put("dataType", type);
obj.put("dataValue", dataValue);
} catch (JSONException e) {
e.printStackTrace();
}
return obj;
} }

大功告成

  • 以下是静态页面代码
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<!-- import CSS -->
<link rel="stylesheet" href="https://unpkg.com/element-ui/lib/theme-chalk/index.css">
<title>table</title>
</head>
<body>
<div id="app">
<el-upload
class="upload-demo"
drag
action="https://www.***.com/car/api/ocr/table"
:file-list="imagelist"
:on-preview="pre"
>
<i class="el-icon-upload"></i>
<div class="el-upload__text">将文件拖到此处,或<em>点击上传</em></div>
<div class="el-upload__tip" slot="tip">只能上传jpg/png文件,且不超过500kb</div>
</el-upload>
<div class="img-content" v-for="(item,key) in imagelist" :key="key">
<img :src="item.url">
<div class="name">
<div>{{ item.name }}</div>
<el-button type="text" @click="handleFileName(item,key)">修改名字</el-button>
</div>
<!-- 删除icon -->
<div class="del">
<i @click="handleFileRemove(item,key)" class="el-icon-delete2"></i>
</div>
<!-- 放大icon -->
<div class="layer" @click="handleFileEnlarge(item.url)">
<i class="el-icon-view"></i>
</div>
</div>
</div>
</body>
<!-- import Vue before Element -->
<script src="https://unpkg.com/vue/dist/vue.js"></script>
<!-- import JavaScript -->
<script src="https://unpkg.com/element-ui/lib/index.js"></script>
<script>
new Vue({
el: '#app',
data: function() {
return {
visible: false,
imagelist: [ ] }
},
methods: {
pre(res) {
console.log(res.response.msg)
window.open("https://www.***.com/api/ocr/getId?id="+res.response.data);
}
} })
</script>
</html>