【Selenium-WebDriver实战篇】基于java的selenium之验证码识别内容

==========================================================================================================

写在前面：

还是遇到了传说中的登录验证码数字图片，从网上找了一些例子，把觉得比较好的链接地址记录下来，然后修改相关的内容使其能用。

内容还是蛮实用的，方便以后查阅。

==========================================================================================================

参考链接：http://www.51testing.com/html/53/462853-3718359.html

遇到无法下载tesseract的问题，网上查到可以通过下面的链接进行下载安装。

https://blog.****.net/showgea/article/details/82656515

遇到的第三个问题是：对有噪点的图片使用tesseract工具时，识别结果很差，但是对黑白照片识别效果很好，所以想到了把图片黑白化后再识别。

https://blog.****.net/zhulier1124/article/details/80606647

https://sourceforge.net/p/tess4j/bugs/15/

识别验证码切割后的算法：https://www.cnblogs.com/zeze/p/5816742.html

https://www.cnblogs.com/nayitian/p/3282862.html

https://blog.****.net/qq_34351177/article/details/84992133

在识别的道路上总是遇到各种各样的问题，现在可以裁剪图片了，但是裁剪后的原图是倾斜的，所以识别率也很低，于是找到旋转图片的方法。

https://blog.****.net/x541211190/article/details/80784713

package com.util;

import java.io.BufferedReader;

import java.io.ByteArrayOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.net.HttpURLConnection;

import java.net.URL;

/*准备工具：

 * 1.下载安装tesseract：http://sourceforge.net/projects/tesseract-ocr/

 * 2.将tesseract.exe命令保存为bat文件，bat内容为

 * @echo off

tesseract.exe yzm.png 1 -l

exit

 *获取验证码步骤：

 *1、保存动态页面中的图片文件到本地；

download(URL地址, "code.png");

  2、java运行tesseract批量处理bat文件，产生TXT文件保存到本地；

 String batPath = "cmd /c start G:\\uxin1\\selenium_xin\\code.bat";

 runTesseractBat(String batPath)

  3、读取TXT文件；

String filepath="G:/uxin1/selenium_xin/code.txt";

readTextFile(filePath)

 */

public class analysisIdentifyingCode {

/*

 * 根据url下载验证码图片并保存到指定的地址

 * @strUrl 图片下载地址

 * @strPath 保存路径

 */

public void download(String strUrl,String strPath) {

  FileOutputStream utStream = null;

  try {

 //打开链接

 URL url = new URL(strUrl);                               

 HttpURLConnection conn = (HttpURLConnection) url.openConnection();

 conn.setConnectTimeout(5 * 1000);

 //通过输入流获取图片数据

 InputStream inStream = conn.getInputStream();

 //得到图片的二进制数据，以二进制封装得到数据，具有通用性

 byte[] data =readInputStream(inStream);

 //new一个文件对象用来保存图片，默认保存当前工程根目录

 File imageFile = new File(strPath);

 //创建输出流

 outStream = new FileOutputStream(imageFile);

 //写入数据

 outStream.write(data);

 //关闭输出流

 outStream.close();           

} catch (Exception e) {

               e.printStackTrace();

           } finally {

               try {  

                 if (outStream != null) { 

                     outStream.close();

                    }  

             } catch (IOException e) { 

                 e.printStackTrace(); 

             } 

   }

}

/*

 * java运行tesseract批量处理bat文件，产生TXT文件保存到本地

 * @batPath batPath 存放的二进制图片路径

 *

 */

public void runTesseractBat(String batPath) throws InterruptedException{

String cmd = batPath;

Thread.sleep(2000);

try {

Runtime.getRuntime().exec(cmd);    }

catch (IOException e) {

// TODO Auto-generated catch block

Log.error("运行tesseract批量处理bat文件异常", e);

   } 

}

/*

 * 读取得到的txt文档读取获取到的文本内容

 * @batPath batPath 存放的二进制图片路径

 *

 */

public static String readTextFile(String filePath) {

String IdentificateTxt = null;

String lineTxt = null;

    try {

       String encoding = "GBK";

       File file = new File(filePath);

        if (file.isFile() && file.exists()) { // 判断文件是否存在

            InputStreamReader read = new InputStreamReader(

            new FileInputStream(file), encoding);// 考虑到编码格式

            BufferedReader bufferedReader = new BufferedReader(read);

          while ((lineTxt = bufferedReader.readLine()) != null) {

        Log.info(lineTxt);

        IdentificateTxt = lineTxt;

           }

           read.close();

        } else {

            Log.error("----找不到指定的文件"); 

      }      } catch (Exception e) {

    Log.error("读取文件内容出错",e);

   }

    return IdentificateTxt;

}

/*

 * 读取的文件流

 * @batPath batPath 存放的二进制图片路径

 *

 */

public static byte[] readInputStream(InputStream inStream) throws Exception{

    ByteArrayOutputStream utStream = new ByteArrayOutputStream();

    //创建一个Buffer字符串

    byte[] buffer = new byte[1024];

    //每次读取的字符串长度，如果为-1，代表全部读取完毕

    int len = 0;

    //使用一个输入流从buffer里把数据读取出来

    while( (len=inStream.read(buffer)) != -1 ){

        //用输出流往buffer里写入数据，中间参数代表从哪个位置开始读，len代表读取的长度

        outStream.write(buffer, 0, len);

    }

    //关闭输入流

    inStream.close();

    //把outStream里的数据写入内存

    return outStream.toByteArray();

}

}