
// org.ladsn.commons.ocr.service.PictureRecognition (Maven / Gradle / Ivy)
package org.ladsn.commons.ocr.service;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.UUID;
import javax.imageio.ImageIO;
import org.apache.commons.lang.StringUtils;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.util.ImageHelper;
/**
 * Recognizes the text of picture verification codes (captchas) with Tesseract OCR.
 *
 * <p>An image is loaded from a stream, a file or a URL, pre-processed (grayscale,
 * binarization, scaling) and then handed to a {@link Tesseract} instance.
 *
 * @author LRJ
 */
public class PictureRecognition {

    /**
     * Factor applied to both image dimensions before OCR. It is currently 1, so the
     * image keeps its size. NOTE(review): the original comment on the scaling step
     * said the image is enlarged 5 times to improve recognition, but the code has
     * always multiplied by 1 - confirm which one is intended before changing this.
     */
    private static final int SCALE_FACTOR = 1;

    /**
     * Pre-processes the image and runs OCR on it.
     *
     * @param libPath     location of the training data (e.g. {@code E:\images\exam\out})
     * @param libLanguage language of the training data; training tif files are named
     *                    {@code [lang].[fontname].exp[num].tif}, where lang is the
     *                    language, fontname the font and num a user-chosen number
     * @param textImage   image to recognize; may be {@code null} when loading failed
     * @param outPaht     directory that receives a copy of the pre-processed image under
     *                    a random UUID file name; nothing is written when null or empty
     * @return the recognized text, or {@code null} when there is no image or
     *         recognition failed
     */
    private static String execute(String libPath, String libLanguage, BufferedImage textImage, String outPaht) {
        if (textImage == null) {
            // ImageIO.read yields null for unsupported formats and the loaders pass null
            // after a failed read; report it directly instead of relying on a
            // NullPointerException raised from inside the image helper.
            System.err.println("No image could be loaded, recognition skipped");
            return null;
        }
        String result = null;
        try {
            long start = System.currentTimeMillis();
            // Grayscale followed by binarization to improve the recognition rate.
            BufferedImage prepared = ImageHelper.convertImageToGrayscale(textImage);
            prepared = ImageHelper.convertImageToBinary(prepared);
            // Scale the image (see SCALE_FACTOR) and binarize the scaled result again.
            prepared = ImageHelper.getScaledInstance(prepared, prepared.getWidth() * SCALE_FACTOR,
                    prepared.getHeight() * SCALE_FACTOR);
            prepared = ImageHelper.convertImageToBinary(prepared);
            // Keep a copy of the pre-processed image when an output location is given.
            if (!StringUtils.isEmpty(outPaht)) {
                saveProcessedImage(prepared, outPaht);
            }
            Tesseract instance = new Tesseract();
            instance.setDatapath(libPath); // location of the training data
            instance.setLanguage(libLanguage); // e.g. chi_sim for simplified Chinese
            result = instance.doOCR(prepared);
            long end = System.currentTimeMillis();
            System.out.println("耗时" + (end - start) / 1000.0 + " s");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Writes the pre-processed image as a PNG file named after a random UUID.
     *
     * <p>This copy is only a debugging aid, so a failure to write it is reported but
     * does not stop the recognition.
     *
     * @param image   pre-processed image to store
     * @param outPaht directory that receives the file
     */
    private static void saveProcessedImage(BufferedImage image, String outPaht) {
        String uuid = UUID.randomUUID().toString().replace("-", "");
        // The data is encoded as PNG, so the file gets a matching extension; resolving
        // the name against the directory works with or without a trailing separator.
        File target = new File(outPaht, uuid + ".png");
        try {
            if (!ImageIO.write(image, "png", target)) {
                System.err.println("No image writer available, nothing written to " + target);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Recognizes the text of an image read from a stream. The stream is not closed
     * here; closing it stays the caller's responsibility.
     *
     * @param libPath     location of the training data
     * @param libLanguage language of the training data
     * @param input       stream delivering the image; {@code null} is treated as an
     *                    unreadable image
     * @param outPaht     directory for a copy of the pre-processed image, or null/empty
     * @return the recognized text, or {@code null} when the image could not be read or
     *         recognition failed
     */
    public static String executeForInputStream(String libPath, String libLanguage, InputStream input, String outPaht) {
        BufferedImage textImage = null;
        if (input != null) {
            try {
                textImage = ImageIO.read(input);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return execute(libPath, libLanguage, textImage, outPaht);
    }

    /**
     * Recognizes the text of an image stored in a file.
     *
     * @param libPath     location of the training data
     * @param libLanguage language of the training data
     * @param filePath    path of the image file; {@code null} is treated as an
     *                    unreadable image
     * @param outPaht     directory for a copy of the pre-processed image, or null/empty
     * @return the recognized text, or {@code null} when the image could not be read or
     *         recognition failed
     */
    public static String executeForFilePath(String libPath, String libLanguage, String filePath, String outPaht) {
        BufferedImage textImage = null;
        if (filePath != null) {
            try {
                textImage = ImageIO.read(new File(filePath));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return execute(libPath, libLanguage, textImage, outPaht);
    }

    /**
     * Recognizes the text of an image downloaded from a URL.
     *
     * @param libPath     location of the training data
     * @param libLanguage language of the training data
     * @param url         address of the image; {@code null} or a malformed address is
     *                    treated as an unreadable image
     * @param outPaht     directory for a copy of the pre-processed image, or null/empty
     * @return the recognized text, or {@code null} when the image could not be read or
     *         recognition failed
     */
    public static String executeForURL(String libPath, String libLanguage, String url, String outPaht) {
        BufferedImage textImage = null;
        if (url != null) {
            try {
                // A malformed address raises MalformedURLException, which is an IOException.
                textImage = ImageIO.read(new URL(url));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return execute(libPath, libLanguage, textImage, outPaht);
    }

    /**
     * Manual smoke test: downloads a verification code image and prints the
     * recognized text. The paths are specific to the original author's machine.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // A local file can be recognized the same way through executeForFilePath.
        String libPath = "E:\\work\\mavenwork\\sts\\ladsn-commons-code\\src\\test\\tesseract-ocr\\images\\exam\\out\\";
        String result = PictureRecognition.executeForURL(libPath, "exam", "http://exam.tpccn.com/inc/checkcode.jsp", libPath);
        System.out.println("==========" + result);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy