All downloads are free. The search and download functionality uses the official Maven repository.

org.ladsn.commons.ocr.service.PictureRecognition Maven / Gradle / Ivy

package org.ladsn.commons.ocr.service;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.UUID;

import javax.imageio.ImageIO;

import org.apache.commons.lang.StringUtils;

import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.util.ImageHelper;

/**
 * Recognizes text in pictures (e.g. image verification codes) using Tess4J.
 * <p>
 * Every public entry point loads an image from a different kind of source
 * (stream, file path, URL) and hands it to the same pre-processing + OCR
 * routine. All entry points report failures by printing to the console and
 * returning {@code null}.
 *
 * @author LRJ
 */
public class PictureRecognition {

	/**
	 * Enlargement factor applied to the image before OCR. The original
	 * comment on this step described enlarging the image 5x to improve the
	 * recognition rate, but the code multiplies both dimensions by 1, so that
	 * value is kept here to preserve behavior.
	 */
	private static final int SCALE_FACTOR = 1;

	/**
	 * Pre-processes the image (grayscale, binarize, scale, binarize again) and
	 * runs Tesseract OCR on it.
	 *
	 * @param libPath     location of the Tesseract training data (e.g.
	 *                    {@code E:\\images\\exam\\out})
	 * @param libLanguage language name of the training data; training tif
	 *                    files are named {@code [lang].[fontname].exp[num].tif}
	 *                    where lang is the language, fontname the font and num
	 *                    a custom number
	 * @param textImage   image to recognize; may be {@code null} when the
	 *                    caller failed to load it
	 * @param outPaht     directory into which the pre-processed image is
	 *                    written for inspection (file name is a random UUID);
	 *                    nothing is written when {@code null} or empty
	 * @return the recognized text, or {@code null} when there is no image or
	 *         recognition fails
	 */
	private static String execute(String libPath, String libLanguage, BufferedImage textImage, String outPaht) {
		// ImageIO.read yields null when no reader can decode the source, and the
		// public wrappers pass null after an IOException; fail explicitly here
		// instead of relying on a NullPointerException further down.
		if (textImage == null) {
			System.err.println("PictureRecognition: no image to recognize (source could not be read or decoded)");
			return null;
		}

		String result = null;
		try {
			long start = System.currentTimeMillis();

			// Convert to grayscale first to improve the recognition rate.
			BufferedImage processed = ImageHelper.convertImageToGrayscale(textImage);
			// Binarize (black/white) the grayscale image.
			processed = ImageHelper.convertImageToBinary(processed);
			// Scale the image; see SCALE_FACTOR for why this is currently 1x.
			processed = ImageHelper.getScaledInstance(processed, processed.getWidth() * SCALE_FACTOR,
					processed.getHeight() * SCALE_FACTOR);
			// Binarize again after scaling.
			processed = ImageHelper.convertImageToBinary(processed);

			// Optionally dump the pre-processed image so it can be inspected.
			if (!StringUtils.isEmpty(outPaht)) {
				saveProcessedImage(processed, outPaht);
			}

			Tesseract instance = new Tesseract();
			instance.setDatapath(libPath); // location of the training data
			instance.setLanguage(libLanguage); // e.g. chi_sim for simplified Chinese
			result = instance.doOCR(processed);

			long elapsedMillis = System.currentTimeMillis() - start;
			System.out.println("耗时" + elapsedMillis / 1000.0 + " s");
		} catch (Exception e) {
			// Best-effort API: report the failure and return null to the caller.
			e.printStackTrace();
		}
		return result;
	}

	/**
	 * Writes the pre-processed image as a PNG file named with a random UUID
	 * into the given directory. A failure here is reported but does not stop
	 * the recognition, because the file is only a diagnostic artifact.
	 *
	 * @param image  pre-processed image to write
	 * @param outDir target directory (with or without a trailing separator)
	 */
	private static void saveProcessedImage(BufferedImage image, String outDir) {
		String uuid = UUID.randomUUID().toString().replace("-", "");
		// The data is encoded as PNG, so the file carries a matching extension.
		File target = new File(outDir, uuid + ".png");
		try {
			if (!ImageIO.write(image, "png", target)) {
				System.err.println("PictureRecognition: no PNG writer available for " + target);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Recognizes the text of an image read from a stream. The stream is not
	 * closed by this method.
	 *
	 * @param libPath     location of the Tesseract training data
	 * @param libLanguage language name of the training data
	 * @param input       stream supplying the image to recognize
	 * @param outPaht     directory for the pre-processed image dump, or
	 *                    {@code null}/empty to skip it
	 * @return the recognized text, or {@code null} when the image cannot be
	 *         read or recognition fails
	 */
	public static String executeForInputStream(String libPath, String libLanguage, InputStream input, String outPaht) {
		BufferedImage textImage = null;
		try {
			textImage = ImageIO.read(input);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return execute(libPath, libLanguage, textImage, outPaht);
	}

	/**
	 * Recognizes the text of an image stored in a file.
	 *
	 * @param libPath     location of the Tesseract training data
	 * @param libLanguage language name of the training data
	 * @param filePath    path of the image file to recognize
	 * @param outPaht     directory for the pre-processed image dump, or
	 *                    {@code null}/empty to skip it
	 * @return the recognized text, or {@code null} when the image cannot be
	 *         read or recognition fails
	 */
	public static String executeForFilePath(String libPath, String libLanguage, String filePath, String outPaht) {
		BufferedImage textImage = null;
		try {
			textImage = ImageIO.read(new File(filePath));
		} catch (IOException e) {
			e.printStackTrace();
		}
		return execute(libPath, libLanguage, textImage, outPaht);
	}

	/**
	 * Recognizes the text of an image downloaded from a URL.
	 *
	 * @param libPath     location of the Tesseract training data
	 * @param libLanguage language name of the training data
	 * @param url         URL of the image to recognize
	 * @param outPaht     directory for the pre-processed image dump, or
	 *                    {@code null}/empty to skip it
	 * @return the recognized text, or {@code null} when the image cannot be
	 *         read or recognition fails
	 */
	public static String executeForURL(String libPath, String libLanguage, String url, String outPaht) {
		BufferedImage textImage = null;
		try {
			textImage = ImageIO.read(new URL(url));
		} catch (IOException e) {
			e.printStackTrace();
		}
		return execute(libPath, libLanguage, textImage, outPaht);
	}

	/**
	 * Manual smoke test: recognizes a verification code fetched from a URL
	 * using a developer-local training data directory.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
//		String result = PictureRecognition.executeForFilePath("E:\\images\\exam\\out\\", "exam", "E:\\\\images\\\\exam\\\\out\\\\1.jpg", "E:\\images\\exam\\out\\");
//		System.out.println("==========" + result);

		String libPath = "E:\\work\\mavenwork\\sts\\ladsn-commons-code\\src\\test\\tesseract-ocr\\images\\exam\\out\\";
		String result = PictureRecognition.executeForURL(libPath, "exam", "http://exam.tpccn.com/inc/checkcode.jsp", libPath);
		System.out.println("==========" + result);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy