net.sourceforge.javaocr.ocrPlugins.mseOCR.OCRScanner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of javaocr-plugin-awt Show documentation
Show all versions of javaocr-plugin-awt Show documentation
encapsulate AWT based image processing classes
The newest version!
/*
* Copyright (c) 2003-2012, Ronald B. Cemer , Konstantin Pribluda, William Whitney, Andrea De Pasquale
*
*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.sourceforge.javaocr.ocrPlugins.mseOCR;
import net.sourceforge.javaocr.scanner.DocumentScanner;
import net.sourceforge.javaocr.scanner.DocumentScannerListenerAdaptor;
import net.sourceforge.javaocr.scanner.PixelImage;
import net.sourceforge.javaocr.scanner.accuracy.AccuracyListenerInterface;
import net.sourceforge.javaocr.scanner.accuracy.AccuracyProviderInterface;
import net.sourceforge.javaocr.scanner.accuracy.OCRComp;
import net.sourceforge.javaocr.scanner.accuracy.OCRIdentification;
import java.awt.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.logging.Logger;
/**
* OCR document scanner.
* @author Ronald B. Cemer
*/
public class OCRScanner extends DocumentScannerListenerAdaptor implements AccuracyProviderInterface
{
private static final int BEST_MATCH_STORE_COUNT = 8;
private StringBuffer decodeBuffer = new StringBuffer();
private CharacterRange[] acceptableChars;
private boolean beginningOfRow = false;
private boolean firstRow = false;
private String newline = System.getProperty("line.separator");
private HashMap> trainingImages = new HashMap>();
private Character[] bestChars = new Character[BEST_MATCH_STORE_COUNT];
private double[] bestMSEs = new double[BEST_MATCH_STORE_COUNT];
private DocumentScanner documentScanner = new DocumentScanner();
private AccuracyListenerInterface accListener;
public void acceptAccuracyListener(AccuracyListenerInterface listener)
{
accListener = listener;
}
/**
* @return The DocumentScanner
instance that is used to scan the document(s).
* This is useful if the caller wants to adjust some of the scanner's parameters.
*/
public DocumentScanner getDocumentScanner()
{
return documentScanner;
}
/**
* Remove all training images from the training set.
*/
public void clearTrainingImages()
{
trainingImages.clear();
}
/**
* Add training images to the training set.
* @param images A HashMap
using Character
s for
* the keys. Each value is an ArrayList
of
* TrainingImages
for the specified character. The training
* images are added to any that may already have been loaded.
*/
public void addTrainingImages(HashMap> images)
{
for (Iterator it = images.keySet().iterator(); it.hasNext();)
{
Character key = it.next();
ArrayList al = images.get(key);
ArrayList oldAl = trainingImages.get(key);
if (oldAl == null)
{
oldAl = new ArrayList();
trainingImages.put(key, oldAl);
}
for (int i = 0; i < al.size(); i++)
{
oldAl.add(al.get(i));
}
}
}
/**
* Scan an image and return the decoded text.
* @param image The Image
to be scanned.
* @param x1 The leftmost pixel position of the area to be scanned, or
* 0
to start scanning at the left boundary of the image.
* @param y1 The topmost pixel position of the area to be scanned, or
* 0
to start scanning at the top boundary of the image.
* @param x2 The rightmost pixel position of the area to be scanned, or
* 0
to stop scanning at the right boundary of the image.
* @param y2 The bottommost pixel position of the area to be scanned, or
* 0
to stop scanning at the bottom boundary of the image.
* @param acceptableChars An array of CharacterRange
objects
* representing the ranges of characters which are allowed to be decoded,
* or null
to not limit which characters can be decoded.
* @return The decoded text.
*/
public String scan(
Image image,
int x1,
int y1,
int x2,
int y2,
CharacterRange[] acceptableChars)
{
this.acceptableChars = acceptableChars;
PixelImage pixelImage = new PixelImage(image);
pixelImage.toGrayScale(true);
pixelImage.filter();
decodeBuffer.setLength(0);
firstRow = true;
documentScanner.scan(pixelImage, this, x1, y1, x2, y2);
String result = decodeBuffer.toString();
decodeBuffer.setLength(0);
return result;
}
@Override
public void endRow(PixelImage pixelImage, int y1, int y2)
{
//Send accuracy of this identification to the listener
if (accListener != null)
{
OCRIdentification identAccuracy = new OCRIdentification(OCRComp.MSE);
identAccuracy.addChar('\n', 0.0);
accListener.processCharOrSpace(identAccuracy);
}
}
@Override
public void beginRow(PixelImage pixelImage, int y1, int y2)
{
beginningOfRow = true;
if (firstRow)
{
firstRow = false;
}
else
{
decodeBuffer.append(newline);
}
}
@Override
public void processChar(
PixelImage pixelImage,
int x1,
int y1,
int x2,
int y2,
int rowY1,
int rowY2)
{
int[] pixels = pixelImage.pixels;
int w = pixelImage.width;
int h = pixelImage.height;
int areaW = x2 - x1, areaH = y2 - y1;
float aspectRatio = ((float) areaW) / ((float) areaH);
int rowHeight = rowY2 - rowY1;
float topWhiteSpaceFraction = (float) (y1 - rowY1) / (float) rowHeight;
float bottomWhiteSpaceFraction = (float) (rowY2 - y2) / (float) rowHeight;
Iterator it;
if (acceptableChars != null)
{
ArrayList al = new ArrayList();
for (int cs = 0; cs < acceptableChars.length; cs++)
{
CharacterRange cr = acceptableChars[cs];
for (int c = cr.min; c <= cr.max; c++)
{
Character ch = new Character((char) c);
if (al.indexOf(ch) < 0)
{
al.add(ch);
}
}
}
it = al.iterator();
}
else
{
it = trainingImages.keySet().iterator();
}
int bestCount = 0;
while (it.hasNext())
{
Character ch = it.next();
ArrayList al = trainingImages.get(ch);
int nimg = al.size();
if (nimg > 0)
{
double mse = 0.0;
boolean gotAny = false;
for (int i = 0; i < nimg; i++)
{
TrainingImage ti = al.get(i);
if (isTrainingImageACandidate(
aspectRatio,
areaW,
areaH,
topWhiteSpaceFraction,
bottomWhiteSpaceFraction,
ti))
{
double thisMSE = ti.calcMSE(pixels, w, h, x1, y1, x2, y2);
if ((!gotAny) || (thisMSE < mse))
{
gotAny = true;
mse = thisMSE;
}
}
}
/// Maybe mse should be required to be below a certain threshold before we store it.
/// That would help us to handle things like welded characters, and characters that get improperly
/// split into two or more characters.
if (gotAny)
{
boolean inserted = false;
for (int i = 0; i < bestCount; i++)
{
if (mse < bestMSEs[i])
{
for (int j = Math.min(bestCount, BEST_MATCH_STORE_COUNT - 1); j > i; j--)
{
int k = j - 1;
bestChars[j] = bestChars[k];
bestMSEs[j] = bestMSEs[k];
}
bestChars[i] = ch;
bestMSEs[i] = mse;
if (bestCount < BEST_MATCH_STORE_COUNT)
{
bestCount++;
}
inserted = true;
break;
}
}
if ((!inserted) && (bestCount < BEST_MATCH_STORE_COUNT))
{
bestChars[bestCount] = ch;
bestMSEs[bestCount] = mse;
bestCount++;
}
}
}
}
/// We could also put some aspect ratio range checking into the page scanning logic (but only when
/// decoding; not when loading training images) so that the aspect ratio of a non-empty character
/// block is limited to within the min and max of the aspect ratios in the training set.
if (bestCount > 0)
{
decodeBuffer.append(bestChars[0].charValue());
//Send accuracy of this identification to the listener
if (accListener != null)
{
OCRIdentification identAccuracy = new OCRIdentification(OCRComp.MSE);
for (int i = 0; i < bestCount; i++)
{
identAccuracy.addChar((char) bestChars[i], bestMSEs[i]);
}
accListener.processCharOrSpace(identAccuracy);
}
}
else
{
if (accListener != null)
{
OCRIdentification identAccuracy = new OCRIdentification(OCRComp.MSE);
accListener.processCharOrSpace(identAccuracy);
}
}
}
private boolean isTrainingImageACandidate(
float aspectRatio,
int w,
int h,
float topWhiteSpaceFraction,
float bottomWhiteSpaceFraction,
TrainingImage ti)
{
// The aspect ratios must be within tolerance.
if (((aspectRatio / ti.aspectRatio) - 1.0f) > TrainingImage.ASPECT_RATIO_TOLERANCE)
{
return false;
}
if (((ti.aspectRatio / aspectRatio) - 1.0f) > TrainingImage.ASPECT_RATIO_TOLERANCE)
{
return false;
}
// The top whitespace fractions must be within tolerance.
if (Math.abs(topWhiteSpaceFraction - ti.topWhiteSpaceFraction)
> TrainingImage.TOP_WHITE_SPACE_FRACTION_TOLERANCE)
{
return false;
}
// The bottom whitespace fractions must be within tolerance.
if (Math.abs(bottomWhiteSpaceFraction - ti.bottomWhiteSpaceFraction)
> TrainingImage.BOTTOM_WHITE_SPACE_FRACTION_TOLERANCE)
{
return false;
}
// If the area being scanned is really small and we
// are about to crunch down a training image by a huge
// factor in order to compare to it, then don't do that.
if ((w <= 4) && (ti.width >= (w * 10)))
{
return false;
}
if ((h <= 4) && (ti.height >= (h * 10)))
{
return false;
}
// If the area being scanned is really large and we
// are about to expand a training image by a huge
// factor in order to compare to it, then don't do that.
if ((ti.width <= 4) && (w >= (ti.width * 10)))
{
return false;
}
if ((ti.height <= 4) && (h >= (ti.height * 10)))
{
return false;
}
return true;
}
@Override
public void processSpace(PixelImage pixelImage, int x1, int y1, int x2, int y2)
{
decodeBuffer.append(' ');
//Send accuracy of this identification to the listener
if (accListener != null)
{
OCRIdentification identAccuracy = new OCRIdentification(OCRComp.MSE);
identAccuracy.addChar(' ', 0.0);
accListener.processCharOrSpace(identAccuracy);
}
}
private static final Logger LOG = Logger.getLogger(OCRScanner.class.getName());
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy