org.sejda.impl.sambox.ocr.component.OCR Maven / Gradle / Ivy
/*
* Created on 05 dic 2016
* Copyright 2015 by Andrea Vacondio ([email protected]).
* This file is part of Sejda.
*
* Sejda is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Sejda is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Sejda. If not, see .
*/
package org.sejda.impl.sambox.ocr.component;
import java.awt.image.BufferedImage;
import java.io.IOException;
import org.sejda.impl.sambox.ocr.util.ImageUtils;
import net.sourceforge.tess4j.Tesseract;
/**
* @author Andrea Vacondio
*
*/
public class OCR extends Tesseract {
public String ocrTextFrom(BufferedImage image) throws IOException {
init();
setTessVariables();
StringBuilder sb = new StringBuilder();
try {
setImage(image.getWidth(), image.getHeight(), ImageUtils.convertToTiff(image), null,
image.getColorModel().getPixelSize());
return sb.append(getOCRText("", 0)).toString();
} finally {
dispose();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy