org.sejda.model.parameter.OcrTextByPagesParameters Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sejda-model Show documentation
Show all versions of sejda-model Show documentation
Package containing reusable model for Sejda.
/*
* Created on 06 dic 2016
* Copyright 2015 by Andrea Vacondio ([email protected]).
* This file is part of Sejda.
*
* Sejda is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Sejda is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Sejda. If not, see <http://www.gnu.org/licenses/>.
*/
package org.sejda.model.parameter;
import java.util.*;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.sejda.common.collection.NullSafeSet;
import org.sejda.model.parameter.base.SinglePdfSourceMultipleOutputParameters;
import org.sejda.model.pdf.page.PageRange;
import org.sejda.model.pdf.page.PageRangeSelection;
import org.sejda.model.pdf.page.PagesSelection;
import org.sejda.model.validation.constraint.NotEmpty;
import javax.validation.Valid;
/**
 * Parameter class to extract text by pages performing OCR.
 * <p>
 * It holds the page ranges to process, the text encoding (initially {@code "UTF-8"}) and a set of languages that can be passed to the OCR engine as a hint.
 *
 * @author Andrea Vacondio
 *
 */
public class OcrTextByPagesParameters extends SinglePdfSourceMultipleOutputParameters
        implements PageRangeSelection, PagesSelection {

    @Valid
    private final Set<PageRange> pageSelection = new NullSafeSet<>();
    @NotEmpty
    private String textEncoding = "UTF-8";
    // Left public so that existing callers reading the field directly keep working; prefer addLanguage/getLanguages.
    public final Set<Locale> languages = new HashSet<>();

    /**
     * Adds a language to the list of possible languages of the text found in the document. This can help the OCR engine to return a more accurate result.
     *
     * @param language
     *            the language to add
     */
    public void addLanguage(Locale language) {
        this.languages.add(language);
    }

    /**
     * @return Languages that can be fed to the OCR engine to return a more accurate result. This is the live, modifiable set backing these parameters.
     */
    public Set<Locale> getLanguages() {
        return languages;
    }

    /**
     * @return the text encoding, {@code "UTF-8"} unless changed through {@link #setTextEncoding(String)}
     */
    public String getTextEncoding() {
        return textEncoding;
    }

    /**
     * @param textEncoding
     *            the text encoding to use
     */
    public void setTextEncoding(String textEncoding) {
        this.textEncoding = textEncoding;
    }

    /**
     * @return an unmodifiable view of the pageSelection
     */
    @Override
    public Set<PageRange> getPageSelection() {
        return Collections.unmodifiableSet(pageSelection);
    }

    /**
     * Adds a page range to the page selection.
     *
     * @param range
     *            the range to add
     */
    public void addPageRange(PageRange range) {
        pageSelection.add(range);
    }

    /**
     * Adds all the given page ranges to the page selection.
     *
     * @param ranges
     *            the ranges to add
     */
    public void addAllPageRanges(Collection<PageRange> ranges) {
        pageSelection.addAll(ranges);
    }

    /**
     * @param totalNumberOfPage
     *            the number of pages of the document (upper limit).
     * @return the selected set of pages. When no page range has been added, the pages of {@code new PageRange(1)} are returned. Iteration ordering is predictable, it is the order in
     *         which elements were inserted into the {@link PageRange} set.
     * @see PagesSelection#getPages(int)
     */
    @Override
    public Set<Integer> getPages(int totalNumberOfPage) {
        if (pageSelection.isEmpty()) {
            // no explicit selection: fall back to the range built from page 1
            return new PageRange(1).getPages(totalNumberOfPage);
        }
        Set<Integer> retSet = new NullSafeSet<>();
        for (PageRange range : getPageSelection()) {
            retSet.addAll(range.getPages(totalNumberOfPage));
        }
        return retSet;
    }

    @Override
    public int hashCode() {
        return new HashCodeBuilder().appendSuper(super.hashCode()).append(pageSelection).append(textEncoding)
                .append(languages).toHashCode();
    }

    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof OcrTextByPagesParameters)) {
            return false;
        }
        OcrTextByPagesParameters parameter = (OcrTextByPagesParameters) other;
        // compare the same fields hashCode() uses, on both sides, so the result does not depend on overridden getters
        return new EqualsBuilder().appendSuper(super.equals(other)).append(pageSelection, parameter.pageSelection)
                .append(textEncoding, parameter.textEncoding)
                .append(languages, parameter.languages)
                .isEquals();
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy