// com.formkiq.vision.crafter.TextBlockExtractor (Maven / Gradle / Ivy)
/*
* Copyright (C) 2018 FormKiQ Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.formkiq.vision.crafter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.lang3.Range;
import org.apache.commons.lang3.builder.ToStringBuilder;
import com.formkiq.vision.comparator.DocumentBlockRectangleComparator;
import com.formkiq.vision.document.DocumentBlockRectangle;
import com.formkiq.vision.document.DocumentContentGroup;
import com.formkiq.vision.document.DocumentSection;
import com.formkiq.vision.document.DocumentSectionContent;
import com.formkiq.vision.document.DocumentSource;
/**
 * A block of text made up of a group of {@link TextLineExtractor} lines.
 *
 * <p>The block's X and Y ranges are derived from its lines, so
 * {@link #getX()} and {@link #getY()} require at least one line.
 */
public class TextBlockExtractor implements BlockExtractor {

  /** {@link List} of {@link TextLineExtractor} that make up this block. */
  private List<TextLineExtractor> lines;

  /** Converts a {@link DocumentBlockRectangle} into section content. */
  private final DocumentBlockRectangleToDocumentContent blockToContent;

  /**
   * Creates an empty block.
   *
   * @param document {@link DocumentSource}
   */
  public TextBlockExtractor(final DocumentSource document) {
    this.lines = new ArrayList<>();
    this.blockToContent = new DocumentBlockRectangleToDocumentContent(document);
  }

  /**
   * Creates a block pre-populated with the given lines.
   *
   * @param document {@link DocumentSource}
   * @param list {@link List} {@link TextLineExtractor}
   */
  public TextBlockExtractor(final DocumentSource document,
      final List<TextLineExtractor> list) {
    this(document);
    addLines(list);
  }

  /**
   * Appends a single line to this block.
   *
   * @param line {@link TextLineExtractor}
   */
  public void addLine(final TextLineExtractor line) {
    this.lines.add(line);
  }

  /**
   * Appends all of the given lines to this block.
   *
   * @param textlines {@link List} {@link TextLineExtractor}
   */
  public void addLines(final List<TextLineExtractor> textlines) {
    this.lines.addAll(textlines);
  }

  /**
   * Returns the internal list of lines (not a copy), so changes made by the
   * caller are visible to this block.
   *
   * @return {@link List} {@link TextLineExtractor}
   */
  public List<TextLineExtractor> getLines() {
    return this.lines;
  }

  /**
   * Returns the X range spanning every line: from the smallest line minimum
   * to the largest line maximum.
   *
   * @return {@link Range}
   * @throws java.util.NoSuchElementException if this block has no lines
   */
  @Override
  public Range<Float> getX() {
    Float max = this.lines.stream().map(l -> l.getX().getMaximum())
        .max(Float::compareTo).get();
    Float min = this.lines.stream().map(l -> l.getX().getMinimum())
        .min(Float::compareTo).get();
    return Range.between(min, max);
  }

  /**
   * Returns the Y range spanning every line: from the smallest line minimum
   * to the largest line maximum.
   *
   * @return {@link Range}
   * @throws java.util.NoSuchElementException if this block has no lines
   */
  @Override
  public Range<Float> getY() {
    Float max = this.lines.stream().map(l -> l.getY().getMaximum())
        .max(Float::compareTo).get();
    Float min = this.lines.stream().map(l -> l.getY().getMinimum())
        .min(Float::compareTo).get();
    return Range.between(min, max);
  }

  /**
   * Has Multi line.
   *
   * @return boolean true if any line reports itself as multi line
   */
  public boolean hasMultiLine() {
    return this.lines.stream().anyMatch(TextLineExtractor::isMultiLine);
  }

  /**
   * Has Field.
   *
   * @return boolean true if any line reports that it has a field
   */
  public boolean hasField() {
    return this.lines.stream().anyMatch(TextLineExtractor::hasField);
  }

  /**
   * Replaces the internal list of lines with the given list; the reference
   * is stored as-is, not copied.
   *
   * @param extractorLines {@link List} {@link TextLineExtractor}
   */
  public void setLines(final List<TextLineExtractor> extractorLines) {
    this.lines = extractorLines;
  }

  @Override
  public String toString() {
    // getY() cannot be computed for an empty block (it would throw), so the
    // emptiness check has to happen before the Y range is evaluated.
    if (this.lines.isEmpty()) {
      return "no lines";
    }
    return "y:" + getY() + "," + ToStringBuilder.reflectionToString(this);
  }

  /**
   * Returns the rectangles of every line, flattened into one collection in
   * line order.
   *
   * @return {@link Collection} {@link DocumentBlockRectangle}
   */
  @Override
  public Collection<DocumentBlockRectangle> getBlocks() {
    return this.lines.stream().flatMap(l -> l.getRectangles().stream())
        .collect(Collectors.toList());
  }

  /**
   * Builds a new {@link DocumentSection} whose content is derived from the
   * lines of this block. Note that each line's rectangle list is sorted in
   * place as part of this call.
   *
   * @return {@link DocumentSection}
   */
  @Override
  public DocumentSection toDocumentSection() {
    DocumentSection section = new DocumentSection();
    section.setContent(buildSectionContent());
    return section;
  }

  /**
   * Builds {@link DocumentSectionContent}.
   *
   * <p>For each line the rectangles are sorted with
   * {@link DocumentBlockRectangleComparator}, converted to content and any
   * {@code null} conversions are dropped. A line yielding exactly one content
   * element contributes that element directly; a line yielding several
   * contributes a single {@link DocumentContentGroup}; a line yielding none
   * contributes nothing.
   *
   * @return {@link List} {@link DocumentSectionContent}
   */
  private List<DocumentSectionContent> buildSectionContent() {

    List<DocumentSectionContent> list = new ArrayList<>();

    for (TextLineExtractor e : this.lines) {

      // Sorts the line's own rectangle list (visible to other users of it).
      Collections.sort(e.getRectangles(),
          new DocumentBlockRectangleComparator());

      List<DocumentSectionContent> scontents = e.getRectangles().stream()
          .map(this.blockToContent).filter(s -> s != null)
          .collect(Collectors.toList());

      if (scontents.size() == 1) {
        list.add(scontents.get(0));
      } else if (!scontents.isEmpty()) {
        list.add(new DocumentContentGroup(scontents));
      }
    }

    return list;
  }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy