// com.formkiq.vision.crafter.DocumentRowLayoutBuilder (Maven / Gradle / Ivy)
/*
* Copyright (C) 2018 FormKiQ Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.formkiq.vision.crafter;
import static com.formkiq.vision.document.DocumentBlock.isEquals;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.Range;
import com.formkiq.vision.comparator.DocumentBlockRectangleComparator;
import com.formkiq.vision.comparator.DocumentBlockUpperYXComparator;
import com.formkiq.vision.crafter.comparator.DocumentBlockListComparator;
import com.formkiq.vision.crafter.predicate.DocumentBlockSingleFieldPredicate;
import com.formkiq.vision.document.DocumentBlock;
import com.formkiq.vision.document.DocumentBlockRectangle;
import com.formkiq.vision.predicate.DocumentBlockHorizontalLinePredicate;
import com.formkiq.vision.predicate.DocumentBlockRectangleBottomPredicate;
import com.formkiq.vision.predicate.DocumentBlockRectangleHeightGTPredicate;
import com.formkiq.vision.predicate.DocumentBlockRectangleHorizontalIntersectionPredicate;
import com.formkiq.vision.predicate.DocumentBlockRectangleTopPredicate;
import com.formkiq.vision.predicate.DocumentBlockRectangleXEqualsPredicate;
import com.formkiq.vision.predicate.DocumentBlockVerticalLinePredicate;
/**
* {@link Function} to transform {@link DocumentBlockRectangle} into
* {@link DocumentRowLayout}.
* TODO chop into multiple transformers.
*/
public class DocumentRowLayoutBuilder
implements Function, List> {
/** float. */
private float documentPageHeight;
/** {@link PageScratchPad}. */
private PageScratchPad scratchpad;
/**
* constructor.
* @param pad {@link PageScratchPad}
*/
public DocumentRowLayoutBuilder(final PageScratchPad pad) {
this.scratchpad = pad;
this.documentPageHeight = pad.getDocument()
.getPageHeight(pad.getDocumentPageNumber().intValue());
}
@Override
public List apply(
final List rawblocks) {
List hl = getHorizontalLines(rawblocks);
List vl = getVerticalLines(rawblocks);
List list = buildDocumentRows(hl, vl);
Collections.sort(list, new DocumentBlockRectangleComparator());
return list;
}
/**
* Build {@link List} of {@link DocumentRowLayout}.
* @param hl {@link List} {@link DocumentBlockRectangle}
* @param vl {@link List} {@link DocumentBlockRectangle}
* @return {@link List} {@link DocumentRowLayout}
*/
// TODO make Function
public List buildDocumentRows(
final List hl, final List vl) {
List rows = createRows(hl, vl);
Map> rowMap = createMap(rows, hl, vl);
List list = new ArrayList<>();
for (DocumentBlockRectangle row : rows) {
List lines = rowMap.get(row);
Collection blocks =
new DocumentRowToBlockTransformer(row).apply(lines);
DocumentRowLayout docrow = new DocumentRowLayout();
docrow.setHorizontalLines(lines.stream()
.filter(new DocumentBlockHorizontalLinePredicate())
.collect(Collectors.toList()));
docrow.setVerticalLines(lines.stream()
.filter(new DocumentBlockVerticalLinePredicate())
.collect(Collectors.toList()));
docrow.setBlock(row);
docrow.setBlocks(
blocks.stream().map(new DocumentRowBlockBuilder(this.scratchpad, docrow))
.collect(Collectors.toList()));
list.add(docrow);
}
return list;
}
/**
* Create Map of Rows to Horizontal / Vertical {@link DocumentBlock}.
*
* @param rows {@link List} {@link DocumentBlock}
* @param hl {@link List} {@link DocumentBlock}
* @param vl {@link List} {@link DocumentBlock}
* @return {@link Map} {@link DocumentBlock}
*/
private Map> createMap(
final List rows, final List hl,
final List vl) {
final int delta = 2;
Map> map = new HashMap<>();
for (DocumentBlockRectangle b : rows) {
Range xr = b.toRangeX();
Range yr = Range.between(
Float.valueOf(b.getLowerLeftY() + delta),
Float.valueOf(b.getUpperRightY() - delta));
List h = hl.stream()
.filter(l -> yr.isOverlappedBy(l.toRangeY())
&& xr.contains(Float.valueOf(l.getLowerLeftX())))
.collect(Collectors.toList());
List v = vl.stream()
.filter(l -> !isEquals(b.getLowerLeftX(), l.getLowerLeftX())
&& !isEquals(b.getUpperRightX(), l.getUpperRightX())
&& yr.isOverlappedBy(l.toRangeY())
&& xr.contains(Float.valueOf(l.getLowerLeftX())))
.collect(Collectors.toList());
map.put(b, Stream.concat(h.stream(), v.stream())
.collect(Collectors.toList()));
}
return map;
}
/**
* Create {@link DocumentBlock} into Rows.
* @param horizontalLines {@link List} {@link DocumentBlock}
* @param verticallines {@link List} {@link DocumentBlock}
* @return {@link List} {@link DocumentBlock}
*/
private List createRows(
final List horizontalLines,
final List verticallines) {
List sections = createRowSections(horizontalLines,
verticallines);
List rows = new ArrayList<>();
for (DocumentBlockRectangle s : sections) {
List hl = findHorizontalLinesForBlock(s, horizontalLines);
DocumentBlockRectangle top = null;
for (DocumentBlockRectangle bottom : hl) {
if (top != null) {
DocumentBlock b = new DocumentBlock();
if (isEquals(top.getLowerLeftX(), bottom.getLowerLeftX())) {
b.setLowerLeftX(Math.min(top.getLowerLeftX(),
bottom.getLowerLeftX()));
} else {
b.setLowerLeftX(Math.max(top.getLowerLeftX(),
bottom.getLowerLeftX()));
}
b.setUpperRightX(Math.max(top.getUpperRightX(),
bottom.getUpperRightX()));
b.setLowerLeftY(Math.min(top.getLowerLeftY(),
bottom.getLowerLeftY()));
b.setUpperRightY(Math.max(top.getUpperRightY(),
bottom.getUpperRightY()));
rows.add(b);
}
top = bottom;
}
}
return rows;
}
/**
* Find Horizontal Lines that touch {@link DocumentBlockRectangle}.
* @param s {@link DocumentBlockRectangle}
* @param horizontalLines {@link List} {@link DocumentBlockRectangle}
* @return {@link List} {@link DocumentBlockRectangle}
*/
private List findHorizontalLinesForBlock(final DocumentBlockRectangle s,
final List horizontalLines) {
return horizontalLines.stream()
.filter(new DocumentBlockRectangleHorizontalIntersectionPredicate(s))
.sorted(new DocumentBlockRectangleComparator()).collect(Collectors.toList());
}
/**
* Create Row Sections.
* @param horizontalLines {@link List} {@link DocumentBlock}
* @param verticallines {@link List} {@link DocumentBlock}
* @return {@link List} {@link DocumentBlock}
*/
private List createRowSections(
final List horizontalLines,
final List verticallines) {
List leftvl = findLeftMostVerticalLines(verticallines);
SimpleDocumentBlockMaker bm = new SimpleDocumentBlockMaker(
horizontalLines, verticallines);
List sections = leftvl.stream()
.flatMap(v -> bm.apply(v).stream())
.collect(Collectors.toList());
final float minHeight = this.documentPageHeight * 0.7f;
Predicate hp = new DocumentBlockRectangleHeightGTPredicate(
minHeight);
List list = findLargeBlockWithoutHorizontal(sections, hp,
horizontalLines);
if (!list.isEmpty()) {
// remove Vertical Lines matching large section
List vls = new ArrayList<>(verticallines);
for (DocumentBlockRectangle l : list) {
vls.removeIf(new DocumentBlockRectangleXEqualsPredicate(l.getLowerLeftX()));
vls.removeIf(new DocumentBlockRectangleXEqualsPredicate(l.getUpperRightX()));
}
List newsections = createRowSections(horizontalLines, vls);
if (newsections.stream().filter(hp).findFirst().isPresent()) {
sections = newsections;
} else {
// remove large sections with no horizontal lines
sections.removeAll(list);
}
}
sections.removeIf(new DocumentBlockSingleFieldPredicate(this.scratchpad));
DocumentBlockRectangleExpander exp = new DocumentBlockRectangleExpander(horizontalLines,
verticallines);
sections = sections.stream().flatMap(s -> exp.apply(s).stream())
.collect(Collectors.toList());
return sections;
}
/**
* Find Large {@link DocumentBlockRectangle} that do no have any Horizontal
* Lines that cross through.
*
* @param sections {@link List} {@link DocumentBlockRectangle}
* @param hp {@link Predicate} {@link DocumentBlockRectangle}
* @param horizontalLines {@link List} {@link DocumentBlockRectangle}
* @return {@link List} {@link DocumentBlockRectangle}
*/
private List findLargeBlockWithoutHorizontal(
final List sections, final Predicate hp,
final List horizontalLines) {
List list = sections.stream().filter(s -> {
Predicate tp = new DocumentBlockRectangleTopPredicate(s);
Predicate bp = new DocumentBlockRectangleBottomPredicate(s);
Predicate ip =
new DocumentBlockRectangleHorizontalIntersectionPredicate(s);
return hp.test(s) && !horizontalLines.stream()
.filter(ip.and(tp.negate()).and(bp.negate())).findFirst().isPresent();
}).collect(Collectors.toList());
return list;
}
/**
* Find Left Most Vertical Lines.
* @param vls {@link List} {@link DocumentBlockRectangle}
* @return {@link List} {@link DocumentBlockRectangle}
*/
private List findLeftMostVerticalLines(
final List vls) {
// find all Left most vertical lines
List vl = vls.stream().filter(v0 -> {
Float middle = Float
.valueOf((v0.getUpperRightY() + v0.getLowerLeftY()) / 2);
boolean isVerticalLines = !vls.stream()
.filter(v1 -> v1.getLowerLeftX() < v0.getLowerLeftX()
&& v1.toRangeY().contains(middle))
.findFirst().isPresent();
return isVerticalLines;
}).collect(Collectors.toList());
return vl;
}
/**
* Get Vertical Lines.
* @param rawblocks {@link List} {@link DocumentBlockRectangle}
* @return {@link List} {@link DocumentBlockRectangle}
*/
private List getVerticalLines(
final List rawblocks) {
DocumentBlockRemoveDuplicate rd = new DocumentBlockRemoveDuplicate(1);
List vl = rawblocks.stream()
.map(new DocumentRawBlockToDocumentBlock())
.map(new DocumentBlockSplitVerticalLines())
.flatMap(s -> s.stream())
.filter(s -> s.getWidth() > 1 || s.getHeight() > 1)
.sorted(new DocumentBlockListComparator())
.collect(Collectors.toList());
vl = rd.apply(vl);
DocumentBlockRectangle last = null;
vl.removeIf(b -> b.getWidth() < 1 && b.getHeight() < 1);
for (DocumentBlockRectangle v : new ArrayList<>(vl)) {
Range y = v.toRangeY();
if (last != null && y.isOverlappedBy(last.toRangeY())
&& isEquals(v.getUpperRightX(), last.getUpperRightX())) {
vl.remove(v);
vl.remove(last);
last = new DocumentBlock(last.getLowerLeftX(),
Math.min(v.getLowerLeftY(), last.getLowerLeftY()), last.getUpperRightX(),
Math.max(v.getUpperRightY(), last.getUpperRightY()));
vl.add(last);
} else {
last = v;
}
}
Collections.sort(vl, new DocumentBlockRectangleComparator());
return vl;
}
/**
* Get Horizontal Lines.
* @param rawblocks {@link List} {@link DocumentBlockRectangle}
* @return {@link List} {@link DocumentBlock}
*/
private List getHorizontalLines(
final List rawblocks) {
DocumentBlockRemoveDuplicate rd = new DocumentBlockRemoveDuplicate(1);
List hl = rawblocks.stream()
.map(new DocumentRawBlockToDocumentBlock())
.map(new DocumentBlockSplitHorizontalLines())
.flatMap(s -> s.stream())
.filter(s -> s.getWidth() > 1 || s.getHeight() > 1)
.sorted(new DocumentBlockUpperYXComparator())
.collect(Collectors.toList());
hl = rd.apply(hl);
DocumentBlockRectangle last = null;
for (DocumentBlockRectangle h : new ArrayList<>(hl)) {
Range x = h.toRangeX(2);
if (last != null && x.isOverlappedBy(last.toRangeX())
&& isEquals(h.getUpperRightY(), last.getUpperRightY())) {
hl.remove(last);
hl.remove(h);
last = new DocumentBlock(Math.min(h.getLowerLeftX(), last.getLowerLeftX()),
last.getLowerLeftY(), Math.max(h.getUpperRightX(), last.getUpperRightX()),
last.getUpperRightY());
hl.add(last);
} else {
last = h;
}
}
Collections.sort(hl, new DocumentBlockRectangleComparator());
return hl;
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy