// com.formkiq.vision.crafter.DocumentRowToBlockTransformer (Maven / Gradle / Ivy)
/*
* Copyright (C) 2018 FormKiQ Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.formkiq.vision.crafter;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Deque;
import java.util.List;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.Range;
import com.formkiq.vision.comparator.DocumentBlockXComparator;
import com.formkiq.vision.document.DocumentBlock;
import com.formkiq.vision.document.DocumentBlockRectangle;
import com.formkiq.vision.predicate.DocumentBlockHorizontalLinePredicate;
import com.formkiq.vision.predicate.DocumentBlockVerticalLinePredicate;
import com.formkiq.vision.util.RangeUtil;
/**
* {@link Function} to transform lines into {@link DocumentBlock}.
*
*/
public class DocumentRowToBlockTransformer implements
Function, Collection> {
/** {@link DocumentBlock}. */
private DocumentBlockRectangle documentRow;
/**
* constructor.
* @param row {@link DocumentBlock}
*/
public DocumentRowToBlockTransformer(final DocumentBlockRectangle row) {
this.documentRow = row;
}
@Override
public Collection apply(
final Collection lines) {
List hl = lines.stream()
.filter(new DocumentBlockHorizontalLinePredicate())
.collect(Collectors.toList());
List vl = filterVerticalLines(lines);
Deque stack = new ArrayDeque<>();
stack.push(this.documentRow);
Collection results = buildBlocks(stack, hl, vl);
return results;
}
/**
* Filter Vertical Lines.
*
* @param lines
* {@link Collection} {@link DocumentBlock}
* @return {@link List} {@link DocumentBlock}
*/
private List filterVerticalLines(
final Collection lines) {
List vl = lines.stream()
.filter(new DocumentBlockVerticalLinePredicate())
.sorted(new DocumentBlockXComparator()).collect(Collectors.toList());
return vl;
}
/**
* Builds {@link DocumentRow} {@link DocumentBlock}.
* @param stack {@link Deque} of {@link DocumentBlock}
* @param hl Horizontal Lines
* @param vl Vertical Lines
* @return {@link Collection} {@link DocumentBlock}
*/
private Collection buildBlocks(
final Deque stack, final List hl,
final List vl) {
Collection blocks = new ArrayList<>();
while (!stack.isEmpty()) {
DocumentBlockRectangle b0 = stack.pop();
Optional v = findVerticalSplit(b0, vl);
if (v.isPresent()) {
DocumentBlockRectangle b1 = v.get();
DocumentBlock n1 = new DocumentBlock(b0.getLowerLeftX(),
b0.getLowerLeftY(), b1.getLowerLeftX(),
b0.getUpperRightY());
DocumentBlock n2 = new DocumentBlock(b1.getLowerLeftX() + 1,
b0.getLowerLeftY(), b0.getUpperRightX(),
b0.getUpperRightY());
if (!isEquals(b0, n1) && !isEquals(n1, n2)) {
stack.push(n1);
stack.push(n2);
}
} else {
Optional h = findHorizontalSplit(b0, hl);
if (h.isPresent()) {
DocumentBlockRectangle b1 = h.get();
DocumentBlock n1 = new DocumentBlock(b0.getLowerLeftX(),
b0.getLowerLeftY(), b0.getUpperRightX(),
b1.getUpperRightY());
DocumentBlock n2 = new DocumentBlock(b0.getLowerLeftX(),
b1.getUpperRightY() + 1, b0.getUpperRightX(),
b0.getUpperRightY());
if (!isEquals(b0, n1) && !isEquals(n1, n2)) {
stack.push(n1);
stack.push(n2);
}
} else {
blocks.add(b0);
}
}
}
return blocks;
}
/**
* Is {@link DocumentBlockRectangle} equals.
* @param b0 {@link DocumentBlockRectangle}
* @param b1 {@link DocumentBlockRectangle}
* @return boolean
*/
private boolean isEquals(final DocumentBlockRectangle b0, final DocumentBlockRectangle b1) {
return b0.toString().equals(b1.toString());
}
/**
* Find Horizontal Split {@link DocumentBlock}.
* @param block {@link DocumentBlock}
* @param hl {@link List} {@link DocumentBlock}
* @return {@link Optional} {@link DocumentBlock}
*/
private Optional findHorizontalSplit(
final DocumentBlockRectangle block, final List hl) {
final int d2 = -2;
final int d5 = 2;
Range rl = RangeUtil.between(block.getLowerLeftX() - d5, block.getLowerLeftX() + d5);
Range ru = RangeUtil.between(block.getUpperRightX() - d5,
block.getUpperRightX() + d5);
Range y = block.toRangeY(d2);
Optional o = hl.stream().filter(l -> {
Range rx = l.toRangeX();
return rl.isOverlappedBy(rx) && ru.isOverlappedBy(rx) && y.contains(l.getLowerLeftY());
}).findFirst();
return o;
}
/**
* Find Vertical Split of a {@link DocumentBlock}.
* @param block {@link DocumentBlock}
* @param vl {@link List} {@link DocumentBlock}
* @return {@link Optional} {@link DocumentBlock}
*/
private Optional findVerticalSplit(final DocumentBlockRectangle block,
final List vl) {
final int d2 = -2;
final int d5 = 2;
Range x = block.toRangeX(d2);
Range rl = RangeUtil.between(block.getLowerLeftY() - d5, block.getLowerLeftY() + d5);
Range ru = RangeUtil.between(block.getUpperRightY() - d5,
block.getUpperRightY() + d5);
Optional o = vl.stream().filter(l -> {
Range ry = l.toRangeY();
return rl.isOverlappedBy(ry) && ru.isOverlappedBy(ry) && x.contains(l.getLowerLeftX());
}).findFirst();
return o;
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy