All downloads are free. Search and download functionality uses the official Maven repository.

com.formkiq.vision.crafter.DocumentRowToBlockTransformer Maven / Gradle / Ivy

/*
 * Copyright (C) 2018 FormKiQ Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.formkiq.vision.crafter;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Deque;
import java.util.List;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;

import org.apache.commons.lang3.Range;

import com.formkiq.vision.comparator.DocumentBlockXComparator;
import com.formkiq.vision.document.DocumentBlock;
import com.formkiq.vision.document.DocumentBlockRectangle;
import com.formkiq.vision.predicate.DocumentBlockHorizontalLinePredicate;
import com.formkiq.vision.predicate.DocumentBlockVerticalLinePredicate;
import com.formkiq.vision.util.RangeUtil;

/**
 * {@link Function} to transform lines into {@link DocumentBlock}.
 *
 */
public class DocumentRowToBlockTransformer implements
        Function, Collection> {

    /** {@link DocumentBlock}. */
    private DocumentBlockRectangle documentRow;

    /**
     * constructor.
     * @param row {@link DocumentBlock}
     */
    public DocumentRowToBlockTransformer(final DocumentBlockRectangle row) {
        this.documentRow = row;
    }

    @Override
    public Collection apply(
            final Collection lines) {

        List hl = lines.stream()
                .filter(new DocumentBlockHorizontalLinePredicate())
                .collect(Collectors.toList());

        List vl = filterVerticalLines(lines);

        Deque stack = new ArrayDeque<>();
        stack.push(this.documentRow);

        Collection results = buildBlocks(stack, hl, vl);

        return results;
    }

	/**
	 * Filter Vertical Lines.
	 *
	 * @param lines
	 *            {@link Collection} {@link DocumentBlock}
	 * @return {@link List} {@link DocumentBlock}
	 */
	private List filterVerticalLines(
			final Collection lines) {

		List vl = lines.stream()
				.filter(new DocumentBlockVerticalLinePredicate())
				.sorted(new DocumentBlockXComparator()).collect(Collectors.toList());

		return vl;
	}

    /**
     * Builds {@link DocumentRow} {@link DocumentBlock}.
     * @param stack {@link Deque} of {@link DocumentBlock}
     * @param hl Horizontal Lines
     * @param vl Vertical Lines
     * @return {@link Collection} {@link DocumentBlock}
     */
    private Collection buildBlocks(
            final Deque stack, final List hl,
            final List vl) {

        Collection blocks = new ArrayList<>();

        while (!stack.isEmpty()) {

        	DocumentBlockRectangle b0 = stack.pop();

            Optional v = findVerticalSplit(b0, vl);

            if (v.isPresent()) {

            	DocumentBlockRectangle b1 = v.get();

                DocumentBlock n1 = new DocumentBlock(b0.getLowerLeftX(),
                        b0.getLowerLeftY(), b1.getLowerLeftX(),
                        b0.getUpperRightY());
                
                DocumentBlock n2 = new DocumentBlock(b1.getLowerLeftX() + 1,
                        b0.getLowerLeftY(), b0.getUpperRightX(),
                        b0.getUpperRightY());
                
                if (!isEquals(b0, n1) && !isEquals(n1, n2)) {
                	stack.push(n1);
                	stack.push(n2);
                }

            } else {

                Optional h = findHorizontalSplit(b0, hl);

                if (h.isPresent()) {

                	DocumentBlockRectangle b1 = h.get();

                    DocumentBlock n1 = new DocumentBlock(b0.getLowerLeftX(),
                            b0.getLowerLeftY(), b0.getUpperRightX(),
                            b1.getUpperRightY());
                    
                    DocumentBlock n2 = new DocumentBlock(b0.getLowerLeftX(),
                            b1.getUpperRightY() + 1, b0.getUpperRightX(),
                            b0.getUpperRightY());
                    
                    if (!isEquals(b0, n1) && !isEquals(n1, n2)) {
                    	stack.push(n1);
                    	stack.push(n2);
                    }

                } else {
                    blocks.add(b0);
                }
            }
        }

        return blocks;
    }

    /**
     * Is {@link DocumentBlockRectangle} equals.
     * @param b0 {@link DocumentBlockRectangle}
     * @param b1 {@link DocumentBlockRectangle}
     * @return boolean
     */
    private boolean isEquals(final DocumentBlockRectangle b0, final DocumentBlockRectangle b1) {
		return b0.toString().equals(b1.toString());
	}

	/**
     * Find Horizontal Split {@link DocumentBlock}.
     * @param block {@link DocumentBlock}
     * @param hl {@link List} {@link DocumentBlock} 
     * @return {@link Optional} {@link DocumentBlock}
     */
    private Optional findHorizontalSplit(
            final DocumentBlockRectangle block, final List hl) {

    	final int d2 = -2;
    	final int d5 = 2;

        Range rl = RangeUtil.between(block.getLowerLeftX() - d5, block.getLowerLeftX() + d5);
		Range ru = RangeUtil.between(block.getUpperRightX() - d5,
				block.getUpperRightX() + d5);

		Range y = block.toRangeY(d2);

		Optional o = hl.stream().filter(l -> {

			Range rx = l.toRangeX();
			return rl.isOverlappedBy(rx) && ru.isOverlappedBy(rx) && y.contains(l.getLowerLeftY());
		}).findFirst();
		
        return o;
    }

    /**
     * Find Vertical Split of a {@link DocumentBlock}.
     * @param block {@link DocumentBlock}
     * @param vl {@link List} {@link DocumentBlock}
     * @return {@link Optional} {@link DocumentBlock}
     */
    private Optional findVerticalSplit(final DocumentBlockRectangle block,
            final List vl) {

        final int d2 = -2;
        final int d5 = 2;

        Range x = block.toRangeX(d2);

        Range rl = RangeUtil.between(block.getLowerLeftY() - d5, block.getLowerLeftY() + d5);
		Range ru = RangeUtil.between(block.getUpperRightY() - d5,
				block.getUpperRightY() + d5);

        Optional o = vl.stream().filter(l -> {
            Range ry = l.toRangeY();
            return rl.isOverlappedBy(ry) && ru.isOverlappedBy(ry) && x.contains(l.getLowerLeftX());
        }).findFirst();

        return o;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy