// All downloads are free. Search and download functionality uses the official Maven repository.

// com.formkiq.vision.crafter.DocumentRowLayoutBuilder (Maven / Gradle / Ivy)

/*
 * Copyright (C) 2018 FormKiQ Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.formkiq.vision.crafter;

import static com.formkiq.vision.document.DocumentBlock.isEquals;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang3.Range;

import com.formkiq.vision.comparator.DocumentBlockRectangleComparator;
import com.formkiq.vision.comparator.DocumentBlockUpperYXComparator;
import com.formkiq.vision.crafter.comparator.DocumentBlockListComparator;
import com.formkiq.vision.crafter.predicate.DocumentBlockSingleFieldPredicate;
import com.formkiq.vision.document.DocumentBlock;
import com.formkiq.vision.document.DocumentBlockRectangle;
import com.formkiq.vision.predicate.DocumentBlockHorizontalLinePredicate;
import com.formkiq.vision.predicate.DocumentBlockRectangleBottomPredicate;
import com.formkiq.vision.predicate.DocumentBlockRectangleHeightGTPredicate;
import com.formkiq.vision.predicate.DocumentBlockRectangleHorizontalIntersectionPredicate;
import com.formkiq.vision.predicate.DocumentBlockRectangleTopPredicate;
import com.formkiq.vision.predicate.DocumentBlockRectangleXEqualsPredicate;
import com.formkiq.vision.predicate.DocumentBlockVerticalLinePredicate;

/**
 * {@link Function} to transform {@link DocumentBlockRectangle} into
 * {@link DocumentRowLayout}.
 * TODO chop into multiple transformers.
 */
public class DocumentRowLayoutBuilder
        implements Function, List> {

    /** float. */
    private float documentPageHeight;
    /** {@link PageScratchPad}. */
    private PageScratchPad scratchpad;

    /**
     * constructor.
     * @param pad {@link PageScratchPad} 
     */
	public DocumentRowLayoutBuilder(final PageScratchPad pad) {
		this.scratchpad = pad;
		this.documentPageHeight = pad.getDocument()
				.getPageHeight(pad.getDocumentPageNumber().intValue());
    }
    
    @Override
    public List apply(
            final List rawblocks) {

	    List hl = getHorizontalLines(rawblocks);

        List vl = getVerticalLines(rawblocks);

        List list = buildDocumentRows(hl, vl);

        Collections.sort(list, new DocumentBlockRectangleComparator());

        return list;
	}

    /**
     * Build {@link List} of {@link DocumentRowLayout}.
     * @param hl {@link List} {@link DocumentBlockRectangle}
     * @param vl {@link List} {@link DocumentBlockRectangle}
     * @return {@link List} {@link DocumentRowLayout}
     */
    // TODO make Function
    public List buildDocumentRows(
            final List hl, final List vl) {

        List rows = createRows(hl, vl);

		Map> rowMap = createMap(rows, hl, vl);

        List list = new ArrayList<>();

        for (DocumentBlockRectangle row : rows) {

            List lines = rowMap.get(row);

            Collection blocks =
                    new DocumentRowToBlockTransformer(row).apply(lines);

            DocumentRowLayout docrow = new DocumentRowLayout();

            docrow.setHorizontalLines(lines.stream()
                    .filter(new DocumentBlockHorizontalLinePredicate())
                    .collect(Collectors.toList()));

            docrow.setVerticalLines(lines.stream()
                    .filter(new DocumentBlockVerticalLinePredicate())
                    .collect(Collectors.toList()));

            docrow.setBlock(row);
			docrow.setBlocks(
					blocks.stream().map(new DocumentRowBlockBuilder(this.scratchpad, docrow))
							.collect(Collectors.toList()));
            
            list.add(docrow);
        }

        return list;
    }

    /**
	 * Create Map of Rows to Horizontal / Vertical {@link DocumentBlock}.
	 *
	 * @param rows {@link List} {@link DocumentBlock}
	 * @param hl {@link List} {@link DocumentBlock}
	 * @param vl {@link List} {@link DocumentBlock}
	 * @return {@link Map} {@link DocumentBlock}
	 */
    private Map> createMap(
            final List rows, final List hl,
            final List vl) {

        final int delta = 2;
        Map> map = new HashMap<>();

        for (DocumentBlockRectangle b : rows) {

            Range xr = b.toRangeX();
            Range yr = Range.between(
                    Float.valueOf(b.getLowerLeftY() + delta),
                    Float.valueOf(b.getUpperRightY() - delta));

            List h = hl.stream()
                    .filter(l -> yr.isOverlappedBy(l.toRangeY())
                            && xr.contains(Float.valueOf(l.getLowerLeftX())))
                    .collect(Collectors.toList());

            List v = vl.stream()
                    .filter(l -> !isEquals(b.getLowerLeftX(), l.getLowerLeftX())
                            && !isEquals(b.getUpperRightX(), l.getUpperRightX())
                            && yr.isOverlappedBy(l.toRangeY())
                            && xr.contains(Float.valueOf(l.getLowerLeftX())))
                    .collect(Collectors.toList());

            map.put(b, Stream.concat(h.stream(), v.stream())
                    .collect(Collectors.toList()));
        }

        return map;
    }

    /**
	 * Create {@link DocumentBlock} into Rows.
	 * @param horizontalLines {@link List} {@link DocumentBlock}
	 * @param verticallines {@link List} {@link DocumentBlock}
	 * @return {@link List} {@link DocumentBlock}
	 */
    private List createRows(
            final List horizontalLines,
            final List verticallines) {

        List sections = createRowSections(horizontalLines,
                verticallines);

        List rows = new ArrayList<>();

        for (DocumentBlockRectangle s : sections) {

			List hl = findHorizontalLinesForBlock(s, horizontalLines);

            DocumentBlockRectangle top = null;
            for (DocumentBlockRectangle bottom : hl) {

                if (top != null) {
                    DocumentBlock b = new DocumentBlock();

                    if (isEquals(top.getLowerLeftX(), bottom.getLowerLeftX())) {
                        b.setLowerLeftX(Math.min(top.getLowerLeftX(),
                                bottom.getLowerLeftX()));
                    } else {
                        b.setLowerLeftX(Math.max(top.getLowerLeftX(),
                                bottom.getLowerLeftX()));
                    }

                    b.setUpperRightX(Math.max(top.getUpperRightX(),
                            bottom.getUpperRightX()));
                    b.setLowerLeftY(Math.min(top.getLowerLeftY(),
                            bottom.getLowerLeftY()));
                    b.setUpperRightY(Math.max(top.getUpperRightY(),
                            bottom.getUpperRightY()));

                    rows.add(b);
                }

                top = bottom;
            }
        }

        return rows;
    }

    /**
     * Find Horizontal Lines that touch {@link DocumentBlockRectangle}.
     * @param s {@link DocumentBlockRectangle}
     * @param horizontalLines {@link List} {@link DocumentBlockRectangle}
     * @return {@link List} {@link DocumentBlockRectangle}
     */
	private List findHorizontalLinesForBlock(final DocumentBlockRectangle s,
			final List horizontalLines) {
		return horizontalLines.stream()
				.filter(new DocumentBlockRectangleHorizontalIntersectionPredicate(s))
				.sorted(new DocumentBlockRectangleComparator()).collect(Collectors.toList());
	}

    /**
     * Create Row Sections.
     * @param horizontalLines {@link List} {@link DocumentBlock}
     * @param verticallines {@link List} {@link DocumentBlock}
     * @return {@link List} {@link DocumentBlock}
     */
    private List createRowSections(
            final List horizontalLines,
            final List verticallines) {
    	    	
        List leftvl = findLeftMostVerticalLines(verticallines);
                
        SimpleDocumentBlockMaker bm = new SimpleDocumentBlockMaker(
                horizontalLines, verticallines);

        List sections = leftvl.stream()
                .flatMap(v -> bm.apply(v).stream())
                .collect(Collectors.toList());
		
        final float minHeight = this.documentPageHeight * 0.7f;
		Predicate hp = new DocumentBlockRectangleHeightGTPredicate(
				minHeight);
		
		List list = findLargeBlockWithoutHorizontal(sections, hp,
				horizontalLines);

		if (!list.isEmpty()) {
			
			// remove Vertical Lines matching large section
			List vls = new ArrayList<>(verticallines);
			
			for (DocumentBlockRectangle l : list) {
				vls.removeIf(new DocumentBlockRectangleXEqualsPredicate(l.getLowerLeftX()));
				vls.removeIf(new DocumentBlockRectangleXEqualsPredicate(l.getUpperRightX()));
			}
			
			List newsections = createRowSections(horizontalLines, vls);
			
			if (newsections.stream().filter(hp).findFirst().isPresent()) {
				sections = newsections;
			} else {
		        // remove large sections with no horizontal lines
				sections.removeAll(list);
			}
		}
		
		sections.removeIf(new DocumentBlockSingleFieldPredicate(this.scratchpad));

		DocumentBlockRectangleExpander exp = new DocumentBlockRectangleExpander(horizontalLines,
				verticallines);
		
		sections = sections.stream().flatMap(s -> exp.apply(s).stream())
				.collect(Collectors.toList());

        return sections;
    }

	/**
	 * Find Large {@link DocumentBlockRectangle} that do no have any Horizontal
	 * Lines that cross through.
	 * 
	 * @param sections        {@link List} {@link DocumentBlockRectangle}
	 * @param hp              {@link Predicate} {@link DocumentBlockRectangle}
	 * @param horizontalLines {@link List} {@link DocumentBlockRectangle}
	 * @return {@link List} {@link DocumentBlockRectangle}
	 */
	private List findLargeBlockWithoutHorizontal(
			final List sections, final Predicate hp,
			final List horizontalLines) {
		
		List list = sections.stream().filter(s -> {
			
			Predicate tp = new DocumentBlockRectangleTopPredicate(s);
			Predicate bp = new DocumentBlockRectangleBottomPredicate(s);
			Predicate ip = 
					new DocumentBlockRectangleHorizontalIntersectionPredicate(s);

			return hp.test(s) && !horizontalLines.stream()
					.filter(ip.and(tp.negate()).and(bp.negate())).findFirst().isPresent();
		}).collect(Collectors.toList());
		
		return list;
	}

    /**
     * Find Left Most Vertical Lines.
     * @param vls {@link List} {@link DocumentBlockRectangle}
     * @return {@link List} {@link DocumentBlockRectangle}
     */
	private List findLeftMostVerticalLines(
			final List vls) {
		
		// find all Left most vertical lines
        List vl = vls.stream().filter(v0 -> {
  
            Float middle = Float
                    .valueOf((v0.getUpperRightY() + v0.getLowerLeftY()) / 2);

            boolean isVerticalLines = !vls.stream()
                    .filter(v1 -> v1.getLowerLeftX() < v0.getLowerLeftX()
                            && v1.toRangeY().contains(middle))
                    .findFirst().isPresent();
			            
			return isVerticalLines;
            
        }).collect(Collectors.toList());
		return vl;
	}

    /**
	 * Get Vertical Lines.
	 * @param rawblocks {@link List} {@link DocumentBlockRectangle}
	 * @return {@link List} {@link DocumentBlockRectangle}
	 */
    private List getVerticalLines(
            final List rawblocks) {

        DocumentBlockRemoveDuplicate rd = new DocumentBlockRemoveDuplicate(1);
        List vl = rawblocks.stream()
        		.map(new DocumentRawBlockToDocumentBlock())
                .map(new DocumentBlockSplitVerticalLines())
                .flatMap(s -> s.stream())
                .filter(s -> s.getWidth() > 1 || s.getHeight() > 1)
                .sorted(new DocumentBlockListComparator())
                .collect(Collectors.toList());
        vl = rd.apply(vl);

        DocumentBlockRectangle last = null;

        vl.removeIf(b -> b.getWidth() < 1 && b.getHeight() < 1);

        for (DocumentBlockRectangle v : new ArrayList<>(vl)) {

            Range y = v.toRangeY();

            if (last != null && y.isOverlappedBy(last.toRangeY())
                    && isEquals(v.getUpperRightX(), last.getUpperRightX())) {

                vl.remove(v);
                vl.remove(last);
                
				last = new DocumentBlock(last.getLowerLeftX(),
						Math.min(v.getLowerLeftY(), last.getLowerLeftY()), last.getUpperRightX(),
						Math.max(v.getUpperRightY(), last.getUpperRightY()));
				vl.add(last);

            } else {
                last = v;
            }
        }

        Collections.sort(vl, new DocumentBlockRectangleComparator());
        return vl;
    }

    /**
     * Get Horizontal Lines.
     * @param rawblocks {@link List} {@link DocumentBlockRectangle}
     * @return {@link List} {@link DocumentBlock}
     */
    private List getHorizontalLines(
            final List rawblocks) {

        DocumentBlockRemoveDuplicate rd = new DocumentBlockRemoveDuplicate(1);

        List hl = rawblocks.stream()
        		.map(new DocumentRawBlockToDocumentBlock())
                .map(new DocumentBlockSplitHorizontalLines())
                .flatMap(s -> s.stream())
                .filter(s -> s.getWidth() > 1 || s.getHeight() > 1)
                .sorted(new DocumentBlockUpperYXComparator())
                .collect(Collectors.toList());

        hl = rd.apply(hl);

        DocumentBlockRectangle last = null;

        for (DocumentBlockRectangle h : new ArrayList<>(hl)) {

            Range x = h.toRangeX(2);

            if (last != null && x.isOverlappedBy(last.toRangeX())
                    && isEquals(h.getUpperRightY(), last.getUpperRightY())) {

				hl.remove(last);
                hl.remove(h);

				last = new DocumentBlock(Math.min(h.getLowerLeftX(), last.getLowerLeftX()),
						last.getLowerLeftY(), Math.max(h.getUpperRightX(), last.getUpperRightX()),
						last.getUpperRightY());
				
				hl.add(last);

            } else {
                last = h;
            }
        }

        Collections.sort(hl, new DocumentBlockRectangleComparator());

        return hl;
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy