All downloads are free. The search and download functionality uses the official Maven repository.

com.formkiq.vision.crafter.TextBlockExtractor Maven / Gradle / Ivy

/*
 * Copyright (C) 2018 FormKiQ Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.formkiq.vision.crafter;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.commons.lang3.Range;
import org.apache.commons.lang3.builder.ToStringBuilder;

import com.formkiq.vision.comparator.DocumentBlockRectangleComparator;
import com.formkiq.vision.document.DocumentBlockRectangle;
import com.formkiq.vision.document.DocumentContentGroup;
import com.formkiq.vision.document.DocumentSection;
import com.formkiq.vision.document.DocumentSectionContent;
import com.formkiq.vision.document.DocumentSource;

/**
 * A {@link BlockExtractor} made up of a group of {@link TextLineExtractor}.
 *
 * <p>The X and Y ranges of the block are computed from the ranges of its
 * lines, so {@link #getX()} and {@link #getY()} require at least one line.</p>
 */
public class TextBlockExtractor implements BlockExtractor {

    /** {@link List} {@link TextLineExtractor}. */
    private List<TextLineExtractor> lines;
    /** {@link DocumentBlockRectangleToDocumentContent}. */
    private final DocumentBlockRectangleToDocumentContent blockToContent;

    /**
     * Creates an extractor with no lines.
     * @param document {@link DocumentSource}
     */
    public TextBlockExtractor(final DocumentSource document) {
        this.lines = new ArrayList<>();
        this.blockToContent =
                new DocumentBlockRectangleToDocumentContent(document);
    }

    /**
     * Creates an extractor pre-populated with the given lines.
     * @param document {@link DocumentSource}
     * @param list {@link List} {@link TextLineExtractor}
     */
    public TextBlockExtractor(final DocumentSource document,
            final List<TextLineExtractor> list) {
        this(document);
        addLines(list);
    }

    /**
     * Appends a single line to this block.
     * @param line {@link TextLineExtractor}
     */
    public void addLine(final TextLineExtractor line) {
        this.lines.add(line);
    }

    /**
     * Appends all of the given lines to this block.
     * @param textlines {@link List} {@link TextLineExtractor}
     */
    public void addLines(final List<TextLineExtractor> textlines) {
        this.lines.addAll(textlines);
    }

    /**
     * Returns the list held by this block (not a copy).
     * @return {@link List} {@link TextLineExtractor}
     */
    public List<TextLineExtractor> getLines() {
        return this.lines;
    }

    /**
     * Returns the range spanning the smallest X minimum and the largest
     * X maximum of all lines.
     * @return {@link Range}
     * @throws java.util.NoSuchElementException if this block has no lines
     */
    @Override
    public Range<Float> getX() {

        Float max = this.lines.stream().map(l -> l.getX().getMaximum())
                .max(Float::compareTo).get();
        Float min = this.lines.stream().map(l -> l.getX().getMinimum())
                .min(Float::compareTo).get();
        return Range.between(min, max);
    }

    /**
     * Returns the range spanning the smallest Y minimum and the largest
     * Y maximum of all lines.
     * @return {@link Range}
     * @throws java.util.NoSuchElementException if this block has no lines
     */
    @Override
    public Range<Float> getY() {

        Float max = this.lines.stream().map(l -> l.getY().getMaximum())
                .max(Float::compareTo).get();
        Float min = this.lines.stream().map(l -> l.getY().getMinimum())
                .min(Float::compareTo).get();
        return Range.between(min, max);
    }

    /**
     * Has Multi line.
     * @return boolean true if any line reports being multi line
     */
    public boolean hasMultiLine() {
        return this.lines.stream().anyMatch(TextLineExtractor::isMultiLine);
    }

    /**
     * Has Field.
     * @return boolean true if any line reports having a field
     */
    public boolean hasField() {
        return this.lines.stream().anyMatch(TextLineExtractor::hasField);
    }

    /**
     * Replaces the list of lines held by this block.
     * @param extractorLines {@link List} {@link TextLineExtractor}
     */
    public void setLines(final List<TextLineExtractor> extractorLines) {
        this.lines = extractorLines;
    }

    @Override
    public String toString() {
        // getY() cannot be evaluated without lines (it would throw), so the
        // empty case must be decided before the Y range is requested.
        if (this.lines.isEmpty()) {
            return "no lines";
        }

        return "y:" + getY() + "," + ToStringBuilder.reflectionToString(this);
    }

    /**
     * Returns the rectangles of every line, in line order.
     * @return {@link Collection} {@link DocumentBlockRectangle}
     */
    @Override
    public Collection<DocumentBlockRectangle> getBlocks() {
        return this.lines.stream().flatMap(l -> l.getRectangles().stream())
                .collect(Collectors.toList());
    }

    @Override
    public DocumentSection toDocumentSection() {
        DocumentSection section = new DocumentSection();
        section.setContent(buildSectionContent());
        return section;
    }

    /**
     * Builds {@link DocumentSectionContent}.
     *
     * <p>For each line, the list returned by {@code getRectangles()} is
     * sorted in place with {@link DocumentBlockRectangleComparator} and each
     * rectangle is converted to content; {@code null} conversions are
     * dropped. A line producing exactly one content contributes it directly,
     * a line producing several contributes a {@link DocumentContentGroup},
     * and a line producing none contributes nothing.</p>
     *
     * @return {@link List} {@link DocumentSectionContent}
     */
    private List<DocumentSectionContent> buildSectionContent() {

        List<DocumentSectionContent> list = new ArrayList<>();

        for (TextLineExtractor e : this.lines) {

            Collections.sort(e.getRectangles(),
                    new DocumentBlockRectangleComparator());

            List<DocumentSectionContent> scontents = e.getRectangles()
                    .stream().map(this.blockToContent)
                    .filter(s -> s != null)
                    .collect(Collectors.toList());

            if (scontents.size() == 1) {
                list.add(scontents.get(0));
            } else if (!scontents.isEmpty()) {
                list.add(new DocumentContentGroup(scontents));
            }
        }

        return list;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy