All downloads are free. The search and download functionality uses the official Maven repository.

com.formkiq.vision.crafter.DocumentSectionCrafter Maven / Gradle / Ivy

/*
 * Copyright (C) 2018 FormKiQ Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.formkiq.vision.crafter;

import static com.formkiq.vision.crafter.CollectionUtils.isType;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

import com.formkiq.vision.document.DocumentSection;
import com.formkiq.vision.document.DocumentSectionContent;
import com.formkiq.vision.document.DocumentSource;
import com.formkiq.vision.document.DocumentTextTableContent;

/**
 * {@link DocumentSection} Crafter.
 *
 */
public class DocumentSectionCrafter
        implements Function> {

    /** {@link DocumentSource}. */
    private DocumentSource document;

    /**
     * constructor.
     * @param source {@link DocumentSource}
     */
    public DocumentSectionCrafter(final DocumentSource source) {
        this.document = source;
    }

    @Override
    public List apply(final Integer pageNumber) {

        PageScratchPad pagePad = new PageScratchPadBuilder(this.document)
                .apply(pageNumber);

        List blocks = new BlockExtractorBuilder()
                .apply(pagePad);

        List sections = blocks.stream()
                .map(b -> b.toDocumentSection()).collect(Collectors.toList());

		sections.forEach(s -> {
			s.getContent().removeIf(c -> isEmpty(c));
		});
        
		sections.removeIf(s -> s.getContent().isEmpty());

		sections = mergeTextTableSections(sections);

        return sections;
    }

    /**
     * Returns whether {@link DocumentSectionContent} is empty.
     * @param content {@link DocumentSectionContent}
     * @return boolean
     */
    private boolean isEmpty(final DocumentSectionContent content) {
    	
    	boolean empty = false;
    	
    	if (content instanceof DocumentTextTableContent) {
    		empty = ((DocumentTextTableContent) content).getData().isEmpty();
    	}
    	
		return empty;
	}

	/**
     * Merge similar {@link DocumentSection} together.
     * @param sections {@link List} {@link DocumentSection}
     * @return {@link List} {@link DocumentSection}
     */
    private List mergeTextTableSections(
            final List sections) {

        List list = new ArrayList<>(sections.size());

        DocumentSection last = null;

        for (DocumentSection section : sections) {

            if (last != null && isTextTableMergeable(last, section)) {

                DocumentTextTableContent tt = (DocumentTextTableContent) last
                        .getContent().get(last.getContent().size() - 1);

                List> dlist = section.getContent().stream()
                        .flatMap(t -> ((DocumentTextTableContent) t).getData()
                                .stream())
                        .collect(Collectors.toList());

                tt.getData().addAll(dlist);
            } else {
                list.add(section);
                last = section;
            }
        }

        return list;
    }

    /**
     * Is {@link DocumentSection} mergeable.
     * @param s0 {@link DocumentSection}
     * @param s1 {@link DocumentSection}
     * @return boolean
     */
    private boolean isTextTableMergeable(final DocumentSection s0,
            final DocumentSection s1) {
        return s0 != null && s1 != null
                && isType(s0.getContent(), DocumentTextTableContent.class)
                && isType(s1.getContent(), DocumentTextTableContent.class)
                && isDocumentTextTableContentLengthEqual(s0, s1);
    }

    /**
     * Are {@link DocumentTextTableContent} the same length.
     * @param s0 {@link DocumentSection}
     * @param s1 {@link DocumentSection}
     * @return boolean
     */
    private boolean isDocumentTextTableContentLengthEqual(
            final DocumentSection s0, final DocumentSection s1) {

        Set length = new HashSet<>();

        for (DocumentSection s : Arrays.asList(s0, s1)) {
            for (DocumentSectionContent c : s.getContent()) {
                DocumentTextTableContent t = (DocumentTextTableContent) c;
                for (List ss : t.getData()) {
                    length.add(Integer.valueOf(ss.size()));
                }
            }
        }

        return length.size() == 1;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy