// com.formkiq.vision.pdf.parser.PDRectangleIntersectionTransformer (Maven / Gradle / Ivy)
/*
* Copyright (C) 2018 FormKiQ Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.formkiq.vision.pdf.parser;
import static com.formkiq.vision.pdf.PDRectangleUtil.isIntersect;
import static com.formkiq.vision.pdf.PDRectangleUtil.isIntersectionAtPoint;
import static com.formkiq.vision.pdf.PDRectangleUtil.mergeInto;
import static com.formkiq.vision.pdf.PDRectangleUtil.removeDuplicates;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import com.formkiq.vision.pdf.comparator.PDRectangleLowerYXComparator;
import com.formkiq.vision.pdf.comparator.PDRectangleXYComparator;
import com.formkiq.vision.pdf.predicate.PDRectangleHorizontalLinePredicate;
import com.formkiq.vision.pdf.predicate.PDRectangleVerticalLinePredicate;
/**
* Merges intersecting horizontal / vertical {@link PDRectangle}.
*
*/
public class PDRectangleIntersectionTransformer
implements Function, List> {
@Override
public List apply(final List list) {
List re = new ArrayList<>(removeDuplicates(list, 0));
mergeHorizontal(re);
mergeVertical(re);
return re;
}
/**
* Merge Horizontal lines together.
*
* @param re
* {@link List} {@link PDRectangle}
*/
private void mergeHorizontal(final List re) {
List lines = re.stream()
.filter(new PDRectangleHorizontalLinePredicate())
.collect(Collectors.toList());
re.removeAll(lines);
Collections.sort(lines, new PDRectangleLowerYXComparator());
mergeIntersecting(lines);
re.addAll(lines);
}
/**
* Merge intersecting {@link PDRectangle} together.
*
* @param lines {@link List} {@link PDRectangle}
*/
private void mergeIntersecting(final List lines) {
PDRectangle last = null;
for (Iterator i = lines.iterator(); i.hasNext();) {
PDRectangle rect = i.next();
if (last != null && isIntersect(rect, last)) {
mergeInto(last, rect);
i.remove();
} else {
last = rect;
}
}
}
/**
* Merge intersecting {@link PDRectangle} together.
*
* @param lines {@link List} {@link PDRectangle}
*/
private void mergeIntersectingVertical(final List lines) {
PDRectangle last = null;
for (Iterator i = lines.iterator(); i.hasNext();) {
PDRectangle rect = i.next();
if (last != null && (isIntersectionAtPoint(rect,
last.getLowerLeftX(), last.getLowerLeftY())
|| isIntersectionAtPoint(rect, last.getUpperRightX(),
last.getUpperRightY()))) {
mergeInto(last, rect);
i.remove();
} else {
last = rect;
}
}
}
/**
* Merge Vertical lines together.
*
* @param re {@link List} {@link PDRectangle}
*/
private void mergeVertical(final List re) {
List lines = re.stream()
.filter(new PDRectangleVerticalLinePredicate())
.collect(Collectors.toList());
re.removeAll(lines);
Collections.sort(lines, new PDRectangleXYComparator());
mergeIntersectingVertical(lines);
re.addAll(lines);
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy