com.exasol.adapter.document.documentpath.RedundantPathEliminator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of virtual-schema-common-document Show documentation
Show all versions of virtual-schema-common-document Show documentation
Common module of Exasol Virtual Schema Adapters for Document Data Sources.
The newest version!
package com.exasol.adapter.document.documentpath;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * This class eliminates path expressions that are redundant when used as a projection expression.
 * <p>
 * For example {@code a} and {@code a.b} are reduced to {@code a} as {@code a} includes {@code a.b}.
 * </p>
 */
public class RedundantPathEliminator {
    private static final RedundantPathEliminator INSTANCE = new RedundantPathEliminator();

    /**
     * Empty constructor to hide public default.
     */
    private RedundantPathEliminator() {
        // empty on purpose.
    }

    /**
     * Get a singleton instance of {@link RedundantPathEliminator}.
     *
     * @return instance of {@link RedundantPathEliminator}
     */
    public static RedundantPathEliminator getInstance() {
        return INSTANCE;
    }

    /**
     * Eliminate path expressions that are redundant when used as a projection expression.
     * <p>
     * Convenience overload that streams the collection and delegates to
     * {@link #removeRedundantPaths(Stream)}. The passed collection itself is not modified.
     * </p>
     *
     * @param paths collection of paths
     * @return Set with redundancy free paths
     */
    public Set<DocumentPathExpression> removeRedundantPaths(final Collection<DocumentPathExpression> paths) {
        return removeRedundantPaths(paths.stream());
    }

    /**
     * Eliminate path expressions that are redundant when used as a projection expression.
     * <p>
     * The basic idea of this algorithm is to iterate over the paths with an increasing path length. In the first
     * iteration only the paths with a length of 0 are considered. In each iteration the algorithm removes all paths
     * from the working list that are already covered by a path in the result.
     * </p>
     *
     * @param paths stream of paths; it is consumed by this call
     * @return Set with redundancy free paths
     */
    public Set<DocumentPathExpression> removeRedundantPaths(final Stream<DocumentPathExpression> paths) {
        // Working copy of the input; entries are removed through the iterator while scanning.
        final List<DocumentPathExpression> allPaths = paths.collect(Collectors.toCollection(LinkedList::new));
        final Set<DocumentPathExpression> redundancyFreePaths = new HashSet<>(allPaths.size() * 2);
        int currentPathLength = 0;
        while (!allPaths.isEmpty()) {
            // Smallest path length that still needs a pass; stays MAX_VALUE only if the list gets emptied.
            int nextPathLength = Integer.MAX_VALUE;
            final Iterator<DocumentPathExpression> pathIterator = allPaths.iterator();
            while (pathIterator.hasNext()) {
                final DocumentPathExpression path = pathIterator.next();
                /*
                 * Prefix of this path that is looked up in the result.
                 * NOTE(review): assumes the end index of getSubPath is exclusive, so this is the prefix one segment
                 * shorter than the current length -- confirm against DocumentPathExpression.
                 */
                final DocumentPathExpression subPath = currentPathLength == 0 ? DocumentPathExpression.empty()
                        : path.getSubPath(0, currentPathLength - 1);
                if (redundancyFreePaths.contains(subPath)) {
                    /*
                     * A more generic path is already included. --> remove this path.
                     */
                    pathIterator.remove();
                } else if (path.size() == currentPathLength) {
                    /*
                     * This path is not included and has the current length --> add
                     */
                    redundancyFreePaths.add(path);
                    pathIterator.remove();
                    /*
                     * Here we also queue a check on the next path length to remove the paths that contain this
                     * path.
                     */
                    nextPathLength = Math.min(nextPathLength, currentPathLength + 1);
                } else {
                    /*
                     * This path is not included but does not have the current length. --> Will be considered in a
                     * future iteration. We set nextPathLength to skip path lengths with no paths.
                     */
                    nextPathLength = Math.min(nextPathLength, path.size());
                }
            }
            currentPathLength = nextPathLength;
        }
        return redundancyFreePaths;
    }
}