
// org.apache.lens.cube.parse.join.AutoJoinContext (Maven / Gradle / Ivy)
// The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lens.cube.parse.join;
import java.util.*;
import org.apache.lens.cube.error.LensCubeErrorCode;
import org.apache.lens.cube.metadata.*;
import org.apache.lens.cube.metadata.join.JoinPath;
import org.apache.lens.cube.metadata.join.TableRelationship;
import org.apache.lens.cube.parse.*;
import org.apache.lens.server.api.error.LensException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.parse.JoinType;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
/**
* Store join chain information resolved by join resolver
*/
@Slf4j
public class AutoJoinContext {
// Map of a joined table to list of all possible paths from that table to
// the target
private final Map, List> allPaths;
private Set requiredDimensions;
@Getter
// Map of joined table to the join type (if provided by user)
private final Map tableJoinTypeMap;
// True if joins were resolved automatically
private boolean joinsResolved;
// Target table for the auto join resolver
private final AbstractCubeTable autoJoinTarget;
// Configuration string to control join type
private String joinTypeCfg;
// Map of a joined table to its columns which are part of any of the join
// paths. This is used in candidate table resolver
@Getter
private Map, Map>> joinPathFromColumns = new HashMap<>();
@Getter
private Map, Map>> joinPathToColumns = new HashMap<>();
// there can be separate join clause for each fact in-case of multi fact queries
@Getter
Map factClauses = new HashMap<>();
@Getter
@Setter
JoinClause minCostClause;
private final boolean flattenBridgeTables;
private final String bridgeTableFieldAggr;
private final String bridgeTableFieldArrayFilter;
private final boolean doFlatteningEarly;
/**
 * Creates the join context and derives the join path column maps from {@code allPaths}.
 *
 * @param allPaths                    all possible join paths, keyed by the joined (aliased) dimension
 * @param requiredDimensions          dimensions that must keep at least one join path after pruning
 * @param tableJoinTypeMap            join type per joined table, if provided by the user
 * @param autoJoinTarget              target table for the auto join resolver
 * @param joinTypeCfg                 configuration string controlling the join type
 * @param joinsResolved               true if joins were resolved automatically
 * @param flattenBridgeTables         whether many-valued relationships are handled as bridge tables
 * @param bridgeTableFieldAggr        passed to the bridge table join context
 * @param bridgeTableFieldArrayFilter passed to the bridge table join context
 * @param doFlatteningEarly           passed to the bridge table join context
 */
public AutoJoinContext(Map<Aliased<Dimension>, List<JoinPath>> allPaths,
  Set<Dimension> requiredDimensions,
  Map<AbstractCubeTable, JoinType> tableJoinTypeMap,
  AbstractCubeTable autoJoinTarget, String joinTypeCfg, boolean joinsResolved,
  boolean flattenBridgeTables, String bridgeTableFieldAggr, String bridgeTableFieldArrayFilter,
  boolean doFlatteningEarly) {
  this.allPaths = allPaths;
  this.requiredDimensions = requiredDimensions;
  // Only needs allPaths and the pre-initialised column maps, so it can run here
  initJoinPathColumns();
  this.tableJoinTypeMap = tableJoinTypeMap;
  this.autoJoinTarget = autoJoinTarget;
  this.joinTypeCfg = joinTypeCfg;
  this.joinsResolved = joinsResolved;
  this.flattenBridgeTables = flattenBridgeTables;
  this.bridgeTableFieldAggr = bridgeTableFieldAggr;
  this.bridgeTableFieldArrayFilter = bridgeTableFieldArrayFilter;
  this.doFlatteningEarly = doFlatteningEarly;
  log.debug("All join paths:{}", allPaths);
  log.debug("Join path from columns:{}", joinPathFromColumns);
  log.debug("Join path to columns:{}", joinPathToColumns);
}
/**
 * @return the target table for the auto join resolver
 */
public AbstractCubeTable getAutoJoinTarget() {
  return this.autoJoinTarget;
}
/**
 * Looks up the join clause to use for a storage candidate.
 *
 * @param sc storage candidate, may be null
 * @return the clause registered for {@code sc} in the fact clauses map, or the min cost
 *         clause when {@code sc} is null or has no registered clause
 */
public JoinClause getJoinClause(StorageCandidate sc) {
  boolean hasFactClause = sc != null && factClauses.containsKey(sc);
  return hasFactClause ? factClauses.get(sc) : minCostClause;
}
// Populate map of tables to their columns which are present in any of the
// join paths: initialise the per-table columns of every path, then rebuild
// the from/to column maps
private void initJoinPathColumns() {
  for (List<JoinPath> paths : allPaths.values()) {
    for (JoinPath jp : paths) {
      jp.initColumnsForTable();
    }
  }
  refreshJoinPathColumns();
}
public void refreshJoinPathColumns() {
joinPathFromColumns.clear();
joinPathToColumns.clear();
for (Map.Entry, List> joinPathEntry : allPaths.entrySet()) {
List joinPaths = joinPathEntry.getValue();
Map> fromColPaths = joinPathFromColumns.get(joinPathEntry.getKey().getObject());
Map> toColPaths = joinPathToColumns.get(joinPathEntry.getKey().getObject());
if (fromColPaths == null) {
fromColPaths = new HashMap<>();
joinPathFromColumns.put(joinPathEntry.getKey(), fromColPaths);
}
if (toColPaths == null) {
toColPaths = new HashMap<>();
joinPathToColumns.put(joinPathEntry.getKey(), toColPaths);
}
populateJoinPathCols(joinPaths, fromColPaths, toColPaths);
}
}
private void populateJoinPathCols(List joinPaths,
Map> fromPathColumns, Map> toPathColumns) {
for (JoinPath path : joinPaths) {
for (TableRelationship edge : path.getEdges()) {
AbstractCubeTable fromTable = edge.getFromTable();
String fromColumn = edge.getFromColumn();
fromPathColumns.computeIfAbsent(fromTable, k -> new ArrayList<>()).add(fromColumn);
// Similarly populate for the 'to' table
AbstractCubeTable toTable = edge.getToTable();
String toColumn = edge.getToColumn();
toPathColumns.computeIfAbsent(toTable, k -> new ArrayList<>()).add(toColumn);
}
}
}
/**
 * Forgets a joined dimension: drops its join paths and its entries in both join path
 * column maps.
 *
 * @param dim the joined (aliased) dimension to remove
 */
public void removeJoinedTable(Aliased<Dimension> dim) {
  allPaths.remove(dim);
  joinPathFromColumns.remove(dim);
  // Keep the 'to' columns in step with the 'from' columns; both maps are read by
  // getAllJoinPathColumnsOfTable
  joinPathToColumns.remove(dim);
}
/**
 * Builds the FROM string: the given table followed by the merged join clause.
 *
 * @param fromTable   text of the table being joined from
 * @param sc          context whose storage candidate selects the join clause
 * @param dimsToQuery candidate dim chosen for each queried dimension
 * @param cubeql      query context; its auto join context supplies the join clause
 * @return {@code fromTable} unchanged when no dimensions are queried, otherwise
 *         {@code fromTable} with the merged join clause appended
 * @throws LensException if building the merged join clause fails
 */
public String getFromString(String fromTable, DimHQLContext sc,
  Map<Dimension, CandidateDim> dimsToQuery, CubeQueryContext cubeql) throws LensException {
  Set<Dimension> qdims = dimsToQuery.keySet();
  log.info("All paths dump:{} Queried dims:{}", cubeql.getAutoJoinCtx().getAllPaths(), qdims);
  if (qdims.isEmpty()) {
    return fromTable;
  }
  // Compute the merged join clause string for the min cost joinClause
  JoinClause joinClause = cubeql.getAutoJoinCtx().getJoinClause(sc.getStorageCandidate());
  return fromTable + getMergedJoinClause(cubeql, sc, joinClause, dimsToQuery);
}
/**
 * Walks the join tree of the given join clause and concatenates one join clause string
 * per visited node. Storage (partition) filters of the joined tables are pushed into
 * the join condition depending on the configured join type. When bridge table
 * flattening is enabled and a many-valued relationship has been seen on the current
 * branch, clause generation is delegated to {@link BridgeTableJoinContext}.
 *
 * Some refactoring needed to account for multiple join paths.
 *
 * @param cubeql      query context
 * @param sc          context handed to the bridge table join context
 * @param joinClause  join clause whose join tree is traversed
 * @param dimsToQuery candidate dim chosen for each dimension; entries are marked via
 *                    setWhereClauseAdded when their filter is pushed into the join
 * @return all generated clauses joined without a separator
 * @throws LensException declared for the helpers invoked while building the clauses
 */
public String getMergedJoinClause(CubeQueryContext cubeql, DimHQLContext sc, JoinClause joinClause,
  Map<Dimension, CandidateDim> dimsToQuery) throws LensException {
  // Insertion-ordered and de-duplicating
  Set<String> clauses = new LinkedHashSet<>();
  String joinTypeStr = "";
  JoinType joinType = JoinType.INNER;
  if (StringUtils.isNotBlank(joinTypeCfg)) {
    // Locale.ROOT keeps the enum lookup independent of the JVM default locale
    joinType = JoinType.valueOf(joinTypeCfg.toUpperCase(Locale.ROOT));
    joinTypeStr = JoinUtils.getJoinTypeStr(joinType);
  }
  Iterator<JoinTree> iter = joinClause.getJoinTree().dft();
  boolean hasBridgeTable = false;
  BridgeTableJoinContext bridgeTableJoinContext = new BridgeTableJoinContext(cubeql, sc, bridgeTableFieldAggr,
    bridgeTableFieldArrayFilter, doFlatteningEarly);
  while (iter.hasNext()) {
    JoinTree cur = iter.next();
    TableRelationship rel = cur.parentRelationship;
    String fromAlias = cur.parent.getAlias();
    String toAlias = cur.getAlias();
    // Once a many-valued relationship is seen, the flag stays set until the branch ends
    hasBridgeTable = flattenBridgeTables && (hasBridgeTable || rel.isMapsToMany());
    // We have to push user specified filters for the joined tables
    String userFilter = null;
    // Partition condition on the tables also needs to be pushed depending
    // on the join
    String storageFilter = null;
    if (JoinType.INNER == joinType || JoinType.LEFTOUTER == joinType || JoinType.LEFTSEMI == joinType) {
      // For inner and left joins push filter of right table
      storageFilter = getStorageFilter(dimsToQuery, rel.getToTable(), toAlias);
      dimsToQuery.get(rel.getToTable()).setWhereClauseAdded(toAlias);
    } else if (JoinType.RIGHTOUTER == joinType) {
      // For right outer joins, push filters of left table
      if (rel.getFromTable() instanceof Dimension) {
        storageFilter = getStorageFilter(dimsToQuery, rel.getFromTable(), fromAlias);
        dimsToQuery.get(rel.getFromTable()).setWhereClauseAdded(fromAlias);
      }
    } else if (JoinType.FULLOUTER == joinType) {
      // For full outer we need to push filters of both left and right
      // tables in the join clause
      String leftStorageFilter = null;
      if (rel.getFromTable() instanceof Dimension) {
        leftStorageFilter = getStorageFilter(dimsToQuery, rel.getFromTable(), fromAlias);
        if (StringUtils.isNotBlank(leftStorageFilter)) {
          dimsToQuery.get(rel.getFromTable()).setWhereClauseAdded(fromAlias);
        }
      }
      String rightStorageFilter = getStorageFilter(dimsToQuery, rel.getToTable(), toAlias);
      if (StringUtils.isNotBlank(rightStorageFilter)) {
        if (StringUtils.isNotBlank(leftStorageFilter)) {
          leftStorageFilter += " and ";
        }
        dimsToQuery.get(rel.getToTable()).setWhereClauseAdded(toAlias);
      }
      // No user filters are collected here; this branch passes an empty string, not null
      userFilter = "";
      storageFilter =
        (leftStorageFilter == null ? "" : leftStorageFilter)
          + (rightStorageFilter == null ? "" : rightStorageFilter);
    }
    // if a bridge table is present in the path
    if (hasBridgeTable) {
      bridgeTableJoinContext.updateBridgeClause(rel, fromAlias, toAlias, dimsToQuery.get(rel.getToTable()),
        userFilter, storageFilter);
      if (cubeql.getTblAliasToColumns().get(toAlias) != null
        && !cubeql.getTblAliasToColumns().get(toAlias).isEmpty()) {
        // there are fields selected from this table after seeing bridge table in path
        // we should make subQuery for this selection
        clauses.add(bridgeTableJoinContext.generateJoinClause(joinTypeStr, toAlias));
      }
      if (cur.getSubtrees().isEmpty()) {
        // clear bridge flags and builders, as there are no more clauses in this tree.
        hasBridgeTable = false;
        bridgeTableJoinContext.resetContext();
      }
    } else {
      // Simple join clause is :
      // joinType + " join " + destTable + " on " + joinCond + [" and" + userFilter] + ["and" + storageFilter]
      StringBuilder clause = new StringBuilder();
      clause.append(joinTypeStr).append(" join ");
      //Add storage table name followed by alias
      clause.append(dimsToQuery.get(rel.getToTable()).getStorageString(toAlias));
      clause.append(" on ").append(fromAlias).append(".")
        .append(rel.getFromColumn()).append(" = ").append(toAlias)
        .append(".").append(rel.getToColumn());
      if (StringUtils.isNotBlank(userFilter)) {
        clause.append(" and ").append(userFilter);
      }
      if (StringUtils.isNotBlank(storageFilter)) {
        clause.append(" and ").append(storageFilter);
      }
      clauses.add(clause.toString());
    }
  }
  return StringUtils.join(clauses, "");
}
public Set getDimsOnPath(Map, List> joinChain,
Set qdims) {
Set dimsOnPath = new HashSet<>();
for (Map.Entry, List> entry : joinChain.entrySet()) {
List chain = entry.getValue();
Dimension table = entry.getKey().getObject();
// check if join with this dimension is required
if (!qdims.contains(table)) {
continue;
}
for (int i = chain.size() - 1; i >= 0; i--) {
TableRelationship rel = chain.get(i);
dimsOnPath.add((Dimension) rel.getToTable());
}
}
return dimsOnPath;
}
/**
 * Returns the where clause of the candidate dim chosen for {@code table}.
 *
 * @param dimsToQuery candidate dim per dimension, may be null
 * @param table       table whose filter is wanted
 * @param alias       alias passed to StorageUtil.getWhereClause together with the clause;
 *                    when null the clause is returned as stored
 * @return the (possibly alias-adjusted) where clause, or an empty string when there is
 *         no candidate for the table or its where clause is blank
 */
private String getStorageFilter(Map<Dimension, CandidateDim> dimsToQuery, AbstractCubeTable table, String alias) {
  if (dimsToQuery == null) {
    return "";
  }
  CandidateDim candidate = dimsToQuery.get(table);
  if (candidate == null) {
    return "";
  }
  String whereClause = candidate.getWhereClause();
  if (StringUtils.isBlank(whereClause)) {
    return "";
  }
  return alias == null ? whereClause : StorageUtil.getWhereClause(whereClause, alias);
}
/**
 * @return true if joins were resolved automatically
 */
public boolean isJoinsResolved() {
  return this.joinsResolved;
}
// Includes both queried join paths and optional join paths
public Set getAllJoinPathColumnsOfTable(AbstractCubeTable table) {
Set allPaths = new HashSet<>();
for (Map> optPaths : joinPathFromColumns.values()) {
if (optPaths.get(table) != null) {
allPaths.addAll(optPaths.get(table));
}
}
for (Map> optPaths : joinPathToColumns.values()) {
if (optPaths.get(table) != null) {
allPaths.addAll(optPaths.get(table));
}
}
return allPaths;
}
/**
 * Prunes the join chains defined in Cube whose starting column is not there in any of the candidate facts.
 * Same is done in case of join paths defined in Dimensions.
 *
 * @param cube        cube whose columns on each join path are checked
 * @param candColumns columns available from the picked candidate; a path is dropped
 *                    when it needs a cube column missing from this collection
 * @param dimsToQuery resolved candidate dim per dimension, used for the dimension-side pruning
 * @throws LensException if a required dimension is left without any join path
 */
public void pruneAllPaths(CubeInterface cube, Collection<String> candColumns,
  final Map<Dimension, CandidateDim> dimsToQuery) throws LensException {
  // Remove join paths which cannot be satisfied by the resolved candidate
  // fact and dimension tables
  // include columns from picked candidate
  for (List<JoinPath> paths : allPaths.values()) {
    for (Iterator<JoinPath> pathIter = paths.iterator(); pathIter.hasNext();) {
      JoinPath jp = pathIter.next();
      List<String> cubeCols = jp.getColumnsForTable((AbstractCubeTable) cube);
      if (cubeCols != null && !candColumns.containsAll(cubeCols)) {
        // This path requires some columns from the cube which are not
        // present in the candidate fact
        // Remove this path
        log.info("Removing join path:{} as columns :{} dont exist", jp, cubeCols);
        pathIter.remove();
      }
    }
  }
  pruneEmptyPaths(allPaths);
  pruneAllPaths(dimsToQuery);
}
/**
* Prunes allPaths by removing paths which contain columns that are not present in any candidate dims.
*
* @param candidateDims candidate dimensions
*/
public void pruneAllPathsForCandidateDims(Map> candidateDims) throws LensException {
Map> dimColumns = new HashMap<>();
// populate all columns present in candidate dims for each dimension
for (Map.Entry> entry : candidateDims.entrySet()) {
Dimension dim = entry.getKey();
Set allColumns = new HashSet<>();
for (CandidateDim cdim : entry.getValue()) {
allColumns.addAll(cdim.getColumns());
}
dimColumns.put(dim, allColumns);
}
for (List paths : allPaths.values()) {
for (int i = 0; i < paths.size(); i++) {
JoinPath jp = paths.get(i);
for (AbstractCubeTable refTable : jp.getAllTables()) {
List cols = jp.getColumnsForTable(refTable);
if (refTable instanceof Dimension) {
if (cols != null && (dimColumns.get(refTable) == null || !dimColumns.get(refTable).containsAll(cols))) {
// This path requires some columns from the cube which are not present in any candidate dim
// Remove this path
log.info("Removing join path:{} as columns :{} don't exist", jp, cols);
paths.remove(i);
i--;
break;
}
}
}
}
}
pruneEmptyPaths(allPaths);
}
private void pruneEmptyPaths(Map, List> allPaths) throws LensException {
Iterator, List>> iter = allPaths.entrySet().iterator();
Set noPathDims = new HashSet<>();
while (iter.hasNext()) {
Map.Entry, List> entry = iter.next();
if (entry.getValue().isEmpty()) {
noPathDims.add(entry.getKey().getObject());
iter.remove();
}
}
noPathDims.retainAll(requiredDimensions);
if (!noPathDims.isEmpty()) {
throw new LensException(LensCubeErrorCode.NO_JOIN_PATH.getLensErrorInfo(), autoJoinTarget.getName(),
noPathDims.toString());
}
}
private Map, List> pruneFactPaths(CubeInterface cube,
final StorageCandidate sc) throws LensException {
Map, List> prunedPaths = new LinkedHashMap<>();
// Remove join paths which cannot be satisfied by the candidate fact
for (Map.Entry, List> ppaths : allPaths.entrySet()) {
prunedPaths.put(ppaths.getKey(), new ArrayList<>(ppaths.getValue()));
List paths = prunedPaths.get(ppaths.getKey());
for (int i = 0; i < paths.size(); i++) {
JoinPath jp = paths.get(i);
List cubeCols = jp.getColumnsForTable((AbstractCubeTable) cube);
if (cubeCols != null && !sc.getColumns().containsAll(cubeCols)) {
// This path requires some columns from the cube which are not
// present in the candidate fact
// Remove this path
log.info("Removing join path:{} as columns :{} don't exist", jp, cubeCols);
paths.remove(i);
i--;
}
}
}
pruneEmptyPaths(prunedPaths);
return prunedPaths;
}
/**
 * Removes join paths which cannot be satisfied by the resolved dimension tables.
 * Does nothing when {@code dimsToQuery} is null or empty.
 *
 * @param dimsToQuery resolved candidate dim per dimension
 * @throws LensException if a required dimension is left without any join path
 */
private void pruneAllPaths(final Map<Dimension, CandidateDim> dimsToQuery) throws LensException {
  if (dimsToQuery == null || dimsToQuery.isEmpty()) {
    return;
  }
  for (CandidateDim candidateDim : dimsToQuery.values()) {
    Set<String> dimCols = candidateDim.getTable().getAllFieldNames();
    for (List<JoinPath> paths : allPaths.values()) {
      for (Iterator<JoinPath> pathIter = paths.iterator(); pathIter.hasNext();) {
        JoinPath jp = pathIter.next();
        List<String> candidateDimCols = jp.getColumnsForTable(candidateDim.getBaseTable());
        if (candidateDimCols != null && !dimCols.containsAll(candidateDimCols)) {
          // This path requires some columns from the dimension which are
          // not present in the candidate dim
          // Remove this path
          log.info("Removing join path:{} as columns :{} dont exist", jp, candidateDimCols);
          pathIter.remove();
        }
      }
    }
  }
  pruneEmptyPaths(allPaths);
}
/**
* There can be multiple join paths between a dimension and the target. Set of all possible join clauses is the
* cartesian product of join paths of all dimensions
*/
private Iterator getJoinClausesForAllPaths(final StorageCandidate sc,
final Set qDims, final CubeQueryContext cubeql) throws LensException {
Map, List> allPaths;
// if fact is passed only look at paths possible from fact to dims
if (sc != null) {
allPaths = pruneFactPaths(cubeql.getCube(), sc);
} else {
allPaths = new LinkedHashMap<>(this.allPaths);
}
// prune allPaths with qdims
pruneAllPathsWithQueriedDims(allPaths, qDims);
// Number of paths in each path set
final int[] groupSizes = new int[allPaths.values().size()];
// Total number of elements in the cartesian product
int numSamples = 1;
// All path sets
final List> pathSets = new ArrayList<>();
// Dimension corresponding to the path sets
final List> dimensions = new ArrayList<>(groupSizes.length);
int i = 0;
for (Map.Entry, List> entry : allPaths.entrySet()) {
dimensions.add(entry.getKey());
List group = entry.getValue();
pathSets.add(group);
groupSizes[i] = group.size();
numSamples *= groupSizes[i];
i++;
}
final int[] selection = new int[groupSizes.length];
final int MAX_SAMPLE_COUNT = numSamples;
// Return a lazy iterator over all possible join chains
return new Iterator() {
int sample = 0;
@Override
public boolean hasNext() {
return sample < MAX_SAMPLE_COUNT;
}
@Override
public JoinClause next() {
Map, List> chain = new LinkedHashMap<>();
//generate next permutation.
for (int i = groupSizes.length - 1, base = sample; i >= 0; base /= groupSizes[i], i--) {
selection[i] = base % groupSizes[i];
}
for (int i = 0; i < selection.length; i++) {
int selectedPath = selection[i];
List path = pathSets.get(i).get(selectedPath).getEdges();
chain.put(dimensions.get(i), path);
}
Set dimsOnPath = getDimsOnPath(chain, qDims);
sample++;
// Cost of join = number of tables joined in the clause
return new JoinClause(cubeql, chain, dimsOnPath);
}
@Override
public void remove() {
throw new UnsupportedOperationException("Cannot remove elements!");
}
};
}
/**
* Given allPaths, it will remove entries where key is a non-join chain dimension and not contained in qdims
*
* @param allPaths All join paths
* @param qDims queried dimensions
*/
private void pruneAllPathsWithQueriedDims(Map, List> allPaths,
Set qDims) {
Iterator, List>> iterator = allPaths.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry, List> cur = iterator.next();
if (!qDims.contains(cur.getKey().getObject())) {
log.info("removing from allPaths: {}", cur);
iterator.remove();
}
}
}
/**
 * Finds the min cost join clause for the queried dimensions, records it, and returns
 * the dimensions on that clause which were not queried directly (the optional tables).
 * The clause is stored in the fact clauses map when {@code sc} has a storage candidate,
 * otherwise as the min cost clause. Candidate dim tables of the optional dimensions
 * that lack the chain columns of the picked clause are removed from the query context.
 *
 * @param sc     context whose storage candidate restricts the join paths considered
 * @param qdims  queried dimensions; when null an empty set is returned
 * @param cubeql query context
 * @return dimensions of the picked clause that are not part of {@code qdims}
 * @throws LensException with NO_JOIN_PATH when no join clause is available, or with
 *                       NO_DIM_HAS_COLUMN when an optional dimension has no candidate
 *                       dim table left
 */
public Set<Dimension> pickOptionalTables(final DimHQLContext sc,
  Set<Dimension> qdims, CubeQueryContext cubeql) throws LensException {
  // Find the min cost join clause and add dimensions in the clause as optional dimensions
  Set<Dimension> joiningOptionalTables = new HashSet<>();
  if (qdims == null) {
    return joiningOptionalTables;
  }
  // find least cost path; on equal cost the first clause seen is kept
  Iterator<JoinClause> itr = getJoinClausesForAllPaths(sc.getStorageCandidate(), qdims, cubeql);
  JoinClause bestClause = null;
  while (itr.hasNext()) {
    JoinClause clause = itr.next();
    if (bestClause == null || bestClause.getCost() > clause.getCost()) {
      bestClause = clause;
    }
  }
  if (bestClause == null) {
    // NOTE(review): pruneEmptyPaths passes the arguments of this same error in the
    // opposite order (target name first) - confirm which order the message expects
    throw new LensException(LensCubeErrorCode.NO_JOIN_PATH.getLensErrorInfo(),
      qdims.toString(), autoJoinTarget.getName());
  }
  log.info("Fact: {} minCostClause:{}", sc, bestClause);
  if (sc.getStorageCandidate() != null) {
    getFactClauses().put(sc.getStorageCandidate(), bestClause);
  } else {
    setMinCostClause(bestClause);
  }
  for (Dimension dim : bestClause.getDimsInPath()) {
    if (!qdims.contains(dim)) {
      joiningOptionalTables.add(dim);
    }
  }
  bestClause.initChainColumns();
  // prune candidate dims of joiningOptionalTables wrt joining columns
  for (Dimension dim : joiningOptionalTables) {
    for (Iterator<CandidateDim> i = cubeql.getCandidateDimTables().get(dim).iterator(); i.hasNext();) {
      CandidateDim cDim = i.next();
      if (!cDim.getColumns().containsAll(bestClause.chainColumns.get(dim))) {
        i.remove();
        log.info("Not considering dimTable:{} as its columns are not part of any join paths. Join columns:{}",
          cDim.getTable(), bestClause.chainColumns.get(dim));
        cubeql.addDimPruningMsgs(dim, cDim.getTable(),
          CandidateTablePruneCause.noColumnPartOfAJoinPath(bestClause.chainColumns.get(dim)));
      }
    }
    if (cubeql.getCandidateDimTables().get(dim).isEmpty()) {
      throw new LensException(LensCubeErrorCode.NO_DIM_HAS_COLUMN.getLensErrorInfo(), dim.getName(),
        bestClause.chainColumns.get(dim).toString());
    }
  }
  return joiningOptionalTables;
}
public Map, List> getAllPaths() {
return allPaths;
}
/**
 * @param dim dimension to check, wrapped via Aliased.create(dim)
 * @return true if a non-empty list of join paths is registered for the dimension
 */
public boolean isReachableDim(Dimension dim) {
  return isReachableDim(Aliased.create(dim));
}
/**
 * @param dim   dimension to check
 * @param alias alias the dimension is wrapped with via Aliased.create(dim, alias)
 * @return true if a non-empty list of join paths is registered for the aliased dimension
 */
public boolean isReachableDim(Dimension dim, String alias) {
  return isReachableDim(Aliased.create(dim, alias));
}
/**
 * @param aliased aliased dimension to look up in allPaths
 * @return true if allPaths holds a non-empty list of join paths for {@code aliased}
 */
private boolean isReachableDim(Aliased<Dimension> aliased) {
  // Single lookup; a missing (or null) path list means the dimension is not reachable
  List<JoinPath> paths = allPaths.get(aliased);
  return paths != null && !paths.isEmpty();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy