Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download or modify it as often as you want.
org.apache.hadoop.hive.ql.exec.CommonJoinOperator Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer;
import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import com.facebook.presto.hive.$internal.org.slf4j.Logger;
import com.facebook.presto.hive.$internal.org.slf4j.LoggerFactory;
/**
* Join operator implementation.
*/
public abstract class CommonJoinOperator extends
Operator implements Serializable {
private static final long serialVersionUID = 1L;
protected static final Logger LOG = LoggerFactory.getLogger(CommonJoinOperator.class
.getName());
protected transient int numAliases; // number of aliases
/**
* The expressions for join inputs.
*/
protected transient List[] joinValues;
/**
* The filters for join
*/
protected transient List[] joinFilters;
/**
* List of evaluators for conditions which appear on on-clause and needs to be
* evaluated before emitting rows. Currently, relevant only for outer joins.
*
* For instance, given the query:
* select * from t1 right outer join t2 on t1.c1 + t2.c2 > t1.c3;
* The expression evaluator for t1.c1 + t2.c2 > t1.c3 will be stored in this list.
*/
protected transient List residualJoinFilters;
protected transient int[][] filterMaps;
/**
* The ObjectInspectors for the join inputs.
*/
protected transient List[] joinValuesObjectInspectors;
/**
* The ObjectInspectors for join filters.
*/
protected transient List[] joinFilterObjectInspectors;
/**
* OIs corresponding to residualJoinFilters.
*/
protected transient List residualJoinFiltersOIs;
/**
* Will be true depending on content of residualJoinFilters.
*/
protected transient boolean needsPostEvaluation;
/**
* This data structure is used to keep track of rows on which residualFilters
* evaluated to false. We will iterate on this container afterwards and emit
* rows appending NULL values if it was not done. Key is relation index.
*/
protected transient Map rowContainerPostFilteredOuterJoin = null;
/**
* The standard ObjectInspectors for the join inputs.
*/
protected transient List[] joinValuesStandardObjectInspectors;
/**
* The standard ObjectInspectors for the row container.
*/
protected transient List[] rowContainerStandardObjectInspectors;
protected transient Byte[] order; // order in which the results should
// be output
protected transient JoinCondDesc[] condn;
protected transient boolean[] nullsafes;
public transient boolean noOuterJoin;
// for outer joins, contains the potential nulls for the concerned aliases
protected transient ArrayList[] dummyObj;
// empty rows for each table
protected transient RowContainer>[] dummyObjVectors;
protected transient int totalSz; // total size of the composite object
// keys are the column names. basically this maps the position of the column
// in
// the output of the CommonJoinOperator to the input columnInfo.
private transient Map> posToAliasMap;
transient LazyBinarySerDe[] spillTableSerDe;
protected transient TableDesc[] spillTableDesc; // spill tables are
// used if the join
// input is too large
// to fit in memory
AbstractRowContainer>[] storage; // map b/w table alias
// to RowContainer
int joinEmitInterval = -1;
int joinCacheSize = 0;
long nextSz = 0;
transient Byte lastAlias = null;
transient boolean handleSkewJoin = false;
transient boolean hasLeftSemiJoin = false;
protected transient int countAfterReport;
protected transient int heartbeatInterval;
protected static final int NOTSKIPBIGTABLE = -1;
private transient boolean closeOpCalled = false;
/** Kryo ctor. */
protected CommonJoinOperator() {
super();
}
public CommonJoinOperator(CompilationOpContext ctx) {
super(ctx);
}
public CommonJoinOperator(CommonJoinOperator clone) {
super(clone.id, clone.cContext);
this.joinEmitInterval = clone.joinEmitInterval;
this.joinCacheSize = clone.joinCacheSize;
this.nextSz = clone.nextSz;
this.childOperators = clone.childOperators;
this.parentOperators = clone.parentOperators;
this.done = clone.done;
this.storage = clone.storage;
this.condn = clone.condn;
this.conf = clone.getConf();
this.setSchema(clone.getSchema());
this.alias = clone.alias;
this.childOperatorsArray = clone.childOperatorsArray;
this.childOperatorsTag = clone.childOperatorsTag;
this.setColumnExprMap(clone.getColumnExprMap());
this.dummyObj = clone.dummyObj;
this.dummyObjVectors = clone.dummyObjVectors;
this.forwardCache = clone.forwardCache;
this.groupKeyObject = clone.groupKeyObject;
this.handleSkewJoin = clone.handleSkewJoin;
this.hconf = clone.hconf;
this.inputObjInspectors = clone.inputObjInspectors;
this.noOuterJoin = clone.noOuterJoin;
this.numAliases = clone.numAliases;
this.operatorId = clone.operatorId;
this.posToAliasMap = clone.posToAliasMap;
this.spillTableDesc = clone.spillTableDesc;
this.statsMap = clone.statsMap;
this.joinFilters = clone.joinFilters;
this.joinFilterObjectInspectors = clone.joinFilterObjectInspectors;
this.residualJoinFilters = clone.residualJoinFilters;
this.residualJoinFiltersOIs = clone.residualJoinFiltersOIs;
this.needsPostEvaluation = clone.needsPostEvaluation;
}
private ObjectInspector getJoinOutputObjectInspector(
Byte[] order, List[] aliasToObjectInspectors, T conf) {
List structFieldObjectInspectors = new ArrayList();
for (Byte alias : order) {
List oiList = getValueObjectInspectors(alias, aliasToObjectInspectors);
if (oiList != null && !oiList.isEmpty()) {
structFieldObjectInspectors.addAll(oiList);
}
}
StructObjectInspector joinOutputObjectInspector = ObjectInspectorFactory
.getStandardStructObjectInspector(conf.getOutputColumnNames(),
structFieldObjectInspectors);
return joinOutputObjectInspector;
}
protected List getValueObjectInspectors(
byte alias, List[] aliasToObjectInspectors) {
return aliasToObjectInspectors[alias];
}
protected Configuration hconf;
@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
closeOpCalled = false;
this.handleSkewJoin = conf.getHandleSkewJoin();
this.hconf = hconf;
heartbeatInterval = HiveConf.getIntVar(hconf,
HiveConf.ConfVars.HIVESENDHEARTBEAT);
countAfterReport = 0;
totalSz = 0;
int tagLen = conf.getTagLength();
// Map that contains the rows for each alias
storage = new AbstractRowContainer[tagLen];
numAliases = conf.getExprs().size();
joinValues = new List[tagLen];
joinFilters = new List[tagLen];
order = conf.getTagOrder();
condn = conf.getConds();
nullsafes = conf.getNullSafes();
noOuterJoin = conf.isNoOuterJoin();
totalSz = JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(),
order,NOTSKIPBIGTABLE, hconf);
//process join filters
joinFilters = new List[tagLen];
JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(),order,NOTSKIPBIGTABLE, hconf);
joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues,
inputObjInspectors,NOTSKIPBIGTABLE, tagLen);
joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters,
inputObjInspectors,NOTSKIPBIGTABLE, tagLen);
joinValuesStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(
joinValuesObjectInspectors,NOTSKIPBIGTABLE, tagLen);
filterMaps = conf.getFilterMap();
if (noOuterJoin) {
rowContainerStandardObjectInspectors = joinValuesStandardObjectInspectors;
} else {
List[] rowContainerObjectInspectors = new List[tagLen];
for (Byte alias : order) {
ArrayList rcOIs = new ArrayList();
rcOIs.addAll(joinValuesObjectInspectors[alias]);
// for each alias, add object inspector for short as the last element
rcOIs.add(
PrimitiveObjectInspectorFactory.writableShortObjectInspector);
rowContainerObjectInspectors[alias] = rcOIs;
}
rowContainerStandardObjectInspectors =
JoinUtil.getStandardObjectInspectors(rowContainerObjectInspectors, NOTSKIPBIGTABLE, tagLen);
}
dummyObj = new ArrayList[numAliases];
dummyObjVectors = new RowContainer[numAliases];
joinEmitInterval = HiveConf.getIntVar(hconf,
HiveConf.ConfVars.HIVEJOINEMITINTERVAL);
joinCacheSize = HiveConf.getIntVar(hconf,
HiveConf.ConfVars.HIVEJOINCACHESIZE);
// construct dummy null row (indicating empty table) and
// construct spill table serde which is used if input is too
// large to fit into main memory.
byte pos = 0;
for (Byte alias : order) {
int sz = conf.getExprs().get(alias).size();
ArrayList nr = new ArrayList(sz);
for (int j = 0; j < sz; j++) {
nr.add(null);
}
if (!noOuterJoin) {
// add whether the row is filtered or not
// this value does not matter for the dummyObj
// because the join values are already null
nr.add(new ShortWritable());
}
dummyObj[pos] = nr;
// there should be only 1 dummy object in the RowContainer
RowContainer> values = JoinUtil.getRowContainer(hconf,
rowContainerStandardObjectInspectors[pos],
alias, 1, spillTableDesc, conf, !hasFilter(pos), reporter);
values.addRow(dummyObj[pos]);
dummyObjVectors[pos] = values;
// if serde is null, the input doesn't need to be spilled out
// e.g., the output columns does not contains the input table
RowContainer> rc = JoinUtil.getRowContainer(hconf,
rowContainerStandardObjectInspectors[pos],
alias, joinCacheSize, spillTableDesc, conf, !hasFilter(pos), reporter);
storage[pos] = rc;
pos++;
}
forwardCache = new Object[totalSz];
aliasFilterTags = new short[numAliases];
Arrays.fill(aliasFilterTags, (byte)0xff);
filterTags = new short[numAliases];
skipVectors = new boolean[numAliases][];
for(int i = 0; i < skipVectors.length; i++) {
skipVectors[i] = new boolean[i + 1];
}
intermediate = new List[numAliases];
offsets = new int[numAliases + 1];
int sum = 0;
for (int i = 0; i < numAliases; i++) {
offsets[i] = sum;
sum += joinValues[order[i]].size();
}
offsets[numAliases] = sum;
outputObjInspector = getJoinOutputObjectInspector(order,
joinValuesStandardObjectInspectors, conf);
for( int i = 0; i < condn.length; i++ ) {
if(condn[i].getType() == JoinDesc.LEFT_SEMI_JOIN) {
hasLeftSemiJoin = true;
}
}
// Create post-filtering evaluators if needed
if (conf.getResidualFilterExprs() != null) {
residualJoinFilters = new ArrayList<>(conf.getResidualFilterExprs().size());
residualJoinFiltersOIs = new ArrayList<>(conf.getResidualFilterExprs().size());
for (int i = 0; i < conf.getResidualFilterExprs().size(); i++) {
ExprNodeDesc expr = conf.getResidualFilterExprs().get(i);
residualJoinFilters.add(ExprNodeEvaluatorFactory.get(expr));
residualJoinFiltersOIs.add(
residualJoinFilters.get(i).initialize(outputObjInspector));
}
needsPostEvaluation = true;
if (!noOuterJoin) {
// We need to disable join emit interval, since for outer joins with post conditions
// we need to have the full view on the right matching rows to know whether we need
// to produce a row with NULL values or not
joinEmitInterval = -1;
}
}
if (LOG.isInfoEnabled()) {
LOG.info("JOIN " + outputObjInspector.getTypeName() + " totalsz = " + totalSz);
}
}
transient boolean newGroupStarted = false;
@Override
public void startGroup() throws HiveException {
newGroupStarted = true;
for (AbstractRowContainer> alw : storage) {
alw.clearRows();
}
super.startGroup();
}
protected long getNextSize(long sz) {
// A very simple counter to keep track of join entries for a key
if (sz >= 100000) {
return sz + 100000;
}
return 2 * sz;
}
protected transient Byte alias;
protected transient Object[] forwardCache;
// pre-calculated offset values for each alias
protected transient int[] offsets;
// a array of bitvectors where each entry denotes whether the element is to
// be used or not (whether it is null or not). The size of the bitvector is
// same as the number of inputs(aliases) under consideration currently.
// When all inputs are accounted for, the output is forwarded appropriately.
protected transient boolean[][] skipVectors;
// caches objects before constructing forward cache
protected transient List[] intermediate;
// filter tags for objects
protected transient short[] filterTags;
/**
* On filterTags
*
* ANDed value of all filter tags in current join group
* if any of values passes on outer join alias (which makes zero for the tag alias),
* it means there exists a pair for it and safely regarded as a inner join
*
* for example, with table a, b something like,
* a = 100, 10 | 100, 20 | 100, 30
* b = 100, 10 | 100, 20 | 100, 30
*
* the query "a FO b ON a.k=b.k AND a.v>10 AND b.v>30" makes filter map
* 0(a) = [1(b),1] : a.v>10
* 1(b) = [0(a),1] : b.v>30
*
* for filtered rows in a (100,10) create a-NULL
* for filtered rows in b (100,10) (100,20) (100,30) create NULL-b
*
* with 0(a) = [1(b),1] : a.v>10
* 100, 10 = 00000010 (filtered)
* 100, 20 = 00000000 (valid)
* 100, 30 = 00000000 (valid)
* -------------------------
* sum = 00000000 : for valid rows in b, there is at least one pair in a
*
* with 1(b) = [0(a),1] : b.v>30
* 100, 10 = 00000001 (filtered)
* 100, 20 = 00000001 (filtered)
* 100, 30 = 00000001 (filtered)
* -------------------------
* sum = 00000001 : for valid rows in a (100,20) (100,30), there is no pair in b
*
* result :
* 100, 10 : N, N
* N, N : 100, 10
* N, N : 100, 20
* N, N : 100, 30
* 100, 20 : N, N
* 100, 30 : N, N
*/
protected transient short[] aliasFilterTags;
// all evaluation should be processed here for valid aliasFilterTags
//
// for MapJoin, filter tag is pre-calculated in MapredLocalTask and stored with value.
// when reading the hashtable, MapJoinObjectValue calculates alias filter and provide it to join
protected List getFilteredValue(byte alias, Object row) throws HiveException {
boolean hasFilter = hasFilter(alias);
List nr = JoinUtil.computeValues(row, joinValues[alias],
joinValuesObjectInspectors[alias], hasFilter);
if (hasFilter) {
short filterTag = JoinUtil.isFiltered(row, joinFilters[alias],
joinFilterObjectInspectors[alias], filterMaps[alias]);
nr.add(new ShortWritable(filterTag));
aliasFilterTags[alias] &= filterTag;
}
return nr;
}
// fill forwardCache with skipvector
// returns whether a record was forwarded
private boolean createForwardJoinObject(boolean[] skip) throws HiveException {
Arrays.fill(forwardCache, null);
boolean forward = false;
for (int i = 0; i < numAliases; i++) {
if (!skip[i]) {
for (int j = offsets[i]; j < offsets[i + 1]; j++) {
forwardCache[j] = intermediate[i].get(j - offsets[i]);
}
forward = true;
}
}
if (forward) {
if (needsPostEvaluation) {
forward = !JoinUtil.isFiltered(forwardCache, residualJoinFilters, residualJoinFiltersOIs);
}
if (forward) {
// If it is not an outer join, or the post-condition filters
// are empty or the row passed them
internalForward(forwardCache, outputObjInspector);
countAfterReport = 0;
}
}
return forward;
}
// entry point (aliasNum = 0)
private void genJoinObject() throws HiveException {
if (needsPostEvaluation && 0 == numAliases - 2) {
int nextType = condn[0].getType();
if (nextType == JoinDesc.RIGHT_OUTER_JOIN || nextType == JoinDesc.FULL_OUTER_JOIN) {
// Initialize container to use for storing tuples before emitting them
rowContainerPostFilteredOuterJoin = new HashMap<>();
}
}
boolean rightFirst = true;
boolean hasFilter = hasFilter(order[0]);
AbstractRowContainer.RowIterator> iter = storage[order[0]].rowIter();
for (List rightObj = iter.first(); rightObj != null; rightObj = iter.next()) {
boolean rightNull = rightObj == dummyObj[0];
if (hasFilter) {
filterTags[0] = getFilterTag(rightObj);
}
skipVectors[0][0] = rightNull;
intermediate[0] = rightObj;
genObject(1, rightFirst, rightNull);
rightFirst = false;
}
// Consolidation for outer joins
if (needsPostEvaluation && 0 == numAliases - 2) {
int nextType = condn[0].getType();
if (nextType == JoinDesc.RIGHT_OUTER_JOIN || nextType == JoinDesc.FULL_OUTER_JOIN) {
// If it is a RIGHT / FULL OUTER JOIN, we need to iterate through the row container
// that contains all the right records that did not produce results. Then, for each
// of those records, we replace the left side with NULL values, and produce the
// records.
// Observe that we only enter this block when we have finished iterating through
// all the left and right records (aliasNum == numAliases - 2), and thus, we have
// tried to evaluate the post-filter condition on every possible combination.
// NOTE: the left records that do not produce results (for LEFT / FULL OUTER JOIN)
// will always be caught in the genObject method
Arrays.fill(forwardCache, null);
for (Object[] row : rowContainerPostFilteredOuterJoin.values()) {
if (row == null) {
continue;
}
System.arraycopy(row, 0, forwardCache, offsets[numAliases - 1], row.length);
internalForward(forwardCache, outputObjInspector);
countAfterReport = 0;
}
}
}
}
// creates objects in recursive manner
private void genObject(int aliasNum, boolean allLeftFirst, boolean allLeftNull)
throws HiveException {
JoinCondDesc joinCond = condn[aliasNum - 1];
int type = joinCond.getType();
int left = joinCond.getLeft();
int right = joinCond.getRight();
if (needsPostEvaluation && aliasNum == numAliases - 2) {
int nextType = condn[aliasNum].getType();
if (nextType == JoinDesc.RIGHT_OUTER_JOIN || nextType == JoinDesc.FULL_OUTER_JOIN) {
// Initialize container to use for storing tuples before emitting them
rowContainerPostFilteredOuterJoin = new HashMap<>();
}
}
boolean[] skip = skipVectors[aliasNum];
boolean[] prevSkip = skipVectors[aliasNum - 1];
// search for match in the rhs table
AbstractRowContainer> aliasRes = storage[order[aliasNum]];
boolean needToProduceLeftRow = false;
boolean producedRow = false;
boolean done = false;
boolean loopAgain = false;
boolean tryLOForFO = type == JoinDesc.FULL_OUTER_JOIN;
boolean rightFirst = true;
AbstractRowContainer.RowIterator> iter = aliasRes.rowIter();
int pos = 0;
for (List rightObj = iter.first(); !done && rightObj != null;
rightObj = loopAgain ? rightObj : iter.next(), rightFirst = loopAgain = false, pos++) {
System.arraycopy(prevSkip, 0, skip, 0, prevSkip.length);
boolean rightNull = rightObj == dummyObj[aliasNum];
if (hasFilter(order[aliasNum])) {
filterTags[aliasNum] = getFilterTag(rightObj);
}
skip[right] = rightNull;
if (type == JoinDesc.INNER_JOIN) {
innerJoin(skip, left, right);
} else if (type == JoinDesc.LEFT_SEMI_JOIN) {
if (innerJoin(skip, left, right)) {
// if left-semi-join found a match and we do not have any additional predicates,
// skipping the rest of the rows in the rhs table of the semijoin
done = !needsPostEvaluation;
}
} else if (type == JoinDesc.LEFT_OUTER_JOIN ||
(type == JoinDesc.FULL_OUTER_JOIN && rightNull)) {
int result = leftOuterJoin(skip, left, right);
if (result < 0) {
continue;
}
done = result > 0;
} else if (type == JoinDesc.RIGHT_OUTER_JOIN ||
(type == JoinDesc.FULL_OUTER_JOIN && allLeftNull)) {
if (allLeftFirst && !rightOuterJoin(skip, left, right) ||
!allLeftFirst && !innerJoin(skip, left, right)) {
continue;
}
} else if (type == JoinDesc.FULL_OUTER_JOIN) {
if (tryLOForFO && leftOuterJoin(skip, left, right) > 0) {
loopAgain = allLeftFirst;
done = !loopAgain;
tryLOForFO = false;
} else if (allLeftFirst && !rightOuterJoin(skip, left, right) ||
!allLeftFirst && !innerJoin(skip, left, right)) {
continue;
}
}
intermediate[aliasNum] = rightObj;
if (aliasNum == numAliases - 1) {
if (!(allLeftNull && rightNull)) {
needToProduceLeftRow = true;
if (needsPostEvaluation) {
// This is only executed for outer joins with residual filters
boolean forward = createForwardJoinObject(skipVectors[numAliases - 1]);
producedRow |= forward;
done = (type == JoinDesc.LEFT_SEMI_JOIN) && forward;
if (!rightNull &&
(type == JoinDesc.RIGHT_OUTER_JOIN || type == JoinDesc.FULL_OUTER_JOIN)) {
if (forward) {
// This record produced a result this time, remove it from the storage
// as it will not need to produce a result with NULL values anymore
rowContainerPostFilteredOuterJoin.put(pos, null);
} else {
// We need to store this record (if it is not done yet) in case
// we should produce a result
if (!rowContainerPostFilteredOuterJoin.containsKey(pos)) {
Object[] row = Arrays.copyOfRange(forwardCache, offsets[aliasNum], offsets[aliasNum + 1]);
rowContainerPostFilteredOuterJoin.put(pos, row);
}
}
}
} else {
createForwardJoinObject(skipVectors[numAliases - 1]);
}
}
} else {
// recursively call the join the other rhs tables
genObject(aliasNum + 1, allLeftFirst && rightFirst, allLeftNull && rightNull);
}
}
// Consolidation for outer joins
if (needsPostEvaluation && aliasNum == numAliases - 1 &&
needToProduceLeftRow && !producedRow && !allLeftNull) {
if (type == JoinDesc.LEFT_OUTER_JOIN || type == JoinDesc.FULL_OUTER_JOIN) {
// If it is a LEFT / FULL OUTER JOIN and the left record did not produce
// results, we need to take that record, replace the right side with NULL
// values, and produce the records
int i = numAliases - 1;
for (int j = offsets[i]; j < offsets[i + 1]; j++) {
forwardCache[j] = null;
}
internalForward(forwardCache, outputObjInspector);
countAfterReport = 0;
}
} else if (needsPostEvaluation && aliasNum == numAliases - 2) {
int nextType = condn[aliasNum].getType();
if (nextType == JoinDesc.RIGHT_OUTER_JOIN || nextType == JoinDesc.FULL_OUTER_JOIN) {
// If it is a RIGHT / FULL OUTER JOIN, we need to iterate through the row container
// that contains all the right records that did not produce results. Then, for each
// of those records, we replace the left side with NULL values, and produce the
// records.
// Observe that we only enter this block when we have finished iterating through
// all the left and right records (aliasNum == numAliases - 2), and thus, we have
// tried to evaluate the post-filter condition on every possible combination.
Arrays.fill(forwardCache, null);
for (Object[] row : rowContainerPostFilteredOuterJoin.values()) {
if (row == null) {
continue;
}
System.arraycopy(row, 0, forwardCache, offsets[numAliases - 1], row.length);
internalForward(forwardCache, outputObjInspector);
countAfterReport = 0;
}
}
}
}
// inner join
private boolean innerJoin(boolean[] skip, int left, int right) {
if (!isInnerJoin(skip, left, right)) {
Arrays.fill(skip, true);
return false;
}
return true;
}
// LO
//
// LEFT\RIGHT skip filtered valid
// skip --(1) --(1) --(1)
// filtered +-(1) +-(1) +-(1)
// valid +-(1) +-(4*) ++(2)
//
// * If right alias has any pair for left alias, continue (3)
// -1 for continue : has pair but not in this turn
// 0 for inner join (++) : join and continue LO
// 1 for left outer join (+-) : join and skip further LO
private int leftOuterJoin(boolean[] skip, int left, int right) {
if (skip[left] || skip[right] || !isLeftValid(left, right)) {
skip[right] = true;
return 1; // case 1
}
if (isRightValid(left, right)) {
return 0; // case 2
}
if (hasRightPairForLeft(left, right)) {
return -1; // case 3
}
skip[right] = true;
return 1; // case 4
}
// RO
//
// LEFT\RIGHT skip filtered valid
// skip --(1) -+(1) -+(1)
// filtered --(1) -+(1) -+(4*)
// valid --(1) -+(1) ++(2)
//
// * If left alias has any pair for right alias, continue (3)
// false for continue : has pair but not in this turn
private boolean rightOuterJoin(boolean[] skip, int left, int right) {
if (skip[left] || skip[right] || !isRightValid(left, right)) {
Arrays.fill(skip, 0, right, true);
return true; // case 1
}
if (isLeftValid(left, right)) {
return true; // case 2
}
if (hasLeftPairForRight(left, right)) {
return false; // case 3
}
Arrays.fill(skip, 0, right, true);
return true; // case 4
}
// If left and right aliases are all valid, two values will be inner joined,
private boolean isInnerJoin(boolean[] skip, int left, int right) {
return !skip[left] && !skip[right] &&
isLeftValid(left, right) && isRightValid(left, right);
}
// check if left is valid
private boolean isLeftValid(int left, int right) {
return !hasFilter(left) || !JoinUtil.isFiltered(filterTags[left], right);
}
// check if right is valid
private boolean isRightValid(int left, int right) {
return !hasFilter(right) || !JoinUtil.isFiltered(filterTags[right], left);
}
// check if any left pair exists for right objects
private boolean hasLeftPairForRight(int left, int right) {
return !JoinUtil.isFiltered(aliasFilterTags[left], right);
}
// check if any right pair exists for left objects
private boolean hasRightPairForLeft(int left, int right) {
return !JoinUtil.isFiltered(aliasFilterTags[right], left);
}
private boolean hasAnyFiltered(int alias, List row) {
return row == dummyObj[alias] || hasFilter(alias) && JoinUtil.hasAnyFiltered(getFilterTag(row));
}
protected final boolean hasFilter(int alias) {
return filterMaps != null && filterMaps[alias] != null;
}
// get tag value from object (last of list)
protected final short getFilterTag(List row) {
return ((ShortWritable) row.get(row.size() - 1)).get();
}
/**
* Forward a record of join results.
*
* @throws HiveException
*/
@Override
public void endGroup() throws HiveException {
checkAndGenObject();
}
protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException {
forward(row, outputOI);
}
private void genUniqueJoinObject(int aliasNum, int forwardCachePos)
throws HiveException {
AbstractRowContainer.RowIterator> iter = storage[order[aliasNum]].rowIter();
for (List row = iter.first(); row != null; row = iter.next()) {
reportProgress();
int sz = joinValues[order[aliasNum]].size();
int p = forwardCachePos;
for (int j = 0; j < sz; j++) {
forwardCache[p++] = row.get(j);
}
if (aliasNum == numAliases - 1) {
internalForward(forwardCache, outputObjInspector);
countAfterReport = 0;
} else {
genUniqueJoinObject(aliasNum + 1, p);
}
}
}
private void genAllOneUniqueJoinObject()
throws HiveException {
int p = 0;
for (int i = 0; i < numAliases; i++) {
int sz = joinValues[order[i]].size();
List obj = storage[order[i]].rowIter().first();
for (int j = 0; j < sz; j++) {
forwardCache[p++] = obj.get(j);
}
}
internalForward(forwardCache, outputObjInspector);
countAfterReport = 0;
}
protected void checkAndGenObject() throws HiveException {
if (closeOpCalled) {
LOG.warn("checkAndGenObject is called after operator " +
id + " " + getName() + " called closeOp");
return;
}
if (condn[0].getType() == JoinDesc.UNIQUE_JOIN) {
// Check if results need to be emitted.
// Results only need to be emitted if there is a non-null entry in a table
// that is preserved or if there are no non-null entries
boolean preserve = false; // Will be true if there is a non-null entry
// in a preserved table
boolean hasNulls = false; // Will be true if there are null entries
boolean allOne = true;
for (int i = 0; i < numAliases; i++) {
Byte alias = order[i];
AbstractRowContainer> alw = storage[alias];
if (!alw.isSingleRow()) {
allOne = false;
}
if (!alw.hasRows()) {
alw.addRow(dummyObj[i]);
hasNulls = true;
} else if (condn[i].getPreserved()) {
preserve = true;
}
}
if (hasNulls && !preserve) {
return;
}
if (allOne) {
genAllOneUniqueJoinObject();
} else {
genUniqueJoinObject(0, 0);
}
} else {
// does any result need to be emitted
boolean mayHasMoreThanOne = false;
boolean hasEmpty = false;
for (int i = 0; i < numAliases; i++) {
Byte alias = order[i];
AbstractRowContainer> alw = storage[alias];
if (noOuterJoin) {
if (!alw.hasRows()) {
return;
} else if (!alw.isSingleRow()) {
mayHasMoreThanOne = true;
}
} else {
if (!alw.hasRows()) {
hasEmpty = true;
alw.addRow(dummyObj[i]);
} else if (!hasEmpty && alw.isSingleRow()) {
if (hasAnyFiltered(alias, alw.rowIter().first())) {
hasEmpty = true;
}
} else {
mayHasMoreThanOne = true;
if (!hasEmpty) {
AbstractRowContainer.RowIterator> iter = alw.rowIter();
for (List row = iter.first(); row != null; row = iter.next()) {
reportProgress();
if (hasAnyFiltered(alias, row)) {
hasEmpty = true;
break;
}
}
}
}
}
}
if (!needsPostEvaluation && !hasEmpty && !mayHasMoreThanOne) {
genAllOneUniqueJoinObject();
} else if (!needsPostEvaluation && !hasEmpty && !hasLeftSemiJoin) {
genUniqueJoinObject(0, 0);
} else {
genJoinObject();
}
}
Arrays.fill(aliasFilterTags, (byte)0xff);
}
protected void reportProgress() {
// Send some status periodically
countAfterReport++;
if ((countAfterReport % heartbeatInterval) == 0
&& (reporter != null)) {
reporter.progress();
countAfterReport = 0;
}
}
/**
* All done.
*
*/
@Override
public void closeOp(boolean abort) throws HiveException {
closeOpCalled = true;
for (AbstractRowContainer> alw : storage) {
if (alw != null) {
alw.clearRows(); // clean up the temp files
}
}
Arrays.fill(storage, null);
super.closeOp(abort);
}
@Override
public String getName() {
return CommonJoinOperator.getOperatorName();
}
static public String getOperatorName() {
return "JOIN";
}
/**
* @return the posToAliasMap
*/
public Map> getPosToAliasMap() {
return posToAliasMap;
}
/**
* @param posToAliasMap
* the posToAliasMap to set
*/
public void setPosToAliasMap(Map> posToAliasMap) {
this.posToAliasMap = posToAliasMap;
}
@Override
public boolean opAllowedBeforeMapJoin() {
return false;
}
@Override
public boolean opAllowedAfterMapJoin() {
return false;
}
}