// org.apache.hadoop.hive.ql.exec.spark.SparkPlan (Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec.spark;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.io.BytesWritable;
import org.apache.spark.api.java.JavaPairRDD;
import com.facebook.presto.hive.$internal.com.google.common.base.Preconditions;
@SuppressWarnings("rawtypes")
public class SparkPlan {
private static final String CLASS_NAME = SparkPlan.class.getName();
private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
private final Set rootTrans = new HashSet();
private final Set leafTrans = new HashSet();
private final Map> transGraph = new HashMap>();
private final Map> invertedTransGraph = new HashMap>();
private final Set cachedRDDIds = new HashSet();
@SuppressWarnings("unchecked")
public JavaPairRDD generateGraph() {
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_BUILD_RDD_GRAPH);
Map> tranToOutputRDDMap
= new HashMap>();
for (SparkTran tran : getAllTrans()) {
JavaPairRDD rdd = null;
List parents = getParents(tran);
if (parents.size() == 0) {
// Root tran, it must be MapInput
Preconditions.checkArgument(tran instanceof MapInput,
"AssertionError: tran must be an instance of MapInput");
rdd = tran.transform(null);
} else {
for (SparkTran parent : parents) {
JavaPairRDD prevRDD = tranToOutputRDDMap.get(parent);
if (rdd == null) {
rdd = prevRDD;
} else {
rdd = rdd.union(prevRDD);
}
}
rdd = tran.transform(rdd);
}
tranToOutputRDDMap.put(tran, rdd);
}
JavaPairRDD finalRDD = null;
for (SparkTran leafTran : leafTrans) {
JavaPairRDD rdd = tranToOutputRDDMap.get(leafTran);
if (finalRDD == null) {
finalRDD = rdd;
} else {
finalRDD = finalRDD.union(rdd);
}
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_RDD_GRAPH);
return finalRDD;
}
public void addTran(SparkTran tran) {
rootTrans.add(tran);
leafTrans.add(tran);
}
public void addCachedRDDId(int rddId) {
cachedRDDIds.add(rddId);
}
public Set getCachedRDDIds() {
return cachedRDDIds;
}
/**
* This method returns a topologically sorted list of SparkTran.
*/
private List getAllTrans() {
List result = new LinkedList();
Set seen = new HashSet();
for (SparkTran leaf: leafTrans) {
// make sure all leaves are visited at least once
visit(leaf, seen, result);
}
return result;
}
private void visit(SparkTran child, Set seen, List result) {
if (seen.contains(child)) {
// don't visit multiple times
return;
}
seen.add(child);
for (SparkTran parent: getParents(child)) {
if (!seen.contains(parent)) {
visit(parent, seen, result);
}
}
result.add(child);
}
/**
* Connects the two SparkTrans in the graph. Does not allow multiple connections
* between the same pair of SparkTrans.
* @param parent
* @param child
*/
public void connect(SparkTran parent, SparkTran child) {
if (getChildren(parent).contains(child)) {
throw new IllegalStateException("Connection already exists");
}
rootTrans.remove(child);
leafTrans.remove(parent);
if (transGraph.get(parent) == null) {
transGraph.put(parent, new LinkedList());
}
if (invertedTransGraph.get(child) == null) {
invertedTransGraph.put(child, new LinkedList());
}
transGraph.get(parent).add(child);
invertedTransGraph.get(child).add(parent);
}
public List getParents(SparkTran tran) {
if (!invertedTransGraph.containsKey(tran)) {
return new ArrayList();
}
return invertedTransGraph.get(tran);
}
public List getChildren(SparkTran tran) {
if (!transGraph.containsKey(tran)) {
return new ArrayList();
}
return transGraph.get(tran);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy