
org.apache.hadoop.hive.ql.exec.spark.HiveSparkClientFactory Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.exec.spark;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.commons.compress.utils.CharsetNames;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.spark.client.rpc.RpcConfiguration;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkException;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.Sets;

public class HiveSparkClientFactory {
  protected static final transient Log LOG = LogFactory.getLog(HiveSparkClientFactory.class);

  private static final String SPARK_DEFAULT_CONF_FILE = "spark-defaults.conf";
  private static final String SPARK_DEFAULT_MASTER = "yarn-cluster";
  private static final String SPARK_DEFAULT_APP_NAME = "Hive on Spark";
  private static final String SPARK_DEFAULT_SERIALIZER = "org.apache.spark.serializer.KryoSerializer";
  private static final String SPARK_DEFAULT_REFERENCE_TRACKING = "false";

  public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf)
      throws IOException, SparkException {
    Map<String, String> sparkConf = initiateSparkConf(hiveconf);
    // Submit the Spark job through a local Spark context when the Spark master runs in local
    // mode; otherwise submit it through a remote Spark context.
    String master = sparkConf.get("spark.master");
    if (master.equals("local") || master.startsWith("local[")) {
      // With a local Spark context, all user sessions share the same Spark context.
      return LocalHiveSparkClient.getInstance(generateSparkConf(sparkConf));
    } else {
      return new RemoteHiveSparkClient(hiveconf, sparkConf);
    }
  }

  public static Map<String, String> initiateSparkConf(HiveConf hiveConf) {
    Map<String, String> sparkConf = new HashMap<String, String>();

    // set default spark configurations.
    sparkConf.put("spark.master", SPARK_DEFAULT_MASTER);
    sparkConf.put("spark.app.name", SPARK_DEFAULT_APP_NAME);
    sparkConf.put("spark.serializer", SPARK_DEFAULT_SERIALIZER);
    sparkConf.put("spark.kryo.referenceTracking", SPARK_DEFAULT_REFERENCE_TRACKING);

    // load properties from spark-defaults.conf.
    InputStream inputStream = null;
    try {
      inputStream = HiveSparkClientFactory.class.getClassLoader()
        .getResourceAsStream(SPARK_DEFAULT_CONF_FILE);
      if (inputStream != null) {
        LOG.info("loading spark properties from:" + SPARK_DEFAULT_CONF_FILE);
        Properties properties = new Properties();
        properties.load(new InputStreamReader(inputStream, CharsetNames.UTF_8));
        for (String propertyName : properties.stringPropertyNames()) {
          if (propertyName.startsWith("spark")) {
            String value = properties.getProperty(propertyName);
            sparkConf.put(propertyName, value);
            LOG.info(String.format(
              "load spark property from %s (%s -> %s).",
              SPARK_DEFAULT_CONF_FILE, propertyName, value));
          }
        }
      }
    } catch (IOException e) {
      LOG.info("Failed to open spark configuration file:" + SPARK_DEFAULT_CONF_FILE, e);
    } finally {
      if (inputStream != null) {
        try {
          inputStream.close();
        } catch (IOException e) {
          LOG.debug("Failed to close inputstream.", e);
        }
      }
    }

    // load properties from hive configurations, including spark.* properties,
    // properties for the remote driver RPC, and yarn properties for Spark on YARN mode.
    String sparkMaster = hiveConf.get("spark.master");
    if (sparkMaster == null) {
      sparkMaster = sparkConf.get("spark.master");
    }
    if (sparkMaster.equals("yarn-cluster")) {
      sparkConf.put("spark.yarn.maxAppAttempts", "1");
    }
    for (Map.Entry<String, String> entry : hiveConf) {
      String propertyName = entry.getKey();
      if (propertyName.startsWith("spark")) {
        String value = hiveConf.get(propertyName);
        sparkConf.put(propertyName, value);
        LOG.info(String.format(
          "load spark property from hive configuration (%s -> %s).",
          propertyName, value));
      } else if (propertyName.startsWith("yarn") &&
          (sparkMaster.equals("yarn-client") || sparkMaster.equals("yarn-cluster"))) {
        String value = hiveConf.get(propertyName);
        // Add the spark.hadoop prefix to yarn properties, as SparkConf only accepts properties
        // that start with the spark prefix; Spark later strips the spark.hadoop prefix and adds
        // them to its Hadoop configuration.
        sparkConf.put("spark.hadoop." + propertyName, value);
        LOG.info(String.format(
          "load yarn property from hive configuration in %s mode (%s -> %s).",
          sparkMaster, propertyName, value));
      }
      if (RpcConfiguration.HIVE_SPARK_RSC_CONFIGS.contains(propertyName)) {
        String value = RpcConfiguration.getValue(hiveConf, propertyName);
        sparkConf.put(propertyName, value);
        LOG.info(String.format(
          "load RPC property from hive configuration (%s -> %s).",
          propertyName, value));
      }
    }

    Set<String> classes = Sets.newHashSet(
      Splitter.on(",").trimResults().omitEmptyStrings().split(
        Strings.nullToEmpty(sparkConf.get("spark.kryo.classesToRegister"))));
    classes.add(VectorizedRowBatch.class.getName());
    classes.add(BytesWritable.class.getName());
    classes.add(HiveKey.class.getName());
    sparkConf.put("spark.kryo.classesToRegister", Joiner.on(",").join(classes));

    return sparkConf;
  }

  static SparkConf generateSparkConf(Map<String, String> conf) {
    SparkConf sparkConf = new SparkConf(false);
    for (Map.Entry<String, String> entry : conf.entrySet()) {
      sparkConf.set(entry.getKey(), entry.getValue());
    }
    return sparkConf;
  }
}
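
For orientation, here is a minimal, hypothetical sketch of how the configuration assembled by this factory could be inspected. It only relies on the public initiateSparkConf method shown above and on HiveConf being available on the classpath; the printed defaults follow directly from the constants defined in this class, but the example class name is made up for illustration.

import java.util.Map;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.spark.HiveSparkClientFactory;

public class InspectSparkConfExample {
  public static void main(String[] args) {
    // Any spark.* entries carried by the HiveConf override the class defaults.
    HiveConf hiveConf = new HiveConf();

    // initiateSparkConf merges the built-in defaults, spark-defaults.conf (if found on the
    // classpath), and spark.* / yarn.* / RPC properties taken from the HiveConf.
    Map<String, String> sparkConf = HiveSparkClientFactory.initiateSparkConf(hiveConf);

    // "yarn-cluster" unless overridden by spark-defaults.conf or the HiveConf.
    System.out.println(sparkConf.get("spark.master"));
    // Always includes VectorizedRowBatch, BytesWritable and HiveKey, comma-joined.
    System.out.println(sparkConf.get("spark.kryo.classesToRegister"));
  }
}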
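And a similarly hedged sketch of end-to-end client creation. As the factory code shows, the choice between the shared LocalHiveSparkClient and a RemoteHiveSparkClient is driven purely by the spark.master value; the close() call assumes HiveSparkClient is Closeable in this Hive line, which is not visible in this file.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.spark.HiveSparkClient;
import org.apache.hadoop.hive.ql.exec.spark.HiveSparkClientFactory;

public class CreateClientExample {
  public static void main(String[] args) throws Exception {
    HiveConf hiveConf = new HiveConf();
    // A local master makes the factory return the shared LocalHiveSparkClient;
    // any other master (e.g. yarn-client, yarn-cluster) yields a RemoteHiveSparkClient.
    hiveConf.set("spark.master", "local[2]");
    hiveConf.set("spark.app.name", "hive-on-spark-sketch");

    HiveSparkClient client = HiveSparkClientFactory.createHiveSparkClient(hiveConf);
    try {
      // Spark work would be submitted through the client here.
    } finally {
      client.close(); // assumes HiveSparkClient is Closeable in this Hive line
    }
  }
}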




