All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.storage.TopologyMapper.scala Maven / Gradle / Ivy

There is a newer version: 3.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.storage

import org.apache.spark.SparkConf
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.internal.{config, Logging}
import org.apache.spark.util.Utils

/**
 * ::DeveloperApi::
 * TopologyMapper provides topology information for a given host
 * @param conf SparkConf to get required properties, if needed
 */
@DeveloperApi
abstract class TopologyMapper(conf: SparkConf) {
  /**
   * Gets the topology information given the host name
   *
   * @param hostname Hostname
   * @return topology information for the given hostname. One can use a 'topology delimiter'
   *         to make this topology information nested.
   *         For example : ‘/myrack/myhost’, where ‘/’ is the topology delimiter,
   *         ‘myrack’ is the topology identifier, and ‘myhost’ is the individual host.
   *         This function only returns the topology information without the hostname.
   *         This information can be used when choosing executors for block replication
   *         to discern executors from a different rack than a candidate executor, for example.
   *
   *         An implementation can choose to use empty strings or None in case topology info
   *         is not available. This would imply that all such executors belong to the same rack.
   */
  def getTopologyForHost(hostname: String): Option[String]
}

/**
 * A TopologyMapper that assumes all nodes are in the same rack
 */
@DeveloperApi
class DefaultTopologyMapper(conf: SparkConf) extends TopologyMapper(conf) with Logging {
  override def getTopologyForHost(hostname: String): Option[String] = {
    logDebug(s"Got a request for $hostname")
    None
  }
}

/**
 * A simple file based topology mapper. This expects topology information provided as a
 * `java.util.Properties` file. The name of the file is obtained from SparkConf property
 * `spark.storage.replication.topologyFile`. To use this topology mapper, set the
 * `spark.storage.replication.topologyMapper` property to
 * [[org.apache.spark.storage.FileBasedTopologyMapper]]
 * @param conf SparkConf object
 */
@DeveloperApi
class FileBasedTopologyMapper(conf: SparkConf) extends TopologyMapper(conf) with Logging {
  val topologyFile = conf.get(config.STORAGE_REPLICATION_TOPOLOGY_FILE)
  require(topologyFile.isDefined, "Please specify topology file via " +
    "spark.storage.replication.topologyFile for FileBasedTopologyMapper.")
  val topologyMap = Utils.getPropertiesFromFile(topologyFile.get)

  override def getTopologyForHost(hostname: String): Option[String] = {
    val topology = topologyMap.get(hostname)
    if (topology.isDefined) {
      logDebug(s"$hostname -> ${topology.get}")
    } else {
      logWarning(s"$hostname does not have any topology information")
    }
    topology
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy