All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.ignite.spark.IgniteContext.scala Maven / Gradle / Ivy

Go to download

Java-based middleware for in-memory processing of big data in a distributed environment.

There is a newer version: 2.13.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ignite.spark

import org.apache.ignite._
import org.apache.ignite.configuration.{CacheConfiguration, IgniteConfiguration}
import org.apache.ignite.internal.IgnitionEx
import org.apache.ignite.internal.util.IgniteUtils
import org.apache.ignite.spark.IgniteContext.setIgniteHome
import org.apache.spark.sql.SQLContext
import org.apache.spark.SparkContext
import org.apache.log4j.Logger
import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd}

/**
 * Ignite context: binds a Spark application to an Ignite instance.
 *
 * Constructing a context starts (or attaches to) an Ignite instance in the current JVM and registers a Spark
 * listener that closes that instance when the Spark application ends. In embedded (non-standalone) mode the
 * constructor additionally runs a Spark job, with as many partitions as expected executors, that calls
 * `ignite()` in every partition.
 *
 * @param sparkContext Spark context. Marked transient; the code below treats a non-null value as "running on
 *      the driver".
 * @param cfgF Configuration factory.
 * @param standalone Standalone (`true`, the default) or embedded (`false`) mode. Embedded mode is deprecated.
 */
class IgniteContext(
    @transient val sparkContext: SparkContext,
    cfgF: () ⇒ IgniteConfiguration,
    @deprecated("Embedded mode is deprecated and will be discontinued. Consider using standalone mode instead.")
    standalone: Boolean = true
    ) extends Serializable {
    /** Caches the configuration produced by the factory (see `Once`). */
    private val cfgClo = new Once(cfgF)

    /** Ignite home resolved at construction time; handed to `setIgniteHome` before every start attempt. */
    private val igniteHome = IgniteUtils.getIgniteHome

    if (!standalone) {
        Logging.log.warn("Embedded mode is deprecated and will be discontinued. Consider using standalone mode instead.")

        val workers = executorCount()

        if (workers <= 0)
            throw new IllegalStateException("No Spark executors found to start Ignite nodes.")

        Logging.log.info(s"Will start Ignite nodes on $workers workers")

        // Start ignite server node on each worker in server mode.
        sparkContext.parallelize(1 to workers, workers).foreachPartition(_ ⇒ ignite())
    }

    // Make sure to start Ignite on context creation.
    ignite()

    // Stop local ignite instance on application end.
    // Instances on workers will be stopped with executor stop (JVM exit).
    sparkContext.addSparkListener(new SparkListener {
        override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = {
            close()
        }
    })

    /**
     * Creates an instance of IgniteContext with the given spring configuration.
     *
     * @param sc Spark context.
     * @param springUrl Spring configuration path.
     * @param standalone Standalone or embedded mode.
     */
    @deprecated("Embedded mode is deprecated and will be discontinued. Consider using standalone mode instead.")
    def this(
        sc: SparkContext,
        springUrl: String,
        standalone: Boolean
        ) {
        this(sc, () ⇒ IgnitionEx.loadConfiguration(springUrl).get1(), standalone)
    }

    /**
     * Creates an instance of IgniteContext with the given spring configuration.
     *
     * @param sc Spark context.
     * @param springUrl Spring configuration path.
     */
    def this(
        sc: SparkContext,
        springUrl: String
        ) {
        this(sc, () ⇒ IgnitionEx.loadConfiguration(springUrl).get1())
    }

    /**
     * Creates an instance of IgniteContext with default Ignite configuration.
     * By default this method will use grid configuration defined in `IGNITE_HOME/config/default-config.xml`
     * configuration file.
     *
     * @param sc Spark context.
     */
    def this(sc: SparkContext) {
        this(sc, IgnitionEx.DFLT_CFG)
    }

    /** SQL context created on top of the supplied Spark context. */
    val sqlContext: SQLContext = new SQLContext(sparkContext)

    /**
     * Creates an `IgniteRDD` instance from the given cache name. If the cache does not exist, it will be
     * automatically started from template on the first invoked RDD action.
     *
     * @param cacheName Cache name.
     * @return `IgniteRDD` instance.
     */
    def fromCache[K, V](cacheName: String): IgniteRDD[K, V] = {
        new IgniteRDD[K, V](this, cacheName, null, false)
    }

    /**
     * Creates an `IgniteRDD` instance from the given cache configuration. If the cache does not exist, it will be
     * automatically started using the configuration provided on the first invoked RDD action.
     *
     * @param cacheCfg Cache configuration to use.
     * @return `IgniteRDD` instance.
     */
    def fromCache[K, V](cacheCfg: CacheConfiguration[K, V]): IgniteRDD[K, V] = {
        new IgniteRDD[K, V](this, cacheCfg.getName, cacheCfg, false)
    }

    /**
     * Gets the Ignite instance, starting it if it's not started yet.
     *
     * Client mode is switched on in the configuration when this context is standalone or when the Spark
     * context is available (i.e. the call is made on the driver).
     *
     * @return Ignite instance obtained from `Ignition.getOrStart`.
     * @throws IgniteException If Ignite failed to start; the failure is logged and rethrown.
     */
    def ignite(): Ignite = {
        setIgniteHome(igniteHome)

        val igniteCfg = cfgClo()

        // check if called from driver
        if (standalone || sparkContext != null) igniteCfg.setClientMode(true)

        try {
            Ignition.getOrStart(igniteCfg)
        }
        catch {
            case e: IgniteException ⇒
                Logging.log.error("Failed to start Ignite.", e)

                throw e
        }
    }

    /**
     * Stops supporting ignite instance. If ignite instance has been already stopped, this operation will be
     * a no-op.
     *
     * @param shutdownIgniteOnWorkers If `true` and the Spark context is available, a Spark job is run first
     *      that stops the Ignite instance from within every partition; the local instance is stopped afterwards
     *      in any case.
     */
    def close(shutdownIgniteOnWorkers: Boolean = false): Unit = {
        // additional check if called from driver
        if (sparkContext != null && shutdownIgniteOnWorkers) {
            val workers = executorCount()

            if (workers > 0) {
                Logging.log.info(s"Will stop Ignite nodes on $workers workers")

                // Stop the ignite node on each worker.
                sparkContext.parallelize(1 to workers, workers).foreachPartition(_ ⇒ doClose())
            }
        }

        doClose()
    }

    /**
     * Gets required number of executors with default equal to the number of available executors.
     * Must only be called where `sparkContext` is available.
     *
     * @return Value of `spark.executor.instances` if configured, otherwise the number of executor storage
     *      statuses reported by the Spark context.
     */
    private def executorCount(): Int =
        sparkContext.getConf.getInt("spark.executor.instances", sparkContext.getExecutorStorageStatus.length)

    /** Stops the Ignite instance named in the configuration, but only if it is currently in STARTED state. */
    private def doClose(): Unit = {
        val igniteCfg = cfgClo()

        if (Ignition.state(igniteCfg.getIgniteInstanceName) == IgniteState.STARTED)
            Ignition.stop(igniteCfg.getIgniteInstanceName, false)
    }
}

/**
 * Companion of `IgniteContext`: factory method plus the helper that propagates the Ignite home setting.
 */
object IgniteContext {
    /**
     * Creates an `IgniteContext` by delegating to the primary constructor.
     *
     * @param sparkContext Spark context.
     * @param cfgF Configuration factory.
     * @param standalone Standalone or embedded mode.
     * @return New `IgniteContext` instance.
     */
    def apply(sparkContext: SparkContext, cfgF: () ⇒ IgniteConfiguration, standalone: Boolean = true): IgniteContext = {
        new IgniteContext(sparkContext, cfgF, standalone)
    }

    /**
     * Applies the given Ignite home through the `IGNITE_HOME` system property when no home is resolved locally.
     * Nothing happens if a home is already resolved locally or if the given value is `null`.
     *
     * @param igniteHome Ignite home to apply; may be `null`.
     */
    def setIgniteHome(igniteHome: String): Unit = {
        val localHomeMissing = IgniteUtils.getIgniteHome == null

        if (localHomeMissing && igniteHome != null) {
            val msg = s"Setting IGNITE_HOME from driver not as it is not available on this worker: $igniteHome"

            Logging.log.info(msg)

            // Reset the home directory state held by IgniteUtils before publishing the new value.
            IgniteUtils.nullifyHomeDirectory()

            System.setProperty(IgniteSystemProperties.IGNITE_HOME, igniteHome)
        }
    }
}

/**
 * Auxiliary wrapper that invokes the passed in closure lazily and remembers its non-null result, so the
 * closure is not executed again once a result has been stored.
 *
 * The stored result is transient: a deserialized copy starts without it and invokes the closure again on
 * first use.
 *
 * @param clo Closure to wrap.
 */
class Once(clo: () ⇒ IgniteConfiguration) extends Serializable {
    @transient @volatile var res: IgniteConfiguration = null

    /**
     * Returns the stored configuration, computing it from the closure first if none is stored yet.
     *
     * @return Configuration produced by the wrapped closure.
     */
    def apply(): IgniteConfiguration = {
        val cached = res

        // Fast path: a result is already stored, no locking needed.
        if (cached != null)
            cached
        else
            this.synchronized {
                // Re-check under the lock: another thread may have stored a result in the meantime.
                if (res == null)
                    res = clo()

                res
            }
    }
}

/**
 * Holder of the log4j logger used by `IgniteContext`. Spark uses log4j by default, so the same logging
 * backend is used here.
 *
 * The logger lives in this separate object, as a transient lazy value, to avoid problems with log4j
 * serialization.
 */
object Logging extends Serializable {
    @transient lazy val log: Logger = Logger.getLogger(classOf[IgniteContext])
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy