// org.apache.ignite.spark.impl.IgniteRelationProvider.scala Maven / Gradle / Ivy
// Go to download
// Java-based middleware for in-memory processing of big data in a distributed environment.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.spark.impl
import org.apache.ignite.IgniteException
import org.apache.ignite.configuration.IgniteConfiguration
import org.apache.ignite.internal.IgnitionEx
import org.apache.ignite.internal.util.IgniteUtils
import org.apache.ignite.spark.IgniteContext
import org.apache.ignite.spark.IgniteDataFrameSettings._
import org.apache.ignite.spark.impl.QueryHelper.{createTable, dropTable, ensureCreateTableOptions, saveTable}
import org.apache.spark.sql.SaveMode.{Append, Overwrite}
import org.apache.spark.sql.ignite.IgniteExternalCatalog.{IGNITE_PROTOCOL, OPTION_GRID}
import org.apache.spark.sql.ignite.IgniteOptimization
import org.apache.spark.sql.sources._
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
/**
* Apache Ignite relation provider.
*/
class IgniteRelationProvider extends RelationProvider
    with CreatableRelationProvider
    with DataSourceRegister {
    /**
     * @return Short name of this relation provider (the value of `FORMAT_IGNITE`).
     */
    override def shortName(): String = FORMAT_IGNITE

    /**
     * Creates a relation for reading an existing Ignite table.
     *
     * To create the relation we need a link to an Ignite cluster and a table name.
     * The cluster is referred to through one of the following options:
     *
     *  - `OPTION_CONFIG_FILE` - path to an Ignite configuration file;
     *  - `OPTION_GRID` - name handed to the `ignite(...)` lookup, whose configuration is reused.
     *
     * The existing table inside Apache Ignite is referred to through the `OPTION_TABLE` option.
     *
     * The table option is validated before the Ignite context is built, so a missing table name
     * is reported without touching the cluster first.
     *
     * @param sqlCtx SQLContext.
     * @param params Parameters for relation creation.
     * @return IgniteRelation.
     * @throws IgniteException If the table name or the cluster reference is not specified.
     * @see IgniteRelation
     * @see IgnitionEx#grid(String)
     * @see org.apache.ignite.spark.IgniteDataFrameSettings.OPTION_TABLE
     * @see org.apache.ignite.spark.IgniteDataFrameSettings.OPTION_CONFIG_FILE
     */
    override def createRelation(sqlCtx: SQLContext, params: Map[String, String]): BaseRelation = {
        val tblName = params.getOrElse(OPTION_TABLE, throw new IgniteException("'table' must be specified."))

        createRelation(igniteContext(params, sqlCtx), tblName, sqlCtx)
    }

    /**
     * Saves `data` to the corresponding Ignite table and returns a relation for the saved data.
     *
     * The cluster is referred to through `OPTION_CONFIG_FILE` or `OPTION_GRID`; the table is referred to
     * through `OPTION_TABLE` or, if that is absent, the `path` option. The table name is normalized by
     * [[tableName]].
     *
     * Behaviour depends on whether the table already exists and on `mode`:
     *
     *  - table is missing - it is created and filled, regardless of `mode`;
     *  - `Overwrite` - the table is recreated (DROP TABLE, CREATE TABLE) and filled;
     *  - `Append` - data is written into the existing table;
     *  - `ErrorIfExists` - an `IgniteException` is thrown;
     *  - `Ignore` - nothing is written, the existing data is left untouched.
     *
     * Options read when a table has to be created:
     *
     *  - `OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS` - required. Comma separated list of primary key fields.
     *  - `OPTION_CREATE_TABLE_PARAMETERS` - optional. Ignite specific parameters for a new table.
     *    See WITH [https://apacheignite-sql.readme.io/docs/create-table].
     *
     * Optional data streamer settings passed through to the write: `OPTION_STREAMER_ALLOW_OVERWRITE`,
     * `OPTION_STREAMER_FLUSH_FREQUENCY`, `OPTION_STREAMER_PER_NODE_BUFFER_SIZE`,
     * `OPTION_STREAMER_PER_NODE_PARALLEL_OPERATIONS`.
     *
     * @param sqlCtx SQLContext.
     * @param mode Save mode.
     * @param params Additional parameters.
     * @param data Data to save.
     * @return IgniteRelation.
     * @throws IgniteException If a required option is missing, or the table exists and `mode` is `ErrorIfExists`.
     */
    override def createRelation(sqlCtx: SQLContext,
        mode: SaveMode,
        params: Map[String, String],
        data: DataFrame): BaseRelation = {
        // Resolve the table name first: it is a cheap check that should fail before the context is built.
        val tblName = tableName(params)

        val ctx = igniteContext(params, sqlCtx)

        val tblExists = sqlTableInfo[Any, Any](ctx.ignite(), tblName).isDefined

        if (tblExists) {
            mode match {
                case Overwrite =>
                    // Validate the create options before dropping, so a bad request does not destroy the table.
                    ensureCreateTableOptions(data.schema, params, ctx)

                    dropTable(tblName, ctx.ignite())

                    createAndFillTable(data, tblName, ctx, params)

                case Append =>
                    writeData(data, tblName, ctx, params)

                case SaveMode.ErrorIfExists =>
                    throw new IgniteException(s"Table or view '$tblName' already exists. SaveMode: ErrorIfExists.")

                case SaveMode.Ignore =>
                    // With `SaveMode.Ignore` mode, if table already exists, the save operation is expected
                    // to not save the contents of the DataFrame and to not change the existing data.
                    // Therefore, it is okay to do nothing here and then just return the relation below.
            }
        }
        else {
            ensureCreateTableOptions(data.schema, params, ctx)

            createAndFillTable(data, tblName, ctx, params)
        }

        createRelation(ctx, tblName, sqlCtx)
    }

    /**
     * Creates table `tblName` from the schema of `data` and then writes `data` into it.
     *
     * @param data Data to save; its schema defines the table columns.
     * @param tblName Table name.
     * @param ctx Ignite context.
     * @param params Write parameters.
     */
    private def createAndFillTable(data: DataFrame,
        tblName: String,
        ctx: IgniteContext,
        params: Map[String, String]): Unit = {
        createTable(data.schema,
            tblName,
            primaryKeyFields(params),
            params.get(OPTION_CREATE_TABLE_PARAMETERS),
            ctx.ignite())

        writeData(data, tblName, ctx, params)
    }

    /**
     * Writes `data` into table `tblName`, passing through the optional data streamer settings.
     *
     * @param data Data to save.
     * @param tblName Table name.
     * @param ctx Ignite context.
     * @param params Write parameters; absent streamer options are passed as `None`.
     */
    private def writeData(data: DataFrame,
        tblName: String,
        ctx: IgniteContext,
        params: Map[String, String]): Unit =
        saveTable(data,
            tblName,
            ctx,
            params.get(OPTION_STREAMER_ALLOW_OVERWRITE).map(_.toBoolean),
            params.get(OPTION_STREAMER_FLUSH_FREQUENCY).map(_.toLong),
            params.get(OPTION_STREAMER_PER_NODE_BUFFER_SIZE).map(_.toInt),
            params.get(OPTION_STREAMER_PER_NODE_PARALLEL_OPERATIONS).map(_.toInt))

    /**
     * Builds the relation and synchronizes the session's extra optimizations with the
     * `OPTION_DISABLE_SPARK_SQL_OPTIMIZATION` setting: `IgniteOptimization` is removed when the option
     * is `true` and registered (once) otherwise.
     *
     * @param igniteCtx Ignite context.
     * @param tblName Table name.
     * @param sqlCtx SQL context.
     * @return Ignite SQL relation.
     */
    private def createRelation(igniteCtx: IgniteContext, tblName: String, sqlCtx: SQLContext): BaseRelation = {
        val session = sqlCtx.sparkSession

        val optimizationDisabled = session.conf.get(OPTION_DISABLE_SPARK_SQL_OPTIMIZATION, "false").toBoolean

        val experimentalMethods = session.sessionState.experimentalMethods

        val currentRules = experimentalMethods.extraOptimizations

        if (optimizationDisabled)
            experimentalMethods.extraOptimizations = currentRules.filter(_ != IgniteOptimization)
        else if (!currentRules.contains(IgniteOptimization))
            experimentalMethods.extraOptimizations = currentRules :+ IgniteOptimization

        IgniteSQLRelation(
            igniteCtx,
            tblName,
            sqlCtx)
    }

    /**
     * Builds an Ignite context whose configuration comes either from the file given by
     * `OPTION_CONFIG_FILE` or from the instance looked up by `OPTION_GRID`. In both cases the
     * configuration is switched to client mode.
     *
     * @param params Params.
     * @param sqlCtx SQL Context.
     * @return IgniteContext.
     * @throws IgniteException If neither option is specified.
     */
    private def igniteContext(params: Map[String, String], sqlCtx: SQLContext): IgniteContext = {
        // Captured here so that the closures below carry the value obtained in this JVM.
        val igniteHome = IgniteUtils.getIgniteHome

        // The closures are intentionally kept self-contained: they reference only `params` and `igniteHome`.
        val configProvider: () => IgniteConfiguration =
            if (params.contains(OPTION_CONFIG_FILE))
                () => {
                    IgniteContext.setIgniteHome(igniteHome)

                    val cfg = IgnitionEx.loadConfiguration(params(OPTION_CONFIG_FILE)).get1()

                    cfg.setClientMode(true)

                    cfg
                }
            else if (params.contains(OPTION_GRID))
                () => {
                    IgniteContext.setIgniteHome(igniteHome)

                    val cfg = ignite(params(OPTION_GRID)).configuration()

                    cfg.setClientMode(true)

                    cfg
                }
            else
                throw new IgniteException("'config' must be specified to connect to ignite cluster.")

        IgniteContext(sqlCtx.sparkContext, configProvider)
    }

    /**
     * Resolves the table name from `OPTION_TABLE`, falling back to the `path` option. A leading
     * `IGNITE_PROTOCOL` prefix is stripped and the name is upper-cased.
     *
     * @param params Params.
     * @return Table name.
     * @throws IgniteException If neither `OPTION_TABLE` nor `path` is specified.
     */
    private def tableName(params: Map[String, String]): String = {
        val tblName = params.getOrElse(OPTION_TABLE,
            params.getOrElse("path", throw new IgniteException("'table' or 'path' must be specified.")))

        // Only the leading protocol marker is removed; upper-casing must not depend on the JVM default
        // locale (e.g. Turkish 'i' -> 'İ'), otherwise the identifier would not match the SQL table name.
        tblName.stripPrefix(IGNITE_PROTOCOL).toUpperCase(java.util.Locale.ROOT)
    }

    /**
     * Reads the primary key fields from `OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS`, which must be present.
     * The value is split on commas; entries are not trimmed.
     *
     * @param params Params.
     * @return Sequence of primary key fields.
     */
    private def primaryKeyFields(params: Map[String, String]): Seq[String] =
        params(OPTION_CREATE_TABLE_PRIMARY_KEY_FIELDS).split(",")
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy