{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "description": "A Spark history server configuration",
  "type": "object",
  "extends": {
    "type": "object",
    "existingJavaType": "io.radanalytics.operator.common.EntityInfo"
  },
  "properties": {
    "type": {
      "type": "string",
      "default": "sharedVolume",
      "enum": ["sharedVolume", "remoteStorage"],
      "javaEnumNames": ["sharedVolume", "remoteStorage"]
    },
    "sharedVolume": {
      "type": "object",
      "existingJavaType": "io.radanalytics.types.SharedVolume",
      "properties": {
        "size": {
          "type": "string",
          "default": "0.3Gi"
        },
        "mountPath": {
          "type": "string",
          "default": "/history/spark-events"
        },
        "matchLabels": {
          "type": "object",
          "existingJavaType": "java.util.Map"
        }
      }
    },
    "sparkConfiguration": {
      "type": "array",
      "items": {
        "type": "object",
        "existingJavaType": "io.radanalytics.types.SparkConfiguration",
        "properties": {
          "name": { "type": "string" },
          "value": { "type": "string" }
        },
        "required": ["name", "value"]
      }
    },
    "remoteURI": {
      "type": "string",
      "description": "s3 bucket or hdfs path"
    },
    "expose": {
      "type": "boolean",
      "default": false,
      "description": "Should the operator also expose the service? For OpenShift the route is created, while for Kubernetes the Ingress object is created."
    },
    "host": {
      "type": "string",
      "default": "",
      "description": "Custom dns hostname under which the Spark History server will be exposed. If not specified it should be generated by OpenShift route, for K8s the Ingress resource is created and it's up to the Ingress controller."
    },
    "customImage": {
      "type": "string",
      "description": "Container image that will be used for the spark history server. It assumes the standard Spark distribution under /opt/spark"
    },
    "logDirectory": {
      "type": "string",
      "default": "file:/history/spark-events",
      "description": "For the filesystem history provider, the URL to the directory containing application event logs to load. This can be a local file:// path, an HDFS path hdfs://namenode/shared/spark-logs or that of an alternative filesystem supported by the Hadoop APIs."
    },
    "updateInterval": {
      "type": "integer",
      "default": 10,
      "minimum": 1,
      "description": "The period (seconds) at which the filesystem history provider checks for new or updated logs in the log directory. A shorter interval detects new applications faster, at the expense of more server load re-reading updated applications. As soon as an update has completed, listings of the completed and incomplete applications will reflect the changes."
    },
    "internalPort": {
      "type": "integer",
      "default": 18080,
      "minimum": 1025,
      "description": "The port on pod to which the web interface of the history server binds. If exposed via Route or Ingress, this internal port will probably map to some other port."
    },
    "retainedApplications": {
      "type": "integer",
      "default": 50,
      "minimum": 1,
      "description": "The number of applications to retain UI data for in the cache. If this cap is exceeded, then the oldest applications will be removed from the cache. If an application is not in the cache, it will have to be loaded from disk if it is accessed from the UI."
    },
    "maxApplications": {
      "type": "integer",
      "default": 999999,
      "minimum": 1,
      "description": "The number of applications to display on the history summary page. Application UIs are still available by accessing their URLs directly even if they are not displayed on the history summary page."
    },
    "provider": {
      "type": "string",
      "default": "org.apache.spark.deploy.history.FsHistoryProvider",
      "description": "Name of the class implementing the application history backend. Currently there is only one implementation, provided by Spark, which looks for application logs stored in the file system."
    },
    "kerberos": {
      "type": "object",
      "properties": {
        "enabled": {
          "type": "boolean",
          "default": false,
          "description": "Indicates whether the history server should use kerberos to login. This is required if the history server is accessing HDFS files on a secure Hadoop cluster. If this is true, it uses the configs spark.history.kerberos.principal and spark.history.kerberos.keytab."
        },
        "principal": {
          "type": "string",
          "description": "Kerberos principal name for the History Server."
        },
        "keytab": {
          "type": "string",
          "description": "Location of the kerberos keytab file for the History Server."
        }
      }
    },
    "cleaner": {
      "type": "object",
      "properties": {
        "enabled": {
          "type": "boolean",
          "default": false,
          "description": "Specifies whether the History Server should periodically clean up event logs from storage."
        },
        "interval": {
          "type": "integer",
          "default": 1,
          "minimum": 1,
          "description": "How often (days) the filesystem job history cleaner checks for files to delete. Files are only deleted if they are older than spark.history.fs.cleaner.maxAge"
        },
        "maxAge": {
          "type": "integer",
          "default": 7,
          "minimum": 1,
          "description": "# of days, job history files older than this will be deleted when the filesystem history cleaner runs."
        }
      }
    },
    "endEventReparseChunkSize": {
      "type": "integer",
      "default": 1,
      "minimum": 1,
      "description": "# of MB; How many bytes to parse at the end of log files looking for the end event. This is used to speed up generation of application listings by skipping unnecessary parts of event log files. It can be disabled by setting this config to 0."
    },
    "inProgressOptimization": {
      "type": "boolean",
      "default": true,
      "description": "Enable optimized handling of in-progress logs. This option may leave finished applications that fail to rename their event logs listed as in-progress."
    },
    "numReplayThreads": {
      "type": "string",
      "description": "Number of threads that will be used by history server to process event logs. If empty, 25% of available cores will be used."
    },
    "maxDiskUsage": {
      "type": "integer",
      "default": 10,
      "minimum": 1,
      "description": "# of GB; Maximum disk usage for the local directory where the cache application history information are stored."
    },
    "persistentPath": {
      "type": "string",
      "description": "Local directory where to cache application history data. If set, the history server will store application data on disk instead of keeping it in memory. The data written to disk will be re-used in the event of a history server restart."
    }
  },
  "required": []
}