Spark Configuration

Configuration

  • Spark Properties

    Application Properties
    Runtime Environment
    Shuffle Behavior
    Spark UI
    Compression and Serialization
    Memory Management
    Execution Behavior
    Networking
    Scheduling
    Dynamic Allocation
    Security
    TLS / SSL
    Spark SQL
    Spark Streaming
    SparkR
    GraphX
    Deploy
    Cluster Managers

  • Environment Variables

  • Configuring Logging

  • Overriding configuration directory

  • Inheriting Hadoop Cluster Configuration

Priority

Entry point: `SparkSubmit.main` constructs `val appArgs = new SparkSubmitArguments(args)`, which parses, merges, and validates all submission settings in its constructor (see the excerpt below).

(The line-number gutter "1–58" from the copied source listing has been removed; the excerpted code follows.)
// NOTE(review): this is an excerpt of the SparkSubmitArguments class body from
// the Spark source — the class header, parse(), and the helper methods called
// below are outside this excerpt.
//
// The mutable fields start as null/false and are filled in, in order, by the
// constructor-time sequence at the bottom:
//   1. parse(args)                    — spark-submit command-line options
//   2. mergeDefaultSparkProperties()  — properties file / spark-defaults.conf
//   3. ignoreNonSparkProperties()     — drop keys not starting with "spark."
//   4. loadEnvironmentArguments()     — sparkProperties map + environment vars
//   5. validateArguments()            — final sanity checks
// This ordering is what gives command-line options precedence over the
// defaults file, and the defaults file precedence over environment variables.
// (Field names presumably mirror the corresponding --option flags — confirm
// against the parse() implementation, which is not shown here.)

// Init parameters
var master: String = null
var deployMode: String = null
var executorMemory: String = null
var executorCores: String = null
var totalExecutorCores: String = null
var propertiesFile: String = null
var driverMemory: String = null
var driverExtraClassPath: String = null
var driverExtraLibraryPath: String = null
var driverExtraJavaOptions: String = null
var queue: String = null
var numExecutors: String = null
var files: String = null
var archives: String = null
var mainClass: String = null
var primaryResource: String = null
var name: String = null
var childArgs: ArrayBuffer[String] = new ArrayBuffer[String]()
var jars: String = null
var packages: String = null
var repositories: String = null
var ivyRepoPath: String = null
var packagesExclusions: String = null
var verbose: Boolean = false
var isPython: Boolean = false
var pyFiles: String = null
var isR: Boolean = false
var action: SparkSubmitAction = null
// Mutable map holding every effective "spark.*" property; the only field that
// is a val (the reference is fixed, the contents are mutated by the steps below).
val sparkProperties: HashMap[String, String] = new HashMap[String, String]()
var proxyUser: String = null
var principal: String = null
var keytab: String = null
// Standalone cluster mode only
var supervise: Boolean = false
var driverCores: String = null
var submissionToKill: String = null
var submissionToRequestStatusFor: String = null
var useRest: Boolean = true // used internally

// Set parameters from command line arguments
// (parse is defined outside this excerpt; a bad option surfaces as
// IllegalArgumentException and is turned into an error-and-exit.)
try {
parse(args.asJava)
} catch {
case e: IllegalArgumentException =>
SparkSubmit.printErrorAndExit(e.getMessage())
}

// Populate `sparkProperties` map from properties file
mergeDefaultSparkProperties()

// Remove keys that don't start with "spark." from `sparkProperties`.
ignoreNonSparkProperties()

// Use `sparkProperties` map along with env vars to fill in any missing parameters
loadEnvironmentArguments()

validateArguments()

Effective priority (highest to lowest): properties set programmatically in application code (on SparkConf) > spark-submit command-line options > spark-defaults.conf > environment variables set via spark-env.sh > built-in defaults.