Sequence
Driver: RDD(foreach, foreachPartition, collect, collectPartitions, toLocalIterator, reduce, fold, aggregate, count, take)#runJob -> DAGScheduler#runJob, submitJob -> DAGSchedulerEventProcessLoop#doOnReceive(JobSubmitted) -> DAGScheduler#handleJobSubmitted, submitStage(Submits stage, but first recursively submits any missing parents, BFS.), submitMissingTasks -> TaskScheduler#submitTasks(TaskSet) -> CoarseGrainedSchedulerBackend#reviveOffers
Executor: Executor#launchTask -> TaskRunner#run -> Task#run -> ShuffleMapTask(MapStatus), ResultTask(func():U)#runTask -> CoarseGrainedExecutorBackend#statusUpdate
Driver: DAGSchedulerEventProcessLoop#doOnReceive(CompletionEvent) -> DAGScheduler#handleTaskCompletion
DAGSchedulerEvent
AllJobsCancelled$
BeginEvent
CompletionEvent
DAGSchedulerEvent
ExecutorAdded
ExecutorLost
GettingResultEvent
JobCancelled
JobGroupCancelled
JobSubmitted
MapStageSubmitted
ResubmitFailedStages$
SpeculativeTaskSubmitted
StageCancelled
TaskSetFailed
WorkerRemoved
Task
ShuffleMapTask
ResultTask
Links
- Author:HyperJ
- Source:HyperJ’s Blog
- Link:Spark Scheduling - Sequence