BY clauses

Hive and Spark SQL use a BY keyword in several places that look alike but behave very differently: in CREATE TABLE DDL (PARTITIONED BY, CLUSTERED BY / SORTED BY, SKEWED BY), in queries (ORDER BY, CLUSTER BY, DISTRIBUTE BY, SORT BY), in window specifications, and in the DataFrameWriter API (partitionBy, bucketBy, sortBy).
CREATE TABLE
PARTITIONED BY
[PARTITIONED BY (col3 data_type [COMMENT col_comment], ...)]
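Partition columns are declared separately from the table's data columns; each distinct combination of partition values becomes its own directory under the table location, so queries that filter on a partition column can prune whole directories. A minimal sketch, with table and column names made up for illustration:

CREATE TABLE logs (
  id BIGINT,
  msg STRING
)
PARTITIONED BY (dt STRING COMMENT 'event date', country STRING)
STORED AS PARQUET;

-- Partition pruning: only the dt=2024-01-01/country=US directory is scanned.
SELECT count(*) FROM logs WHERE dt = '2024-01-01' AND country = 'US';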
CLUSTERED/SORTED BY
[CLUSTERED BY (col1, ...) [SORTED BY (col1 [ASC|DESC], ...)] INTO num_buckets BUCKETS]
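CLUSTERED BY hashes rows into a fixed number of buckets (files), and SORTED BY optionally keeps each bucket sorted, which helps bucketed joins and sampling. Bucketing composes with partitioning: rows are first routed to a partition directory, then hashed into a bucket within it. A sketch under the same made-up schema as above:

CREATE TABLE users (
  user_id BIGINT,
  name STRING
)
PARTITIONED BY (dt STRING)
CLUSTERED BY (user_id) SORTED BY (user_id ASC) INTO 32 BUCKETS
STORED AS ORC;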
SKEWED BY
[SKEWED BY (col1, col2, ...) ON ((col_value, col_value, ...), ...) [STORED AS DIRECTORIES]]
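SKEWED BY tells Hive which column values are heavily over-represented so it can split them into their own files, or their own directories with STORED AS DIRECTORIES (list bucketing), letting queries that filter on those values skip the rest. This is a Hive DDL feature; a sketch with an illustrative table, following the single-column form from the Hive docs:

CREATE TABLE orders (
  order_id BIGINT,
  customer_id BIGINT,
  amount DOUBLE
)
SKEWED BY (customer_id) ON (1, 5, 13)
STORED AS DIRECTORIES;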
Query
ORDER BY
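ORDER BY guarantees a total ordering of the final result. In Hive this traditionally funnels all rows through a single reducer; Spark SQL implements it as a range-partitioned shuffle followed by per-partition sorts. A sketch against the hypothetical orders table:

-- Total ordering of the final result set.
SELECT customer_id, amount
FROM orders
ORDER BY amount DESC;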
CLUSTER BY (shorthand for DISTRIBUTE BY + SORT BY on the same columns; see the sketch after SORT BY below)
DISTRIBUTE BY (the SQL counterpart of DataFrame repartition by columns): rows with the same keys land in the same partition, but no ordering is applied.
# The default number of partitions to use when shuffling data for joins or aggregations.
spark.sql.shuffle.partitions=200
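The shuffle triggered by DISTRIBUTE BY produces spark.sql.shuffle.partitions output partitions (200 unless overridden; newer Spark versions may coalesce them via adaptive query execution). A sketch with a made-up events table:

-- Rows with the same user_id end up in the same output partition; no sorting.
SELECT user_id, event_type
FROM events
DISTRIBUTE BY user_id;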
SORT BY
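SORT BY orders rows within each partition only; there is no global ordering. CLUSTER BY col is shorthand for DISTRIBUTE BY col SORT BY col. A sketch with the same illustrative table:

-- Per-partition ordering only.
SELECT user_id, event_time
FROM events
DISTRIBUTE BY user_id
SORT BY user_id;

-- CLUSTER BY user_id is shorthand for the DISTRIBUTE BY + SORT BY pair above.
SELECT user_id, event_time
FROM events
CLUSTER BY user_id;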
WINDOW
CLUSTER BY
PARTITION|DISTRIBUTE BY
ORDER|SORT BY
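Inside an OVER(...) window specification, PARTITION BY and DISTRIBUTE BY are interchangeable spellings, as are ORDER BY and SORT BY, and CLUSTER BY again combines the two. A sketch with the same hypothetical columns:

-- Running total per user, ordered by event_time within each window partition.
SELECT
  user_id,
  event_time,
  sum(amount) OVER (PARTITION BY user_id ORDER BY event_time) AS running_total
FROM events;

-- OVER (DISTRIBUTE BY user_id SORT BY event_time) is an equivalent spelling,
-- and OVER (CLUSTER BY user_id) distributes and sorts on the same column.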
Writer
partitionBy
bucketBy
sortBy
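On the DataFrame side, DataFrameWriter.partitionBy corresponds to PARTITIONED BY (directory layout), while bucketBy and sortBy correspond to CLUSTERED BY / SORTED BY; bucketBy and sortBy only work with saveAsTable, not with save(path). A Scala sketch, assuming a SparkSession named spark and a hypothetical events table with columns dt, user_id, event_time:

// `events` is an illustrative DataFrame; adjust names to your own schema.
val events = spark.table("events")

events.write
  .partitionBy("dt")               // directory per dt value, like PARTITIONED BY
  .bucketBy(32, "user_id")         // hash into 32 buckets, like CLUSTERED BY ... INTO 32 BUCKETS
  .sortBy("event_time")            // sort within each bucket, like SORTED BY
  .format("parquet")
  .saveAsTable("events_bucketed")  // bucketBy/sortBy require saveAsTable, not save(path)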