spark 提交作业
Last updated on November 22, 2024 pm
🧙 Questions
将作业提交给yarn执行
☄️ Id
spark集群
# Run the SQL-query plugin on the standalone cluster in cluster deploy mode;
# --supervise makes the master restart the driver if it exits abnormally
# (supported for standalone/Mesos cluster mode).
bash spark-submit \
  --master spark://ispong-mac.local:7077 \
  --deploy-mode cluster \
  --supervise \
  --class com.isxcode.star.plugin.query.sql.Execute \
  /Users/ispong/code/spark-yun/spark-yun-plugins/spark-query-sql-plugin/build/libs/spark-query-sql-plugin.jar
# Same standalone submission, with an explicit application name
# (--name is what shows up in the master web UI).
bash spark-submit \
  --master spark://ispong-mac.local:7077 \
  --deploy-mode cluster \
  --name ispong-custom \
  --class com.isxcode.star.plugin.query.sql.Execute \
  /Users/ispong/spark-query-sql-plugin.jar
http://47.92.145.79:16060/v1/submissions/status/driver-20230612161336-0000
# Submit to the standalone cluster with extra dependency jars (--jars is a
# single comma-separated list).
# Fix: the original had the line-continuation backslash glued to the last
# --jars entry ("...fastjson2-extension-2.0.16.jar\"), so after backslash-
# newline removal the jars list and the application jar fused into one word;
# a space must precede the trailing backslash.
spark-submit \
  --name ispong-custom \
  --class com.isxcode.star.plugin.query.sql.Execute \
  --master spark://isxcode:7077 \
  --deploy-mode cluster \
  --jars /home/ispong/spark-yun/spark-yun-dist/build/lib/fastjson-2.0.16.jar,/home/ispong/spark-yun/spark-yun-dist/build/lib/spark-yun-api-latest-plain.jar,/home/ispong/spark-yun/spark-yun-dist/build/lib/fastjson2-2.0.16.jar,/home/ispong/spark-yun/spark-yun-dist/build/lib/fastjson2-extension-2.0.16.jar \
  /home/ispong/spark-query-sql-plugin.jar
查看状态
在 spark-defaults.conf 中添加如下配置，开启 standalone master 的 REST 提交接口：
spark.master.rest.enabled true
# Kill a driver submitted to the standalone cluster (requires the REST
# submission API, i.e. spark.master.rest.enabled=true on the master).
bash spark-submit --kill driver-20230609174843-0001 --master spark://ispong-mac.local:7077
# Query the status of a submitted driver by its driver id.
bash spark-submit --status driver-20230610221254-0000 --master spark://ispong-mac.local:7077
yarn集群
# Submit the SparkPi example to YARN in cluster mode.
# Fix: --supervise and --total-executor-cores apply only to standalone/Mesos
# masters and are silently ignored by YARN; the YARN-specific sizing flags
# --num-executors / --executor-cores are used instead.
bash spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master yarn \
  --deploy-mode cluster \
  --num-executors 1 \
  --executor-cores 1 \
  --executor-memory 1G \
  /opt/spark/examples/jars/spark-examples_2.12-3.4.0.jar \
  10000
# Submit the SQL-query plugin to YARN in cluster mode.
# Fix: --supervise and --total-executor-cores are standalone/Mesos-only and
# are ignored by YARN; replaced with --num-executors / --executor-cores.
bash spark-submit \
  --name ispong-custom \
  --class com.isxcode.star.plugin.query.sql.Execute \
  --master yarn \
  --deploy-mode cluster \
  --num-executors 1 \
  --executor-cores 1 \
  --executor-memory 1G \
  /home/ispong/spark-query-sql-plugin.jar
# Check the status of a YARN application by its application id.
yarn application -status application_1686367092059_0002
# Kill a running YARN application.
yarn application -kill application_1686291741790_0003
# Fetch the aggregated logs of a (finished) YARN application.
yarn logs -applicationId application_1686302603508_0001
查看数据返回
k8s集群
- spark.kubernetes.file.upload.path: 只能配置为共享存储路径（共享盘），需要由用户填写
- spark 镜像由平台方代为构建
kubectl cluster-info
- 构建镜像 zhiqingyun/spark:latest（由 docker-image-tool.sh -r zhiqingyun 生成）
# Build the Spark container image with the repo prefix "zhiqingyun".
cd /opt/spark/bin
bash docker-image-tool.sh -r zhiqingyun -t latest build
# Create the namespace
kubectl create namespace spark-yun
# Create the service account used by the Spark driver
kubectl create serviceaccount zhiqingyun -n spark-yun
# Grant permissions (namespace-scoped RoleBinding alternative kept for reference)
# kubectl create rolebinding spark-role --clusterrole=edit --serviceaccount=spark-yun:spark --namespace=spark-yun
kubectl create clusterrolebinding spark-role --clusterrole=edit --serviceaccount=spark-yun:zhiqingyun --namespace=spark-yun
# Verify the binding works: the service account must be able to create pods
kubectl auth can-i create pods --as=system:serviceaccount:spark-yun:zhiqingyun
# Submit the SparkPi example to Kubernetes in cluster mode.
# Fix: --total-executor-cores is a standalone/Mesos-only flag and is ignored
# on k8s; --executor-cores is the supported equivalent here.
# The hostPath volume confs mount the example jar from the node into the
# driver pod so the local:// application path resolves inside the container.
bash spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master k8s://127.0.0.1:6443 \
  --deploy-mode cluster \
  --name ispong-spark-pi \
  --executor-memory 1G \
  --executor-cores 1 \
  --conf spark.executor.instances=1 \
  --conf spark.kubernetes.container.image=zhiqingyun/spark:latest \
  --conf spark.kubernetes.driver.volumes.hostPath.local-path.mount.path=/opt/spark/examples/jars/spark-examples_2.12-3.1.1.jar \
  --conf spark.kubernetes.driver.volumes.hostPath.local-path.mount.readOnly=false \
  --conf spark.kubernetes.driver.volumes.hostPath.local-path.options.path=/opt/spark-3.1.1-bin-hadoop3.2/examples/jars/spark-examples_2.12-3.1.1.jar \
  --conf spark.kubernetes.authenticate.driver.serviceAccountName=zhiqingyun \
  --conf spark.kubernetes.namespace=spark-yun \
  local:///opt/spark/examples/jars/spark-examples_2.12-3.1.1.jar \
  100
# Submit the SQL-query plugin to Kubernetes in cluster mode.
# Fix: --total-executor-cores is standalone/Mesos-only and ignored on k8s;
# replaced with --executor-cores.
# Each hostPath volume (local-path, libs1..libs3) mounts one jar from the
# node filesystem into the driver pod at the path referenced by local:// URIs.
bash spark-submit \
  --verbose \
  --class com.isxcode.star.plugin.query.sql.Execute \
  --master k8s://127.0.0.1:6443 \
  --deploy-mode cluster \
  --name ispong-custom \
  --executor-memory 1G \
  --executor-cores 1 \
  --conf spark.executor.instances=1 \
  --conf spark.kubernetes.container.image=zhiqingyun/spark:latest \
  --conf spark.kubernetes.driver.volumes.hostPath.local-path.mount.path=/opt/spark/examples/jars/spark-query-sql-plugin.jar \
  --conf spark.kubernetes.driver.volumes.hostPath.local-path.mount.readOnly=false \
  --conf spark.kubernetes.driver.volumes.hostPath.local-path.options.path=/Users/ispong/Isxcode/spark-yun/spark-yun-plugins/spark-query-sql-plugin/build/libs/spark-query-sql-plugin.jar \
  --conf spark.kubernetes.driver.volumes.hostPath.libs1.mount.path=/opt/spark/examples/jars/lib/fastjson-2.0.16.jar \
  --conf spark.kubernetes.driver.volumes.hostPath.libs1.mount.readOnly=false \
  --conf spark.kubernetes.driver.volumes.hostPath.libs1.options.path=/Users/ispong/Isxcode/spark-yun/spark-yun-dist/build/lib/fastjson-2.0.16.jar \
  --conf spark.kubernetes.driver.volumes.hostPath.libs2.mount.path=/opt/spark/examples/jars/lib/fastjson2-extension-2.0.16.jar \
  --conf spark.kubernetes.driver.volumes.hostPath.libs2.mount.readOnly=false \
  --conf spark.kubernetes.driver.volumes.hostPath.libs2.options.path=/Users/ispong/Isxcode/spark-yun/spark-yun-dist/build/lib/fastjson2-extension-2.0.16.jar \
  --conf spark.kubernetes.driver.volumes.hostPath.libs3.mount.path=/opt/spark/examples/jars/lib/fastjson2-2.0.16.jar \
  --conf spark.kubernetes.driver.volumes.hostPath.libs3.mount.readOnly=false \
  --conf spark.kubernetes.driver.volumes.hostPath.libs3.options.path=/Users/ispong/Isxcode/spark-yun/spark-yun-dist/build/lib/fastjson2-2.0.16.jar \
  --conf spark.kubernetes.authenticate.driver.serviceAccountName=zhiqingyun \
  --conf spark.kubernetes.namespace=spark-yun \
  --jars local:///opt/spark/examples/jars/lib/fastjson2-2.0.16.jar,local:///Users/ispong/Isxcode/spark-yun/spark-yun-dist/build/lib/fastjson-2.0.16.jar,local:///opt/spark/examples/jars/lib/fastjson-2.0.16.jar \
  local:///opt/spark/examples/jars/spark-query-sql-plugin.jar
# Check status: either inspect the driver pod directly...
kubectl get pod ispong-spark-pi-dd976d88a44dc3ab-driver -n spark-yun
# ...or ask the Spark k8s submission client ("namespace:driver-pod-name" format)
bash spark-submit --status spark-yun:ispong-spark-pi-a8d972889e1444da-driver --master k8s://172.16.215.103:6443
# Kill the application: delete the driver pod, or use spark-submit --kill
kubectl delete pod ispong-custom-bf70a588a47d08bd-driver -n spark-yun
bash spark-submit --kill spark-yun:ispong-spark-pi-a8d972889e1444da-driver --master k8s://172.16.215.103:6443
# Tail the driver logs
kubectl logs -f ispong-spark-pi-dd976d88a44dc3ab-driver -n spark-yun
# Expose the Spark UI (driver port 4040) on localhost
kubectl port-forward ispong-spark-pi-2e730e889e1f7df7-driver 4040:4040 -n=spark-yun
# Dump the driver pod spec as YAML
kubectl get pod zhiqingyun-job-bb66f188b4265d45-driver -n=spark-yun -o yaml
# Supply a custom driver pod template yourself (Java SparkLauncher API call)
sparkLauncher.setConf("spark.kubernetes.driver.podTemplateFile","/home/ispong/pod-init.yaml");
查看界面
kubectl port-forward <driver-pod-name> 4040:4040
查看状态
# Query driver status via the Spark k8s submission client ("namespace:name" format)
spark-submit --status spark-yun:spark-76480f8a6df54ae1858e3517a43f4cfe --master k8s://172.16.215.101:6443
# Check whether the default service account may read pods
kubectl auth can-i get pods --as=system:serviceaccount:default:default
# Create a role binding granting edit rights to that account
kubectl create rolebinding default-edit --clusterrole=edit --serviceaccount=default:default --namespace=default
# Re-check: should now report "yes"
kubectl auth can-i get pods --as=system:serviceaccount:default:default
中止
spark-submit --kill spark:spark-pi-1547948636094-driver --master k8s://https://192.168.2.8:8443
查看日志
kubectl port-forward ispong-spark-pi-317e00889e0c5847-driver 4040:4040
查看数据返回
# List pods in the current namespace to find the driver pod name.
kubectl get pods
# Fetch driver logs.
# Fix: kubectl takes the namespace via -n, not a "namespace:pod" prefix —
# the original "kubectl logs spark-yun:pod" form is invalid syntax.
kubectl logs spark-pi-f3809388851a14da-driver -n spark-yun
# Forward local port 8888 to the driver's Spark UI on port 4040.
kubectl port-forward spark-pi-c9b77c8881dfd132-driver 8888:4040
standalone
- FAILED
- RUNNING
- FINISHED
- KILLED
yarn
- UNDEFINED
- KILLED
- FAILED
- SUCCEEDED
k8s
- Error
- Running
- Completed
- KILLED
🔗 Links
spark 提交作业
https://ispong.isxcode.com/hadoop/spark/spark 提交作业/