Spark Stress Testing
Last updated on February 21, 2025
🧙 Questions
☄️ Ideas
Actual command executed
/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.402.b06-1.el7_9.x86_64/jre/bin/java
-cp
/home/zhiqingyun/zhiqingyun-agent/spark-min/conf/:/home/zhiqingyun/zhiqingyun-agent/spark-min/jars/*:/opt/hadoop/etc/hadoop/
-XX:+IgnoreUnrecognizedVMOptions
--add-opens=java.base/java.lang=ALL-UNNAMED
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED
--add-opens=java.base/java.lang.reflect=ALL-UNNAMED
--add-opens=java.base/java.io=ALL-UNNAMED
--add-opens=java.base/java.net=ALL-UNNAMED
--add-opens=java.base/java.nio=ALL-UNNAMED
--add-opens=java.base/java.util=ALL-UNNAMED
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED
--add-opens=java.base/sun.security.action=ALL-UNNAMED
--add-opens=java.base/sun.util.calendar=ALL-UNNAMED
--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED
-Djdk.reflect.useDirectMethodHandle=false
org.apache.spark.deploy.SparkSubmit
--master yarn
--deploy-mode cluster
--conf spark.executor.memory=2g
--conf spark.driver.memory=1g
--conf spark.driver.cores=1
--conf spark.executor.cores=1
--conf spark.executor.instances=1
--conf spark.executor.extraJavaOptions=-Dfile.encoding=utf-8
--conf spark.cores.max=1
--conf spark.driver.extraJavaOptions=-Dfile.encoding=utf-8
--class com.isxcode.star.plugin.query.sql.Execute
--name zhiqingyun-job
--jars
file:/home/zhiqingyun/zhiqingyun-agent/lib/mysql-connector-java-5.1.49.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/jetty-http-9.4.50.v20221201.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/mysql-connector-j-8.1.0.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/oceanbase-client-2.4.6.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/jetty-server-9.4.51.v20230217.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/mssql-jdbc-12.4.2.jre8.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/ngdbc-2.18.13.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/clickhouse-jdbc-0.5.0.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/ojdbc10-19.20.0.0.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/postgresql-42.6.0.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/jcc-11.5.8.0.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/fastjson2-2.0.16.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/Dm8JdbcDriver18-8.1.1.49.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/fastjson-2.0.16.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/log4j-api-2.17.2.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/zhiqingyun-agent.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/spark-yun-api-main-plain.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/jetty-io-9.4.50.v20221201.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/jetty-util-ajax-9.4.50.v20221201.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/spark-core-2.9.4.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/jetty-util-9.4.50.v20221201.jar,
file:/home/zhiqingyun/zhiqingyun-agent/lib/fastjson2-extension-2.0.16.jar
/home/zhiqingyun/zhiqingyun-agent/plugins/spark-query-sql-plugin.jar
eyJjb250YWluZXJQb3J0IjowLCJkYXRhYmFzZSI6ImlzeGNvZGVfZGIiLCJsaW1pdCI6MjAwLCJzcGFya0NvbmZpZyI6eyJzcGFyay5leGVjdXRvci5tZW1vcnkiOiIyZyIsInNwYXJrLmRyaXZlci5tZW1vcnkiOiIxZyIsInNwYXJrLmRyaXZlci5jb3JlcyI6IjEiLCJzcGFyay5leGVjdXRvci5jb3JlcyI6IjEiLCJzcGFyay5leGVjdXRvci5pbnN0YW5jZXMiOiIxIiwic3BhcmsuZXhlY3V0b3IuZXh0cmFKYXZhT3B0aW9ucyI6Ii1EZmlsZS5lbmNvZGluZz11dGYtOCIsImhpdmUubWV0YXN0b3JlLnVyaXMiOiJ0aHJpZnQ6Ly9pc3hjb2RlOjkwODMiLCJzcGFyay5jb3Jlcy5tYXgiOiIxIiwic3BhcmsuZHJpdmVyLmV4dHJhSmF2YU9wdGlvbnMiOiItRGZpbGUuZW5jb2Rpbmc9dXRmLTgifSwic3FsIjoiLS0gc2hvdyBkYXRhYmFzZXNcbnNlbGVjdCAqIGZyb20gdGVzdF9mdW5jMSJ9
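The final argument is a Base64-encoded job definition: decoded, it carries the same sparkConfig values as the --conf flags above (plus hive.metastore.uris=thrift://isxcode:9083), the target database isxcode_db, a row limit of 200, and the SQL "-- show databases\nselect * from test_func1". A minimal decoding sketch (a hypothetical standalone helper, not part of the agent):

import java.nio.charset.StandardCharsets;
import java.util.Base64;

// Hypothetical helper: decode the Base64 job definition passed as the last
// program argument to spark-submit above.
public class DecodeJobArg {
    public static void main(String[] args) {
        String encoded = args[0];  // the eyJjb250YWluZXJQb3J0... string
        String json = new String(Base64.getDecoder().decode(encoded), StandardCharsets.UTF_8);
        System.out.println(json);
        // For the argument above this prints (abridged):
        // {"containerPort":0,"database":"isxcode_db","limit":200,
        //  "sparkConfig":{"spark.executor.memory":"2g", ..., "hive.metastore.uris":"thrift://isxcode:9083"},
        //  "sql":"-- show databases\nselect * from test_func1"}
    }
}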
Unorganized notes
{
"spark.executor.memory": "2g",
"spark.driver.memory": "1g",
"spark.sql.legacy.timeParserPolicy": "LEGACY",
"spark.driver.cores": "1",
"spark.sql.storeAssignmentPolicy": "LEGACY",
"spark.executor.cores": "1",
"spark.executor.instances": "1",
"spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
"hive.metastore.uris": "thrift://isxcode:9083",
"spark.cores.max": "1",
"spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
}
-- partitions = 1, concurrency = 1
-- spark.executor.instances does not take effect
-- 2 containers, 2 vCPUs, 4 GB
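This matches how Spark's JDBC source behaves: a plain read produces a single partition, so only one task can run no matter how many executors YARN grants; parallel reads need an explicit partition column. A hedged sketch (not the plugin's actual code; URL, table, and column names are placeholders):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

// Illustrative only: connection details, table and column names are placeholders.
public class JdbcPartitionDemo {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("jdbc-partition-demo").getOrCreate();

        // Plain JDBC read: one partition, so at most one task runs at a time.
        Dataset<Row> single = spark.read().format("jdbc")
                .option("url", "jdbc:mysql://example-host:3306/isxcode_db")
                .option("dbtable", "test_func1")
                .option("user", "u").option("password", "p")
                .load();
        System.out.println(single.rdd().getNumPartitions());   // 1

        // Partitioned read: 10 partitions, so up to 10 tasks can run concurrently.
        Dataset<Row> parallel = spark.read().format("jdbc")
                .option("url", "jdbc:mysql://example-host:3306/isxcode_db")
                .option("dbtable", "test_func1")
                .option("user", "u").option("password", "p")
                .option("partitionColumn", "id")   // assumed numeric key
                .option("lowerBound", "1")
                .option("upperBound", "1400000")
                .option("numPartitions", "10")
                .load();
        System.out.println(parallel.rdd().getNumPartitions());  // 10
    }
}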
{
"spark.sql.legacy.timeParserPolicy": "LEGACY",
"spark.sql.storeAssignmentPolicy": "LEGACY",
"spark.executor.instances": "8",
"spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
"spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
"hive.metastore.uris": "thrift://isxcode:9083"
}
-- partitions = 200, concurrency = 8
-- 9 containers, 9 vCPUs, 18 GB
-- 8 running
-- 200 stages
-- actual memory consumed: 5 GB
{
"spark.sql.legacy.timeParserPolicy": "LEGACY",
"spark.sql.storeAssignmentPolicy": "LEGACY",
"spark.executor.instances": "8",
"spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
"spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
"hive.metastore.uris": "thrift://isxcode:9083"
}
-- Summary: memory was not fully used
-- partitions = 10, concurrency = 18
-- 10 containers, 10 vcores, 20480 MB
-- 9 running
-- actual memory consumed: 7 GB
-- 9 executors, 1 driver
-- 366 MB / 366 MB
{
"spark.sql.legacy.timeParserPolicy": "LEGACY",
"spark.sql.storeAssignmentPolicy": "LEGACY",
"spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
"spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
"hive.metastore.uris": "thrift://isxcode:9083"
}
-- partitions = 10, concurrency = 11
-- 7 containers, 7 vcores, 20480 MB
-- 6 running
-- actual memory consumed: 5.7 GiB
-- 6 executors, 1 driver
-- 912.3 MiB / 366 MB
{
"spark.executor.memory": "2g",
"spark.driver.memory": "1g",
"spark.sql.legacy.timeParserPolicy": "LEGACY",
"spark.sql.storeAssignmentPolicy": "LEGACY",
"spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
"spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
"hive.metastore.uris": "thrift://isxcode:9083"
}
-- partitions = 10, concurrency = 11
-- 5 containers, 5 vcores, 18432 MB
-- 4 running
-- actual memory consumed: 4 GB
-- 4 executors, 1 driver
-- 2.1 GiB / 549.4 MiB
{
"spark.executor.memory": "3g",
"spark.driver.memory": "1g",
"spark.memory.fraction": 0.9,
"spark.sql.legacy.timeParserPolicy": "LEGACY",
"spark.sql.storeAssignmentPolicy": "LEGACY",
"spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
"spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
"hive.metastore.uris": "thrift://isxcode:9083"
}
-- The amount of data handled by a single executor is still too large
-- partitions = 100, concurrency = 11
-- 5 containers, 5 vcores, 18432 MB
-- 4 running
-- actual memory consumed: 4 GB
-- 4 executors, 1 driver
-- 2.1 GiB / 549.4 MiB
{
"spark.executor.memory": "3g",
"spark.driver.memory": "1g",
"spark.memory.fraction": 0.9,
"spark.sql.legacy.timeParserPolicy": "LEGACY",
"spark.sql.storeAssignmentPolicy": "LEGACY",
"spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
"spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
"hive.metastore.uris": "thrift://isxcode:9083"
}
Preliminary conclusions:
Number of containers: limited by memory, i.e. by spark.executor.memory and spark.driver.memory
Executors: spark.executor.cores (2) × number of containers
spark.executor.instances: 1
Partition by data size: 1.4 million rows, 10 partitions, 140,000 rows per partition (see the sketch below)
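A minimal sketch of the "partition by data size" rule, assuming the row counts from the note (the source table name is a placeholder):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

// Pick the partition count from a target rows-per-partition:
// 1,400,000 rows / 140,000 rows per partition = 10 partitions.
public class PartitionBySize {
    public static int partitionsFor(long totalRows, long targetRowsPerPartition) {
        return (int) Math.max(1, Math.ceil((double) totalRows / targetRowsPerPartition));
    }

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("partition-by-size").getOrCreate();
        Dataset<Row> df = spark.table("test_func1");             // placeholder source table
        long totalRows = df.count();                              // 1,400,000 in the test above
        int numPartitions = partitionsFor(totalRows, 140_000L);   // -> 10
        Dataset<Row> sized = df.repartition(numPartitions);
        System.out.println(sized.rdd().getNumPartitions());
    }
}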
-- partitions = 10, concurrency = 1
-- 5 containers, 5 vcores, 18432 MB
-- 8 running: (5 - 1) × 2
-- actual memory consumed: 4 GB
-- 4 executors, 1 driver
-- 2.1 GiB / 549.4 MiB
{
"spark.executor.memory": "2g",
"spark.executor.cores": 1,
"spark.driver.memory": "1g",
"spark.driver.cores": 1,
"spark.memory.fraction": 0.9,
"spark.sql.legacy.timeParserPolicy": "LEGACY",
"spark.sql.storeAssignmentPolicy": "LEGACY",
"spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
"spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
"hive.metastore.uris": "thrift://isxcode:9083"
}
spark.executor.cores determines the number of running tasks; it matches exactly the number of cores requested from YARN
spark.executor.memory and the concurrency determine the number of containers
For the YARN resource ceiling, a 1 core : 2 GB ratio works best; with 20 GB that means 10 cores
Taking 1 core / 2 GB and 2 cores as the baseline, how much data should a single partition hold? 1.2 MiB / 14,125 rows per partition is too little
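To make the first rule concrete, a tiny sketch that just replays the note's own arithmetic ((containers - 1 driver) × spark.executor.cores = running tasks); it illustrates the observed relationship, not a general YARN formula:

// Replays the note's arithmetic: 5 YARN containers (1 driver + 4 executors)
// with spark.executor.cores=2 gives (5 - 1) * 2 = 8 concurrently running tasks.
public class RunningTaskSketch {
    public static void main(String[] args) {
        int containers = 5;        // containers granted by YARN ("5 5 18432" above)
        int executorCores = 2;     // spark.executor.cores
        int executors = containers - 1;                  // one container hosts the driver
        int runningTasks = executors * executorCores;    // = 8, matching the observation
        System.out.println("running tasks = " + runningTasks);
    }
}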
Need to use the internal network
By default 2 containers are started: one driver and one executor
Database extraction has an upper throughput limit
3.7 min 0.1 s 0.0 B / 280050 22.8 MiB / 280050
5.0 min 0.3 s 0.0 B / 279487 22.8 MiB / 279487
2 containers, 2 vcores, 5120 MB
1.4 million rows: one core takes about 20 minutes (≈70,000 rows per minute per core)
1 core / 2 GB with spark.memory.fraction 0.9 can handle 1.4 million rows
For the stress test, don't tune manually; just check the speed
Single-table Spark container test; check the speed
Data sync; check the speed
Test YARN at peak performance: use all of the CPU and all of the memory
Delete the two IP whitelist entries below:
210.87.110.108
101.90.12.139
600,038,145 rows (≈600 million rows of data)
Start the data sync
23 GB of memory
8-core CPU
part table sync: 1,400,000 rows (1.4 million)
Run at the maximum configuration
At most 8 cores, 8 executor instances
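A back-of-envelope estimate only, assuming the single-core rate measured above (≈1.4 million rows per 20 minutes) also holds for this sync; actual throughput for the 600-million-row table will differ:

// Rough extrapolation of the measured ~1.4M rows / 20 min / core to the
// 600,038,145-row sync on 8 cores. Ignores skew, network, and DB limits.
public class SyncEstimate {
    public static void main(String[] args) {
        long rows = 600_038_145L;
        double rowsPerCoreMinute = 1_400_000.0 / 20.0;     // ~70,000 rows/min per core
        double coreMinutes = rows / rowsPerCoreMinute;     // ~8,572 core-minutes
        double hoursOnEightCores = coreMinutes / 8 / 60;   // ~17.9 hours
        System.out.printf("~%.1f hours on 8 cores%n", hoursOnEightCores);
    }
}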
{
"spark.executor.memory": "2g",
"spark.driver.memory": "1g",
"spark.sql.legacy.timeParserPolicy": "LEGACY",
"spark.driver.cores": "1",
"spark.sql.storeAssignmentPolicy": "LEGACY",
"spark.executor.cores": "1",
"spark.memory.fraction": "0.9",
"spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
"hive.metastore.uris": "thrift://isxcode:9083",
"spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
}
Change "spark.memory.fraction": "0.9" back to the default value (0.6)
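For reference, spark.memory.fraction is the share of JVM heap (minus the 300 MB reserved region) given to Spark's unified execution + storage memory; its default is 0.6. A minimal, illustrative sketch of pinning it back to the default (not the platform's actual submit code):

import org.apache.spark.sql.SparkSession;

// Illustrative only: set spark.memory.fraction back to the 0.6 default
// instead of the 0.9 used during the tests above.
public class MemoryFractionDefault {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("memory-fraction-default")
                .config("spark.memory.fraction", "0.6")   // Spark's default unified-memory share
                .config("spark.executor.memory", "2g")
                .config("spark.driver.memory", "1g")
                .getOrCreate();
        System.out.println(spark.conf().get("spark.memory.fraction"));
    }
}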
🔗 Links
spark 压力测试
https://ispong.isxcode.com/hadoop/spark/spark 压力测试/