Spark Stress Testing

Last updated on September 15, 2024

🧙 Questions

☄️ Ideas

Actual command executed:
/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.402.b06-1.el7_9.x86_64/jre/bin/java
-cp /home/zhiqingyun/zhiqingyun-agent/spark-min/conf/:/home/zhiqingyun/zhiqingyun-agent/spark-min/jars/*:/opt/hadoop/etc/hadoop/
-XX:+IgnoreUnrecognizedVMOptions 
--add-opens=java.base/java.lang=ALL-UNNAMED 
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED 
--add-opens=java.base/java.lang.reflect=ALL-UNNAMED 
--add-opens=java.base/java.io=ALL-UNNAMED 
--add-opens=java.base/java.net=ALL-UNNAMED 
--add-opens=java.base/java.nio=ALL-UNNAMED 
--add-opens=java.base/java.util=ALL-UNNAMED 
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED 
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED 
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED 
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED 
--add-opens=java.base/sun.security.action=ALL-UNNAMED 
--add-opens=java.base/sun.util.calendar=ALL-UNNAMED 
--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED 

-Djdk.reflect.useDirectMethodHandle=false 

org.apache.spark.deploy.SparkSubmit 

--master yarn 
--deploy-mode cluster 
--conf spark.executor.memory=2g 
--conf spark.driver.memory=1g 
--conf spark.driver.cores=1 
--conf spark.executor.cores=1 
--conf spark.executor.instances=1 
--conf spark.executor.extraJavaOptions=-Dfile.encoding=utf-8 
--conf spark.cores.max=1 
--conf spark.driver.extraJavaOptions=-Dfile.encoding=utf-8 
--class com.isxcode.star.plugin.query.sql.Execute 
--name zhiqingyun-job 
--jars 
    file:/home/zhiqingyun/zhiqingyun-agent/lib/mysql-connector-java-5.1.49.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/jetty-http-9.4.50.v20221201.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/mysql-connector-j-8.1.0.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/oceanbase-client-2.4.6.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/jetty-server-9.4.51.v20230217.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/mssql-jdbc-12.4.2.jre8.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/ngdbc-2.18.13.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/clickhouse-jdbc-0.5.0.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/ojdbc10-19.20.0.0.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/postgresql-42.6.0.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/jcc-11.5.8.0.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/fastjson2-2.0.16.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/Dm8JdbcDriver18-8.1.1.49.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/fastjson-2.0.16.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/log4j-api-2.17.2.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/zhiqingyun-agent.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/spark-yun-api-main-plain.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/jetty-io-9.4.50.v20221201.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/jetty-util-ajax-9.4.50.v20221201.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/spark-core-2.9.4.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/jetty-util-9.4.50.v20221201.jar,
    file:/home/zhiqingyun/zhiqingyun-agent/lib/fastjson2-extension-2.0.16.jar 
/home/zhiqingyun/zhiqingyun-agent/plugins/spark-query-sql-plugin.jar 
eyJjb250YWluZXJQb3J0IjowLCJkYXRhYmFzZSI6ImlzeGNvZGVfZGIiLCJsaW1pdCI6MjAwLCJzcGFya0NvbmZpZyI6eyJzcGFyay5leGVjdXRvci5tZW1vcnkiOiIyZyIsInNwYXJrLmRyaXZlci5tZW1vcnkiOiIxZyIsInNwYXJrLmRyaXZlci5jb3JlcyI6IjEiLCJzcGFyay5leGVjdXRvci5jb3JlcyI6IjEiLCJzcGFyay5leGVjdXRvci5pbnN0YW5jZXMiOiIxIiwic3BhcmsuZXhlY3V0b3IuZXh0cmFKYXZhT3B0aW9ucyI6Ii1EZmlsZS5lbmNvZGluZz11dGYtOCIsImhpdmUubWV0YXN0b3JlLnVyaXMiOiJ0aHJpZnQ6Ly9pc3hjb2RlOjkwODMiLCJzcGFyay5jb3Jlcy5tYXgiOiIxIiwic3BhcmsuZHJpdmVyLmV4dHJhSmF2YU9wdGlvbnMiOiItRGZpbGUuZW5jb2Rpbmc9dXRmLTgifSwic3FsIjoiLS0gc2hvdyBkYXRhYmFzZXNcbnNlbGVjdCAqIGZyb20gdGVzdF9mdW5jMSJ9
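
The last positional argument is a base64-encoded JSON payload handed to the plugin. Decoding it (for example with base64 -d) yields roughly the following, reformatted here for readability:

{
  "containerPort": 0,
  "database": "isxcode_db",
  "limit": 200,
  "sparkConfig": {
    "spark.executor.memory": "2g",
    "spark.driver.memory": "1g",
    "spark.driver.cores": "1",
    "spark.executor.cores": "1",
    "spark.executor.instances": "1",
    "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
    "hive.metastore.uris": "thrift://isxcode:9083",
    "spark.cores.max": "1",
    "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
  },
  "sql": "-- show databases\nselect * from test_func1"
}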

Unorganized notes

{
  "spark.executor.memory": "2g",
  "spark.driver.memory": "1g",
  "spark.sql.legacy.timeParserPolicy": "LEGACY",
  "spark.driver.cores": "1",
  "spark.sql.storeAssignmentPolicy": "LEGACY",
  "spark.executor.cores": "1",
  "spark.executor.instances": "1",
  "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
  "hive.metastore.uris": "thrift://isxcode:9083",
  "spark.cores.max": "1",
  "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
}


-- 1 partition, concurrency 1
-- spark.executor.instances does not take effect
-- 2 containers, 2 vCPUs, 4 GB
{
  "spark.sql.legacy.timeParserPolicy": "LEGACY",
  "spark.sql.storeAssignmentPolicy": "LEGACY",
  "spark.executor.instances": "8",
  "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
  "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
  "hive.metastore.uris": "thrift://isxcode:9083"
}
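
A common reason for spark.executor.instances appearing to be ignored is dynamic allocation overriding the fixed count. If that is the cause here (not verified against this cluster), pinning the executor count would look like this:

{
  "spark.dynamicAllocation.enabled": "false",
  "spark.executor.instances": "8"
}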


-- 200 partitions, concurrency 8
-- 9 containers, 9 vCPUs, 18 GB
-- 8 running
-- 200 stages
-- actual memory used: 5 GB
{
  "spark.sql.legacy.timeParserPolicy": "LEGACY",
  "spark.sql.storeAssignmentPolicy": "LEGACY",
  "spark.executor.instances": "8",
  "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
  "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
  "hive.metastore.uris": "thrift://isxcode:9083"
}
-- Summary: memory was not fully used


-- 10 partitions, concurrency 18
-- 10  10  20480 (likely containers / vCores / memory in MB, as reported by YARN)
-- 9 running
-- actual memory used: 7 GB
-- 9 executors, 1 driver
-- 366 MB       366 MB
{
  "spark.sql.legacy.timeParserPolicy": "LEGACY",
  "spark.sql.storeAssignmentPolicy": "LEGACY",
  "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
  "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
  "hive.metastore.uris": "thrift://isxcode:9083"
}



-- 10 partitions, concurrency 11
-- 7  7  20480
-- 6 running
-- actual memory used: 5.7 GiB
-- 6 executors, 1 driver
-- 912.3 MiB   366 MB
{
  "spark.executor.memory": "2g",
  "spark.driver.memory": "1g",
  "spark.sql.legacy.timeParserPolicy": "LEGACY",
  "spark.sql.storeAssignmentPolicy": "LEGACY",
  "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
  "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
  "hive.metastore.uris": "thrift://isxcode:9083"
}



-- 10 partitions, concurrency 11
-- 5  5  18432
-- 4 running
-- actual memory used: 4 GB
-- 4 executors, 1 driver
-- 2.1 GiB   549.4 MiB
{
  "spark.executor.memory": "3g",
  "spark.driver.memory": "1g",
  "spark.memory.fraction": 0.9,
  "spark.sql.legacy.timeParserPolicy": "LEGACY",
  "spark.sql.storeAssignmentPolicy": "LEGACY",
  "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
  "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
  "hive.metastore.uris": "thrift://isxcode:9083"
}
-- The amount of data handled by a single executor is still too large




-- 100 partitions, concurrency 11
-- 5  5  18432
-- 4 running
-- actual memory used: 4 GB
-- 4 executors, 1 driver
-- 2.1 GiB   549.4 MiB
{
  "spark.executor.memory": "3g",
  "spark.driver.memory": "1g",
  "spark.memory.fraction": 0.9,
  "spark.sql.legacy.timeParserPolicy": "LEGACY",
  "spark.sql.storeAssignmentPolicy": "LEGACY",
  "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
  "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
  "hive.metastore.uris": "thrift://isxcode:9083"
}


Preliminary conclusions:
Number of containers: limited by memory, i.e. by spark.executor.memory and spark.driver.memory
Executors: determined by spark.executor.cores — 2 × number of containers
spark.executor.instances: 1
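
A rough sanity check of those container sizes, assuming the default spark.executor.memoryOverhead of max(384 MB, 10% of executor memory) and a YARN minimum allocation of 1024 MB (both are assumptions, not values read from this cluster):

-- executor.memory=3g: 3072 MB + 384 MB overhead = 3456 MB, rounded up by YARN to 4096 MB per executor container
-- driver.memory=1g: 1024 MB + 384 MB overhead = 1408 MB, rounded up to 2048 MB for the driver container
-- 4 executors × 4096 MB + 1 driver × 2048 MB = 18432 MB, which matches the "5  5  18432" readings above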



Partition by data size: 1.4 million rows, 10 partitions, roughly 140k rows per partition (see the sketch below).
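
A minimal sketch of how a JDBC source is usually split into a fixed number of partitions in Spark. The URL, table and column names below are placeholders, and this is not necessarily how the zhiqingyun plugin issues the read:

// Split ~1.4M rows into 10 JDBC partitions of roughly 140k rows each.
// "part" and "id" are assumed placeholders; spark is an existing SparkSession.
val df = spark.read
  .format("jdbc")
  .option("url", "jdbc:mysql://<mysql-host>:3306/isxcode_db")
  .option("dbtable", "part")
  .option("user", "<user>")
  .option("password", "<password>")
  .option("partitionColumn", "id")   // must be a numeric, date or timestamp column
  .option("lowerBound", "1")
  .option("upperBound", "1400000")
  .option("numPartitions", "10")
  .load()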


-- 10 partitions, concurrency 1
-- 5  5  18432
-- 8 running: (5-1)*2
-- actual memory used: 4 GB
-- 4 executors, 1 driver
-- 2.1 GiB   549.4 MiB

{
  "spark.executor.memory": "2g",
  "spark.executor.cores": 1,
  "spark.driver.memory": "1g",
  "spark.driver.cores": 1,
  "spark.memory.fraction": 0.9,
  "spark.sql.legacy.timeParserPolicy": "LEGACY",
  "spark.sql.storeAssignmentPolicy": "LEGACY",
  "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
  "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
  "hive.metastore.uris": "thrift://isxcode:9083"
}

spark.executor.cores determines the number of running tasks, exactly matching the number of cores requested in YARN.
spark.executor.memory together with the concurrency determines the number of containers.

For the YARN upper-limit configuration, 1 core per 2 GB works best; with 20 GB that means 10 cores.
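
For reference, the NodeManager caps that correspond to a 20 GB / 10-core box (these are the standard YARN properties; the values are an illustration, not what was actually configured here):

yarn.nodemanager.resource.memory-mb = 20480
yarn.nodemanager.resource.cpu-vcores = 10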

Taking 1 core per 2 GB, with 2 cores, as the standard: how much data should go into one partition? 1.2 MiB / 14125 records is too low.
Need to use the internal network.

By default 2 containers are started: one driver and one executor.

There is an upper limit on extracting data from the database.



3.7 min	0.1 s	0.0 B / 280050	22.8 MiB / 280050
5.0 min	0.3 s	0.0 B / 279487	22.8 MiB / 279487


2  2  5120
1.4 million rows on a single core: 20 minutes

1 core / 2 GB (with spark.memory.fraction 0.9) can handle the 1.4-million-row workload.



Stress test without manual tuning — check the speed
Single-table Spark container test — check the speed
Data sync — check the speed
Test YARN at peak performance: use all the CPU and all the memory

Remove these two IPs from the whitelist:
210.87.110.108
101.90.12.139


600038145
about 600 million rows of data

Start the data sync


23 GB of memory
8 CPU cores


Sync of the part table: 1,400,000 rows (1.4 million)
Max out the configuration
At most 8 cores and 8 executor instances


{
  "spark.executor.memory": "2g",
  "spark.driver.memory": "1g",
  "spark.sql.legacy.timeParserPolicy": "LEGACY",
  "spark.driver.cores": "1",
  "spark.sql.storeAssignmentPolicy": "LEGACY",
  "spark.executor.cores": "1",
  "spark.executor.instances": "1",
  "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
  "hive.metastore.uris": "thrift://isxcode:9083",
  "spark.cores.max": "1",
  "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
}
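
The config above still requests only a single executor. A configuration that actually uses the 8 cores / 8 instances mentioned above might look like the following sketch (derived from the notes, not a recorded run; with memory overhead, 23 GB may cap the real executor count below 8):

{
  "spark.executor.memory": "2g",
  "spark.executor.cores": "1",
  "spark.executor.instances": "8",
  "spark.driver.memory": "1g",
  "spark.driver.cores": "1",
  "spark.memory.fraction": "0.9",
  "spark.sql.legacy.timeParserPolicy": "LEGACY",
  "spark.sql.storeAssignmentPolicy": "LEGACY",
  "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
  "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8",
  "hive.metastore.uris": "thrift://isxcode:9083"
}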



{
  "spark.executor.memory": "2g",
  "spark.driver.memory": "1g",
  "spark.sql.legacy.timeParserPolicy": "LEGACY",
  "spark.driver.cores": "1",
  "spark.sql.storeAssignmentPolicy": "LEGACY",
  "spark.executor.cores": "1",
  "spark.memory.fraction": "0.9",
  "spark.executor.extraJavaOptions": "-Dfile.encoding=utf-8",
  "hive.metastore.uris": "thrift://isxcode:9083",
  "spark.driver.extraJavaOptions": "-Dfile.encoding=utf-8"
}

Change "spark.memory.fraction": "0.9" back to the default value (Spark's default is 0.6, so removing the entry has the same effect).
