spark 创建容器
Last updated on January 17, 2025 am
🧙 Questions
☄️ Ideas
package com.isxcode.star.plugin.query.sql;
import com.alibaba.fastjson.JSONArray;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import spark.Spark;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static spark.Spark.port;
import static spark.Spark.post;
/**
 * Spark driver entry point that keeps a {@link SparkSession} alive and exposes it over a small
 * HTTP endpoint.
 *
 * <p>{@link #main(String[])} builds a Hive-enabled session and registers {@code POST /getData} on
 * port 8088. The request body is a JSON array of strings; only the first element is used, and it
 * is executed as a SQL statement through {@link SparkSession#sql(String)}.
 *
 * <p>SECURITY NOTE(review): the endpoint executes whatever SQL the caller sends and performs no
 * authentication in this class. Confirm that the port is only reachable by trusted callers.
 */
public class Execute {

    /** Port the embedded HTTP server listens on. */
    private static final int SERVER_PORT = 8088;

    /** HTTP status returned when the request body does not carry a usable SQL statement. */
    private static final int BAD_REQUEST = 400;

    /**
     * Creates the Spark session and starts the HTTP server.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Initialize the session first, before the HTTP server is configured.
        SparkConf conf = new SparkConf();
        conf.set("spark.executor.memory", "1g");
        conf.set("spark.driver.memory", "1g");
        SparkSession sparkSession =
            SparkSession.builder().config(conf).enableHiveSupport().getOrCreate();

        // Small embedded HTTP server (Jetty-backed) exposing a single endpoint.
        port(SERVER_PORT);
        post("/getData", (req, res) -> {
            // parseArray may return null (e.g. for a null or blank body), and "[]" yields an
            // empty list; reject both explicitly instead of failing with NPE/IndexOutOfBounds.
            List<String> statements = JSONArray.parseArray(req.body(), String.class);
            if (statements == null || statements.isEmpty()) {
                res.status(BAD_REQUEST);
                return "Request body must be a non-empty JSON array whose first element is the SQL statement";
            }

            String sql = statements.get(0);
            if (sql == null || sql.trim().isEmpty()) {
                res.status(BAD_REQUEST);
                return "The first element of the request array must be a non-blank SQL statement";
            }

            // NOTE(review): no response transformer is registered here, so the returned list is
            // presumably rendered via its toString() rather than as JSON - confirm with clients.
            return exportResult(sparkSession.sql(sql));
        });
        Spark.awaitInitialization();
    }

    /**
     * Converts a dataset into a table of strings.
     *
     * <p>The first entry of the result holds the column names; every following entry holds one
     * row, with each cell converted through {@link String#valueOf(Object)} (so a {@code null}
     * cell becomes the text {@code "null"}). All rows are collected into this JVM via
     * {@code collectAsList()}, so the dataset must be small enough to fit in memory.
     *
     * @param rowDataset the dataset to export
     * @return the header row followed by one list of cell strings per data row
     */
    public static List<List<String>> exportResult(Dataset<Row> rowDataset) {
        List<Row> rows = rowDataset.collectAsList();
        List<List<String>> result = new ArrayList<>(rows.size() + 1);

        // Header
        result.add(Arrays.asList(rowDataset.columns()));

        // Data
        for (Row row : rows) {
            int width = row.size();
            List<String> cells = new ArrayList<>(width);
            for (int i = 0; i < width; i++) {
                cells.add(String.valueOf(row.get(i)));
            }
            result.add(cells);
        }
        return result;
    }
}
// Build dependencies: the project API module, Apache Spark modules, Hive/Hadoop clients,
// supporting libraries, and the embedded HTTP server (Spark Java + Jetty).
dependencies {
// spark-yun-api
api(project(':spark-yun-api'))
// spark-streaming
implementation "org.apache.spark:spark-streaming_${SPARK_SCALA_VERSION}:${SPARK_VERSION}"
// spark-streaming-kafka
implementation "org.apache.spark:spark-streaming-kafka-0-10_${SPARK_SCALA_VERSION}:${SPARK_KAFKA_VERSION}"
// spark-core
implementation "org.apache.spark:spark-core_${SPARK_SCALA_VERSION}:${SPARK_VERSION}"
// spark-catalyst
implementation "org.apache.spark:spark-catalyst_${SPARK_SCALA_VERSION}:${SPARK_VERSION}"
// spark-sql
implementation "org.apache.spark:spark-sql_${SPARK_SCALA_VERSION}:${SPARK_VERSION}"
// spark-hive
implementation "org.apache.spark:spark-hive_${SPARK_SCALA_VERSION}:${SPARK_VERSION}"
// spark-network-common
implementation "org.apache.spark:spark-network-common_${SPARK_SCALA_VERSION}:${SPARK_VERSION}"
// spark-network-shuffle
implementation "org.apache.spark:spark-network-shuffle_${SPARK_SCALA_VERSION}:${SPARK_VERSION}"
// spark-yarn
implementation "org.apache.spark:spark-yarn_${SPARK_SCALA_VERSION}:${SPARK_VERSION}"
// spark-kvstore
implementation "org.apache.spark:spark-kvstore_${SPARK_SCALA_VERSION}:${SPARK_VERSION}"
// spark-launcher
implementation "org.apache.spark:spark-launcher_${SPARK_SCALA_VERSION}:${SPARK_VERSION}"
// jersey-container-servlet-core
implementation "org.glassfish.jersey.containers:jersey-container-servlet-core:2.27"
// hive-common
implementation "org.apache.hive:hive-common:${HIVE_VERSION}"
// hadoop-yarn-client
implementation "org.apache.hadoop:hadoop-yarn-client:${HADOOP_VERSION}"
// hadoop-hdfs
implementation "org.apache.hadoop:hadoop-hdfs:${HADOOP_VERSION}"
// hadoop-mapreduce-client-core
implementation "org.apache.hadoop:hadoop-mapreduce-client-core:${HADOOP_VERSION}"
// json4s-jackson
implementation "org.json4s:json4s-jackson_${SPARK_SCALA_VERSION}:${JSON4S_VERSION}"
// json4s-core
implementation "org.json4s:json4s-core_${SPARK_SCALA_VERSION}:${JSON4S_VERSION}"
// janino
implementation "org.codehaus.janino:janino:3.1.6"
// commons-compiler
implementation "org.codehaus.janino:commons-compiler:3.1.6"
// jackson-module-scala
implementation "com.fasterxml.jackson.module:jackson-module-scala_${SPARK_SCALA_VERSION}:2.10.0"
// jackson-databind
implementation "com.fasterxml.jackson.core:jackson-databind:2.10.0"
// lz4-java
implementation "org.lz4:lz4-java:1.5.0"
// xbean-asm6-shaded
implementation "org.apache.xbean:xbean-asm6-shaded:4.8"
// univocity-parsers
implementation "com.univocity:univocity-parsers:2.8.2"
// Spark Java web framework (com.sparkjava:spark-core) - not the Apache Spark spark-core above
// wget https://repo1.maven.org/maven2/com/sparkjava/spark-core/2.9.4/spark-core-2.9.4.jar
implementation 'com.sparkjava:spark-core:2.9.4'
// Jetty jars (same version as the jetty-server dependency below); presumably fetched manually
// when they have to be supplied alongside the plugin jar - TODO confirm
// wget https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-server/9.4.51.v20230217/jetty-server-9.4.51.v20230217.jar
// wget https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-util/9.4.51.v20230217/jetty-util-9.4.51.v20230217.jar
// wget https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-http/9.4.51.v20230217/jetty-http-9.4.51.v20230217.jar
// wget https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-io/9.4.51.v20230217/jetty-io-9.4.51.v20230217.jar
// jetty-server
// https://mvnrepository.com/artifact/org.eclipse.jetty/jetty-server
implementation group: 'org.eclipse.jetty', name: 'jetty-server', version: '9.4.51.v20230217'
}
// Give the built jar a fixed file name.
jar {
archiveFileName = "spark-query-sql-plugin.jar"
}
🔗 Links
spark 创建容器
https://ispong.isxcode.com/hadoop/spark/spark%20创建容器/