doris spark同步到doris
Last updated on January 17, 2025 am
🧙 Questions
将spark中的数据导入doris中
☄️ Ideas
# Open an interactive bash shell inside the isxcode-mysql container.
docker exec -it isxcode-mysql bash
# Connect with the mysql client as root to host isxcode on port 30131.
# NOTE(review): the password is passed inline via -p, so it is visible in shell history.
mysql -u root -pispong123 -h isxcode -P 30131
-- List the registered brokers. The PROC path must be a plain quoted string:
-- the typographic quotes (‘ ’) in the original are not SQL string delimiters.
SHOW PROC "/brokers";
Note:
RESOURCE NAME 中不能包含连字符(-),可使用下划线(_)代替,例如 ispong_spark。
-- Spark resource "ispong_spark", YARN variant: master is yarn with cluster
-- deploy mode, working_dir lives on HDFS, and the broker broker_ispong is
-- referenced with empty credentials.
-- The commented-out properties are optional settings kept for reference.
CREATE EXTERNAL RESOURCE "ispong_spark"
PROPERTIES (
    "type"                                      = "spark",
    "spark.master"                              = "yarn",
    "spark.submit.deployMode"                   = "cluster",
    -- "spark.jars" = "/data/cdh/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/*",
    -- "spark.files" = "/tmp/aaa,/tmp/bbb",
    "spark.executor.memory"                     = "1g",
    -- "spark.yarn.queue" = "ispong-queue",
    "spark.hadoop.yarn.resourcemanager.address" = "172.23.39.206:8032",
    "spark.hadoop.fs.defaultFS"                 = "hdfs://172.23.39.206:30116",
    "working_dir"                               = "hdfs://172.23.39.206:30116/tmp/doris",
    "broker"                                    = "broker_ispong",
    "broker.username"                           = "",
    "broker.password"                           = ""
);
-- List the resources that have been created.
SHOW RESOURCES;

-- Same information through the PROC interface.
SHOW PROC "/resources";

-- Remove the resource so that the name ispong_spark can be reused.
DROP RESOURCE 'ispong_spark';
-- Spark resource "ispong_spark", standalone variant: points at the Spark
-- master on 172.23.39.206:7777 in client deploy mode. It reuses the name
-- ispong_spark, so an existing resource of that name must be dropped first.
CREATE EXTERNAL RESOURCE "ispong_spark"
PROPERTIES (
    "type"                    = "spark",
    "spark.master"            = "spark://172.23.39.206:7777",
    "spark.submit.deployMode" = "client",
    "working_dir"             = "hdfs://172.23.39.206:30116/tmp/doris",
    "broker"                  = "broker_ispong"
);
-- External table ispong_table_hive mapped to the Hive table
-- cdh_dev.ispong_table through the metastore at 172.23.39.206:30123.
CREATE EXTERNAL TABLE ispong_table_hive (
    username VARCHAR(50),
    age      INT
)
ENGINE = hive
PROPERTIES (
    "database"            = "cdh_dev",
    "table"               = "ispong_table",
    "hive.metastore.uris" = "thrift://172.23.39.206:30123"
);
需要将 Spark 的 jar 包打包成 zip,并在 Doris FE 的配置(fe.conf)中指定其路径
# Open the Doris FE configuration file for editing.
vim /data/doris/fe/conf/fe.conf
# === vim /data/doris/fe/conf/fe.conf ===
# Spark client installation used by the FE.
spark_home_default_dir=/data/cdh/cloudera/parcels/CDH/lib/spark
# Zip archive containing the Spark jars. The key is spark_resource_path;
# the original spelling "spark_resource_patj" is not a recognized setting.
spark_resource_path=/data/doris/fe/lib/spark2x/jars/spark-2x.zip
# === vim /data/doris/fe/conf/fe.conf ===
# Pack every file in ~/spark-jars into spark-2x.zip.
cd ~/spark-jars
zip spark-2x.zip ./*
# Move the archive to the location referenced in fe.conf.
mkdir -p /data/doris/fe/lib/spark2x/jars
mv spark-2x.zip /data/doris/fe/lib/spark2x/jars/
# Create the HDFS directory used as working_dir by the Spark resource, then list it.
hadoop fs -mkdir -p /tmp/doris
hadoop fs -ls hdfs://172.23.39.206:30116/tmp/doris
-- Spark Load job ispong_db.ispong_job_10: reads the comma-separated files of
-- the Hive warehouse directory of cdh_dev.ispong_table from HDFS and loads
-- them into ispong_table through the Spark resource ispong_spark.
-- The block after WITH RESOURCE sets Spark properties for this job.
LOAD LABEL ispong_db.ispong_job_10
(
    DATA INFILE ("hdfs://isxcode:30116/user/hive/warehouse/cdh_dev.db/ispong_table/*")
    INTO TABLE ispong_table
    COLUMNS TERMINATED BY ","
    (username, age)
    SET (
        username = username,
        age = age
    )
)
WITH RESOURCE 'ispong_spark'
(
    "spark.executor.memory"  = "2g",
    "spark.shuffle.compress" = "true"
)
PROPERTIES
(
    "timeout" = "3600"
);
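ERROR 1064 (HY000): errCode = 2, detailMessage = Spark Load is coming soon
Note: 出现该报错通常说明当前 FE 未开启 Spark Load 功能。可尝试在 fe.conf 中设置 enable_spark_load=true 并重启 FE 后重试(具体以所用 Doris 版本的文档为准)。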
-- List the tables in the current database.
SHOW TABLES;
-- Import data (Broker Load variant through broker_ispong, kept commented out)
-- LOAD LABEL ispong_db.ispong_job_1
-- (
-- DATA INFILE("hdfs://isxcode:30116/user/hive/warehouse/cdh_dev.db/ispong_table/*")
-- INTO TABLE ispong_table_copy
-- COLUMNS TERMINATED BY ","
-- (username,age)
-- SET
-- (
-- username=username,
-- age=age
-- )
-- )
-- WITH BROKER 'broker_ispong'
-- (
-- "username"="",
-- "password"=""
-- )
-- PROPERTIES
-- (
-- "timeout" = "3600"
-- );
-- List load jobs
SHOW LOAD;
-- Cancel a load job by label
-- NOTE(review): the label ispong_label_2 differs from the labels used by the
-- LOAD statements in these notes (ispong_job_10 / ispong_job_1); substitute the
-- label of the job to cancel.
CANCEL LOAD FROM ispong_db WHERE LABEL = "ispong_label_2";
-- Delete a label
-- NOTE(review): ispong_db is used as a database everywhere else in these notes,
-- while DELETE FROM normally expects a table name; confirm this statement is
-- accepted by the target Doris version.
DELETE FROM ispong_db WHERE LABEL = "ispong_label_2";
-- Query the table
SELECT * FROM ispong_db.ispong_table;
🔗 Links
doris spark同步到doris
https://ispong.isxcode.com/db/doris/doris spark同步到doris/