spark 集成prql
Last updated on November 20, 2024 am
🧙 Questions
☄️ Ideas
下载代码
git clone https://github.com/PRQL/prql.git
安装rust
# https://rustup.rs/
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
source "$HOME/.cargo/env"
编译prqlc
cd prql
cargo clean
# cargo build # 需要python3.9
cargo build -p prqlc
# /root/prql/target/debug/libprql_java.so
cargo build -p prql-java
编译jar
cd prql/prqlc/bindings/java/java
mvn clean package -Dmaven.test.skip
安装到本地
mvn install:install-file -DgroupId=org.prqllang -DartifactId=prql-java -Dversion=0.5.2 -Dpackaging=jar -Dfile=/Users/ispong/Downloads/prql/prqlc/bindings/java/java/target/prql-java-0.5.2.jar
项目中引入
<dependency>
<groupId>org.prqllang</groupId>
<artifactId>prql-java</artifactId>
<version>0.5.2</version>
</dependency>
将prqlc引入到resources下
linux arm64: libprql_java-linux-aarch64.so
linux amd64: libprql_java-linux64.so
mac arm64: libprql_java-osx-arm64.dylib
# 注意: 上面的 cargo build 未加 --release, 产物位于 target/debug/ (如需 release 版本请先执行 cargo build --release -p prql-java)
cp /Users/ispong/Downloads/prql/target/debug/libprql_java.dylib /Users/ispong/definesys/ispong_framework/ispong_admin/src/main/resources/libprql_java-osx.dylib
编写代码
/**
 * Demo entry point: compiles a sample PRQL query (the PRQL playground example)
 * to MySQL-dialect SQL via the prql-java bindings and prints the result.
 *
 * @param args unused command-line arguments
 * @throws Exception if the native PRQL compiler reports an error
 */
public static void main(String[] args) throws Exception {
    // Assemble the PRQL source first so the compiler call below stays readable.
    String prqlQuery =
            "from invoices # A PRQL query begins with a table\n" +
            " # Subsequent lines \"transform\" (modify) it\n" +
            "derive { # \"derive\" adds columns to the result\n" +
            " transaction_fee = 0.8, # \"=\" sets a column name\n" +
            " income = total - transaction_fee # Calculations can use other column names\n" +
            "}\n" +
            "# starts a comment; commenting out a line leaves a valid query\n" +
            "filter income > 5 # \"filter\" replaces both of SQL's WHERE & HAVING\n" +
            "filter invoice_date >= @2010-01-16 # Clear date syntax\n" +
            "group customer_id ( # \"group\" performs the pipeline in (...) on each group\n" +
            " aggregate { # \"aggregate\" reduces each group to a single row\n" +
            " sum_income = sum income, # ... using SQL SUM(), COUNT(), etc. functions\n" +
            " ct = count customer_id, #\n" +
            " }\n" +
            ")\n" +
            "join c=customers (==customer_id) # join on \"customer_id\" from both tables\n" +
            "derive name = f\"{c.last_name}, {c.first_name}\" # F-strings like Python\n" +
            "derive db_version = s\"version()\" # S-string offers escape hatch to SQL\n" +
            "select { # \"select\" passes along only the named columns\n" +
            " c.customer_id, name, sum_income, ct, db_version,\n" +
            "} # trailing commas always ignored\n" +
            "sort {-sum_income} # \"sort\" sorts the result; \"-\" is decreasing order\n" +
            "take 1..10 # Limit to a range - could also be \"take 10\"\n" +
            "#\n" +
            "# The \"output.sql\" tab at right shows the SQL generated from this PRQL query\n" +
            "# The \"output.arrow\" tab shows the result of the query\n";

    // Target dialect "mysql"; the two booleans presumably toggle formatting and the
    // signature comment — NOTE(review): confirm against the prql-java 0.5.2 API.
    String generatedSql = PrqlCompiler.toSql(prqlQuery, "mysql", true, true);
    System.out.println(generatedSql);
}
🔗 Links
spark 集成prql
https://ispong.isxcode.com/hadoop/spark/spark 集成prql/