spark 集成prql

Last updated on September 15, 2024

🧙 Questions

☄️ Ideas

下载代码

git clone https://github.com/PRQL/prql.git

安装rust

# https://rustup.rs/
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
source "$HOME/.cargo/env"

编译prqlc

cd prql
cargo clean
# cargo build  # 需要python3.9
cargo build -p prqlc 
# /root/prql/target/debug/libprql_java.so
cargo build -p prql-java

编译jar

cd prql/prqlc/bindings/java/java
mvn clean package -Dmaven.test.skip=true

安装到本地

mvn install:install-file -DgroupId=org.prqllang -DartifactId=prql-java -Dversion=0.5.2 -Dpackaging=jar -Dfile=/Users/ispong/Downloads/prql/prqlc/bindings/java/java/target/prql-java-0.5.2.jar

项目中引入

<dependency>
    <groupId>org.prqllang</groupId>
    <artifactId>prql-java</artifactId>
    <version>0.5.2</version>
</dependency>

将prqlc引入到resources下

linux arm64: libprql_java-linux-aarch64.so
linux amd64: libprql_java-linux64.so
mac arm64: libprql_java-osx-arm64.dylib

# Note: this copies from target/release, but the commands above build in debug mode.
# Run `cargo build --release -p prql-java` first, or copy from target/debug instead.
cp /Users/ispong/Downloads/prql/target/release/libprql_java.dylib  /Users/ispong/definesys/ispong_framework/ispong_admin/src/main/resources/libprql_java-osx.dylib

编写代码

/**
 * Demo entry point: compiles a sample PRQL query to SQL via the prql-java
 * bindings and prints the generated statement to stdout.
 *
 * @param args unused command-line arguments
 * @throws Exception propagated from the PRQL compiler on invalid input
 */
public static void main(String[] args) throws Exception {

    // Assemble the sample PRQL query one line at a time. Joining with "\n"
    // and appending a final newline yields exactly the same string as the
    // original inline concatenation.
    String prqlQuery = String.join("\n",
        "from invoices                        # A PRQL query begins with a table",
        "                                     # Subsequent lines \"transform\" (modify) it",
        "derive {                             # \"derive\" adds columns to the result",
        "  transaction_fee = 0.8,             # \"=\" sets a column name",
        "  income = total - transaction_fee   # Calculations can use other column names",
        "}",
        "# starts a comment; commenting out a line leaves a valid query",
        "filter income > 5                    # \"filter\" replaces both of SQL's WHERE & HAVING",
        "filter invoice_date >= @2010-01-16   # Clear date syntax",
        "group customer_id (                  # \"group\" performs the pipeline in (...) on each group",
        "  aggregate {                        # \"aggregate\" reduces each group to a single row",
        "    sum_income = sum income,         # ... using SQL SUM(), COUNT(), etc. functions",
        "    ct = count customer_id,          #",
        "  }",
        ")",
        "join c=customers (==customer_id)     # join on \"customer_id\" from both tables",
        "derive name = f\"{c.last_name}, {c.first_name}\" # F-strings like Python",
        "derive db_version = s\"version()\"     # S-string offers escape hatch to SQL",
        "select {                             # \"select\" passes along only the named columns",
        "  c.customer_id, name, sum_income, ct, db_version,",
        "}                                    # trailing commas always ignored",
        "sort {-sum_income}                   # \"sort\" sorts the result; \"-\" is decreasing order",
        "take 1..10                           # Limit to a range - could also be \"take 10\"",
        "#",
        "# The \"output.sql\" tab at right shows the SQL generated from this PRQL query",
        "# The \"output.arrow\" tab shows the result of the query") + "\n";

    // Compile the PRQL query targeting the MySQL dialect.
    // NOTE(review): the two boolean flags presumably control output formatting
    // and the signature comment — confirm against the prql-java API docs.
    String sql = PrqlCompiler.toSql(prqlQuery, "mysql", true, true);
    System.out.println(sql);
}

spark 集成prql
https://ispong.isxcode.com/hadoop/spark/spark 集成prql/
Author
ispong
Posted on
January 30, 2024
Licensed under