From 3c2dd39c150d48c1427eb332ef810b24f834aaf3 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 15 Mar 2026 19:58:42 +0800 Subject: [PATCH 01/44] feat(sql): migrate to DataFusion-based streaming SQL planner --- Cargo.lock | 2422 ++++++++++++++++- Cargo.toml | 28 + src/coordinator/analyze/analyzer.rs | 11 +- src/coordinator/coordinator.rs | 4 +- src/coordinator/execution/executor.rs | 14 + src/coordinator/mod.rs | 2 +- src/coordinator/plan/logical_plan_visitor.rs | 35 +- src/coordinator/plan/mod.rs | 2 + .../plan/streaming_sql_plan.rs} | 30 +- src/coordinator/plan/visitor.rs | 8 +- src/coordinator/statement/mod.rs | 2 + src/coordinator/statement/streaming_sql.rs | 39 + src/coordinator/statement/visitor.rs | 7 + src/datastream/logical.rs | 317 +++ src/datastream/mod.rs | 2 + src/datastream/optimizers.rs | 100 + src/lib.rs | 1 + src/server/handler.rs | 10 +- src/sql/grammar.pest | 134 - src/sql/mod.rs | 7 +- src/sql/parser/sql_parser.rs | 249 -- src/sql/planner/extension/aggregate.rs | 348 +++ src/sql/planner/extension/join.rs | 61 + src/sql/planner/extension/key_calculation.rs | 138 + src/sql/planner/extension/mod.rs | 153 ++ src/sql/planner/extension/projection.rs | 91 + src/sql/planner/extension/remote_table.rs | 71 + src/sql/planner/extension/watermark_node.rs | 110 + src/sql/planner/extension/window_fn.rs | 62 + src/sql/planner/mod.rs | 355 +++ src/sql/planner/parse.rs | 183 ++ src/sql/planner/plan/aggregate.rs | 275 ++ src/sql/planner/plan/join.rs | 242 ++ src/sql/planner/plan/mod.rs | 449 +++ src/sql/planner/plan/window_fn.rs | 178 ++ src/sql/planner/schemas.rs | 59 + src/sql/planner/sql_to_plan.rs | 22 + src/sql/planner/types.rs | 513 ++++ 38 files changed, 6199 insertions(+), 535 deletions(-) rename src/{sql/parser/mod.rs => coordinator/plan/streaming_sql_plan.rs} (52%) create mode 100644 src/coordinator/statement/streaming_sql.rs create mode 100644 src/datastream/logical.rs create mode 100644 src/datastream/mod.rs create mode 100644 
src/datastream/optimizers.rs delete mode 100644 src/sql/grammar.pest delete mode 100644 src/sql/parser/sql_parser.rs create mode 100644 src/sql/planner/extension/aggregate.rs create mode 100644 src/sql/planner/extension/join.rs create mode 100644 src/sql/planner/extension/key_calculation.rs create mode 100644 src/sql/planner/extension/mod.rs create mode 100644 src/sql/planner/extension/projection.rs create mode 100644 src/sql/planner/extension/remote_table.rs create mode 100644 src/sql/planner/extension/watermark_node.rs create mode 100644 src/sql/planner/extension/window_fn.rs create mode 100644 src/sql/planner/mod.rs create mode 100644 src/sql/planner/parse.rs create mode 100644 src/sql/planner/plan/aggregate.rs create mode 100644 src/sql/planner/plan/join.rs create mode 100644 src/sql/planner/plan/mod.rs create mode 100644 src/sql/planner/plan/window_fn.rs create mode 100644 src/sql/planner/schemas.rs create mode 100644 src/sql/planner/sql_to_plan.rs create mode 100644 src/sql/planner/types.rs diff --git a/Cargo.lock b/Cargo.lock index 26f07400..6cf6182a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,12 @@ dependencies = [ "gimli", ] +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "ahash" version = "0.8.12" @@ -34,6 +40,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "allocator-api2" version = "0.2.21" @@ -111,12 +132,68 @@ version = "1.0.100" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "ar_archive_writer" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" +dependencies = [ + "object", +] + [[package]] name = "arbitrary" version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "arrow" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994" +dependencies = [ + "arrow-arith", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-cast 55.2.0", + "arrow-csv", + "arrow-data 55.2.0", + "arrow-ipc 55.2.0", + "arrow-json 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "arrow-ord", + "arrow-row", + "arrow-schema 55.2.0", + "arrow-select 55.2.0", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4" +dependencies = [ + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "chrono", + "num", +] + [[package]] name = "arrow-array" version = "52.2.0" @@ -124,15 +201,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", "chrono", "half", "hashbrown 0.14.5", "num", ] +[[package]] +name = "arrow-array" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" +dependencies = [ + "ahash", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.5", + "num", +] + [[package]] name = "arrow-buffer" version = "52.2.0" @@ -144,34 +238,93 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d" +dependencies = [ + "bytes", + "half", + "num", +] + [[package]] name = "arrow-cast" version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", + "atoi", + "base64", + "chrono", + "half", + "lexical-core 0.8.5", + "num", + "ryu", +] + +[[package]] +name = "arrow-cast" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" +dependencies = [ + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "arrow-select 55.2.0", "atoi", "base64", "chrono", + "comfy-table", "half", - "lexical-core", + "lexical-core 1.0.6", "num", "ryu", ] +[[package]] +name = "arrow-csv" 
+version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b" +dependencies = [ + "arrow-array 55.2.0", + "arrow-cast 55.2.0", + "arrow-schema 55.2.0", + "chrono", + "csv", + "csv-core", + "regex", +] + [[package]] name = "arrow-data" version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 52.2.0", + "arrow-schema 52.2.0", + "half", + "num", +] + +[[package]] +name = "arrow-data" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" +dependencies = [ + "arrow-buffer 55.2.0", + "arrow-schema 55.2.0", "half", "num", ] @@ -182,12 +335,96 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "flatbuffers", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-cast 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "flatbuffers 24.12.23", +] + +[[package]] +name = "arrow-ipc" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" +dependencies = [ + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "flatbuffers 25.12.19", + "lz4_flex", +] + +[[package]] +name = "arrow-json" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c" +dependencies = [ + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + 
"arrow-cast 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "chrono", + "half", + "indexmap 2.12.1", + "lexical-core 1.0.6", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", +] + +[[package]] +name = "arrow-json" +version = "55.2.0" +source = "git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fjson#d31f8d8f97c6e1394b52927cd8c23c14fec6ba16" +dependencies = [ + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-cast 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "base64", + "chrono", + "half", + "indexmap 2.12.1", + "lexical-core 1.0.6", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", +] + +[[package]] +name = "arrow-ord" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7" +dependencies = [ + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "arrow-select 55.2.0", +] + +[[package]] +name = "arrow-row" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2" +dependencies = [ + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "half", ] [[package]] @@ -196,6 +433,16 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" +[[package]] +name = "arrow-schema" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "arrow-select" version = "52.2.0" @@ -203,11 +450,59 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" 
dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "num", +] + +[[package]] +name = "arrow-select" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" +dependencies = [ + "ahash", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "num", +] + +[[package]] +name = "arrow-string" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40" +dependencies = [ + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-data 55.2.0", + "arrow-schema 55.2.0", + "arrow-select 55.2.0", + "memchr", "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-compression" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", ] [[package]] @@ -311,12 +606,49 @@ dependencies = [ "tower-service", ] +[[package]] +name = "backtrace" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-link", +] + +[[package]] +name = "backtrace-ext" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50" +dependencies = [ + "backtrace", +] + [[package]] name = "base64" version = "0.22.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bigdecimal" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "bincode" version = "1.3.3" @@ -386,6 +718,41 @@ dependencies = [ "typenum", ] +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -395,6 +762,27 @@ dependencies = [ "generic-array", ] +[[package]] +name = "brotli" +version = "8.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = 
"bumpalo" version = "3.19.1" @@ -404,12 +792,27 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + [[package]] name = "bzip2-sys" version = "0.1.13+1.0.8" @@ -429,7 +832,7 @@ dependencies = [ "cap-primitives", "cap-std", "io-lifetimes", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -458,7 +861,7 @@ dependencies = [ "maybe-owned", "rustix 1.1.3", "rustix-linux-procfs", - "windows-sys 0.59.0", + "windows-sys 0.52.0", "winx", ] @@ -469,7 +872,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8144c22e24bbcf26ade86cb6501a0916c46b7e4787abdb0045a467eb1645a1d" dependencies = [ "ambient-authority", - "rand", + "rand 0.8.5", ] [[package]] @@ -542,6 +945,26 @@ dependencies = [ "windows-link", ] +[[package]] +name = "chrono-tz" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +dependencies = [ + "chrono", + "phf 0.12.1", +] + +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown 0.14.5", + "stacker", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -581,7 +1004,7 @@ version = "4.5.49" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn", @@ -620,6 +1043,15 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "codegen_template" +version = "0.1.0" +source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209" +dependencies = [ + "unicode-xid", + "unscanny", +] + [[package]] name = "colorchoice" version = "1.0.4" @@ -657,12 +1089,59 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "core-foundation-sys" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cornucopia" +version = "0.9.0" +source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209" +dependencies = [ + "chumsky", + "clap", + "codegen_template", + "heck 0.4.1", + "indexmap 2.12.1", + "miette", + "postgres", + "postgres-types", + "prettyplease", + "rusqlite", + "syn", + "thiserror 1.0.69", +] + +[[package]] +name = "cornucopia_async" +version = "0.6.0" +source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209" +dependencies = [ + "async-trait", + "cornucopia_client_core", + "deadpool-postgres", + "rusqlite", + "tokio-postgres", +] + +[[package]] +name = "cornucopia_client_core" +version = "0.4.0" +source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209" +dependencies = [ + "fallible-iterator 0.2.0", + "postgres-protocol", + "postgres-types", +] + [[package]] name = 
"cpp_demangle" version = "0.4.5" @@ -757,7 +1236,7 @@ dependencies = [ "cranelift-assembler-x64-meta", "cranelift-codegen-shared", "cranelift-srcgen", - "heck", + "heck 0.5.0", "pulley-interpreter", ] @@ -823,85 +1302,750 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4e378a54e7168a689486d67ee1f818b7e5356e54ae51a1d7a53f4f13f7f8b7a" [[package]] -name = "crc32fast" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crossterm" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" +dependencies = [ + "bitflags 2.10.0", + 
"crossterm_winapi", + "document-features", + "parking_lot", + "rustix 1.1.3", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "arrow-ipc 55.2.0", + "arrow-schema 55.2.0", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-catalog", + "datafusion-catalog-listing", + "datafusion-common", + 
"datafusion-common-runtime", + "datafusion-datasource", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "flate2", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.9.2", + "regex", + "sqlparser", + "tempfile", + "tokio", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "log", + "object_store", + "tokio", +] + 
+[[package]] +name = "datafusion-common" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "ahash", + "arrow", + "arrow-ipc 55.2.0", + "base64", + "half", + "hashbrown 0.14.5", + "indexmap 2.12.1", + "libc", + "log", + "object_store", + "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "paste", + "recursive", + "sqlparser", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common-runtime" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "futures", + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.9.2", + "tempfile", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + 
"datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "serde_json", + "tokio", +] + +[[package]] +name = "datafusion-datasource-parquet" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.9.2", + "tokio", +] + +[[package]] +name = "datafusion-doc" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" + +[[package]] +name = "datafusion-execution" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "dashmap", + "datafusion-common", + 
"datafusion-expr", + "futures", + "log", + "object_store", + "parking_lot", + "rand 0.9.2", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap 2.12.1", + "paste", + "recursive", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr-common" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "datafusion-common", + "indexmap 2.12.1", + "itertools 0.14.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "arrow-buffer 55.2.0", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", + "hex", + "itertools 0.14.0", + "log", + "md-5", + "rand 0.9.2", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "log", + "paste", +] + 
+[[package]] +name = "datafusion-functions-aggregate-common" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-functions-nested" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "arrow-ord", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-macros", + "datafusion-physical-expr-common", + "itertools 0.14.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-table" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = 
"datafusion-macros" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "datafusion-expr", + "quote", + "syn", +] + +[[package]] +name = "datafusion-optimizer" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "indexmap 2.12.1", + "itertools 0.14.0", + "log", + "recursive", + "regex", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap 2.12.1", + "itertools 0.14.0", + "log", + "paste", + "petgraph 0.8.3", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ - "cfg-if", + "arrow", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", + "recursive", ] [[package]] 
-name = "crossbeam-channel" -version = "0.5.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +name = "datafusion-physical-plan" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ - "crossbeam-utils", + "ahash", + "arrow", + "arrow-ord", + "arrow-schema 55.2.0", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap 2.12.1", + "itertools 0.14.0", + "log", + "parking_lot", + "pin-project-lite", + "tokio", ] [[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +name = "datafusion-proto" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", + "arrow", + "chrono", + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-proto-common", + "object_store", + "prost", ] [[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +name = "datafusion-proto-common" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ - "crossbeam-utils", + "arrow", + "datafusion-common", + "prost", ] [[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +name = "datafusion-session" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] [[package]] -name = "crossterm" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" +name = "datafusion-sql" +version = "48.0.1" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ - "bitflags 2.10.0", - "crossterm_winapi", - "document-features", - "parking_lot", - "rustix 1.1.3", - "winapi", + "arrow", + "bigdecimal", + "datafusion-common", + "datafusion-expr", + "indexmap 2.12.1", + "log", + "recursive", + "regex", + "sqlparser", ] [[package]] -name = "crossterm_winapi" -version = "0.9.1" +name = "deadpool" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b" dependencies = [ - "winapi", + "deadpool-runtime", + "lazy_static", + "num_cpus", + "tokio", ] [[package]] -name = "crunchy" -version = "0.2.4" +name = "deadpool-postgres" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +checksum = 
"3d697d376cbfa018c23eb4caab1fd1883dd9c906a8c034e8d9a3cb06a7e0bef9" +dependencies = [ + "async-trait", + "deadpool", + "getrandom 0.2.16", + "tokio", + "tokio-postgres", + "tracing", +] [[package]] -name = "crypto-common" -version = "0.1.7" +name = "deadpool-runtime" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" dependencies = [ - "generic-array", - "typenum", + "tokio", ] [[package]] @@ -930,6 +2074,7 @@ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", + "subtle", ] [[package]] @@ -985,6 +2130,12 @@ dependencies = [ "shared_child", ] +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + [[package]] name = "either" version = "1.15.0" @@ -1053,12 +2204,24 @@ version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + [[package]] name = "fallible-iterator" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "2.3.0" @@ -1073,7 +2236,7 @@ checksum = 
"0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", "rustix 1.1.3", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -1104,6 +2267,27 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "flatbuffers" +version = "25.12.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" +dependencies = [ + "bitflags 2.10.0", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", + "zlib-rs", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1133,7 +2317,7 @@ checksum = "94e7099f6313ecacbe1256e8ff9d617b75d1bcb16a6fddef94866d225a01a14a" dependencies = [ "io-lifetimes", "rustix 1.1.3", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -1141,20 +2325,37 @@ name = "function-stream" version = "0.6.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-ipc", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-ipc 52.2.0", + "arrow-json 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fjson)", + "arrow-schema 52.2.0", "async-trait", "base64", "bincode", "clap", + "cornucopia", + "cornucopia_async", "crossbeam-channel", + "datafusion", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-window", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-proto", + "itertools 0.14.0", + "jiter", "log", "lru", "num_cpus", "parking_lot", + "parquet 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fparquet)", "pest", "pest_derive", + "petgraph 0.7.1", "proctitle", "protocol", "rdkafka", @@ -1162,6 +2363,8 @@ dependencies = [ "serde", "serde_json", 
"serde_yaml", + "sqlparser", + "strum", "thiserror 2.0.17", "tokio", "tokio-stream", @@ -1169,6 +2372,8 @@ dependencies = [ "tracing", "tracing-appender", "tracing-subscriber", + "typify", + "unicase", "uuid", "wasmtime", "wasmtime-wasi", @@ -1178,9 +2383,9 @@ dependencies = [ name = "function-stream-cli" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-ipc", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-ipc 52.2.0", + "arrow-schema 52.2.0", "clap", "comfy-table", "function-stream", @@ -1191,6 +2396,12 @@ dependencies = [ "tonic", ] +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.3.31" @@ -1199,6 +2410,7 @@ checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" dependencies = [ "futures-channel", "futures-core", + "futures-executor", "futures-io", "futures-sink", "futures-task", @@ -1221,12 +2433,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.31" @@ 
-1248,6 +2482,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -1287,8 +2522,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", + "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] @@ -1309,7 +2546,7 @@ version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" dependencies = [ - "fallible-iterator", + "fallible-iterator 0.3.0", "indexmap 2.12.1", "stable_deref_trait", ] @@ -1357,11 +2594,24 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] name = "hashbrown" @@ -1381,6 +2631,21 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -1393,6 +2658,21 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + [[package]] name = "home" version = "0.5.12" @@ -1649,7 +2929,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af1955a75fa080c677d3972822ec4bad316169ab1cfc6c257a942c2265dbe5fe" dependencies = [ "bitmaps", - "rand_core", + "rand_core 0.6.4", "rand_xoshiro", "sized-chunks", "typenum", @@ -1678,6 +2958,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + [[package]] name = "io-extras" version = "0.18.4" @@ -1685,7 +2971,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2285ddfe3054097ef4b2fe909ef8c3bcd1ea52a8f0d274416caebeef39f04a65" dependencies = [ "io-lifetimes", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -1711,6 +2997,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "is_ci" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -1761,6 +3053,19 @@ dependencies = [ "cc", ] +[[package]] +name = 
"jiter" +version = "0.10.0" +source = "git+https://github.com/ArroyoSystems/jiter?branch=disable_python#e5a90990780433a5972031a62eff87555d98884d" +dependencies = [ + "ahash", + "bitvec", + "lexical-parse-float 1.0.6", + "num-bigint", + "num-traits", + "smallvec", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -1811,11 +3116,24 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", + "lexical-parse-float 0.8.5", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "lexical-write-float 0.8.5", + "lexical-write-integer 0.8.5", +] + +[[package]] +name = "lexical-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" +dependencies = [ + "lexical-parse-float 1.0.6", + "lexical-parse-integer 1.0.6", + "lexical-util 1.0.7", + "lexical-write-float 1.0.6", + "lexical-write-integer 1.0.6", ] [[package]] @@ -1824,21 +3142,40 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" dependencies = [ - "lexical-parse-integer", - "lexical-util", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", "static_assertions", ] +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" +dependencies = [ + "lexical-parse-integer 1.0.6", + "lexical-util 1.0.7", +] + [[package]] name = "lexical-parse-integer" version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", "static_assertions", ] +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util 1.0.7", +] + [[package]] name = "lexical-util" version = "0.8.5" @@ -1848,27 +3185,52 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + [[package]] name = "lexical-write-float" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" dependencies = [ - "lexical-util", - "lexical-write-integer", + "lexical-util 0.8.5", + "lexical-write-integer 0.8.5", "static_assertions", ] +[[package]] +name = "lexical-write-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" +dependencies = [ + "lexical-util 1.0.7", + "lexical-write-integer 1.0.6", +] + [[package]] name = "lexical-write-integer" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", "static_assertions", ] +[[package]] +name = "lexical-write-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" +dependencies = [ + "lexical-util 1.0.7", +] + [[package]] name = "libc" version = "0.2.179" @@ -1917,6 +3279,17 @@ dependencies = [ "zstd-sys", ] 
+[[package]] +name = "libsqlite3-sys" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "libz-sys" version = "1.1.23" @@ -1972,19 +3345,39 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" name = "lru" version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "lz4_flex" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" dependencies = [ - "hashbrown 0.15.5", + "twox-hash", ] [[package]] -name = "lz4-sys" -version = "1.11.1+lz4-1.10.0" +name = "lzma-sys" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" dependencies = [ "cc", "libc", + "pkg-config", ] [[package]] @@ -2017,6 +3410,16 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4" +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + 
"cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.7.6" @@ -2032,6 +3435,38 @@ dependencies = [ "rustix 1.1.3", ] +[[package]] +name = "miette" +version = "5.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59bb584eaeeab6bd0226ccf3509a69d7936d148cf3d036ad350abe35e8c6856e" +dependencies = [ + "backtrace", + "backtrace-ext", + "is-terminal", + "miette-derive", + "once_cell", + "owo-colors", + "supports-color", + "supports-hyperlinks", + "supports-unicode", + "terminal_size", + "textwrap", + "thiserror 1.0.69", + "unicode-width 0.1.14", +] + +[[package]] +name = "miette-derive" +version = "5.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "mime" version = "0.3.17" @@ -2044,6 +3479,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "mio" version = "1.1.1" @@ -2051,7 +3496,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] @@ -2225,6 +3670,30 @@ dependencies = [ "memchr", ] +[[package]] +name = "object_store" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "http", + "humantime", + 
"itertools 0.14.0", + "parking_lot", + "percent-encoding", + "thiserror 2.0.17", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -2249,6 +3718,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "os_pipe" version = "1.2.3" @@ -2259,6 +3737,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "owo-colors" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" + [[package]] name = "parking_lot" version = "0.12.5" @@ -2282,6 +3766,80 @@ dependencies = [ "windows-link", ] +[[package]] +name = "parquet" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b17da4150748086bd43352bc77372efa9b6e3dbd06a04831d2a98c041c225cfa" +dependencies = [ + "ahash", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-cast 55.2.0", + "arrow-data 55.2.0", + "arrow-ipc 55.2.0", + "arrow-schema 55.2.0", + "arrow-select 55.2.0", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.5", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", +] + +[[package]] +name = "parquet" +version = "55.2.0" +source = "git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fparquet#d1d2dd8edf673cddc79ba6403dc6508263a2ddda" +dependencies = [ + "ahash", + "arrow-array 55.2.0", + "arrow-buffer 55.2.0", + "arrow-cast 55.2.0", + "arrow-data 55.2.0", + "arrow-ipc 55.2.0", + "arrow-schema 55.2.0", + "arrow-select 55.2.0", + "base64", + "brotli", + "bytes", + 
"chrono", + "flate2", + "half", + "hashbrown 0.15.5", + "lz4_flex", + "num", + "num-bigint", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "twox-hash", + "zstd", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "peeking_take_while" version = "0.1.2" @@ -2357,6 +3915,55 @@ dependencies = [ "indexmap 2.12.1", ] +[[package]] +name = "petgraph" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" +dependencies = [ + "fixedbitset 0.5.7", + "hashbrown 0.15.5", + "indexmap 2.12.1", + "serde", +] + +[[package]] +name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared 0.12.1", +] + +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_shared 0.13.1", + "serde", +] + +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.10" @@ -2407,6 +4014,49 @@ dependencies = [ "serde", ] +[[package]] +name = "postgres" +version = "0.19.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e7c48ece1c6cda0db61b058c1721378da76855140e9214339fa1317decacb176" +dependencies = [ + "bytes", + "fallible-iterator 0.2.0", + "futures-util", + "log", + "tokio", + "tokio-postgres", +] + +[[package]] +name = "postgres-protocol" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ee9dd5fe15055d2b6806f4736aa0c9637217074e224bbec46d4041b91bb9491" +dependencies = [ + "base64", + "byteorder", + "bytes", + "fallible-iterator 0.2.0", + "hmac", + "md-5", + "memchr", + "rand 0.9.2", + "sha2", + "stringprep", +] + +[[package]] +name = "postgres-types" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b858f82211e84682fecd373f68e1ceae642d8d751a1ebd13f33de6257b3e20" +dependencies = [ + "bytes", + "fallible-iterator 0.2.0", + "postgres-protocol", +] + [[package]] name = "potential_utf" version = "0.1.4" @@ -2486,7 +4136,7 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ - "heck", + "heck 0.5.0", "itertools 0.14.0", "log", "multimap", @@ -2533,6 +4183,16 @@ dependencies = [ "tonic-build", ] +[[package]] +name = "psm" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" +dependencies = [ + "ar_archive_writer", + "cc", +] + [[package]] name = "pulley-interpreter" version = "41.0.3" @@ -2571,6 +4231,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "radix_trie" version = "0.2.1" @@ -2588,8 +4254,18 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", ] [[package]] @@ -2599,7 +4275,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", ] [[package]] @@ -2611,13 +4297,22 @@ dependencies = [ "getrandom 0.2.16", ] +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + [[package]] name = "rand_xoshiro" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" dependencies = [ - "rand_core", + "rand_core 0.6.4", ] [[package]] @@ -2673,6 +4368,26 @@ dependencies = [ "sasl2-sys", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -2737,6 +4452,16 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "regress" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82a9ecfa0cb04d0b04dddb99b8ccf4f66bc8dfd23df694b398570bd8ae3a50fb" +dependencies = [ + "hashbrown 0.13.2", + "memchr", +] + [[package]] name = "rocksdb" version = "0.21.0" @@ -2747,6 +4472,21 @@ dependencies = [ "librocksdb-sys", ] +[[package]] +name = "rusqlite" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" +dependencies = [ + "bitflags 2.10.0", + "fallible-iterator 0.3.0", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "serde_json", + "smallvec", +] + [[package]] name = "rustc-demangle" version = "0.1.26" @@ -2784,7 +4524,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -2844,6 +4584,15 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "sasl2-sys" version = "0.1.22+2.1.28" @@ -2856,6 +4605,30 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2872,6 +4645,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + [[package]] name = "serde" version = "1.0.228" @@ -2902,6 +4681,17 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_json" version = "1.0.148" @@ -2924,6 +4714,18 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_tokenstream" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c49585c52c01f13c5c2ebb333f14f6885d76daa768d8a037d28017ec538c69" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "syn", +] + [[package]] name = "serde_yaml" version = "0.9.34+deprecated" @@ -3005,6 +4807,24 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + [[package]] name = "sized-chunks" version = "0.6.5" @@ -3030,6 +4850,18 @@ dependencies = [ "serde", ] +[[package]] +name = "smawk" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + [[package]] name = "socket2" version = "0.5.10" @@ -3050,24 +4882,125 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "sqlparser" +version = "0.55.0" +source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#7e7cfb6145a426a26a7db12ae5874fed8b9c6b95" +dependencies = [ + "log", + "recursive", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#7e7cfb6145a426a26a7db12ae5874fed8b9c6b95" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "stacker" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.52.0", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "supports-color" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" +dependencies = [ + "is-terminal", + "is_ci", +] + +[[package]] +name = "supports-hyperlinks" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84231692eb0d4d41e4cdd0cabfdd2e6cd9e255e65f80c9aa7c98dd502b4233d" +dependencies = [ + "is-terminal", +] + +[[package]] +name = "supports-unicode" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f850c19edd184a205e883199a261ed44471c81e39bd95b1357f5febbef00e77a" +dependencies = [ + "is-terminal", +] + [[package]] name = "syn" version = "2.0.113" @@ -3108,10 +5041,16 @@ dependencies = [ "fd-lock", "io-lifetimes", "rustix 0.38.44", - "windows-sys 0.59.0", + "windows-sys 0.52.0", "winx", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "target-lexicon" version = "0.13.4" @@ -3140,6 +5079,27 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "textwrap" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7b3e525a49ec206798b40326a44121291b530c963cfb01018f63e135bac543d" +dependencies = [ + "smawk", + "unicode-linebreak", + "unicode-width 0.1.14", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -3189,6 +5149,17 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + [[package]] name = "time" version = "0.3.44" @@ -3239,6 +5210,21 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" version = "1.49.0" @@ -3267,6 +5253,32 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-postgres" +version = "0.7.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcea47c8f71744367793f16c2db1f11cb859d28f436bdb4ca9193eb1f787ee42" +dependencies = [ + "async-trait", + "byteorder", + "bytes", + "fallible-iterator 0.2.0", + "futures-channel", + "futures-util", + "log", + "parking_lot", + "percent-encoding", + "phf 0.13.1", + "pin-project-lite", + "postgres-protocol", + "postgres-types", + "rand 0.9.2", + "socket2 0.6.1", + "tokio", + "tokio-util", + "whoami", +] + [[package]] name = "tokio-stream" version = "0.1.18" @@ -3397,7 +5409,7 @@ dependencies = [ "indexmap 1.9.3", "pin-project", "pin-project-lite", - "rand", + "rand 0.8.5", "slab", "tokio", "tokio-util", @@ -3525,24 +5537,104 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + [[package]] name = "typenum" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "typify" +version = "0.0.13" +source = "git+https://github.com/ArroyoSystems/typify.git?branch=arroyo#d14b6fc016bf9d63618d8b43b4d74a648980737b" +dependencies = [ + "typify-impl", + "typify-macro", +] + +[[package]] +name = "typify-impl" +version = "0.0.13" +source = "git+https://github.com/ArroyoSystems/typify.git?branch=arroyo#d14b6fc016bf9d63618d8b43b4d74a648980737b" +dependencies = [ + "heck 0.4.1", + "log", + "proc-macro2", + "quote", + "regress", + "schemars", + "serde_json", + "syn", + 
"thiserror 1.0.69", + "unicode-ident", +] + +[[package]] +name = "typify-macro" +version = "0.0.13" +source = "git+https://github.com/ArroyoSystems/typify.git?branch=arroyo#d14b6fc016bf9d63618d8b43b4d74a648980737b" +dependencies = [ + "proc-macro2", + "quote", + "schemars", + "serde", + "serde_json", + "serde_tokenstream", + "syn", + "typify-impl", +] + [[package]] name = "ucd-trie" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" +[[package]] +name = "unicase" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" + +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + [[package]] name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "unicode-linebreak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" + +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" + [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -3573,6 +5665,12 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" +[[package]] +name = "unscanny" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9df2af067a7953e9c3831320f35c1cc0600c30d44d9f7a12b01db1cd88d6b47" + [[package]] name = "url" version = "2.5.7" @@ -3626,6 +5724,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -3641,6 +5749,15 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasi" +version = "0.14.7+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] + [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" @@ -3650,6 +5767,15 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasite" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42" +dependencies = [ + "wasi 0.14.7+wasi-0.2.4", +] + [[package]] name = "wasm-bindgen" version = "0.2.106" @@ -3663,6 +5789,19 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + 
"web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.106" @@ -3702,7 +5841,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af801b6f36459023eaec63fdbaedad2fd5a4ab7dc74ecc110a8b5d375c5775e4" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "im-rc", "indexmap 2.12.1", "log", @@ -4005,7 +6144,7 @@ checksum = "87acbd416227cdd279565ba49e57cf7f08d112657c3b3f39b70250acdfd094fe" dependencies = [ "anyhow", "bitflags 2.10.0", - "heck", + "heck 0.5.0", "indexmap 2.12.1", "wit-parser", ] @@ -4085,6 +6224,37 @@ dependencies = [ "wast 243.0.0", ] +[[package]] +name = "web-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "whoami" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fae98cf96deed1b7572272dfc777713c249ae40aa1cf8862e091e8b745f5361" +dependencies = [ + "libredox", + "wasite", + "web-sys", +] + [[package]] name = "wiggle" version = "41.0.3" @@ -4106,7 +6276,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57f773d51c1696bd7d028aa35c884d9fc58f48d79a1176dfbad6c908de314235" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn", @@ -4416,7 +6586,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f3fd376f71958b862e7afb20cfe5a22830e1963462f3a17f49d82a6c1d1f42d" dependencies = [ "bitflags 2.10.0", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -4461,6 +6631,24 @@ version = "0.6.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yoke" version = "0.8.1" @@ -4558,6 +6746,12 @@ dependencies = [ "syn", ] +[[package]] +name = "zlib-rs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" + [[package]] name = "zmij" version = "1.0.10" diff --git a/Cargo.toml b/Cargo.toml index 4b855aa9..0d906ca6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,8 +52,36 @@ arrow-array = "52" arrow-ipc = "52" arrow-schema = "52" proctitle = "0.1" +unicase = "2.7" +petgraph = "0.7" +itertools = "0.14" +strum = { version = "0.26", features = ["derive"] } +datafusion-functions-aggregate = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} + +typify = { git = 'https://github.com/ArroyoSystems/typify.git', branch = 'arroyo' } +parquet = {git = 'https://github.com/ArroyoSystems/arrow-rs', branch = '55.2.0/parquet'} +arrow-json = {git = 'https://github.com/ArroyoSystems/arrow-rs', branch = '55.2.0/json'} +datafusion = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} +datafusion-common = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} +datafusion-execution = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} +datafusion-expr = {git = 
'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} +datafusion-physical-expr = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} +datafusion-physical-plan = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} +datafusion-proto = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} +datafusion-functions = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} +datafusion-functions-window = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} + +sqlparser = { git = "https://github.com/FunctionStream/sqlparser-rs", branch = "0.6.0/function-sql-parser" } + +cornucopia_async = { git = "https://github.com/ArroyoSystems/cornucopia", branch = "sqlite" } +cornucopia = { git = "https://github.com/ArroyoSystems/cornucopia", branch = "sqlite" } +jiter = {git = "https://github.com/ArroyoSystems/jiter", branch = "disable_python" } + [features] default = ["incremental-cache", "python"] incremental-cache = ["wasmtime/incremental-cache"] python = [] + +[patch."https://github.com/ArroyoSystems/sqlparser-rs"] +sqlparser = { git = "https://github.com/FunctionStream/sqlparser-rs", branch = "0.6.0/function-sql-parser" } diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs index 30552191..58056b67 100644 --- a/src/coordinator/analyze/analyzer.rs +++ b/src/coordinator/analyze/analyzer.rs @@ -14,7 +14,7 @@ use super::Analysis; use crate::coordinator::execution_context::ExecutionContext; use crate::coordinator::statement::{ CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction, Statement, - StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, + StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingSql, }; use std::fmt; @@ -115,4 +115,13 @@ impl StatementVisitor for 
Analyzer<'_> { ) -> StatementVisitorResult { StatementVisitorResult::Analyze(Box::new(stmt.clone())) } + + fn visit_streaming_sql( + &self, + stmt: &StreamingSql, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + // TODO: add semantic analysis for streaming SQL (schema validation, etc.) + StatementVisitorResult::Analyze(Box::new(StreamingSql::new(stmt.statement.clone()))) + } } diff --git a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs index 4ad766d5..378c670b 100644 --- a/src/coordinator/coordinator.rs +++ b/src/coordinator/coordinator.rs @@ -20,6 +20,7 @@ use crate::coordinator::execution::Executor; use crate::coordinator::plan::{LogicalPlanVisitor, LogicalPlanner, PlanNode}; use crate::coordinator::statement::Statement; use crate::runtime::taskexecutor::TaskManager; +use crate::sql::planner::StreamSchemaProvider; use super::execution_context::ExecutionContext; @@ -90,7 +91,8 @@ impl Coordinator { } fn step_build_logical_plan(&self, analysis: &Analysis) -> Result> { - let visitor = LogicalPlanVisitor::new(); + let schema_provider = StreamSchemaProvider::new(); + let visitor = LogicalPlanVisitor::new(schema_provider); let plan = visitor.visit(analysis); Ok(plan) } diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 7e44217e..5d96bf45 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -14,6 +14,7 @@ use crate::coordinator::dataset::{ExecuteResult, ShowFunctionsResult, empty_reco use crate::coordinator::plan::{ CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, + StreamingSqlPlan, }; use crate::coordinator::statement::{ConfigSource, FunctionSource}; use crate::runtime::taskexecutor::TaskManager; @@ -200,4 +201,17 @@ impl PlanVisitor for Executor { PlanVisitorResult::Execute(result) } + + fn 
visit_streaming_sql_plan( + &self, + plan: &StreamingSqlPlan, + _context: &PlanVisitorContext, + ) -> PlanVisitorResult { + // TODO: apply rewrite_plan for streaming transformations, then execute + let result = Err(ExecuteError::Internal(format!( + "Streaming SQL execution not yet implemented. LogicalPlan:\n{}", + plan.logical_plan.display_indent() + ))); + PlanVisitorResult::Execute(result) + } } diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs index 0b94d4bf..26627a8b 100644 --- a/src/coordinator/mod.rs +++ b/src/coordinator/mod.rs @@ -23,5 +23,5 @@ pub use coordinator::Coordinator; pub use dataset::{DataSet, ShowFunctionsResult}; pub use statement::{ CreateFunction, CreatePythonFunction, DropFunction, PythonModule, ShowFunctions, StartFunction, - Statement, StopFunction, + Statement, StopFunction, StreamingSql, }; diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index 536fec37..3462d033 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -10,22 +10,26 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use tracing::debug; + use crate::coordinator::analyze::analysis::Analysis; use crate::coordinator::plan::{ CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, PlanNode, ShowFunctionsPlan, - StartFunctionPlan, StopFunctionPlan, + StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan, }; use crate::coordinator::statement::{ CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction, - StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, + StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingSql, }; +use crate::sql::planner::StreamSchemaProvider; -#[derive(Debug, Default)] -pub struct LogicalPlanVisitor; +pub struct LogicalPlanVisitor { + schema_provider: StreamSchemaProvider, +} impl LogicalPlanVisitor { - pub fn new() -> Self { - Self + pub fn new(schema_provider: StreamSchemaProvider) -> Self { + Self { schema_provider } } pub fn visit(&self, analysis: &Analysis) -> Box { @@ -51,7 +55,6 @@ impl StatementVisitor for LogicalPlanVisitor { let config_source = stmt.get_config_source().cloned(); let extra_props = stmt.get_extra_properties().clone(); - // Name will be read from config file during execution StatementVisitorResult::Plan(Box::new(CreateFunctionPlan::new( function_source, config_source, @@ -106,4 +109,22 @@ impl StatementVisitor for LogicalPlanVisitor { config_content, ))) } + + fn visit_streaming_sql( + &self, + stmt: &StreamingSql, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + let sql_to_rel = datafusion::sql::planner::SqlToRel::new(&self.schema_provider); + + match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) { + Ok(plan) => { + debug!("Logical plan:\n{}", plan.display_graphviz()); + StatementVisitorResult::Plan(Box::new(StreamingSqlPlan::new(plan))) + } + Err(e) => { + panic!("Failed to convert SQL statement to logical plan: {e}"); + } + } + } } diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs 
index 9aa403b5..744410e1 100644 --- a/src/coordinator/plan/mod.rs +++ b/src/coordinator/plan/mod.rs @@ -18,6 +18,7 @@ mod optimizer; mod show_functions_plan; mod start_function_plan; mod stop_function_plan; +mod streaming_sql_plan; mod visitor; pub use create_function_plan::CreateFunctionPlan; @@ -28,6 +29,7 @@ pub use optimizer::LogicalPlanner; pub use show_functions_plan::ShowFunctionsPlan; pub use start_function_plan::StartFunctionPlan; pub use stop_function_plan::StopFunctionPlan; +pub use streaming_sql_plan::StreamingSqlPlan; pub use visitor::{PlanVisitor, PlanVisitorContext, PlanVisitorResult}; use std::fmt; diff --git a/src/sql/parser/mod.rs b/src/coordinator/plan/streaming_sql_plan.rs similarity index 52% rename from src/sql/parser/mod.rs rename to src/coordinator/plan/streaming_sql_plan.rs index 11f4b18e..607420a8 100644 --- a/src/sql/parser/mod.rs +++ b/src/coordinator/plan/streaming_sql_plan.rs @@ -10,33 +10,23 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-mod sql_parser; +use datafusion::logical_expr::LogicalPlan; -pub use sql_parser::SqlParser; +use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; #[derive(Debug)] -pub struct ParseError { - pub message: String, +pub struct StreamingSqlPlan { + pub logical_plan: LogicalPlan, } -impl std::fmt::Display for ParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Parse error: {}", self.message) +impl StreamingSqlPlan { + pub fn new(logical_plan: LogicalPlan) -> Self { + Self { logical_plan } } } -impl std::error::Error for ParseError {} - -impl From for ParseError { - fn from(message: String) -> Self { - ParseError { message } - } -} - -impl ParseError { - pub fn new(message: impl Into) -> Self { - Self { - message: message.into(), - } +impl PlanNode for StreamingSqlPlan { + fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { + visitor.visit_streaming_sql_plan(self, context) } } diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs index 44059c67..e8bd0ffc 100644 --- a/src/coordinator/plan/visitor.rs +++ b/src/coordinator/plan/visitor.rs @@ -12,7 +12,7 @@ use super::{ CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, ShowFunctionsPlan, - StartFunctionPlan, StopFunctionPlan, + StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan, }; /// Context passed to PlanVisitor methods @@ -84,4 +84,10 @@ pub trait PlanVisitor { plan: &CreatePythonFunctionPlan, context: &PlanVisitorContext, ) -> PlanVisitorResult; + + fn visit_streaming_sql_plan( + &self, + plan: &StreamingSqlPlan, + context: &PlanVisitorContext, + ) -> PlanVisitorResult; } diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs index f887209c..a115af91 100644 --- a/src/coordinator/statement/mod.rs +++ b/src/coordinator/statement/mod.rs @@ -16,6 +16,7 @@ mod drop_function; mod show_functions; mod start_function; mod stop_function; +mod 
streaming_sql; mod visitor; pub use create_function::{ConfigSource, CreateFunction, FunctionSource}; @@ -24,6 +25,7 @@ pub use drop_function::DropFunction; pub use show_functions::ShowFunctions; pub use start_function::StartFunction; pub use stop_function::StopFunction; +pub use streaming_sql::StreamingSql; pub use visitor::{StatementVisitor, StatementVisitorContext, StatementVisitorResult}; use std::fmt; diff --git a/src/coordinator/statement/streaming_sql.rs b/src/coordinator/statement/streaming_sql.rs new file mode 100644 index 00000000..1aa49205 --- /dev/null +++ b/src/coordinator/statement/streaming_sql.rs @@ -0,0 +1,39 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use datafusion::sql::sqlparser::ast::Statement as DFStatement; + +use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; + +/// Wraps a DataFusion SQL statement (SELECT, INSERT, CREATE TABLE, etc.) +/// so it can flow through the same Statement → StatementVisitor pipeline +/// as FunctionStream DDL commands. 
+#[derive(Debug)] +pub struct StreamingSql { + pub statement: DFStatement, +} + +impl StreamingSql { + pub fn new(statement: DFStatement) -> Self { + Self { statement } + } +} + +impl Statement for StreamingSql { + fn accept( + &self, + visitor: &dyn StatementVisitor, + context: &StatementVisitorContext, + ) -> StatementVisitorResult { + visitor.visit_streaming_sql(self, context) + } +} diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs index 13ce2cfc..c9a63831 100644 --- a/src/coordinator/statement/visitor.rs +++ b/src/coordinator/statement/visitor.rs @@ -12,6 +12,7 @@ use super::{ CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction, StopFunction, + StreamingSql, }; use crate::coordinator::plan::PlanNode; use crate::coordinator::statement::Statement; @@ -87,4 +88,10 @@ pub trait StatementVisitor { stmt: &CreatePythonFunction, context: &StatementVisitorContext, ) -> StatementVisitorResult; + + fn visit_streaming_sql( + &self, + stmt: &StreamingSql, + context: &StatementVisitorContext, + ) -> StatementVisitorResult; } diff --git a/src/datastream/logical.rs b/src/datastream/logical.rs new file mode 100644 index 00000000..60101bdd --- /dev/null +++ b/src/datastream/logical.rs @@ -0,0 +1,317 @@ +use itertools::Itertools; + +use crate::datastream::optimizers::Optimizer; +use crate::sql::planner::types::StreamSchema; +use datafusion::arrow::datatypes::DataType; +use petgraph::Direction; +use petgraph::dot::Dot; +use petgraph::graph::DiGraph; +use std::collections::{HashMap, HashSet}; +use std::fmt::{Debug, Display, Formatter}; +use std::sync::Arc; +use strum::{Display, EnumString}; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)] +pub enum OperatorName { + ExpressionWatermark, + ArrowValue, + ArrowKey, + Projection, + AsyncUdf, + Join, + InstantJoin, + LookupJoin, + WindowFunction, + TumblingWindowAggregate, + SlidingWindowAggregate, + SessionWindowAggregate, + UpdatingAggregate, + 
ConnectorSource, + ConnectorSink, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +pub enum LogicalEdgeType { + Forward, + Shuffle, + LeftJoin, + RightJoin, +} + +impl Display for LogicalEdgeType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + LogicalEdgeType::Forward => write!(f, "→"), + LogicalEdgeType::Shuffle => write!(f, "⤨"), + LogicalEdgeType::LeftJoin => write!(f, "-[left]⤨"), + LogicalEdgeType::RightJoin => write!(f, "-[right]⤨"), + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct LogicalEdge { + pub edge_type: LogicalEdgeType, + pub schema: Arc, +} + +impl LogicalEdge { + pub fn new(edge_type: LogicalEdgeType, schema: StreamSchema) -> Self { + LogicalEdge { + edge_type, + schema: Arc::new(schema), + } + } + + pub fn project_all(edge_type: LogicalEdgeType, schema: StreamSchema) -> Self { + LogicalEdge { + edge_type, + schema: Arc::new(schema), + } + } +} + +#[derive(Clone, Debug)] +pub struct ChainedLogicalOperator { + pub operator_id: String, + pub operator_name: OperatorName, + pub operator_config: Vec, +} + +#[derive(Clone, Debug)] +pub struct OperatorChain { + pub(crate) operators: Vec, + pub(crate) edges: Vec>, +} + +impl OperatorChain { + pub fn new(operator: ChainedLogicalOperator) -> Self { + Self { + operators: vec![operator], + edges: vec![], + } + } + + pub fn iter( + &self, + ) -> impl Iterator>)> { + self.operators + .iter() + .zip_longest(self.edges.iter()) + .map(|e| e.left_and_right()) + .map(|(l, r)| (l.unwrap(), r)) + } + + pub fn iter_mut( + &mut self, + ) -> impl Iterator>)> { + self.operators + .iter_mut() + .zip_longest(self.edges.iter()) + .map(|e| e.left_and_right()) + .map(|(l, r)| (l.unwrap(), r)) + } + + pub fn first(&self) -> &ChainedLogicalOperator { + &self.operators[0] + } + + pub fn len(&self) -> usize { + self.operators.len() + } + + pub fn is_empty(&self) -> bool { + self.operators.is_empty() + } + + pub fn is_source(&self) -> bool { + 
self.operators[0].operator_name == OperatorName::ConnectorSource + } + + pub fn is_sink(&self) -> bool { + self.operators[0].operator_name == OperatorName::ConnectorSink + } +} + +#[derive(Clone)] +pub struct LogicalNode { + pub node_id: u32, + pub description: String, + pub operator_chain: OperatorChain, + pub parallelism: usize, +} + +impl LogicalNode { + pub fn single( + id: u32, + operator_id: String, + name: OperatorName, + config: Vec, + description: String, + parallelism: usize, + ) -> Self { + Self { + node_id: id, + description, + operator_chain: OperatorChain { + operators: vec![ChainedLogicalOperator { + operator_id, + operator_name: name, + operator_config: config, + }], + edges: vec![], + }, + parallelism, + } + } +} + +impl Display for LogicalNode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.description) + } +} + +impl Debug for LogicalNode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}[{}]", + self.operator_chain + .operators + .iter() + .map(|op| op.operator_id.clone()) + .collect::>() + .join(" -> "), + self.parallelism + ) + } +} + +pub type LogicalGraph = DiGraph; + +#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd)] +pub struct DylibUdfConfig { + pub dylib_path: String, + pub arg_types: Vec, + pub return_type: DataType, + pub aggregate: bool, + pub is_async: bool, +} + +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct PythonUdfConfig { + pub arg_types: Vec, + pub return_type: DataType, + pub name: Arc, + pub definition: Arc, +} + +#[derive(Clone, Debug, Default)] +pub struct ProgramConfig { + pub udf_dylibs: HashMap, + pub python_udfs: HashMap, +} + +#[derive(Clone, Debug, Default)] +pub struct LogicalProgram { + pub graph: LogicalGraph, + pub program_config: ProgramConfig, +} + +impl LogicalProgram { + pub fn new(graph: LogicalGraph, program_config: ProgramConfig) -> Self { + Self { + graph, + program_config, + } + } + + pub fn optimize(&mut self, 
optimizer: &dyn Optimizer) { + optimizer.optimize(&mut self.graph); + } + + pub fn update_parallelism(&mut self, overrides: &HashMap) { + for node in self.graph.node_weights_mut() { + if let Some(p) = overrides.get(&node.node_id) { + node.parallelism = *p; + } + } + } + + pub fn dot(&self) -> String { + format!("{:?}", Dot::with_config(&self.graph, &[])) + } + + pub fn task_count(&self) -> usize { + self.graph.node_weights().map(|nw| nw.parallelism).sum() + } + + pub fn sources(&self) -> HashSet { + self.graph + .externals(Direction::Incoming) + .map(|t| self.graph.node_weight(t).unwrap().node_id) + .collect() + } + + pub fn tasks_per_operator(&self) -> HashMap { + let mut tasks_per_operator = HashMap::new(); + for node in self.graph.node_weights() { + for op in &node.operator_chain.operators { + tasks_per_operator.insert(op.operator_id.clone(), node.parallelism); + } + } + tasks_per_operator + } + + pub fn operator_names_by_id(&self) -> HashMap { + let mut m = HashMap::new(); + for node in self.graph.node_weights() { + for op in &node.operator_chain.operators { + m.insert(op.operator_id.clone(), op.operator_name.to_string()); + } + } + m + } + + pub fn tasks_per_node(&self) -> HashMap { + let mut tasks_per_node = HashMap::new(); + for node in self.graph.node_weights() { + tasks_per_node.insert(node.node_id, node.parallelism); + } + tasks_per_node + } + + pub fn features(&self) -> HashSet { + let mut s = HashSet::new(); + for n in self.graph.node_weights() { + for t in &n.operator_chain.operators { + let feature = match &t.operator_name { + OperatorName::AsyncUdf => "async-udf".to_string(), + OperatorName::ExpressionWatermark + | OperatorName::ArrowValue + | OperatorName::ArrowKey + | OperatorName::Projection => continue, + OperatorName::Join => "join-with-expiration".to_string(), + OperatorName::InstantJoin => "windowed-join".to_string(), + OperatorName::WindowFunction => "sql-window-function".to_string(), + OperatorName::LookupJoin => "lookup-join".to_string(), + 
OperatorName::TumblingWindowAggregate => { + "sql-tumbling-window-aggregate".to_string() + } + OperatorName::SlidingWindowAggregate => { + "sql-sliding-window-aggregate".to_string() + } + OperatorName::SessionWindowAggregate => { + "sql-session-window-aggregate".to_string() + } + OperatorName::UpdatingAggregate => "sql-updating-aggregate".to_string(), + OperatorName::ConnectorSource => "connector-source".to_string(), + OperatorName::ConnectorSink => "connector-sink".to_string(), + }; + s.insert(feature); + } + } + s + } +} diff --git a/src/datastream/mod.rs b/src/datastream/mod.rs new file mode 100644 index 00000000..994a96b4 --- /dev/null +++ b/src/datastream/mod.rs @@ -0,0 +1,2 @@ +pub mod logical; +pub mod optimizers; diff --git a/src/datastream/optimizers.rs b/src/datastream/optimizers.rs new file mode 100644 index 00000000..2d258aff --- /dev/null +++ b/src/datastream/optimizers.rs @@ -0,0 +1,100 @@ +use crate::datastream::logical::{LogicalEdgeType, LogicalGraph}; +use petgraph::prelude::*; +use petgraph::visit::NodeRef; +use std::mem; + +pub trait Optimizer { + fn optimize_once(&self, plan: &mut LogicalGraph) -> bool; + + fn optimize(&self, plan: &mut LogicalGraph) { + loop { + if !self.optimize_once(plan) { + break; + } + } + } +} + +pub struct ChainingOptimizer {} + +fn remove_in_place(graph: &mut DiGraph, node: NodeIndex) { + let incoming = graph.edges_directed(node, Incoming).next().unwrap(); + + let parent = incoming.source().id(); + let incoming = incoming.id(); + graph.remove_edge(incoming); + + let outgoing: Vec<_> = graph + .edges_directed(node, Outgoing) + .map(|e| (e.id(), e.target().id())) + .collect(); + + for (edge, target) in outgoing { + let weight = graph.remove_edge(edge).unwrap(); + graph.add_edge(parent, target, weight); + } + + graph.remove_node(node); +} + +impl Optimizer for ChainingOptimizer { + fn optimize_once(&self, plan: &mut LogicalGraph) -> bool { + let node_indices: Vec = plan.node_indices().collect(); + + for &node_idx in 
&node_indices { + let cur = plan.node_weight(node_idx).unwrap(); + + if cur.operator_chain.is_source() { + continue; + } + + let mut successors = plan.edges_directed(node_idx, Outgoing).collect::>(); + + if successors.len() != 1 { + continue; + } + + let edge = successors.remove(0); + let edge_type = edge.weight().edge_type; + + if edge_type != LogicalEdgeType::Forward { + continue; + } + + let successor_idx = edge.target(); + + let successor_node = plan.node_weight(successor_idx).unwrap(); + + if cur.parallelism != successor_node.parallelism + || successor_node.operator_chain.is_sink() + { + continue; + } + + if plan.edges_directed(successor_idx, Incoming).count() > 1 { + continue; + } + + let mut new_cur = cur.clone(); + + new_cur.description = format!("{} -> {}", cur.description, successor_node.description); + + new_cur + .operator_chain + .operators + .extend(successor_node.operator_chain.operators.clone()); + + new_cur + .operator_chain + .edges + .push(edge.weight().schema.clone()); + + mem::swap(&mut new_cur, plan.node_weight_mut(node_idx).unwrap()); + + remove_in_place(plan, successor_idx); + return true; + } + + false + } +} diff --git a/src/lib.rs b/src/lib.rs index a6bb4d28..e8596864 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,6 +16,7 @@ pub mod config; pub mod coordinator; +pub mod datastream; pub mod logging; pub mod runtime; pub mod server; diff --git a/src/server/handler.rs b/src/server/handler.rs index 4721a5a1..45b0cd07 100644 --- a/src/server/handler.rs +++ b/src/server/handler.rs @@ -29,7 +29,7 @@ use crate::coordinator::{ CreateFunction, CreatePythonFunction, DataSet, DropFunction, ShowFunctions, ShowFunctionsResult, StartFunction, Statement, StopFunction, }; -use crate::sql::SqlParser; +use crate::sql::planner::parse::parse_sql; pub struct FunctionStreamServiceImpl { coordinator: Arc, @@ -70,10 +70,10 @@ impl FunctionStreamService for FunctionStreamServiceImpl { let req = request.into_inner(); let parse_start = Instant::now(); - let stmt = 
match SqlParser::parse(&req.sql) { - Ok(stmt) => { + let parsed = match parse_sql(&req.sql) { + Ok(parsed) => { log::debug!("SQL parsed in {}ms", parse_start.elapsed().as_millis()); - stmt + parsed } Err(e) => { return Ok(TonicResponse::new(Self::build_response( @@ -85,7 +85,7 @@ impl FunctionStreamService for FunctionStreamServiceImpl { }; let exec_start = Instant::now(); - let result = self.coordinator.execute(stmt.as_ref()); + let result = self.coordinator.execute(parsed.as_ref()); log::debug!( "Coordinator execution finished in {}ms", exec_start.elapsed().as_millis() diff --git a/src/sql/grammar.pest b/src/sql/grammar.pest deleted file mode 100644 index 15f70dd7..00000000 --- a/src/sql/grammar.pest +++ /dev/null @@ -1,134 +0,0 @@ -// ============================================================================= -// FUNCTION SQL Grammar -// -// Using pest PEG syntax, referencing ANTLR style -// ============================================================================= - -// ============================================================================= -// 1. Whitespace (automatically skipped) -// ============================================================================= - -WHITESPACE = _{ " " | "\t" | "\r" | "\n" } - -// ============================================================================= -// 2. Keywords (case-insensitive) -// ============================================================================= - -kw_create = _{ C ~ R ~ E ~ A ~ T ~ E } -kw_drop = _{ D ~ R ~ O ~ P } -kw_start = _{ S ~ T ~ A ~ R ~ T } -kw_stop = _{ S ~ T ~ O ~ P } -kw_show = _{ S ~ H ~ O ~ W } -kw_with = _{ W ~ I ~ T ~ H } -kw_function = _{ F ~ U ~ N ~ C ~ T ~ I ~ O ~ N } -kw_functions = _{ F ~ U ~ N ~ C ~ T ~ I ~ O ~ N ~ S } - -// ============================================================================= -// 3. 
Operators & Symbols -// ============================================================================= - -LPAREN = _{ "(" } -RPAREN = _{ ")" } -COMMA = _{ "," } -EQ = _{ "=" } -SQUOTE = _{ "'" } -DQUOTE = _{ "\"" } - -// ============================================================================= -// 4. Literals -// ============================================================================= - -// String literal (single or double quotes) -string_literal = @{ - SQUOTE ~ string_inner_single ~ SQUOTE | - DQUOTE ~ string_inner_double ~ DQUOTE -} - -string_inner_single = @{ (!(SQUOTE | "\\") ~ ANY | escape_seq)* } -string_inner_double = @{ (!(DQUOTE | "\\") ~ ANY | escape_seq)* } -escape_seq = @{ "\\" ~ ANY } - -// ============================================================================= -// 5. Identifiers -// ============================================================================= - -// Task name identifier -identifier = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_" | "-")* } - -// ============================================================================= -// 6. Statements -// ============================================================================= - -// Entry rule -statement = _{ - SOI ~ ( - create_stmt | - drop_stmt | - start_stmt | - stop_stmt | - show_stmt - ) ~ EOI -} - -// CREATE FUNCTION WITH (...) -// Note: name is read from config file, not from SQL statement -create_stmt = { kw_create ~ kw_function ~ kw_with ~ properties } - -// DROP FUNCTION name -drop_stmt = { kw_drop ~ kw_function ~ identifier } - -// START FUNCTION name -start_stmt = { kw_start ~ kw_function ~ identifier } - -// STOP FUNCTION name -stop_stmt = { kw_stop ~ kw_function ~ identifier } - -// SHOW FUNCTIONS -show_stmt = { kw_show ~ kw_functions } - -// ============================================================================= -// 7. Properties -// ============================================================================= - -// Property list ('key'='value', ...) 
-properties = { LPAREN ~ property ~ (COMMA ~ property)* ~ RPAREN } - -// Single property 'key'='value' -property = { property_key ~ EQ ~ property_value } - -// Property key (string) -property_key = { string_literal } - -// Property value (string) -property_value = { string_literal } - -// ============================================================================= -// 8. Character Fragments (for case-insensitive matching) -// ============================================================================= - -A = _{ "A" | "a" } -B = _{ "B" | "b" } -C = _{ "C" | "c" } -D = _{ "D" | "d" } -E = _{ "E" | "e" } -F = _{ "F" | "f" } -G = _{ "G" | "g" } -H = _{ "H" | "h" } -I = _{ "I" | "i" } -J = _{ "J" | "j" } -K = _{ "K" | "k" } -L = _{ "L" | "l" } -M = _{ "M" | "m" } -N = _{ "N" | "n" } -O = _{ "O" | "o" } -P = _{ "P" | "p" } -Q = _{ "Q" | "q" } -R = _{ "R" | "r" } -S = _{ "S" | "s" } -T = _{ "T" | "t" } -U = _{ "U" | "u" } -V = _{ "V" | "v" } -W = _{ "W" | "w" } -X = _{ "X" | "x" } -Y = _{ "Y" | "y" } -Z = _{ "Z" | "z" } diff --git a/src/sql/mod.rs b/src/sql/mod.rs index ed3c2e30..31b5c4b9 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -10,6 +10,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -pub mod parser; +pub mod planner; -pub use parser::SqlParser; +pub use planner::StreamSchemaProvider; +pub use planner::parse::parse_sql; +pub use planner::plan::rewrite_plan; +pub use planner::sql_to_plan::statement_to_plan; diff --git a/src/sql/parser/sql_parser.rs b/src/sql/parser/sql_parser.rs deleted file mode 100644 index dc110745..00000000 --- a/src/sql/parser/sql_parser.rs +++ /dev/null @@ -1,249 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use pest::Parser; -use pest_derive::Parser; - -use super::ParseError; -use crate::coordinator::{ - CreateFunction, DropFunction, ShowFunctions, StartFunction, Statement, StopFunction, -}; -use std::collections::HashMap; - -#[derive(Parser)] -#[grammar = "src/sql/grammar.pest"] -struct Grammar; - -#[derive(Debug, Default)] -pub struct SqlParser; - -impl SqlParser { - pub fn parse(sql: &str) -> Result, ParseError> { - let pairs = Grammar::parse(Rule::statement, sql) - .map_err(|e| ParseError::new(format!("Parse error: {}", e)))?; - - for pair in pairs { - return match pair.as_rule() { - Rule::create_stmt => { - handle_create_stmt(pair).map(|stmt| stmt as Box) - } - Rule::drop_stmt => handle_drop_stmt(pair).map(|stmt| stmt as Box), - Rule::start_stmt => handle_start_stmt(pair).map(|stmt| stmt as Box), - Rule::stop_stmt => handle_stop_stmt(pair).map(|stmt| stmt as Box), - Rule::show_stmt => handle_show_stmt(pair).map(|stmt| stmt as Box), - _ => continue, - }; - } - - Err(ParseError::new("Unknown statement type")) - } -} - -fn handle_create_stmt( - pair: pest::iterators::Pair, -) -> Result, ParseError> { - let mut inner = pair.into_inner(); - // Note: name is read from config file, not from SQL statement - // Pass empty string here, name will be read from config file later - let properties = inner - .next() - .map(parse_properties) - .ok_or_else(|| ParseError::new("Missing WITH clause"))?; - - Ok(Box::new( - CreateFunction::from_properties(properties).map_err(ParseError::from)?, - )) -} - -fn handle_drop_stmt(pair: pest::iterators::Pair) -> Result, 
ParseError> { - let mut inner = pair.into_inner(); - let name = inner.next().map(extract_string).unwrap_or_default(); - Ok(Box::new(DropFunction::new(name))) -} - -fn handle_start_stmt(pair: pest::iterators::Pair) -> Result, ParseError> { - let mut inner = pair.into_inner(); - let name = inner.next().map(extract_string).unwrap_or_default(); - Ok(Box::new(StartFunction::new(name))) -} - -fn handle_stop_stmt(pair: pest::iterators::Pair) -> Result, ParseError> { - let mut inner = pair.into_inner(); - let name = inner.next().map(extract_string).unwrap_or_default(); - Ok(Box::new(StopFunction::new(name))) -} - -fn handle_show_stmt(_pair: pest::iterators::Pair) -> Result, ParseError> { - Ok(Box::new(ShowFunctions::new())) -} - -fn extract_string(pair: pest::iterators::Pair) -> String { - match pair.as_rule() { - Rule::string_literal => { - let s = pair.as_str(); - if (s.starts_with('\'') && s.ends_with('\'')) - || (s.starts_with('"') && s.ends_with('"')) - { - unescape_string(&s[1..s.len() - 1]) - } else { - unescape_string(s) - } - } - Rule::identifier => pair.as_str().to_string(), - _ => pair.as_str().to_string(), - } -} - -fn unescape_string(s: &str) -> String { - let mut result = String::with_capacity(s.len()); - let mut chars = s.chars().peekable(); - - while let Some(ch) = chars.next() { - if ch == '\\' { - if let Some(&next) = chars.peek() { - chars.next(); - match next { - 'n' => result.push('\n'), - 't' => result.push('\t'), - 'r' => result.push('\r'), - '\\' => result.push('\\'), - '\'' => result.push('\''), - '"' => result.push('"'), - _ => { - result.push('\\'); - result.push(next); - } - } - } else { - result.push(ch); - } - } else { - result.push(ch); - } - } - - result -} - -fn parse_properties(pair: pest::iterators::Pair) -> HashMap { - let mut properties = HashMap::new(); - - for prop in pair.into_inner() { - if prop.as_rule() == Rule::property { - let mut inner = prop.into_inner(); - if let (Some(key_pair), Some(val_pair)) = (inner.next(), inner.next()) 
{ - let key = key_pair - .into_inner() - .next() - .map(extract_string) - .unwrap_or_default(); - let value = val_pair - .into_inner() - .next() - .map(extract_string) - .unwrap_or_default(); - properties.insert(key, value); - } - } - } - - properties -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_create_function() { - let sql = - "CREATE FUNCTION WITH ('function_path'='./test.wasm', 'config_path'='./config.yml')"; - let _stmt = SqlParser::parse(sql).unwrap(); - } - - #[test] - fn test_create_function_minimal() { - let sql = "CREATE FUNCTION WITH ('function_path'='./processor.wasm')"; - let _stmt = SqlParser::parse(sql).unwrap(); - } - - // Note: SQL only supports Path mode, not Bytes mode - // Bytes mode is only for gRPC requests - - #[test] - fn test_drop_function() { - let sql = "DROP FUNCTION my_task"; - let _stmt = SqlParser::parse(sql).unwrap(); - } - - #[test] - fn test_start_function() { - let sql = "START FUNCTION my_task"; - let _stmt = SqlParser::parse(sql).unwrap(); - } - - #[test] - fn test_stop_function() { - let sql = "STOP FUNCTION my_task"; - let _stmt = SqlParser::parse(sql).unwrap(); - } - - #[test] - fn test_show_functions() { - let sql = "SHOW FUNCTIONS"; - let _stmt = SqlParser::parse(sql).unwrap(); - } - - #[test] - fn test_case_insensitive_keywords() { - let sql1 = "create function with ('function_path'='./test.wasm')"; - let _stmt1 = SqlParser::parse(sql1).unwrap(); - - let sql2 = "Create Function With ('Function_Path'='./test.wasm')"; - let _stmt2 = SqlParser::parse(sql2).unwrap(); - - let sql3 = "show functions"; - let _stmt3 = SqlParser::parse(sql3).unwrap(); - - let sql4 = "start function my_task"; - let _stmt4 = SqlParser::parse(sql4).unwrap(); - } - - #[test] - fn test_case_insensitive_property_keys() { - let sql1 = - "CREATE FUNCTION WITH ('function_path'='./test.wasm', 'config_path'='./config.yml')"; - let _stmt1 = SqlParser::parse(sql1).unwrap(); - - let sql2 = - "CREATE FUNCTION WITH 
('Function_Path'='./test.wasm', 'Config_Path'='./config.yml')"; - let _stmt2 = SqlParser::parse(sql2).unwrap(); - - let sql3 = - "CREATE FUNCTION WITH ('FUNCTION_PATH'='./test.wasm', 'CONFIG_PATH'='./config.yml')"; - let _stmt3 = SqlParser::parse(sql3).unwrap(); - - // Note: SQL only supports Path mode (function_path, config_path) - // Bytes mode (function, config) is only for gRPC requests - } - - #[test] - fn test_with_extra_properties() { - let sql = r#"CREATE FUNCTION WITH ( - 'function_path'='./test.wasm', - 'config_path'='./config.yml', - 'parallelism'='4', - 'memory-limit'='256mb' - )"#; - let _stmt = SqlParser::parse(sql).unwrap(); - } -} diff --git a/src/sql/planner/extension/aggregate.rs b/src/sql/planner/extension/aggregate.rs new file mode 100644 index 00000000..911e595f --- /dev/null +++ b/src/sql/planner/extension/aggregate.rs @@ -0,0 +1,348 @@ +use std::fmt::Formatter; +use std::sync::Arc; +use std::time::Duration; + +use datafusion::arrow::datatypes::DataType; +use datafusion::common::{Column, DFSchemaRef, Result, ScalarValue, internal_err}; +use datafusion::logical_expr; +use datafusion::logical_expr::{ + BinaryExpr, Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, expr::ScalarFunction, +}; + +use crate::multifield_partial_ord; +use crate::sql::planner::extension::{NamedNode, StreamExtension, TimestampAppendExtension}; +use crate::sql::planner::types::{ + DFField, StreamSchema, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, + schema_from_df_fields, schema_from_df_fields_with_metadata, +}; + +pub(crate) const AGGREGATE_EXTENSION_NAME: &str = "AggregateExtension"; + +/// Extension node for windowed aggregate operations in streaming SQL. +/// Supports tumbling, sliding, session, and instant window aggregations. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct AggregateExtension { + pub(crate) window_behavior: WindowBehavior, + pub(crate) aggregate: LogicalPlan, + pub(crate) schema: DFSchemaRef, + pub(crate) key_fields: Vec, + pub(crate) final_calculation: LogicalPlan, +} + +multifield_partial_ord!(AggregateExtension, aggregate, key_fields, final_calculation); + +impl AggregateExtension { + pub fn new( + window_behavior: WindowBehavior, + aggregate: LogicalPlan, + key_fields: Vec, + ) -> Self { + let final_calculation = + Self::final_projection(&aggregate, window_behavior.clone()).unwrap(); + Self { + window_behavior, + aggregate, + schema: final_calculation.schema().clone(), + key_fields, + final_calculation, + } + } + + /// Build the final projection after aggregation, which adds the window struct + /// and computes the output timestamp based on the window behavior. + pub fn final_projection( + aggregate_plan: &LogicalPlan, + window_behavior: WindowBehavior, + ) -> Result { + let timestamp_field: DFField = aggregate_plan.inputs()[0] + .schema() + .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)? + .into(); + let timestamp_append = LogicalPlan::Extension(Extension { + node: Arc::new(TimestampAppendExtension::new( + aggregate_plan.clone(), + timestamp_field.qualifier().cloned(), + )), + }); + let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema()); + let mut aggregate_expressions: Vec<_> = aggregate_fields + .iter() + .map(|field| Expr::Column(field.qualified_column())) + .collect(); + + let (window_field, window_index, width, is_nested) = match window_behavior { + WindowBehavior::InData => return Ok(timestamp_append), + WindowBehavior::FromOperator { + window, + window_field, + window_index, + is_nested, + } => match window { + WindowType::Tumbling { width, .. } | WindowType::Sliding { width, .. } => { + (window_field, window_index, width, is_nested) + } + WindowType::Session { .. 
} => { + return Ok(LogicalPlan::Extension(Extension { + node: Arc::new(WindowAppendExtension::new( + timestamp_append, + window_field, + window_index, + )), + })); + } + WindowType::Instant => return Ok(timestamp_append), + }, + }; + + if is_nested { + return Self::nested_final_projection( + timestamp_append, + window_field, + window_index, + width, + ); + } + + let timestamp_column = + Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name()); + aggregate_fields.insert(window_index, window_field.clone()); + + let window_expression = Self::build_window_struct_expr(×tamp_column, width); + aggregate_expressions.insert( + window_index, + window_expression + .alias_qualified(window_field.qualifier().cloned(), window_field.name()), + ); + aggregate_fields.push(timestamp_field); + + let bin_end_calculation = Expr::BinaryExpr(BinaryExpr { + left: Box::new(Expr::Column(timestamp_column.clone())), + op: logical_expr::Operator::Plus, + right: Box::new(Expr::Literal( + ScalarValue::IntervalMonthDayNano(Some( + datafusion::arrow::datatypes::IntervalMonthDayNanoType::make_value( + 0, + 0, + (width.as_nanos() - 1) as i64, + ), + )), + None, + )), + }); + aggregate_expressions.push(bin_end_calculation); + + Ok(LogicalPlan::Projection( + logical_expr::Projection::try_new_with_schema( + aggregate_expressions, + Arc::new(timestamp_append), + Arc::new(schema_from_df_fields(&aggregate_fields)?), + )?, + )) + } + + fn build_window_struct_expr(timestamp_column: &Column, width: Duration) -> Expr { + let start_expr = Expr::Column(timestamp_column.clone()); + let end_expr = Expr::BinaryExpr(BinaryExpr { + left: Box::new(Expr::Column(timestamp_column.clone())), + op: logical_expr::Operator::Plus, + right: Box::new(Expr::Literal( + ScalarValue::IntervalMonthDayNano(Some( + datafusion::arrow::datatypes::IntervalMonthDayNanoType::make_value( + 0, + 0, + width.as_nanos() as i64, + ), + )), + None, + )), + }); + + Expr::ScalarFunction(ScalarFunction { + func: 
Arc::new(datafusion::logical_expr::ScalarUDF::new_from_impl( + WindowStructUdf {}, + )), + args: vec![start_expr, end_expr], + }) + } + + fn nested_final_projection( + aggregate_plan: LogicalPlan, + window_field: DFField, + window_index: usize, + width: Duration, + ) -> Result { + let timestamp_field: DFField = aggregate_plan + .schema() + .qualified_field_with_unqualified_name(TIMESTAMP_FIELD) + .unwrap() + .into(); + let timestamp_column = + Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name()); + + let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema()); + let mut aggregate_expressions: Vec<_> = aggregate_fields + .iter() + .map(|field| Expr::Column(field.qualified_column())) + .collect(); + aggregate_fields.insert(window_index, window_field.clone()); + + let window_expression = Self::build_window_struct_expr(×tamp_column, width); + aggregate_expressions.insert( + window_index, + window_expression + .alias_qualified(window_field.qualifier().cloned(), window_field.name()), + ); + + Ok(LogicalPlan::Projection( + logical_expr::Projection::try_new_with_schema( + aggregate_expressions, + Arc::new(aggregate_plan), + Arc::new(schema_from_df_fields(&aggregate_fields).unwrap()), + ) + .unwrap(), + )) + } +} + +impl UserDefinedLogicalNodeCore for AggregateExtension { + fn name(&self) -> &str { + AGGREGATE_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.aggregate] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!( + f, + "AggregateExtension: {} | window_behavior: {:?}", + self.schema(), + match &self.window_behavior { + WindowBehavior::InData => "InData".to_string(), + WindowBehavior::FromOperator { window, .. 
} => format!("FromOperator({window:?})"), + } + ) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!("input size inconsistent"); + } + Ok(Self::new( + self.window_behavior.clone(), + inputs[0].clone(), + self.key_fields.clone(), + )) + } +} + +impl StreamExtension for AggregateExtension { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + let output_schema = (*self.schema).clone().into(); + StreamSchema::from_schema_keys(Arc::new(output_schema), vec![]).unwrap() + } +} + +/// Extension for appending window struct (start, end) to the output +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct WindowAppendExtension { + pub(crate) input: LogicalPlan, + pub(crate) window_field: DFField, + pub(crate) window_index: usize, + pub(crate) schema: DFSchemaRef, +} + +multifield_partial_ord!(WindowAppendExtension, input, window_index); + +impl WindowAppendExtension { + fn new(input: LogicalPlan, window_field: DFField, window_index: usize) -> Self { + let mut fields = fields_with_qualifiers(input.schema()); + fields.insert(window_index, window_field.clone()); + let metadata = input.schema().metadata().clone(); + Self { + input, + window_field, + window_index, + schema: Arc::new(schema_from_df_fields_with_metadata(&fields, metadata).unwrap()), + } + } +} + +impl UserDefinedLogicalNodeCore for WindowAppendExtension { + fn name(&self) -> &str { + "WindowAppendExtension" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "WindowAppendExtension: field {:?} at {}", + self.window_field, self.window_index + ) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Ok(Self::new( + inputs[0].clone(), + 
self.window_field.clone(), + self.window_index, + )) + } +} + +/// Placeholder UDF to construct the window struct at plan time +#[derive(Debug)] +struct WindowStructUdf; + +impl datafusion::logical_expr::ScalarUDFImpl for WindowStructUdf { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn name(&self) -> &str { + "window" + } + + fn signature(&self) -> &datafusion::logical_expr::Signature { + &datafusion::logical_expr::Signature { + type_signature: datafusion::logical_expr::TypeSignature::Any(2), + volatility: datafusion::logical_expr::Volatility::Immutable, + } + } + + fn return_type(&self, _args: &[DataType]) -> Result { + Ok(crate::sql::planner::schemas::window_arrow_struct()) + } + + fn invoke_with_args( + &self, + _args: datafusion::logical_expr::ScalarFunctionArgs, + ) -> Result { + unimplemented!("WindowStructUdf is a plan-time-only function") + } +} diff --git a/src/sql/planner/extension/join.rs b/src/sql/planner/extension/join.rs new file mode 100644 index 00000000..0b4fa13a --- /dev/null +++ b/src/sql/planner/extension/join.rs @@ -0,0 +1,61 @@ +use std::time::Duration; + +use datafusion::common::{DFSchemaRef, Result}; +use datafusion::logical_expr::expr::Expr; +use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore}; + +use crate::sql::planner::extension::{NamedNode, StreamExtension}; +use crate::sql::planner::types::StreamSchema; + +use std::sync::Arc; + +pub(crate) const JOIN_NODE_NAME: &str = "JoinNode"; + +/// Extension node for streaming joins. +/// Supports instant joins (windowed, no state) and updating joins (with TTL-based state). 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub struct JoinExtension { + pub(crate) rewritten_join: LogicalPlan, + pub(crate) is_instant: bool, + pub(crate) ttl: Option, +} + +impl StreamExtension for JoinExtension { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap() + } +} + +impl UserDefinedLogicalNodeCore for JoinExtension { + fn name(&self) -> &str { + JOIN_NODE_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.rewritten_join] + } + + fn schema(&self) -> &DFSchemaRef { + self.rewritten_join.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "JoinExtension: {}", self.schema()) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Ok(Self { + rewritten_join: inputs[0].clone(), + is_instant: self.is_instant, + ttl: self.ttl, + }) + } +} diff --git a/src/sql/planner/extension/key_calculation.rs b/src/sql/planner/extension/key_calculation.rs new file mode 100644 index 00000000..f60c4d32 --- /dev/null +++ b/src/sql/planner/extension/key_calculation.rs @@ -0,0 +1,138 @@ +use std::fmt::Formatter; +use std::sync::Arc; + +use datafusion::arrow::datatypes::{Field, Schema}; +use datafusion::common::{DFSchemaRef, Result, internal_err}; +use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore}; + +use crate::multifield_partial_ord; +use crate::sql::planner::extension::{NamedNode, StreamExtension}; +use crate::sql::planner::types::{ + StreamSchema, fields_with_qualifiers, schema_from_df_fields_with_metadata, +}; + +pub(crate) const KEY_CALCULATION_NAME: &str = "KeyCalculationExtension"; + +/// Two ways of specifying keys: column indices or expressions to evaluate +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub enum KeysOrExprs { + Keys(Vec), + 
Exprs(Vec), +} + +/// Calculation for computing keyed data, used for shuffling data to correct nodes +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct KeyCalculationExtension { + pub(crate) name: Option, + pub(crate) input: LogicalPlan, + pub(crate) keys: KeysOrExprs, + pub(crate) schema: DFSchemaRef, +} + +multifield_partial_ord!(KeyCalculationExtension, name, input, keys); + +impl KeyCalculationExtension { + pub fn new_named_and_trimmed(input: LogicalPlan, keys: Vec, name: String) -> Self { + let output_fields: Vec<_> = fields_with_qualifiers(input.schema()) + .into_iter() + .enumerate() + .filter_map(|(index, field)| { + if !keys.contains(&index) { + Some(field.clone()) + } else { + None + } + }) + .collect(); + + let schema = + schema_from_df_fields_with_metadata(&output_fields, input.schema().metadata().clone()) + .unwrap(); + Self { + name: Some(name), + input, + keys: KeysOrExprs::Keys(keys), + schema: Arc::new(schema), + } + } + + pub fn new(input: LogicalPlan, keys: KeysOrExprs) -> Self { + let schema = input.schema().clone(); + Self { + name: None, + input, + keys, + schema, + } + } +} + +impl StreamExtension for KeyCalculationExtension { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + let input_schema = self.input.schema().as_ref(); + match &self.keys { + KeysOrExprs::Keys(keys) => { + StreamSchema::from_schema_keys(Arc::new(input_schema.into()), keys.clone()).unwrap() + } + KeysOrExprs::Exprs(exprs) => { + let mut fields = vec![]; + for (i, e) in exprs.iter().enumerate() { + let (dt, nullable) = e.data_type_and_nullable(input_schema).unwrap(); + fields.push(Field::new(format!("__key_{i}"), dt, nullable).into()); + } + for f in input_schema.fields().iter() { + fields.push(f.clone()); + } + StreamSchema::from_schema_keys( + Arc::new(Schema::new(fields)), + (1..=exprs.len()).collect(), + ) + .unwrap() + } + } + } +} + +impl UserDefinedLogicalNodeCore for KeyCalculationExtension { + fn name(&self) 
-> &str { + KEY_CALCULATION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "KeyCalculationExtension: {}", self.schema()) + } + + fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!("input size inconsistent"); + } + + let keys = match &self.keys { + KeysOrExprs::Keys(k) => KeysOrExprs::Keys(k.clone()), + KeysOrExprs::Exprs(_) => KeysOrExprs::Exprs(exprs), + }; + + Ok(Self { + name: self.name.clone(), + input: inputs[0].clone(), + keys, + schema: self.schema.clone(), + }) + } +} diff --git a/src/sql/planner/extension/mod.rs b/src/sql/planner/extension/mod.rs new file mode 100644 index 00000000..96ac5f32 --- /dev/null +++ b/src/sql/planner/extension/mod.rs @@ -0,0 +1,153 @@ +use std::fmt::Debug; +use std::sync::Arc; + +use datafusion::common::{DFSchemaRef, DataFusionError, Result, TableReference}; +use datafusion::logical_expr::{ + Expr, LogicalPlan, UserDefinedLogicalNode, UserDefinedLogicalNodeCore, +}; + +use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field}; +use crate::sql::planner::types::StreamSchema; + +pub(crate) mod aggregate; +pub(crate) mod join; +pub(crate) mod key_calculation; +pub(crate) mod projection; +pub(crate) mod remote_table; +pub(crate) mod watermark_node; +pub(crate) mod window_fn; + +pub(crate) trait StreamExtension: Debug { + fn node_name(&self) -> Option; + fn output_schema(&self) -> StreamSchema; + fn transparent(&self) -> bool { + false + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum NamedNode { + Source(TableReference), + Watermark(TableReference), + RemoteTable(TableReference), + Sink(TableReference), +} + +fn try_from_t( + node: &dyn UserDefinedLogicalNode, +) -> std::result::Result<&dyn StreamExtension, ()> { + 
node.as_any() + .downcast_ref::() + .map(|t| t as &dyn StreamExtension) + .ok_or(()) +} + +impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension { + type Error = DataFusionError; + + fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result { + use aggregate::AggregateExtension; + use join::JoinExtension; + use key_calculation::KeyCalculationExtension; + use projection::ProjectionExtension; + use remote_table::RemoteTableExtension; + use watermark_node::WatermarkNode; + use window_fn::WindowFunctionExtension; + + try_from_t::(node) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name()))) + } +} + +impl<'a> TryFrom<&'a Arc> for &'a dyn StreamExtension { + type Error = DataFusionError; + + fn try_from(node: &'a Arc) -> Result { + TryFrom::try_from(node.as_ref()) + } +} + +#[macro_export] +macro_rules! 
multifield_partial_ord { + ($ty:ty, $($field:tt), *) => { + impl PartialOrd for $ty { + fn partial_cmp(&self, other: &Self) -> Option { + $( + let cmp = self.$field.partial_cmp(&other.$field)?; + if cmp != std::cmp::Ordering::Equal { + return Some(cmp); + } + )* + Some(std::cmp::Ordering::Equal) + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct TimestampAppendExtension { + pub(crate) input: LogicalPlan, + pub(crate) qualifier: Option, + pub(crate) schema: DFSchemaRef, +} + +impl TimestampAppendExtension { + pub(crate) fn new(input: LogicalPlan, qualifier: Option) -> Self { + if has_timestamp_field(input.schema()) { + unreachable!( + "shouldn't be adding timestamp to a plan that already has it: {:?}", + input.schema() + ); + } + let schema = add_timestamp_field(input.schema().clone(), qualifier.clone()).unwrap(); + Self { + input, + qualifier, + schema, + } + } +} + +multifield_partial_ord!(TimestampAppendExtension, input, qualifier); + +impl UserDefinedLogicalNodeCore for TimestampAppendExtension { + fn name(&self) -> &str { + "TimestampAppendExtension" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "TimestampAppendExtension({:?}): {}", + self.qualifier, + self.schema + .fields() + .iter() + .map(|f| f.name().to_string()) + .collect::>() + .join(", ") + ) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Ok(Self::new(inputs[0].clone(), self.qualifier.clone())) + } +} diff --git a/src/sql/planner/extension/projection.rs b/src/sql/planner/extension/projection.rs new file mode 100644 index 00000000..f7ecb6ed --- /dev/null +++ b/src/sql/planner/extension/projection.rs @@ -0,0 +1,91 @@ +use std::fmt::Formatter; +use std::sync::Arc; + +use datafusion::common::{DFSchemaRef, Result}; 
+use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore}; + +use crate::multifield_partial_ord; +use crate::sql::planner::extension::{NamedNode, StreamExtension}; +use crate::sql::planner::types::{DFField, StreamSchema, schema_from_df_fields}; + +pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension"; + +/// Projection operations for streaming SQL plans. +/// Handles column projections, shuffles for key-based operations, etc. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct ProjectionExtension { + pub(crate) inputs: Vec, + pub(crate) name: Option, + pub(crate) exprs: Vec, + pub(crate) schema: DFSchemaRef, + pub(crate) shuffle: bool, +} + +multifield_partial_ord!(ProjectionExtension, name, exprs); + +impl ProjectionExtension { + pub(crate) fn new(inputs: Vec, name: Option, exprs: Vec) -> Self { + let input_schema = inputs.first().unwrap().schema(); + let fields: Vec = exprs + .iter() + .map(|e| DFField::from(e.to_field(input_schema).unwrap())) + .collect(); + + let schema = Arc::new(schema_from_df_fields(&fields).unwrap()); + + Self { + inputs, + name, + exprs, + schema, + shuffle: false, + } + } + + pub(crate) fn shuffled(mut self) -> Self { + self.shuffle = true; + self + } +} + +impl StreamExtension for ProjectionExtension { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_arrow().clone())).unwrap() + } +} + +impl UserDefinedLogicalNodeCore for ProjectionExtension { + fn name(&self) -> &str { + PROJECTION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + self.inputs.iter().collect() + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "ProjectionExtension: {}", self.schema()) + } + + fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { + Ok(Self 
{ + name: self.name.clone(), + inputs, + exprs, + schema: self.schema.clone(), + shuffle: self.shuffle, + }) + } +} diff --git a/src/sql/planner/extension/remote_table.rs b/src/sql/planner/extension/remote_table.rs new file mode 100644 index 00000000..4935efd9 --- /dev/null +++ b/src/sql/planner/extension/remote_table.rs @@ -0,0 +1,71 @@ +use std::fmt::Formatter; +use std::sync::Arc; + +use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err}; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; + +use crate::multifield_partial_ord; +use crate::sql::planner::extension::{NamedNode, StreamExtension}; +use crate::sql::planner::types::StreamSchema; + +pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension"; + +/// Lightweight extension that segments the execution graph and enables merging +/// nodes with the same name. Allows materializing intermediate results. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct RemoteTableExtension { + pub(crate) input: LogicalPlan, + pub(crate) name: TableReference, + pub(crate) schema: DFSchemaRef, + pub(crate) materialize: bool, +} + +multifield_partial_ord!(RemoteTableExtension, input, name, materialize); + +impl StreamExtension for RemoteTableExtension { + fn node_name(&self) -> Option { + if self.materialize { + Some(NamedNode::RemoteTable(self.name.to_owned())) + } else { + None + } + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap() + } +} + +impl UserDefinedLogicalNodeCore for RemoteTableExtension { + fn name(&self) -> &str { + REMOTE_TABLE_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "RemoteTableExtension: {}", self.schema) + } + + fn 
with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!("input size inconsistent"); + } + Ok(Self { + input: inputs[0].clone(), + name: self.name.clone(), + schema: self.schema.clone(), + materialize: self.materialize, + }) + } +} diff --git a/src/sql/planner/extension/watermark_node.rs b/src/sql/planner/extension/watermark_node.rs new file mode 100644 index 00000000..eb776ff2 --- /dev/null +++ b/src/sql/planner/extension/watermark_node.rs @@ -0,0 +1,110 @@ +use std::fmt::Formatter; +use std::sync::Arc; + +use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err}; +use datafusion::error::DataFusionError; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; + +use crate::multifield_partial_ord; +use crate::sql::planner::extension::{NamedNode, StreamExtension}; +use crate::sql::planner::schemas::add_timestamp_field; +use crate::sql::planner::types::{StreamSchema, TIMESTAMP_FIELD}; + +pub(crate) const WATERMARK_NODE_NAME: &str = "WatermarkNode"; + +/// Represents a watermark node in the streaming query plan. +/// Watermarks track event-time progress and enable time-based operations. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct WatermarkNode { + pub input: LogicalPlan, + pub qualifier: TableReference, + pub watermark_expression: Expr, + pub schema: DFSchemaRef, + timestamp_index: usize, +} + +multifield_partial_ord!( + WatermarkNode, + input, + qualifier, + watermark_expression, + timestamp_index +); + +impl UserDefinedLogicalNodeCore for WatermarkNode { + fn name(&self) -> &str { + WATERMARK_NODE_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![self.watermark_expression.clone()] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "WatermarkNode({}): {}", self.qualifier, self.schema) + } + + fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!("input size inconsistent"); + } + if exprs.len() != 1 { + return internal_err!("expected one expression; found {}", exprs.len()); + } + + let timestamp_index = self + .schema + .index_of_column_by_name(Some(&self.qualifier), TIMESTAMP_FIELD) + .ok_or_else(|| DataFusionError::Plan("missing timestamp column".to_string()))?; + + Ok(Self { + input: inputs[0].clone(), + qualifier: self.qualifier.clone(), + watermark_expression: exprs.into_iter().next().unwrap(), + schema: self.schema.clone(), + timestamp_index, + }) + } +} + +impl StreamExtension for WatermarkNode { + fn node_name(&self) -> Option { + Some(NamedNode::Watermark(self.qualifier.clone())) + } + + fn output_schema(&self) -> StreamSchema { + self.stream_schema() + } +} + +impl WatermarkNode { + pub(crate) fn new( + input: LogicalPlan, + qualifier: TableReference, + watermark_expression: Expr, + ) -> Result { + let schema = add_timestamp_field(input.schema().clone(), Some(qualifier.clone()))?; + let timestamp_index = schema + .index_of_column_by_name(None, TIMESTAMP_FIELD) + .ok_or_else(|| 
DataFusionError::Plan("missing _timestamp column".to_string()))?; + Ok(Self { + input, + qualifier, + watermark_expression, + schema, + timestamp_index, + }) + } + + pub(crate) fn stream_schema(&self) -> StreamSchema { + StreamSchema::new_unkeyed(Arc::new(self.schema.as_ref().into()), self.timestamp_index) + } +} diff --git a/src/sql/planner/extension/window_fn.rs b/src/sql/planner/extension/window_fn.rs new file mode 100644 index 00000000..6e6e1c36 --- /dev/null +++ b/src/sql/planner/extension/window_fn.rs @@ -0,0 +1,62 @@ +use std::sync::Arc; + +use datafusion::common::{DFSchemaRef, Result}; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; + +use crate::sql::planner::extension::{NamedNode, StreamExtension}; +use crate::sql::planner::types::StreamSchema; + +pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension"; + +/// Extension for window functions (e.g., ROW_NUMBER, RANK) over windowed input. +/// Window functions require already-windowed input and are evaluated per-window. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub(crate) struct WindowFunctionExtension { + pub(crate) window_plan: LogicalPlan, + pub(crate) key_fields: Vec, +} + +impl WindowFunctionExtension { + pub fn new(window_plan: LogicalPlan, key_fields: Vec) -> Self { + Self { + window_plan, + key_fields, + } + } +} + +impl UserDefinedLogicalNodeCore for WindowFunctionExtension { + fn name(&self) -> &str { + WINDOW_FUNCTION_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.window_plan] + } + + fn schema(&self) -> &DFSchemaRef { + self.window_plan.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "WindowFunction: {}", self.schema()) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Ok(Self::new(inputs[0].clone(), self.key_fields.clone())) + } +} + +impl StreamExtension for WindowFunctionExtension { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().clone().into())).unwrap() + } +} diff --git a/src/sql/planner/mod.rs b/src/sql/planner/mod.rs new file mode 100644 index 00000000..ead5e212 --- /dev/null +++ b/src/sql/planner/mod.rs @@ -0,0 +1,355 @@ +#![allow(clippy::new_without_default)] + +pub(crate) mod extension; +pub mod parse; +pub mod plan; +pub mod schemas; +pub mod sql_to_plan; +pub mod types; + +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +use datafusion::arrow::datatypes::{self as datatypes, DataType, Field, Schema}; +use datafusion::common::{Result, plan_err}; +use datafusion::datasource::DefaultTableSource; +use datafusion::error::DataFusionError; +use datafusion::execution::{FunctionRegistry, SessionStateDefaults}; +use datafusion::logical_expr::expr_rewriter::FunctionRewrite; +use datafusion::logical_expr::planner::ExprPlanner; +use datafusion::logical_expr::{ + 
AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableSource, WindowUDF, +}; +use datafusion::optimizer::Analyzer; +use datafusion::sql::TableReference; +use datafusion::sql::planner::ContextProvider; +use unicase::UniCase; + +use crate::sql::planner::schemas::window_arrow_struct; +use crate::sql::planner::types::{PlaceholderUdf, PlanningOptions}; + +/// Catalog provider for streaming SQL queries. +/// Manages tables, UDFs, and configuration for streaming SQL planning. +#[derive(Clone, Default)] +pub struct StreamSchemaProvider { + pub source_defs: HashMap, + tables: HashMap, StreamTable>, + pub functions: HashMap>, + pub aggregate_functions: HashMap>, + pub window_functions: HashMap>, + config_options: datafusion::config::ConfigOptions, + pub expr_planners: Vec>, + pub planning_options: PlanningOptions, + pub analyzer: Analyzer, +} + +/// Represents a table registered in the streaming SQL context +#[derive(Clone, Debug)] +pub enum StreamTable { + Source { + name: String, + schema: Arc, + event_time_field: Option, + watermark_field: Option, + }, + Sink { + name: String, + schema: Arc, + }, + Memory { + name: String, + logical_plan: Option, + }, +} + +impl StreamTable { + pub fn name(&self) -> &str { + match self { + StreamTable::Source { name, .. } => name, + StreamTable::Sink { name, .. } => name, + StreamTable::Memory { name, .. } => name, + } + } + + pub fn get_fields(&self) -> Vec> { + match self { + StreamTable::Source { schema, .. } => schema.fields().to_vec(), + StreamTable::Sink { schema, .. } => schema.fields().to_vec(), + StreamTable::Memory { .. 
} => vec![], + } + } +} + +#[derive(Debug)] +struct LogicalBatchInput { + table_name: String, + schema: Arc, +} + +impl datafusion::datasource::TableProvider for LogicalBatchInput { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn schema(&self) -> Arc { + self.schema.clone() + } + + fn table_type(&self) -> datafusion::datasource::TableType { + datafusion::datasource::TableType::Base + } + + fn scan<'life0, 'life1, 'life2, 'life3, 'async_trait>( + &'life0 self, + _state: &'life1 dyn datafusion::catalog::Session, + _projection: Option<&'life2 Vec>, + _filters: &'life3 [Expr], + _limit: Option, + ) -> std::pin::Pin< + Box< + dyn std::future::Future< + Output = Result>, + > + Send + + 'async_trait, + >, + > + where + 'life0: 'async_trait, + 'life1: 'async_trait, + 'life2: 'async_trait, + 'life3: 'async_trait, + Self: 'async_trait, + { + unimplemented!("LogicalBatchInput is for planning only") + } +} + +fn create_table(table_name: String, schema: Arc) -> Arc { + let table_provider = LogicalBatchInput { table_name, schema }; + let wrapped = Arc::new(table_provider); + let provider = DefaultTableSource::new(wrapped); + Arc::new(provider) +} + +impl StreamSchemaProvider { + pub fn new() -> Self { + let mut registry = Self { + ..Default::default() + }; + + registry + .register_udf(PlaceholderUdf::with_return( + "hop", + vec![ + DataType::Interval(datatypes::IntervalUnit::MonthDayNano), + DataType::Interval(datatypes::IntervalUnit::MonthDayNano), + ], + window_arrow_struct(), + )) + .unwrap(); + + registry + .register_udf(PlaceholderUdf::with_return( + "tumble", + vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], + window_arrow_struct(), + )) + .unwrap(); + + registry + .register_udf(PlaceholderUdf::with_return( + "session", + vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], + window_arrow_struct(), + )) + .unwrap(); + + registry + .register_udf(PlaceholderUdf::with_return( + "unnest", + vec![DataType::List(Arc::new(Field::new( + 
"field", + DataType::Utf8, + true, + )))], + DataType::Utf8, + )) + .unwrap(); + + registry + .register_udf(PlaceholderUdf::with_return( + "row_time", + vec![], + DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None), + )) + .unwrap(); + + for p in SessionStateDefaults::default_scalar_functions() { + registry.register_udf(p).unwrap(); + } + for p in SessionStateDefaults::default_aggregate_functions() { + registry.register_udaf(p).unwrap(); + } + for p in SessionStateDefaults::default_window_functions() { + registry.register_udwf(p).unwrap(); + } + for p in SessionStateDefaults::default_expr_planners() { + registry.register_expr_planner(p).unwrap(); + } + + registry + } + + pub fn add_source_table( + &mut self, + name: String, + schema: Arc, + event_time_field: Option, + watermark_field: Option, + ) { + self.tables.insert( + UniCase::new(name.clone()), + StreamTable::Source { + name, + schema, + event_time_field, + watermark_field, + }, + ); + } + + pub fn add_sink_table(&mut self, name: String, schema: Arc) { + self.tables.insert( + UniCase::new(name.clone()), + StreamTable::Sink { name, schema }, + ); + } + + fn insert_table(&mut self, table: StreamTable) { + self.tables + .insert(UniCase::new(table.name().to_string()), table); + } + + pub fn get_table(&self, table_name: impl Into) -> Option<&StreamTable> { + self.tables.get(&UniCase::new(table_name.into())) + } + + pub fn get_table_mut(&mut self, table_name: impl Into) -> Option<&mut StreamTable> { + self.tables.get_mut(&UniCase::new(table_name.into())) + } +} + +impl ContextProvider for StreamSchemaProvider { + fn get_table_source(&self, name: TableReference) -> Result> { + let table = self + .get_table(name.to_string()) + .ok_or_else(|| DataFusionError::Plan(format!("Table {name} not found")))?; + + let fields = table.get_fields(); + let schema = Arc::new(Schema::new_with_metadata( + fields + .iter() + .map(|f| f.as_ref().clone()) + .collect::>(), + HashMap::new(), + )); + Ok(create_table(name.to_string(), 
schema)) + } + + fn get_function_meta(&self, name: &str) -> Option> { + self.functions.get(name).cloned() + } + + fn get_aggregate_meta(&self, name: &str) -> Option> { + self.aggregate_functions.get(name).cloned() + } + + fn get_variable_type(&self, _variable_names: &[String]) -> Option { + None + } + + fn options(&self) -> &datafusion::config::ConfigOptions { + &self.config_options + } + + fn get_window_meta(&self, name: &str) -> Option> { + self.window_functions.get(name).cloned() + } + + fn udf_names(&self) -> Vec { + self.functions.keys().cloned().collect() + } + + fn udaf_names(&self) -> Vec { + self.aggregate_functions.keys().cloned().collect() + } + + fn udwf_names(&self) -> Vec { + self.window_functions.keys().cloned().collect() + } + + fn get_expr_planners(&self) -> &[Arc] { + &self.expr_planners + } +} + +impl FunctionRegistry for StreamSchemaProvider { + fn udfs(&self) -> HashSet { + self.functions.keys().cloned().collect() + } + + fn udf(&self, name: &str) -> Result> { + if let Some(f) = self.functions.get(name) { + Ok(Arc::clone(f)) + } else { + plan_err!("No UDF with name {name}") + } + } + + fn udaf(&self, name: &str) -> Result> { + if let Some(f) = self.aggregate_functions.get(name) { + Ok(Arc::clone(f)) + } else { + plan_err!("No UDAF with name {name}") + } + } + + fn udwf(&self, name: &str) -> Result> { + if let Some(f) = self.window_functions.get(name) { + Ok(Arc::clone(f)) + } else { + plan_err!("No UDWF with name {name}") + } + } + + fn register_function_rewrite( + &mut self, + rewrite: Arc, + ) -> Result<()> { + self.analyzer.add_function_rewrite(rewrite); + Ok(()) + } + + fn register_udf(&mut self, udf: Arc) -> Result>> { + Ok(self.functions.insert(udf.name().to_string(), udf)) + } + + fn register_udaf(&mut self, udaf: Arc) -> Result>> { + Ok(self + .aggregate_functions + .insert(udaf.name().to_string(), udaf)) + } + + fn register_udwf(&mut self, udwf: Arc) -> Result>> { + Ok(self.window_functions.insert(udwf.name().to_string(), udwf)) + } + 
+ fn register_expr_planner(&mut self, expr_planner: Arc) -> Result<()> { + self.expr_planners.push(expr_planner); + Ok(()) + } + + fn expr_planners(&self) -> Vec> { + self.expr_planners.clone() + } +} diff --git a/src/sql/planner/parse.rs b/src/sql/planner/parse.rs new file mode 100644 index 00000000..dfaec9a6 --- /dev/null +++ b/src/sql/planner/parse.rs @@ -0,0 +1,183 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; + +use datafusion::common::{Result, plan_err}; +use datafusion::error::DataFusionError; +use datafusion::sql::sqlparser::ast::{SqlOption, Statement as DFStatement}; +use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; +use datafusion::sql::sqlparser::parser::Parser; + +use crate::coordinator::{ + CreateFunction, DropFunction, ShowFunctions, StartFunction, Statement as CoordinatorStatement, + StopFunction, StreamingSql, +}; + +/// Stage 1: String → Box +/// +/// Parses SQL using FunctionStreamDialect (from sqlparser-rs), then classifies +/// the result into either a FunctionStream DDL statement or a StreamingSql, +/// both unified under the coordinator's Statement trait. 
+pub fn parse_sql(query: &str) -> Result> { + let trimmed = query.trim(); + if trimmed.is_empty() { + return plan_err!("Query is empty"); + } + + let dialect = FunctionStreamDialect {}; + let mut statements = Parser::parse_sql(&dialect, trimmed) + .map_err(|e| DataFusionError::Plan(format!("SQL parse error: {e}")))?; + + if statements.is_empty() { + return plan_err!("No SQL statements found"); + } + + let stmt = statements.remove(0); + classify_statement(stmt) +} + +/// Classify a parsed DataFusion Statement into the coordinator's Statement type. +/// +/// FunctionStream DDL (CREATE/DROP/START/STOP FUNCTION, SHOW FUNCTIONS) +/// is converted to concrete coordinator types; everything else is wrapped +/// in StreamingSql. +fn classify_statement(stmt: DFStatement) -> Result> { + match stmt { + DFStatement::CreateFunctionWith { options } => { + let properties = sql_options_to_map(&options); + let create_fn = CreateFunction::from_properties(properties) + .map_err(|e| DataFusionError::Plan(format!("CREATE FUNCTION: {e}")))?; + Ok(Box::new(create_fn)) + } + DFStatement::StartFunction { name } => Ok(Box::new(StartFunction::new(name.to_string()))), + DFStatement::StopFunction { name } => Ok(Box::new(StopFunction::new(name.to_string()))), + DFStatement::DropFunction { func_desc, .. } => { + let name = func_desc + .first() + .map(|d| d.name.to_string()) + .unwrap_or_default(); + Ok(Box::new(DropFunction::new(name))) + } + DFStatement::ShowFunctions { .. } => Ok(Box::new(ShowFunctions::new())), + other => Ok(Box::new(StreamingSql::new(other))), + } +} + +/// Convert Vec (KeyValue pairs) into HashMap. 
+fn sql_options_to_map(options: &[SqlOption]) -> HashMap { + options + .iter() + .filter_map(|opt| match opt { + SqlOption::KeyValue { key, value } => Some(( + key.value.clone(), + value.to_string().trim_matches('\'').to_string(), + )), + _ => None, + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + fn is_streaming_sql(stmt: &dyn CoordinatorStatement) -> bool { + let debug = format!("{:?}", stmt); + debug.starts_with("StreamingSql") + } + + fn is_ddl(stmt: &dyn CoordinatorStatement) -> bool { + !is_streaming_sql(stmt) + } + + #[test] + fn test_parse_create_function() { + let sql = + "CREATE FUNCTION WITH ('function_path'='./test.wasm', 'config_path'='./config.yml')"; + let stmt = parse_sql(sql).unwrap(); + assert!(is_ddl(stmt.as_ref())); + } + + #[test] + fn test_parse_create_function_minimal() { + let sql = "CREATE FUNCTION WITH ('function_path'='./processor.wasm')"; + let stmt = parse_sql(sql).unwrap(); + assert!(is_ddl(stmt.as_ref())); + } + + #[test] + fn test_parse_drop_function() { + let sql = "DROP FUNCTION my_task"; + let stmt = parse_sql(sql).unwrap(); + assert!(is_ddl(stmt.as_ref())); + } + + #[test] + fn test_parse_start_function() { + let sql = "START FUNCTION my_task"; + let stmt = parse_sql(sql).unwrap(); + assert!(is_ddl(stmt.as_ref())); + } + + #[test] + fn test_parse_stop_function() { + let sql = "STOP FUNCTION my_task"; + let stmt = parse_sql(sql).unwrap(); + assert!(is_ddl(stmt.as_ref())); + } + + #[test] + fn test_parse_show_functions() { + let sql = "SHOW FUNCTIONS"; + let stmt = parse_sql(sql).unwrap(); + assert!(is_ddl(stmt.as_ref())); + } + + #[test] + fn test_parse_case_insensitive() { + let sql1 = "create function with ('function_path'='./test.wasm')"; + assert!(is_ddl(parse_sql(sql1).unwrap().as_ref())); + + let sql2 = "show functions"; + assert!(is_ddl(parse_sql(sql2).unwrap().as_ref())); + + let sql3 = "start function my_task"; + assert!(is_ddl(parse_sql(sql3).unwrap().as_ref())); + } + + #[test] + fn 
test_parse_streaming_sql() { + let sql = + "SELECT count(*), tumble(interval '1 minute') as window FROM events GROUP BY window"; + let stmt = parse_sql(sql).unwrap(); + assert!(is_streaming_sql(stmt.as_ref())); + } + + #[test] + fn test_parse_empty() { + assert!(parse_sql("").is_err()); + assert!(parse_sql(" ").is_err()); + } + + #[test] + fn test_parse_with_extra_properties() { + let sql = r#"CREATE FUNCTION WITH ( + 'function_path'='./test.wasm', + 'config_path'='./config.yml', + 'parallelism'='4', + 'memory-limit'='256mb' + )"#; + let stmt = parse_sql(sql).unwrap(); + assert!(is_ddl(stmt.as_ref())); + } +} diff --git a/src/sql/planner/plan/aggregate.rs b/src/sql/planner/plan/aggregate.rs new file mode 100644 index 00000000..6ed7499d --- /dev/null +++ b/src/sql/planner/plan/aggregate.rs @@ -0,0 +1,275 @@ +use std::sync::Arc; + +use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; +use datafusion::common::{DFSchema, Result, not_impl_err, plan_err}; +use datafusion::functions_aggregate::expr_fn::max; +use datafusion::logical_expr; +use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan}; +use datafusion::prelude::col; +use tracing::debug; + +use crate::sql::planner::StreamSchemaProvider; +use crate::sql::planner::extension::aggregate::AggregateExtension; +use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; +use crate::sql::planner::plan::WindowDetectingVisitor; +use crate::sql::planner::types::{ + DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window, + schema_from_df_fields_with_metadata, +}; + +pub(crate) struct AggregateRewriter<'a> { + pub schema_provider: &'a StreamSchemaProvider, +} + +impl AggregateRewriter<'_> { + /// Rewrite a non-windowed aggregate into an updating aggregate with key calculation + pub fn rewrite_non_windowed_aggregate( + input: Arc, + mut key_fields: Vec, + group_expr: Vec, + mut aggr_expr: Vec, + schema: Arc, + _schema_provider: 
&StreamSchemaProvider, + ) -> Result> { + let key_count = key_fields.len(); + key_fields.extend(fields_with_qualifiers(input.schema())); + + let key_schema = Arc::new(schema_from_df_fields_with_metadata( + &key_fields, + schema.metadata().clone(), + )?); + + let mut key_projection_expressions: Vec<_> = group_expr + .iter() + .zip(key_fields.iter()) + .map(|(expr, f)| expr.clone().alias(f.name().to_string())) + .collect(); + + key_projection_expressions.extend( + fields_with_qualifiers(input.schema()) + .iter() + .map(|field| Expr::Column(field.qualified_column())), + ); + + let key_projection = + LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema( + key_projection_expressions, + input.clone(), + key_schema, + )?); + + let key_plan = LogicalPlan::Extension(Extension { + node: Arc::new(KeyCalculationExtension::new( + key_projection, + KeysOrExprs::Keys((0..key_count).collect()), + )), + }); + + let Ok(timestamp_field) = key_plan + .schema() + .qualified_field_with_unqualified_name(TIMESTAMP_FIELD) + else { + return plan_err!("no timestamp field found in schema"); + }; + + let timestamp_field: DFField = timestamp_field.into(); + let column = timestamp_field.qualified_column(); + aggr_expr.push(max(col(column.clone())).alias(TIMESTAMP_FIELD)); + + let mut output_schema_fields = fields_with_qualifiers(&schema); + output_schema_fields.push(timestamp_field.clone()); + let output_schema = Arc::new(schema_from_df_fields_with_metadata( + &output_schema_fields, + schema.metadata().clone(), + )?); + + let aggregate = Aggregate::try_new_with_schema( + Arc::new(key_plan), + group_expr, + aggr_expr, + output_schema, + )?; + + debug!( + "non-windowed aggregate field names: {:?}", + aggregate + .schema + .fields() + .iter() + .map(|f| f.name()) + .collect::>() + ); + + let final_plan = LogicalPlan::Aggregate(aggregate); + Ok(Transformed::yes(final_plan)) + } +} + +impl TreeNodeRewriter for AggregateRewriter<'_> { + type Node = LogicalPlan; + + fn f_up(&mut self, 
node: Self::Node) -> Result> { + let LogicalPlan::Aggregate(Aggregate { + input, + mut group_expr, + aggr_expr, + schema, + .. + }) = node + else { + return Ok(Transformed::no(node)); + }; + + let mut window_group_expr: Vec<_> = group_expr + .iter() + .enumerate() + .filter_map(|(i, expr)| { + find_window(expr) + .map(|option| option.map(|inner| (i, inner))) + .transpose() + }) + .collect::>>()?; + + if window_group_expr.len() > 1 { + return not_impl_err!( + "do not support {} window expressions in group by", + window_group_expr.len() + ); + } + + let mut key_fields: Vec = fields_with_qualifiers(&schema) + .iter() + .take(group_expr.len()) + .map(|field| { + DFField::new( + field.qualifier().cloned(), + format!("_key_{}", field.name()), + field.data_type().clone(), + field.is_nullable(), + ) + }) + .collect(); + + let mut window_detecting_visitor = WindowDetectingVisitor::default(); + input.visit_with_subqueries(&mut window_detecting_visitor)?; + + let window = window_detecting_visitor.window; + let window_behavior = match (window.is_some(), !window_group_expr.is_empty()) { + (true, true) => { + let input_window = window.unwrap(); + let (window_index, group_by_window_type) = window_group_expr.pop().unwrap(); + if group_by_window_type != input_window { + return Err(datafusion::error::DataFusionError::NotImplemented( + "window in group by does not match input window".to_string(), + )); + } + let matching_field = window_detecting_visitor.fields.iter().next(); + match matching_field { + Some(field) => { + group_expr[window_index] = Expr::Column(field.qualified_column()); + WindowBehavior::InData + } + None => { + if matches!(input_window, WindowType::Session { .. 
}) { + return plan_err!("can't reinvoke session window in nested aggregates"); + } + group_expr.remove(window_index); + key_fields.remove(window_index); + let window_field = schema.qualified_field(window_index).into(); + WindowBehavior::FromOperator { + window: input_window, + window_field, + window_index, + is_nested: true, + } + } + } + } + (true, false) => WindowBehavior::InData, + (false, true) => { + let (window_index, window_type) = window_group_expr.pop().unwrap(); + group_expr.remove(window_index); + key_fields.remove(window_index); + let window_field = schema.qualified_field(window_index).into(); + WindowBehavior::FromOperator { + window: window_type, + window_field, + window_index, + is_nested: false, + } + } + (false, false) => { + return Self::rewrite_non_windowed_aggregate( + input, + key_fields, + group_expr, + aggr_expr, + schema, + self.schema_provider, + ); + } + }; + + let key_count = key_fields.len(); + key_fields.extend(fields_with_qualifiers(input.schema())); + + let key_schema = Arc::new(schema_from_df_fields_with_metadata( + &key_fields, + schema.metadata().clone(), + )?); + + let mut key_projection_expressions: Vec<_> = group_expr + .iter() + .zip(key_fields.iter()) + .map(|(expr, f)| expr.clone().alias(f.name().to_string())) + .collect(); + + key_projection_expressions.extend( + fields_with_qualifiers(input.schema()) + .iter() + .map(|field| Expr::Column(field.qualified_column())), + ); + + let key_projection = + LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema( + key_projection_expressions, + input.clone(), + key_schema, + )?); + + let key_plan = LogicalPlan::Extension(Extension { + node: Arc::new(KeyCalculationExtension::new( + key_projection, + KeysOrExprs::Keys((0..key_count).collect()), + )), + }); + + let mut aggregate_schema_fields = fields_with_qualifiers(&schema); + if let WindowBehavior::FromOperator { window_index, .. 
} = &window_behavior { + aggregate_schema_fields.remove(*window_index); + } + let internal_schema = Arc::new(schema_from_df_fields_with_metadata( + &aggregate_schema_fields, + schema.metadata().clone(), + )?); + + let rewritten_aggregate = Aggregate::try_new_with_schema( + Arc::new(key_plan), + group_expr, + aggr_expr, + internal_schema, + )?; + + let aggregate_extension = AggregateExtension::new( + window_behavior, + LogicalPlan::Aggregate(rewritten_aggregate), + (0..key_count).collect(), + ); + let final_plan = LogicalPlan::Extension(Extension { + node: Arc::new(aggregate_extension), + }); + + WindowDetectingVisitor::get_window(&final_plan)?; + Ok(Transformed::yes(final_plan)) + } +} diff --git a/src/sql/planner/plan/join.rs b/src/sql/planner/plan/join.rs new file mode 100644 index 00000000..f8225905 --- /dev/null +++ b/src/sql/planner/plan/join.rs @@ -0,0 +1,242 @@ +use std::sync::Arc; + +use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; +use datafusion::common::{ + Column, DataFusionError, JoinConstraint, JoinType, Result, ScalarValue, Spans, TableReference, + not_impl_err, +}; +use datafusion::logical_expr; +use datafusion::logical_expr::expr::Alias; +use datafusion::logical_expr::{ + BinaryExpr, Case, Expr, Extension, Join, LogicalPlan, Projection, build_join_schema, +}; +use datafusion::prelude::coalesce; + +use crate::sql::planner::StreamSchemaProvider; +use crate::sql::planner::extension::join::JoinExtension; +use crate::sql::planner::extension::key_calculation::KeyCalculationExtension; +use crate::sql::planner::plan::WindowDetectingVisitor; +use crate::sql::planner::types::{ + WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata, +}; + +pub(crate) struct JoinRewriter<'a> { + pub schema_provider: &'a StreamSchemaProvider, +} + +impl JoinRewriter<'_> { + fn check_join_windowing(join: &Join) -> Result { + let left_window = WindowDetectingVisitor::get_window(&join.left)?; + let right_window = 
WindowDetectingVisitor::get_window(&join.right)?; + match (left_window, right_window) { + (None, None) => { + if join.join_type == JoinType::Inner { + Ok(false) + } else { + Err(DataFusionError::NotImplemented( + "can't handle non-inner joins without windows".into(), + )) + } + } + (None, Some(_)) => Err(DataFusionError::NotImplemented( + "can't handle mixed windowing between left (non-windowed) and right (windowed)" + .into(), + )), + (Some(_), None) => Err(DataFusionError::NotImplemented( + "can't handle mixed windowing between left (windowed) and right (non-windowed)" + .into(), + )), + (Some(left_window), Some(right_window)) => { + if left_window != right_window { + return Err(DataFusionError::NotImplemented( + "can't handle mixed windowing between left and right".into(), + )); + } + if let WindowType::Session { .. } = left_window { + return Err(DataFusionError::NotImplemented( + "can't handle session windows in joins".into(), + )); + } + Ok(true) + } + } + } + + fn create_join_key_plan( + input: Arc, + join_expressions: Vec, + name: &'static str, + ) -> Result { + let key_count = join_expressions.len(); + + let join_expressions: Vec<_> = join_expressions + .into_iter() + .enumerate() + .map(|(index, expr)| { + expr.alias_qualified( + Some(TableReference::bare("_stream")), + format!("_key_{index}"), + ) + }) + .chain( + fields_with_qualifiers(input.schema()) + .iter() + .map(|field| Expr::Column(field.qualified_column())), + ) + .collect(); + + let projection = Projection::try_new(join_expressions, input)?; + let key_calculation_extension = KeyCalculationExtension::new_named_and_trimmed( + LogicalPlan::Projection(projection), + (0..key_count).collect(), + name.to_string(), + ); + Ok(LogicalPlan::Extension(Extension { + node: Arc::new(key_calculation_extension), + })) + } + + fn post_join_timestamp_projection(&mut self, input: LogicalPlan) -> Result { + let schema = input.schema().clone(); + let mut schema_with_timestamp = fields_with_qualifiers(&schema); + let 
timestamp_fields = schema_with_timestamp + .iter() + .filter(|field| field.name() == "_timestamp") + .cloned() + .collect::>(); + + if timestamp_fields.len() != 2 { + return not_impl_err!("join must have two timestamp fields"); + } + + schema_with_timestamp.retain(|field| field.name() != "_timestamp"); + let mut projection_expr = schema_with_timestamp + .iter() + .map(|field| { + Expr::Column(Column { + relation: field.qualifier().cloned(), + name: field.name().to_string(), + spans: Spans::default(), + }) + }) + .collect::>(); + + schema_with_timestamp.push(timestamp_fields[0].clone()); + + let output_schema = Arc::new(schema_from_df_fields_with_metadata( + &schema_with_timestamp, + schema.metadata().clone(), + )?); + + let left_field = ×tamp_fields[0]; + let left_column = Expr::Column(Column { + relation: left_field.qualifier().cloned(), + name: left_field.name().to_string(), + spans: Spans::default(), + }); + let right_field = ×tamp_fields[1]; + let right_column = Expr::Column(Column { + relation: right_field.qualifier().cloned(), + name: right_field.name().to_string(), + spans: Spans::default(), + }); + + let max_timestamp = Expr::Case(Case { + expr: Some(Box::new(Expr::BinaryExpr(BinaryExpr { + left: Box::new(left_column.clone()), + op: logical_expr::Operator::GtEq, + right: Box::new(right_column.clone()), + }))), + when_then_expr: vec![ + ( + Box::new(Expr::Literal(ScalarValue::Boolean(Some(true)), None)), + Box::new(left_column.clone()), + ), + ( + Box::new(Expr::Literal(ScalarValue::Boolean(Some(false)), None)), + Box::new(right_column.clone()), + ), + ], + else_expr: Some(Box::new(coalesce(vec![ + left_column.clone(), + right_column.clone(), + ]))), + }); + + projection_expr.push(Expr::Alias(Alias { + expr: Box::new(max_timestamp), + relation: timestamp_fields[0].qualifier().cloned(), + name: timestamp_fields[0].name().to_string(), + metadata: None, + })); + + Ok(LogicalPlan::Projection(Projection::try_new_with_schema( + projection_expr, + Arc::new(input), 
+ output_schema, + )?)) + } +} + +impl TreeNodeRewriter for JoinRewriter<'_> { + type Node = LogicalPlan; + + fn f_up(&mut self, node: Self::Node) -> Result> { + let LogicalPlan::Join(join) = node else { + return Ok(Transformed::no(node)); + }; + + let is_instant = Self::check_join_windowing(&join)?; + + let Join { + left, + right, + on, + filter, + join_type, + join_constraint: JoinConstraint::On, + schema: _, + null_equals_null: false, + } = join + else { + return not_impl_err!("can't handle join constraint other than ON"); + }; + + if on.is_empty() && !is_instant { + return not_impl_err!("Updating joins must include an equijoin condition"); + } + + let (left_expressions, right_expressions): (Vec<_>, Vec<_>) = + on.clone().into_iter().unzip(); + + let left_input = Self::create_join_key_plan(left, left_expressions, "left")?; + let right_input = Self::create_join_key_plan(right, right_expressions, "right")?; + + let rewritten_join = LogicalPlan::Join(Join { + schema: Arc::new(build_join_schema( + left_input.schema(), + right_input.schema(), + &join_type, + )?), + left: Arc::new(left_input), + right: Arc::new(right_input), + on, + join_type, + join_constraint: JoinConstraint::On, + null_equals_null: false, + filter, + }); + + let final_logical_plan = self.post_join_timestamp_projection(rewritten_join)?; + + let join_extension = JoinExtension { + rewritten_join: final_logical_plan, + is_instant, + ttl: (!is_instant).then_some(self.schema_provider.planning_options.ttl), + }; + + Ok(Transformed::yes(LogicalPlan::Extension(Extension { + node: Arc::new(join_extension), + }))) + } +} diff --git a/src/sql/planner/plan/mod.rs b/src/sql/planner/plan/mod.rs new file mode 100644 index 00000000..8d1dd388 --- /dev/null +++ b/src/sql/planner/plan/mod.rs @@ -0,0 +1,449 @@ +use std::collections::HashSet; +use std::sync::Arc; + +use datafusion::common::tree_node::{Transformed, TreeNodeRecursion}; +use datafusion::common::{ + Column, DataFusionError, Result, Spans, TableReference, 
plan_err, + tree_node::{TreeNode, TreeNodeRewriter, TreeNodeVisitor}, +}; +use datafusion::logical_expr::{ + Aggregate, Expr, Extension, Filter, LogicalPlan, SubqueryAlias, expr::Alias, +}; + +use crate::sql::planner::extension::StreamExtension; +use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension}; +use crate::sql::planner::extension::join::JOIN_NODE_NAME; +use crate::sql::planner::extension::remote_table::RemoteTableExtension; +use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field}; +use crate::sql::planner::types::{ + DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window, +}; + +use self::aggregate::AggregateRewriter; +use self::join::JoinRewriter; +use self::window_fn::WindowFunctionRewriter; + +pub(crate) mod aggregate; +pub(crate) mod join; +pub(crate) mod window_fn; + +use super::StreamSchemaProvider; +use tracing::debug; + +/// Stage 3: LogicalPlan → Streaming LogicalPlan +/// +/// Rewrites a standard DataFusion logical plan into one that supports +/// streaming semantics (timestamps, windows, watermarks). 
+pub fn rewrite_plan( + plan: LogicalPlan, + schema_provider: &StreamSchemaProvider, +) -> Result { + let rewritten_plan = plan.rewrite_with_subqueries(&mut StreamRewriter { schema_provider })?; + + rewritten_plan + .data + .visit_with_subqueries(&mut TimeWindowUdfChecker {})?; + + debug!( + "Streaming logical plan:\n{}", + rewritten_plan.data.display_graphviz() + ); + + Ok(rewritten_plan.data) +} + +/// Visitor that detects window types in a logical plan +#[derive(Debug, Default)] +pub(crate) struct WindowDetectingVisitor { + pub(crate) window: Option, + pub(crate) fields: HashSet, +} + +impl WindowDetectingVisitor { + pub(crate) fn get_window(logical_plan: &LogicalPlan) -> Result> { + let mut visitor = WindowDetectingVisitor { + window: None, + fields: HashSet::new(), + }; + logical_plan.visit_with_subqueries(&mut visitor)?; + Ok(visitor.window.take()) + } +} + +fn extract_column(expr: &Expr) -> Option<&Column> { + match expr { + Expr::Column(column) => Some(column), + Expr::Alias(Alias { expr, .. 
}) => extract_column(expr), + _ => None, + } +} + +impl TreeNodeVisitor<'_> for WindowDetectingVisitor { + type Node = LogicalPlan; + + fn f_down(&mut self, node: &Self::Node) -> Result { + let LogicalPlan::Extension(Extension { node }) = node else { + return Ok(TreeNodeRecursion::Continue); + }; + + if node.name() == JOIN_NODE_NAME { + let input_windows: HashSet<_> = node + .inputs() + .iter() + .map(|input| Self::get_window(input)) + .collect::>>()?; + if input_windows.len() > 1 { + return Err(DataFusionError::Plan( + "can't handle mixed windowing between left and right".to_string(), + )); + } + self.window = input_windows + .into_iter() + .next() + .expect("join has at least one input"); + return Ok(TreeNodeRecursion::Jump); + } + Ok(TreeNodeRecursion::Continue) + } + + fn f_up(&mut self, node: &Self::Node) -> Result { + match node { + LogicalPlan::Projection(projection) => { + let window_expressions = projection + .expr + .iter() + .enumerate() + .filter_map(|(index, expr)| { + if let Some(column) = extract_column(expr) { + let input_field = projection + .input + .schema() + .field_with_name(column.relation.as_ref(), &column.name); + let input_field = match input_field { + Ok(field) => field, + Err(err) => return Some(Err(err)), + }; + if self.fields.contains( + &(column.relation.clone(), Arc::new(input_field.clone())).into(), + ) { + return self.window.clone().map(|window| Ok((index, window))); + } + } + find_window(expr) + .map(|option| option.map(|inner| (index, inner))) + .transpose() + }) + .collect::>>()?; + self.fields.clear(); + for (index, window) in window_expressions { + if let Some(existing_window) = &self.window { + if *existing_window != window { + return plan_err!( + "can't window by both {:?} and {:?}", + existing_window, + window + ); + } + self.fields + .insert(projection.schema.qualified_field(index).into()); + } else { + return plan_err!( + "can't call a windowing function without grouping by it in an aggregate" + ); + } + } + } + 
LogicalPlan::SubqueryAlias(subquery_alias) => { + self.fields = self + .fields + .drain() + .map(|field| { + Ok(subquery_alias + .schema + .qualified_field( + subquery_alias + .input + .schema() + .index_of_column(&field.qualified_column())?, + ) + .into()) + }) + .collect::>>()?; + } + LogicalPlan::Aggregate(Aggregate { + input, + group_expr, + aggr_expr: _, + schema, + .. + }) => { + let window_expressions = group_expr + .iter() + .enumerate() + .filter_map(|(index, expr)| { + if let Some(column) = extract_column(expr) { + let input_field = input + .schema() + .field_with_name(column.relation.as_ref(), &column.name); + let input_field = match input_field { + Ok(field) => field, + Err(err) => return Some(Err(err)), + }; + if self + .fields + .contains(&(column.relation.as_ref(), input_field).into()) + { + return self.window.clone().map(|window| Ok((index, window))); + } + } + find_window(expr) + .map(|option| option.map(|inner| (index, inner))) + .transpose() + }) + .collect::>>()?; + self.fields.clear(); + for (index, window) in window_expressions { + if let Some(existing_window) = &self.window { + if *existing_window != window { + return Err(DataFusionError::Plan( + "window expressions do not match".to_string(), + )); + } + } else { + self.window = Some(window); + } + self.fields.insert(schema.qualified_field(index).into()); + } + } + LogicalPlan::Extension(Extension { node }) => { + if node.name() == AGGREGATE_EXTENSION_NAME { + let aggregate_extension = node + .as_any() + .downcast_ref::() + .expect("should be aggregate extension"); + + match &aggregate_extension.window_behavior { + WindowBehavior::FromOperator { + window, + window_field, + window_index: _, + is_nested, + } => { + if self.window.is_some() && !*is_nested { + return Err(DataFusionError::Plan( + "aggregate node should not be recalculating window, as input is windowed.".to_string(), + )); + } + self.window = Some(window.clone()); + self.fields.insert(window_field.clone()); + } + 
WindowBehavior::InData => { + let input_fields = self.fields.clone(); + self.fields.clear(); + for field in fields_with_qualifiers(node.schema()) { + if input_fields.contains(&field) { + self.fields.insert(field); + } + } + if self.fields.is_empty() { + return Err(DataFusionError::Plan( + "must have window in aggregate. Make sure you are calling one of the windowing functions (hop, tumble, session) or using the window field of the input".to_string(), + )); + } + } + } + } + } + _ => {} + } + Ok(TreeNodeRecursion::Continue) + } +} + +/// Main rewriter for streaming SQL plans. +/// Rewrites standard logical plans into streaming-aware plans with +/// timestamp propagation, window detection, and streaming operator insertion. +pub struct StreamRewriter<'a> { + pub(crate) schema_provider: &'a StreamSchemaProvider, +} + +impl TreeNodeRewriter for StreamRewriter<'_> { + type Node = LogicalPlan; + + fn f_up(&mut self, mut node: Self::Node) -> Result> { + match node { + LogicalPlan::Projection(ref mut projection) => { + if !has_timestamp_field(&projection.schema) { + let timestamp_field: DFField = projection + .input + .schema() + .qualified_field_with_unqualified_name(TIMESTAMP_FIELD) + .map_err(|_| { + DataFusionError::Plan(format!( + "No timestamp field found in projection input ({})", + projection.input.display() + )) + })? 
+ .into(); + projection.schema = add_timestamp_field( + projection.schema.clone(), + timestamp_field.qualifier().cloned(), + ) + .expect("in projection"); + projection.expr.push(Expr::Column(Column { + relation: timestamp_field.qualifier().cloned(), + name: TIMESTAMP_FIELD.to_string(), + spans: Spans::default(), + })); + } + + // Rewrite row_time() calls to _timestamp column references + let rewritten = projection + .expr + .iter() + .map(|expr| expr.clone().rewrite(&mut RowTimeRewriter {})) + .collect::>>()?; + if rewritten.iter().any(|r| r.transformed) { + projection.expr = rewritten.into_iter().map(|r| r.data).collect(); + } + return Ok(Transformed::yes(node)); + } + LogicalPlan::Aggregate(aggregate) => { + return AggregateRewriter { + schema_provider: self.schema_provider, + } + .f_up(LogicalPlan::Aggregate(aggregate)); + } + LogicalPlan::Join(join) => { + return JoinRewriter { + schema_provider: self.schema_provider, + } + .f_up(LogicalPlan::Join(join)); + } + LogicalPlan::Filter(f) => { + let expr = f + .predicate + .clone() + .rewrite(&mut TimeWindowNullCheckRemover {})?; + return Ok(if expr.transformed { + Transformed::yes(LogicalPlan::Filter(Filter::try_new(expr.data, f.input)?)) + } else { + Transformed::no(LogicalPlan::Filter(f)) + }); + } + LogicalPlan::Window(_) => { + return WindowFunctionRewriter {}.f_up(node); + } + LogicalPlan::Sort(_) => { + return plan_err!( + "ORDER BY is not currently supported in streaming SQL ({})", + node.display() + ); + } + LogicalPlan::Repartition(_) => { + return plan_err!( + "Repartitions are not currently supported ({})", + node.display() + ); + } + LogicalPlan::Union(mut union) => { + union.schema = union.inputs[0].schema().clone(); + for input in union.inputs.iter_mut() { + if let LogicalPlan::Extension(Extension { node }) = input.as_ref() { + let stream_extension: &dyn StreamExtension = node.try_into().unwrap(); + if !stream_extension.transparent() { + continue; + } + } + let remote_table_extension = 
Arc::new(RemoteTableExtension { + input: input.as_ref().clone(), + name: TableReference::bare("union_input"), + schema: union.schema.clone(), + materialize: false, + }); + *input = Arc::new(LogicalPlan::Extension(Extension { + node: remote_table_extension, + })); + } + return Ok(Transformed::yes(LogicalPlan::Union(union))); + } + LogicalPlan::SubqueryAlias(sa) => { + return Ok(Transformed::yes(LogicalPlan::SubqueryAlias( + SubqueryAlias::try_new(sa.input, sa.alias)?, + ))); + } + LogicalPlan::Limit(_) => { + return plan_err!( + "LIMIT is not currently supported in streaming SQL ({})", + node.display() + ); + } + LogicalPlan::Explain(_) => { + return plan_err!("EXPLAIN is not supported ({})", node.display()); + } + LogicalPlan::Analyze(_) => { + return plan_err!("ANALYZE is not supported ({})", node.display()); + } + _ => {} + } + Ok(Transformed::no(node)) + } +} + +/// Rewrites row_time() function calls to _timestamp column references +struct RowTimeRewriter; + +impl TreeNodeRewriter for RowTimeRewriter { + type Node = Expr; + + fn f_up(&mut self, node: Self::Node) -> Result> { + if let Expr::ScalarFunction(ref func) = node { + if func.func.name() == "row_time" { + return Ok(Transformed::yes(Expr::Column(Column::new_unqualified( + TIMESTAMP_FIELD.to_string(), + )))); + } + } + Ok(Transformed::no(node)) + } +} + +/// Removes IS NOT NULL checks on window expressions that get pushed down incorrectly +pub(crate) struct TimeWindowNullCheckRemover; + +impl TreeNodeRewriter for TimeWindowNullCheckRemover { + type Node = Expr; + + fn f_up(&mut self, node: Self::Node) -> Result> { + if let Expr::IsNotNull(ref inner) = node { + if find_window(inner)?.is_some() { + return Ok(Transformed::yes(Expr::Literal( + datafusion::common::ScalarValue::Boolean(Some(true)), + None, + ))); + } + } + Ok(Transformed::no(node)) + } +} + +/// Checks that window UDFs (tumble/hop/session) are not used outside aggregates +pub(crate) struct TimeWindowUdfChecker; + +impl TreeNodeVisitor<'_> for 
TimeWindowUdfChecker { + type Node = LogicalPlan; + + fn f_up(&mut self, node: &Self::Node) -> Result { + if let LogicalPlan::Projection(projection) = node { + for expr in &projection.expr { + if let Some(window) = find_window(expr)? { + return plan_err!( + "Window function {:?} can only be used as a GROUP BY expression in an aggregate", + window + ); + } + } + } + Ok(TreeNodeRecursion::Continue) + } +} diff --git a/src/sql/planner/plan/window_fn.rs b/src/sql/planner/plan/window_fn.rs new file mode 100644 index 00000000..0bd3314f --- /dev/null +++ b/src/sql/planner/plan/window_fn.rs @@ -0,0 +1,178 @@ +use std::sync::Arc; + +use datafusion::common::tree_node::Transformed; +use datafusion::common::{Result as DFResult, plan_err, tree_node::TreeNodeRewriter}; +use datafusion::logical_expr; +use datafusion::logical_expr::expr::WindowFunctionParams; +use datafusion::logical_expr::{ + Expr, Extension, LogicalPlan, Projection, Sort, Window, expr::WindowFunction, +}; +use tracing::debug; + +use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; +use crate::sql::planner::extension::window_fn::WindowFunctionExtension; +use crate::sql::planner::plan::{WindowDetectingVisitor, extract_column}; +use crate::sql::planner::types::{WindowType, fields_with_qualifiers, schema_from_df_fields}; + +pub(crate) struct WindowFunctionRewriter; + +fn get_window_and_name(expr: &Expr) -> DFResult<(WindowFunction, String)> { + match expr { + Expr::Alias(alias) => { + let (window, _) = get_window_and_name(&alias.expr)?; + Ok((window, alias.name.clone())) + } + Expr::WindowFunction(window_function) => { + Ok((*window_function.clone(), expr.name_for_alias()?)) + } + _ => plan_err!("Expect a column or alias expression, not {:?}", expr), + } +} + +impl TreeNodeRewriter for WindowFunctionRewriter { + type Node = LogicalPlan; + + fn f_up(&mut self, node: Self::Node) -> DFResult> { + let LogicalPlan::Window(window) = node else { + return Ok(Transformed::no(node)); + 
}; + + debug!( + "Rewriting window function: {:?}", + LogicalPlan::Window(window.clone()) + ); + + let mut window_detecting_visitor = WindowDetectingVisitor::default(); + window + .input + .visit_with_subqueries(&mut window_detecting_visitor)?; + + let Some(input_window) = window_detecting_visitor.window else { + return plan_err!("Window functions require already windowed input"); + }; + if matches!(input_window, WindowType::Session { .. }) { + return plan_err!("Window functions do not support session windows"); + } + + let input_window_fields = window_detecting_visitor.fields; + + let Window { + input, window_expr, .. + } = window; + + if window_expr.len() != 1 { + return plan_err!("Window functions require exactly one window expression"); + } + + let (WindowFunction { fun, params }, original_name) = get_window_and_name(&window_expr[0])?; + + let mut window_field: Vec<_> = params + .partition_by + .iter() + .enumerate() + .filter_map(|(index, expr)| { + if let Some(column) = extract_column(expr) { + let Ok(input_field) = input + .schema() + .field_with_name(column.relation.as_ref(), &column.name) + else { + return Some(plan_err!( + "Column {} not found in input schema", + column.name + )); + }; + if input_window_fields.contains(&(column.relation.as_ref(), input_field).into()) + { + return Some(Ok((input_field.clone(), index))); + } + } + None + }) + .collect::>()?; + + if window_field.len() != 1 { + return plan_err!( + "Window function requires exactly one window expression in partition_by" + ); + } + + let (_window_field, index) = window_field.pop().unwrap(); + let mut additional_keys = params.partition_by.clone(); + additional_keys.remove(index); + let key_count = additional_keys.len(); + + let params = WindowFunctionParams { + args: params.args, + partition_by: additional_keys.clone(), + order_by: params.order_by, + window_frame: params.window_frame, + null_treatment: params.null_treatment, + }; + + let new_window_func = WindowFunction { fun, params }; + + let 
mut key_projection_expressions: Vec<_> = additional_keys + .iter() + .enumerate() + .map(|(index, expression)| expression.clone().alias(format!("_key_{index}"))) + .collect(); + + key_projection_expressions.extend( + fields_with_qualifiers(input.schema()) + .iter() + .map(|field| Expr::Column(field.qualified_column())), + ); + + let auto_schema = + Projection::try_new(key_projection_expressions.clone(), input.clone())?.schema; + let mut key_fields = fields_with_qualifiers(&auto_schema) + .iter() + .take(additional_keys.len()) + .cloned() + .collect::>(); + key_fields.extend(fields_with_qualifiers(input.schema())); + let key_schema = Arc::new(schema_from_df_fields(&key_fields)?); + + let key_projection = LogicalPlan::Projection(Projection::try_new_with_schema( + key_projection_expressions, + input.clone(), + key_schema, + )?); + + let key_plan = LogicalPlan::Extension(Extension { + node: Arc::new(KeyCalculationExtension::new( + key_projection, + KeysOrExprs::Keys((0..key_count).collect()), + )), + }); + + let mut sort_expressions: Vec<_> = additional_keys + .iter() + .map(|partition| logical_expr::expr::Sort { + expr: partition.clone(), + asc: true, + nulls_first: false, + }) + .collect(); + sort_expressions.extend(new_window_func.params.order_by.clone()); + + let shuffle = LogicalPlan::Sort(Sort { + expr: sort_expressions, + input: Arc::new(key_plan), + fetch: None, + }); + + let window_expr = + Expr::WindowFunction(Box::new(new_window_func)).alias_if_changed(original_name)?; + + let rewritten_window_plan = + LogicalPlan::Window(Window::try_new(vec![window_expr], Arc::new(shuffle))?); + + Ok(Transformed::yes(LogicalPlan::Extension(Extension { + node: Arc::new(WindowFunctionExtension::new( + rewritten_window_plan, + (0..key_count).collect(), + )), + }))) + } +} diff --git a/src/sql/planner/schemas.rs b/src/sql/planner/schemas.rs new file mode 100644 index 00000000..0440cc85 --- /dev/null +++ b/src/sql/planner/schemas.rs @@ -0,0 +1,59 @@ +use 
crate::sql::planner::types::{DFField, TIMESTAMP_FIELD}; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; +use datafusion::common::{DFSchema, DFSchemaRef, Result as DFResult, TableReference}; +use std::{collections::HashMap, sync::Arc}; + +pub fn window_arrow_struct() -> DataType { + DataType::Struct( + vec![ + Arc::new(Field::new( + "start", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )), + Arc::new(Field::new( + "end", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )), + ] + .into(), + ) +} + +pub(crate) fn add_timestamp_field( + schema: DFSchemaRef, + qualifier: Option, +) -> DFResult { + if has_timestamp_field(&schema) { + return Ok(schema); + } + + let timestamp_field = DFField::new( + qualifier, + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ); + Ok(Arc::new(schema.join(&DFSchema::new_with_metadata( + vec![timestamp_field.into()], + HashMap::new(), + )?)?)) +} + +pub(crate) fn has_timestamp_field(schema: &DFSchemaRef) -> bool { + schema + .fields() + .iter() + .any(|field| field.name() == TIMESTAMP_FIELD) +} + +pub fn add_timestamp_field_arrow(schema: Schema) -> SchemaRef { + let mut fields = schema.fields().to_vec(); + fields.push(Arc::new(Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ))); + Arc::new(Schema::new(fields)) +} diff --git a/src/sql/planner/sql_to_plan.rs b/src/sql/planner/sql_to_plan.rs new file mode 100644 index 00000000..049cd18e --- /dev/null +++ b/src/sql/planner/sql_to_plan.rs @@ -0,0 +1,22 @@ +use datafusion::common::Result; +use datafusion::logical_expr::LogicalPlan; +use datafusion::sql::sqlparser::ast::Statement; +use tracing::debug; + +use crate::sql::planner::StreamSchemaProvider; + +/// Stage 2: Statement → LogicalPlan +/// +/// Converts a parsed SQL AST statement into a DataFusion logical plan +/// using the StreamSchemaProvider as the catalog context. 
+pub fn statement_to_plan( + statement: Statement, + schema_provider: &StreamSchemaProvider, +) -> Result { + let sql_to_rel = datafusion::sql::planner::SqlToRel::new(schema_provider); + let plan = sql_to_rel.sql_statement_to_plan(statement)?; + + debug!("Logical plan:\n{}", plan.display_graphviz()); + + Ok(plan) +} diff --git a/src/sql/planner/types.rs b/src/sql/planner/types.rs new file mode 100644 index 00000000..2330c0de --- /dev/null +++ b/src/sql/planner/types.rs @@ -0,0 +1,513 @@ +use std::collections::HashMap; +use std::fmt::{Debug, Formatter}; +use std::sync::Arc; +use std::time::Duration; + +use datafusion::arrow::datatypes::{ + DECIMAL_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION, DataType, Field, FieldRef, IntervalUnit, + Schema, SchemaRef, TimeUnit, +}; +use datafusion::common::{Column, DFSchema, Result, TableReference, plan_datafusion_err, plan_err}; +use datafusion::logical_expr::{ + ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, +}; +use std::any::Any; + +pub const TIMESTAMP_FIELD: &str = "_timestamp"; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ProcessingMode { + Append, + Update, +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum WindowType { + Tumbling { width: Duration }, + Sliding { width: Duration, slide: Duration }, + Session { gap: Duration }, + Instant, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) enum WindowBehavior { + FromOperator { + window: WindowType, + window_field: DFField, + window_index: usize, + is_nested: bool, + }, + InData, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct DFField { + qualifier: Option, + field: FieldRef, +} + +impl From<(Option, FieldRef)> for DFField { + fn from(value: (Option, FieldRef)) -> Self { + Self { + qualifier: value.0, + field: value.1, + } + } +} + +impl From<(Option<&TableReference>, &Field)> for DFField { + fn from(value: (Option<&TableReference>, &Field)) -> Self { + Self { + qualifier: value.0.cloned(), + 
field: Arc::new(value.1.clone()), + } + } +} + +impl From for (Option, FieldRef) { + fn from(value: DFField) -> Self { + (value.qualifier, value.field) + } +} + +impl DFField { + pub fn new( + qualifier: Option, + name: impl Into, + data_type: DataType, + nullable: bool, + ) -> Self { + Self { + qualifier, + field: Arc::new(Field::new(name, data_type, nullable)), + } + } + + pub fn new_unqualified(name: &str, data_type: DataType, nullable: bool) -> Self { + DFField { + qualifier: None, + field: Arc::new(Field::new(name, data_type, nullable)), + } + } + + pub fn name(&self) -> &String { + self.field.name() + } + + pub fn data_type(&self) -> &DataType { + self.field.data_type() + } + + pub fn is_nullable(&self) -> bool { + self.field.is_nullable() + } + + pub fn metadata(&self) -> &HashMap { + self.field.metadata() + } + + pub fn qualified_name(&self) -> String { + if let Some(qualifier) = &self.qualifier { + format!("{}.{}", qualifier, self.field.name()) + } else { + self.field.name().to_owned() + } + } + + pub fn qualified_column(&self) -> Column { + Column { + relation: self.qualifier.clone(), + name: self.field.name().to_string(), + spans: Default::default(), + } + } + + pub fn unqualified_column(&self) -> Column { + Column { + relation: None, + name: self.field.name().to_string(), + spans: Default::default(), + } + } + + pub fn qualifier(&self) -> Option<&TableReference> { + self.qualifier.as_ref() + } + + pub fn field(&self) -> &FieldRef { + &self.field + } + + pub fn strip_qualifier(mut self) -> Self { + self.qualifier = None; + self + } + + pub fn with_nullable(mut self, nullable: bool) -> Self { + let f = self.field().as_ref().clone().with_nullable(nullable); + self.field = f.into(); + self + } + + pub fn with_metadata(mut self, metadata: HashMap) -> Self { + let f = self.field().as_ref().clone().with_metadata(metadata); + self.field = f.into(); + self + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StreamSchema { + pub schema: SchemaRef, + pub 
timestamp_index: usize, + pub key_indices: Option>, +} + +impl StreamSchema { + pub fn new(schema: SchemaRef, timestamp_index: usize, key_indices: Option>) -> Self { + Self { + schema, + timestamp_index, + key_indices, + } + } + + pub fn new_unkeyed(schema: SchemaRef, timestamp_index: usize) -> Self { + Self { + schema, + timestamp_index, + key_indices: None, + } + } + + pub fn from_fields(fields: Vec) -> Self { + let schema = Arc::new(Schema::new(fields)); + let timestamp_index = schema + .column_with_name(TIMESTAMP_FIELD) + .map(|(i, _)| i) + .unwrap_or(0); + Self { + schema, + timestamp_index, + key_indices: None, + } + } + + pub fn from_schema_keys(schema: SchemaRef, key_indices: Vec) -> Result { + let timestamp_index = schema + .column_with_name(TIMESTAMP_FIELD) + .ok_or_else(|| { + datafusion::error::DataFusionError::Plan(format!( + "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}" + )) + })? + .0; + Ok(Self { + schema, + timestamp_index, + key_indices: Some(key_indices), + }) + } + + pub fn from_schema_unkeyed(schema: SchemaRef) -> Result { + let timestamp_index = schema + .column_with_name(TIMESTAMP_FIELD) + .ok_or_else(|| { + datafusion::error::DataFusionError::Plan(format!( + "no {TIMESTAMP_FIELD} field in schema" + )) + })? 
+ .0; + Ok(Self { + schema, + timestamp_index, + key_indices: None, + }) + } +} + +#[allow(clippy::type_complexity)] +pub(crate) struct PlaceholderUdf { + name: String, + signature: Signature, + return_type: Arc Result + Send + Sync + 'static>, +} + +impl Debug for PlaceholderUdf { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "PlaceholderUDF<{}>", self.name) + } +} + +impl ScalarUDFImpl for PlaceholderUdf { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + &self.name + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, args: &[DataType]) -> Result { + (self.return_type)(args) + } + + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + unimplemented!("PlaceholderUdf should never be called at execution time"); + } +} + +impl PlaceholderUdf { + pub fn with_return( + name: impl Into, + args: Vec, + ret: DataType, + ) -> Arc { + Arc::new(ScalarUDF::new_from_impl(PlaceholderUdf { + name: name.into(), + signature: Signature::exact(args, Volatility::Volatile), + return_type: Arc::new(move |_| Ok(ret.clone())), + })) + } +} + +#[derive(Clone, Debug)] +pub struct SqlConfig { + pub default_parallelism: usize, +} + +impl Default for SqlConfig { + fn default() -> Self { + Self { + default_parallelism: 4, + } + } +} + +#[derive(Clone)] +pub struct PlanningOptions { + pub ttl: Duration, +} + +impl Default for PlanningOptions { + fn default() -> Self { + Self { + ttl: Duration::from_secs(24 * 60 * 60), + } + } +} + +pub fn convert_data_type(sql_type: &datafusion::sql::sqlparser::ast::DataType) -> Result { + use datafusion::sql::sqlparser::ast::ArrayElemTypeDef; + use datafusion::sql::sqlparser::ast::DataType as SQLDataType; + + match sql_type { + SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) + | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_sql_type, _)) => { + let data_type = convert_data_type(inner_sql_type)?; + 
Ok(DataType::List(Arc::new(Field::new( + "field", data_type, true, + )))) + } + SQLDataType::Array(ArrayElemTypeDef::None) => { + plan_err!("Arrays with unspecified type is not supported") + } + other => convert_simple_data_type(other), + } +} + +fn convert_simple_data_type( + sql_type: &datafusion::sql::sqlparser::ast::DataType, +) -> Result { + use datafusion::sql::sqlparser::ast::DataType as SQLDataType; + use datafusion::sql::sqlparser::ast::{ExactNumberInfo, TimezoneInfo}; + + match sql_type { + SQLDataType::Boolean | SQLDataType::Bool => Ok(DataType::Boolean), + SQLDataType::TinyInt(_) => Ok(DataType::Int8), + SQLDataType::SmallInt(_) | SQLDataType::Int2(_) => Ok(DataType::Int16), + SQLDataType::Int(_) | SQLDataType::Integer(_) | SQLDataType::Int4(_) => Ok(DataType::Int32), + SQLDataType::BigInt(_) | SQLDataType::Int8(_) => Ok(DataType::Int64), + SQLDataType::TinyIntUnsigned(_) => Ok(DataType::UInt8), + SQLDataType::SmallIntUnsigned(_) | SQLDataType::Int2Unsigned(_) => Ok(DataType::UInt16), + SQLDataType::IntUnsigned(_) + | SQLDataType::UnsignedInteger + | SQLDataType::Int4Unsigned(_) => Ok(DataType::UInt32), + SQLDataType::BigIntUnsigned(_) | SQLDataType::Int8Unsigned(_) => Ok(DataType::UInt64), + SQLDataType::Float(_) | SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32), + SQLDataType::Double(_) | SQLDataType::DoublePrecision | SQLDataType::Float8 => { + Ok(DataType::Float64) + } + SQLDataType::Char(_) + | SQLDataType::Varchar(_) + | SQLDataType::Text + | SQLDataType::String(_) => Ok(DataType::Utf8), + SQLDataType::Timestamp(None, TimezoneInfo::None) | SQLDataType::Datetime(_) => { + Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) + } + SQLDataType::Timestamp(Some(precision), TimezoneInfo::None) => match *precision { + 0 => Ok(DataType::Timestamp(TimeUnit::Second, None)), + 3 => Ok(DataType::Timestamp(TimeUnit::Millisecond, None)), + 6 => Ok(DataType::Timestamp(TimeUnit::Microsecond, None)), + 9 => 
Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)), + _ => { + plan_err!( + "unsupported precision {} -- supported: 0 (seconds), 3 (ms), 6 (us), 9 (ns)", + precision + ) + } + }, + SQLDataType::Date => Ok(DataType::Date32), + SQLDataType::Time(None, tz_info) => { + if matches!(tz_info, TimezoneInfo::None) + || matches!(tz_info, TimezoneInfo::WithoutTimeZone) + { + Ok(DataType::Time64(TimeUnit::Nanosecond)) + } else { + plan_err!("Unsupported SQL type {sql_type:?}") + } + } + SQLDataType::Numeric(exact_number_info) | SQLDataType::Decimal(exact_number_info) => { + let (precision, scale) = match *exact_number_info { + ExactNumberInfo::None => (None, None), + ExactNumberInfo::Precision(precision) => (Some(precision), None), + ExactNumberInfo::PrecisionAndScale(precision, scale) => { + (Some(precision), Some(scale)) + } + }; + make_decimal_type(precision, scale) + } + SQLDataType::Bytea => Ok(DataType::Binary), + SQLDataType::Interval => Ok(DataType::Interval(IntervalUnit::MonthDayNano)), + SQLDataType::Struct(fields, _) => { + let fields: Vec<_> = fields + .iter() + .map(|f| { + Ok::<_, datafusion::error::DataFusionError>(Arc::new(Field::new( + f.field_name + .as_ref() + .ok_or_else(|| { + plan_datafusion_err!("anonymous struct fields are not allowed") + })? 
+ .to_string(), + convert_data_type(&f.field_type)?, + true, + ))) + }) + .collect::>()?; + Ok(DataType::Struct(fields.into())) + } + _ => plan_err!("Unsupported SQL type {sql_type:?}"), + } +} + +fn make_decimal_type(precision: Option, scale: Option) -> Result { + let (precision, scale) = match (precision, scale) { + (Some(p), Some(s)) => (p as u8, s as i8), + (Some(p), None) => (p as u8, 0), + (None, Some(_)) => return plan_err!("Cannot specify only scale for decimal data type"), + (None, None) => (DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE), + }; + + if precision == 0 || precision > DECIMAL128_MAX_PRECISION || scale.unsigned_abs() > precision { + plan_err!( + "Decimal(precision = {precision}, scale = {scale}) should satisfy `0 < precision <= 38`, and `scale <= precision`." + ) + } else { + Ok(DataType::Decimal128(precision, scale)) + } +} + +pub fn fields_with_qualifiers(schema: &DFSchema) -> Vec { + schema + .fields() + .iter() + .enumerate() + .map(|(i, f)| (schema.qualified_field(i).0.cloned(), f.clone()).into()) + .collect() +} + +pub fn schema_from_df_fields(fields: &[DFField]) -> Result { + schema_from_df_fields_with_metadata(fields, HashMap::new()) +} + +pub fn schema_from_df_fields_with_metadata( + fields: &[DFField], + metadata: HashMap, +) -> Result { + DFSchema::new_with_metadata(fields.iter().map(|t| t.clone().into()).collect(), metadata) +} + +pub fn get_duration(expression: &Expr) -> Result { + use datafusion::common::ScalarValue; + + match expression { + Expr::Literal(ScalarValue::IntervalDayTime(Some(val)), _) => { + Ok(Duration::from_secs((val.days as u64) * 24 * 60 * 60) + + Duration::from_millis(val.milliseconds as u64)) + } + Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(val)), _) => { + if val.months != 0 { + return datafusion::common::not_impl_err!( + "Windows do not support durations specified as months" + ); + } + Ok(Duration::from_secs((val.days as u64) * 24 * 60 * 60) + + Duration::from_nanos(val.nanoseconds as u64)) + } + _ 
=> plan_err!( + "unsupported Duration expression, expect duration literal, not {}", + expression + ), + } +} + +pub fn find_window(expression: &Expr) -> Result> { + use datafusion::logical_expr::expr::Alias; + use datafusion::logical_expr::expr::ScalarFunction; + + match expression { + Expr::ScalarFunction(ScalarFunction { func: fun, args }) => match fun.name() { + "hop" => { + if args.len() != 2 { + unreachable!(); + } + let slide = get_duration(&args[0])?; + let width = get_duration(&args[1])?; + if width.as_nanos() % slide.as_nanos() != 0 { + return plan_err!( + "hop() width {:?} must be a multiple of slide {:?}", + width, + slide + ); + } + if slide == width { + Ok(Some(WindowType::Tumbling { width })) + } else { + Ok(Some(WindowType::Sliding { width, slide })) + } + } + "tumble" => { + if args.len() != 1 { + unreachable!("wrong number of arguments for tumble(), expect one"); + } + let width = get_duration(&args[0])?; + Ok(Some(WindowType::Tumbling { width })) + } + "session" => { + if args.len() != 1 { + unreachable!("wrong number of arguments for session(), expected one"); + } + let gap = get_duration(&args[0])?; + Ok(Some(WindowType::Session { gap })) + } + _ => Ok(None), + }, + Expr::Alias(Alias { expr, .. 
}) => find_window(expr), + _ => Ok(None), + } +} From 18c76c80faad1d442319705d988893b9a0a57a64 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Mon, 16 Mar 2026 22:57:02 +0800 Subject: [PATCH 02/44] update --- Cargo.lock | 124 +- Cargo.toml | 14 +- protocol/Cargo.toml | 1 + protocol/build.rs | 48 +- protocol/proto/fs_api.proto | 289 ++++ protocol/src/lib.rs | 25 +- src/api/checkpoints.rs | 96 ++ src/api/connections.rs | 604 ++++++++ src/api/metrics.rs | 41 + src/api/mod.rs | 43 + src/api/pipelines.rs | 156 ++ src/api/public_ids.rs | 57 + src/api/schema_resolver.rs | 82 ++ src/api/udfs.rs | 56 + src/api/var_str.rs | 79 + src/datastream/logical.rs | 2 +- src/lib.rs | 2 + src/main.rs | 3 + src/sql/catalog/connector.rs | 59 + src/sql/catalog/connector_table.rs | 199 +++ src/sql/catalog/field_spec.rs | 52 + src/sql/catalog/insert.rs | 55 + src/sql/catalog/mod.rs | 25 + src/sql/catalog/optimizer.rs | 95 ++ src/sql/catalog/table.rs | 202 +++ src/sql/catalog/utils.rs | 78 + src/sql/functions/mod.rs | 600 ++++++++ src/sql/mod.rs | 5 + src/sql/physical/mod.rs | 1265 +++++++++++++++++ src/sql/planner/extension/aggregate.rs | 2 +- src/sql/planner/extension/debezium.rs | 250 ++++ src/sql/planner/extension/join.rs | 2 +- src/sql/planner/extension/key_calculation.rs | 2 +- src/sql/planner/extension/lookup.rs | 127 ++ src/sql/planner/extension/mod.rs | 209 ++- src/sql/planner/extension/projection.rs | 2 +- src/sql/planner/extension/remote_table.rs | 2 +- src/sql/planner/extension/sink.rs | 135 ++ src/sql/planner/extension/table_source.rs | 94 ++ .../planner/extension/updating_aggregate.rs | 89 ++ src/sql/planner/extension/watermark_node.rs | 2 +- src/sql/planner/extension/window_fn.rs | 2 +- src/sql/planner/mod.rs | 598 ++++---- src/sql/planner/physical_planner.rs | 396 ++++++ src/sql/planner/plan/aggregate.rs | 2 +- src/sql/planner/plan/join.rs | 4 +- src/sql/planner/plan/mod.rs | 2 +- src/sql/planner/plan/window_fn.rs | 2 +- src/sql/planner/rewrite/async_udf_rewriter.rs | 118 ++ 
src/sql/planner/rewrite/mod.rs | 27 + src/sql/planner/rewrite/row_time.rs | 39 + .../planner/rewrite/sink_input_rewriter.rs | 46 + .../rewrite/source_metadata_visitor.rs | 57 + src/sql/planner/rewrite/source_rewriter.rs | 272 ++++ src/sql/planner/rewrite/time_window.rs | 83 ++ src/sql/planner/rewrite/unnest_rewriter.rs | 178 +++ src/sql/planner/schema_provider.rs | 360 +++++ src/sql/planner/schemas.rs | 64 +- src/sql/planner/types.rs | 513 ------- src/sql/planner/udafs.rs | 31 + src/sql/types/data_type.rs | 144 ++ src/sql/types/df_field.rs | 141 ++ src/sql/types/mod.rs | 50 + src/sql/types/placeholder_udf.rs | 58 + src/sql/types/stream_schema.rs | 76 + src/sql/types/window.rs | 95 ++ src/storage/task/rocksdb_storage.rs | 44 +- src/types/arrow_ext.rs | 169 +++ src/types/control.rs | 152 ++ src/types/date.rs | 70 + src/types/debezium.rs | 136 ++ src/types/df.rs | 394 +++++ src/types/errors.rs | 67 + src/types/formats.rs | 234 +++ src/types/hash.rs | 88 ++ src/types/message.rs | 42 + src/types/mod.rs | 71 + src/types/operator_config.rs | 30 + src/types/task_info.rs | 80 ++ src/types/time_utils.rs | 62 + src/types/worker.rs | 14 + 81 files changed, 9339 insertions(+), 945 deletions(-) create mode 100644 protocol/proto/fs_api.proto create mode 100644 src/api/checkpoints.rs create mode 100644 src/api/connections.rs create mode 100644 src/api/metrics.rs create mode 100644 src/api/mod.rs create mode 100644 src/api/pipelines.rs create mode 100644 src/api/public_ids.rs create mode 100644 src/api/schema_resolver.rs create mode 100644 src/api/udfs.rs create mode 100644 src/api/var_str.rs create mode 100644 src/sql/catalog/connector.rs create mode 100644 src/sql/catalog/connector_table.rs create mode 100644 src/sql/catalog/field_spec.rs create mode 100644 src/sql/catalog/insert.rs create mode 100644 src/sql/catalog/mod.rs create mode 100644 src/sql/catalog/optimizer.rs create mode 100644 src/sql/catalog/table.rs create mode 100644 src/sql/catalog/utils.rs create mode 100644 
src/sql/functions/mod.rs create mode 100644 src/sql/physical/mod.rs create mode 100644 src/sql/planner/extension/debezium.rs create mode 100644 src/sql/planner/extension/lookup.rs create mode 100644 src/sql/planner/extension/sink.rs create mode 100644 src/sql/planner/extension/table_source.rs create mode 100644 src/sql/planner/extension/updating_aggregate.rs create mode 100644 src/sql/planner/physical_planner.rs create mode 100644 src/sql/planner/rewrite/async_udf_rewriter.rs create mode 100644 src/sql/planner/rewrite/mod.rs create mode 100644 src/sql/planner/rewrite/row_time.rs create mode 100644 src/sql/planner/rewrite/sink_input_rewriter.rs create mode 100644 src/sql/planner/rewrite/source_metadata_visitor.rs create mode 100644 src/sql/planner/rewrite/source_rewriter.rs create mode 100644 src/sql/planner/rewrite/time_window.rs create mode 100644 src/sql/planner/rewrite/unnest_rewriter.rs create mode 100644 src/sql/planner/schema_provider.rs delete mode 100644 src/sql/planner/types.rs create mode 100644 src/sql/planner/udafs.rs create mode 100644 src/sql/types/data_type.rs create mode 100644 src/sql/types/df_field.rs create mode 100644 src/sql/types/mod.rs create mode 100644 src/sql/types/placeholder_udf.rs create mode 100644 src/sql/types/stream_schema.rs create mode 100644 src/sql/types/window.rs create mode 100644 src/types/arrow_ext.rs create mode 100644 src/types/control.rs create mode 100644 src/types/date.rs create mode 100644 src/types/debezium.rs create mode 100644 src/types/df.rs create mode 100644 src/types/errors.rs create mode 100644 src/types/formats.rs create mode 100644 src/types/hash.rs create mode 100644 src/types/message.rs create mode 100644 src/types/mod.rs create mode 100644 src/types/operator_config.rs create mode 100644 src/types/task_info.rs create mode 100644 src/types/time_utils.rs create mode 100644 src/types/worker.rs diff --git a/Cargo.lock b/Cargo.lock index 6cf6182a..cb19233d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -651,11 
+651,22 @@ dependencies = [ [[package]] name = "bincode" -version = "1.3.3" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" dependencies = [ + "bincode_derive", "serde", + "unty", +] + +[[package]] +name = "bincode_derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" +dependencies = [ + "virtue", ] [[package]] @@ -832,7 +843,7 @@ dependencies = [ "cap-primitives", "cap-std", "io-lifetimes", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -861,7 +872,7 @@ dependencies = [ "maybe-owned", "rustix 1.1.3", "rustix-linux-procfs", - "windows-sys 0.52.0", + "windows-sys 0.59.0", "winx", ] @@ -941,7 +952,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ "iana-time-zone", + "js-sys", "num-traits", + "wasm-bindgen", "windows-link", ] @@ -2236,7 +2249,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", "rustix 1.1.3", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2317,7 +2330,7 @@ checksum = "94e7099f6313ecacbe1256e8ff9d617b75d1bcb16a6fddef94866d225a01a14a" dependencies = [ "io-lifetimes", "rustix 1.1.3", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2325,13 +2338,15 @@ name = "function-stream" version = "0.6.0" dependencies = [ "anyhow", - "arrow-array 52.2.0", - "arrow-ipc 52.2.0", + "arrow", + "arrow-array 55.2.0", + "arrow-ipc 55.2.0", "arrow-json 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fjson)", - "arrow-schema 52.2.0", + "arrow-schema 55.2.0", "async-trait", "base64", "bincode", + "chrono", "clap", 
"cornucopia", "cornucopia_async", @@ -2346,6 +2361,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-proto", + "futures", "itertools 0.14.0", "jiter", "log", @@ -2357,11 +2373,13 @@ dependencies = [ "pest_derive", "petgraph 0.7.1", "proctitle", + "prost", "protocol", "rdkafka", "rocksdb", "serde", "serde_json", + "serde_json_path", "serde_yaml", "sqlparser", "strum", @@ -2377,6 +2395,7 @@ dependencies = [ "uuid", "wasmtime", "wasmtime-wasi", + "xxhash-rust", ] [[package]] @@ -2964,6 +2983,15 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" +[[package]] +name = "inventory" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009ae045c87e7082cb72dab0ccd01ae075dd00141ddc108f43a0ea150a9e7227" +dependencies = [ + "rustversion", +] + [[package]] name = "io-extras" version = "0.18.4" @@ -2971,7 +2999,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2285ddfe3054097ef4b2fe909ef8c3bcd1ea52a8f0d274416caebeef39f04a65" dependencies = [ "io-lifetimes", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4179,6 +4207,7 @@ dependencies = [ "env_logger", "log", "prost", + "serde", "tonic", "tonic-build", ] @@ -4524,7 +4553,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4705,6 +4734,56 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_json_path" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b992cea3194eea663ba99a042d61cea4bd1872da37021af56f6a37e0359b9d33" +dependencies = [ + "inventory", + "nom", + "regex", + "serde", + "serde_json", + "serde_json_path_core", + "serde_json_path_macros", + "thiserror 2.0.17", +] + +[[package]] +name = "serde_json_path_core" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde67d8dfe7d4967b5a95e247d4148368ddd1e753e500adb34b3ffe40c6bc1bc" +dependencies = [ + "inventory", + "serde", + "serde_json", + "thiserror 2.0.17", +] + +[[package]] +name = "serde_json_path_macros" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "517acfa7f77ddaf5c43d5f119c44a683774e130b4247b7d3210f8924506cfac8" +dependencies = [ + "inventory", + "serde_json_path_core", + "serde_json_path_macros_internal", +] + +[[package]] +name = "serde_json_path_macros_internal" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafbefbe175fa9bf03ca83ef89beecff7d2a95aaacd5732325b90ac8c3bd7b90" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_spanned" version = "1.0.4" @@ -4918,7 +4997,6 @@ dependencies = [ "cfg-if", "libc", "psm", - "windows-sys 0.52.0", "windows-sys 0.59.0", ] @@ -5041,7 +5119,7 @@ dependencies = [ "fd-lock", "io-lifetimes", "rustix 0.38.44", - "windows-sys 0.52.0", + "windows-sys 0.59.0", "winx", ] @@ -5671,6 +5749,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9df2af067a7953e9c3831320f35c1cc0600c30d44d9f7a12b01db1cd88d6b47" +[[package]] +name = "unty" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" + [[package]] name = "url" version = "2.5.7" @@ -5724,6 +5808,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "virtue" +version = "0.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" + [[package]] name = "walkdir" version = "2.5.0" @@ -6586,7 +6676,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3f3fd376f71958b862e7afb20cfe5a22830e1963462f3a17f49d82a6c1d1f42d" dependencies = [ "bitflags 2.10.0", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -6640,6 +6730,12 @@ dependencies = [ "tap", ] +[[package]] +name = "xxhash-rust" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" + [[package]] name = "xz2" version = "0.1.7" diff --git a/Cargo.toml b/Cargo.toml index 0d906ca6..8b38dfe4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ tonic = { version = "0.12", features = ["default"] } async-trait = "0.1" num_cpus = "1.0" protocol = { path = "./protocol" } +prost = "0.13" rdkafka = { version = "0.38", features = ["cmake-build", "ssl", "gssapi"] } crossbeam-channel = "0.5" pest = "2.7" @@ -44,13 +45,18 @@ wasmtime = { version = "41.0.3", features = ["component-model", "async"] } base64 = "0.22" wasmtime-wasi = "41.0.3" rocksdb = { version = "0.21", features = ["multi-threaded-cf", "lz4"] } -bincode = "1.3" +bincode = { version = "2", features = ["serde"] } +chrono = "0.4" tokio-stream = "0.1.18" lru = "0.12" parking_lot = "0.12" -arrow-array = "52" -arrow-ipc = "52" -arrow-schema = "52" +arrow = { version = "55", default-features = false } +arrow-array = "55" +arrow-ipc = "55" +arrow-schema = { version = "55", features = ["serde"] } +futures = "0.3" +serde_json_path = "0.7" +xxhash-rust = { version = "0.8", features = ["xxh3"] } proctitle = "0.1" unicase = "2.7" petgraph = "0.7" diff --git a/protocol/Cargo.toml b/protocol/Cargo.toml index fde9de52..5fa7d0f0 100644 --- a/protocol/Cargo.toml +++ b/protocol/Cargo.toml @@ -9,6 +9,7 @@ repository = "https://github.com/your-username/rust-function-stream" [dependencies] prost = "0.13" tonic = { version = "0.12", features = ["default"] } +serde = { version = "1.0", features = ["derive"] } log = "0.4" 
[build-dependencies] diff --git a/protocol/build.rs b/protocol/build.rs index 17e77d30..e258f456 100644 --- a/protocol/build.rs +++ b/protocol/build.rs @@ -10,54 +10,56 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::path::Path; +use std::path::{Path, PathBuf}; fn main() -> Result<(), Box> { - // Initialize logger for build script env_logger::init(); - // Create output directories in the protocol package directory - // Use CARGO_MANIFEST_DIR to get the package root directory let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")?; let out_dir = Path::new(&manifest_dir).join("generated"); - let proto_file = Path::new(&manifest_dir).join("proto/function_stream.proto"); - - // Note: Cargo doesn't directly support cleaning custom directories via cargo clean. - // The generated directory will be automatically regenerated on each build if needed. - // To clean it manually, use: ./clean.sh or make clean or rm -rf protocol/generated log::info!("Generated code will be placed in: {}", out_dir.display()); - log::info!("Proto file: {}", proto_file.display()); - // Create output directories let cli_dir = out_dir.join("cli"); let service_dir = out_dir.join("service"); std::fs::create_dir_all(&cli_dir)?; std::fs::create_dir_all(&service_dir)?; - log::info!( - "Created output directories: {} and {}", - cli_dir.display(), - service_dir.display() - ); - // Generate code for CLI - only client code needed + // 1. 
function_stream.proto → CLI (client) and Service (server) tonic_build::configure() .out_dir(&cli_dir) - .build_client(true) // Enable client code generation - .build_server(false) // Disable server code generation + .build_client(true) + .build_server(false) .compile_protos(&["proto/function_stream.proto"], &["proto"])?; - // Generate code for Service - only server code needed tonic_build::configure() .out_dir(&service_dir) - .build_client(false) // Disable client code generation - .build_server(true) // Enable server code generation + .build_client(false) + .build_server(true) .compile_protos(&["proto/function_stream.proto"], &["proto"])?; + // 2. fs_api.proto → with file descriptor set + serde for REST/JSON + let api_dir = out_dir.join("api"); + std::fs::create_dir_all(&api_dir)?; + + let descriptor_path = + PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("fs_api_descriptor.bin"); + + tonic_build::configure() + .out_dir(&api_dir) + .protoc_arg("--experimental_allow_proto3_optional") + .file_descriptor_set_path(&descriptor_path) + .type_attribute(".", "#[derive(serde::Serialize, serde::Deserialize)]") + .type_attribute(".", "#[serde(rename_all = \"camelCase\")]") + .build_client(false) + .build_server(false) + .compile_protos(&["proto/fs_api.proto"], &["proto"])?; + log::info!("Protocol Buffers code generated successfully"); println!("cargo:rustc-env=PROTO_GEN_DIR={}", out_dir.display()); - println!("cargo:rerun-if-changed={}", proto_file.display()); + println!("cargo:rerun-if-changed=proto/function_stream.proto"); + println!("cargo:rerun-if-changed=proto/fs_api.proto"); Ok(()) } diff --git a/protocol/proto/fs_api.proto b/protocol/proto/fs_api.proto new file mode 100644 index 00000000..24525583 --- /dev/null +++ b/protocol/proto/fs_api.proto @@ -0,0 +1,289 @@ +// Licensed under the Apache License, Version 2.0 +// Adapted from Arroyo's api.proto for FunctionStream + +syntax = "proto3"; +package fs_api; + +// ─────────────────────── Operators 
─────────────────────── + +message ConnectorOp { + string connector = 1; + string config = 2; + string description = 3; +} + +message ProjectionOperator { + string name = 1; + FsSchema input_schema = 2; + FsSchema output_schema = 3; + repeated bytes exprs = 4; +} + +message TumblingWindowAggregateOperator { + string name = 1; + uint64 width_micros = 2; + bytes binning_function = 3; + FsSchema input_schema = 4; + FsSchema partial_schema = 5; + bytes partial_aggregation_plan = 6; + bytes final_aggregation_plan = 7; + optional bytes final_projection = 8; +} + +message SlidingWindowAggregateOperator { + string name = 1; + uint64 width_micros = 2; + uint64 slide_micros = 3; + bytes binning_function = 4; + FsSchema input_schema = 5; + FsSchema partial_schema = 6; + bytes partial_aggregation_plan = 7; + bytes final_aggregation_plan = 8; + bytes final_projection = 9; +} + +message SessionWindowAggregateOperator { + string name = 1; + uint64 gap_micros = 2; + string window_field_name = 3; + uint64 window_index = 4; + FsSchema input_schema = 5; + FsSchema unkeyed_aggregate_schema = 6; + bytes partial_aggregation_plan = 7; + bytes final_aggregation_plan = 8; +} + +message JoinOperator { + string name = 1; + FsSchema left_schema = 2; + FsSchema right_schema = 3; + FsSchema output_schema = 4; + bytes join_plan = 5; + optional uint64 ttl_micros = 6; +} + +message LookupJoinCondition { + bytes left_expr = 1; + string right_key = 2; +} + +message LookupJoinOperator { + FsSchema input_schema = 1; + FsSchema lookup_schema = 2; + ConnectorOp connector = 3; + repeated LookupJoinCondition key_exprs = 4; + JoinType join_type = 5; + optional uint64 ttl_micros = 6; + optional uint64 max_capacity_bytes = 7; +} + +message WindowFunctionOperator { + string name = 1; + FsSchema input_schema = 2; + bytes binning_function = 3; + bytes window_function_plan = 4; +} + +enum AsyncUdfOrdering { + UNORDERED = 0; + ORDERED = 1; +} + +message AsyncUdfOperator { + string name = 1; + DylibUdfConfig udf = 
2; + repeated bytes arg_exprs = 3; + repeated bytes final_exprs = 4; + AsyncUdfOrdering ordering = 5; + uint32 max_concurrency = 6; + uint64 timeout_micros = 7; +} + +message UpdatingAggregateOperator { + string name = 1; + FsSchema input_schema = 2; + FsSchema final_schema = 3; + bytes aggregate_exec = 5; + bytes metadata_expr = 6; + uint64 flush_interval_micros = 7; + uint64 ttl_micros = 8; +} + +// ─────────────────────── Watermark ─────────────────────── + +message ExpressionWatermarkConfig { + uint64 period_micros = 1; + optional uint64 idle_time_micros = 2; + FsSchema input_schema = 3; + bytes expression = 4; +} + +// ─────────────────────── Windows ─────────────────────── + +message Window { + oneof window { + SlidingWindow sliding_window = 2; + TumblingWindow tumbling_window = 3; + InstantWindow instant_window = 4; + SessionWindow session_window = 5; + } +} + +message SlidingWindow { + uint64 size_micros = 1; + uint64 slide_micros = 2; +} + +message TumblingWindow { + uint64 size_micros = 1; +} + +message InstantWindow {} + +message SessionWindow { + uint64 gap_micros = 1; +} + +// ─────────────────────── Enums ─────────────────────── + +enum JoinType { + INNER = 0; + LEFT = 1; + RIGHT = 2; + FULL = 3; +} + +enum OffsetMode { + EARLIEST = 0; + LATEST = 1; +} + +enum EdgeType { + UNUSED = 0; + FORWARD = 1; + SHUFFLE = 2; + LEFT_JOIN = 3; + RIGHT_JOIN = 4; +} + +// ─────────────────── Physical Extension Nodes ─────────────────── + +message MemExecNode { + string table_name = 1; + string schema = 2; // json-encoded +} + +message UnnestExecNode { + string schema = 1; // json-encoded +} + +message DebeziumDecodeNode { + string schema = 1; // json-encoded + repeated uint64 primary_keys = 2; +} + +message DebeziumEncodeNode { + string schema = 1; // json-encoded +} + +message FsExecNode { + oneof node { + MemExecNode mem_exec = 1; + UnnestExecNode unnest_exec = 2; + DebeziumDecodeNode debezium_decode = 3; + DebeziumEncodeNode debezium_encode = 4; + } +} + +// 
─────────────────── Checkpoints ─────────────────── + +enum TaskCheckpointEventType { + ALIGNMENT_STARTED = 0; + CHECKPOINT_STARTED = 1; + CHECKPOINT_OPERATOR_SETUP_FINISHED = 2; + CHECKPOINT_SYNC_FINISHED = 3; + CHECKPOINT_PRE_COMMIT = 4; +} + +message TaskCheckpointEvent { + uint64 time = 1; + TaskCheckpointEventType event_type = 2; +} + +message TaskCheckpointDetail { + uint32 subtask_index = 1; + uint64 start_time = 2; + optional uint64 finish_time = 3; + optional uint64 bytes = 4; + repeated TaskCheckpointEvent events = 5; +} + +message OperatorCheckpointDetail { + string operator_id = 1; + uint64 start_time = 2; + optional uint64 finish_time = 3; + bool has_state = 4; + optional uint64 started_metadata_write = 6; + map tasks = 5; +} + +// ─────────────────── UDF Config ─────────────────── + +message DylibUdfConfig { + string dylib_path = 1; + repeated bytes arg_types = 2; + bytes return_type = 3; + bool aggregate = 4; + bool is_async = 5; +} + +message PythonUdfConfig { + string name = 1; + repeated bytes arg_types = 2; + bytes return_type = 3; + string definition = 4; +} + +message FsProgramConfig { + map udf_dylibs = 1; + map python_udfs = 2; +} + +// ─────────────────── Arrow Program ─────────────────── + +message FsProgram { + repeated FsNode nodes = 1; + repeated FsEdge edges = 2; + FsProgramConfig program_config = 3; +} + +message FsSchema { + string arrow_schema = 1; // json-encoded Arrow Schema + uint32 timestamp_index = 2; + repeated uint32 key_indices = 3; + bool has_keys = 4; + repeated uint32 routing_key_indices = 5; + bool has_routing_keys = 6; +} + +message ChainedOperator { + string operator_id = 1; + string operator_name = 2; + bytes operator_config = 3; +} + +message FsNode { + int32 node_index = 1; + uint32 node_id = 2; + uint32 parallelism = 3; + string description = 4; + repeated ChainedOperator operators = 5; + repeated FsSchema edges = 6; +} + +message FsEdge { + int32 source = 1; + int32 target = 2; + FsSchema schema = 4; + EdgeType 
edge_type = 5; +} diff --git a/protocol/src/lib.rs b/protocol/src/lib.rs index b0c6da06..f924a5c6 100644 --- a/protocol/src/lib.rs +++ b/protocol/src/lib.rs @@ -10,25 +10,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Protocol Buffers protocol definitions for function stream -// This module exports the generated Protocol Buffers code +// ─────────────── FunctionStream Service (original) ─────────────── -// CLI module - exports client code #[path = "../generated/cli/function_stream.rs"] pub mod cli; -// Service module - exports server code #[path = "../generated/service/function_stream.rs"] pub mod service; -// Re-export commonly used types from both modules -// Data structures are the same in both, so we can re-export from either pub use cli::function_stream_service_client; - -// Re-export client-specific types pub use cli::function_stream_service_client::FunctionStreamServiceClient; - -// Re-export server-specific types pub use service::function_stream_service_server::{ FunctionStreamService, FunctionStreamServiceServer, }; + +// ─────────────── Streaming Pipeline API (fs_api.proto) ─────────────── + +pub mod grpc { + /// Serde-annotated API types for streaming operators, schemas, programs. + #[allow(clippy::all)] + pub mod api { + include!("../generated/api/fs_api.rs"); + } +} + +/// File descriptor set for fs_api.proto (for gRPC reflection / REST gateway). 
+pub const FS_API_FILE_DESCRIPTOR_SET: &[u8] = + tonic::include_file_descriptor_set!("fs_api_descriptor"); diff --git a/src/api/checkpoints.rs b/src/api/checkpoints.rs new file mode 100644 index 00000000..8462f311 --- /dev/null +++ b/src/api/checkpoints.rs @@ -0,0 +1,96 @@ +use crate::types::to_micros; +use serde::{Deserialize, Serialize}; +use std::time::SystemTime; + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct Checkpoint { + pub epoch: u32, + pub backend: String, + pub start_time: u64, + pub finish_time: Option, + pub events: Vec, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct CheckpointEventSpan { + pub start_time: u64, + pub finish_time: u64, + pub event: String, + pub description: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct SubtaskCheckpointGroup { + pub index: u32, + pub bytes: u64, + pub event_spans: Vec, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct OperatorCheckpointGroup { + pub operator_id: String, + pub bytes: u64, + pub started_metadata_write: Option, + pub finish_time: Option, + pub subtasks: Vec, +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)] +pub enum JobCheckpointEventType { + Checkpointing, + CheckpointingOperators, + WritingMetadata, + Compacting, + Committing, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct JobCheckpointSpan { + pub event: JobCheckpointEventType, + pub start_time: u64, + pub finish_time: Option, +} + +impl JobCheckpointSpan { + pub fn now(event: JobCheckpointEventType) -> Self { + Self { + event, + start_time: to_micros(SystemTime::now()), + finish_time: None, + } + } + + pub fn finish(&mut self) { + if self.finish_time.is_none() { + self.finish_time = Some(to_micros(SystemTime::now())); + } + } +} + +impl From for CheckpointEventSpan { + fn from(value: 
JobCheckpointSpan) -> Self { + let description = match value.event { + JobCheckpointEventType::Checkpointing => "The entire checkpointing process", + JobCheckpointEventType::CheckpointingOperators => { + "The time spent checkpointing operator states" + } + JobCheckpointEventType::WritingMetadata => "Writing the final checkpoint metadata", + JobCheckpointEventType::Compacting => "Compacting old checkpoints", + JobCheckpointEventType::Committing => { + "Running two-phase commit for transactional connectors" + } + } + .to_string(); + + Self { + start_time: value.start_time, + finish_time: value.finish_time.unwrap_or_default(), + event: format!("{:?}", value.event), + description, + } + } +} diff --git a/src/api/connections.rs b/src/api/connections.rs new file mode 100644 index 00000000..eb69690e --- /dev/null +++ b/src/api/connections.rs @@ -0,0 +1,604 @@ +use crate::types::formats::{BadData, Format, Framing}; +use crate::types::{FsExtensionType, FsSchema}; +use datafusion::arrow::datatypes::{DataType, Field, Fields, TimeUnit}; +use serde::ser::SerializeMap; +use serde::{Deserialize, Serialize, Serializer}; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::{Display, Formatter}; +use std::sync::Arc; + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct Connector { + pub id: String, + pub name: String, + pub icon: String, + pub description: String, + pub table_config: String, + pub enabled: bool, + pub source: bool, + pub sink: bool, + pub custom_schemas: bool, + pub testing: bool, + pub hidden: bool, + pub connection_config: Option, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct ConnectionProfile { + pub id: String, + pub name: String, + pub connector: String, + pub config: serde_json::Value, + pub description: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct ConnectionProfilePost { + pub 
name: String, + pub connector: String, + pub config: serde_json::Value, +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[serde(rename_all = "snake_case")] +pub enum ConnectionType { + Source, + Sink, + Lookup, +} + +impl Display for ConnectionType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + ConnectionType::Source => write!(f, "SOURCE"), + ConnectionType::Sink => write!(f, "SINK"), + ConnectionType::Lookup => write!(f, "LOOKUP"), + } + } +} + +impl TryFrom for ConnectionType { + type Error = String; + + fn try_from(value: String) -> Result { + match value.to_lowercase().as_str() { + "source" => Ok(ConnectionType::Source), + "sink" => Ok(ConnectionType::Sink), + "lookup" => Ok(ConnectionType::Lookup), + _ => Err(format!("Invalid connection type: {value}")), + } + } +} + +// ─────────────────── Field Types ─────────────────── + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum FieldType { + Int32, + Int64, + Uint32, + Uint64, + #[serde(alias = "f32")] + Float32, + #[serde(alias = "f64")] + Float64, + Decimal128(DecimalField), + Bool, + #[serde(alias = "utf8")] + String, + #[serde(alias = "binary")] + Bytes, + Timestamp(TimestampField), + Json, + Struct(StructField), + List(ListField), +} + +impl FieldType { + pub fn sql_type(&self) -> String { + match self { + FieldType::Int32 => "INTEGER".into(), + FieldType::Int64 => "BIGINT".into(), + FieldType::Uint32 => "INTEGER UNSIGNED".into(), + FieldType::Uint64 => "BIGINT UNSIGNED".into(), + FieldType::Float32 => "FLOAT".into(), + FieldType::Float64 => "DOUBLE".into(), + FieldType::Decimal128(f) => format!("DECIMAL({}, {})", f.precision, f.scale), + FieldType::Bool => "BOOLEAN".into(), + FieldType::String => "TEXT".into(), + FieldType::Bytes => "BINARY".into(), + FieldType::Timestamp(t) => format!("TIMESTAMP({})", t.unit.precision()), + FieldType::Json => "JSON".into(), 
+ FieldType::List(item) => format!("{}[]", item.items.field_type.sql_type()), + FieldType::Struct(StructField { fields, .. }) => { + format!( + "STRUCT <{}>", + fields + .iter() + .map(|f| format!("{} {}", f.name, f.field_type.sql_type())) + .collect::>() + .join(", ") + ) + } + } + } +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum TimestampUnit { + #[serde(alias = "s")] + Second, + #[default] + #[serde(alias = "ms")] + Millisecond, + #[serde(alias = "µs", alias = "us")] + Microsecond, + #[serde(alias = "ns")] + Nanosecond, +} + +impl TimestampUnit { + pub fn precision(&self) -> u8 { + match self { + TimestampUnit::Second => 0, + TimestampUnit::Millisecond => 3, + TimestampUnit::Microsecond => 6, + TimestampUnit::Nanosecond => 9, + } + } +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub struct TimestampField { + #[serde(default)] + pub unit: TimestampUnit, +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub struct DecimalField { + pub precision: u8, + pub scale: i8, +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub struct StructField { + pub fields: Vec, +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub struct ListField { + pub items: Box, +} + +fn default_item_name() -> String { + "item".to_string() +} + +#[derive(Deserialize, Clone, Debug, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub struct ListFieldItem { + #[serde(default = "default_item_name")] + pub name: String, + #[serde(flatten)] + pub field_type: FieldType, + #[serde(default)] + pub required: bool, + #[serde(default)] + pub sql_name: Option, +} + +impl From for Field { + fn from(value: ListFieldItem) -> Self { + SourceField { + name: value.name, + field_type: value.field_type, + 
required: value.required, + sql_name: None, + metadata_key: None, + } + .into() + } +} + +impl Serialize for ListFieldItem { + fn serialize(&self, s: S) -> Result + where + S: Serializer, + { + let mut f = Serializer::serialize_map(s, None)?; + f.serialize_entry("name", &self.name)?; + serialize_field_type_flat(&self.field_type, &mut f)?; + f.serialize_entry("required", &self.required)?; + f.serialize_entry("sql_name", &self.field_type.sql_type())?; + f.end() + } +} + +impl TryFrom for ListFieldItem { + type Error = String; + + fn try_from(value: Field) -> Result { + let source_field: SourceField = value.try_into()?; + Ok(Self { + name: source_field.name, + field_type: source_field.field_type, + required: source_field.required, + sql_name: None, + }) + } +} + +fn serialize_field_type_flat(ft: &FieldType, map: &mut M) -> Result<(), M::Error> { + let type_tag = match ft { + FieldType::Int32 => "int32", + FieldType::Int64 => "int64", + FieldType::Uint32 => "uint32", + FieldType::Uint64 => "uint64", + FieldType::Float32 => "float32", + FieldType::Float64 => "float64", + FieldType::Decimal128(_) => "decimal128", + FieldType::Bool => "bool", + FieldType::String => "string", + FieldType::Bytes => "bytes", + FieldType::Timestamp(_) => "timestamp", + FieldType::Json => "json", + FieldType::Struct(_) => "struct", + FieldType::List(_) => "list", + }; + map.serialize_entry("type", type_tag)?; + + match ft { + FieldType::Decimal128(d) => { + map.serialize_entry("precision", &d.precision)?; + map.serialize_entry("scale", &d.scale)?; + } + FieldType::Timestamp(t) => { + map.serialize_entry("unit", &t.unit)?; + } + FieldType::Struct(s) => { + map.serialize_entry("fields", &s.fields)?; + } + FieldType::List(l) => { + map.serialize_entry("items", &l.items)?; + } + _ => {} + } + Ok(()) +} + +// ─────────────────── Source Field ─────────────────── + +#[derive(Deserialize, Clone, Debug, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub struct SourceField { + pub name: String, + 
#[serde(flatten)] + pub field_type: FieldType, + #[serde(default)] + pub required: bool, + #[serde(default)] + pub sql_name: Option, + #[serde(default)] + pub metadata_key: Option, +} + +impl Serialize for SourceField { + fn serialize(&self, s: S) -> Result + where + S: Serializer, + { + let mut f = Serializer::serialize_map(s, None)?; + f.serialize_entry("name", &self.name)?; + serialize_field_type_flat(&self.field_type, &mut f)?; + f.serialize_entry("required", &self.required)?; + if let Some(metadata_key) = &self.metadata_key { + f.serialize_entry("metadata_key", metadata_key)?; + } + f.serialize_entry("sql_name", &self.field_type.sql_type())?; + f.end() + } +} + +impl From for Field { + fn from(f: SourceField) -> Self { + let (t, ext) = match f.field_type { + FieldType::Int32 => (DataType::Int32, None), + FieldType::Int64 => (DataType::Int64, None), + FieldType::Uint32 => (DataType::UInt32, None), + FieldType::Uint64 => (DataType::UInt64, None), + FieldType::Float32 => (DataType::Float32, None), + FieldType::Float64 => (DataType::Float64, None), + FieldType::Bool => (DataType::Boolean, None), + FieldType::String => (DataType::Utf8, None), + FieldType::Bytes => (DataType::Binary, None), + FieldType::Decimal128(d) => (DataType::Decimal128(d.precision, d.scale), None), + FieldType::Timestamp(TimestampField { + unit: TimestampUnit::Second, + }) => (DataType::Timestamp(TimeUnit::Second, None), None), + FieldType::Timestamp(TimestampField { + unit: TimestampUnit::Millisecond, + }) => (DataType::Timestamp(TimeUnit::Millisecond, None), None), + FieldType::Timestamp(TimestampField { + unit: TimestampUnit::Microsecond, + }) => (DataType::Timestamp(TimeUnit::Microsecond, None), None), + FieldType::Timestamp(TimestampField { + unit: TimestampUnit::Nanosecond, + }) => (DataType::Timestamp(TimeUnit::Nanosecond, None), None), + FieldType::Json => (DataType::Utf8, Some(FsExtensionType::JSON)), + FieldType::Struct(s) => ( + DataType::Struct(Fields::from( + s.fields + 
.into_iter() + .map(|t| t.into()) + .collect::>(), + )), + None, + ), + FieldType::List(t) => (DataType::List(Arc::new((*t.items).into())), None), + }; + + FsExtensionType::add_metadata(ext, Field::new(f.name, t, !f.required)) + } +} + +impl TryFrom for SourceField { + type Error = String; + + fn try_from(f: Field) -> Result { + let field_type = match (f.data_type(), FsExtensionType::from_map(f.metadata())) { + (DataType::Boolean, None) => FieldType::Bool, + (DataType::Int32, None) => FieldType::Int32, + (DataType::Int64, None) => FieldType::Int64, + (DataType::UInt32, None) => FieldType::Uint32, + (DataType::UInt64, None) => FieldType::Uint64, + (DataType::Float32, None) => FieldType::Float32, + (DataType::Float64, None) => FieldType::Float64, + (DataType::Decimal128(p, s), None) => FieldType::Decimal128(DecimalField { + precision: *p, + scale: *s, + }), + (DataType::Binary, None) | (DataType::LargeBinary, None) => FieldType::Bytes, + (DataType::Timestamp(TimeUnit::Second, _), None) => { + FieldType::Timestamp(TimestampField { + unit: TimestampUnit::Second, + }) + } + (DataType::Timestamp(TimeUnit::Millisecond, _), None) => { + FieldType::Timestamp(TimestampField { + unit: TimestampUnit::Millisecond, + }) + } + (DataType::Timestamp(TimeUnit::Microsecond, _), None) => { + FieldType::Timestamp(TimestampField { + unit: TimestampUnit::Microsecond, + }) + } + (DataType::Timestamp(TimeUnit::Nanosecond, _), None) => { + FieldType::Timestamp(TimestampField { + unit: TimestampUnit::Nanosecond, + }) + } + (DataType::Utf8, None) => FieldType::String, + (DataType::Utf8, Some(FsExtensionType::JSON)) => FieldType::Json, + (DataType::Struct(fields), None) => { + let fields: Result<_, String> = fields + .into_iter() + .map(|f| (**f).clone().try_into()) + .collect(); + FieldType::Struct(StructField { fields: fields? 
}) + } + (DataType::List(item), None) => FieldType::List(ListField { + items: Box::new((**item).clone().try_into()?), + }), + dt => return Err(format!("Unsupported data type {dt:?}")), + }; + + Ok(SourceField { + name: f.name().clone(), + field_type, + required: !f.is_nullable(), + sql_name: None, + metadata_key: None, + }) + } +} + +// ─────────────────── Schema Definitions ─────────────────── + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] +#[serde(rename_all = "snake_case", tag = "type")] +pub enum SchemaDefinition { + JsonSchema { + schema: String, + }, + ProtobufSchema { + schema: String, + #[serde(default)] + dependencies: HashMap, + }, + AvroSchema { + schema: String, + }, +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] +#[serde(rename_all = "snake_case")] +pub struct ConnectionSchema { + pub format: Option, + #[serde(default)] + pub bad_data: Option, + #[serde(default)] + pub framing: Option, + #[serde(default)] + pub fields: Vec, + #[serde(default)] + pub definition: Option, + #[serde(default)] + pub inferred: Option, + #[serde(default)] + pub primary_keys: HashSet, +} + +impl ConnectionSchema { + pub fn try_new( + format: Option, + bad_data: Option, + framing: Option, + fields: Vec, + definition: Option, + inferred: Option, + primary_keys: HashSet, + ) -> anyhow::Result { + let s = ConnectionSchema { + format, + bad_data, + framing, + fields, + definition, + inferred, + primary_keys, + }; + s.validate() + } + + pub fn validate(self) -> anyhow::Result { + let non_metadata_fields: Vec<_> = self + .fields + .iter() + .filter(|f| f.metadata_key.is_none()) + .collect(); + + if let Some(Format::RawString(_)) = &self.format { + if non_metadata_fields.len() != 1 + || non_metadata_fields.first().unwrap().field_type != FieldType::String + || non_metadata_fields.first().unwrap().name != "value" + { + anyhow::bail!( + "raw_string format requires a schema with a single field called `value` of type TEXT" + ); + } + } + + if let 
Some(Format::Json(json_format)) = &self.format { + if json_format.unstructured + && (non_metadata_fields.len() != 1 + || non_metadata_fields.first().unwrap().field_type != FieldType::Json + || non_metadata_fields.first().unwrap().name != "value") + { + anyhow::bail!( + "json format with unstructured flag enabled requires a schema with a single field called `value` of type JSON" + ); + } + } + + Ok(self) + } + + pub fn fs_schema(&self) -> Arc { + let fields: Vec = self.fields.iter().map(|f| f.clone().into()).collect(); + Arc::new(FsSchema::from_fields(fields)) + } +} + +impl From for FsSchema { + fn from(val: ConnectionSchema) -> Self { + let fields: Vec = val.fields.into_iter().map(|f| f.into()).collect(); + FsSchema::from_fields(fields) + } +} + +// ─────────────────── Connection Table ─────────────────── + +#[derive(Serialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct ConnectionTable { + #[serde(skip_serializing)] + pub id: i64, + #[serde(rename = "id")] + pub pub_id: String, + pub name: String, + pub created_at: u64, + pub connector: String, + pub connection_profile: Option, + pub table_type: ConnectionType, + pub config: serde_json::Value, + pub schema: ConnectionSchema, + pub consumers: u32, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct ConnectionTablePost { + pub name: String, + pub connector: String, + pub connection_profile_id: Option, + pub config: serde_json::Value, + pub schema: Option, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct ConnectionAutocompleteResp { + pub values: BTreeMap>, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct TestSourceMessage { + pub error: bool, + pub done: bool, + pub message: String, +} + +impl TestSourceMessage { + pub fn info(message: impl Into) -> Self { + Self { + error: false, + done: false, + message: message.into(), + } + } + pub fn 
error(message: impl Into) -> Self { + Self { + error: true, + done: false, + message: message.into(), + } + } + pub fn done(message: impl Into) -> Self { + Self { + error: false, + done: true, + message: message.into(), + } + } + pub fn fail(message: impl Into) -> Self { + Self { + error: true, + done: true, + message: message.into(), + } + } +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct ConfluentSchema { + pub schema: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct ConfluentSchemaQueryParams { + pub endpoint: String, + pub topic: String, +} diff --git a/src/api/metrics.rs b/src/api/metrics.rs new file mode 100644 index 00000000..25d129e5 --- /dev/null +++ b/src/api/metrics.rs @@ -0,0 +1,41 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Copy, Clone, Debug, Hash, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum MetricName { + BytesRecv, + BytesSent, + MessagesRecv, + MessagesSent, + Backpressure, + TxQueueSize, + TxQueueRem, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct Metric { + pub time: u64, + pub value: f64, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct SubtaskMetrics { + pub index: u32, + pub metrics: Vec, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct MetricGroup { + pub name: MetricName, + pub subtasks: Vec, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct OperatorMetricGroup { + pub node_id: u32, + pub metric_groups: Vec, +} diff --git a/src/api/mod.rs b/src/api/mod.rs new file mode 100644 index 00000000..85cbcaaa --- /dev/null +++ b/src/api/mod.rs @@ -0,0 +1,43 @@ +//! REST/RPC API types for the FunctionStream system. +//! +//! 
Adapted from Arroyo's `arroyo-rpc/src/api_types` and utility modules. + +pub mod checkpoints; +pub mod connections; +pub mod metrics; +pub mod pipelines; +pub mod public_ids; +pub mod schema_resolver; +pub mod udfs; +pub mod var_str; + +use serde::{Deserialize, Serialize}; + +pub use checkpoints::*; +pub use connections::{ + ConnectionProfile, ConnectionSchema, ConnectionType, Connector, FieldType, SchemaDefinition, + SourceField, +}; +pub use metrics::*; +pub use pipelines::*; +pub use udfs::*; + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "camelCase")] +pub struct PaginatedCollection { + pub data: Vec, + pub has_more: bool, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "camelCase")] +pub struct NonPaginatedCollection { + pub data: Vec, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct PaginationQueryParams { + pub starting_after: Option, + pub limit: Option, +} diff --git a/src/api/pipelines.rs b/src/api/pipelines.rs new file mode 100644 index 00000000..3c77ce7a --- /dev/null +++ b/src/api/pipelines.rs @@ -0,0 +1,156 @@ +use super::udfs::Udf; +use crate::types::control::ErrorDomain; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct ValidateQueryPost { + pub query: String, + pub udfs: Option>, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct QueryValidationResult { + pub graph: Option, + pub errors: Vec, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct PipelinePost { + pub name: String, + pub query: String, + pub udfs: Option>, + pub parallelism: u64, + pub checkpoint_interval_micros: Option, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct PreviewPost { + pub query: String, + pub udfs: Option>, + 
#[serde(default)] + pub enable_sinks: bool, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct PipelinePatch { + pub parallelism: Option, + pub checkpoint_interval_micros: Option, + pub stop: Option, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct PipelineRestart { + pub force: Option, + pub ignore_state: Option, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct Pipeline { + pub id: String, + pub name: String, + pub query: String, + pub udfs: Vec, + pub checkpoint_interval_micros: u64, + pub stop: StopType, + pub created_at: u64, + pub action: Option, + pub action_text: String, + pub action_in_progress: bool, + pub graph: PipelineGraph, + pub preview: bool, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct PipelineGraph { + pub nodes: Vec, + pub edges: Vec, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct PipelineNode { + pub node_id: u32, + pub operator: String, + pub description: String, + pub parallelism: u32, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct PipelineEdge { + pub src_id: u32, + pub dest_id: u32, + pub key_type: String, + pub value_type: String, + pub edge_type: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub enum StopType { + None, + Checkpoint, + Graceful, + Immediate, + Force, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct FailureReason { + pub error: String, + pub domain: ErrorDomain, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct Job { + pub id: String, + pub running_desired: bool, + pub state: String, + pub run_id: u64, + pub start_time: Option, + pub 
finish_time: Option, + pub tasks: Option, + pub failure_reason: Option, + pub created_at: u64, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub enum JobLogLevel { + Info, + Warn, + Error, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct JobLogMessage { + pub id: String, + pub created_at: u64, + pub operator_id: Option, + pub task_index: Option, + pub level: JobLogLevel, + pub message: String, + pub details: String, + pub error_domain: Option, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct OutputData { + pub operator_id: String, + pub subtask_idx: u32, + pub timestamps: Vec, + pub start_id: u64, + pub batch: String, +} diff --git a/src/api/public_ids.rs b/src/api/public_ids.rs new file mode 100644 index 00000000..15a9f72e --- /dev/null +++ b/src/api/public_ids.rs @@ -0,0 +1,57 @@ +use std::time::{SystemTime, UNIX_EPOCH}; + +const ID_LENGTH: usize = 10; + +const ALPHABET: &[u8; 62] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + +pub enum IdTypes { + ApiKey, + ConnectionProfile, + Schema, + Pipeline, + JobConfig, + Checkpoint, + JobStatus, + ClusterInfo, + JobLogMessage, + ConnectionTable, + ConnectionTablePipeline, + Udf, +} + +/// Generates a unique identifier with a type-specific prefix. +/// +/// Uses a simple time + random approach instead of nanoid to avoid an extra dependency. 
+pub fn generate_id(id_type: IdTypes) -> String { + let prefix = match id_type { + IdTypes::ApiKey => "ak", + IdTypes::ConnectionProfile => "cp", + IdTypes::Schema => "sch", + IdTypes::Pipeline => "pl", + IdTypes::JobConfig => "job", + IdTypes::Checkpoint => "chk", + IdTypes::JobStatus => "js", + IdTypes::ClusterInfo => "ci", + IdTypes::JobLogMessage => "jlm", + IdTypes::ConnectionTable => "ct", + IdTypes::ConnectionTablePipeline => "ctp", + IdTypes::Udf => "udf", + }; + + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos(); + + let mut id = String::with_capacity(ID_LENGTH); + let mut seed = nanos; + for _ in 0..ID_LENGTH { + seed ^= seed + .wrapping_mul(6364136223846793005) + .wrapping_add(1442695040888963407); + let idx = (seed % ALPHABET.len() as u128) as usize; + id.push(ALPHABET[idx] as char); + } + + format!("{prefix}_{id}") +} diff --git a/src/api/schema_resolver.rs b/src/api/schema_resolver.rs new file mode 100644 index 00000000..a9124900 --- /dev/null +++ b/src/api/schema_resolver.rs @@ -0,0 +1,82 @@ +use async_trait::async_trait; + +/// Trait for resolving schemas by ID (e.g., from a schema registry). +#[async_trait] +pub trait SchemaResolver: Send { + async fn resolve_schema(&self, id: u32) -> Result, String>; +} + +/// A resolver that always fails — used when no schema registry is configured. +pub struct FailingSchemaResolver; + +impl Default for FailingSchemaResolver { + fn default() -> Self { + Self + } +} + +#[async_trait] +impl SchemaResolver for FailingSchemaResolver { + async fn resolve_schema(&self, id: u32) -> Result, String> { + Err(format!( + "Schema with id {id} not available, and no schema registry configured" + )) + } +} + +/// A resolver that returns a fixed schema for a known ID. 
+pub struct FixedSchemaResolver { + id: u32, + schema: String, +} + +impl FixedSchemaResolver { + pub fn new(id: u32, schema: String) -> Self { + FixedSchemaResolver { id, schema } + } +} + +#[async_trait] +impl SchemaResolver for FixedSchemaResolver { + async fn resolve_schema(&self, id: u32) -> Result, String> { + if id == self.id { + Ok(Some(self.schema.clone())) + } else { + Err(format!("Unexpected schema id {}, expected {}", id, self.id)) + } + } +} + +/// A caching wrapper around any `SchemaResolver`. +pub struct CachingSchemaResolver { + inner: R, + cache: tokio::sync::RwLock>, +} + +impl CachingSchemaResolver { + pub fn new(inner: R) -> Self { + Self { + inner, + cache: tokio::sync::RwLock::new(std::collections::HashMap::new()), + } + } +} + +#[async_trait] +impl SchemaResolver for CachingSchemaResolver { + async fn resolve_schema(&self, id: u32) -> Result, String> { + { + let cache = self.cache.read().await; + if let Some(schema) = cache.get(&id) { + return Ok(Some(schema.clone())); + } + } + + let result = self.inner.resolve_schema(id).await?; + if let Some(ref schema) = result { + let mut cache = self.cache.write().await; + cache.insert(id, schema.clone()); + } + Ok(result) + } +} diff --git a/src/api/udfs.rs b/src/api/udfs.rs new file mode 100644 index 00000000..41085168 --- /dev/null +++ b/src/api/udfs.rs @@ -0,0 +1,56 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct Udf { + pub definition: String, + #[serde(default)] + pub language: UdfLanguage, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct ValidateUdfPost { + pub definition: String, + #[serde(default)] + pub language: UdfLanguage, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct UdfValidationResult { + pub udf_name: Option, + pub errors: Vec, +} + +#[derive(Serialize, Deserialize, Copy, Clone, Debug, 
Default, Eq, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum UdfLanguage { + Python, + #[default] + Rust, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct UdfPost { + pub prefix: String, + #[serde(default)] + pub language: UdfLanguage, + pub definition: String, + pub description: Option, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +#[serde(rename_all = "snake_case")] +pub struct GlobalUdf { + pub id: String, + pub prefix: String, + pub name: String, + pub language: UdfLanguage, + pub created_at: u64, + pub updated_at: u64, + pub definition: String, + pub description: Option, + pub dylib_url: Option, +} diff --git a/src/api/var_str.rs b/src/api/var_str.rs new file mode 100644 index 00000000..c4256e38 --- /dev/null +++ b/src/api/var_str.rs @@ -0,0 +1,79 @@ +use serde::{Deserialize, Serialize}; +use std::env; + +/// A string that may contain `{{ VAR }}` placeholders for environment variable substitution. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(transparent)] +pub struct VarStr { + raw_val: String, +} + +impl VarStr { + pub fn new(raw_val: String) -> Self { + VarStr { raw_val } + } + + pub fn raw(&self) -> &str { + &self.raw_val + } + + /// Substitute `{{ VAR_NAME }}` patterns with the corresponding environment variable values. 
+ pub fn sub_env_vars(&self) -> anyhow::Result { + let mut result = self.raw_val.clone(); + let mut start = 0; + + while let Some(open) = result[start..].find("{{") { + let open_abs = start + open; + let Some(close) = result[open_abs..].find("}}") else { + break; + }; + let close_abs = open_abs + close; + + let var_name = result[open_abs + 2..close_abs].trim(); + if var_name.is_empty() { + start = close_abs + 2; + continue; + } + + match env::var(var_name) { + Ok(value) => { + let full_match = &result[open_abs..close_abs + 2]; + let full_match_owned = full_match.to_string(); + result = result.replacen(&full_match_owned, &value, 1); + start = open_abs + value.len(); + } + Err(_) => { + anyhow::bail!("Environment variable {} not found", var_name); + } + } + } + + Ok(result) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_no_placeholders() { + let input = "This is a test string with no placeholders"; + assert_eq!( + VarStr::new(input.to_string()).sub_env_vars().unwrap(), + input + ); + } + + #[test] + fn test_with_placeholders() { + unsafe { env::set_var("FS_TEST_VAR", "environment variable") }; + let input = "This is a {{ FS_TEST_VAR }}"; + let expected = "This is a environment variable"; + assert_eq!( + VarStr::new(input.to_string()).sub_env_vars().unwrap(), + expected + ); + unsafe { env::remove_var("FS_TEST_VAR") }; + } +} diff --git a/src/datastream/logical.rs b/src/datastream/logical.rs index 60101bdd..a6486760 100644 --- a/src/datastream/logical.rs +++ b/src/datastream/logical.rs @@ -1,7 +1,7 @@ use itertools::Itertools; use crate::datastream::optimizers::Optimizer; -use crate::sql::planner::types::StreamSchema; +use crate::sql::types::StreamSchema; use datafusion::arrow::datatypes::DataType; use petgraph::Direction; use petgraph::dot::Dot; diff --git a/src/lib.rs b/src/lib.rs index e8596864..a41536c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,6 +14,7 @@ #![allow(dead_code)] +pub mod api; pub mod config; pub mod coordinator; pub 
mod datastream; @@ -22,3 +23,4 @@ pub mod runtime; pub mod server; pub mod sql; pub mod storage; +pub mod types; diff --git a/src/main.rs b/src/main.rs index 562b1526..29935d62 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,13 +12,16 @@ #![allow(dead_code)] +mod api; mod config; mod coordinator; +mod datastream; mod logging; mod runtime; mod server; mod sql; mod storage; +mod types; use anyhow::{Context, Result}; use std::thread; diff --git a/src/sql/catalog/connector.rs b/src/sql/catalog/connector.rs new file mode 100644 index 00000000..01176d47 --- /dev/null +++ b/src/sql/catalog/connector.rs @@ -0,0 +1,59 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt; + +/// Describes the role of a connection in the streaming pipeline. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum ConnectionType { + Source, + Sink, + Lookup, +} + +impl fmt::Display for ConnectionType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ConnectionType::Source => write!(f, "source"), + ConnectionType::Sink => write!(f, "sink"), + ConnectionType::Lookup => write!(f, "lookup"), + } + } +} + +/// A connector operation that describes how to interact with an external system. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ConnectorOp { + pub connector: String, + pub config: String, + pub description: String, +} + +impl ConnectorOp { + pub fn new(connector: impl Into, config: impl Into) -> Self { + let connector = connector.into(); + let description = connector.clone(); + Self { + connector, + config: config.into(), + description, + } + } +} + +/// Configuration for a connection profile (e.g., Kafka broker, Pulsar endpoint). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ConnectionProfile { + pub name: String, + pub connector: String, + pub config: std::collections::HashMap, +} diff --git a/src/sql/catalog/connector_table.rs b/src/sql/catalog/connector_table.rs new file mode 100644 index 00000000..8dae1745 --- /dev/null +++ b/src/sql/catalog/connector_table.rs @@ -0,0 +1,199 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; +use std::time::Duration; + +use datafusion::arrow::datatypes::{FieldRef, Schema}; +use datafusion::common::{Result, plan_err}; +use datafusion::logical_expr::Expr; + +use super::connector::{ConnectionType, ConnectorOp}; +use super::field_spec::FieldSpec; +use crate::multifield_partial_ord; +use crate::sql::types::ProcessingMode; + +/// Represents a table backed by an external connector (e.g., Kafka, Pulsar, NATS). 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ConnectorTable { + pub id: Option, + pub connector: String, + pub name: String, + pub connection_type: ConnectionType, + pub fields: Vec, + pub config: String, + pub description: String, + pub event_time_field: Option, + pub watermark_field: Option, + pub idle_time: Option, + pub primary_keys: Arc>, + pub inferred_fields: Option>, + pub partition_exprs: Arc>>, +} + +multifield_partial_ord!( + ConnectorTable, + id, + connector, + name, + connection_type, + config, + description, + event_time_field, + watermark_field, + idle_time, + primary_keys +); + +impl ConnectorTable { + pub fn new( + name: impl Into, + connector: impl Into, + connection_type: ConnectionType, + ) -> Self { + Self { + id: None, + connector: connector.into(), + name: name.into(), + connection_type, + fields: Vec::new(), + config: String::new(), + description: String::new(), + event_time_field: None, + watermark_field: None, + idle_time: None, + primary_keys: Arc::new(Vec::new()), + inferred_fields: None, + partition_exprs: Arc::new(None), + } + } + + pub fn has_virtual_fields(&self) -> bool { + self.fields.iter().any(|f| f.is_virtual()) + } + + pub fn is_updating(&self) -> bool { + // TODO: check format for debezium/update mode + false + } + + pub fn physical_schema(&self) -> Schema { + Schema::new( + self.fields + .iter() + .filter(|f| !f.is_virtual()) + .map(|f| f.field().clone()) + .collect::>(), + ) + } + + pub fn connector_op(&self) -> ConnectorOp { + ConnectorOp { + connector: self.connector.clone(), + config: self.config.clone(), + description: self.description.clone(), + } + } + + pub fn processing_mode(&self) -> ProcessingMode { + if self.is_updating() { + ProcessingMode::Update + } else { + ProcessingMode::Append + } + } + + pub fn timestamp_override(&self) -> Result> { + if let Some(field_name) = &self.event_time_field { + if self.is_updating() { + return plan_err!("can't use event_time_field with update mode"); + } + let _field 
= self.get_time_field(field_name)?; + Ok(Some(Expr::Column(datafusion::common::Column::from_name( + field_name, + )))) + } else { + Ok(None) + } + } + + fn get_time_field(&self, field_name: &str) -> Result<&FieldSpec> { + self.fields + .iter() + .find(|f| { + f.field().name() == field_name + && matches!( + f.field().data_type(), + datafusion::arrow::datatypes::DataType::Timestamp(..) + ) + }) + .ok_or_else(|| { + datafusion::error::DataFusionError::Plan(format!( + "field {field_name} not found or not a timestamp" + )) + }) + } + + pub fn watermark_column(&self) -> Result> { + if let Some(field_name) = &self.watermark_field { + let _field = self.get_time_field(field_name)?; + Ok(Some(Expr::Column(datafusion::common::Column::from_name( + field_name, + )))) + } else { + Ok(None) + } + } + + pub fn as_sql_source(&self) -> Result { + match self.connection_type { + ConnectionType::Source => {} + ConnectionType::Sink | ConnectionType::Lookup => { + return plan_err!("cannot read from sink"); + } + } + + if self.is_updating() && self.has_virtual_fields() { + return plan_err!("can't read from a source with virtual fields and update mode"); + } + + let timestamp_override = self.timestamp_override()?; + let watermark_column = self.watermark_column()?; + + Ok(SourceOperator { + name: self.name.clone(), + connector_op: self.connector_op(), + processing_mode: self.processing_mode(), + idle_time: self.idle_time, + struct_fields: self + .fields + .iter() + .filter(|f| !f.is_virtual()) + .map(|f| Arc::new(f.field().clone())) + .collect(), + timestamp_override, + watermark_column, + }) + } +} + +/// A fully resolved source operator ready for execution graph construction. 
+#[derive(Debug, Clone)] +pub struct SourceOperator { + pub name: String, + pub connector_op: ConnectorOp, + pub processing_mode: ProcessingMode, + pub idle_time: Option, + pub struct_fields: Vec, + pub timestamp_override: Option, + pub watermark_column: Option, +} diff --git a/src/sql/catalog/field_spec.rs b/src/sql/catalog/field_spec.rs new file mode 100644 index 00000000..2fe8a50e --- /dev/null +++ b/src/sql/catalog/field_spec.rs @@ -0,0 +1,52 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use datafusion::arrow::datatypes::Field; +use datafusion::logical_expr::Expr; + +/// Describes how a field in a connector table should be interpreted. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum FieldSpec { + /// A regular struct field that maps to a column in the data. + Struct(Field), + /// A metadata field extracted from message metadata (e.g., Kafka headers). + Metadata { field: Field, key: String }, + /// A virtual field computed from an expression over other fields. + Virtual { field: Field, expression: Box }, +} + +impl FieldSpec { + pub fn is_virtual(&self) -> bool { + matches!(self, FieldSpec::Virtual { .. }) + } + + pub fn field(&self) -> &Field { + match self { + FieldSpec::Struct(f) => f, + FieldSpec::Metadata { field, .. } => field, + FieldSpec::Virtual { field, .. } => field, + } + } + + pub fn metadata_key(&self) -> Option<&str> { + match self { + FieldSpec::Metadata { key, .. 
} => Some(key.as_str()), + _ => None, + } + } +} + +impl From for FieldSpec { + fn from(value: Field) -> Self { + FieldSpec::Struct(value) + } +} diff --git a/src/sql/catalog/insert.rs b/src/sql/catalog/insert.rs new file mode 100644 index 00000000..a4a3814a --- /dev/null +++ b/src/sql/catalog/insert.rs @@ -0,0 +1,55 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use datafusion::common::Result; +use datafusion::logical_expr::{DmlStatement, LogicalPlan, WriteOp}; +use datafusion::sql::sqlparser::ast::Statement; + +use super::optimizer::produce_optimized_plan; +use crate::sql::planner::StreamSchemaProvider; + +/// Represents an INSERT operation in a streaming SQL pipeline. +#[derive(Debug)] +pub enum Insert { + /// Insert into a named sink table. + InsertQuery { + sink_name: String, + logical_plan: LogicalPlan, + }, + /// An anonymous query (no explicit INSERT target). + Anonymous { logical_plan: LogicalPlan }, +} + +impl Insert { + pub fn try_from_statement( + statement: &Statement, + schema_provider: &StreamSchemaProvider, + ) -> Result { + let logical_plan = produce_optimized_plan(statement, schema_provider)?; + + match &logical_plan { + LogicalPlan::Dml(DmlStatement { + table_name, + op: WriteOp::Insert(_), + input, + .. 
+ }) => { + let sink_name = table_name.to_string(); + Ok(Insert::InsertQuery { + sink_name, + logical_plan: (**input).clone(), + }) + } + _ => Ok(Insert::Anonymous { logical_plan }), + } + } +} diff --git a/src/sql/catalog/mod.rs b/src/sql/catalog/mod.rs new file mode 100644 index 00000000..39c7bfcd --- /dev/null +++ b/src/sql/catalog/mod.rs @@ -0,0 +1,25 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod connector; +pub mod connector_table; +pub mod field_spec; +pub mod insert; +pub mod optimizer; +pub mod table; +pub mod utils; + +pub use connector::{ConnectionType, ConnectorOp}; +pub use connector_table::{ConnectorTable, SourceOperator}; +pub use field_spec::FieldSpec; +pub use insert::Insert; +pub use table::Table; diff --git a/src/sql/catalog/optimizer.rs b/src/sql/catalog/optimizer.rs new file mode 100644 index 00000000..15abe61e --- /dev/null +++ b/src/sql/catalog/optimizer.rs @@ -0,0 +1,95 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use datafusion::common::Result; +use datafusion::common::config::ConfigOptions; +use datafusion::logical_expr::LogicalPlan; +use datafusion::optimizer::OptimizerContext; +use datafusion::optimizer::OptimizerRule; +use datafusion::optimizer::common_subexpr_eliminate::CommonSubexprEliminate; +use datafusion::optimizer::decorrelate_lateral_join::DecorrelateLateralJoin; +use datafusion::optimizer::decorrelate_predicate_subquery::DecorrelatePredicateSubquery; +use datafusion::optimizer::eliminate_cross_join::EliminateCrossJoin; +use datafusion::optimizer::eliminate_duplicated_expr::EliminateDuplicatedExpr; +use datafusion::optimizer::eliminate_filter::EliminateFilter; +use datafusion::optimizer::eliminate_group_by_constant::EliminateGroupByConstant; +use datafusion::optimizer::eliminate_join::EliminateJoin; +use datafusion::optimizer::eliminate_limit::EliminateLimit; +use datafusion::optimizer::eliminate_nested_union::EliminateNestedUnion; +use datafusion::optimizer::eliminate_one_union::EliminateOneUnion; +use datafusion::optimizer::eliminate_outer_join::EliminateOuterJoin; +use datafusion::optimizer::extract_equijoin_predicate::ExtractEquijoinPredicate; +use datafusion::optimizer::filter_null_join_keys::FilterNullJoinKeys; +use datafusion::optimizer::optimizer::Optimizer; +use datafusion::optimizer::propagate_empty_relation::PropagateEmptyRelation; +use datafusion::optimizer::push_down_filter::PushDownFilter; +use datafusion::optimizer::push_down_limit::PushDownLimit; +use datafusion::optimizer::replace_distinct_aggregate::ReplaceDistinctWithAggregate; +use datafusion::optimizer::scalar_subquery_to_join::ScalarSubqueryToJoin; +use datafusion::optimizer::simplify_expressions::SimplifyExpressions; +use datafusion::sql::planner::SqlToRel; +use datafusion::sql::sqlparser::ast::Statement; + +use crate::sql::planner::StreamSchemaProvider; + 
+/// Converts a SQL statement into an optimized DataFusion logical plan. +/// +/// Applies the DataFusion analyzer followed by a curated set of optimizer rules +/// suitable for streaming SQL (some rules like OptimizeProjections are excluded +/// because they can drop event-time calculation fields). +pub fn produce_optimized_plan( + statement: &Statement, + schema_provider: &StreamSchemaProvider, +) -> Result { + let sql_to_rel = SqlToRel::new(schema_provider); + let plan = sql_to_rel.sql_statement_to_plan(statement.clone())?; + + let analyzed_plan = schema_provider.analyzer.execute_and_check( + plan, + &ConfigOptions::default(), + |_plan, _rule| {}, + )?; + + let rules: Vec> = vec![ + Arc::new(EliminateNestedUnion::new()), + Arc::new(SimplifyExpressions::new()), + Arc::new(ReplaceDistinctWithAggregate::new()), + Arc::new(EliminateJoin::new()), + Arc::new(DecorrelatePredicateSubquery::new()), + Arc::new(ScalarSubqueryToJoin::new()), + Arc::new(DecorrelateLateralJoin::new()), + Arc::new(ExtractEquijoinPredicate::new()), + Arc::new(EliminateDuplicatedExpr::new()), + Arc::new(EliminateFilter::new()), + Arc::new(EliminateCrossJoin::new()), + Arc::new(EliminateLimit::new()), + Arc::new(PropagateEmptyRelation::new()), + Arc::new(EliminateOneUnion::new()), + Arc::new(FilterNullJoinKeys::default()), + Arc::new(EliminateOuterJoin::new()), + Arc::new(PushDownLimit::new()), + Arc::new(PushDownFilter::new()), + Arc::new(EliminateGroupByConstant::new()), + Arc::new(CommonSubexprEliminate::new()), + ]; + + let optimizer = Optimizer::with_rules(rules); + let optimized = optimizer.optimize( + analyzed_plan, + &OptimizerContext::default(), + |_plan, _rule| {}, + )?; + + Ok(optimized) +} diff --git a/src/sql/catalog/table.rs b/src/sql/catalog/table.rs new file mode 100644 index 00000000..b1d60028 --- /dev/null +++ b/src/sql/catalog/table.rs @@ -0,0 +1,202 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with 
the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use datafusion::arrow::datatypes::FieldRef; +use datafusion::common::{Result, plan_err}; +use datafusion::logical_expr::{Extension, LogicalPlan}; +use datafusion::sql::sqlparser::ast::Statement; + +use super::connector_table::ConnectorTable; +use super::optimizer::produce_optimized_plan; +use crate::sql::planner::StreamSchemaProvider; +use crate::sql::planner::extension::remote_table::RemoteTableExtension; +use crate::sql::planner::plan::rewrite_plan; +use crate::sql::types::DFField; + +/// Represents all table types in the FunctionStream SQL catalog. +#[allow(clippy::enum_variant_names)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Table { + /// A lookup table backed by an external connector. + LookupTable(ConnectorTable), + /// A source/sink table backed by an external connector. + ConnectorTable(ConnectorTable), + /// An in-memory table with an optional logical plan (for views). + MemoryTable { + name: String, + fields: Vec, + logical_plan: Option, + }, + /// A table defined by a query (CREATE VIEW / CREATE TABLE AS SELECT). + TableFromQuery { + name: String, + logical_plan: LogicalPlan, + }, + /// A preview sink for debugging/inspection. + PreviewSink { logical_plan: LogicalPlan }, +} + +impl Table { + /// Try to construct a Table from a CREATE TABLE or CREATE VIEW statement. 
+ pub fn try_from_statement( + statement: &Statement, + schema_provider: &StreamSchemaProvider, + ) -> Result> { + use datafusion::logical_expr::{CreateMemoryTable, CreateView, DdlStatement}; + use datafusion::sql::sqlparser::ast::CreateTable; + + if let Statement::CreateTable(CreateTable { + name, + columns, + query: None, + .. + }) = statement + { + let name = name.to_string(); + + if columns.is_empty() { + return plan_err!("CREATE TABLE requires at least one column"); + } + + let fields: Vec = columns + .iter() + .map(|col| { + let data_type = crate::sql::types::convert_data_type(&col.data_type) + .map(|(dt, _)| dt) + .unwrap_or(datafusion::arrow::datatypes::DataType::Utf8); + let nullable = !col.options.iter().any(|opt| { + matches!( + opt.option, + datafusion::sql::sqlparser::ast::ColumnOption::NotNull + ) + }); + Arc::new(datafusion::arrow::datatypes::Field::new( + col.name.value.clone(), + data_type, + nullable, + )) + }) + .collect(); + + return Ok(Some(Table::MemoryTable { + name, + fields, + logical_plan: None, + })); + } + + match produce_optimized_plan(statement, schema_provider) { + Ok(LogicalPlan::Ddl(DdlStatement::CreateView(CreateView { name, input, .. }))) + | Ok(LogicalPlan::Ddl(DdlStatement::CreateMemoryTable(CreateMemoryTable { + name, + input, + .. + }))) => { + let rewritten = rewrite_plan(input.as_ref().clone(), schema_provider)?; + let schema = rewritten.schema().clone(); + let remote = RemoteTableExtension { + input: rewritten, + name: name.to_owned(), + schema, + materialize: true, + }; + Ok(Some(Table::TableFromQuery { + name: name.to_string(), + logical_plan: LogicalPlan::Extension(Extension { + node: Arc::new(remote), + }), + })) + } + _ => Ok(None), + } + } + + pub fn name(&self) -> &str { + match self { + Table::MemoryTable { name, .. } | Table::TableFromQuery { name, .. } => name.as_str(), + Table::ConnectorTable(c) | Table::LookupTable(c) => c.name.as_str(), + Table::PreviewSink { .. 
} => "preview", + } + } + + pub fn get_fields(&self) -> Vec { + match self { + Table::MemoryTable { fields, .. } => fields.clone(), + Table::ConnectorTable(ConnectorTable { + fields, + inferred_fields, + .. + }) + | Table::LookupTable(ConnectorTable { + fields, + inferred_fields, + .. + }) => inferred_fields.clone().unwrap_or_else(|| { + fields + .iter() + .map(|field| field.field().clone().into()) + .collect() + }), + Table::TableFromQuery { logical_plan, .. } => { + logical_plan.schema().fields().iter().cloned().collect() + } + Table::PreviewSink { logical_plan } => { + logical_plan.schema().fields().iter().cloned().collect() + } + } + } + + pub fn set_inferred_fields(&mut self, fields: Vec) -> Result<()> { + let Table::ConnectorTable(t) = self else { + return Ok(()); + }; + + if !t.fields.is_empty() { + return Ok(()); + } + + if let Some(existing) = &t.inferred_fields { + let matches = existing.len() == fields.len() + && existing + .iter() + .zip(&fields) + .all(|(a, b)| a.name() == b.name() && a.data_type() == b.data_type()); + + if !matches { + return plan_err!("all inserts into a table must share the same schema"); + } + } + + let fields: Vec<_> = fields.into_iter().map(|f| f.field().clone()).collect(); + t.inferred_fields.replace(fields); + + Ok(()) + } + + pub fn connector_op(&self) -> Result { + match self { + Table::ConnectorTable(c) | Table::LookupTable(c) => Ok(c.connector_op()), + Table::MemoryTable { .. } => plan_err!("can't write to a memory table"), + Table::TableFromQuery { .. } => plan_err!("can't write to a query-defined table"), + Table::PreviewSink { .. 
} => Ok(super::connector::ConnectorOp::new("preview", "")), + } + } + + pub fn partition_exprs(&self) -> Option<&Vec> { + match self { + Table::ConnectorTable(c) => (*c.partition_exprs).as_ref(), + _ => None, + } + } +} diff --git a/src/sql/catalog/utils.rs b/src/sql/catalog/utils.rs new file mode 100644 index 00000000..c0b8a7d0 --- /dev/null +++ b/src/sql/catalog/utils.rs @@ -0,0 +1,78 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::sync::Arc; + +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; +use datafusion::common::{DFSchema, DFSchemaRef, Result as DFResult, TableReference}; + +use crate::sql::types::{DFField, TIMESTAMP_FIELD}; + +/// Returns the Arrow struct type for a window (start, end) pair. +pub fn window_arrow_struct() -> DataType { + DataType::Struct( + vec![ + Arc::new(Field::new( + "start", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )), + Arc::new(Field::new( + "end", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )), + ] + .into(), + ) +} + +/// Adds a `_timestamp` field to a DFSchema if it doesn't already have one. 
+pub fn add_timestamp_field( + schema: DFSchemaRef, + qualifier: Option, +) -> DFResult { + if has_timestamp_field(&schema) { + return Ok(schema); + } + + let timestamp_field = DFField::new( + qualifier, + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ); + Ok(Arc::new(schema.join(&DFSchema::new_with_metadata( + vec![timestamp_field.into()], + HashMap::new(), + )?)?)) +} + +/// Checks whether a DFSchema contains a `_timestamp` field. +pub fn has_timestamp_field(schema: &DFSchemaRef) -> bool { + schema + .fields() + .iter() + .any(|field| field.name() == TIMESTAMP_FIELD) +} + +/// Adds a `_timestamp` field to an Arrow Schema, returning a new SchemaRef. +pub fn add_timestamp_field_arrow(schema: Schema) -> SchemaRef { + let mut fields = schema.fields().to_vec(); + fields.push(Arc::new(Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ))); + Arc::new(Schema::new(fields)) +} diff --git a/src/sql/functions/mod.rs b/src/sql/functions/mod.rs new file mode 100644 index 00000000..84d3c7d4 --- /dev/null +++ b/src/sql/functions/mod.rs @@ -0,0 +1,600 @@ +use crate::sql::planner::StreamSchemaProvider; +use datafusion::arrow::array::{ + Array, ArrayRef, StringArray, UnionArray, + builder::{FixedSizeBinaryBuilder, ListBuilder, StringBuilder}, + cast::{AsArray, as_string_array}, + types::{Float64Type, Int64Type}, +}; +use datafusion::arrow::datatypes::{DataType, Field, UnionFields, UnionMode}; +use datafusion::arrow::row::{RowConverter, SortField}; +use datafusion::common::{DataFusionError, ScalarValue}; +use datafusion::common::{Result, TableReference}; +use datafusion::execution::FunctionRegistry; +use datafusion::logical_expr::expr::{Alias, ScalarFunction}; +use datafusion::logical_expr::{ + ColumnarValue, LogicalPlan, Projection, ScalarFunctionArgs, ScalarUDFImpl, Signature, + TypeSignature, Volatility, create_udf, +}; +use datafusion::prelude::{Expr, col}; +use serde_json_path::JsonPath; +use 
std::any::Any; +use std::collections::HashMap; +use std::fmt::{Debug, Write}; +use std::sync::{Arc, OnceLock}; + +const SERIALIZE_JSON_UNION: &str = "serialize_json_union"; + +/// Borrowed from DataFusion +/// +/// Creates a singleton `ScalarUDF` of the `$UDF` function named `$GNAME` and a +/// function named `$NAME` which returns that function named $NAME. +/// +/// This is used to ensure creating the list of `ScalarUDF` only happens once. +#[macro_export] +macro_rules! make_udf_function { + ($UDF:ty, $GNAME:ident, $NAME:ident) => { + /// Singleton instance of the function + static $GNAME: std::sync::OnceLock> = + std::sync::OnceLock::new(); + + /// Return a [`ScalarUDF`] for [`$UDF`] + /// + /// [`ScalarUDF`]: datafusion_expr::ScalarUDF + pub fn $NAME() -> std::sync::Arc { + $GNAME + .get_or_init(|| { + std::sync::Arc::new(datafusion::logical_expr::ScalarUDF::new_from_impl( + <$UDF>::default(), + )) + }) + .clone() + } + }; +} + +make_udf_function!(MultiHashFunction, MULTI_HASH, multi_hash); + +pub fn register_all(registry: &mut dyn FunctionRegistry) { + registry + .register_udf(Arc::new(create_udf( + "get_first_json_object", + vec![DataType::Utf8, DataType::Utf8], + DataType::Utf8, + Volatility::Immutable, + Arc::new(get_first_json_object), + ))) + .unwrap(); + + registry + .register_udf(Arc::new(create_udf( + "extract_json", + vec![DataType::Utf8, DataType::Utf8], + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + Volatility::Immutable, + Arc::new(extract_json), + ))) + .unwrap(); + + registry + .register_udf(Arc::new(create_udf( + "extract_json_string", + vec![DataType::Utf8, DataType::Utf8], + DataType::Utf8, + Volatility::Immutable, + Arc::new(extract_json_string), + ))) + .unwrap(); + + registry + .register_udf(Arc::new(create_udf( + SERIALIZE_JSON_UNION, + vec![DataType::Union(union_fields(), UnionMode::Sparse)], + DataType::Utf8, + Volatility::Immutable, + Arc::new(serialize_json_union), + ))) + .unwrap(); + + 
registry.register_udf(multi_hash()).unwrap(); +} + +fn parse_path(name: &str, path: &ScalarValue) -> Result> { + let path = match path { + ScalarValue::Utf8(Some(s)) => JsonPath::parse(s) + .map_err(|e| DataFusionError::Execution(format!("Invalid json path '{s}': {e:?}")))?, + ScalarValue::Utf8(None) => { + return Err(DataFusionError::Execution(format!( + "The path argument to {name} cannot be null" + ))); + } + _ => { + return Err(DataFusionError::Execution(format!( + "The path argument to {name} must be of type TEXT" + ))); + } + }; + + Ok(Arc::new(path)) +} + +// Hash function that can take any number of arguments and produces a fast (non-cryptographic) +// 128-bit hash from their string representations +#[derive(Debug)] +pub struct MultiHashFunction { + signature: Signature, +} + +impl MultiHashFunction { + pub fn invoke(&self, args: &[ColumnarValue]) -> Result { + let mut hasher = xxhash_rust::xxh3::Xxh3::new(); + + let all_scalar = args.iter().all(|a| matches!(a, ColumnarValue::Scalar(_))); + + let length = args + .iter() + .map(|t| match t { + ColumnarValue::Scalar(_) => 1, + ColumnarValue::Array(a) => a.len(), + }) + .max() + .ok_or_else(|| { + DataFusionError::Plan("multi_hash must have at least one argument".to_string()) + })?; + + let row_builder = RowConverter::new( + args.iter() + .map(|t| SortField::new(t.data_type().clone())) + .collect(), + )?; + + let arrays = args + .iter() + .map(|c| c.clone().into_array(length)) + .collect::>>()?; + let rows = row_builder.convert_columns(&arrays)?; + + if all_scalar { + hasher.update(rows.row(0).as_ref()); + let result = hasher.digest128().to_be_bytes().to_vec(); + hasher.reset(); + Ok(ColumnarValue::Scalar(ScalarValue::FixedSizeBinary( + size_of::() as i32, + Some(result), + ))) + } else { + let mut builder = + FixedSizeBinaryBuilder::with_capacity(length, size_of::() as i32); + + for row in rows.iter() { + hasher.update(row.as_ref()); + builder.append_value(hasher.digest128().to_be_bytes())?; + hasher.reset(); 
+ } + + Ok(ColumnarValue::Array(Arc::new(builder.finish()))) + } + } +} + +impl Default for MultiHashFunction { + fn default() -> Self { + Self { + signature: Signature::new(TypeSignature::VariadicAny, Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for MultiHashFunction { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "multi_hash" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::FixedSizeBinary(size_of::() as i32)) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + self.invoke(&args.args) + } +} + +fn json_function( + name: &str, + f: F, + to_scalar: ToS, + args: &[ColumnarValue], +) -> Result +where + ArrayT: Array + FromIterator> + 'static, + F: Fn(serde_json::Value, &JsonPath) -> Option, + ToS: Fn(Option) -> ScalarValue, +{ + assert_eq!(args.len(), 2); + Ok(match (&args[0], &args[1]) { + (ColumnarValue::Array(values), ColumnarValue::Scalar(path)) => { + let path = parse_path(name, path)?; + let vs = as_string_array(values); + ColumnarValue::Array(Arc::new( + vs.iter() + .map(|s| s.and_then(|s| f(serde_json::from_str(s).ok()?, &path))) + .collect::(), + ) as ArrayRef) + } + (ColumnarValue::Scalar(value), ColumnarValue::Scalar(path)) => { + let path = parse_path(name, path)?; + let ScalarValue::Utf8(value) = value else { + return Err(DataFusionError::Execution(format!( + "The value argument to {name} must be of type TEXT" + ))); + }; + + let result = value + .as_ref() + .and_then(|v| f(serde_json::from_str(v).ok()?, &path)); + ColumnarValue::Scalar(to_scalar(result)) + } + _ => { + return Err(DataFusionError::Execution( + "The path argument to {name} must be a literal".to_string(), + )); + } + }) +} + +pub fn extract_json(args: &[ColumnarValue]) -> Result { + assert_eq!(args.len(), 2); + + let inner = |s, path: &JsonPath| { + Some( + path.query(&serde_json::from_str(s).ok()?) 
+ .iter() + .map(|v| Some(v.to_string())) + .collect::>>(), + ) + }; + + Ok(match (&args[0], &args[1]) { + (ColumnarValue::Array(values), ColumnarValue::Scalar(path)) => { + let path = parse_path("extract_json", path)?; + let values = as_string_array(values); + + let mut builder = ListBuilder::with_capacity(StringBuilder::new(), values.len()); + + let queried = values.iter().map(|s| s.and_then(|s| inner(s, &path))); + + for v in queried { + builder.append_option(v); + } + + ColumnarValue::Array(Arc::new(builder.finish())) + } + (ColumnarValue::Scalar(value), ColumnarValue::Scalar(path)) => { + let path = parse_path("extract_json", path)?; + let ScalarValue::Utf8(v) = value else { + return Err(DataFusionError::Execution( + "The value argument to extract_json must be of type TEXT".to_string(), + )); + }; + + let mut builder = ListBuilder::with_capacity(StringBuilder::new(), 1); + let result = v.as_ref().and_then(|s| inner(s, &path)); + builder.append_option(result); + + ColumnarValue::Scalar(ScalarValue::List(Arc::new(builder.finish()))) + } + _ => { + return Err(DataFusionError::Execution( + "The path argument to extract_json must be a literal".to_string(), + )); + } + }) +} + +pub fn get_first_json_object(args: &[ColumnarValue]) -> Result { + json_function::( + "get_first_json_object", + |s, path| path.query(&s).first().map(|v| v.to_string()), + |s| s.as_deref().into(), + args, + ) +} + +pub fn extract_json_string(args: &[ColumnarValue]) -> Result { + json_function::( + "extract_json_string", + |s, path| { + path.query(&s) + .first() + .and_then(|v| v.as_str().map(|s| s.to_string())) + }, + |s| s.as_deref().into(), + args, + ) +} + +// This code is vendored from +// https://github.com/datafusion-contrib/datafusion-functions-json/blob/main/src/common_union.rs +// as the `is_json_union` function is not public. It should be kept in sync with that code so +// that we are able to detect JSON unions and rewrite them to serialized JSON for sinks. 
+pub(crate) fn is_json_union(data_type: &DataType) -> bool { + match data_type { + DataType::Union(fields, UnionMode::Sparse) => fields == &union_fields(), + _ => false, + } +} + +pub(crate) const TYPE_ID_NULL: i8 = 0; +const TYPE_ID_BOOL: i8 = 1; +const TYPE_ID_INT: i8 = 2; +const TYPE_ID_FLOAT: i8 = 3; +const TYPE_ID_STR: i8 = 4; +const TYPE_ID_ARRAY: i8 = 5; +const TYPE_ID_OBJECT: i8 = 6; + +fn union_fields() -> UnionFields { + static FIELDS: OnceLock = OnceLock::new(); + FIELDS + .get_or_init(|| { + let json_metadata: HashMap = + HashMap::from_iter(vec![("is_json".to_string(), "true".to_string())]); + UnionFields::from_iter([ + ( + TYPE_ID_NULL, + Arc::new(Field::new("null", DataType::Null, true)), + ), + ( + TYPE_ID_BOOL, + Arc::new(Field::new("bool", DataType::Boolean, false)), + ), + ( + TYPE_ID_INT, + Arc::new(Field::new("int", DataType::Int64, false)), + ), + ( + TYPE_ID_FLOAT, + Arc::new(Field::new("float", DataType::Float64, false)), + ), + ( + TYPE_ID_STR, + Arc::new(Field::new("str", DataType::Utf8, false)), + ), + ( + TYPE_ID_ARRAY, + Arc::new( + Field::new("array", DataType::Utf8, false) + .with_metadata(json_metadata.clone()), + ), + ), + ( + TYPE_ID_OBJECT, + Arc::new( + Field::new("object", DataType::Utf8, false) + .with_metadata(json_metadata.clone()), + ), + ), + ]) + }) + .clone() +} +// End vendored code + +pub fn serialize_json_union(args: &[ColumnarValue]) -> Result { + assert_eq!(args.len(), 1); + let array = match args.first().unwrap() { + ColumnarValue::Array(a) => a.clone(), + ColumnarValue::Scalar(s) => s.to_array_of_size(1)?, + }; + + let mut b = StringBuilder::with_capacity(array.len(), array.get_array_memory_size()); + + write_union(&mut b, &array)?; + + Ok(ColumnarValue::Array(Arc::new(b.finish()))) +} + +fn write_union(b: &mut StringBuilder, array: &ArrayRef) -> Result<(), std::fmt::Error> { + assert!( + is_json_union(array.data_type()), + "array item is not a valid JSON union" + ); + let json_union = 
array.as_any().downcast_ref::().unwrap(); + + for i in 0..json_union.len() { + if json_union.is_null(i) { + b.append_null(); + } else { + write_value(b, json_union.type_id(i), &json_union.value(i))?; + b.append_value(""); + } + } + + Ok(()) +} + +fn write_value(b: &mut StringBuilder, id: i8, a: &ArrayRef) -> Result<(), std::fmt::Error> { + match id { + TYPE_ID_NULL => write!(b, "null")?, + TYPE_ID_BOOL => write!(b, "{}", a.as_boolean().value(0))?, + TYPE_ID_INT => write!(b, "{}", a.as_primitive::().value(0))?, + TYPE_ID_FLOAT => write!(b, "{}", a.as_primitive::().value(0))?, + TYPE_ID_STR => { + // assumes that this is already a valid (escaped) json string as the only way to + // construct these values are by parsing (valid) JSON + b.write_char('"')?; + b.write_str(a.as_string::().value(0))?; + b.write_char('"')?; + } + TYPE_ID_ARRAY => { + b.write_str(a.as_string::().value(0))?; + } + TYPE_ID_OBJECT => { + b.write_str(a.as_string::().value(0))?; + } + _ => unreachable!("invalid union type in JSON union: {}", id), + } + + Ok(()) +} + +pub(crate) fn serialize_outgoing_json( + registry: &StreamSchemaProvider, + node: Arc, +) -> LogicalPlan { + let exprs = node + .schema() + .fields() + .iter() + .map(|f| { + if is_json_union(f.data_type()) { + Expr::Alias(Alias::new( + Expr::ScalarFunction(ScalarFunction::new_udf( + registry.udf(SERIALIZE_JSON_UNION).unwrap(), + vec![col(f.name())], + )), + Option::::None, + f.name(), + )) + } else { + col(f.name()) + } + }) + .collect(); + + LogicalPlan::Projection(Projection::try_new(exprs, node).unwrap()) +} + +#[cfg(test)] +mod test { + use datafusion::arrow::array::StringArray; + use datafusion::arrow::array::builder::{ListBuilder, StringBuilder}; + use datafusion::common::ScalarValue; + use std::sync::Arc; + + #[test] + fn test_extract_json() { + let input = Arc::new(StringArray::from(vec![ + r#"{"a": 1, "b": 2, "c": { "d": "hello" }}"#, + r#"{"a": 3, "b": 4}"#, + r#"{"a": 5, "b": 6}"#, + ])); + + let path = "$.c.d"; + + let 
result = super::extract_json(&[ + super::ColumnarValue::Array(input), + super::ColumnarValue::Scalar(path.into()), + ]) + .unwrap(); + + let mut expected = ListBuilder::new(StringBuilder::new()); + expected.append_value(vec![Some("\"hello\"".to_string())]); + expected.append_value(Vec::>::new()); + expected.append_value(Vec::>::new()); + if let super::ColumnarValue::Array(result) = result { + assert_eq!(*result, expected.finish()); + } else { + panic!("Expected array, got scalar"); + } + + let result = super::extract_json(&[ + super::ColumnarValue::Scalar(r#"{"a": 1, "b": 2, "c": { "d": "hello" }}"#.into()), + super::ColumnarValue::Scalar(path.into()), + ]) + .unwrap(); + + let mut expected = ListBuilder::with_capacity(StringBuilder::new(), 1); + expected.append_value(vec![Some("\"hello\"".to_string())]); + + if let super::ColumnarValue::Scalar(ScalarValue::List(result)) = result { + assert_eq!(*result, expected.finish()); + } else { + panic!("Expected scalar list"); + } + } + + #[test] + fn test_get_first_json_object() { + let input = Arc::new(StringArray::from(vec![ + r#"{"a": 1, "b": 2}"#, + r#"{"a": 3}"#, + r#"{"a": 5, "b": 6}"#, + ])); + + let path = "$.b"; + + let result = super::get_first_json_object(&[ + super::ColumnarValue::Array(input), + super::ColumnarValue::Scalar(path.into()), + ]) + .unwrap(); + + let expected = StringArray::from(vec![Some("2"), None, Some("6")]); + + if let super::ColumnarValue::Array(result) = result { + assert_eq!(*result, expected); + } else { + panic!("Expected array, got scalar"); + } + + let result = super::get_first_json_object(&[ + super::ColumnarValue::Scalar(r#"{"a": 1, "b": 2, "c": { "d": "hello" }}"#.into()), + super::ColumnarValue::Scalar("$.c.d".into()), + ]) + .unwrap(); + + let expected = ScalarValue::Utf8(Some("\"hello\"".to_string())); + + if let super::ColumnarValue::Scalar(result) = result { + assert_eq!(result, expected); + } else { + panic!("Expected scalar"); + } + } + + #[test] + fn 
test_extract_json_string() { + let input = Arc::new(StringArray::from(vec![ + r#"{"a": 1, "b": 2, "c": { "d": "hello" }}"#, + r#"{"a": 3, "b": 4}"#, + r#"{"a": 5, "b": 6}"#, + ])); + + let path = "$.c.d"; + + let result = super::extract_json_string(&[ + super::ColumnarValue::Array(input), + super::ColumnarValue::Scalar(path.into()), + ]) + .unwrap(); + + let expected = StringArray::from(vec![Some("hello"), None, None]); + + if let super::ColumnarValue::Array(result) = result { + assert_eq!(*result, expected); + } else { + panic!("Expected array, got scalar"); + } + + let result = super::extract_json_string(&[ + super::ColumnarValue::Scalar(r#"{"a": 1, "b": 2, "c": { "d": "hello" }}"#.into()), + super::ColumnarValue::Scalar(path.into()), + ]) + .unwrap(); + + let expected = ScalarValue::Utf8(Some("hello".to_string())); + + if let super::ColumnarValue::Scalar(result) = result { + assert_eq!(result, expected); + } else { + panic!("Expected scalar"); + } + } +} diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 31b5c4b9..e0931530 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -10,9 +10,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+pub mod catalog; +pub mod functions; +pub mod physical; pub mod planner; +pub mod types; pub use planner::StreamSchemaProvider; pub use planner::parse::parse_sql; pub use planner::plan::rewrite_plan; pub use planner::sql_to_plan::statement_to_plan; +pub use planner::{CompiledSql, parse_and_get_arrow_program, parse_sql_statements}; diff --git a/src/sql/physical/mod.rs b/src/sql/physical/mod.rs new file mode 100644 index 00000000..bfb37f11 --- /dev/null +++ b/src/sql/physical/mod.rs @@ -0,0 +1,1265 @@ +use datafusion::arrow::{ + array::{ + Array, AsArray, BooleanBuilder, PrimitiveArray, RecordBatch, StringArray, StructArray, + TimestampNanosecondArray, TimestampNanosecondBuilder, UInt32Builder, + }, + buffer::NullBuffer, + compute::{concat, take}, + datatypes::{DataType, Field, Fields, Schema, SchemaRef, TimeUnit}, +}; +use datafusion::common::{ + DataFusionError, Result, ScalarValue, Statistics, UnnestOptions, not_impl_err, plan_err, +}; +use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream}; +use datafusion::{ + execution::TaskContext, + physical_plan::{ + DisplayAs, ExecutionPlan, Partitioning, memory::MemoryStream, + stream::RecordBatchStreamAdapter, + }, +}; +use std::collections::HashMap; +use std::{ + any::Any, + mem, + pin::Pin, + sync::{Arc, OnceLock, RwLock}, + task::{Context, Poll}, +}; + +use crate::make_udf_function; +use crate::sql::functions::MultiHashFunction; +use crate::sql::planner::rewrite::UNNESTED_COL; +use crate::sql::planner::schemas::window_arrow_struct; +use crate::types::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; +use datafusion::arrow::datatypes::{TimestampNanosecondType, UInt64Type}; +use datafusion::catalog::memory::MemorySourceConfig; +use datafusion::datasource::memory::DataSourceExec; +use datafusion::logical_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, +}; +use datafusion::physical_expr::EquivalenceProperties; +use datafusion::physical_plan::PlanProperties; +use 
datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; +use datafusion::physical_plan::unnest::{ListUnnest, UnnestExec}; +use datafusion_proto::physical_plan::PhysicalExtensionCodec; +use futures::{ + ready, + stream::{Stream, StreamExt}, +}; +use prost::Message; +use protocol::grpc::api::{ + DebeziumDecodeNode, DebeziumEncodeNode, FsExecNode, MemExecNode, UnnestExecNode, + fs_exec_node::Node, +}; +use std::fmt::Debug; +use tokio::sync::mpsc::UnboundedReceiver; +use tokio_stream::wrappers::UnboundedReceiverStream; + +// ─────────────────── Updating Meta Helpers ─────────────────── + +pub fn updating_meta_fields() -> Fields { + static FIELDS: OnceLock = OnceLock::new(); + FIELDS + .get_or_init(|| { + Fields::from(vec![ + Field::new("is_retract", DataType::Boolean, true), + Field::new("id", DataType::FixedSizeBinary(16), true), + ]) + }) + .clone() +} + +pub fn updating_meta_field() -> Arc { + static FIELD: OnceLock> = OnceLock::new(); + FIELD + .get_or_init(|| { + Arc::new(Field::new( + UPDATING_META_FIELD, + DataType::Struct(updating_meta_fields()), + false, + )) + }) + .clone() +} + +// ─────────────────── WindowFunctionUdf ─────────────────── + +#[derive(Debug)] +pub struct WindowFunctionUdf { + signature: Signature, +} + +impl Default for WindowFunctionUdf { + fn default() -> Self { + Self { + signature: Signature::new( + TypeSignature::Exact(vec![ + DataType::Timestamp(TimeUnit::Nanosecond, None), + DataType::Timestamp(TimeUnit::Nanosecond, None), + ]), + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for WindowFunctionUdf { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "window" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _: &[DataType]) -> Result { + Ok(window_arrow_struct()) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let columns = args.args; + if columns.len() != 2 { + return plan_err!( + "window function expected 2 
arguments, got {}", + columns.len() + ); + } + if columns[0].data_type() != DataType::Timestamp(TimeUnit::Nanosecond, None) { + return plan_err!( + "window function expected first argument to be a timestamp, got {:?}", + columns[0].data_type() + ); + } + if columns[1].data_type() != DataType::Timestamp(TimeUnit::Nanosecond, None) { + return plan_err!( + "window function expected second argument to be a timestamp, got {:?}", + columns[1].data_type() + ); + } + let fields = vec![ + Arc::new(Field::new( + "start", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )), + Arc::new(Field::new( + "end", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )), + ] + .into(); + + match (&columns[0], &columns[1]) { + (ColumnarValue::Array(start), ColumnarValue::Array(end)) => { + Ok(ColumnarValue::Array(Arc::new(StructArray::new( + fields, + vec![start.clone(), end.clone()], + None, + )))) + } + (ColumnarValue::Array(start), ColumnarValue::Scalar(end)) => { + let end = end.to_array_of_size(start.len())?; + Ok(ColumnarValue::Array(Arc::new(StructArray::new( + fields, + vec![start.clone(), end], + None, + )))) + } + (ColumnarValue::Scalar(start), ColumnarValue::Array(end)) => { + let start = start.to_array_of_size(end.len())?; + Ok(ColumnarValue::Array(Arc::new(StructArray::new( + fields, + vec![start, end.clone()], + None, + )))) + } + (ColumnarValue::Scalar(start), ColumnarValue::Scalar(end)) => { + Ok(ColumnarValue::Scalar(ScalarValue::Struct( + StructArray::new(fields, vec![start.to_array()?, end.to_array()?], None).into(), + ))) + } + } + } +} + +make_udf_function!(WindowFunctionUdf, WINDOW_FUNCTION, window); + +// ─────────────────── Physical Extension Codec ─────────────────── + +#[derive(Debug)] +pub struct FsPhysicalExtensionCodec { + pub context: DecodingContext, +} + +impl Default for FsPhysicalExtensionCodec { + fn default() -> Self { + Self { + context: DecodingContext::None, + } + } +} + +#[derive(Debug)] +pub enum DecodingContext { + None, + 
Planning, + SingleLockedBatch(Arc>>), + UnboundedBatchStream(Arc>>>), + LockedBatchVec(Arc>>), + LockedJoinPair { + left: Arc>>, + right: Arc>>, + }, + LockedJoinStream { + left: Arc>>>, + right: Arc>>>, + }, +} + +fn make_properties(schema: SchemaRef) -> PlanProperties { + PlanProperties::new( + EquivalenceProperties::new(schema), + Partitioning::UnknownPartitioning(1), + EmissionType::Incremental, + Boundedness::Unbounded { + requires_infinite_memory: false, + }, + ) +} + +impl PhysicalExtensionCodec for FsPhysicalExtensionCodec { + fn try_decode( + &self, + buf: &[u8], + inputs: &[Arc], + _registry: &dyn datafusion::execution::FunctionRegistry, + ) -> Result> { + let exec: FsExecNode = Message::decode(buf) + .map_err(|err| DataFusionError::Internal(format!("couldn't deserialize: {err}")))?; + + match exec + .node + .ok_or_else(|| DataFusionError::Internal("exec node is empty".to_string()))? + { + Node::MemExec(mem_exec) => { + let schema: Schema = serde_json::from_str(&mem_exec.schema).map_err(|e| { + DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")) + })?; + let schema = Arc::new(schema); + match &self.context { + DecodingContext::SingleLockedBatch(single_batch) => Ok(Arc::new( + RwLockRecordBatchReader::new(schema, single_batch.clone()), + )), + DecodingContext::UnboundedBatchStream(unbounded_stream) => Ok(Arc::new( + UnboundedRecordBatchReader::new(schema, unbounded_stream.clone()), + )), + DecodingContext::LockedBatchVec(locked_batches) => Ok(Arc::new( + RecordBatchVecReader::new(schema, locked_batches.clone()), + )), + DecodingContext::Planning => { + Ok(Arc::new(FsMemExec::new(mem_exec.table_name, schema))) + } + DecodingContext::None => Err(DataFusionError::Internal( + "Need an internal context to decode".into(), + )), + DecodingContext::LockedJoinPair { left, right } => { + match mem_exec.table_name.as_str() { + "left" => { + Ok(Arc::new(RwLockRecordBatchReader::new(schema, left.clone()))) + } + "right" => 
Ok(Arc::new(RwLockRecordBatchReader::new( + schema, + right.clone(), + ))), + _ => Err(DataFusionError::Internal(format!( + "unknown table name {}", + mem_exec.table_name + ))), + } + } + DecodingContext::LockedJoinStream { left, right } => { + match mem_exec.table_name.as_str() { + "left" => Ok(Arc::new(UnboundedRecordBatchReader::new( + schema, + left.clone(), + ))), + "right" => Ok(Arc::new(UnboundedRecordBatchReader::new( + schema, + right.clone(), + ))), + _ => Err(DataFusionError::Internal(format!( + "unknown table name {}", + mem_exec.table_name + ))), + } + } + } + } + Node::UnnestExec(unnest) => { + let schema: Schema = serde_json::from_str(&unnest.schema).map_err(|e| { + DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")) + })?; + + let column = schema.index_of(UNNESTED_COL).map_err(|_| { + DataFusionError::Internal(format!( + "unnest node schema does not contain {UNNESTED_COL} col" + )) + })?; + + Ok(Arc::new(UnnestExec::new( + inputs + .first() + .ok_or_else(|| { + DataFusionError::Internal("no input for unnest node".to_string()) + })? + .clone(), + vec![ListUnnest { + index_in_input_schema: column, + depth: 1, + }], + vec![], + Arc::new(schema), + UnnestOptions::default(), + ))) + } + Node::DebeziumDecode(debezium) => { + let schema = Arc::new(serde_json::from_str::(&debezium.schema).map_err( + |e| DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")), + )?); + Ok(Arc::new(DebeziumUnrollingExec { + input: inputs + .first() + .ok_or_else(|| { + DataFusionError::Internal("no input for debezium node".to_string()) + })? 
+ .clone(), + schema: schema.clone(), + properties: make_properties(schema), + primary_keys: debezium + .primary_keys + .into_iter() + .map(|c| c as usize) + .collect(), + })) + } + Node::DebeziumEncode(debezium) => { + let schema = Arc::new(serde_json::from_str::(&debezium.schema).map_err( + |e| DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")), + )?); + Ok(Arc::new(ToDebeziumExec { + input: inputs + .first() + .ok_or_else(|| { + DataFusionError::Internal("no input for debezium node".to_string()) + })? + .clone(), + schema: schema.clone(), + properties: make_properties(schema), + })) + } + } + } + + fn try_encode(&self, node: Arc, buf: &mut Vec) -> Result<()> { + let mut proto = None; + + let mem_table: Option<&FsMemExec> = node.as_any().downcast_ref(); + if let Some(table) = mem_table { + proto = Some(FsExecNode { + node: Some(Node::MemExec(MemExecNode { + table_name: table.table_name.clone(), + schema: serde_json::to_string(&table.schema).unwrap(), + })), + }); + } + + let unnest: Option<&UnnestExec> = node.as_any().downcast_ref(); + if let Some(unnest) = unnest { + proto = Some(FsExecNode { + node: Some(Node::UnnestExec(UnnestExecNode { + schema: serde_json::to_string(&unnest.schema()).unwrap(), + })), + }); + } + + let debezium_decode: Option<&DebeziumUnrollingExec> = node.as_any().downcast_ref(); + if let Some(decode) = debezium_decode { + proto = Some(FsExecNode { + node: Some(Node::DebeziumDecode(DebeziumDecodeNode { + schema: serde_json::to_string(&decode.schema).unwrap(), + primary_keys: (*decode.primary_keys).iter().map(|c| *c as u64).collect(), + })), + }); + } + + let debezium_encode: Option<&ToDebeziumExec> = node.as_any().downcast_ref(); + if let Some(encode) = debezium_encode { + proto = Some(FsExecNode { + node: Some(Node::DebeziumEncode(DebeziumEncodeNode { + schema: serde_json::to_string(&encode.schema).unwrap(), + })), + }); + } + + if let Some(node) = proto { + node.encode(buf).map_err(|err| { + 
DataFusionError::Internal(format!("couldn't serialize exec node {err}")) + })?; + Ok(()) + } else { + Err(DataFusionError::Internal(format!( + "cannot serialize {node:?}" + ))) + } + } +} + +// ─────────────────── RwLockRecordBatchReader ─────────────────── + +#[derive(Debug)] +struct RwLockRecordBatchReader { + schema: SchemaRef, + locked_batch: Arc>>, + properties: PlanProperties, +} + +impl RwLockRecordBatchReader { + fn new(schema: SchemaRef, locked_batch: Arc>>) -> Self { + Self { + schema: schema.clone(), + locked_batch, + properties: make_properties(schema), + } + } +} + +impl DisplayAs for RwLockRecordBatchReader { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "RW Lock RecordBatchReader") + } +} + +impl ExecutionPlan for RwLockRecordBatchReader { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Err(DataFusionError::Internal("not supported".into())) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + let result = self + .locked_batch + .write() + .unwrap() + .take() + .expect("should have set a record batch before calling execute()"); + Ok(Box::pin(MemoryStream::try_new( + vec![result], + self.schema.clone(), + None, + )?)) + } + + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema)) + } + + fn reset(&self) -> Result<()> { + Ok(()) + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn name(&self) -> &str { + "rw_lock_reader" + } +} + +// ─────────────────── UnboundedRecordBatchReader ─────────────────── + +#[derive(Debug)] +struct UnboundedRecordBatchReader { + schema: SchemaRef, + receiver: Arc>>>, + properties: PlanProperties, +} + +impl UnboundedRecordBatchReader { + fn new( + schema: SchemaRef, + 
receiver: Arc>>>, + ) -> Self { + Self { + schema: schema.clone(), + receiver, + properties: make_properties(schema), + } + } +} + +impl DisplayAs for UnboundedRecordBatchReader { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "unbounded record batch reader") + } +} + +impl ExecutionPlan for UnboundedRecordBatchReader { + fn name(&self) -> &str { + "unbounded_reader" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Err(DataFusionError::Internal("not supported".into())) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + Ok(Box::pin(RecordBatchStreamAdapter::new( + self.schema.clone(), + UnboundedReceiverStream::new( + self.receiver + .write() + .unwrap() + .take() + .expect("unbounded receiver should be present before calling exec"), + ) + .map(Ok), + ))) + } + + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema)) + } + + fn reset(&self) -> Result<()> { + Ok(()) + } +} + +// ─────────────────── RecordBatchVecReader ─────────────────── + +#[derive(Debug)] +struct RecordBatchVecReader { + schema: SchemaRef, + receiver: Arc>>, + properties: PlanProperties, +} + +impl RecordBatchVecReader { + fn new(schema: SchemaRef, receiver: Arc>>) -> Self { + Self { + schema: schema.clone(), + receiver, + properties: make_properties(schema), + } + } +} + +impl DisplayAs for RecordBatchVecReader { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "record batch vec reader") + } +} + +impl ExecutionPlan for RecordBatchVecReader { + fn name(&self) -> &str { + "vec_reader" + } + + fn 
as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Err(DataFusionError::Internal("not supported".into())) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> Result { + let memory = MemorySourceConfig::try_new( + &[mem::take(self.receiver.write().unwrap().as_mut())], + self.schema.clone(), + None, + )?; + + DataSourceExec::new(Arc::new(memory)).execute(partition, context) + } + + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema)) + } + + fn reset(&self) -> Result<()> { + Ok(()) + } +} + +// ─────────────────── FsMemExec ─────────────────── + +#[derive(Debug, Clone)] +pub struct FsMemExec { + pub table_name: String, + pub schema: SchemaRef, + properties: PlanProperties, +} + +impl DisplayAs for FsMemExec { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "EmptyPartitionStream: schema={}", self.schema) + } +} + +impl FsMemExec { + pub fn new(table_name: String, schema: SchemaRef) -> Self { + Self { + schema: schema.clone(), + table_name, + properties: make_properties(schema), + } + } +} + +impl ExecutionPlan for FsMemExec { + fn name(&self) -> &str { + "mem_exec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + not_impl_err!("with_new_children is not implemented for mem_exec; should not be called") + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + plan_err!( + "EmptyPartitionStream cannot be 
executed, this is only used for physical planning before serialization" + ) + } + + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema)) + } + + fn reset(&self) -> Result<()> { + Ok(()) + } +} + +// ─────────────────── DebeziumUnrollingExec ─────────────────── + +#[derive(Debug)] +pub struct DebeziumUnrollingExec { + input: Arc, + schema: SchemaRef, + properties: PlanProperties, + primary_keys: Vec, +} + +impl DebeziumUnrollingExec { + pub fn try_new(input: Arc, primary_keys: Vec) -> Result { + let input_schema = input.schema(); + let before_index = input_schema.index_of("before")?; + let after_index = input_schema.index_of("after")?; + let op_index = input_schema.index_of("op")?; + let _timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; + let before_type = input_schema.field(before_index).data_type(); + let after_type = input_schema.field(after_index).data_type(); + if before_type != after_type { + return Err(DataFusionError::Internal( + "before and after columns must have the same type".to_string(), + )); + } + let op_type = input_schema.field(op_index).data_type(); + if *op_type != DataType::Utf8 { + return Err(DataFusionError::Internal( + "op column must be a string".to_string(), + )); + } + let DataType::Struct(fields) = before_type else { + return Err(DataFusionError::Internal( + "before and after columns must be structs".to_string(), + )); + }; + let mut fields = fields.to_vec(); + fields.push(updating_meta_field()); + fields.push(Arc::new(Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ))); + + let schema = Arc::new(Schema::new(fields)); + Ok(Self { + input, + schema: schema.clone(), + properties: make_properties(schema), + primary_keys, + }) + } +} + +impl DisplayAs for DebeziumUnrollingExec { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "DebeziumUnrollingExec") + } +} + +impl 
ExecutionPlan for DebeziumUnrollingExec { + fn name(&self) -> &str { + "debezium_unrolling_exec" + } + + fn as_any(&self) -> &dyn Any { + self as &dyn Any + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + if children.len() != 1 { + return Err(DataFusionError::Internal( + "DebeziumUnrollingExec wrong number of children".to_string(), + )); + } + Ok(Arc::new(DebeziumUnrollingExec { + input: children[0].clone(), + schema: self.schema.clone(), + properties: self.properties.clone(), + primary_keys: self.primary_keys.clone(), + })) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> Result { + Ok(Box::pin(DebeziumUnrollingStream::try_new( + self.input.execute(partition, context)?, + self.schema.clone(), + self.primary_keys.clone(), + )?)) + } + + fn reset(&self) -> Result<()> { + self.input.reset() + } +} + +struct DebeziumUnrollingStream { + input: SendableRecordBatchStream, + schema: SchemaRef, + before_index: usize, + after_index: usize, + op_index: usize, + timestamp_index: usize, + primary_keys: Vec, +} + +impl DebeziumUnrollingStream { + fn try_new( + input: SendableRecordBatchStream, + schema: SchemaRef, + primary_keys: Vec, + ) -> Result { + if primary_keys.is_empty() { + return plan_err!("there must be at least one primary key for a Debezium source"); + } + let input_schema = input.schema(); + let before_index = input_schema.index_of("before")?; + let after_index = input_schema.index_of("after")?; + let op_index = input_schema.index_of("op")?; + let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; + + Ok(Self { + input, + schema, + before_index, + after_index, + op_index, + timestamp_index, + primary_keys, + }) + } + + fn unroll_batch(&self, batch: &RecordBatch) -> Result { + let before = 
batch.column(self.before_index).as_ref(); + let after = batch.column(self.after_index).as_ref(); + let op = batch + .column(self.op_index) + .as_any() + .downcast_ref::() + .ok_or_else(|| DataFusionError::Internal("op column is not a string".to_string()))?; + + let timestamp = batch + .column(self.timestamp_index) + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Internal("timestamp column is not a timestamp".to_string()) + })?; + + let num_rows = batch.num_rows(); + let combined_array = concat(&[before, after])?; + let mut take_indices = UInt32Builder::with_capacity(num_rows); + let mut is_retract_builder = BooleanBuilder::with_capacity(num_rows); + + let mut timestamp_builder = TimestampNanosecondBuilder::with_capacity(2 * num_rows); + for i in 0..num_rows { + let op = op.value(i); + match op { + "c" | "r" => { + take_indices.append_value((i + num_rows) as u32); + is_retract_builder.append_value(false); + timestamp_builder.append_value(timestamp.value(i)); + } + "u" => { + take_indices.append_value(i as u32); + is_retract_builder.append_value(true); + timestamp_builder.append_value(timestamp.value(i)); + take_indices.append_value((i + num_rows) as u32); + is_retract_builder.append_value(false); + timestamp_builder.append_value(timestamp.value(i)); + } + "d" => { + take_indices.append_value(i as u32); + is_retract_builder.append_value(true); + timestamp_builder.append_value(timestamp.value(i)); + } + _ => { + return Err(DataFusionError::Internal(format!( + "unexpected op value: {op}" + ))); + } + } + } + let take_indices = take_indices.finish(); + let unrolled_array = take(&combined_array, &take_indices, None)?; + + let mut columns = unrolled_array.as_struct().columns().to_vec(); + + let hash = MultiHashFunction::default().invoke( + &self + .primary_keys + .iter() + .map(|i| ColumnarValue::Array(columns[*i].clone())) + .collect::>(), + )?; + + let ids = hash.into_array(num_rows)?; + + let meta = StructArray::try_new( + updating_meta_fields(), + 
vec![Arc::new(is_retract_builder.finish()), ids], + None, + )?; + columns.push(Arc::new(meta)); + columns.push(Arc::new(timestamp_builder.finish())); + Ok(RecordBatch::try_new(self.schema.clone(), columns)?) + } +} + +impl Stream for DebeziumUnrollingStream { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + let result = + ready!(self.input.poll_next_unpin(cx)).map(|result| self.unroll_batch(&result?)); + Poll::Ready(result) + } +} + +impl RecordBatchStream for DebeziumUnrollingStream { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} + +// ─────────────────── ToDebeziumExec ─────────────────── + +#[derive(Debug)] +pub struct ToDebeziumExec { + input: Arc, + schema: SchemaRef, + properties: PlanProperties, +} + +impl ToDebeziumExec { + pub fn try_new(input: Arc) -> Result { + let input_schema = input.schema(); + let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; + let struct_fields: Vec<_> = input_schema + .fields() + .into_iter() + .enumerate() + .filter_map(|(index, field)| { + if field.name() == UPDATING_META_FIELD || index == timestamp_index { + None + } else { + Some(field.clone()) + } + }) + .collect(); + let struct_data_type = DataType::Struct(struct_fields.into()); + let before_field = Arc::new(Field::new("before", struct_data_type.clone(), true)); + let after_field = Arc::new(Field::new("after", struct_data_type, true)); + let op_field = Arc::new(Field::new("op", DataType::Utf8, false)); + let timestamp_field = Arc::new(input_schema.field(timestamp_index).clone()); + + let output_schema = Arc::new(Schema::new(vec![ + before_field, + after_field, + op_field, + timestamp_field, + ])); + + Ok(Self { + input, + schema: output_schema.clone(), + properties: make_properties(output_schema), + }) + } +} + +impl DisplayAs for ToDebeziumExec { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, 
"ToDebeziumExec") + } +} + +impl ExecutionPlan for ToDebeziumExec { + fn name(&self) -> &str { + "to_debezium_exec" + } + + fn as_any(&self) -> &dyn Any { + self as &dyn Any + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + if children.len() != 1 { + return Err(DataFusionError::Internal( + "ToDebeziumExec wrong number of children".to_string(), + )); + } + Ok(Arc::new(ToDebeziumExec::try_new(children[0].clone())?)) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> Result { + let updating_meta_index = self.input.schema().index_of(UPDATING_META_FIELD).ok(); + let timestamp_index = self.input.schema().index_of(TIMESTAMP_FIELD)?; + let struct_projection = (0..self.input.schema().fields().len()) + .filter(|index| { + updating_meta_index + .map(|is_retract_index| *index != is_retract_index) + .unwrap_or(true) + && *index != timestamp_index + }) + .collect(); + + Ok(Box::pin(ToDebeziumStream { + input: self.input.execute(partition, context)?, + schema: self.schema.clone(), + updating_meta_index, + timestamp_index, + struct_projection, + })) + } + + fn reset(&self) -> Result<()> { + self.input.reset() + } +} + +struct ToDebeziumStream { + input: SendableRecordBatchStream, + schema: SchemaRef, + updating_meta_index: Option, + timestamp_index: usize, + struct_projection: Vec, +} + +impl ToDebeziumStream { + fn as_debezium_batch(&mut self, batch: &RecordBatch) -> Result { + let value_struct = batch.project(&self.struct_projection)?; + let timestamps = batch + .column(self.timestamp_index) + .as_primitive::(); + + let columns: Vec> = if let Some(metadata_index) = self.updating_meta_index { + let metadata = batch + .column(metadata_index) + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Internal("Invalid type for 
updating_meta column".to_string()) + })?; + + let is_retract = metadata.column(0).as_boolean(); + let id = metadata.column(1).as_fixed_size_binary(); + + let mut id_map: HashMap<&[u8], (usize, usize, bool, bool, i64)> = HashMap::new(); + let mut order = vec![]; + for i in 0..batch.num_rows() { + let row_id = id.value(i); + let is_create = !is_retract.value(i); + let timestamp = timestamps.value(i); + + id_map + .entry(row_id) + .and_modify(|e| { + e.1 = i; + e.3 = is_create; + e.4 = e.4.max(timestamp); + }) + .or_insert_with(|| { + order.push(row_id); + (i, i, is_create, is_create, timestamp) + }); + } + + let mut before = Vec::with_capacity(id_map.len()); + let mut after = Vec::with_capacity(id_map.len()); + let mut op = Vec::with_capacity(id_map.len()); + let mut ts = TimestampNanosecondBuilder::with_capacity(id_map.len()); + + for row_id in order { + let (first_idx, last_idx, first_is_create, last_is_create, timestamp) = + id_map.get(row_id).unwrap(); + + if *first_is_create && *last_is_create { + before.push(None); + after.push(Some(*last_idx)); + op.push("c"); + } else if !(*first_is_create) && !(*last_is_create) { + before.push(Some(*first_idx)); + after.push(None); + op.push("d"); + } else if !(*first_is_create) && *last_is_create { + before.push(Some(*first_idx)); + after.push(Some(*last_idx)); + op.push("u"); + } else { + continue; + } + + ts.append_value(*timestamp); + } + + let before_array = Self::create_output_array(&value_struct, &before)?; + let after_array = Self::create_output_array(&value_struct, &after)?; + let op_array = StringArray::from(op); + + vec![ + Arc::new(before_array), + Arc::new(after_array), + Arc::new(op_array), + Arc::new(ts.finish()), + ] + } else { + let after_array = StructArray::try_new( + value_struct.schema().fields().clone(), + value_struct.columns().to_vec(), + None, + )?; + + let before_array = StructArray::new_null( + value_struct.schema().fields().clone(), + value_struct.num_rows(), + ); + + vec![ + 
Arc::new(before_array), + Arc::new(after_array), + Arc::new(StringArray::from(vec!["c"; value_struct.num_rows()])), + batch.column(self.timestamp_index).clone(), + ] + }; + + Ok(RecordBatch::try_new(self.schema.clone(), columns)?) + } + + fn create_output_array( + value_struct: &RecordBatch, + indices: &[Option], + ) -> Result { + let mut arrays: Vec> = Vec::with_capacity(value_struct.num_columns()); + for col in value_struct.columns() { + let new_array = take( + col.as_ref(), + &indices + .iter() + .map(|&idx| idx.map(|i| i as u64)) + .collect::>(), + None, + )?; + arrays.push(new_array); + } + + Ok(StructArray::try_new( + value_struct.schema().fields().clone(), + arrays, + Some(NullBuffer::from( + indices.iter().map(|&idx| idx.is_some()).collect::>(), + )), + )?) + } +} + +impl Stream for ToDebeziumStream { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + let result = + ready!(self.input.poll_next_unpin(cx)).map(|result| self.as_debezium_batch(&result?)); + Poll::Ready(result) + } +} + +impl RecordBatchStream for ToDebeziumStream { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} diff --git a/src/sql/planner/extension/aggregate.rs b/src/sql/planner/extension/aggregate.rs index 911e595f..878d3cc5 100644 --- a/src/sql/planner/extension/aggregate.rs +++ b/src/sql/planner/extension/aggregate.rs @@ -11,7 +11,7 @@ use datafusion::logical_expr::{ use crate::multifield_partial_ord; use crate::sql::planner::extension::{NamedNode, StreamExtension, TimestampAppendExtension}; -use crate::sql::planner::types::{ +use crate::sql::types::{ DFField, StreamSchema, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, schema_from_df_fields, schema_from_df_fields_with_metadata, }; diff --git a/src/sql/planner/extension/debezium.rs b/src/sql/planner/extension/debezium.rs new file mode 100644 index 00000000..1760533c --- /dev/null +++ b/src/sql/planner/extension/debezium.rs @@ -0,0 +1,250 @@ +use 
std::sync::Arc; + +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion::common::{DFSchema, DFSchemaRef, Result, TableReference, plan_err}; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; + +use super::{NamedNode, StreamExtension}; +use crate::multifield_partial_ord; +use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD}; + +pub(crate) const DEBEZIUM_UNROLLING_EXTENSION_NAME: &str = "DebeziumUnrollingExtension"; +pub(crate) const TO_DEBEZIUM_EXTENSION_NAME: &str = "ToDebeziumExtension"; + +/// Unrolls a Debezium-formatted (before/after/op) stream into individual rows +/// with an updating metadata column. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct DebeziumUnrollingExtension { + pub(crate) input: LogicalPlan, + pub(crate) schema: DFSchemaRef, + pub primary_keys: Vec, + primary_key_names: Arc>, +} + +multifield_partial_ord!( + DebeziumUnrollingExtension, + input, + primary_keys, + primary_key_names +); + +impl DebeziumUnrollingExtension { + pub(crate) fn as_debezium_schema( + input_schema: &DFSchemaRef, + qualifier: Option, + ) -> Result { + let timestamp_field = if input_schema.has_column_with_unqualified_name(TIMESTAMP_FIELD) { + Some( + input_schema + .field_with_unqualified_name(TIMESTAMP_FIELD)? 
+ .clone(), + ) + } else { + None + }; + let struct_schema: Vec<_> = input_schema + .fields() + .iter() + .filter(|field| field.name() != TIMESTAMP_FIELD) + .cloned() + .collect(); + + let struct_type = DataType::Struct(struct_schema.into()); + + let before = Arc::new(Field::new("before", struct_type.clone(), true)); + let after = Arc::new(Field::new("after", struct_type, true)); + let op = Arc::new(Field::new("op", DataType::Utf8, true)); + let mut fields = vec![before, after, op]; + + if let Some(ts) = timestamp_field { + fields.push(Arc::new(ts)); + } + + let schema = match qualifier { + Some(q) => DFSchema::try_from_qualified_schema(q, &Schema::new(fields))?, + None => DFSchema::try_from(Schema::new(fields))?, + }; + Ok(Arc::new(schema)) + } + + pub fn try_new(input: LogicalPlan, primary_keys: Arc>) -> Result { + let input_schema = input.schema(); + + let Some(before_index) = input_schema.index_of_column_by_name(None, "before") else { + return plan_err!("DebeziumUnrollingExtension requires a before column"); + }; + let Some(after_index) = input_schema.index_of_column_by_name(None, "after") else { + return plan_err!("DebeziumUnrollingExtension requires an after column"); + }; + let Some(op_index) = input_schema.index_of_column_by_name(None, "op") else { + return plan_err!("DebeziumUnrollingExtension requires an op column"); + }; + + let before_type = input_schema.field(before_index).data_type(); + let after_type = input_schema.field(after_index).data_type(); + if before_type != after_type { + return plan_err!( + "before and after columns must have the same type, not {} and {}", + before_type, + after_type + ); + } + + let op_type = input_schema.field(op_index).data_type(); + if *op_type != DataType::Utf8 { + return plan_err!("op column must be a string, not {}", op_type); + } + + let DataType::Struct(fields) = before_type else { + return plan_err!( + "before and after columns must be structs, not {}", + before_type + ); + }; + + let primary_key_idx = 
primary_keys + .iter() + .map(|pk| fields.find(pk).map(|(i, _)| i)) + .collect::>>() + .ok_or_else(|| { + datafusion::error::DataFusionError::Plan( + "primary key field not found in Debezium schema".to_string(), + ) + })?; + + let qualifier = match ( + input_schema.qualified_field(before_index).0, + input_schema.qualified_field(after_index).0, + ) { + (Some(bq), Some(aq)) => { + if bq != aq { + return plan_err!("before and after columns must have the same alias"); + } + Some(bq.clone()) + } + (None, None) => None, + _ => return plan_err!("before and after columns must both have an alias or neither"), + }; + + let mut out_fields = fields.to_vec(); + + let Some(input_ts_index) = input_schema.index_of_column_by_name(None, TIMESTAMP_FIELD) + else { + return plan_err!("DebeziumUnrollingExtension requires a timestamp field"); + }; + out_fields.push(Arc::new(input_schema.field(input_ts_index).clone())); + + let arrow_schema = Schema::new(out_fields); + let schema = match qualifier { + Some(q) => DFSchema::try_from_qualified_schema(q, &arrow_schema)?, + None => DFSchema::try_from(arrow_schema)?, + }; + + Ok(Self { + input, + schema: Arc::new(schema), + primary_keys: primary_key_idx, + primary_key_names: primary_keys, + }) + } +} + +impl UserDefinedLogicalNodeCore for DebeziumUnrollingExtension { + fn name(&self) -> &str { + DEBEZIUM_UNROLLING_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "DebeziumUnrollingExtension") + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Self::try_new(inputs[0].clone(), self.primary_key_names.clone()) + } +} + +impl StreamExtension for DebeziumUnrollingExtension { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + 
StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() + } + + fn transparent(&self) -> bool { + true + } +} + +/// Wraps an input stream into Debezium format (before/after/op) for updating sinks. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct ToDebeziumExtension { + pub(crate) input: Arc, + pub(crate) schema: DFSchemaRef, +} + +multifield_partial_ord!(ToDebeziumExtension, input); + +impl ToDebeziumExtension { + pub(crate) fn try_new(input: LogicalPlan) -> Result { + let schema = DebeziumUnrollingExtension::as_debezium_schema(input.schema(), None) + .expect("should be able to create ToDebeziumExtension"); + Ok(Self { + input: Arc::new(input), + schema, + }) + } +} + +impl UserDefinedLogicalNodeCore for ToDebeziumExtension { + fn name(&self) -> &str { + TO_DEBEZIUM_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "ToDebeziumExtension") + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Self::try_new(inputs[0].clone()) + } +} + +impl StreamExtension for ToDebeziumExtension { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() + } + + fn transparent(&self) -> bool { + true + } +} diff --git a/src/sql/planner/extension/join.rs b/src/sql/planner/extension/join.rs index 0b4fa13a..3857fee7 100644 --- a/src/sql/planner/extension/join.rs +++ b/src/sql/planner/extension/join.rs @@ -5,7 +5,7 @@ use datafusion::logical_expr::expr::Expr; use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore}; use crate::sql::planner::extension::{NamedNode, StreamExtension}; -use crate::sql::planner::types::StreamSchema; +use 
crate::sql::types::StreamSchema; use std::sync::Arc; diff --git a/src/sql/planner/extension/key_calculation.rs b/src/sql/planner/extension/key_calculation.rs index f60c4d32..c90b6d1d 100644 --- a/src/sql/planner/extension/key_calculation.rs +++ b/src/sql/planner/extension/key_calculation.rs @@ -7,7 +7,7 @@ use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogi use crate::multifield_partial_ord; use crate::sql::planner::extension::{NamedNode, StreamExtension}; -use crate::sql::planner::types::{ +use crate::sql::types::{ StreamSchema, fields_with_qualifiers, schema_from_df_fields_with_metadata, }; diff --git a/src/sql/planner/extension/lookup.rs b/src/sql/planner/extension/lookup.rs new file mode 100644 index 00000000..daa4b094 --- /dev/null +++ b/src/sql/planner/extension/lookup.rs @@ -0,0 +1,127 @@ +use std::fmt::Formatter; +use std::sync::Arc; + +use datafusion::common::{Column, DFSchemaRef, JoinType, Result, TableReference, internal_err}; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; + +use super::{NamedNode, StreamExtension}; +use crate::multifield_partial_ord; +use crate::sql::catalog::connector_table::ConnectorTable; +use crate::sql::types::StreamSchema; + +pub const SOURCE_EXTENSION_NAME: &str = "LookupSource"; +pub const JOIN_EXTENSION_NAME: &str = "LookupJoin"; + +/// Represents a lookup table source in the streaming plan. +/// Lookup sources provide point-query access to external state. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct LookupSource { + pub(crate) table: ConnectorTable, + pub(crate) schema: DFSchemaRef, +} + +multifield_partial_ord!(LookupSource, table); + +impl UserDefinedLogicalNodeCore for LookupSource { + fn name(&self) -> &str { + SOURCE_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "LookupSource: {}", self.schema) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + if !inputs.is_empty() { + return internal_err!("LookupSource cannot have inputs"); + } + Ok(Self { + table: self.table.clone(), + schema: self.schema.clone(), + }) + } +} + +impl StreamExtension for LookupSource { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() + } +} + +/// Represents a lookup join: a streaming input joined against a lookup table. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct LookupJoin { + pub(crate) input: LogicalPlan, + pub(crate) schema: DFSchemaRef, + pub(crate) connector: ConnectorTable, + pub(crate) on: Vec<(Expr, Column)>, + pub(crate) filter: Option, + pub(crate) alias: Option, + pub(crate) join_type: JoinType, +} + +multifield_partial_ord!(LookupJoin, input, connector, on, filter, alias); + +impl UserDefinedLogicalNodeCore for LookupJoin { + fn name(&self) -> &str { + JOIN_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + let mut e: Vec<_> = self.on.iter().map(|(l, _)| l.clone()).collect(); + if let Some(filter) = &self.filter { + e.push(filter.clone()); + } + e + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "LookupJoinExtension: {}", self.schema) + } + + fn with_exprs_and_inputs(&self, _: Vec, inputs: Vec) -> Result { + Ok(Self { + input: inputs[0].clone(), + schema: self.schema.clone(), + connector: self.connector.clone(), + on: self.on.clone(), + filter: self.filter.clone(), + alias: self.alias.clone(), + join_type: self.join_type, + }) + } +} + +impl StreamExtension for LookupJoin { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() + } +} diff --git a/src/sql/planner/extension/mod.rs b/src/sql/planner/extension/mod.rs index 96ac5f32..4de1892e 100644 --- a/src/sql/planner/extension/mod.rs +++ b/src/sql/planner/extension/mod.rs @@ -1,24 +1,53 @@ -use std::fmt::Debug; +use std::fmt::{Debug, Formatter}; use std::sync::Arc; +use std::time::Duration; +use datafusion::arrow::datatypes::{DataType, TimeUnit}; use datafusion::common::{DFSchemaRef, DataFusionError, Result, TableReference}; use datafusion::logical_expr::{ Expr, LogicalPlan, UserDefinedLogicalNode, 
UserDefinedLogicalNodeCore, }; +use crate::datastream::logical::{LogicalEdge, LogicalNode}; use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field}; -use crate::sql::planner::types::StreamSchema; +use crate::sql::types::{ + DFField, StreamSchema, TIMESTAMP_FIELD, fields_with_qualifiers, schema_from_df_fields, +}; +use crate::types::FsSchemaRef; pub(crate) mod aggregate; +pub(crate) mod debezium; pub(crate) mod join; pub(crate) mod key_calculation; +pub(crate) mod lookup; pub(crate) mod projection; pub(crate) mod remote_table; +pub(crate) mod sink; +pub(crate) mod table_source; +pub(crate) mod updating_aggregate; pub(crate) mod watermark_node; pub(crate) mod window_fn; +pub(crate) struct NodeWithIncomingEdges { + pub node: LogicalNode, + pub edges: Vec, +} + pub(crate) trait StreamExtension: Debug { fn node_name(&self) -> Option; + + fn plan_node( + &self, + _planner: &super::physical_planner::Planner, + _index: usize, + _input_schemas: Vec, + ) -> Result { + Err(DataFusionError::NotImplemented(format!( + "plan_node not yet implemented for {:?}", + self + ))) + } + fn output_schema(&self) -> StreamSchema; fn transparent(&self) -> bool { false @@ -47,20 +76,34 @@ impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension { fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result { use aggregate::AggregateExtension; + use debezium::{DebeziumUnrollingExtension, ToDebeziumExtension}; use join::JoinExtension; use key_calculation::KeyCalculationExtension; + use lookup::{LookupJoin, LookupSource}; use projection::ProjectionExtension; use remote_table::RemoteTableExtension; + use sink::SinkExtension; + use table_source::TableSourceExtension; + use updating_aggregate::UpdatingAggregateExtension; use watermark_node::WatermarkNode; use window_fn::WindowFunctionExtension; - try_from_t::(node) + try_from_t::(node) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) .or_else(|_| try_from_t::(node)) .or_else(|_| 
try_from_t::(node)) .or_else(|_| try_from_t::(node)) .or_else(|_| try_from_t::(node)) .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name()))) } } @@ -151,3 +194,163 @@ impl UserDefinedLogicalNodeCore for TimestampAppendExtension { Ok(Self::new(inputs[0].clone(), self.qualifier.clone())) } } + +/// Appends an `_updating_meta` and properly qualified `_timestamp` field +/// to the output schema of an updating aggregate. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct IsRetractExtension { + pub(crate) input: LogicalPlan, + pub(crate) schema: DFSchemaRef, + pub(crate) timestamp_qualifier: Option, +} + +multifield_partial_ord!(IsRetractExtension, input, timestamp_qualifier); + +impl IsRetractExtension { + pub(crate) fn new(input: LogicalPlan, timestamp_qualifier: Option) -> Self { + let mut output_fields = fields_with_qualifiers(input.schema()); + + let timestamp_index = output_fields.len() - 1; + output_fields[timestamp_index] = DFField::new( + timestamp_qualifier.clone(), + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ); + let schema = Arc::new(schema_from_df_fields(&output_fields).unwrap()); + Self { + input, + schema, + timestamp_qualifier, + } + } +} + +impl UserDefinedLogicalNodeCore for IsRetractExtension { + fn name(&self) -> &str { + "IsRetractExtension" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "IsRetractExtension") + } + + fn with_exprs_and_inputs(&self, 
_exprs: Vec, inputs: Vec) -> Result { + Ok(Self::new( + inputs[0].clone(), + self.timestamp_qualifier.clone(), + )) + } +} + +impl StreamExtension for IsRetractExtension { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() + } +} + +pub(crate) const ASYNC_RESULT_FIELD: &str = "__async_result"; + +/// Extension node for async UDF calls in streaming projections. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct AsyncUDFExtension { + pub(crate) input: Arc, + pub(crate) name: String, + pub(crate) arg_exprs: Vec, + pub(crate) final_exprs: Vec, + pub(crate) ordered: bool, + pub(crate) max_concurrency: usize, + pub(crate) timeout: Duration, + pub(crate) final_schema: DFSchemaRef, +} + +multifield_partial_ord!( + AsyncUDFExtension, + input, + name, + arg_exprs, + final_exprs, + ordered, + max_concurrency, + timeout +); + +impl UserDefinedLogicalNodeCore for AsyncUDFExtension { + fn name(&self) -> &str { + "AsyncUDFNode" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.final_schema + } + + fn expressions(&self) -> Vec { + self.arg_exprs + .iter() + .chain(self.final_exprs.iter()) + .cloned() + .collect() + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "AsyncUdfExtension<{}>: {}", self.name, self.final_schema) + } + + fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return Err(DataFusionError::Internal("input size inconsistent".into())); + } + if UserDefinedLogicalNode::expressions(self) != exprs { + return Err(DataFusionError::Internal( + "Tried to recreate async UDF node with different expressions".into(), + )); + } + + Ok(Self { + input: Arc::new(inputs[0].clone()), + name: self.name.clone(), + arg_exprs: self.arg_exprs.clone(), + final_exprs: self.final_exprs.clone(), + ordered: 
self.ordered, + max_concurrency: self.max_concurrency, + timeout: self.timeout, + final_schema: self.final_schema.clone(), + }) + } +} + +impl StreamExtension for AsyncUDFExtension { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_fields( + self.final_schema + .fields() + .iter() + .map(|f| (**f).clone()) + .collect(), + ) + } +} diff --git a/src/sql/planner/extension/projection.rs b/src/sql/planner/extension/projection.rs index f7ecb6ed..e6dc8ce7 100644 --- a/src/sql/planner/extension/projection.rs +++ b/src/sql/planner/extension/projection.rs @@ -6,7 +6,7 @@ use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogi use crate::multifield_partial_ord; use crate::sql::planner::extension::{NamedNode, StreamExtension}; -use crate::sql::planner::types::{DFField, StreamSchema, schema_from_df_fields}; +use crate::sql::types::{DFField, StreamSchema, schema_from_df_fields}; pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension"; diff --git a/src/sql/planner/extension/remote_table.rs b/src/sql/planner/extension/remote_table.rs index 4935efd9..2d81cafc 100644 --- a/src/sql/planner/extension/remote_table.rs +++ b/src/sql/planner/extension/remote_table.rs @@ -6,7 +6,7 @@ use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use crate::multifield_partial_ord; use crate::sql::planner::extension::{NamedNode, StreamExtension}; -use crate::sql::planner::types::StreamSchema; +use crate::sql::types::StreamSchema; pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension"; diff --git a/src/sql/planner/extension/sink.rs b/src/sql/planner/extension/sink.rs new file mode 100644 index 00000000..7820925f --- /dev/null +++ b/src/sql/planner/extension/sink.rs @@ -0,0 +1,135 @@ +use std::sync::Arc; + +use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err}; +use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore}; + 
+use super::debezium::ToDebeziumExtension; +use super::remote_table::RemoteTableExtension; +use super::{NamedNode, StreamExtension}; +use crate::multifield_partial_ord; +use crate::sql::catalog::table::Table; +use crate::sql::types::StreamSchema; + +pub(crate) const SINK_NODE_NAME: &str = "SinkExtension"; + +/// Extension node representing a sink (output) in the streaming plan. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct SinkExtension { + pub(crate) name: TableReference, + pub(crate) table: Table, + pub(crate) schema: DFSchemaRef, + pub(crate) inputs: Arc>, +} + +multifield_partial_ord!(SinkExtension, name, inputs); + +impl SinkExtension { + pub fn new( + name: TableReference, + table: Table, + mut schema: DFSchemaRef, + mut input: Arc, + ) -> Result { + match &table { + Table::ConnectorTable(connector_table) => { + if connector_table.is_updating() { + let to_debezium = ToDebeziumExtension::try_new(input.as_ref().clone())?; + input = Arc::new(LogicalPlan::Extension(Extension { + node: Arc::new(to_debezium), + })); + schema = input.schema().clone(); + } + } + Table::LookupTable(..) => return plan_err!("cannot use a lookup table as a sink"), + Table::MemoryTable { .. } => return plan_err!("memory tables not supported as sinks"), + Table::TableFromQuery { .. } => {} + Table::PreviewSink { .. 
} => { + // preview sinks may also need debezium wrapping for updating inputs + } + } + + Self::add_remote_if_necessary(&schema, &mut input); + + let inputs = Arc::new(vec![(*input).clone()]); + Ok(Self { + name, + table, + schema, + inputs, + }) + } + + pub fn add_remote_if_necessary(schema: &DFSchemaRef, input: &mut Arc) { + if let LogicalPlan::Extension(node) = input.as_ref() { + let Ok(ext): Result<&dyn StreamExtension, _> = (&node.node).try_into() else { + // not a StreamExtension, wrap it + let remote = RemoteTableExtension { + input: input.as_ref().clone(), + name: TableReference::bare("sink projection"), + schema: schema.clone(), + materialize: false, + }; + *input = Arc::new(LogicalPlan::Extension(Extension { + node: Arc::new(remote), + })); + return; + }; + if !ext.transparent() { + return; + } + } + let remote = RemoteTableExtension { + input: input.as_ref().clone(), + name: TableReference::bare("sink projection"), + schema: schema.clone(), + materialize: false, + }; + *input = Arc::new(LogicalPlan::Extension(Extension { + node: Arc::new(remote), + })); + } +} + +impl UserDefinedLogicalNodeCore for SinkExtension { + fn name(&self) -> &str { + SINK_NODE_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + self.inputs.iter().collect() + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "SinkExtension({:?}): {}", self.name, self.schema) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Ok(Self { + name: self.name.clone(), + table: self.table.clone(), + schema: self.schema.clone(), + inputs: Arc::new(inputs), + }) + } +} + +impl StreamExtension for SinkExtension { + fn node_name(&self) -> Option { + match &self.table { + Table::PreviewSink { .. 
} => None, + _ => Some(NamedNode::Sink(self.name.clone())), + } + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_fields(vec![]) + } +} diff --git a/src/sql/planner/extension/table_source.rs b/src/sql/planner/extension/table_source.rs new file mode 100644 index 00000000..cab3ae3d --- /dev/null +++ b/src/sql/planner/extension/table_source.rs @@ -0,0 +1,94 @@ +use std::sync::Arc; + +use datafusion::common::{DFSchemaRef, Result, TableReference}; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; + +use super::{NamedNode, StreamExtension}; +use crate::multifield_partial_ord; +use crate::sql::catalog::connector_table::ConnectorTable; +use crate::sql::catalog::field_spec::FieldSpec; +use crate::sql::planner::schemas::add_timestamp_field; +use crate::sql::types::{StreamSchema, schema_from_df_fields}; + +pub(crate) const TABLE_SOURCE_NAME: &str = "TableSourceExtension"; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct TableSourceExtension { + pub(crate) name: TableReference, + pub(crate) table: ConnectorTable, + pub(crate) schema: DFSchemaRef, +} + +multifield_partial_ord!(TableSourceExtension, name, table); + +impl TableSourceExtension { + pub fn new(name: TableReference, table: ConnectorTable) -> Self { + let physical_fields = table + .fields + .iter() + .filter_map(|field| match field { + FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => { + Some((Some(name.clone()), Arc::new(field.clone())).into()) + } + FieldSpec::Virtual { .. 
} => None, + }) + .collect::>(); + let base_schema = Arc::new(schema_from_df_fields(&physical_fields).unwrap()); + + let schema = if table.is_updating() { + super::debezium::DebeziumUnrollingExtension::as_debezium_schema( + &base_schema, + Some(name.clone()), + ) + .unwrap() + } else { + base_schema + }; + let schema = add_timestamp_field(schema, Some(name.clone())).unwrap(); + Self { + name, + table, + schema, + } + } +} + +impl UserDefinedLogicalNodeCore for TableSourceExtension { + fn name(&self) -> &str { + TABLE_SOURCE_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "TableSourceExtension: {}", self.schema) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, _inputs: Vec) -> Result { + Ok(Self { + name: self.name.clone(), + table: self.table.clone(), + schema: self.schema.clone(), + }) + } +} + +impl StreamExtension for TableSourceExtension { + fn node_name(&self) -> Option { + Some(NamedNode::Source(self.name.clone())) + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap() + } +} diff --git a/src/sql/planner/extension/updating_aggregate.rs b/src/sql/planner/extension/updating_aggregate.rs new file mode 100644 index 00000000..758edc67 --- /dev/null +++ b/src/sql/planner/extension/updating_aggregate.rs @@ -0,0 +1,89 @@ +use std::sync::Arc; +use std::time::Duration; + +use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err}; +use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore}; + +use super::{IsRetractExtension, NamedNode, StreamExtension}; +use crate::sql::types::StreamSchema; + +pub(crate) const UPDATING_AGGREGATE_EXTENSION_NAME: &str = "UpdatingAggregateExtension"; + +/// Extension node for updating 
(non-windowed) aggregations. +/// Maintains state with TTL and emits retraction/update pairs. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub(crate) struct UpdatingAggregateExtension { + pub(crate) aggregate: LogicalPlan, + pub(crate) key_fields: Vec, + pub(crate) final_calculation: LogicalPlan, + pub(crate) timestamp_qualifier: Option, + pub(crate) ttl: Duration, +} + +impl UpdatingAggregateExtension { + pub fn new( + aggregate: LogicalPlan, + key_fields: Vec, + timestamp_qualifier: Option, + ttl: Duration, + ) -> Result { + let final_calculation = LogicalPlan::Extension(Extension { + node: Arc::new(IsRetractExtension::new( + aggregate.clone(), + timestamp_qualifier.clone(), + )), + }); + + Ok(Self { + aggregate, + key_fields, + final_calculation, + timestamp_qualifier, + ttl, + }) + } +} + +impl UserDefinedLogicalNodeCore for UpdatingAggregateExtension { + fn name(&self) -> &str { + UPDATING_AGGREGATE_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.aggregate] + } + + fn schema(&self) -> &DFSchemaRef { + self.final_calculation.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "UpdatingAggregateExtension") + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return plan_err!("UpdatingAggregateExtension expects exactly one input"); + } + Self::new( + inputs[0].clone(), + self.key_fields.clone(), + self.timestamp_qualifier.clone(), + self.ttl, + ) + } +} + +impl StreamExtension for UpdatingAggregateExtension { + fn node_name(&self) -> Option { + None + } + + fn output_schema(&self) -> StreamSchema { + StreamSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap() + } +} diff --git a/src/sql/planner/extension/watermark_node.rs b/src/sql/planner/extension/watermark_node.rs index eb776ff2..a06bdb9a 100644 --- a/src/sql/planner/extension/watermark_node.rs 
+++ b/src/sql/planner/extension/watermark_node.rs @@ -8,7 +8,7 @@ use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use crate::multifield_partial_ord; use crate::sql::planner::extension::{NamedNode, StreamExtension}; use crate::sql::planner::schemas::add_timestamp_field; -use crate::sql::planner::types::{StreamSchema, TIMESTAMP_FIELD}; +use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD}; pub(crate) const WATERMARK_NODE_NAME: &str = "WatermarkNode"; diff --git a/src/sql/planner/extension/window_fn.rs b/src/sql/planner/extension/window_fn.rs index 6e6e1c36..95832183 100644 --- a/src/sql/planner/extension/window_fn.rs +++ b/src/sql/planner/extension/window_fn.rs @@ -4,7 +4,7 @@ use datafusion::common::{DFSchemaRef, Result}; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use crate::sql::planner::extension::{NamedNode, StreamExtension}; -use crate::sql::planner::types::StreamSchema; +use crate::sql::types::StreamSchema; pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension"; diff --git a/src/sql/planner/mod.rs b/src/sql/planner/mod.rs index ead5e212..d80d3a8d 100644 --- a/src/sql/planner/mod.rs +++ b/src/sql/planner/mod.rs @@ -2,354 +2,360 @@ pub(crate) mod extension; pub mod parse; +pub(crate) mod physical_planner; pub mod plan; +pub mod rewrite; +pub mod schema_provider; pub mod schemas; pub mod sql_to_plan; -pub mod types; + +pub(crate) mod mod_prelude { + pub use super::StreamSchemaProvider; +} + +pub use schema_provider::{LogicalBatchInput, StreamSchemaProvider, StreamTable}; use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use datafusion::arrow::datatypes::{self as datatypes, DataType, Field, Schema}; +use datafusion::common::tree_node::TreeNode; use datafusion::common::{Result, plan_err}; -use datafusion::datasource::DefaultTableSource; use datafusion::error::DataFusionError; -use datafusion::execution::{FunctionRegistry, SessionStateDefaults}; -use 
datafusion::logical_expr::expr_rewriter::FunctionRewrite; -use datafusion::logical_expr::planner::ExprPlanner; -use datafusion::logical_expr::{ - AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableSource, WindowUDF, -}; -use datafusion::optimizer::Analyzer; +use datafusion::execution::SessionStateBuilder; +use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore}; +use datafusion::prelude::SessionConfig; use datafusion::sql::TableReference; -use datafusion::sql::planner::ContextProvider; -use unicase::UniCase; - -use crate::sql::planner::schemas::window_arrow_struct; -use crate::sql::planner::types::{PlaceholderUdf, PlanningOptions}; - -/// Catalog provider for streaming SQL queries. -/// Manages tables, UDFs, and configuration for streaming SQL planning. -#[derive(Clone, Default)] -pub struct StreamSchemaProvider { - pub source_defs: HashMap, - tables: HashMap, StreamTable>, - pub functions: HashMap>, - pub aggregate_functions: HashMap>, - pub window_functions: HashMap>, - config_options: datafusion::config::ConfigOptions, - pub expr_planners: Vec>, - pub planning_options: PlanningOptions, - pub analyzer: Analyzer, -} +use datafusion::sql::sqlparser::ast::{OneOrManyWithParens, Statement}; +use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; +use datafusion::sql::sqlparser::parser::Parser; +use tracing::debug; + +use crate::datastream::logical::{LogicalProgram, ProgramConfig}; +use crate::datastream::optimizers::ChainingOptimizer; +use crate::sql::catalog::insert::Insert; +use crate::sql::catalog::table::Table as CatalogTable; +use crate::sql::functions::{is_json_union, serialize_outgoing_json}; +use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; +use crate::sql::planner::extension::projection::ProjectionExtension; +use crate::sql::planner::extension::sink::SinkExtension; +use crate::sql::planner::extension::{NamedNode, StreamExtension}; +use crate::sql::planner::plan::rewrite_plan; +use 
crate::sql::planner::rewrite::{SinkInputRewriter, SourceMetadataVisitor}; +use crate::sql::types::SqlConfig; + +// ── Compilation pipeline ────────────────────────────────────────────── -/// Represents a table registered in the streaming SQL context #[derive(Clone, Debug)] -pub enum StreamTable { - Source { - name: String, - schema: Arc, - event_time_field: Option, - watermark_field: Option, - }, - Sink { - name: String, - schema: Arc, - }, - Memory { - name: String, - logical_plan: Option, - }, +pub struct CompiledSql { + pub program: LogicalProgram, + pub connection_ids: Vec, } -impl StreamTable { - pub fn name(&self) -> &str { - match self { - StreamTable::Source { name, .. } => name, - StreamTable::Sink { name, .. } => name, - StreamTable::Memory { name, .. } => name, - } - } - - pub fn get_fields(&self) -> Vec> { - match self { - StreamTable::Source { schema, .. } => schema.fields().to_vec(), - StreamTable::Sink { schema, .. } => schema.fields().to_vec(), - StreamTable::Memory { .. 
} => vec![], - } - } +pub fn parse_sql_statements( + sql: &str, +) -> std::result::Result, datafusion::sql::sqlparser::parser::ParserError> { + Parser::parse_sql(&FunctionStreamDialect {}, sql) } -#[derive(Debug)] -struct LogicalBatchInput { - table_name: String, - schema: Arc, -} - -impl datafusion::datasource::TableProvider for LogicalBatchInput { - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn schema(&self) -> Arc { - self.schema.clone() - } - - fn table_type(&self) -> datafusion::datasource::TableType { - datafusion::datasource::TableType::Base - } - - fn scan<'life0, 'life1, 'life2, 'life3, 'async_trait>( - &'life0 self, - _state: &'life1 dyn datafusion::catalog::Session, - _projection: Option<&'life2 Vec>, - _filters: &'life3 [Expr], - _limit: Option, - ) -> std::pin::Pin< - Box< - dyn std::future::Future< - Output = Result>, - > + Send - + 'async_trait, - >, - > - where - 'life0: 'async_trait, - 'life1: 'async_trait, - 'life2: 'async_trait, - 'life3: 'async_trait, - Self: 'async_trait, +fn try_handle_set_variable( + statement: &Statement, + schema_provider: &mut StreamSchemaProvider, +) -> Result { + if let Statement::SetVariable { + variables, value, .. 
+ } = statement { - unimplemented!("LogicalBatchInput is for planning only") - } -} - -fn create_table(table_name: String, schema: Arc) -> Arc { - let table_provider = LogicalBatchInput { table_name, schema }; - let wrapped = Arc::new(table_provider); - let provider = DefaultTableSource::new(wrapped); - Arc::new(provider) -} - -impl StreamSchemaProvider { - pub fn new() -> Self { - let mut registry = Self { - ..Default::default() + let OneOrManyWithParens::One(opt) = variables else { + return plan_err!("invalid syntax for `SET` call"); }; - registry - .register_udf(PlaceholderUdf::with_return( - "hop", - vec![ - DataType::Interval(datatypes::IntervalUnit::MonthDayNano), - DataType::Interval(datatypes::IntervalUnit::MonthDayNano), - ], - window_arrow_struct(), - )) - .unwrap(); - - registry - .register_udf(PlaceholderUdf::with_return( - "tumble", - vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], - window_arrow_struct(), - )) - .unwrap(); - - registry - .register_udf(PlaceholderUdf::with_return( - "session", - vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], - window_arrow_struct(), - )) - .unwrap(); - - registry - .register_udf(PlaceholderUdf::with_return( - "unnest", - vec![DataType::List(Arc::new(Field::new( - "field", - DataType::Utf8, - true, - )))], - DataType::Utf8, - )) - .unwrap(); - - registry - .register_udf(PlaceholderUdf::with_return( - "row_time", - vec![], - DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None), - )) - .unwrap(); - - for p in SessionStateDefaults::default_scalar_functions() { - registry.register_udf(p).unwrap(); - } - for p in SessionStateDefaults::default_aggregate_functions() { - registry.register_udaf(p).unwrap(); - } - for p in SessionStateDefaults::default_window_functions() { - registry.register_udwf(p).unwrap(); - } - for p in SessionStateDefaults::default_expr_planners() { - registry.register_expr_planner(p).unwrap(); + if opt.to_string() != "updating_ttl" { + return plan_err!( + "invalid 
option '{}'; supported options are 'updating_ttl'", + opt + ); } - registry - } - - pub fn add_source_table( - &mut self, - name: String, - schema: Arc, - event_time_field: Option, - watermark_field: Option, - ) { - self.tables.insert( - UniCase::new(name.clone()), - StreamTable::Source { - name, - schema, - event_time_field, - watermark_field, - }, - ); - } - - pub fn add_sink_table(&mut self, name: String, schema: Arc) { - self.tables.insert( - UniCase::new(name.clone()), - StreamTable::Sink { name, schema }, - ); - } + if value.len() != 1 { + return plan_err!("invalid `SET updating_ttl` call; expected exactly one expression"); + } - fn insert_table(&mut self, table: StreamTable) { - self.tables - .insert(UniCase::new(table.name().to_string()), table); - } + let duration = duration_from_sql_expr(&value[0])?; + schema_provider.planning_options.ttl = duration; - pub fn get_table(&self, table_name: impl Into) -> Option<&StreamTable> { - self.tables.get(&UniCase::new(table_name.into())) + return Ok(true); } - pub fn get_table_mut(&mut self, table_name: impl Into) -> Option<&mut StreamTable> { - self.tables.get_mut(&UniCase::new(table_name.into())) - } + Ok(false) } -impl ContextProvider for StreamSchemaProvider { - fn get_table_source(&self, name: TableReference) -> Result> { - let table = self - .get_table(name.to_string()) - .ok_or_else(|| DataFusionError::Plan(format!("Table {name} not found")))?; - - let fields = table.get_fields(); - let schema = Arc::new(Schema::new_with_metadata( - fields - .iter() - .map(|f| f.as_ref().clone()) - .collect::>(), - HashMap::new(), - )); - Ok(create_table(name.to_string(), schema)) - } - - fn get_function_meta(&self, name: &str) -> Option> { - self.functions.get(name).cloned() +fn duration_from_sql_expr( + expr: &datafusion::sql::sqlparser::ast::Expr, +) -> Result { + use datafusion::sql::sqlparser::ast::Expr as SqlExpr; + use datafusion::sql::sqlparser::ast::Value as SqlValue; + use 
datafusion::sql::sqlparser::ast::ValueWithSpan; + + match expr { + SqlExpr::Interval(interval) => { + let value_str = match interval.value.as_ref() { + SqlExpr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + .. + }) => s.clone(), + other => return plan_err!("expected interval string literal, found {other}"), + }; + + parse_interval_to_duration(&value_str) + } + SqlExpr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + .. + }) => parse_interval_to_duration(s), + other => plan_err!("expected an interval expression, found {other}"), } +} - fn get_aggregate_meta(&self, name: &str) -> Option> { - self.aggregate_functions.get(name).cloned() +fn parse_interval_to_duration(s: &str) -> Result { + let parts: Vec<&str> = s.trim().split_whitespace().collect(); + if parts.len() != 2 { + return plan_err!("invalid interval string '{s}'; expected ' '"); + } + let value: u64 = parts[0] + .parse() + .map_err(|_| DataFusionError::Plan(format!("invalid interval number: {}", parts[0])))?; + match parts[1].to_lowercase().as_str() { + "second" | "seconds" | "s" => Ok(std::time::Duration::from_secs(value)), + "minute" | "minutes" | "min" => Ok(std::time::Duration::from_secs(value * 60)), + "hour" | "hours" | "h" => Ok(std::time::Duration::from_secs(value * 3600)), + "day" | "days" | "d" => Ok(std::time::Duration::from_secs(value * 86400)), + unit => plan_err!("unsupported interval unit '{unit}'"), } +} - fn get_variable_type(&self, _variable_names: &[String]) -> Option { - None +fn build_sink_inputs(extensions: &[LogicalPlan]) -> HashMap> { + let mut sink_inputs = HashMap::>::new(); + for extension in extensions.iter() { + if let LogicalPlan::Extension(ext) = extension { + if let Some(sink_node) = ext.node.as_any().downcast_ref::() { + if let Some(named_node) = sink_node.node_name() { + let inputs = sink_node + .inputs() + .into_iter() + .cloned() + .collect::>(); + sink_inputs.entry(named_node).or_default().extend(inputs); + } + } + } } + sink_inputs 
+} - fn options(&self) -> &datafusion::config::ConfigOptions { - &self.config_options - } +fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result { + let LogicalPlan::Extension(ref ext) = plan else { + return Ok(plan); + }; + + let Some(sink) = ext.node.as_any().downcast_ref::() else { + return Ok(plan); + }; + + let Some(partition_exprs) = sink.table.partition_exprs() else { + return Ok(plan); + }; + + if partition_exprs.is_empty() { + return Ok(plan); + } + + let inputs = plan + .inputs() + .into_iter() + .map(|input| { + Ok(LogicalPlan::Extension(Extension { + node: Arc::new(KeyCalculationExtension { + name: Some("key-calc-partition".to_string()), + schema: input.schema().clone(), + input: input.clone(), + keys: KeysOrExprs::Exprs(partition_exprs.clone()), + }), + })) + }) + .collect::>()?; + + use datafusion::prelude::col; + let unkey = LogicalPlan::Extension(Extension { + node: Arc::new( + ProjectionExtension::new( + inputs, + Some("unkey".to_string()), + sink.schema().iter().map(|(_, f)| col(f.name())).collect(), + ) + .shuffled(), + ), + }); + + let node = sink.with_exprs_and_inputs(vec![], vec![unkey])?; + Ok(LogicalPlan::Extension(Extension { + node: Arc::new(node), + })) +} - fn get_window_meta(&self, name: &str) -> Option> { - self.window_functions.get(name).cloned() +fn rewrite_sinks(extensions: Vec) -> Result> { + let mut sink_inputs = build_sink_inputs(&extensions); + let mut new_extensions = vec![]; + for extension in extensions { + let mut rewriter = SinkInputRewriter::new(&mut sink_inputs); + let result = extension.rewrite(&mut rewriter)?; + if !rewriter.was_removed { + new_extensions.push(result.data); + } } - fn udf_names(&self) -> Vec { - self.functions.keys().cloned().collect() - } + new_extensions + .into_iter() + .map(maybe_add_key_extension_to_sink) + .collect() +} - fn udaf_names(&self) -> Vec { - self.aggregate_functions.keys().cloned().collect() - } +pub async fn parse_and_get_arrow_program( + query: String, + mut 
schema_provider: StreamSchemaProvider, + _config: SqlConfig, +) -> Result { + let mut config = SessionConfig::new(); + config + .options_mut() + .optimizer + .enable_round_robin_repartition = false; + config.options_mut().optimizer.repartition_aggregations = false; + config.options_mut().optimizer.repartition_windows = false; + config.options_mut().optimizer.repartition_sorts = false; + config.options_mut().optimizer.repartition_joins = false; + config.options_mut().execution.target_partitions = 1; + + let session_state = SessionStateBuilder::new() + .with_config(config) + .with_default_features() + .with_physical_optimizer_rules(vec![]) + .build(); + + let mut inserts = vec![]; + for statement in parse_sql_statements(&query)? { + if try_handle_set_variable(&statement, &mut schema_provider)? { + continue; + } - fn udwf_names(&self) -> Vec { - self.window_functions.keys().cloned().collect() + if let Some(table) = CatalogTable::try_from_statement(&statement, &schema_provider)? { + schema_provider.insert_catalog_table(table); + } else { + inserts.push(Insert::try_from_statement(&statement, &schema_provider)?); + }; } - fn get_expr_planners(&self) -> &[Arc] { - &self.expr_planners + if inserts.is_empty() { + return plan_err!("The provided SQL does not contain a query"); } -} -impl FunctionRegistry for StreamSchemaProvider { - fn udfs(&self) -> HashSet { - self.functions.keys().cloned().collect() - } + let mut used_connections = HashSet::new(); + let mut extensions = vec![]; - fn udf(&self, name: &str) -> Result> { - if let Some(f) = self.functions.get(name) { - Ok(Arc::clone(f)) - } else { - plan_err!("No UDF with name {name}") - } - } + for insert in inserts { + let (plan, sink_name) = match insert { + Insert::InsertQuery { + sink_name, + logical_plan, + } => (logical_plan, Some(sink_name)), + Insert::Anonymous { logical_plan } => (logical_plan, None), + }; - fn udaf(&self, name: &str) -> Result> { - if let Some(f) = self.aggregate_functions.get(name) { - 
Ok(Arc::clone(f)) - } else { - plan_err!("No UDAF with name {name}") - } - } + let mut plan_rewrite = rewrite_plan(plan, &schema_provider)?; - fn udwf(&self, name: &str) -> Result> { - if let Some(f) = self.window_functions.get(name) { - Ok(Arc::clone(f)) - } else { - plan_err!("No UDWF with name {name}") + if plan_rewrite + .schema() + .fields() + .iter() + .any(|f| is_json_union(f.data_type())) + { + plan_rewrite = serialize_outgoing_json(&schema_provider, Arc::new(plan_rewrite)); } - } - fn register_function_rewrite( - &mut self, - rewrite: Arc, - ) -> Result<()> { - self.analyzer.add_function_rewrite(rewrite); - Ok(()) + debug!("Plan = {}", plan_rewrite.display_graphviz()); + + let mut metadata = SourceMetadataVisitor::new(&schema_provider); + plan_rewrite.visit_with_subqueries(&mut metadata)?; + used_connections.extend(metadata.connection_ids.iter()); + + let sink = match sink_name { + Some(sink_name) => { + let table = schema_provider + .get_catalog_table_mut(&sink_name) + .ok_or_else(|| { + DataFusionError::Plan(format!("Connection {sink_name} not found")) + })?; + match table { + CatalogTable::ConnectorTable(c) => { + if let Some(id) = c.id { + used_connections.insert(id); + } + + SinkExtension::new( + TableReference::bare(sink_name), + table.clone(), + plan_rewrite.schema().clone(), + Arc::new(plan_rewrite), + ) + } + CatalogTable::MemoryTable { logical_plan, .. } => { + if logical_plan.is_some() { + return plan_err!("Can only insert into a memory table once"); + } + logical_plan.replace(plan_rewrite); + continue; + } + CatalogTable::LookupTable(_) => { + plan_err!("lookup (temporary) tables cannot be inserted into") + } + CatalogTable::TableFromQuery { .. } => { + plan_err!( + "shouldn't be inserting more data into a table made with CREATE TABLE AS" + ) + } + CatalogTable::PreviewSink { .. 
} => { + plan_err!("queries shouldn't be able insert into preview sink.") + } + } + } + None => SinkExtension::new( + TableReference::parse_str("preview"), + CatalogTable::PreviewSink { + logical_plan: plan_rewrite.clone(), + }, + plan_rewrite.schema().clone(), + Arc::new(plan_rewrite), + ), + }; + extensions.push(LogicalPlan::Extension(Extension { + node: Arc::new(sink?), + })); } - fn register_udf(&mut self, udf: Arc) -> Result>> { - Ok(self.functions.insert(udf.name().to_string(), udf)) - } + let extensions = rewrite_sinks(extensions)?; - fn register_udaf(&mut self, udaf: Arc) -> Result>> { - Ok(self - .aggregate_functions - .insert(udaf.name().to_string(), udaf)) + let mut plan_to_graph_visitor = + physical_planner::PlanToGraphVisitor::new(&schema_provider, &session_state); + for extension in extensions { + plan_to_graph_visitor.add_plan(extension)?; } + let graph = plan_to_graph_visitor.into_graph(); - fn register_udwf(&mut self, udwf: Arc) -> Result>> { - Ok(self.window_functions.insert(udwf.name().to_string(), udwf)) - } + let mut program = LogicalProgram::new(graph, ProgramConfig::default()); - fn register_expr_planner(&mut self, expr_planner: Arc) -> Result<()> { - self.expr_planners.push(expr_planner); - Ok(()) - } + program.optimize(&ChainingOptimizer {}); - fn expr_planners(&self) -> Vec> { - self.expr_planners.clone() - } + Ok(CompiledSql { + program, + connection_ids: used_connections.into_iter().collect(), + }) } diff --git a/src/sql/planner/physical_planner.rs b/src/sql/planner/physical_planner.rs new file mode 100644 index 00000000..e7e1cf60 --- /dev/null +++ b/src/sql/planner/physical_planner.rs @@ -0,0 +1,396 @@ +use std::collections::HashMap; +use std::sync::Arc; +use std::thread; +use std::time::Duration; + +use datafusion::arrow::datatypes::IntervalMonthDayNanoType; +use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; +use datafusion::common::{ + DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, Spans, 
plan_err, +}; +use datafusion::execution::context::SessionState; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::functions::datetime::date_bin; +use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNode}; +use datafusion::physical_expr::PhysicalExpr; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}; +use datafusion_proto::protobuf::{PhysicalExprNode, PhysicalPlanNode}; +use datafusion_proto::{ + physical_plan::AsExecutionPlan, + protobuf::{AggregateMode, physical_plan_node::PhysicalPlanType}, +}; +use petgraph::graph::{DiGraph, NodeIndex}; +use prost::Message; +use tokio::runtime::Builder; +use tokio::sync::oneshot; + +use async_trait::async_trait; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; + +use crate::datastream::logical::{LogicalEdge, LogicalGraph, LogicalNode}; +use crate::sql::physical::{ + DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec, +}; +use crate::sql::planner::StreamSchemaProvider; +use crate::sql::planner::extension::debezium::{ + DEBEZIUM_UNROLLING_EXTENSION_NAME, DebeziumUnrollingExtension, TO_DEBEZIUM_EXTENSION_NAME, +}; +use crate::sql::planner::extension::key_calculation::KeyCalculationExtension; +use crate::sql::planner::extension::{NamedNode, NodeWithIncomingEdges, StreamExtension}; +use crate::sql::planner::schemas::add_timestamp_field_arrow; +use crate::types::{FsSchema, FsSchemaRef}; + +pub(crate) struct PlanToGraphVisitor<'a> { + graph: DiGraph, + output_schemas: HashMap, + named_nodes: HashMap, + traversal: Vec>, + planner: Planner<'a>, +} + +impl<'a> PlanToGraphVisitor<'a> { + pub fn new(schema_provider: &'a StreamSchemaProvider, session_state: &'a SessionState) -> Self { + Self { + graph: Default::default(), + output_schemas: Default::default(), + 
named_nodes: Default::default(), + traversal: vec![], + planner: Planner::new(schema_provider, session_state), + } + } +} + +pub(crate) struct Planner<'a> { + schema_provider: &'a StreamSchemaProvider, + planner: DefaultPhysicalPlanner, + session_state: &'a SessionState, +} + +impl<'a> Planner<'a> { + pub(crate) fn new( + schema_provider: &'a StreamSchemaProvider, + session_state: &'a SessionState, + ) -> Self { + let planner = + DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(FsExtensionPlanner {})]); + Self { + schema_provider, + planner, + session_state, + } + } + + pub(crate) fn sync_plan(&self, plan: &LogicalPlan) -> Result> { + let fut = self.planner.create_physical_plan(plan, self.session_state); + let (tx, mut rx) = oneshot::channel(); + thread::scope(|s| { + let _handle = tokio::runtime::Handle::current(); + let builder = thread::Builder::new(); + let builder = if cfg!(debug_assertions) { + builder.stack_size(10_000_000) + } else { + builder + }; + builder + .spawn_scoped(s, move || { + let rt = Builder::new_current_thread().enable_all().build().unwrap(); + rt.block_on(async { + let plan = fut.await; + tx.send(plan).unwrap(); + }); + }) + .unwrap(); + }); + + rx.try_recv().unwrap() + } + + pub(crate) fn create_physical_expr( + &self, + expr: &Expr, + input_dfschema: &DFSchema, + ) -> Result> { + self.planner + .create_physical_expr(expr, input_dfschema, self.session_state) + } + + pub(crate) fn serialize_as_physical_expr( + &self, + expr: &Expr, + schema: &DFSchema, + ) -> Result> { + let physical = self.create_physical_expr(expr, schema)?; + let proto = serialize_physical_expr(&physical, &DefaultPhysicalExtensionCodec {})?; + Ok(proto.encode_to_vec()) + } + + pub(crate) fn split_physical_plan( + &self, + key_indices: Vec, + aggregate: &LogicalPlan, + add_timestamp_field: bool, + ) -> Result { + let physical_plan = self.sync_plan(aggregate)?; + let codec = FsPhysicalExtensionCodec { + context: DecodingContext::Planning, + }; + let mut 
physical_plan_node = + PhysicalPlanNode::try_from_physical_plan(physical_plan.clone(), &codec)?; + let PhysicalPlanType::Aggregate(mut final_aggregate_proto) = physical_plan_node + .physical_plan_type + .take() + .ok_or_else(|| DataFusionError::Plan("missing physical plan type".to_string()))? + else { + return plan_err!("unexpected physical plan type"); + }; + let AggregateMode::Final = final_aggregate_proto.mode() else { + return plan_err!("unexpected physical plan type"); + }; + + let partial_aggregation_plan = *final_aggregate_proto + .input + .take() + .ok_or_else(|| DataFusionError::Plan("missing input".to_string()))?; + + let partial_aggregation_exec_plan = partial_aggregation_plan.try_into_physical_plan( + self.schema_provider, + &RuntimeEnvBuilder::new().build().unwrap(), + &codec, + )?; + + let partial_schema = partial_aggregation_exec_plan.schema(); + let final_input_table_provider = FsMemExec::new("partial".into(), partial_schema.clone()); + + final_aggregate_proto.input = Some(Box::new(PhysicalPlanNode::try_from_physical_plan( + Arc::new(final_input_table_provider), + &codec, + )?)); + + let finish_plan = PhysicalPlanNode { + physical_plan_type: Some(PhysicalPlanType::Aggregate(final_aggregate_proto)), + }; + + let (partial_schema, timestamp_index) = if add_timestamp_field { + ( + add_timestamp_field_arrow((*partial_schema).clone()), + partial_schema.fields().len(), + ) + } else { + (partial_schema.clone(), partial_schema.fields().len() - 1) + }; + + let partial_schema = FsSchema::new_keyed(partial_schema, timestamp_index, key_indices); + + Ok(SplitPlanOutput { + partial_aggregation_plan, + partial_schema, + finish_plan, + }) + } + + pub fn binning_function_proto( + &self, + width: Duration, + input_schema: DFSchemaRef, + ) -> Result { + let date_bin = date_bin().call(vec![ + Expr::Literal( + ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + 0, + 0, + width.as_nanos() as i64, + ))), + None, + ), + 
Expr::Column(datafusion::common::Column { + relation: None, + name: "_timestamp".into(), + spans: Spans::new(), + }), + ]); + + let binning_function = self.create_physical_expr(&date_bin, &input_schema)?; + serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {}) + } +} + +struct FsExtensionPlanner {} + +#[async_trait] +impl ExtensionPlanner for FsExtensionPlanner { + async fn plan_extension( + &self, + _planner: &dyn PhysicalPlanner, + node: &dyn UserDefinedLogicalNode, + _logical_inputs: &[&LogicalPlan], + physical_inputs: &[Arc], + _session_state: &SessionState, + ) -> Result>> { + let schema = node.schema().as_ref().into(); + if let Ok::<&dyn StreamExtension, _>(stream_extension) = node.try_into() { + if stream_extension.transparent() { + match node.name() { + DEBEZIUM_UNROLLING_EXTENSION_NAME => { + let node = node + .as_any() + .downcast_ref::() + .unwrap(); + let input = physical_inputs[0].clone(); + return Ok(Some(Arc::new(DebeziumUnrollingExec::try_new( + input, + node.primary_keys.clone(), + )?))); + } + TO_DEBEZIUM_EXTENSION_NAME => { + let input = physical_inputs[0].clone(); + return Ok(Some(Arc::new(ToDebeziumExec::try_new(input)?))); + } + _ => return Ok(None), + } + } + }; + let name = + if let Some(key_extension) = node.as_any().downcast_ref::() { + key_extension.name.clone() + } else { + None + }; + Ok(Some(Arc::new(FsMemExec::new( + name.unwrap_or("memory".to_string()), + Arc::new(schema), + )))) + } +} + +impl PlanToGraphVisitor<'_> { + fn add_index_to_traversal(&mut self, index: NodeIndex) { + if let Some(last) = self.traversal.last_mut() { + last.push(index); + } + } + + pub(crate) fn add_plan(&mut self, plan: LogicalPlan) -> Result<()> { + self.traversal.clear(); + plan.visit(self)?; + Ok(()) + } + + pub fn into_graph(self) -> LogicalGraph { + self.graph + } + + pub fn build_extension( + &mut self, + input_nodes: Vec, + extension: &dyn StreamExtension, + ) -> Result<()> { + if let Some(node_name) = extension.node_name() { 
+ if self.named_nodes.contains_key(&node_name) { + return plan_err!( + "extension {:?} has already been planned, shouldn't try again.", + node_name + ); + } + } + + let input_schemas = input_nodes + .iter() + .map(|index| { + Ok(self + .output_schemas + .get(index) + .ok_or_else(|| DataFusionError::Plan("missing input node".to_string()))? + .clone()) + }) + .collect::>>()?; + + let NodeWithIncomingEdges { node, edges } = extension + .plan_node(&self.planner, self.graph.node_count(), input_schemas) + .map_err(|e| e.context(format!("planning operator {extension:?}")))?; + + let node_index = self.graph.add_node(node); + self.add_index_to_traversal(node_index); + + for (source, edge) in input_nodes.into_iter().zip(edges.into_iter()) { + self.graph.add_edge(source, node_index, edge); + } + + self.output_schemas + .insert(node_index, extension.output_schema().into()); + + if let Some(node_name) = extension.node_name() { + self.named_nodes.insert(node_name, node_index); + } + Ok(()) + } +} + +impl TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> { + type Node = LogicalPlan; + + fn f_down(&mut self, node: &Self::Node) -> Result { + let LogicalPlan::Extension(Extension { node }) = node else { + return Ok(TreeNodeRecursion::Continue); + }; + + let stream_extension: &dyn StreamExtension = node + .try_into() + .map_err(|e: DataFusionError| e.context("converting extension"))?; + if stream_extension.transparent() { + return Ok(TreeNodeRecursion::Continue); + } + + if let Some(name) = stream_extension.node_name() { + if let Some(node_index) = self.named_nodes.get(&name) { + self.add_index_to_traversal(*node_index); + return Ok(TreeNodeRecursion::Jump); + } + } + + if !node.inputs().is_empty() { + self.traversal.push(vec![]); + } + + Ok(TreeNodeRecursion::Continue) + } + + fn f_up(&mut self, node: &Self::Node) -> Result { + let LogicalPlan::Extension(Extension { node }) = node else { + return Ok(TreeNodeRecursion::Continue); + }; + + let stream_extension: &dyn StreamExtension = node 
+ .try_into() + .map_err(|e: DataFusionError| e.context("planning extension"))?; + + if stream_extension.transparent() { + return Ok(TreeNodeRecursion::Continue); + } + + if let Some(name) = stream_extension.node_name() { + if self.named_nodes.contains_key(&name) { + return Ok(TreeNodeRecursion::Continue); + } + } + + let input_nodes = if !node.inputs().is_empty() { + self.traversal.pop().unwrap_or_default() + } else { + vec![] + }; + let stream_extension: &dyn StreamExtension = node + .try_into() + .map_err(|e: DataFusionError| e.context("converting extension"))?; + self.build_extension(input_nodes, stream_extension)?; + + Ok(TreeNodeRecursion::Continue) + } +} + +pub(crate) struct SplitPlanOutput { + pub(crate) partial_aggregation_plan: PhysicalPlanNode, + pub(crate) partial_schema: FsSchema, + pub(crate) finish_plan: PhysicalPlanNode, +} diff --git a/src/sql/planner/plan/aggregate.rs b/src/sql/planner/plan/aggregate.rs index 6ed7499d..aad17edb 100644 --- a/src/sql/planner/plan/aggregate.rs +++ b/src/sql/planner/plan/aggregate.rs @@ -12,7 +12,7 @@ use crate::sql::planner::StreamSchemaProvider; use crate::sql::planner::extension::aggregate::AggregateExtension; use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; use crate::sql::planner::plan::WindowDetectingVisitor; -use crate::sql::planner::types::{ +use crate::sql::types::{ DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window, schema_from_df_fields_with_metadata, }; diff --git a/src/sql/planner/plan/join.rs b/src/sql/planner/plan/join.rs index f8225905..04a27e9b 100644 --- a/src/sql/planner/plan/join.rs +++ b/src/sql/planner/plan/join.rs @@ -16,9 +16,7 @@ use crate::sql::planner::StreamSchemaProvider; use crate::sql::planner::extension::join::JoinExtension; use crate::sql::planner::extension::key_calculation::KeyCalculationExtension; use crate::sql::planner::plan::WindowDetectingVisitor; -use crate::sql::planner::types::{ - WindowType, 
fields_with_qualifiers, schema_from_df_fields_with_metadata, -}; +use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata}; pub(crate) struct JoinRewriter<'a> { pub schema_provider: &'a StreamSchemaProvider, diff --git a/src/sql/planner/plan/mod.rs b/src/sql/planner/plan/mod.rs index 8d1dd388..d497ca65 100644 --- a/src/sql/planner/plan/mod.rs +++ b/src/sql/planner/plan/mod.rs @@ -15,7 +15,7 @@ use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, Aggreg use crate::sql::planner::extension::join::JOIN_NODE_NAME; use crate::sql::planner::extension::remote_table::RemoteTableExtension; use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field}; -use crate::sql::planner::types::{ +use crate::sql::types::{ DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window, }; diff --git a/src/sql/planner/plan/window_fn.rs b/src/sql/planner/plan/window_fn.rs index 0bd3314f..66f673d1 100644 --- a/src/sql/planner/plan/window_fn.rs +++ b/src/sql/planner/plan/window_fn.rs @@ -12,7 +12,7 @@ use tracing::debug; use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; use crate::sql::planner::extension::window_fn::WindowFunctionExtension; use crate::sql::planner::plan::{WindowDetectingVisitor, extract_column}; -use crate::sql::planner::types::{WindowType, fields_with_qualifiers, schema_from_df_fields}; +use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields}; pub(crate) struct WindowFunctionRewriter; diff --git a/src/sql/planner/rewrite/async_udf_rewriter.rs b/src/sql/planner/rewrite/async_udf_rewriter.rs new file mode 100644 index 00000000..def3c4ef --- /dev/null +++ b/src/sql/planner/rewrite/async_udf_rewriter.rs @@ -0,0 +1,118 @@ +use crate::sql::planner::extension::remote_table::RemoteTableExtension; +use crate::sql::planner::extension::{ASYNC_RESULT_FIELD, AsyncUDFExtension}; +use 
crate::sql::planner::mod_prelude::StreamSchemaProvider; +use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; +use datafusion::common::{Column, Result as DFResult, TableReference, plan_err}; +use datafusion::logical_expr::expr::ScalarFunction; +use datafusion::logical_expr::{Expr, Extension, LogicalPlan}; +use std::sync::Arc; +use std::time::Duration; + +type AsyncSplitResult = (String, AsyncOptions, Vec); + +#[derive(Debug, Clone, Copy)] +pub struct AsyncOptions { + pub ordered: bool, + pub max_concurrency: usize, + pub timeout: Duration, +} + +pub struct AsyncUdfRewriter<'a> { + provider: &'a StreamSchemaProvider, +} + +impl<'a> AsyncUdfRewriter<'a> { + pub fn new(provider: &'a StreamSchemaProvider) -> Self { + Self { provider } + } + + fn split_async( + expr: Expr, + provider: &StreamSchemaProvider, + ) -> DFResult<(Expr, Option)> { + let mut found: Option<(String, AsyncOptions, Vec)> = None; + let expr = expr.transform_up(|e| { + if let Expr::ScalarFunction(ScalarFunction { func: udf, args }) = &e { + if let Some(opts) = provider.get_async_udf_options(udf.name()) { + if found + .replace((udf.name().to_string(), opts, args.clone())) + .is_some() + { + return plan_err!( + "multiple async calls in the same expression, which is not allowed" + ); + } + return Ok(Transformed::yes(Expr::Column(Column::new_unqualified( + ASYNC_RESULT_FIELD, + )))); + } + } + Ok(Transformed::no(e)) + })?; + + Ok((expr.data, found)) + } +} + +impl TreeNodeRewriter for AsyncUdfRewriter<'_> { + type Node = LogicalPlan; + + fn f_up(&mut self, node: Self::Node) -> DFResult> { + let LogicalPlan::Projection(mut projection) = node else { + for e in node.expressions() { + if let (_, Some((udf, _, _))) = Self::split_async(e.clone(), self.provider)? 
{ + return plan_err!( + "async UDFs are only supported in projections, but {udf} was called in another context" + ); + } + } + return Ok(Transformed::no(node)); + }; + + let mut args = None; + for e in projection.expr.iter_mut() { + let (new_e, Some(udf)) = Self::split_async(e.clone(), self.provider)? else { + continue; + }; + if let Some((prev, _, _)) = args.replace(udf) { + return plan_err!( + "Projection contains multiple async UDFs, which is not supported \ + \n(hint: two async UDF calls, {} and {}, appear in the same SELECT statement)", + prev, + args.unwrap().0 + ); + } + *e = new_e; + } + + let Some((name, opts, arg_exprs)) = args else { + return Ok(Transformed::no(LogicalPlan::Projection(projection))); + }; + + let input = if matches!(*projection.input, LogicalPlan::Projection(..)) { + Arc::new(LogicalPlan::Extension(Extension { + node: Arc::new(RemoteTableExtension { + input: (*projection.input).clone(), + name: TableReference::bare("subquery_projection"), + schema: projection.input.schema().clone(), + materialize: false, + }), + })) + } else { + projection.input + }; + + Ok(Transformed::yes(LogicalPlan::Extension(Extension { + node: Arc::new(AsyncUDFExtension { + input, + name, + arg_exprs, + final_exprs: projection.expr, + ordered: opts.ordered, + max_concurrency: opts.max_concurrency, + timeout: opts.timeout, + final_schema: projection.schema, + }), + }))) + } +} diff --git a/src/sql/planner/rewrite/mod.rs b/src/sql/planner/rewrite/mod.rs new file mode 100644 index 00000000..20b2e9bb --- /dev/null +++ b/src/sql/planner/rewrite/mod.rs @@ -0,0 +1,27 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod async_udf_rewriter; +pub mod row_time; +pub mod sink_input_rewriter; +pub mod source_metadata_visitor; +pub mod source_rewriter; +pub mod time_window; +pub mod unnest_rewriter; + +pub use async_udf_rewriter::{AsyncOptions, AsyncUdfRewriter}; +pub use row_time::RowTimeRewriter; +pub use sink_input_rewriter::SinkInputRewriter; +pub use source_metadata_visitor::SourceMetadataVisitor; +pub use source_rewriter::SourceRewriter; +pub use time_window::{TimeWindowNullCheckRemover, TimeWindowUdfChecker, is_time_window}; +pub use unnest_rewriter::{UNNESTED_COL, UnnestRewriter}; diff --git a/src/sql/planner/rewrite/row_time.rs b/src/sql/planner/rewrite/row_time.rs new file mode 100644 index 00000000..51309feb --- /dev/null +++ b/src/sql/planner/rewrite/row_time.rs @@ -0,0 +1,39 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
+use datafusion::common::{Column, Result as DFResult};
+use datafusion::logical_expr::Expr;
+
+use crate::sql::types::TIMESTAMP_FIELD;
+
+/// Rewrites `row_time()` scalar function calls to a column reference on `_timestamp`.
+///
+/// The replacement is an unqualified `TIMESTAMP_FIELD` column aliased as
+/// `row_time()`, so the output keeps the display name of the call it replaces.
+pub struct RowTimeRewriter {}
+
+impl TreeNodeRewriter for RowTimeRewriter {
+    type Node = Expr;
+
+    /// Replaces a scalar function named `row_time` with the aliased timestamp
+    /// column; every other expression is handed back unchanged.
+    fn f_down(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
+        if let Expr::ScalarFunction(func) = &node
+            && func.name() == "row_time"
+        {
+            let transformed = Expr::Column(Column {
+                relation: None,
+                name: TIMESTAMP_FIELD.to_string(),
+                spans: Default::default(),
+            })
+            .alias("row_time()");
+            return Ok(Transformed::yes(transformed));
+        }
+        Ok(Transformed::no(node))
+    }
+}
diff --git a/src/sql/planner/rewrite/sink_input_rewriter.rs b/src/sql/planner/rewrite/sink_input_rewriter.rs
new file mode 100644
index 00000000..e6b6a0bd
--- /dev/null
+++ b/src/sql/planner/rewrite/sink_input_rewriter.rs
@@ -0,0 +1,46 @@
+use crate::sql::planner::extension::sink::SinkExtension;
+use crate::sql::planner::extension::{NamedNode, StreamExtension};
+use datafusion::common::Result as DFResult;
+use datafusion::common::tree_node::{Transformed, TreeNodeRecursion, TreeNodeRewriter};
+use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore};
+use std::collections::HashMap;
+use std::sync::Arc;
+
+type SinkInputs = HashMap<NamedNode, Vec<LogicalPlan>>;
+
+/// Merges inputs for sinks with the same name to avoid duplicate sinks in the plan.
+pub struct SinkInputRewriter<'a> {
+    /// Inputs collected per sink name; an entry is removed once it has been
+    /// attached to a sink node.
+    sink_inputs: &'a mut SinkInputs,
+    /// Set when a named sink is visited that has no entry in `sink_inputs`.
+    pub was_removed: bool,
+}
+
+impl<'a> SinkInputRewriter<'a> {
+    /// Creates a rewriter that consumes entries from `sink_inputs`.
+    pub fn new(sink_inputs: &'a mut SinkInputs) -> Self {
+        Self {
+            sink_inputs,
+            was_removed: false,
+        }
+    }
+}
+
+impl TreeNodeRewriter for SinkInputRewriter<'_> {
+    type Node = LogicalPlan;
+
+    /// For a named `SinkExtension`, takes that name's inputs out of the map and
+    /// rebuilds the sink on top of them, stopping recursion below the new node.
+    /// A named sink without an entry only flips `was_removed`; all other nodes
+    /// pass through unchanged.
+    fn f_down(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
+        if let LogicalPlan::Extension(extension) = &node {
+            if let Some(sink_node) = extension.node.as_any().downcast_ref::<SinkExtension>() {
+                if let Some(named_node) = sink_node.node_name() {
+                    if let Some(inputs) = self.sink_inputs.remove(&named_node) {
+                        let new_node = LogicalPlan::Extension(Extension {
+                            node: Arc::new(sink_node.with_exprs_and_inputs(vec![], inputs)?),
+                        });
+                        // Jump: the replacement's inputs must not be visited again.
+                        return Ok(Transformed::new(new_node, true, TreeNodeRecursion::Jump));
+                    } else {
+                        self.was_removed = true;
+                    }
+                }
+            }
+        }
+        Ok(Transformed::no(node))
+    }
+}
diff --git a/src/sql/planner/rewrite/source_metadata_visitor.rs b/src/sql/planner/rewrite/source_metadata_visitor.rs
new file mode 100644
index 00000000..168ff712
--- /dev/null
+++ b/src/sql/planner/rewrite/source_metadata_visitor.rs
@@ -0,0 +1,57 @@
+use crate::sql::planner::extension::sink::SinkExtension;
+use crate::sql::planner::extension::table_source::TableSourceExtension;
+use crate::sql::planner::mod_prelude::StreamSchemaProvider;
+use datafusion::common::Result as DFResult;
+use datafusion::common::tree_node::{TreeNodeRecursion, TreeNodeVisitor};
+use datafusion::logical_expr::{Extension, LogicalPlan};
+use std::collections::HashSet;
+
+/// Collects connection IDs from source and sink nodes in the logical plan.
+// NOTE(review): the generic arguments on `HashSet`, `Option`, `downcast_ref`
+// and `DFResult` below are missing from this copy of the patch; the id type
+// cannot be determined from here - restore them from the original source.
+pub struct SourceMetadataVisitor<'a> {
+    // Catalog used to resolve a table name to its catalog entry.
+    schema_provider: &'a StreamSchemaProvider,
+    // Ids gathered so far; filled in by `f_down`.
+    pub connection_ids: HashSet,
+}
+
+impl<'a> SourceMetadataVisitor<'a> {
+    /// Creates a visitor with an empty id set.
+    pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self {
+        Self {
+            schema_provider,
+            connection_ids: HashSet::new(),
+        }
+    }
+
+    /// Returns the `id` of the connector table behind `node`, or `None` when the
+    /// node is not an extension named `TableSourceExtension` / `SinkExtension`,
+    /// the downcast fails, the table is not in the catalog, or the catalog entry
+    /// is not a `ConnectorTable`.
+    fn get_connection_id(&self, node: &LogicalPlan) -> Option {
+        let LogicalPlan::Extension(Extension { node }) = node else {
+            return None;
+        };
+
+        // Dispatch on the extension's reported name, then downcast to read `name`.
+        let table_name = match node.name() {
+            "TableSourceExtension" => {
+                let ext = node.as_any().downcast_ref::()?;
+                ext.name.to_string()
+            }
+            "SinkExtension" => {
+                let ext = node.as_any().downcast_ref::()?;
+                ext.name.to_string()
+            }
+            _ => return None,
+        };
+
+        let table = self.schema_provider.get_catalog_table(&table_name)?;
+        match table {
+            crate::sql::catalog::table::Table::ConnectorTable(t) => t.id,
+            _ => None,
+        }
+    }
+}
+
+impl TreeNodeVisitor<'_> for SourceMetadataVisitor<'_> {
+    type Node = LogicalPlan;
+
+    /// Records the node's connection id, if it has one, and always continues.
+    fn f_down(&mut self, node: &Self::Node) -> DFResult {
+        if let Some(id) = self.get_connection_id(node) {
+            self.connection_ids.insert(id);
+        }
+        Ok(TreeNodeRecursion::Continue)
+    }
+}
diff --git a/src/sql/planner/rewrite/source_rewriter.rs b/src/sql/planner/rewrite/source_rewriter.rs
new file mode 100644
index 00000000..209c3288
--- /dev/null
+++ b/src/sql/planner/rewrite/source_rewriter.rs
@@ -0,0 +1,272 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +use std::sync::Arc; +use std::time::Duration; + +use datafusion::common::ScalarValue; +use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; +use datafusion::common::{Column, DataFusionError, Result as DFResult, TableReference, plan_err}; +use datafusion::logical_expr::{ + self, BinaryExpr, Expr, Extension, LogicalPlan, Projection, TableScan, +}; + +use crate::sql::catalog::connector_table::ConnectorTable; +use crate::sql::catalog::field_spec::FieldSpec; +use crate::sql::catalog::table::Table; +use crate::sql::catalog::utils::add_timestamp_field; +use crate::sql::planner::StreamSchemaProvider; +use crate::sql::planner::extension::remote_table::RemoteTableExtension; +use crate::sql::planner::extension::watermark_node::WatermarkNode; +use crate::sql::types::TIMESTAMP_FIELD; + +/// Rewrites table scans into proper source nodes with projections and watermarks. +pub struct SourceRewriter<'a> { + pub(crate) schema_provider: &'a StreamSchemaProvider, +} + +impl SourceRewriter<'_> { + fn watermark_expression(table: &ConnectorTable) -> DFResult { + match table.watermark_field.clone() { + Some(watermark_field) => table + .fields + .iter() + .find_map(|f| { + if f.field().name() == &watermark_field { + return match f { + FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => { + Some(Expr::Column(Column { + relation: None, + name: field.name().to_string(), + spans: Default::default(), + })) + } + FieldSpec::Virtual { expression, .. 
} => Some(*expression.clone()), + }; + } + None + }) + .ok_or_else(|| { + DataFusionError::Plan(format!("Watermark field {watermark_field} not found")) + }), + None => Ok(Expr::BinaryExpr(BinaryExpr { + left: Box::new(Expr::Column(Column { + relation: None, + name: TIMESTAMP_FIELD.to_string(), + spans: Default::default(), + })), + op: logical_expr::Operator::Minus, + right: Box::new(Expr::Literal( + ScalarValue::DurationNanosecond(Some(Duration::from_secs(1).as_nanos() as i64)), + None, + )), + })), + } + } + + fn projection_expressions( + table: &ConnectorTable, + qualifier: &TableReference, + projection: &Option>, + ) -> DFResult> { + let mut expressions: Vec = table + .fields + .iter() + .map(|field| match field { + FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => { + Expr::Column(Column { + relation: Some(qualifier.clone()), + name: field.name().to_string(), + spans: Default::default(), + }) + } + FieldSpec::Virtual { field, expression } => expression + .clone() + .alias_qualified(Some(qualifier.clone()), field.name().to_string()), + }) + .collect(); + + if let Some(proj) = projection { + expressions = proj.iter().map(|i| expressions[*i].clone()).collect(); + } + + if let Some(event_time_field) = table.event_time_field.clone() { + let expr = table + .fields + .iter() + .find_map(|f| { + if f.field().name() == &event_time_field { + return match f { + FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => { + Some(Expr::Column(Column { + relation: Some(qualifier.clone()), + name: field.name().to_string(), + spans: Default::default(), + })) + } + FieldSpec::Virtual { expression, .. 
} => Some(*expression.clone()), + }; + } + None + }) + .ok_or_else(|| { + DataFusionError::Plan(format!("Event time field {event_time_field} not found")) + })?; + + expressions + .push(expr.alias_qualified(Some(qualifier.clone()), TIMESTAMP_FIELD.to_string())); + } else { + expressions.push(Expr::Column(Column::new( + Some(qualifier.clone()), + TIMESTAMP_FIELD, + ))); + } + + Ok(expressions) + } + + fn projection(&self, table_scan: &TableScan, table: &ConnectorTable) -> DFResult { + let qualifier = table_scan.table_name.clone(); + + // TODO: replace with TableSourceExtension when available + let source_input = LogicalPlan::TableScan(table_scan.clone()); + + Ok(LogicalPlan::Projection(Projection::try_new( + Self::projection_expressions(table, &qualifier, &table_scan.projection)?, + Arc::new(source_input), + )?)) + } + + fn mutate_connector_table( + &self, + table_scan: &TableScan, + table: &ConnectorTable, + ) -> DFResult> { + let input = self.projection(table_scan, table)?; + + let schema = input.schema().clone(); + let remote = LogicalPlan::Extension(Extension { + node: Arc::new(RemoteTableExtension { + input, + name: table_scan.table_name.to_owned(), + schema, + materialize: true, + }), + }); + + let watermark_node = WatermarkNode::new( + remote, + table_scan.table_name.clone(), + Self::watermark_expression(table)?, + ) + .map_err(|err| { + DataFusionError::Internal(format!("failed to create watermark expression: {err}")) + })?; + + Ok(Transformed::yes(LogicalPlan::Extension(Extension { + node: Arc::new(watermark_node), + }))) + } + + fn mutate_table_from_query( + &self, + table_scan: &TableScan, + logical_plan: &LogicalPlan, + ) -> DFResult> { + let column_expressions: Vec<_> = if let Some(projection) = &table_scan.projection { + logical_plan + .schema() + .columns() + .into_iter() + .enumerate() + .filter_map(|(i, col)| { + if projection.contains(&i) { + Some(Expr::Column(col)) + } else { + None + } + }) + .collect() + } else { + logical_plan + .schema() + 
.columns() + .into_iter() + .map(Expr::Column) + .collect() + }; + + let target_columns: Vec<_> = table_scan.projected_schema.columns().into_iter().collect(); + + let expressions = column_expressions + .into_iter() + .zip(target_columns) + .map(|(expr, col)| expr.alias_qualified(col.relation, col.name)) + .collect(); + + let projection = LogicalPlan::Projection(Projection::try_new_with_schema( + expressions, + Arc::new(logical_plan.clone()), + table_scan.projected_schema.clone(), + )?); + + Ok(Transformed::yes(projection)) + } +} + +impl TreeNodeRewriter for SourceRewriter<'_> { + type Node = LogicalPlan; + + fn f_up(&mut self, node: Self::Node) -> DFResult> { + let LogicalPlan::TableScan(mut table_scan) = node else { + return Ok(Transformed::no(node)); + }; + + let table_name = table_scan.table_name.table(); + let table = self + .schema_provider + .get_catalog_table(table_name) + .ok_or_else(|| DataFusionError::Plan(format!("Table {table_name} not found")))?; + + match table { + Table::ConnectorTable(table) => self.mutate_connector_table(&table_scan, table), + Table::LookupTable(_table) => { + // TODO: implement LookupSource extension + plan_err!("Lookup tables are not yet supported") + } + Table::MemoryTable { + name, + fields: _, + logical_plan, + } => { + let Some(logical_plan) = logical_plan else { + return plan_err!( + "Can't query from memory table {} without first inserting into it", + name + ); + }; + table_scan.projected_schema = add_timestamp_field( + table_scan.projected_schema.clone(), + Some(table_scan.table_name.clone()), + )?; + self.mutate_table_from_query(&table_scan, logical_plan) + } + Table::TableFromQuery { + name: _, + logical_plan, + } => self.mutate_table_from_query(&table_scan, logical_plan), + Table::PreviewSink { .. 
} => Err(DataFusionError::Plan( + "can't select from a preview sink".to_string(), + )), + } + } +} diff --git a/src/sql/planner/rewrite/time_window.rs b/src/sql/planner/rewrite/time_window.rs new file mode 100644 index 00000000..104c0cca --- /dev/null +++ b/src/sql/planner/rewrite/time_window.rs @@ -0,0 +1,83 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use datafusion::common::tree_node::{ + Transformed, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor, +}; +use datafusion::common::{DataFusionError, Result as DFResult, ScalarValue, plan_err}; +use datafusion::logical_expr::expr::ScalarFunction; +use datafusion::logical_expr::{Expr, LogicalPlan}; + +/// Returns the time window function name if the expression is one (tumble/hop/session). +pub fn is_time_window(expr: &Expr) -> Option<&str> { + if let Expr::ScalarFunction(ScalarFunction { func, args: _ }) = expr { + match func.name() { + "tumble" | "hop" | "session" => return Some(func.name()), + _ => {} + } + } + None +} + +struct TimeWindowExprChecker {} + +impl TreeNodeVisitor<'_> for TimeWindowExprChecker { + type Node = Expr; + + fn f_down(&mut self, node: &Self::Node) -> DFResult { + if let Some(w) = is_time_window(node) { + return plan_err!( + "time window function {} is not allowed in this context. \ + Are you missing a GROUP BY clause?", + w + ); + } + Ok(TreeNodeRecursion::Continue) + } +} + +/// Visitor that checks an entire LogicalPlan for misplaced time window UDFs. 
+pub struct TimeWindowUdfChecker {}
+
+impl TreeNodeVisitor<'_> for TimeWindowUdfChecker {
+    type Node = LogicalPlan;
+
+    /// Runs a `TimeWindowExprChecker` over every expression of `node` and
+    /// propagates its first error; otherwise the walk continues.
+    fn f_down(&mut self, node: &Self::Node) -> DFResult<TreeNodeRecursion> {
+        use datafusion::common::tree_node::TreeNode;
+        node.expressions().iter().try_for_each(|expr| {
+            let mut checker = TimeWindowExprChecker {};
+            expr.visit(&mut checker)?;
+            Ok::<(), DataFusionError>(())
+        })?;
+        Ok(TreeNodeRecursion::Continue)
+    }
+}
+
+/// Removes `IS NOT NULL` checks wrapping time window functions,
+/// replacing them with `true` since time windows are never null.
+pub struct TimeWindowNullCheckRemover {}
+
+impl TreeNodeRewriter for TimeWindowNullCheckRemover {
+    type Node = Expr;
+
+    /// Turns `IS NOT NULL` over a direct time window call into the boolean
+    /// literal `true`; every other expression is returned unchanged.
+    fn f_down(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
+        if let Expr::IsNotNull(expr) = &node
+            && is_time_window(expr).is_some()
+        {
+            return Ok(Transformed::yes(Expr::Literal(
+                ScalarValue::Boolean(Some(true)),
+                None,
+            )));
+        }
+        Ok(Transformed::no(node))
+    }
+}
diff --git a/src/sql/planner/rewrite/unnest_rewriter.rs b/src/sql/planner/rewrite/unnest_rewriter.rs
new file mode 100644
index 00000000..2a9eabda
--- /dev/null
+++ b/src/sql/planner/rewrite/unnest_rewriter.rs
@@ -0,0 +1,178 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +use std::sync::Arc; + +use datafusion::arrow::datatypes::DataType; +use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; +use datafusion::common::{Column, Result as DFResult, plan_err}; +use datafusion::logical_expr::expr::ScalarFunction; +use datafusion::logical_expr::{ColumnUnnestList, Expr, LogicalPlan, Projection, Unnest}; + +use crate::sql::types::{DFField, fields_with_qualifiers, schema_from_df_fields}; + +pub const UNNESTED_COL: &str = "__unnested"; + +/// Rewrites projections containing `unnest()` calls into proper Unnest logical plans. +pub struct UnnestRewriter {} + +impl UnnestRewriter { + fn split_unnest(expr: Expr) -> DFResult<(Expr, Option)> { + let mut captured: Option = None; + + let expr = expr.transform_up(|e| { + if let Expr::ScalarFunction(ScalarFunction { func: udf, args }) = &e + && udf.name() == "unnest" + { + match args.len() { + 1 => { + if captured.replace(args[0].clone()).is_some() { + return plan_err!( + "Multiple unnests in expression, which is not allowed" + ); + } + return Ok(Transformed::yes(Expr::Column(Column::new_unqualified( + UNNESTED_COL, + )))); + } + n => { + panic!("Unnest has wrong number of arguments (expected 1, found {n})"); + } + } + } + Ok(Transformed::no(e)) + })?; + + Ok((expr.data, captured)) + } +} + +impl TreeNodeRewriter for UnnestRewriter { + type Node = LogicalPlan; + + fn f_up(&mut self, node: Self::Node) -> DFResult> { + let LogicalPlan::Projection(projection) = &node else { + if node.expressions().iter().any(|e| { + let e = Self::split_unnest(e.clone()); + e.is_err() || e.unwrap().1.is_some() + }) { + return plan_err!("unnest is only supported in SELECT statements"); + } + return Ok(Transformed::no(node)); + }; + + let mut unnest = None; + let exprs = projection + .expr + .clone() + .into_iter() + .enumerate() + .map(|(i, expr)| { + let (expr, opt) = Self::split_unnest(expr)?; + let is_unnest = if let Some(e) = opt { + if let Some(prev) = unnest.replace((e, i)) + && &prev != 
unnest.as_ref().unwrap() + { + return plan_err!( + "Projection contains multiple unnests, which is not currently supported" + ); + } + true + } else { + false + }; + + Ok((expr, is_unnest)) + }) + .collect::>>()?; + + if let Some((unnest_inner, unnest_idx)) = unnest { + let produce_list = Arc::new(LogicalPlan::Projection(Projection::try_new( + exprs + .iter() + .cloned() + .map(|(e, is_unnest)| { + if is_unnest { + unnest_inner.clone().alias(UNNESTED_COL) + } else { + e + } + }) + .collect(), + projection.input.clone(), + )?)); + + let unnest_fields = fields_with_qualifiers(produce_list.schema()) + .iter() + .enumerate() + .map(|(i, f)| { + if i == unnest_idx { + let DataType::List(inner) = f.data_type() else { + return plan_err!( + "Argument '{}' to unnest is not a List", + f.qualified_name() + ); + }; + Ok(DFField::new_unqualified( + UNNESTED_COL, + inner.data_type().clone(), + inner.is_nullable(), + )) + } else { + Ok((*f).clone()) + } + }) + .collect::>>()?; + + let unnest_node = LogicalPlan::Unnest(Unnest { + exec_columns: vec![ + DFField::from(produce_list.schema().qualified_field(unnest_idx)) + .qualified_column(), + ], + input: produce_list, + list_type_columns: vec![( + unnest_idx, + ColumnUnnestList { + output_column: Column::new_unqualified(UNNESTED_COL), + depth: 1, + }, + )], + struct_type_columns: vec![], + dependency_indices: vec![], + schema: Arc::new(schema_from_df_fields(&unnest_fields)?), + options: Default::default(), + }); + + let output_node = LogicalPlan::Projection(Projection::try_new( + exprs + .iter() + .enumerate() + .map(|(i, (expr, has_unnest))| { + if *has_unnest { + expr.clone() + } else { + Expr::Column( + DFField::from(unnest_node.schema().qualified_field(i)) + .qualified_column(), + ) + } + }) + .collect(), + Arc::new(unnest_node), + )?); + + Ok(Transformed::yes(output_node)) + } else { + Ok(Transformed::no(LogicalPlan::Projection(projection.clone()))) + } + } +} diff --git a/src/sql/planner/schema_provider.rs 
b/src/sql/planner/schema_provider.rs new file mode 100644 index 00000000..d860fd6c --- /dev/null +++ b/src/sql/planner/schema_provider.rs @@ -0,0 +1,360 @@ +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +use datafusion::arrow::datatypes::{self as datatypes, DataType, Field, Schema}; +use datafusion::common::{Result, plan_err}; +use datafusion::datasource::DefaultTableSource; +use datafusion::error::DataFusionError; +use datafusion::execution::{FunctionRegistry, SessionStateDefaults}; +use datafusion::logical_expr::expr_rewriter::FunctionRewrite; +use datafusion::logical_expr::planner::ExprPlanner; +use datafusion::logical_expr::{ + AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableSource, WindowUDF, +}; +use datafusion::optimizer::Analyzer; +use datafusion::sql::TableReference; +use datafusion::sql::planner::ContextProvider; +use unicase::UniCase; + +use crate::sql::catalog::table::Table as CatalogTable; +use crate::sql::planner::schemas::window_arrow_struct; +use crate::sql::types::{PlaceholderUdf, PlanningOptions}; + +#[derive(Clone, Default)] +pub struct StreamSchemaProvider { + pub source_defs: HashMap, + tables: HashMap, StreamTable>, + catalog_tables: HashMap, CatalogTable>, + pub functions: HashMap>, + pub aggregate_functions: HashMap>, + pub window_functions: HashMap>, + config_options: datafusion::config::ConfigOptions, + pub expr_planners: Vec>, + pub planning_options: PlanningOptions, + pub analyzer: Analyzer, +} + +#[derive(Clone, Debug)] +pub enum StreamTable { + Source { + name: String, + schema: Arc, + event_time_field: Option, + watermark_field: Option, + }, + Sink { + name: String, + schema: Arc, + }, + Memory { + name: String, + logical_plan: Option, + }, +} + +impl StreamTable { + pub fn name(&self) -> &str { + match self { + StreamTable::Source { name, .. } => name, + StreamTable::Sink { name, .. } => name, + StreamTable::Memory { name, .. 
} => name, + } + } + + pub fn get_fields(&self) -> Vec> { + match self { + StreamTable::Source { schema, .. } => schema.fields().to_vec(), + StreamTable::Sink { schema, .. } => schema.fields().to_vec(), + StreamTable::Memory { .. } => vec![], + } + } +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct LogicalBatchInput { + pub table_name: String, + pub schema: Arc, +} + +#[async_trait::async_trait] +impl datafusion::datasource::TableProvider for LogicalBatchInput { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn schema(&self) -> Arc { + self.schema.clone() + } + + fn table_type(&self) -> datafusion::datasource::TableType { + datafusion::datasource::TableType::Temporary + } + + async fn scan( + &self, + _state: &dyn datafusion::catalog::Session, + _projection: Option<&Vec>, + _filters: &[Expr], + _limit: Option, + ) -> Result> { + Ok(Arc::new(crate::sql::physical::FsMemExec::new( + self.table_name.clone(), + self.schema.clone(), + ))) + } +} + +fn create_table(table_name: String, schema: Arc) -> Arc { + let table_provider = LogicalBatchInput { table_name, schema }; + let wrapped = Arc::new(table_provider); + let provider = DefaultTableSource::new(wrapped); + Arc::new(provider) +} + +impl StreamSchemaProvider { + pub fn new() -> Self { + let mut registry = Self { + ..Default::default() + }; + + registry + .register_udf(PlaceholderUdf::with_return( + "hop", + vec![ + DataType::Interval(datatypes::IntervalUnit::MonthDayNano), + DataType::Interval(datatypes::IntervalUnit::MonthDayNano), + ], + window_arrow_struct(), + )) + .unwrap(); + + registry + .register_udf(PlaceholderUdf::with_return( + "tumble", + vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], + window_arrow_struct(), + )) + .unwrap(); + + registry + .register_udf(PlaceholderUdf::with_return( + "session", + vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], + window_arrow_struct(), + )) + .unwrap(); + + registry + 
.register_udf(PlaceholderUdf::with_return( + "unnest", + vec![DataType::List(Arc::new(Field::new( + "field", + DataType::Utf8, + true, + )))], + DataType::Utf8, + )) + .unwrap(); + + registry + .register_udf(PlaceholderUdf::with_return( + "row_time", + vec![], + DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None), + )) + .unwrap(); + + for p in SessionStateDefaults::default_scalar_functions() { + registry.register_udf(p).unwrap(); + } + for p in SessionStateDefaults::default_aggregate_functions() { + registry.register_udaf(p).unwrap(); + } + for p in SessionStateDefaults::default_window_functions() { + registry.register_udwf(p).unwrap(); + } + for p in SessionStateDefaults::default_expr_planners() { + registry.register_expr_planner(p).unwrap(); + } + + registry + } + + pub fn add_source_table( + &mut self, + name: String, + schema: Arc, + event_time_field: Option, + watermark_field: Option, + ) { + self.tables.insert( + UniCase::new(name.clone()), + StreamTable::Source { + name, + schema, + event_time_field, + watermark_field, + }, + ); + } + + pub fn add_sink_table(&mut self, name: String, schema: Arc) { + self.tables.insert( + UniCase::new(name.clone()), + StreamTable::Sink { name, schema }, + ); + } + + pub fn insert_table(&mut self, table: StreamTable) { + self.tables + .insert(UniCase::new(table.name().to_string()), table); + } + + pub fn get_table(&self, table_name: impl Into) -> Option<&StreamTable> { + self.tables.get(&UniCase::new(table_name.into())) + } + + pub fn get_table_mut(&mut self, table_name: impl Into) -> Option<&mut StreamTable> { + self.tables.get_mut(&UniCase::new(table_name.into())) + } + + pub fn insert_catalog_table(&mut self, table: CatalogTable) { + self.catalog_tables + .insert(UniCase::new(table.name().to_string()), table); + } + + pub fn get_catalog_table(&self, table_name: impl Into) -> Option<&CatalogTable> { + self.catalog_tables.get(&UniCase::new(table_name.into())) + } + + pub fn get_catalog_table_mut( + &mut self, + 
table_name: impl Into, + ) -> Option<&mut CatalogTable> { + self.catalog_tables + .get_mut(&UniCase::new(table_name.into())) + } + + pub fn get_async_udf_options( + &self, + _name: &str, + ) -> Option { + // TODO: implement async UDF lookup + None + } +} + +impl ContextProvider for StreamSchemaProvider { + fn get_table_source(&self, name: TableReference) -> Result> { + let table = self + .get_table(name.to_string()) + .ok_or_else(|| DataFusionError::Plan(format!("Table {name} not found")))?; + + let fields = table.get_fields(); + let schema = Arc::new(Schema::new_with_metadata( + fields + .iter() + .map(|f| f.as_ref().clone()) + .collect::>(), + HashMap::new(), + )); + Ok(create_table(name.to_string(), schema)) + } + + fn get_function_meta(&self, name: &str) -> Option> { + self.functions.get(name).cloned() + } + + fn get_aggregate_meta(&self, name: &str) -> Option> { + self.aggregate_functions.get(name).cloned() + } + + fn get_variable_type(&self, _variable_names: &[String]) -> Option { + None + } + + fn options(&self) -> &datafusion::config::ConfigOptions { + &self.config_options + } + + fn get_window_meta(&self, name: &str) -> Option> { + self.window_functions.get(name).cloned() + } + + fn udf_names(&self) -> Vec { + self.functions.keys().cloned().collect() + } + + fn udaf_names(&self) -> Vec { + self.aggregate_functions.keys().cloned().collect() + } + + fn udwf_names(&self) -> Vec { + self.window_functions.keys().cloned().collect() + } + + fn get_expr_planners(&self) -> &[Arc] { + &self.expr_planners + } +} + +impl FunctionRegistry for StreamSchemaProvider { + fn udfs(&self) -> HashSet { + self.functions.keys().cloned().collect() + } + + fn udf(&self, name: &str) -> Result> { + if let Some(f) = self.functions.get(name) { + Ok(Arc::clone(f)) + } else { + plan_err!("No UDF with name {name}") + } + } + + fn udaf(&self, name: &str) -> Result> { + if let Some(f) = self.aggregate_functions.get(name) { + Ok(Arc::clone(f)) + } else { + plan_err!("No UDAF with name 
{name}") + } + } + + fn udwf(&self, name: &str) -> Result> { + if let Some(f) = self.window_functions.get(name) { + Ok(Arc::clone(f)) + } else { + plan_err!("No UDWF with name {name}") + } + } + + fn register_function_rewrite( + &mut self, + rewrite: Arc, + ) -> Result<()> { + self.analyzer.add_function_rewrite(rewrite); + Ok(()) + } + + fn register_udf(&mut self, udf: Arc) -> Result>> { + Ok(self.functions.insert(udf.name().to_string(), udf)) + } + + fn register_udaf(&mut self, udaf: Arc) -> Result>> { + Ok(self + .aggregate_functions + .insert(udaf.name().to_string(), udaf)) + } + + fn register_udwf(&mut self, udwf: Arc) -> Result>> { + Ok(self.window_functions.insert(udwf.name().to_string(), udwf)) + } + + fn register_expr_planner(&mut self, expr_planner: Arc) -> Result<()> { + self.expr_planners.push(expr_planner); + Ok(()) + } + + fn expr_planners(&self) -> Vec> { + self.expr_planners.clone() + } +} diff --git a/src/sql/planner/schemas.rs b/src/sql/planner/schemas.rs index 0440cc85..f903db83 100644 --- a/src/sql/planner/schemas.rs +++ b/src/sql/planner/schemas.rs @@ -1,59 +1,5 @@ -use crate::sql::planner::types::{DFField, TIMESTAMP_FIELD}; -use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; -use datafusion::common::{DFSchema, DFSchemaRef, Result as DFResult, TableReference}; -use std::{collections::HashMap, sync::Arc}; - -pub fn window_arrow_struct() -> DataType { - DataType::Struct( - vec![ - Arc::new(Field::new( - "start", - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - )), - Arc::new(Field::new( - "end", - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - )), - ] - .into(), - ) -} - -pub(crate) fn add_timestamp_field( - schema: DFSchemaRef, - qualifier: Option, -) -> DFResult { - if has_timestamp_field(&schema) { - return Ok(schema); - } - - let timestamp_field = DFField::new( - qualifier, - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ); - 
Ok(Arc::new(schema.join(&DFSchema::new_with_metadata( - vec![timestamp_field.into()], - HashMap::new(), - )?)?)) -} - -pub(crate) fn has_timestamp_field(schema: &DFSchemaRef) -> bool { - schema - .fields() - .iter() - .any(|field| field.name() == TIMESTAMP_FIELD) -} - -pub fn add_timestamp_field_arrow(schema: Schema) -> SchemaRef { - let mut fields = schema.fields().to_vec(); - fields.push(Arc::new(Field::new( - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ))); - Arc::new(Schema::new(fields)) -} +// Re-export schema utilities from catalog::utils. +// Kept for backward compatibility with existing planner imports. +pub use crate::sql::catalog::utils::{ + add_timestamp_field, add_timestamp_field_arrow, has_timestamp_field, window_arrow_struct, +}; diff --git a/src/sql/planner/types.rs b/src/sql/planner/types.rs deleted file mode 100644 index 2330c0de..00000000 --- a/src/sql/planner/types.rs +++ /dev/null @@ -1,513 +0,0 @@ -use std::collections::HashMap; -use std::fmt::{Debug, Formatter}; -use std::sync::Arc; -use std::time::Duration; - -use datafusion::arrow::datatypes::{ - DECIMAL_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION, DataType, Field, FieldRef, IntervalUnit, - Schema, SchemaRef, TimeUnit, -}; -use datafusion::common::{Column, DFSchema, Result, TableReference, plan_datafusion_err, plan_err}; -use datafusion::logical_expr::{ - ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, -}; -use std::any::Any; - -pub const TIMESTAMP_FIELD: &str = "_timestamp"; - -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum ProcessingMode { - Append, - Update, -} - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub enum WindowType { - Tumbling { width: Duration }, - Sliding { width: Duration, slide: Duration }, - Session { gap: Duration }, - Instant, -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) enum WindowBehavior { - FromOperator { - window: WindowType, - window_field: DFField, - window_index: 
usize, - is_nested: bool, - }, - InData, -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DFField { - qualifier: Option, - field: FieldRef, -} - -impl From<(Option, FieldRef)> for DFField { - fn from(value: (Option, FieldRef)) -> Self { - Self { - qualifier: value.0, - field: value.1, - } - } -} - -impl From<(Option<&TableReference>, &Field)> for DFField { - fn from(value: (Option<&TableReference>, &Field)) -> Self { - Self { - qualifier: value.0.cloned(), - field: Arc::new(value.1.clone()), - } - } -} - -impl From for (Option, FieldRef) { - fn from(value: DFField) -> Self { - (value.qualifier, value.field) - } -} - -impl DFField { - pub fn new( - qualifier: Option, - name: impl Into, - data_type: DataType, - nullable: bool, - ) -> Self { - Self { - qualifier, - field: Arc::new(Field::new(name, data_type, nullable)), - } - } - - pub fn new_unqualified(name: &str, data_type: DataType, nullable: bool) -> Self { - DFField { - qualifier: None, - field: Arc::new(Field::new(name, data_type, nullable)), - } - } - - pub fn name(&self) -> &String { - self.field.name() - } - - pub fn data_type(&self) -> &DataType { - self.field.data_type() - } - - pub fn is_nullable(&self) -> bool { - self.field.is_nullable() - } - - pub fn metadata(&self) -> &HashMap { - self.field.metadata() - } - - pub fn qualified_name(&self) -> String { - if let Some(qualifier) = &self.qualifier { - format!("{}.{}", qualifier, self.field.name()) - } else { - self.field.name().to_owned() - } - } - - pub fn qualified_column(&self) -> Column { - Column { - relation: self.qualifier.clone(), - name: self.field.name().to_string(), - spans: Default::default(), - } - } - - pub fn unqualified_column(&self) -> Column { - Column { - relation: None, - name: self.field.name().to_string(), - spans: Default::default(), - } - } - - pub fn qualifier(&self) -> Option<&TableReference> { - self.qualifier.as_ref() - } - - pub fn field(&self) -> &FieldRef { - &self.field - } - - pub fn strip_qualifier(mut self) 
-> Self { - self.qualifier = None; - self - } - - pub fn with_nullable(mut self, nullable: bool) -> Self { - let f = self.field().as_ref().clone().with_nullable(nullable); - self.field = f.into(); - self - } - - pub fn with_metadata(mut self, metadata: HashMap) -> Self { - let f = self.field().as_ref().clone().with_metadata(metadata); - self.field = f.into(); - self - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct StreamSchema { - pub schema: SchemaRef, - pub timestamp_index: usize, - pub key_indices: Option>, -} - -impl StreamSchema { - pub fn new(schema: SchemaRef, timestamp_index: usize, key_indices: Option>) -> Self { - Self { - schema, - timestamp_index, - key_indices, - } - } - - pub fn new_unkeyed(schema: SchemaRef, timestamp_index: usize) -> Self { - Self { - schema, - timestamp_index, - key_indices: None, - } - } - - pub fn from_fields(fields: Vec) -> Self { - let schema = Arc::new(Schema::new(fields)); - let timestamp_index = schema - .column_with_name(TIMESTAMP_FIELD) - .map(|(i, _)| i) - .unwrap_or(0); - Self { - schema, - timestamp_index, - key_indices: None, - } - } - - pub fn from_schema_keys(schema: SchemaRef, key_indices: Vec) -> Result { - let timestamp_index = schema - .column_with_name(TIMESTAMP_FIELD) - .ok_or_else(|| { - datafusion::error::DataFusionError::Plan(format!( - "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}" - )) - })? - .0; - Ok(Self { - schema, - timestamp_index, - key_indices: Some(key_indices), - }) - } - - pub fn from_schema_unkeyed(schema: SchemaRef) -> Result { - let timestamp_index = schema - .column_with_name(TIMESTAMP_FIELD) - .ok_or_else(|| { - datafusion::error::DataFusionError::Plan(format!( - "no {TIMESTAMP_FIELD} field in schema" - )) - })? 
- .0; - Ok(Self { - schema, - timestamp_index, - key_indices: None, - }) - } -} - -#[allow(clippy::type_complexity)] -pub(crate) struct PlaceholderUdf { - name: String, - signature: Signature, - return_type: Arc Result + Send + Sync + 'static>, -} - -impl Debug for PlaceholderUdf { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "PlaceholderUDF<{}>", self.name) - } -} - -impl ScalarUDFImpl for PlaceholderUdf { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - &self.name - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, args: &[DataType]) -> Result { - (self.return_type)(args) - } - - fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { - unimplemented!("PlaceholderUdf should never be called at execution time"); - } -} - -impl PlaceholderUdf { - pub fn with_return( - name: impl Into, - args: Vec, - ret: DataType, - ) -> Arc { - Arc::new(ScalarUDF::new_from_impl(PlaceholderUdf { - name: name.into(), - signature: Signature::exact(args, Volatility::Volatile), - return_type: Arc::new(move |_| Ok(ret.clone())), - })) - } -} - -#[derive(Clone, Debug)] -pub struct SqlConfig { - pub default_parallelism: usize, -} - -impl Default for SqlConfig { - fn default() -> Self { - Self { - default_parallelism: 4, - } - } -} - -#[derive(Clone)] -pub struct PlanningOptions { - pub ttl: Duration, -} - -impl Default for PlanningOptions { - fn default() -> Self { - Self { - ttl: Duration::from_secs(24 * 60 * 60), - } - } -} - -pub fn convert_data_type(sql_type: &datafusion::sql::sqlparser::ast::DataType) -> Result { - use datafusion::sql::sqlparser::ast::ArrayElemTypeDef; - use datafusion::sql::sqlparser::ast::DataType as SQLDataType; - - match sql_type { - SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) - | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_sql_type, _)) => { - let data_type = convert_data_type(inner_sql_type)?; - 
Ok(DataType::List(Arc::new(Field::new( - "field", data_type, true, - )))) - } - SQLDataType::Array(ArrayElemTypeDef::None) => { - plan_err!("Arrays with unspecified type is not supported") - } - other => convert_simple_data_type(other), - } -} - -fn convert_simple_data_type( - sql_type: &datafusion::sql::sqlparser::ast::DataType, -) -> Result { - use datafusion::sql::sqlparser::ast::DataType as SQLDataType; - use datafusion::sql::sqlparser::ast::{ExactNumberInfo, TimezoneInfo}; - - match sql_type { - SQLDataType::Boolean | SQLDataType::Bool => Ok(DataType::Boolean), - SQLDataType::TinyInt(_) => Ok(DataType::Int8), - SQLDataType::SmallInt(_) | SQLDataType::Int2(_) => Ok(DataType::Int16), - SQLDataType::Int(_) | SQLDataType::Integer(_) | SQLDataType::Int4(_) => Ok(DataType::Int32), - SQLDataType::BigInt(_) | SQLDataType::Int8(_) => Ok(DataType::Int64), - SQLDataType::TinyIntUnsigned(_) => Ok(DataType::UInt8), - SQLDataType::SmallIntUnsigned(_) | SQLDataType::Int2Unsigned(_) => Ok(DataType::UInt16), - SQLDataType::IntUnsigned(_) - | SQLDataType::UnsignedInteger - | SQLDataType::Int4Unsigned(_) => Ok(DataType::UInt32), - SQLDataType::BigIntUnsigned(_) | SQLDataType::Int8Unsigned(_) => Ok(DataType::UInt64), - SQLDataType::Float(_) | SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32), - SQLDataType::Double(_) | SQLDataType::DoublePrecision | SQLDataType::Float8 => { - Ok(DataType::Float64) - } - SQLDataType::Char(_) - | SQLDataType::Varchar(_) - | SQLDataType::Text - | SQLDataType::String(_) => Ok(DataType::Utf8), - SQLDataType::Timestamp(None, TimezoneInfo::None) | SQLDataType::Datetime(_) => { - Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) - } - SQLDataType::Timestamp(Some(precision), TimezoneInfo::None) => match *precision { - 0 => Ok(DataType::Timestamp(TimeUnit::Second, None)), - 3 => Ok(DataType::Timestamp(TimeUnit::Millisecond, None)), - 6 => Ok(DataType::Timestamp(TimeUnit::Microsecond, None)), - 9 => 
Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)), - _ => { - plan_err!( - "unsupported precision {} -- supported: 0 (seconds), 3 (ms), 6 (us), 9 (ns)", - precision - ) - } - }, - SQLDataType::Date => Ok(DataType::Date32), - SQLDataType::Time(None, tz_info) => { - if matches!(tz_info, TimezoneInfo::None) - || matches!(tz_info, TimezoneInfo::WithoutTimeZone) - { - Ok(DataType::Time64(TimeUnit::Nanosecond)) - } else { - plan_err!("Unsupported SQL type {sql_type:?}") - } - } - SQLDataType::Numeric(exact_number_info) | SQLDataType::Decimal(exact_number_info) => { - let (precision, scale) = match *exact_number_info { - ExactNumberInfo::None => (None, None), - ExactNumberInfo::Precision(precision) => (Some(precision), None), - ExactNumberInfo::PrecisionAndScale(precision, scale) => { - (Some(precision), Some(scale)) - } - }; - make_decimal_type(precision, scale) - } - SQLDataType::Bytea => Ok(DataType::Binary), - SQLDataType::Interval => Ok(DataType::Interval(IntervalUnit::MonthDayNano)), - SQLDataType::Struct(fields, _) => { - let fields: Vec<_> = fields - .iter() - .map(|f| { - Ok::<_, datafusion::error::DataFusionError>(Arc::new(Field::new( - f.field_name - .as_ref() - .ok_or_else(|| { - plan_datafusion_err!("anonymous struct fields are not allowed") - })? 
- .to_string(), - convert_data_type(&f.field_type)?, - true, - ))) - }) - .collect::>()?; - Ok(DataType::Struct(fields.into())) - } - _ => plan_err!("Unsupported SQL type {sql_type:?}"), - } -} - -fn make_decimal_type(precision: Option, scale: Option) -> Result { - let (precision, scale) = match (precision, scale) { - (Some(p), Some(s)) => (p as u8, s as i8), - (Some(p), None) => (p as u8, 0), - (None, Some(_)) => return plan_err!("Cannot specify only scale for decimal data type"), - (None, None) => (DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE), - }; - - if precision == 0 || precision > DECIMAL128_MAX_PRECISION || scale.unsigned_abs() > precision { - plan_err!( - "Decimal(precision = {precision}, scale = {scale}) should satisfy `0 < precision <= 38`, and `scale <= precision`." - ) - } else { - Ok(DataType::Decimal128(precision, scale)) - } -} - -pub fn fields_with_qualifiers(schema: &DFSchema) -> Vec { - schema - .fields() - .iter() - .enumerate() - .map(|(i, f)| (schema.qualified_field(i).0.cloned(), f.clone()).into()) - .collect() -} - -pub fn schema_from_df_fields(fields: &[DFField]) -> Result { - schema_from_df_fields_with_metadata(fields, HashMap::new()) -} - -pub fn schema_from_df_fields_with_metadata( - fields: &[DFField], - metadata: HashMap, -) -> Result { - DFSchema::new_with_metadata(fields.iter().map(|t| t.clone().into()).collect(), metadata) -} - -pub fn get_duration(expression: &Expr) -> Result { - use datafusion::common::ScalarValue; - - match expression { - Expr::Literal(ScalarValue::IntervalDayTime(Some(val)), _) => { - Ok(Duration::from_secs((val.days as u64) * 24 * 60 * 60) - + Duration::from_millis(val.milliseconds as u64)) - } - Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(val)), _) => { - if val.months != 0 { - return datafusion::common::not_impl_err!( - "Windows do not support durations specified as months" - ); - } - Ok(Duration::from_secs((val.days as u64) * 24 * 60 * 60) - + Duration::from_nanos(val.nanoseconds as u64)) - } - _ 
=> plan_err!( - "unsupported Duration expression, expect duration literal, not {}", - expression - ), - } -} - -pub fn find_window(expression: &Expr) -> Result> { - use datafusion::logical_expr::expr::Alias; - use datafusion::logical_expr::expr::ScalarFunction; - - match expression { - Expr::ScalarFunction(ScalarFunction { func: fun, args }) => match fun.name() { - "hop" => { - if args.len() != 2 { - unreachable!(); - } - let slide = get_duration(&args[0])?; - let width = get_duration(&args[1])?; - if width.as_nanos() % slide.as_nanos() != 0 { - return plan_err!( - "hop() width {:?} must be a multiple of slide {:?}", - width, - slide - ); - } - if slide == width { - Ok(Some(WindowType::Tumbling { width })) - } else { - Ok(Some(WindowType::Sliding { width, slide })) - } - } - "tumble" => { - if args.len() != 1 { - unreachable!("wrong number of arguments for tumble(), expect one"); - } - let width = get_duration(&args[0])?; - Ok(Some(WindowType::Tumbling { width })) - } - "session" => { - if args.len() != 1 { - unreachable!("wrong number of arguments for session(), expected one"); - } - let gap = get_duration(&args[0])?; - Ok(Some(WindowType::Session { gap })) - } - _ => Ok(None), - }, - Expr::Alias(Alias { expr, .. }) => find_window(expr), - _ => Ok(None), - } -} diff --git a/src/sql/planner/udafs.rs b/src/sql/planner/udafs.rs new file mode 100644 index 00000000..9685c2d4 --- /dev/null +++ b/src/sql/planner/udafs.rs @@ -0,0 +1,31 @@ +use datafusion::arrow::array::ArrayRef; +use datafusion::error::Result; +use datafusion::physical_plan::Accumulator; +use datafusion::scalar::ScalarValue; +use std::fmt::Debug; + +/// Fake UDAF used just for plan-time placeholder. 
+#[derive(Debug)] +pub struct EmptyUdaf {} + +impl Accumulator for EmptyUdaf { + fn update_batch(&mut self, _: &[ArrayRef]) -> Result<()> { + unreachable!() + } + + fn evaluate(&self) -> Result { + unreachable!() + } + + fn size(&self) -> usize { + unreachable!() + } + + fn state(&self) -> Result> { + unreachable!() + } + + fn merge_batch(&mut self, _: &[ArrayRef]) -> Result<()> { + unreachable!() + } +} diff --git a/src/sql/types/data_type.rs b/src/sql/types/data_type.rs new file mode 100644 index 00000000..57edc3c9 --- /dev/null +++ b/src/sql/types/data_type.rs @@ -0,0 +1,144 @@ +use std::sync::Arc; + +use datafusion::arrow::datatypes::{ + DECIMAL_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION, DataType, Field, IntervalUnit, TimeUnit, +}; +use datafusion::common::{Result, plan_datafusion_err, plan_err}; + +use crate::types::FsExtensionType; + +pub fn convert_data_type( + sql_type: &datafusion::sql::sqlparser::ast::DataType, +) -> Result<(DataType, Option)> { + use datafusion::sql::sqlparser::ast::ArrayElemTypeDef; + use datafusion::sql::sqlparser::ast::DataType as SQLDataType; + + match sql_type { + SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) + | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_sql_type, _)) => { + let (data_type, extension) = convert_simple_data_type(inner_sql_type)?; + + Ok(( + DataType::List(Arc::new(FsExtensionType::add_metadata( + extension, + Field::new("field", data_type, true), + ))), + None, + )) + } + SQLDataType::Array(ArrayElemTypeDef::None) => { + plan_err!("Arrays with unspecified type is not supported") + } + other => convert_simple_data_type(other), + } +} + +fn convert_simple_data_type( + sql_type: &datafusion::sql::sqlparser::ast::DataType, +) -> Result<(DataType, Option)> { + use datafusion::sql::sqlparser::ast::DataType as SQLDataType; + use datafusion::sql::sqlparser::ast::{ExactNumberInfo, TimezoneInfo}; + + if matches!(sql_type, SQLDataType::JSON) { + return Ok((DataType::Utf8, 
Some(FsExtensionType::JSON))); + } + + let dt = match sql_type { + SQLDataType::Boolean | SQLDataType::Bool => Ok(DataType::Boolean), + SQLDataType::TinyInt(_) => Ok(DataType::Int8), + SQLDataType::SmallInt(_) | SQLDataType::Int2(_) => Ok(DataType::Int16), + SQLDataType::Int(_) | SQLDataType::Integer(_) | SQLDataType::Int4(_) => Ok(DataType::Int32), + SQLDataType::BigInt(_) | SQLDataType::Int8(_) => Ok(DataType::Int64), + SQLDataType::TinyIntUnsigned(_) => Ok(DataType::UInt8), + SQLDataType::SmallIntUnsigned(_) | SQLDataType::Int2Unsigned(_) => Ok(DataType::UInt16), + SQLDataType::IntUnsigned(_) + | SQLDataType::UnsignedInteger + | SQLDataType::Int4Unsigned(_) => Ok(DataType::UInt32), + SQLDataType::BigIntUnsigned(_) | SQLDataType::Int8Unsigned(_) => Ok(DataType::UInt64), + SQLDataType::Float(_) => Ok(DataType::Float32), + SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32), + SQLDataType::Double(_) | SQLDataType::DoublePrecision | SQLDataType::Float8 => { + Ok(DataType::Float64) + } + SQLDataType::Char(_) + | SQLDataType::Varchar(_) + | SQLDataType::Text + | SQLDataType::String(_) => Ok(DataType::Utf8), + SQLDataType::Timestamp(None, TimezoneInfo::None) | SQLDataType::Datetime(_) => { + Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) + } + SQLDataType::Timestamp(Some(precision), TimezoneInfo::None) => match *precision { + 0 => Ok(DataType::Timestamp(TimeUnit::Second, None)), + 3 => Ok(DataType::Timestamp(TimeUnit::Millisecond, None)), + 6 => Ok(DataType::Timestamp(TimeUnit::Microsecond, None)), + 9 => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)), + _ => { + return plan_err!( + "unsupported precision {} -- supported precisions are 0 (seconds), \ + 3 (milliseconds), 6 (microseconds), and 9 (nanoseconds)", + precision + ); + } + }, + SQLDataType::Date => Ok(DataType::Date32), + SQLDataType::Time(None, tz_info) => { + if matches!(tz_info, TimezoneInfo::None) + || matches!(tz_info, TimezoneInfo::WithoutTimeZone) + { + 
Ok(DataType::Time64(TimeUnit::Nanosecond)) + } else { + return plan_err!("Unsupported SQL type {sql_type:?}"); + } + } + SQLDataType::Numeric(exact_number_info) | SQLDataType::Decimal(exact_number_info) => { + let (precision, scale) = match *exact_number_info { + ExactNumberInfo::None => (None, None), + ExactNumberInfo::Precision(precision) => (Some(precision), None), + ExactNumberInfo::PrecisionAndScale(precision, scale) => { + (Some(precision), Some(scale)) + } + }; + make_decimal_type(precision, scale) + } + SQLDataType::Bytea => Ok(DataType::Binary), + SQLDataType::Interval => Ok(DataType::Interval(IntervalUnit::MonthDayNano)), + SQLDataType::Struct(fields, _) => { + let fields: Vec<_> = fields + .iter() + .map(|f| { + Ok::<_, datafusion::error::DataFusionError>(Arc::new(Field::new( + f.field_name + .as_ref() + .ok_or_else(|| { + plan_datafusion_err!("anonymous struct fields are not allowed") + })? + .to_string(), + convert_data_type(&f.field_type)?.0, + true, + ))) + }) + .collect::>()?; + Ok(DataType::Struct(fields.into())) + } + _ => return plan_err!("Unsupported SQL type {sql_type:?}"), + }; + + Ok((dt?, None)) +} + +fn make_decimal_type(precision: Option, scale: Option) -> Result { + let (precision, scale) = match (precision, scale) { + (Some(p), Some(s)) => (p as u8, s as i8), + (Some(p), None) => (p as u8, 0), + (None, Some(_)) => return plan_err!("Cannot specify only scale for decimal data type"), + (None, None) => (DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE), + }; + + if precision == 0 || precision > DECIMAL128_MAX_PRECISION || scale.unsigned_abs() > precision { + plan_err!( + "Decimal(precision = {precision}, scale = {scale}) should satisfy `0 < precision <= 38`, and `scale <= precision`." 
+ ) + } else { + Ok(DataType::Decimal128(precision, scale)) + } +} diff --git a/src/sql/types/df_field.rs b/src/sql/types/df_field.rs new file mode 100644 index 00000000..3797adb2 --- /dev/null +++ b/src/sql/types/df_field.rs @@ -0,0 +1,141 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use datafusion::arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion::common::{Column, DFSchema, Result, TableReference}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct DFField { + qualifier: Option, + field: FieldRef, +} + +impl From<(Option, FieldRef)> for DFField { + fn from(value: (Option, FieldRef)) -> Self { + Self { + qualifier: value.0, + field: value.1, + } + } +} + +impl From<(Option<&TableReference>, &Field)> for DFField { + fn from(value: (Option<&TableReference>, &Field)) -> Self { + Self { + qualifier: value.0.cloned(), + field: Arc::new(value.1.clone()), + } + } +} + +impl From for (Option, FieldRef) { + fn from(value: DFField) -> Self { + (value.qualifier, value.field) + } +} + +impl DFField { + pub fn new( + qualifier: Option, + name: impl Into, + data_type: DataType, + nullable: bool, + ) -> Self { + Self { + qualifier, + field: Arc::new(Field::new(name, data_type, nullable)), + } + } + + pub fn new_unqualified(name: &str, data_type: DataType, nullable: bool) -> Self { + DFField { + qualifier: None, + field: Arc::new(Field::new(name, data_type, nullable)), + } + } + + pub fn name(&self) -> &String { + self.field.name() + } + + pub fn data_type(&self) -> &DataType { + self.field.data_type() + } + + pub fn is_nullable(&self) -> bool { + self.field.is_nullable() + } + + pub fn metadata(&self) -> &HashMap { + self.field.metadata() + } + + pub fn qualified_name(&self) -> String { + if let Some(qualifier) = &self.qualifier { + format!("{}.{}", qualifier, self.field.name()) + } else { + self.field.name().to_owned() + } + } + + pub fn qualified_column(&self) -> Column { + Column { + relation: self.qualifier.clone(), + name: 
self.field.name().to_string(), + spans: Default::default(), + } + } + + pub fn unqualified_column(&self) -> Column { + Column { + relation: None, + name: self.field.name().to_string(), + spans: Default::default(), + } + } + + pub fn qualifier(&self) -> Option<&TableReference> { + self.qualifier.as_ref() + } + + pub fn field(&self) -> &FieldRef { + &self.field + } + + pub fn strip_qualifier(mut self) -> Self { + self.qualifier = None; + self + } + + pub fn with_nullable(mut self, nullable: bool) -> Self { + let f = self.field().as_ref().clone().with_nullable(nullable); + self.field = f.into(); + self + } + + pub fn with_metadata(mut self, metadata: HashMap) -> Self { + let f = self.field().as_ref().clone().with_metadata(metadata); + self.field = f.into(); + self + } +} + +pub fn fields_with_qualifiers(schema: &DFSchema) -> Vec { + schema + .fields() + .iter() + .enumerate() + .map(|(i, f)| (schema.qualified_field(i).0.cloned(), f.clone()).into()) + .collect() +} + +pub fn schema_from_df_fields(fields: &[DFField]) -> Result { + schema_from_df_fields_with_metadata(fields, HashMap::new()) +} + +pub fn schema_from_df_fields_with_metadata( + fields: &[DFField], + metadata: HashMap, +) -> Result { + DFSchema::new_with_metadata(fields.iter().map(|t| t.clone().into()).collect(), metadata) +} diff --git a/src/sql/types/mod.rs b/src/sql/types/mod.rs new file mode 100644 index 00000000..25c67574 --- /dev/null +++ b/src/sql/types/mod.rs @@ -0,0 +1,50 @@ +mod data_type; +mod df_field; +pub(crate) mod placeholder_udf; +mod stream_schema; +mod window; + +use std::time::Duration; + +pub use data_type::convert_data_type; +pub use df_field::{ + DFField, fields_with_qualifiers, schema_from_df_fields, schema_from_df_fields_with_metadata, +}; +pub(crate) use placeholder_udf::PlaceholderUdf; +pub use stream_schema::StreamSchema; +pub(crate) use window::WindowBehavior; +pub use window::{WindowType, find_window, get_duration}; + +pub const TIMESTAMP_FIELD: &str = "_timestamp"; + 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ProcessingMode { + Append, + Update, +} + +#[derive(Clone, Debug)] +pub struct SqlConfig { + pub default_parallelism: usize, +} + +impl Default for SqlConfig { + fn default() -> Self { + Self { + default_parallelism: 4, + } + } +} + +#[derive(Clone)] +pub struct PlanningOptions { + pub ttl: Duration, +} + +impl Default for PlanningOptions { + fn default() -> Self { + Self { + ttl: Duration::from_secs(24 * 60 * 60), + } + } +} diff --git a/src/sql/types/placeholder_udf.rs b/src/sql/types/placeholder_udf.rs new file mode 100644 index 00000000..5cf96d28 --- /dev/null +++ b/src/sql/types/placeholder_udf.rs @@ -0,0 +1,58 @@ +use std::any::Any; +use std::fmt::{Debug, Formatter}; +use std::sync::Arc; + +use datafusion::arrow::datatypes::DataType; +use datafusion::common::Result; +use datafusion::logical_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, +}; + +#[allow(clippy::type_complexity)] +pub(crate) struct PlaceholderUdf { + name: String, + signature: Signature, + return_type: Arc Result + Send + Sync + 'static>, +} + +impl Debug for PlaceholderUdf { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "PlaceholderUDF<{}>", self.name) + } +} + +impl ScalarUDFImpl for PlaceholderUdf { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + &self.name + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, args: &[DataType]) -> Result { + (self.return_type)(args) + } + + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + unimplemented!("PlaceholderUdf should never be called at execution time"); + } +} + +impl PlaceholderUdf { + pub fn with_return( + name: impl Into, + args: Vec, + ret: DataType, + ) -> Arc { + Arc::new(ScalarUDF::new_from_impl(PlaceholderUdf { + name: name.into(), + signature: Signature::exact(args, Volatility::Volatile), + return_type: Arc::new(move |_| 
Ok(ret.clone())), + })) + } +} diff --git a/src/sql/types/stream_schema.rs b/src/sql/types/stream_schema.rs new file mode 100644 index 00000000..e981111b --- /dev/null +++ b/src/sql/types/stream_schema.rs @@ -0,0 +1,76 @@ +use std::sync::Arc; + +use datafusion::arrow::datatypes::{Field, Schema, SchemaRef}; +use datafusion::common::Result; + +use super::TIMESTAMP_FIELD; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StreamSchema { + pub schema: SchemaRef, + pub timestamp_index: usize, + pub key_indices: Option>, +} + +impl StreamSchema { + pub fn new(schema: SchemaRef, timestamp_index: usize, key_indices: Option>) -> Self { + Self { + schema, + timestamp_index, + key_indices, + } + } + + pub fn new_unkeyed(schema: SchemaRef, timestamp_index: usize) -> Self { + Self { + schema, + timestamp_index, + key_indices: None, + } + } + + pub fn from_fields(fields: Vec) -> Self { + let schema = Arc::new(Schema::new(fields)); + let timestamp_index = schema + .column_with_name(TIMESTAMP_FIELD) + .map(|(i, _)| i) + .unwrap_or(0); + Self { + schema, + timestamp_index, + key_indices: None, + } + } + + pub fn from_schema_keys(schema: SchemaRef, key_indices: Vec) -> Result { + let timestamp_index = schema + .column_with_name(TIMESTAMP_FIELD) + .ok_or_else(|| { + datafusion::error::DataFusionError::Plan(format!( + "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}" + )) + })? + .0; + Ok(Self { + schema, + timestamp_index, + key_indices: Some(key_indices), + }) + } + + pub fn from_schema_unkeyed(schema: SchemaRef) -> Result { + let timestamp_index = schema + .column_with_name(TIMESTAMP_FIELD) + .ok_or_else(|| { + datafusion::error::DataFusionError::Plan(format!( + "no {TIMESTAMP_FIELD} field in schema" + )) + })? 
+ .0; + Ok(Self { + schema, + timestamp_index, + key_indices: None, + }) + } +} diff --git a/src/sql/types/window.rs b/src/sql/types/window.rs new file mode 100644 index 00000000..9687974a --- /dev/null +++ b/src/sql/types/window.rs @@ -0,0 +1,95 @@ +use std::time::Duration; + +use datafusion::common::{Result, plan_err}; +use datafusion::logical_expr::Expr; + +use super::DFField; + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum WindowType { + Tumbling { width: Duration }, + Sliding { width: Duration, slide: Duration }, + Session { gap: Duration }, + Instant, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) enum WindowBehavior { + FromOperator { + window: WindowType, + window_field: DFField, + window_index: usize, + is_nested: bool, + }, + InData, +} + +pub fn get_duration(expression: &Expr) -> Result { + use datafusion::common::ScalarValue; + + match expression { + Expr::Literal(ScalarValue::IntervalDayTime(Some(val)), _) => { + Ok(Duration::from_secs((val.days as u64) * 24 * 60 * 60) + + Duration::from_millis(val.milliseconds as u64)) + } + Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(val)), _) => { + if val.months != 0 { + return datafusion::common::not_impl_err!( + "Windows do not support durations specified as months" + ); + } + Ok(Duration::from_secs((val.days as u64) * 24 * 60 * 60) + + Duration::from_nanos(val.nanoseconds as u64)) + } + _ => plan_err!( + "unsupported Duration expression, expect duration literal, not {}", + expression + ), + } +} + +pub fn find_window(expression: &Expr) -> Result> { + use datafusion::logical_expr::expr::Alias; + use datafusion::logical_expr::expr::ScalarFunction; + + match expression { + Expr::ScalarFunction(ScalarFunction { func: fun, args }) => match fun.name() { + "hop" => { + if args.len() != 2 { + unreachable!(); + } + let slide = get_duration(&args[0])?; + let width = get_duration(&args[1])?; + if width.as_nanos() % slide.as_nanos() != 0 { + return plan_err!( + "hop() width {:?} must be a 
multiple of slide {:?}", + width, + slide + ); + } + if slide == width { + Ok(Some(WindowType::Tumbling { width })) + } else { + Ok(Some(WindowType::Sliding { width, slide })) + } + } + "tumble" => { + if args.len() != 1 { + unreachable!("wrong number of arguments for tumble(), expect one"); + } + let width = get_duration(&args[0])?; + Ok(Some(WindowType::Tumbling { width })) + } + "session" => { + if args.len() != 1 { + unreachable!("wrong number of arguments for session(), expected one"); + } + let gap = get_duration(&args[0])?; + Ok(Some(WindowType::Session { gap })) + } + _ => Ok(None), + }, + Expr::Alias(Alias { expr, .. }) => find_window(expr), + _ => Ok(None), + } +} diff --git a/src/storage/task/rocksdb_storage.rs b/src/storage/task/rocksdb_storage.rs index 31709a51..714a9143 100644 --- a/src/storage/task/rocksdb_storage.rs +++ b/src/storage/task/rocksdb_storage.rs @@ -103,11 +103,19 @@ impl TaskStorage for RocksDBTaskStorage { }; let mut batch = WriteBatch::default(); - batch.put_cf(&cf_meta, key, bincode::serialize(&meta)?); + batch.put_cf( + &cf_meta, + key, + bincode::serde::encode_to_vec(&meta, bincode::config::standard())?, + ); batch.put_cf(&cf_conf, key, &task_info.config_bytes); if let Some(ref module) = task_info.module_bytes { - batch.put_cf(&cf_payl, key, bincode::serialize(module)?); + batch.put_cf( + &cf_payl, + key, + bincode::serde::encode_to_vec(module, bincode::config::standard())?, + ); } self.db @@ -124,10 +132,15 @@ impl TaskStorage for RocksDBTaskStorage { .get_cf(&cf, key)? 
.ok_or_else(|| anyhow!("Task {} not found", task_name))?; - let mut meta: TaskMetadata = bincode::deserialize(&raw)?; + let (mut meta, _): (TaskMetadata, _) = + bincode::serde::decode_from_slice(&raw, bincode::config::standard())?; meta.state = new_state; - self.db.put_cf(&cf, key, bincode::serialize(&meta)?)?; + self.db.put_cf( + &cf, + key, + bincode::serde::encode_to_vec(&meta, bincode::config::standard())?, + )?; Ok(()) } @@ -140,10 +153,15 @@ impl TaskStorage for RocksDBTaskStorage { .get_cf(&cf, key)? .ok_or_else(|| anyhow!("Task {} not found", task_name))?; - let mut meta: TaskMetadata = bincode::deserialize(&raw)?; + let (mut meta, _): (TaskMetadata, _) = + bincode::serde::decode_from_slice(&raw, bincode::config::standard())?; meta.checkpoint_id = checkpoint_id; - self.db.put_cf(&cf, key, bincode::serialize(&meta)?)?; + self.db.put_cf( + &cf, + key, + bincode::serde::encode_to_vec(&meta, bincode::config::standard())?, + )?; Ok(()) } @@ -174,9 +192,17 @@ impl TaskStorage for RocksDBTaskStorage { let module_bytes = self .db .get_cf(&self.get_cf(CF_PAYLOAD)?, key)? 
- .and_then(|b| bincode::deserialize::(&b).ok()); - - let meta: TaskMetadata = bincode::deserialize(&meta_raw)?; + .and_then(|b| { + bincode::serde::decode_from_slice::( + &b, + bincode::config::standard(), + ) + .ok() + .map(|(v, _)| v) + }); + + let (meta, _): (TaskMetadata, _) = + bincode::serde::decode_from_slice(&meta_raw, bincode::config::standard())?; Ok(StoredTaskInfo { name: task_name.to_string(), diff --git a/src/types/arrow_ext.rs b/src/types/arrow_ext.rs new file mode 100644 index 00000000..701bf8e4 --- /dev/null +++ b/src/types/arrow_ext.rs @@ -0,0 +1,169 @@ +use std::collections::HashMap; +use std::fmt::{Display, Formatter}; +use std::time::SystemTime; + +use datafusion::arrow::datatypes::{DataType, Field, TimeUnit}; + +pub struct DisplayAsSql<'a>(pub &'a DataType); + +impl Display for DisplayAsSql<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.0 { + DataType::Boolean => write!(f, "BOOLEAN"), + DataType::Int8 | DataType::Int16 | DataType::Int32 => write!(f, "INT"), + DataType::Int64 => write!(f, "BIGINT"), + DataType::UInt8 | DataType::UInt16 | DataType::UInt32 => write!(f, "INT UNSIGNED"), + DataType::UInt64 => write!(f, "BIGINT UNSIGNED"), + DataType::Float16 | DataType::Float32 => write!(f, "FLOAT"), + DataType::Float64 => write!(f, "DOUBLE"), + DataType::Timestamp(_, _) => write!(f, "TIMESTAMP"), + DataType::Date32 => write!(f, "DATE"), + DataType::Date64 => write!(f, "DATETIME"), + DataType::Time32(_) => write!(f, "TIME"), + DataType::Time64(_) => write!(f, "TIME"), + DataType::Duration(_) => write!(f, "INTERVAL"), + DataType::Interval(_) => write!(f, "INTERVAL"), + DataType::Binary | DataType::FixedSizeBinary(_) | DataType::LargeBinary => { + write!(f, "BYTEA") + } + DataType::Utf8 | DataType::LargeUtf8 => write!(f, "TEXT"), + DataType::List(inner) => { + write!(f, "{}[]", DisplayAsSql(inner.data_type())) + } + dt => write!(f, "{dt}"), + } + } +} + +/// Arrow extension type markers for FunctionStream-specific 
semantics. +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum FsExtensionType { + JSON, +} + +impl FsExtensionType { + pub fn from_map(map: &HashMap) -> Option { + match map.get("ARROW:extension:name")?.as_str() { + "functionstream.json" => Some(Self::JSON), + _ => None, + } + } + + pub fn add_metadata(v: Option, field: Field) -> Field { + if let Some(v) = v { + let mut m = HashMap::new(); + match v { + FsExtensionType::JSON => { + m.insert( + "ARROW:extension:name".to_string(), + "functionstream.json".to_string(), + ); + } + } + field.with_metadata(m) + } else { + field + } + } +} + +pub trait GetArrowType { + fn arrow_type() -> DataType; +} + +pub trait GetArrowSchema { + fn arrow_schema() -> datafusion::arrow::datatypes::Schema; +} + +impl GetArrowType for T +where + T: GetArrowSchema, +{ + fn arrow_type() -> DataType { + DataType::Struct(Self::arrow_schema().fields.clone()) + } +} + +impl GetArrowType for bool { + fn arrow_type() -> DataType { + DataType::Boolean + } +} + +impl GetArrowType for i8 { + fn arrow_type() -> DataType { + DataType::Int8 + } +} + +impl GetArrowType for i16 { + fn arrow_type() -> DataType { + DataType::Int16 + } +} + +impl GetArrowType for i32 { + fn arrow_type() -> DataType { + DataType::Int32 + } +} + +impl GetArrowType for i64 { + fn arrow_type() -> DataType { + DataType::Int64 + } +} + +impl GetArrowType for u8 { + fn arrow_type() -> DataType { + DataType::UInt8 + } +} + +impl GetArrowType for u16 { + fn arrow_type() -> DataType { + DataType::UInt16 + } +} + +impl GetArrowType for u32 { + fn arrow_type() -> DataType { + DataType::UInt32 + } +} + +impl GetArrowType for u64 { + fn arrow_type() -> DataType { + DataType::UInt64 + } +} + +impl GetArrowType for f32 { + fn arrow_type() -> DataType { + DataType::Float32 + } +} + +impl GetArrowType for f64 { + fn arrow_type() -> DataType { + DataType::Float64 + } +} + +impl GetArrowType for String { + fn arrow_type() -> DataType { + DataType::Utf8 + } +} + +impl GetArrowType for Vec { 
+ fn arrow_type() -> DataType { + DataType::Binary + } +} + +impl GetArrowType for SystemTime { + fn arrow_type() -> DataType { + DataType::Timestamp(TimeUnit::Nanosecond, None) + } +} diff --git a/src/types/control.rs b/src/types/control.rs new file mode 100644 index 00000000..efdc754e --- /dev/null +++ b/src/types/control.rs @@ -0,0 +1,152 @@ +use std::collections::HashMap; +use std::time::SystemTime; + +use super::message::CheckpointBarrier; + +/// Control messages sent from the controller to worker tasks. +#[derive(Debug, Clone)] +pub enum ControlMessage { + Checkpoint(CheckpointBarrier), + Stop { + mode: StopMode, + }, + Commit { + epoch: u32, + commit_data: HashMap>>, + }, + LoadCompacted { + compacted: CompactionResult, + }, + NoOp, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum StopMode { + Graceful, + Immediate, +} + +#[derive(Debug, Clone)] +pub struct CompactionResult { + pub operator_id: String, + pub compacted_tables: HashMap, +} + +#[derive(Debug, Clone)] +pub struct TableCheckpointMetadata { + pub table_type: TableType, + pub data: Vec, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TableType { + GlobalKeyValue, + ExpiringKeyedTimeTable, +} + +/// Responses sent from worker tasks back to the controller. 
+#[derive(Debug, Clone)] +pub enum ControlResp { + CheckpointEvent(CheckpointEvent), + CheckpointCompleted(CheckpointCompleted), + TaskStarted { + node_id: u32, + task_index: usize, + start_time: SystemTime, + }, + TaskFinished { + node_id: u32, + task_index: usize, + }, + TaskFailed { + node_id: u32, + task_index: usize, + error: TaskError, + }, + Error { + node_id: u32, + operator_id: String, + task_index: usize, + message: String, + details: String, + }, +} + +#[derive(Debug, Clone)] +pub struct CheckpointCompleted { + pub checkpoint_epoch: u32, + pub node_id: u32, + pub operator_id: String, + pub subtask_metadata: SubtaskCheckpointMetadata, +} + +#[derive(Debug, Clone)] +pub struct SubtaskCheckpointMetadata { + pub subtask_index: u32, + pub start_time: u64, + pub finish_time: u64, + pub watermark: Option, + pub bytes: u64, + pub table_metadata: HashMap, + pub table_configs: HashMap, +} + +#[derive(Debug, Clone)] +pub struct TableSubtaskCheckpointMetadata { + pub subtask_index: u32, + pub table_type: TableType, + pub data: Vec, +} + +#[derive(Debug, Clone)] +pub struct TableConfig { + pub table_type: TableType, + pub config: Vec, + pub state_version: u32, +} + +#[derive(Debug, Clone)] +pub struct CheckpointEvent { + pub checkpoint_epoch: u32, + pub node_id: u32, + pub operator_id: String, + pub subtask_index: u32, + pub time: SystemTime, + pub event_type: TaskCheckpointEventType, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TaskCheckpointEventType { + StartedAlignment, + StartedCheckpointing, + FinishedOperatorSetup, + FinishedSync, + FinishedCommit, +} + +#[derive(Debug, Clone)] +pub struct TaskError { + pub job_id: String, + pub node_id: u32, + pub operator_id: String, + pub operator_subtask: u64, + pub error: String, + pub error_domain: ErrorDomain, + pub retry_hint: RetryHint, + pub details: String, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum 
ErrorDomain { + User, + Internal, + External, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum RetryHint { + NoRetry, + WithBackoff, +} diff --git a/src/types/date.rs b/src/types/date.rs new file mode 100644 index 00000000..c18e31a7 --- /dev/null +++ b/src/types/date.rs @@ -0,0 +1,70 @@ +use serde::Serialize; +use std::convert::TryFrom; + +#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Hash, Serialize)] +pub enum DatePart { + Year, + Month, + Week, + Day, + Hour, + Minute, + Second, + Millisecond, + Microsecond, + Nanosecond, + DayOfWeek, + DayOfYear, +} + +impl TryFrom<&str> for DatePart { + type Error = String; + + fn try_from(value: &str) -> Result { + match value.to_lowercase().as_str() { + "year" => Ok(DatePart::Year), + "month" => Ok(DatePart::Month), + "week" => Ok(DatePart::Week), + "day" => Ok(DatePart::Day), + "hour" => Ok(DatePart::Hour), + "minute" => Ok(DatePart::Minute), + "second" => Ok(DatePart::Second), + "millisecond" => Ok(DatePart::Millisecond), + "microsecond" => Ok(DatePart::Microsecond), + "nanosecond" => Ok(DatePart::Nanosecond), + "dow" => Ok(DatePart::DayOfWeek), + "doy" => Ok(DatePart::DayOfYear), + _ => Err(format!("'{value}' is not a valid DatePart")), + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, PartialOrd, Serialize)] +pub enum DateTruncPrecision { + Year, + Quarter, + Month, + Week, + Day, + Hour, + Minute, + Second, +} + +impl TryFrom<&str> for DateTruncPrecision { + type Error = String; + + fn try_from(value: &str) -> Result { + match value.to_lowercase().as_str() { + "year" => Ok(DateTruncPrecision::Year), + "quarter" => Ok(DateTruncPrecision::Quarter), + "month" => Ok(DateTruncPrecision::Month), + "week" => Ok(DateTruncPrecision::Week), + "day" => Ok(DateTruncPrecision::Day), + "hour" => Ok(DateTruncPrecision::Hour), + "minute" => Ok(DateTruncPrecision::Minute), + "second" => Ok(DateTruncPrecision::Second), + _ => 
Err(format!("'{value}' is not a valid DateTruncPrecision")), + } + } +} diff --git a/src/types/debezium.rs b/src/types/debezium.rs new file mode 100644 index 00000000..3c9f4747 --- /dev/null +++ b/src/types/debezium.rs @@ -0,0 +1,136 @@ +use bincode::{Decode, Encode}; +use serde::{Deserialize, Serialize}; +use std::convert::TryFrom; +use std::fmt::Debug; + +pub trait Key: + Debug + Clone + Encode + Decode<()> + std::hash::Hash + PartialEq + Eq + Send + 'static +{ +} +impl + std::hash::Hash + PartialEq + Eq + Send + 'static> Key + for T +{ +} + +pub trait Data: Debug + Clone + Encode + Decode<()> + Send + 'static {} +impl + Send + 'static> Data for T {} + +#[derive(Debug, Clone, PartialEq, Encode, Decode, Serialize, Deserialize)] +pub enum UpdatingData { + Retract(T), + Update { old: T, new: T }, + Append(T), +} + +impl UpdatingData { + pub fn lower(&self) -> T { + match self { + UpdatingData::Retract(_) => panic!("cannot lower retractions"), + UpdatingData::Update { new, .. } => new.clone(), + UpdatingData::Append(t) => t.clone(), + } + } + + pub fn unwrap_append(&self) -> &T { + match self { + UpdatingData::Append(t) => t, + _ => panic!("UpdatingData is not an append"), + } + } +} + +#[derive(Clone, Encode, Decode, Debug, Serialize, Deserialize, PartialEq)] +#[serde(try_from = "DebeziumShadow")] +pub struct Debezium { + pub before: Option, + pub after: Option, + pub op: DebeziumOp, +} + +#[derive(Clone, Encode, Decode, Debug, Serialize, Deserialize, PartialEq)] +struct DebeziumShadow { + before: Option, + after: Option, + op: DebeziumOp, +} + +impl TryFrom> for Debezium { + type Error = &'static str; + + fn try_from(value: DebeziumShadow) -> Result { + match (value.op, &value.before, &value.after) { + (DebeziumOp::Create, _, None) => { + Err("`after` must be set for Debezium create messages") + } + (DebeziumOp::Update, None, _) => { + Err("`before` must be set for Debezium update messages") + } + (DebeziumOp::Update, _, None) => { + Err("`after` must be set for 
Debezium update messages") + } + (DebeziumOp::Delete, None, _) => { + Err("`before` must be set for Debezium delete messages") + } + _ => Ok(Debezium { + before: value.before, + after: value.after, + op: value.op, + }), + } + } +} + +#[derive(Copy, Clone, Encode, Decode, Debug, PartialEq)] +pub enum DebeziumOp { + Create, + Update, + Delete, +} + +#[allow(clippy::to_string_trait_impl)] +impl ToString for DebeziumOp { + fn to_string(&self) -> String { + match self { + DebeziumOp::Create => "c", + DebeziumOp::Update => "u", + DebeziumOp::Delete => "d", + } + .to_string() + } +} + +impl<'de> Deserialize<'de> for DebeziumOp { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + match s.as_str() { + "c" | "r" => Ok(DebeziumOp::Create), + "u" => Ok(DebeziumOp::Update), + "d" => Ok(DebeziumOp::Delete), + _ => Err(serde::de::Error::custom(format!("Invalid DebeziumOp {s}"))), + } + } +} + +impl Serialize for DebeziumOp { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + DebeziumOp::Create => serializer.serialize_str("c"), + DebeziumOp::Update => serializer.serialize_str("u"), + DebeziumOp::Delete => serializer.serialize_str("d"), + } + } +} + +#[derive(Copy, Clone, Encode, Decode, Debug, PartialEq, Serialize, Deserialize)] +pub enum JoinType { + Inner, + Left, + Right, + Full, +} diff --git a/src/types/df.rs b/src/types/df.rs new file mode 100644 index 00000000..30b4eb9c --- /dev/null +++ b/src/types/df.rs @@ -0,0 +1,394 @@ +use datafusion::arrow::array::builder::{ArrayBuilder, make_builder}; +use datafusion::arrow::array::{RecordBatch, TimestampNanosecondArray}; +use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit}; +use datafusion::arrow::error::ArrowError; +use datafusion::common::{DataFusionError, Result as DFResult}; +use std::sync::Arc; + +use super::TIMESTAMP_FIELD; +use 
crate::sql::types::StreamSchema; + +pub type FsSchemaRef = Arc; + +/// Core streaming schema with timestamp and key tracking. +/// Analogous to Arroyo's `ArroyoSchema`. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct FsSchema { + pub schema: Arc, + pub timestamp_index: usize, + key_indices: Option>, + routing_key_indices: Option>, +} + +impl FsSchema { + pub fn new( + schema: Arc, + timestamp_index: usize, + key_indices: Option>, + routing_key_indices: Option>, + ) -> Self { + Self { + schema, + timestamp_index, + key_indices, + routing_key_indices, + } + } + + pub fn new_unkeyed(schema: Arc, timestamp_index: usize) -> Self { + Self { + schema, + timestamp_index, + key_indices: None, + routing_key_indices: None, + } + } + + pub fn new_keyed(schema: Arc, timestamp_index: usize, key_indices: Vec) -> Self { + Self { + schema, + timestamp_index, + key_indices: Some(key_indices), + routing_key_indices: None, + } + } + + pub fn from_fields(mut fields: Vec) -> Self { + if !fields.iter().any(|f| f.name() == TIMESTAMP_FIELD) { + fields.push(Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )); + } + + Self::from_schema_keys(Arc::new(Schema::new(fields)), vec![]).unwrap() + } + + pub fn from_schema_unkeyed(schema: Arc) -> DFResult { + let timestamp_index = schema + .column_with_name(TIMESTAMP_FIELD) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}" + )) + })? + .0; + + Ok(Self { + schema, + timestamp_index, + key_indices: None, + routing_key_indices: None, + }) + } + + pub fn from_schema_keys(schema: Arc, key_indices: Vec) -> DFResult { + let timestamp_index = schema + .column_with_name(TIMESTAMP_FIELD) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}" + )) + })? 
+ .0; + + Ok(Self { + schema, + timestamp_index, + key_indices: Some(key_indices), + routing_key_indices: None, + }) + } + + pub fn schema_without_timestamp(&self) -> Schema { + let mut builder = SchemaBuilder::from(self.schema.fields()); + builder.remove(self.timestamp_index); + builder.finish() + } + + pub fn remove_timestamp_column(&self, batch: &mut RecordBatch) { + batch.remove_column(self.timestamp_index); + } + + pub fn builders(&self) -> Vec> { + self.schema + .fields + .iter() + .map(|f| make_builder(f.data_type(), 8)) + .collect() + } + + pub fn timestamp_column<'a>(&self, batch: &'a RecordBatch) -> &'a TimestampNanosecondArray { + batch + .column(self.timestamp_index) + .as_any() + .downcast_ref::() + .unwrap() + } + + pub fn has_routing_keys(&self) -> bool { + self.routing_keys().map(|k| !k.is_empty()).unwrap_or(false) + } + + pub fn routing_keys(&self) -> Option<&Vec> { + self.routing_key_indices + .as_ref() + .or(self.key_indices.as_ref()) + } + + pub fn storage_keys(&self) -> Option<&Vec> { + self.key_indices.as_ref() + } + + pub fn sort_field_indices(&self, with_timestamp: bool) -> Vec { + let mut indices = vec![]; + if let Some(keys) = &self.key_indices { + indices.extend(keys.iter().copied()); + } + if with_timestamp { + indices.push(self.timestamp_index); + } + indices + } + + pub fn value_indices(&self, with_timestamp: bool) -> Vec { + let field_count = self.schema.fields().len(); + match &self.key_indices { + None => { + let mut indices: Vec = (0..field_count).collect(); + if !with_timestamp { + indices.remove(self.timestamp_index); + } + indices + } + Some(keys) => (0..field_count) + .filter(|index| { + !keys.contains(index) && (with_timestamp || *index != self.timestamp_index) + }) + .collect(), + } + } + + pub fn unkeyed_batch(&self, batch: &RecordBatch) -> Result { + if self.key_indices.is_none() { + return Ok(batch.clone()); + } + let columns: Vec<_> = (0..batch.num_columns()) + .filter(|index| 
!self.key_indices.as_ref().unwrap().contains(index)) + .collect(); + batch.project(&columns) + } + + pub fn schema_without_keys(&self) -> Result { + if self.key_indices.is_none() { + return Ok(self.clone()); + } + let key_indices = self.key_indices.as_ref().unwrap(); + let unkeyed_schema = Schema::new( + self.schema + .fields() + .iter() + .enumerate() + .filter(|(index, _)| !key_indices.contains(index)) + .map(|(_, field)| field.as_ref().clone()) + .collect::>(), + ); + let timestamp_index = unkeyed_schema.index_of(TIMESTAMP_FIELD)?; + Ok(Self { + schema: Arc::new(unkeyed_schema), + timestamp_index, + key_indices: None, + routing_key_indices: None, + }) + } + + pub fn with_fields(&self, fields: Vec) -> Result { + let schema = Arc::new(Schema::new_with_metadata( + fields, + self.schema.metadata.clone(), + )); + + let timestamp_index = schema.index_of(TIMESTAMP_FIELD)?; + let max_index = *[&self.key_indices, &self.routing_key_indices] + .iter() + .map(|indices| indices.as_ref().and_then(|k| k.iter().max())) + .max() + .flatten() + .unwrap_or(&0); + + if schema.fields.len() - 1 < max_index { + return Err(ArrowError::InvalidArgumentError(format!( + "expected at least {} fields, but were only {}", + max_index + 1, + schema.fields.len() + ))); + } + + Ok(Self { + schema, + timestamp_index, + key_indices: self.key_indices.clone(), + routing_key_indices: self.routing_key_indices.clone(), + }) + } + + pub fn with_additional_fields( + &self, + new_fields: impl Iterator, + ) -> Result { + let mut fields = self.schema.fields.to_vec(); + fields.extend(new_fields.map(Arc::new)); + self.with_fields(fields) + } +} + +/// Proto serialization: convert between FsSchema and the proto `FsSchema` message. +/// +/// Schema is encoded as JSON using Arrow's `SchemaRef` JSON representation. +/// This approach avoids depending on serde for `arrow_schema::Schema` directly. 
+impl FsSchema { + pub fn to_proto(&self) -> protocol::grpc::api::FsSchema { + let arrow_schema = schema_to_json_string(&self.schema); + let timestamp_index = self.timestamp_index as u32; + + let has_keys = self.key_indices.is_some(); + let key_indices = self + .key_indices + .as_ref() + .map(|ks| ks.iter().map(|i| *i as u32).collect()) + .unwrap_or_default(); + + let has_routing_keys = self.routing_key_indices.is_some(); + let routing_key_indices = self + .routing_key_indices + .as_ref() + .map(|ks| ks.iter().map(|i| *i as u32).collect()) + .unwrap_or_default(); + + protocol::grpc::api::FsSchema { + arrow_schema, + timestamp_index, + key_indices, + has_keys, + routing_key_indices, + has_routing_keys, + } + } + + pub fn from_proto(proto: protocol::grpc::api::FsSchema) -> Result { + let schema = schema_from_json_string(&proto.arrow_schema)?; + let timestamp_index = proto.timestamp_index as usize; + + let key_indices = proto + .has_keys + .then(|| proto.key_indices.into_iter().map(|i| i as usize).collect()); + + let routing_key_indices = proto.has_routing_keys.then(|| { + proto + .routing_key_indices + .into_iter() + .map(|i| i as usize) + .collect() + }); + + Ok(Self { + schema: Arc::new(schema), + timestamp_index, + key_indices, + routing_key_indices, + }) + } +} + +fn schema_to_json_string(schema: &Schema) -> String { + let json_fields: Vec = schema + .fields() + .iter() + .map(|f| { + serde_json::json!({ + "name": f.name(), + "data_type": format!("{:?}", f.data_type()), + "nullable": f.is_nullable(), + }) + }) + .collect(); + serde_json::to_string(&json_fields).unwrap() +} + +fn schema_from_json_string(s: &str) -> Result { + let json_fields: Vec = serde_json::from_str(s) + .map_err(|e| DataFusionError::Plan(format!("Invalid schema JSON: {e}")))?; + + let fields: Vec = json_fields + .into_iter() + .map(|v| { + let name = v["name"] + .as_str() + .ok_or_else(|| DataFusionError::Plan("missing field name".into()))? 
+ .to_string(); + let nullable = v["nullable"].as_bool().unwrap_or(true); + let dt_str = v["data_type"] + .as_str() + .ok_or_else(|| DataFusionError::Plan("missing data_type".into()))?; + let data_type = parse_debug_data_type(dt_str)?; + Ok(Field::new(name, data_type, nullable)) + }) + .collect::>()?; + + Ok(Schema::new(fields)) +} + +fn parse_debug_data_type(s: &str) -> Result { + match s { + "Boolean" => Ok(DataType::Boolean), + "Int8" => Ok(DataType::Int8), + "Int16" => Ok(DataType::Int16), + "Int32" => Ok(DataType::Int32), + "Int64" => Ok(DataType::Int64), + "UInt8" => Ok(DataType::UInt8), + "UInt16" => Ok(DataType::UInt16), + "UInt32" => Ok(DataType::UInt32), + "UInt64" => Ok(DataType::UInt64), + "Float16" => Ok(DataType::Float16), + "Float32" => Ok(DataType::Float32), + "Float64" => Ok(DataType::Float64), + "Utf8" => Ok(DataType::Utf8), + "LargeUtf8" => Ok(DataType::LargeUtf8), + "Binary" => Ok(DataType::Binary), + "LargeBinary" => Ok(DataType::LargeBinary), + "Date32" => Ok(DataType::Date32), + "Date64" => Ok(DataType::Date64), + "Null" => Ok(DataType::Null), + s if s.starts_with("Timestamp(Nanosecond") => { + Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) + } + s if s.starts_with("Timestamp(Microsecond") => { + Ok(DataType::Timestamp(TimeUnit::Microsecond, None)) + } + s if s.starts_with("Timestamp(Millisecond") => { + Ok(DataType::Timestamp(TimeUnit::Millisecond, None)) + } + s if s.starts_with("Timestamp(Second") => Ok(DataType::Timestamp(TimeUnit::Second, None)), + _ => Err(DataFusionError::Plan(format!( + "Unsupported data type in schema JSON: {s}" + ))), + } +} + +impl From for FsSchema { + fn from(s: StreamSchema) -> Self { + FsSchema { + schema: s.schema, + timestamp_index: s.timestamp_index, + key_indices: s.key_indices, + routing_key_indices: None, + } + } +} + +impl From for Arc { + fn from(s: StreamSchema) -> Self { + Arc::new(FsSchema::from(s)) + } +} diff --git a/src/types/errors.rs b/src/types/errors.rs new file mode 100644 index 
00000000..2c425c93 --- /dev/null +++ b/src/types/errors.rs @@ -0,0 +1,67 @@ +use std::fmt; + +/// Unified error type for streaming dataflow operations. +#[derive(Debug)] +pub enum DataflowError { + Arrow(arrow_schema::ArrowError), + DataFusion(datafusion::error::DataFusionError), + Operator(String), + State(String), + Connector(String), + Internal(String), +} + +impl fmt::Display for DataflowError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + DataflowError::Arrow(e) => write!(f, "Arrow error: {e}"), + DataflowError::DataFusion(e) => write!(f, "DataFusion error: {e}"), + DataflowError::Operator(msg) => write!(f, "Operator error: {msg}"), + DataflowError::State(msg) => write!(f, "State error: {msg}"), + DataflowError::Connector(msg) => write!(f, "Connector error: {msg}"), + DataflowError::Internal(msg) => write!(f, "Internal error: {msg}"), + } + } +} + +impl std::error::Error for DataflowError {} + +impl From for DataflowError { + fn from(e: arrow_schema::ArrowError) -> Self { + DataflowError::Arrow(e) + } +} + +impl From for DataflowError { + fn from(e: datafusion::error::DataFusionError) -> Self { + DataflowError::DataFusion(e) + } +} + +/// Macro for creating connector errors. +#[macro_export] +macro_rules! connector_err { + ($($arg:tt)*) => { + $crate::types::errors::DataflowError::Connector(format!($($arg)*)) + }; +} + +/// State-related errors. 
+#[derive(Debug)] +pub enum StateError { + KeyNotFound(String), + SerializationError(String), + BackendError(String), +} + +impl fmt::Display for StateError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + StateError::KeyNotFound(key) => write!(f, "Key not found: {key}"), + StateError::SerializationError(msg) => write!(f, "Serialization error: {msg}"), + StateError::BackendError(msg) => write!(f, "State backend error: {msg}"), + } + } +} + +impl std::error::Error for StateError {} diff --git a/src/types/formats.rs b/src/types/formats.rs new file mode 100644 index 00000000..25d09a74 --- /dev/null +++ b/src/types/formats.rs @@ -0,0 +1,234 @@ +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; + +#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default, Hash, PartialOrd)] +#[serde(rename_all = "snake_case")] +pub enum TimestampFormat { + #[default] + #[serde(rename = "rfc3339")] + RFC3339, + UnixMillis, +} + +impl TryFrom<&str> for TimestampFormat { + type Error = (); + + fn try_from(value: &str) -> Result { + match value { + "RFC3339" | "rfc3339" => Ok(TimestampFormat::RFC3339), + "UnixMillis" | "unix_millis" => Ok(TimestampFormat::UnixMillis), + _ => Err(()), + } + } +} + +#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default, Hash, PartialOrd)] +#[serde(rename_all = "snake_case")] +pub enum DecimalEncoding { + #[default] + Number, + String, + Bytes, +} + +impl TryFrom<&str> for DecimalEncoding { + type Error = (); + + fn try_from(s: &str) -> Result { + match s { + "number" => Ok(Self::Number), + "string" => Ok(Self::String), + "bytes" => Ok(Self::Bytes), + _ => Err(()), + } + } +} + +#[derive(Serialize, Deserialize, Default, Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd)] +#[serde(rename_all = "snake_case")] +pub enum JsonCompression { + #[default] + Uncompressed, + Gzip, +} + +impl FromStr for JsonCompression { + type Err = String; + + fn 
from_str(s: &str) -> Result { + match s { + "uncompressed" => Ok(JsonCompression::Uncompressed), + "gzip" => Ok(JsonCompression::Gzip), + _ => Err(format!("invalid json compression '{s}'")), + } + } +} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default, Hash, PartialOrd)] +#[serde(rename_all = "snake_case")] +pub struct JsonFormat { + #[serde(default)] + pub confluent_schema_registry: bool, + #[serde(default, alias = "confluent_schema_version")] + pub schema_id: Option, + #[serde(default)] + pub include_schema: bool, + #[serde(default)] + pub debezium: bool, + #[serde(default)] + pub unstructured: bool, + #[serde(default)] + pub timestamp_format: TimestampFormat, + #[serde(default)] + pub decimal_encoding: DecimalEncoding, + #[serde(default)] + pub compression: JsonCompression, +} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd)] +#[serde(rename_all = "snake_case")] +pub struct RawStringFormat {} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd)] +#[serde(rename_all = "snake_case")] +pub struct RawBytesFormat {} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd)] +#[serde(rename_all = "snake_case")] +pub struct AvroFormat { + #[serde(default)] + pub confluent_schema_registry: bool, + #[serde(default)] + pub raw_datums: bool, + #[serde(default)] + pub into_unstructured_json: bool, + #[serde(default)] + pub schema_id: Option, +} + +impl AvroFormat { + pub fn new( + confluent_schema_registry: bool, + raw_datums: bool, + into_unstructured_json: bool, + ) -> Self { + Self { + confluent_schema_registry, + raw_datums, + into_unstructured_json, + schema_id: None, + } + } +} + +#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Default)] +#[serde(rename_all = "snake_case")] +pub enum ParquetCompression { + Uncompressed, + Snappy, + Gzip, + #[default] + Zstd, + Lz4, + Lz4Raw, +} + +impl FromStr for ParquetCompression { + 
type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "uncompressed" => Ok(ParquetCompression::Uncompressed), + "snappy" => Ok(ParquetCompression::Snappy), + "gzip" => Ok(ParquetCompression::Gzip), + "zstd" => Ok(ParquetCompression::Zstd), + "lz4" => Ok(ParquetCompression::Lz4), + "lz4_raw" => Ok(ParquetCompression::Lz4Raw), + _ => Err(format!("invalid parquet compression '{s}'")), + } + } +} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Default)] +#[serde(rename_all = "snake_case")] +pub struct ParquetFormat { + #[serde(default)] + pub compression: ParquetCompression, + #[serde(default)] + pub row_group_bytes: Option, +} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd)] +#[serde(rename_all = "snake_case")] +pub struct ProtobufFormat { + #[serde(default)] + pub into_unstructured_json: bool, + #[serde(default)] + pub message_name: Option, + #[serde(default)] + pub compiled_schema: Option>, + #[serde(default)] + pub confluent_schema_registry: bool, + #[serde(default)] + pub length_delimited: bool, +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd)] +#[serde(rename_all = "snake_case", tag = "type")] +pub enum Format { + Json(JsonFormat), + Avro(AvroFormat), + Protobuf(ProtobufFormat), + Parquet(ParquetFormat), + RawString(RawStringFormat), + RawBytes(RawBytesFormat), +} + +impl Display for Format { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(self.name()) + } +} + +impl Format { + pub fn name(&self) -> &'static str { + match self { + Format::Json(_) => "json", + Format::Avro(_) => "avro", + Format::Protobuf(_) => "protobuf", + Format::Parquet(_) => "parquet", + Format::RawString(_) => "raw_string", + Format::RawBytes(_) => "raw_bytes", + } + } + + pub fn is_updating(&self) -> bool { + matches!(self, Format::Json(JsonFormat { debezium: true, .. 
})) + } +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd)] +#[serde(rename_all = "snake_case", tag = "behavior")] +pub enum BadData { + Fail {}, + Drop {}, +} + +impl Default for BadData { + fn default() -> Self { + BadData::Fail {} + } +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd)] +#[serde(rename_all = "snake_case", tag = "method")] +pub enum Framing { + Newline(NewlineDelimitedFraming), +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd)] +#[serde(rename_all = "snake_case")] +pub struct NewlineDelimitedFraming { + pub max_line_length: Option, +} diff --git a/src/types/hash.rs b/src/types/hash.rs new file mode 100644 index 00000000..8f47a8fa --- /dev/null +++ b/src/types/hash.rs @@ -0,0 +1,88 @@ +use std::ops::RangeInclusive; + +/// Randomly generated seeds for consistent hashing. Changing these breaks existing state. +pub const HASH_SEEDS: [u64; 4] = [ + 5093852630788334730, + 1843948808084437226, + 8049205638242432149, + 17942305062735447798, +]; + +/// Returns the server index (0-based) responsible for the given hash value +/// when distributing across `n` servers. +pub fn server_for_hash(x: u64, n: usize) -> usize { + if n == 1 { + 0 + } else { + let range_size = (u64::MAX / (n as u64)) + 1; + (x / range_size) as usize + } +} + +/// Returns the key range assigned to server `i` out of `n` total servers. 
+pub fn range_for_server(i: usize, n: usize) -> RangeInclusive { + if n == 1 { + return 0..=u64::MAX; + } + let range_size = (u64::MAX / (n as u64)) + 1; + let start = range_size * (i as u64); + let end = if i + 1 == n { + u64::MAX + } else { + start + range_size - 1 + }; + start..=end +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_range_for_server() { + let n = 6; + + for i in 0..(n - 1) { + let range1 = range_for_server(i, n); + let range2 = range_for_server(i + 1, n); + + assert_eq!(*range1.end() + 1, *range2.start(), "Ranges not adjacent"); + assert_eq!( + i, + server_for_hash(*range1.start(), n), + "start not assigned to range" + ); + assert_eq!( + i, + server_for_hash(*range1.end(), n), + "end not assigned to range" + ); + } + + let last_range = range_for_server(n - 1, n); + assert_eq!( + *last_range.end(), + u64::MAX, + "Last range does not contain u64::MAX" + ); + assert_eq!( + n - 1, + server_for_hash(u64::MAX, n), + "u64::MAX not in last range" + ); + } + + #[test] + fn test_server_for_hash() { + let n = 2; + let x = u64::MAX; + + let server_index = server_for_hash(x, n); + let server_range = range_for_server(server_index, n); + + assert!( + server_range.contains(&x), + "u64::MAX is not in the correct range" + ); + } +} diff --git a/src/types/message.rs b/src/types/message.rs new file mode 100644 index 00000000..29b7f3a5 --- /dev/null +++ b/src/types/message.rs @@ -0,0 +1,42 @@ +use bincode::{Decode, Encode}; +use datafusion::arrow::array::RecordBatch; +use serde::{Deserialize, Serialize}; +use std::time::SystemTime; + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Encode, Decode, Serialize, Deserialize)] +pub enum Watermark { + EventTime(SystemTime), + Idle, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ArrowMessage { + Data(RecordBatch), + Signal(SignalMessage), +} + +impl ArrowMessage { + pub fn is_end(&self) -> bool { + matches!( + self, + ArrowMessage::Signal(SignalMessage::Stop) + | 
ArrowMessage::Signal(SignalMessage::EndOfData) + ) + } +} + +#[derive(Debug, Clone, PartialEq, Encode, Decode)] +pub enum SignalMessage { + Barrier(CheckpointBarrier), + Watermark(Watermark), + Stop, + EndOfData, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Encode, Decode, Serialize, Deserialize)] +pub struct CheckpointBarrier { + pub epoch: u32, + pub min_epoch: u32, + pub timestamp: SystemTime, + pub then_stop: bool, +} diff --git a/src/types/mod.rs b/src/types/mod.rs new file mode 100644 index 00000000..ddf7baca --- /dev/null +++ b/src/types/mod.rs @@ -0,0 +1,71 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Core types shared across the FunctionStream system. +//! +//! This module provides fundamental types used by the runtime, SQL planner, +//! coordinator, and other subsystems — analogous to `arroyo-types` + `arroyo-rpc` in Arroyo. 
+ +pub mod arrow_ext; +pub mod control; +pub mod date; +pub mod debezium; +pub mod df; +pub mod errors; +pub mod formats; +pub mod hash; +pub mod message; +pub mod operator_config; +pub mod task_info; +pub mod time_utils; +pub mod worker; + +// ── Re-exports from existing modules ── +pub use arrow_ext::{DisplayAsSql, FsExtensionType, GetArrowSchema, GetArrowType}; +pub use date::{DatePart, DateTruncPrecision}; +pub use debezium::{Debezium, DebeziumOp, UpdatingData}; +pub use hash::{HASH_SEEDS, range_for_server, server_for_hash}; +pub use message::{ArrowMessage, CheckpointBarrier, SignalMessage, Watermark}; +pub use task_info::{ChainInfo, TaskInfo}; +pub use time_utils::{from_micros, from_millis, from_nanos, to_micros, to_millis, to_nanos}; +pub use worker::{MachineId, WorkerId}; + +// ── Re-exports from new modules ── +pub use control::{ + CheckpointCompleted, CheckpointEvent, CompactionResult, ControlMessage, ControlResp, + ErrorDomain, RetryHint, StopMode, TaskCheckpointEventType, TaskError, +}; +pub use df::{FsSchema, FsSchemaRef}; +pub use errors::DataflowError; +pub use formats::{BadData, Format, Framing, JsonFormat}; +pub use operator_config::{MetadataField, OperatorConfig, RateLimit}; + +// ── Well-known column names ── +pub const TIMESTAMP_FIELD: &str = "_timestamp"; +pub const UPDATING_META_FIELD: &str = "_updating_meta"; + +// ── Environment variables ── +pub const JOB_ID_ENV: &str = "JOB_ID"; +pub const RUN_ID_ENV: &str = "RUN_ID"; + +// ── Metric names ── +pub const MESSAGES_RECV: &str = "fs_worker_messages_recv"; +pub const MESSAGES_SENT: &str = "fs_worker_messages_sent"; +pub const BYTES_RECV: &str = "fs_worker_bytes_recv"; +pub const BYTES_SENT: &str = "fs_worker_bytes_sent"; +pub const BATCHES_RECV: &str = "fs_worker_batches_recv"; +pub const BATCHES_SENT: &str = "fs_worker_batches_sent"; +pub const TX_QUEUE_SIZE: &str = "fs_worker_tx_queue_size"; +pub const TX_QUEUE_REM: &str = "fs_worker_tx_queue_rem"; +pub const DESERIALIZATION_ERRORS: &str = 
"fs_worker_deserialization_errors"; + +pub const LOOKUP_KEY_INDEX_FIELD: &str = "__lookup_key_index"; diff --git a/src/types/operator_config.rs b/src/types/operator_config.rs new file mode 100644 index 00000000..744dbd85 --- /dev/null +++ b/src/types/operator_config.rs @@ -0,0 +1,30 @@ +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use super::formats::{BadData, Format, Framing}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RateLimit { + pub messages_per_second: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MetadataField { + pub field_name: String, + pub key: String, + /// JSON-encoded Arrow DataType string, e.g. `"Utf8"`, `"Int64"`. + #[serde(default)] + pub data_type: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OperatorConfig { + pub connection: Value, + pub table: Value, + pub format: Option, + pub bad_data: Option, + pub framing: Option, + pub rate_limit: Option, + #[serde(default)] + pub metadata_fields: Vec, +} diff --git a/src/types/task_info.rs b/src/types/task_info.rs new file mode 100644 index 00000000..5a31511b --- /dev/null +++ b/src/types/task_info.rs @@ -0,0 +1,80 @@ +use bincode::{Decode, Encode}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::fmt::{Display, Formatter}; +use std::ops::RangeInclusive; + +#[derive(Eq, PartialEq, Hash, Debug, Clone, Encode, Decode, Serialize, Deserialize)] +pub struct TaskInfo { + pub job_id: String, + pub node_id: u32, + pub operator_name: String, + pub operator_id: String, + pub task_index: u32, + pub parallelism: u32, + pub key_range: RangeInclusive, +} + +impl Display for TaskInfo { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Task_{}-{}/{}", + self.operator_id, self.task_index, self.parallelism + ) + } +} + +impl TaskInfo { + pub fn for_test(job_id: &str, operator_id: &str) -> Self { + Self { + job_id: job_id.to_string(), + node_id: 1, + operator_name: 
"op".to_string(), + operator_id: operator_id.to_string(), + task_index: 0, + parallelism: 1, + key_range: 0..=u64::MAX, + } + } +} + +pub fn get_test_task_info() -> TaskInfo { + TaskInfo { + job_id: "instance-1".to_string(), + node_id: 1, + operator_name: "test-operator".to_string(), + operator_id: "test-operator-1".to_string(), + task_index: 0, + parallelism: 1, + key_range: 0..=u64::MAX, + } +} + +#[derive(Eq, PartialEq, Hash, Debug, Clone, Encode, Decode, Serialize, Deserialize)] +pub struct ChainInfo { + pub job_id: String, + pub node_id: u32, + pub description: String, + pub task_index: u32, +} + +impl Display for ChainInfo { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + write!( + f, + "TaskChain{}-{} ({})", + self.node_id, self.task_index, self.description + ) + } +} + +impl ChainInfo { + pub fn metric_label_map(&self) -> HashMap { + let mut labels = HashMap::new(); + labels.insert("node_id".to_string(), self.node_id.to_string()); + labels.insert("subtask_idx".to_string(), self.task_index.to_string()); + labels.insert("node_description".to_string(), self.description.to_string()); + labels + } +} diff --git a/src/types/time_utils.rs b/src/types/time_utils.rs new file mode 100644 index 00000000..2ee5a126 --- /dev/null +++ b/src/types/time_utils.rs @@ -0,0 +1,62 @@ +use std::collections::HashMap; +use std::hash::Hash; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +pub fn to_millis(time: SystemTime) -> u64 { + time.duration_since(UNIX_EPOCH).unwrap().as_millis() as u64 +} + +pub fn to_micros(time: SystemTime) -> u64 { + time.duration_since(UNIX_EPOCH).unwrap().as_micros() as u64 +} + +pub fn from_millis(ts: u64) -> SystemTime { + UNIX_EPOCH + Duration::from_millis(ts) +} + +pub fn from_micros(ts: u64) -> SystemTime { + UNIX_EPOCH + Duration::from_micros(ts) +} + +pub fn to_nanos(time: SystemTime) -> u128 { + time.duration_since(UNIX_EPOCH).unwrap().as_nanos() +} + +pub fn from_nanos(ts: u128) -> SystemTime { + UNIX_EPOCH + + 
Duration::from_secs((ts / 1_000_000_000) as u64) + + Duration::from_nanos((ts % 1_000_000_000) as u64) +} + +pub fn print_time(time: SystemTime) -> String { + chrono::DateTime::::from(time) + .format("%Y-%m-%d %H:%M:%S%.3f") + .to_string() +} + +/// Returns the number of days since the UNIX epoch (for Avro serialization). +pub fn days_since_epoch(time: SystemTime) -> i32 { + time.duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() + .div_euclid(86400) as i32 +} + +pub fn single_item_hash_map, K: Hash + Eq, V>(key: I, value: V) -> HashMap { + let mut map = HashMap::new(); + map.insert(key.into(), value); + map +} + +pub fn string_to_map(s: &str, pair_delimiter: char) -> Option> { + if s.trim().is_empty() { + return Some(HashMap::new()); + } + + s.split(',') + .map(|s| { + let mut kv = s.trim().split(pair_delimiter); + Some((kv.next()?.trim().to_string(), kv.next()?.trim().to_string())) + }) + .collect() +} diff --git a/src/types/worker.rs b/src/types/worker.rs new file mode 100644 index 00000000..c12163ba --- /dev/null +++ b/src/types/worker.rs @@ -0,0 +1,14 @@ +use std::fmt::{Display, Formatter}; +use std::sync::Arc; + +#[derive(Debug, Hash, Eq, PartialEq, Copy, Clone)] +pub struct WorkerId(pub u64); + +#[derive(Debug, Hash, Eq, PartialEq, Clone)] +pub struct MachineId(pub Arc); + +impl Display for MachineId { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} From a42f5a305cf8326bc731d6c8924f61997bd132ee Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Mon, 16 Mar 2026 23:19:50 +0800 Subject: [PATCH 03/44] update --- src/coordinator/analyze/analyzer.rs | 21 ++++++++- src/coordinator/execution/executor.rs | 32 +++++++++++-- src/coordinator/mod.rs | 4 +- src/coordinator/plan/create_table_plan.rs | 32 +++++++++++++ src/coordinator/plan/insert_statement_plan.rs | 32 +++++++++++++ src/coordinator/plan/logical_plan_visitor.rs | 46 +++++++++++++++++-- src/coordinator/plan/mod.rs | 4 ++ src/coordinator/plan/visitor.rs | 16 ++++++- 
src/coordinator/statement/create_table.rs | 40 ++++++++++++++++ src/coordinator/statement/insert_statement.rs | 41 +++++++++++++++++ src/coordinator/statement/mod.rs | 4 ++ src/coordinator/statement/visitor.rs | 16 ++++++- src/sql/planner/parse.rs | 16 +++++-- 13 files changed, 284 insertions(+), 20 deletions(-) create mode 100644 src/coordinator/plan/create_table_plan.rs create mode 100644 src/coordinator/plan/insert_statement_plan.rs create mode 100644 src/coordinator/statement/create_table.rs create mode 100644 src/coordinator/statement/insert_statement.rs diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs index 58056b67..297d0de2 100644 --- a/src/coordinator/analyze/analyzer.rs +++ b/src/coordinator/analyze/analyzer.rs @@ -13,8 +13,9 @@ use super::Analysis; use crate::coordinator::execution_context::ExecutionContext; use crate::coordinator::statement::{ - CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction, Statement, - StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingSql, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, + ShowFunctions, StartFunction, Statement, StatementVisitor, StatementVisitorContext, + StatementVisitorResult, StopFunction, StreamingSql, }; use std::fmt; @@ -116,6 +117,22 @@ impl StatementVisitor for Analyzer<'_> { StatementVisitorResult::Analyze(Box::new(stmt.clone())) } + fn visit_create_table( + &self, + stmt: &CreateTable, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Analyze(Box::new(CreateTable::new(stmt.statement.clone()))) + } + + fn visit_insert_statement( + &self, + stmt: &InsertStatement, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Analyze(Box::new(InsertStatement::new(stmt.statement.clone()))) + } + fn visit_streaming_sql( &self, stmt: &StreamingSql, diff --git 
a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 5d96bf45..dbc76923 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -12,9 +12,9 @@ use crate::coordinator::dataset::{ExecuteResult, ShowFunctionsResult, empty_record_batch}; use crate::coordinator::plan::{ - CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, PlanNode, PlanVisitor, - PlanVisitorContext, PlanVisitorResult, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, - StreamingSqlPlan, + CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, + InsertStatementPlan, PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult, + ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan, }; use crate::coordinator::statement::{ConfigSource, FunctionSource}; use crate::runtime::taskexecutor::TaskManager; @@ -202,6 +202,32 @@ impl PlanVisitor for Executor { PlanVisitorResult::Execute(result) } + fn visit_create_table_plan( + &self, + plan: &CreateTablePlan, + _context: &PlanVisitorContext, + ) -> PlanVisitorResult { + // TODO: register table in catalog and execute DDL + let result = Err(ExecuteError::Internal(format!( + "CREATE TABLE execution not yet implemented. LogicalPlan:\n{}", + plan.logical_plan.display_indent() + ))); + PlanVisitorResult::Execute(result) + } + + fn visit_insert_statement_plan( + &self, + plan: &InsertStatementPlan, + _context: &PlanVisitorContext, + ) -> PlanVisitorResult { + // TODO: start streaming pipeline for INSERT / anonymous query + let result = Err(ExecuteError::Internal(format!( + "INSERT statement execution not yet implemented. 
LogicalPlan:\n{}", + plan.logical_plan.display_indent() + ))); + PlanVisitorResult::Execute(result) + } + fn visit_streaming_sql_plan( &self, plan: &StreamingSqlPlan, diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs index 26627a8b..51b93ca0 100644 --- a/src/coordinator/mod.rs +++ b/src/coordinator/mod.rs @@ -22,6 +22,6 @@ mod statement; pub use coordinator::Coordinator; pub use dataset::{DataSet, ShowFunctionsResult}; pub use statement::{ - CreateFunction, CreatePythonFunction, DropFunction, PythonModule, ShowFunctions, StartFunction, - Statement, StopFunction, StreamingSql, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, PythonModule, + ShowFunctions, StartFunction, Statement, StopFunction, StreamingSql, }; diff --git a/src/coordinator/plan/create_table_plan.rs b/src/coordinator/plan/create_table_plan.rs new file mode 100644 index 00000000..450c8813 --- /dev/null +++ b/src/coordinator/plan/create_table_plan.rs @@ -0,0 +1,32 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use datafusion::logical_expr::LogicalPlan; + +use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; + +#[derive(Debug)] +pub struct CreateTablePlan { + pub logical_plan: LogicalPlan, +} + +impl CreateTablePlan { + pub fn new(logical_plan: LogicalPlan) -> Self { + Self { logical_plan } + } +} + +impl PlanNode for CreateTablePlan { + fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { + visitor.visit_create_table_plan(self, context) + } +} diff --git a/src/coordinator/plan/insert_statement_plan.rs b/src/coordinator/plan/insert_statement_plan.rs new file mode 100644 index 00000000..e96a2772 --- /dev/null +++ b/src/coordinator/plan/insert_statement_plan.rs @@ -0,0 +1,32 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use datafusion::logical_expr::LogicalPlan; + +use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; + +#[derive(Debug)] +pub struct InsertStatementPlan { + pub logical_plan: LogicalPlan, +} + +impl InsertStatementPlan { + pub fn new(logical_plan: LogicalPlan) -> Self { + Self { logical_plan } + } +} + +impl PlanNode for InsertStatementPlan { + fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { + visitor.visit_insert_statement_plan(self, context) + } +} diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index 3462d033..fde7f35a 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -14,12 +14,14 @@ use tracing::debug; use crate::coordinator::analyze::analysis::Analysis; use crate::coordinator::plan::{ - CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, PlanNode, ShowFunctionsPlan, - StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan, + CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, + InsertStatementPlan, PlanNode, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, + StreamingSqlPlan, }; use crate::coordinator::statement::{ - CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction, - StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingSql, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, + ShowFunctions, StartFunction, StatementVisitor, StatementVisitorContext, + StatementVisitorResult, StopFunction, StreamingSql, }; use crate::sql::planner::StreamSchemaProvider; @@ -110,6 +112,42 @@ impl StatementVisitor for LogicalPlanVisitor { ))) } + fn visit_create_table( + &self, + stmt: &CreateTable, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + let sql_to_rel = 
datafusion::sql::planner::SqlToRel::new(&self.schema_provider); + + match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) { + Ok(plan) => { + debug!("Create table plan:\n{}", plan.display_graphviz()); + StatementVisitorResult::Plan(Box::new(CreateTablePlan::new(plan))) + } + Err(e) => { + panic!("Failed to convert CREATE TABLE to logical plan: {e}"); + } + } + } + + fn visit_insert_statement( + &self, + stmt: &InsertStatement, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + let sql_to_rel = datafusion::sql::planner::SqlToRel::new(&self.schema_provider); + + match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) { + Ok(plan) => { + debug!("Insert statement plan:\n{}", plan.display_graphviz()); + StatementVisitorResult::Plan(Box::new(InsertStatementPlan::new(plan))) + } + Err(e) => { + panic!("Failed to convert INSERT statement to logical plan: {e}"); + } + } + } + fn visit_streaming_sql( &self, stmt: &StreamingSql, diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs index 744410e1..3d36ec16 100644 --- a/src/coordinator/plan/mod.rs +++ b/src/coordinator/plan/mod.rs @@ -12,7 +12,9 @@ mod create_function_plan; mod create_python_function_plan; +mod create_table_plan; mod drop_function_plan; +mod insert_statement_plan; mod logical_plan_visitor; mod optimizer; mod show_functions_plan; @@ -23,7 +25,9 @@ mod visitor; pub use create_function_plan::CreateFunctionPlan; pub use create_python_function_plan::CreatePythonFunctionPlan; +pub use create_table_plan::CreateTablePlan; pub use drop_function_plan::DropFunctionPlan; +pub use insert_statement_plan::InsertStatementPlan; pub use logical_plan_visitor::LogicalPlanVisitor; pub use optimizer::LogicalPlanner; pub use show_functions_plan::ShowFunctionsPlan; diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs index e8bd0ffc..e3911a8b 100644 --- a/src/coordinator/plan/visitor.rs +++ b/src/coordinator/plan/visitor.rs @@ -11,8 +11,8 @@ // limitations 
under the License. use super::{ - CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, ShowFunctionsPlan, - StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan, + CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, + InsertStatementPlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan, }; /// Context passed to PlanVisitor methods @@ -85,6 +85,18 @@ pub trait PlanVisitor { context: &PlanVisitorContext, ) -> PlanVisitorResult; + fn visit_create_table_plan( + &self, + plan: &CreateTablePlan, + context: &PlanVisitorContext, + ) -> PlanVisitorResult; + + fn visit_insert_statement_plan( + &self, + plan: &InsertStatementPlan, + context: &PlanVisitorContext, + ) -> PlanVisitorResult; + fn visit_streaming_sql_plan( &self, plan: &StreamingSqlPlan, diff --git a/src/coordinator/statement/create_table.rs b/src/coordinator/statement/create_table.rs new file mode 100644 index 00000000..8aa16bf0 --- /dev/null +++ b/src/coordinator/statement/create_table.rs @@ -0,0 +1,40 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use datafusion::sql::sqlparser::ast::Statement as DFStatement; + +use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; + +/// Represents a CREATE TABLE or CREATE VIEW statement. +/// +/// This wraps the raw SQL AST node so the coordinator pipeline can +/// distinguish table/view creation from other streaming SQL operations. 
+#[derive(Debug)] +pub struct CreateTable { + pub statement: DFStatement, +} + +impl CreateTable { + pub fn new(statement: DFStatement) -> Self { + Self { statement } + } +} + +impl Statement for CreateTable { + fn accept( + &self, + visitor: &dyn StatementVisitor, + context: &StatementVisitorContext, + ) -> StatementVisitorResult { + visitor.visit_create_table(self, context) + } +} diff --git a/src/coordinator/statement/insert_statement.rs b/src/coordinator/statement/insert_statement.rs new file mode 100644 index 00000000..45785251 --- /dev/null +++ b/src/coordinator/statement/insert_statement.rs @@ -0,0 +1,41 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use datafusion::sql::sqlparser::ast::Statement as DFStatement; + +use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; + +/// Represents an INSERT INTO or standalone SELECT/query statement. +/// +/// In the streaming SQL context, both INSERT INTO (writing to a sink) +/// and standalone SELECT (anonymous computation) are treated as +/// data-producing operations that feed into the streaming pipeline. 
+#[derive(Debug)] +pub struct InsertStatement { + pub statement: DFStatement, +} + +impl InsertStatement { + pub fn new(statement: DFStatement) -> Self { + Self { statement } + } +} + +impl Statement for InsertStatement { + fn accept( + &self, + visitor: &dyn StatementVisitor, + context: &StatementVisitorContext, + ) -> StatementVisitorResult { + visitor.visit_insert_statement(self, context) + } +} diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs index a115af91..7628b94b 100644 --- a/src/coordinator/statement/mod.rs +++ b/src/coordinator/statement/mod.rs @@ -12,7 +12,9 @@ mod create_function; mod create_python_function; +mod create_table; mod drop_function; +mod insert_statement; mod show_functions; mod start_function; mod stop_function; @@ -21,7 +23,9 @@ mod visitor; pub use create_function::{ConfigSource, CreateFunction, FunctionSource}; pub use create_python_function::{CreatePythonFunction, PythonModule}; +pub use create_table::CreateTable; pub use drop_function::DropFunction; +pub use insert_statement::InsertStatement; pub use show_functions::ShowFunctions; pub use start_function::StartFunction; pub use stop_function::StopFunction; diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs index c9a63831..8de6ffe2 100644 --- a/src/coordinator/statement/visitor.rs +++ b/src/coordinator/statement/visitor.rs @@ -11,8 +11,8 @@ // limitations under the License. 
use super::{ - CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction, StopFunction, - StreamingSql, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, + ShowFunctions, StartFunction, StopFunction, StreamingSql, }; use crate::coordinator::plan::PlanNode; use crate::coordinator::statement::Statement; @@ -89,6 +89,18 @@ pub trait StatementVisitor { context: &StatementVisitorContext, ) -> StatementVisitorResult; + fn visit_create_table( + &self, + stmt: &CreateTable, + context: &StatementVisitorContext, + ) -> StatementVisitorResult; + + fn visit_insert_statement( + &self, + stmt: &InsertStatement, + context: &StatementVisitorContext, + ) -> StatementVisitorResult; + fn visit_streaming_sql( &self, stmt: &StreamingSql, diff --git a/src/sql/planner/parse.rs b/src/sql/planner/parse.rs index dfaec9a6..4bd8f30e 100644 --- a/src/sql/planner/parse.rs +++ b/src/sql/planner/parse.rs @@ -19,8 +19,8 @@ use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; use datafusion::sql::sqlparser::parser::Parser; use crate::coordinator::{ - CreateFunction, DropFunction, ShowFunctions, StartFunction, Statement as CoordinatorStatement, - StopFunction, StreamingSql, + CreateFunction, CreateTable, DropFunction, InsertStatement, ShowFunctions, StartFunction, + Statement as CoordinatorStatement, StopFunction, StreamingSql, }; /// Stage 1: String → Box @@ -48,9 +48,11 @@ pub fn parse_sql(query: &str) -> Result> { /// Classify a parsed DataFusion Statement into the coordinator's Statement type. /// -/// FunctionStream DDL (CREATE/DROP/START/STOP FUNCTION, SHOW FUNCTIONS) -/// is converted to concrete coordinator types; everything else is wrapped -/// in StreamingSql. +/// Statement classification mirrors the analysis flow from `parse_and_get_arrow_program`: +/// - FunctionStream DDL → concrete coordinator types (CreateFunction, DropFunction, etc.) 
+/// - CREATE TABLE / CREATE VIEW → CreateTable (catalog registration) +/// - INSERT INTO / standalone SELECT → InsertStatement (streaming pipeline) +/// - Everything else → StreamingSql (catch-all) fn classify_statement(stmt: DFStatement) -> Result> { match stmt { DFStatement::CreateFunctionWith { options } => { @@ -69,6 +71,10 @@ fn classify_statement(stmt: DFStatement) -> Result Ok(Box::new(DropFunction::new(name))) } DFStatement::ShowFunctions { .. } => Ok(Box::new(ShowFunctions::new())), + s @ DFStatement::CreateTable(_) | s @ DFStatement::CreateView { .. } => { + Ok(Box::new(CreateTable::new(s))) + } + s @ DFStatement::Insert(_) => Ok(Box::new(InsertStatement::new(s))), other => Ok(Box::new(StreamingSql::new(other))), } } From 29b19d9ddad473cdc5f62a2dafd80bf1b788f5ea Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Tue, 17 Mar 2026 20:27:48 +0800 Subject: [PATCH 04/44] update --- src/coordinator/analyze/analyzer.rs | 11 +- src/coordinator/execution/executor.rs | 20 +-- src/coordinator/mod.rs | 2 +- src/coordinator/plan/insert_statement_plan.rs | 14 +- src/coordinator/plan/logical_plan_visitor.rs | 169 ++++++++++++++---- src/coordinator/plan/mod.rs | 2 - src/coordinator/plan/streaming_sql_plan.rs | 32 ---- src/coordinator/plan/visitor.rs | 8 +- src/coordinator/statement/mod.rs | 2 - src/coordinator/statement/streaming_sql.rs | 39 ---- src/coordinator/statement/visitor.rs | 8 +- src/server/handler.rs | 21 ++- src/sql/planner/mod.rs | 2 +- src/sql/planner/parse.rs | 111 +++++++----- 14 files changed, 239 insertions(+), 202 deletions(-) delete mode 100644 src/coordinator/plan/streaming_sql_plan.rs delete mode 100644 src/coordinator/statement/streaming_sql.rs diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs index 297d0de2..cd469f55 100644 --- a/src/coordinator/analyze/analyzer.rs +++ b/src/coordinator/analyze/analyzer.rs @@ -15,7 +15,7 @@ use crate::coordinator::execution_context::ExecutionContext; use 
crate::coordinator::statement::{ CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, ShowFunctions, StartFunction, Statement, StatementVisitor, StatementVisitorContext, - StatementVisitorResult, StopFunction, StreamingSql, + StatementVisitorResult, StopFunction, }; use std::fmt; @@ -132,13 +132,4 @@ impl StatementVisitor for Analyzer<'_> { ) -> StatementVisitorResult { StatementVisitorResult::Analyze(Box::new(InsertStatement::new(stmt.statement.clone()))) } - - fn visit_streaming_sql( - &self, - stmt: &StreamingSql, - _context: &StatementVisitorContext, - ) -> StatementVisitorResult { - // TODO: add semantic analysis for streaming SQL (schema validation, etc.) - StatementVisitorResult::Analyze(Box::new(StreamingSql::new(stmt.statement.clone()))) - } } diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index dbc76923..1a8e042a 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -14,7 +14,7 @@ use crate::coordinator::dataset::{ExecuteResult, ShowFunctionsResult, empty_reco use crate::coordinator::plan::{ CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, InsertStatementPlan, PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult, - ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan, + ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, }; use crate::coordinator::statement::{ConfigSource, FunctionSource}; use crate::runtime::taskexecutor::TaskManager; @@ -222,21 +222,9 @@ impl PlanVisitor for Executor { ) -> PlanVisitorResult { // TODO: start streaming pipeline for INSERT / anonymous query let result = Err(ExecuteError::Internal(format!( - "INSERT statement execution not yet implemented. 
LogicalPlan:\n{}", - plan.logical_plan.display_indent() - ))); - PlanVisitorResult::Execute(result) - } - - fn visit_streaming_sql_plan( - &self, - plan: &StreamingSqlPlan, - _context: &PlanVisitorContext, - ) -> PlanVisitorResult { - // TODO: apply rewrite_plan for streaming transformations, then execute - let result = Err(ExecuteError::Internal(format!( - "Streaming SQL execution not yet implemented. LogicalPlan:\n{}", - plan.logical_plan.display_indent() + "INSERT statement execution not yet implemented. Program graph has {} node(s), {} connection(s)", + plan.program.graph.node_count(), + plan.connection_ids.len(), ))); PlanVisitorResult::Execute(result) } diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs index 51b93ca0..500e8164 100644 --- a/src/coordinator/mod.rs +++ b/src/coordinator/mod.rs @@ -23,5 +23,5 @@ pub use coordinator::Coordinator; pub use dataset::{DataSet, ShowFunctionsResult}; pub use statement::{ CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, PythonModule, - ShowFunctions, StartFunction, Statement, StopFunction, StreamingSql, + ShowFunctions, StartFunction, Statement, StopFunction, }; diff --git a/src/coordinator/plan/insert_statement_plan.rs b/src/coordinator/plan/insert_statement_plan.rs index e96a2772..9c7e4b76 100644 --- a/src/coordinator/plan/insert_statement_plan.rs +++ b/src/coordinator/plan/insert_statement_plan.rs @@ -10,18 +10,24 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use datafusion::logical_expr::LogicalPlan; +use std::collections::HashSet; + +use crate::datastream::logical::LogicalProgram; use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; #[derive(Debug)] pub struct InsertStatementPlan { - pub logical_plan: LogicalPlan, + pub program: LogicalProgram, + pub connection_ids: HashSet, } impl InsertStatementPlan { - pub fn new(logical_plan: LogicalPlan) -> Self { - Self { logical_plan } + pub fn new(program: LogicalProgram, connection_ids: HashSet) -> Self { + Self { + program, + connection_ids, + } } } diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index fde7f35a..818d830f 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -10,20 +10,38 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashSet; +use std::sync::Arc; + +use datafusion::common::tree_node::TreeNode; +use datafusion::common::{Result, plan_err}; +use datafusion::error::DataFusionError; +use datafusion::execution::SessionStateBuilder; +use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore}; +use datafusion::prelude::SessionConfig; +use datafusion::sql::TableReference; use tracing::debug; use crate::coordinator::analyze::analysis::Analysis; use crate::coordinator::plan::{ CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, InsertStatementPlan, PlanNode, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, - StreamingSqlPlan, }; use crate::coordinator::statement::{ CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, ShowFunctions, StartFunction, StatementVisitor, StatementVisitorContext, - StatementVisitorResult, StopFunction, StreamingSql, + StatementVisitorResult, StopFunction, }; +use crate::datastream::logical::{LogicalProgram, ProgramConfig}; +use 
crate::datastream::optimizers::ChainingOptimizer; +use crate::sql::catalog::insert::Insert; +use crate::sql::catalog::table::Table as CatalogTable; +use crate::sql::functions::{is_json_union, serialize_outgoing_json}; use crate::sql::planner::StreamSchemaProvider; +use crate::sql::planner::extension::sink::SinkExtension; +use crate::sql::planner::plan::rewrite_plan; +use crate::sql::planner::rewrite::SourceMetadataVisitor; +use crate::sql::planner::{physical_planner, rewrite_sinks}; pub struct LogicalPlanVisitor { schema_provider: StreamSchemaProvider, @@ -45,6 +63,122 @@ impl LogicalPlanVisitor { _ => panic!("LogicalPlanVisitor should return Plan"), } } + + fn build_insert_plan(&self, stmt: &InsertStatement) -> Result> { + let insert = Insert::try_from_statement(&stmt.statement, &self.schema_provider)?; + + let (plan, sink_name) = match insert { + Insert::InsertQuery { + sink_name, + logical_plan, + } => (logical_plan, Some(sink_name)), + Insert::Anonymous { logical_plan } => (logical_plan, None), + }; + + let mut plan_rewrite = rewrite_plan(plan, &self.schema_provider)?; + + if plan_rewrite + .schema() + .fields() + .iter() + .any(|f| is_json_union(f.data_type())) + { + plan_rewrite = serialize_outgoing_json(&self.schema_provider, Arc::new(plan_rewrite)); + } + + debug!("Plan = {}", plan_rewrite.display_graphviz()); + + let mut used_connections = HashSet::new(); + let mut metadata = SourceMetadataVisitor::new(&self.schema_provider); + plan_rewrite.visit_with_subqueries(&mut metadata)?; + used_connections.extend(metadata.connection_ids.iter()); + + let sink = match sink_name { + Some(sink_name) => { + let table = self + .schema_provider + .get_catalog_table(&sink_name) + .ok_or_else(|| { + DataFusionError::Plan(format!("Connection {sink_name} not found")) + })?; + match &table { + CatalogTable::ConnectorTable(c) => { + if let Some(id) = c.id { + used_connections.insert(id); + } + SinkExtension::new( + TableReference::bare(sink_name), + table.clone(), + 
plan_rewrite.schema().clone(), + Arc::new(plan_rewrite), + ) + } + CatalogTable::MemoryTable { .. } => { + return plan_err!( + "INSERT into memory tables is not supported in single-statement mode" + ); + } + CatalogTable::LookupTable(_) => { + plan_err!("lookup (temporary) tables cannot be inserted into") + } + CatalogTable::TableFromQuery { .. } => { + plan_err!( + "shouldn't be inserting more data into a table made with CREATE TABLE AS" + ) + } + CatalogTable::PreviewSink { .. } => { + plan_err!("queries shouldn't be able insert into preview sink.") + } + } + } + None => SinkExtension::new( + TableReference::parse_str("preview"), + CatalogTable::PreviewSink { + logical_plan: plan_rewrite.clone(), + }, + plan_rewrite.schema().clone(), + Arc::new(plan_rewrite), + ), + }; + + let extension = LogicalPlan::Extension(Extension { + node: Arc::new(sink?), + }); + + let extensions = rewrite_sinks(vec![extension])?; + + let mut config = SessionConfig::new(); + config + .options_mut() + .optimizer + .enable_round_robin_repartition = false; + config.options_mut().optimizer.repartition_aggregations = false; + config.options_mut().optimizer.repartition_windows = false; + config.options_mut().optimizer.repartition_sorts = false; + config.options_mut().optimizer.repartition_joins = false; + config.options_mut().execution.target_partitions = 1; + + let session_state = SessionStateBuilder::new() + .with_config(config) + .with_default_features() + .with_physical_optimizer_rules(vec![]) + .build(); + + let mut plan_to_graph_visitor = + physical_planner::PlanToGraphVisitor::new(&self.schema_provider, &session_state); + for ext in extensions { + plan_to_graph_visitor.add_plan(ext)?; + } + let graph = plan_to_graph_visitor.into_graph(); + + let mut program = LogicalProgram::new(graph, ProgramConfig::default()); + program.optimize(&ChainingOptimizer {}); + + Ok(Box::new(InsertStatementPlan::new( + program, + used_connections, + ))) + } } impl StatementVisitor for LogicalPlanVisitor { @@ 
-135,34 +269,9 @@ impl StatementVisitor for LogicalPlanVisitor { stmt: &InsertStatement, _context: &StatementVisitorContext, ) -> StatementVisitorResult { - let sql_to_rel = datafusion::sql::planner::SqlToRel::new(&self.schema_provider); - - match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) { - Ok(plan) => { - debug!("Insert statement plan:\n{}", plan.display_graphviz()); - StatementVisitorResult::Plan(Box::new(InsertStatementPlan::new(plan))) - } - Err(e) => { - panic!("Failed to convert INSERT statement to logical plan: {e}"); - } - } - } - - fn visit_streaming_sql( - &self, - stmt: &StreamingSql, - _context: &StatementVisitorContext, - ) -> StatementVisitorResult { - let sql_to_rel = datafusion::sql::planner::SqlToRel::new(&self.schema_provider); - - match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) { - Ok(plan) => { - debug!("Logical plan:\n{}", plan.display_graphviz()); - StatementVisitorResult::Plan(Box::new(StreamingSqlPlan::new(plan))) - } - Err(e) => { - panic!("Failed to convert SQL statement to logical plan: {e}"); - } + match self.build_insert_plan(stmt) { + Ok(plan) => StatementVisitorResult::Plan(plan), + Err(e) => panic!("Failed to build INSERT plan: {e}"), } } } diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs index 3d36ec16..1dbd75f6 100644 --- a/src/coordinator/plan/mod.rs +++ b/src/coordinator/plan/mod.rs @@ -20,7 +20,6 @@ mod optimizer; mod show_functions_plan; mod start_function_plan; mod stop_function_plan; -mod streaming_sql_plan; mod visitor; pub use create_function_plan::CreateFunctionPlan; @@ -33,7 +32,6 @@ pub use optimizer::LogicalPlanner; pub use show_functions_plan::ShowFunctionsPlan; pub use start_function_plan::StartFunctionPlan; pub use stop_function_plan::StopFunctionPlan; -pub use streaming_sql_plan::StreamingSqlPlan; pub use visitor::{PlanVisitor, PlanVisitorContext, PlanVisitorResult}; use std::fmt; diff --git a/src/coordinator/plan/streaming_sql_plan.rs 
b/src/coordinator/plan/streaming_sql_plan.rs deleted file mode 100644 index 607420a8..00000000 --- a/src/coordinator/plan/streaming_sql_plan.rs +++ /dev/null @@ -1,32 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use datafusion::logical_expr::LogicalPlan; - -use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; - -#[derive(Debug)] -pub struct StreamingSqlPlan { - pub logical_plan: LogicalPlan, -} - -impl StreamingSqlPlan { - pub fn new(logical_plan: LogicalPlan) -> Self { - Self { logical_plan } - } -} - -impl PlanNode for StreamingSqlPlan { - fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { - visitor.visit_streaming_sql_plan(self, context) - } -} diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs index e3911a8b..3964d645 100644 --- a/src/coordinator/plan/visitor.rs +++ b/src/coordinator/plan/visitor.rs @@ -12,7 +12,7 @@ use super::{ CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, - InsertStatementPlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan, + InsertStatementPlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, }; /// Context passed to PlanVisitor methods @@ -96,10 +96,4 @@ pub trait PlanVisitor { plan: &InsertStatementPlan, context: &PlanVisitorContext, ) -> PlanVisitorResult; - - fn visit_streaming_sql_plan( - &self, - plan: &StreamingSqlPlan, - context: &PlanVisitorContext, - 
) -> PlanVisitorResult; } diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs index 7628b94b..627ebbaa 100644 --- a/src/coordinator/statement/mod.rs +++ b/src/coordinator/statement/mod.rs @@ -18,7 +18,6 @@ mod insert_statement; mod show_functions; mod start_function; mod stop_function; -mod streaming_sql; mod visitor; pub use create_function::{ConfigSource, CreateFunction, FunctionSource}; @@ -29,7 +28,6 @@ pub use insert_statement::InsertStatement; pub use show_functions::ShowFunctions; pub use start_function::StartFunction; pub use stop_function::StopFunction; -pub use streaming_sql::StreamingSql; pub use visitor::{StatementVisitor, StatementVisitorContext, StatementVisitorResult}; use std::fmt; diff --git a/src/coordinator/statement/streaming_sql.rs b/src/coordinator/statement/streaming_sql.rs deleted file mode 100644 index 1aa49205..00000000 --- a/src/coordinator/statement/streaming_sql.rs +++ /dev/null @@ -1,39 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use datafusion::sql::sqlparser::ast::Statement as DFStatement; - -use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; - -/// Wraps a DataFusion SQL statement (SELECT, INSERT, CREATE TABLE, etc.) -/// so it can flow through the same Statement → StatementVisitor pipeline -/// as FunctionStream DDL commands. 
-#[derive(Debug)] -pub struct StreamingSql { - pub statement: DFStatement, -} - -impl StreamingSql { - pub fn new(statement: DFStatement) -> Self { - Self { statement } - } -} - -impl Statement for StreamingSql { - fn accept( - &self, - visitor: &dyn StatementVisitor, - context: &StatementVisitorContext, - ) -> StatementVisitorResult { - visitor.visit_streaming_sql(self, context) - } -} diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs index 8de6ffe2..27e5a512 100644 --- a/src/coordinator/statement/visitor.rs +++ b/src/coordinator/statement/visitor.rs @@ -12,7 +12,7 @@ use super::{ CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, - ShowFunctions, StartFunction, StopFunction, StreamingSql, + ShowFunctions, StartFunction, StopFunction, }; use crate::coordinator::plan::PlanNode; use crate::coordinator::statement::Statement; @@ -100,10 +100,4 @@ pub trait StatementVisitor { stmt: &InsertStatement, context: &StatementVisitorContext, ) -> StatementVisitorResult; - - fn visit_streaming_sql( - &self, - stmt: &StreamingSql, - context: &StatementVisitorContext, - ) -> StatementVisitorResult; } diff --git a/src/server/handler.rs b/src/server/handler.rs index 45b0cd07..bf9350e6 100644 --- a/src/server/handler.rs +++ b/src/server/handler.rs @@ -70,10 +70,14 @@ impl FunctionStreamService for FunctionStreamServiceImpl { let req = request.into_inner(); let parse_start = Instant::now(); - let parsed = match parse_sql(&req.sql) { - Ok(parsed) => { - log::debug!("SQL parsed in {}ms", parse_start.elapsed().as_millis()); - parsed + let statements = match parse_sql(&req.sql) { + Ok(stmts) => { + log::debug!( + "SQL parsed {} statement(s) in {}ms", + stmts.len(), + parse_start.elapsed().as_millis() + ); + stmts } Err(e) => { return Ok(TonicResponse::new(Self::build_response( @@ -85,7 +89,14 @@ impl FunctionStreamService for FunctionStreamServiceImpl { }; let exec_start = Instant::now(); - let result = 
self.coordinator.execute(parsed.as_ref()); + let mut last_result = self.coordinator.execute(statements[0].as_ref()); + for stmt in &statements[1..] { + if !last_result.success { + break; + } + last_result = self.coordinator.execute(stmt.as_ref()); + } + let result = last_result; log::debug!( "Coordinator execution finished in {}ms", exec_start.elapsed().as_millis() diff --git a/src/sql/planner/mod.rs b/src/sql/planner/mod.rs index d80d3a8d..6bf8d357 100644 --- a/src/sql/planner/mod.rs +++ b/src/sql/planner/mod.rs @@ -202,7 +202,7 @@ fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result { })) } -fn rewrite_sinks(extensions: Vec) -> Result> { +pub fn rewrite_sinks(extensions: Vec) -> Result> { let mut sink_inputs = build_sink_inputs(&extensions); let mut new_extensions = vec![]; for extension in extensions { diff --git a/src/sql/planner/parse.rs b/src/sql/planner/parse.rs index 4bd8f30e..a3af2e89 100644 --- a/src/sql/planner/parse.rs +++ b/src/sql/planner/parse.rs @@ -20,30 +20,29 @@ use datafusion::sql::sqlparser::parser::Parser; use crate::coordinator::{ CreateFunction, CreateTable, DropFunction, InsertStatement, ShowFunctions, StartFunction, - Statement as CoordinatorStatement, StopFunction, StreamingSql, + Statement as CoordinatorStatement, StopFunction, }; -/// Stage 1: String → Box +/// Stage 1: String → Vec> /// /// Parses SQL using FunctionStreamDialect (from sqlparser-rs), then classifies -/// the result into either a FunctionStream DDL statement or a StreamingSql, -/// both unified under the coordinator's Statement trait. -pub fn parse_sql(query: &str) -> Result> { +/// each statement into a concrete coordinator Statement type. +/// A single SQL input may contain multiple statements (separated by `;`). 
+pub fn parse_sql(query: &str) -> Result>> { let trimmed = query.trim(); if trimmed.is_empty() { return plan_err!("Query is empty"); } let dialect = FunctionStreamDialect {}; - let mut statements = Parser::parse_sql(&dialect, trimmed) + let statements = Parser::parse_sql(&dialect, trimmed) .map_err(|e| DataFusionError::Plan(format!("SQL parse error: {e}")))?; if statements.is_empty() { return plan_err!("No SQL statements found"); } - let stmt = statements.remove(0); - classify_statement(stmt) + statements.into_iter().map(classify_statement).collect() } /// Classify a parsed DataFusion Statement into the coordinator's Statement type. @@ -51,8 +50,8 @@ pub fn parse_sql(query: &str) -> Result> { /// Statement classification mirrors the analysis flow from `parse_and_get_arrow_program`: /// - FunctionStream DDL → concrete coordinator types (CreateFunction, DropFunction, etc.) /// - CREATE TABLE / CREATE VIEW → CreateTable (catalog registration) -/// - INSERT INTO / standalone SELECT → InsertStatement (streaming pipeline) -/// - Everything else → StreamingSql (catch-all) +/// - INSERT INTO → InsertStatement (streaming pipeline) +/// - Everything else → error (unsupported) fn classify_statement(stmt: DFStatement) -> Result> { match stmt { DFStatement::CreateFunctionWith { options } => { @@ -75,7 +74,7 @@ fn classify_statement(stmt: DFStatement) -> Result Ok(Box::new(CreateTable::new(s))) } s @ DFStatement::Insert(_) => Ok(Box::new(InsertStatement::new(s))), - other => Ok(Box::new(StreamingSql::new(other))), + other => plan_err!("Unsupported SQL statement: {other}"), } } @@ -97,76 +96,90 @@ fn sql_options_to_map(options: &[SqlOption]) -> HashMap { mod tests { use super::*; - fn is_streaming_sql(stmt: &dyn CoordinatorStatement) -> bool { - let debug = format!("{:?}", stmt); - debug.starts_with("StreamingSql") + fn first_stmt(sql: &str) -> Box { + let mut stmts = parse_sql(sql).unwrap(); + assert!(!stmts.is_empty()); + stmts.remove(0) } - fn is_ddl(stmt: &dyn 
CoordinatorStatement) -> bool { - !is_streaming_sql(stmt) + fn is_type(stmt: &dyn CoordinatorStatement, prefix: &str) -> bool { + format!("{:?}", stmt).starts_with(prefix) } #[test] fn test_parse_create_function() { let sql = "CREATE FUNCTION WITH ('function_path'='./test.wasm', 'config_path'='./config.yml')"; - let stmt = parse_sql(sql).unwrap(); - assert!(is_ddl(stmt.as_ref())); + let stmt = first_stmt(sql); + assert!(is_type(stmt.as_ref(), "CreateFunction")); } #[test] fn test_parse_create_function_minimal() { let sql = "CREATE FUNCTION WITH ('function_path'='./processor.wasm')"; - let stmt = parse_sql(sql).unwrap(); - assert!(is_ddl(stmt.as_ref())); + let stmt = first_stmt(sql); + assert!(is_type(stmt.as_ref(), "CreateFunction")); } #[test] fn test_parse_drop_function() { - let sql = "DROP FUNCTION my_task"; - let stmt = parse_sql(sql).unwrap(); - assert!(is_ddl(stmt.as_ref())); + let stmt = first_stmt("DROP FUNCTION my_task"); + assert!(is_type(stmt.as_ref(), "DropFunction")); } #[test] fn test_parse_start_function() { - let sql = "START FUNCTION my_task"; - let stmt = parse_sql(sql).unwrap(); - assert!(is_ddl(stmt.as_ref())); + let stmt = first_stmt("START FUNCTION my_task"); + assert!(is_type(stmt.as_ref(), "StartFunction")); } #[test] fn test_parse_stop_function() { - let sql = "STOP FUNCTION my_task"; - let stmt = parse_sql(sql).unwrap(); - assert!(is_ddl(stmt.as_ref())); + let stmt = first_stmt("STOP FUNCTION my_task"); + assert!(is_type(stmt.as_ref(), "StopFunction")); } #[test] fn test_parse_show_functions() { - let sql = "SHOW FUNCTIONS"; - let stmt = parse_sql(sql).unwrap(); - assert!(is_ddl(stmt.as_ref())); + let stmt = first_stmt("SHOW FUNCTIONS"); + assert!(is_type(stmt.as_ref(), "ShowFunctions")); } #[test] - fn test_parse_case_insensitive() { - let sql1 = "create function with ('function_path'='./test.wasm')"; - assert!(is_ddl(parse_sql(sql1).unwrap().as_ref())); + fn test_parse_create_table() { + let stmt = first_stmt("CREATE TABLE foo (id INT, 
name VARCHAR)"); + assert!(is_type(stmt.as_ref(), "CreateTable")); + } - let sql2 = "show functions"; - assert!(is_ddl(parse_sql(sql2).unwrap().as_ref())); + #[test] + fn test_parse_insert_statement() { + let stmt = first_stmt("INSERT INTO sink SELECT * FROM source"); + assert!(is_type(stmt.as_ref(), "InsertStatement")); + } - let sql3 = "start function my_task"; - assert!(is_ddl(parse_sql(sql3).unwrap().as_ref())); + #[test] + fn test_parse_case_insensitive() { + assert!(is_type( + first_stmt("create function with ('function_path'='./test.wasm')").as_ref(), + "CreateFunction" + )); + assert!(is_type( + first_stmt("show functions").as_ref(), + "ShowFunctions" + )); + assert!(is_type( + first_stmt("start function my_task").as_ref(), + "StartFunction" + )); } #[test] - fn test_parse_streaming_sql() { - let sql = - "SELECT count(*), tumble(interval '1 minute') as window FROM events GROUP BY window"; - let stmt = parse_sql(sql).unwrap(); - assert!(is_streaming_sql(stmt.as_ref())); + fn test_parse_multiple_statements() { + let sql = "CREATE TABLE t1 (id INT); INSERT INTO sink SELECT * FROM t1"; + let stmts = parse_sql(sql).unwrap(); + assert_eq!(stmts.len(), 2); + assert!(is_type(stmts[0].as_ref(), "CreateTable")); + assert!(is_type(stmts[1].as_ref(), "InsertStatement")); } #[test] @@ -175,6 +188,12 @@ mod tests { assert!(parse_sql(" ").is_err()); } + #[test] + fn test_parse_unsupported_statement() { + let result = parse_sql("SELECT 1"); + assert!(result.is_err()); + } + #[test] fn test_parse_with_extra_properties() { let sql = r#"CREATE FUNCTION WITH ( @@ -183,7 +202,7 @@ mod tests { 'parallelism'='4', 'memory-limit'='256mb' )"#; - let stmt = parse_sql(sql).unwrap(); - assert!(is_ddl(stmt.as_ref())); + let stmt = first_stmt(sql); + assert!(is_type(stmt.as_ref(), "CreateFunction")); } } From 67b65a9de49193759169a69377dc7142d3a95eb0 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 18 Mar 2026 22:41:13 +0800 Subject: [PATCH 05/44] update --- Cargo.lock | 4 +- 
src/coordinator/analyze/analyzer.rs | 14 +- src/coordinator/execution/executor.rs | 40 +- src/coordinator/mod.rs | 5 +- src/coordinator/plan/logical_plan_visitor.rs | 266 ++++++----- ...statement_plan.rs => lookup_table_plan.rs} | 23 +- src/coordinator/plan/mod.rs | 8 +- .../plan/streaming_table_connector_plan.rs | 27 ++ src/coordinator/plan/streaming_table_plan.rs | 30 ++ src/coordinator/plan/visitor.rs | 19 +- src/coordinator/statement/mod.rs | 4 +- ...insert_statement.rs => streaming_table.rs} | 12 +- src/coordinator/statement/visitor.rs | 8 +- src/coordinator/tool/connector_options.rs | 360 +++++++++++++++ src/coordinator/tool/mod.rs | 3 + src/sql/catalog/table.rs | 59 +-- src/sql/planner/extension/sink.rs | 9 +- src/sql/planner/mod.rs | 25 +- src/sql/planner/parse.rs | 21 +- src/sql/planner/plan/mod.rs | 429 +----------------- src/sql/planner/plan/stream_rewriter.rs | 148 ++++++ .../planner/plan/window_detecting_visitor.rs | 215 +++++++++ src/sql/planner/rewrite/source_rewriter.rs | 23 +- 23 files changed, 1024 insertions(+), 728 deletions(-) rename src/coordinator/plan/{insert_statement_plan.rs => lookup_table_plan.rs} (61%) create mode 100644 src/coordinator/plan/streaming_table_connector_plan.rs create mode 100644 src/coordinator/plan/streaming_table_plan.rs rename src/coordinator/statement/{insert_statement.rs => streaming_table.rs} (76%) create mode 100644 src/coordinator/tool/connector_options.rs create mode 100644 src/coordinator/tool/mod.rs create mode 100644 src/sql/planner/plan/stream_rewriter.rs create mode 100644 src/sql/planner/plan/window_detecting_visitor.rs diff --git a/Cargo.lock b/Cargo.lock index cb19233d..f39d5d3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4964,7 +4964,7 @@ dependencies = [ [[package]] name = "sqlparser" version = "0.55.0" -source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#7e7cfb6145a426a26a7db12ae5874fed8b9c6b95" +source = 
"git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#9783cf9e3e6b61c763f78bcdd460e85edec22250" dependencies = [ "log", "recursive", @@ -4974,7 +4974,7 @@ dependencies = [ [[package]] name = "sqlparser_derive" version = "0.3.0" -source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#7e7cfb6145a426a26a7db12ae5874fed8b9c6b95" +source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#9783cf9e3e6b61c763f78bcdd460e85edec22250" dependencies = [ "proc-macro2", "quote", diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs index cd469f55..c351f3ae 100644 --- a/src/coordinator/analyze/analyzer.rs +++ b/src/coordinator/analyze/analyzer.rs @@ -13,9 +13,9 @@ use super::Analysis; use crate::coordinator::execution_context::ExecutionContext; use crate::coordinator::statement::{ - CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, - ShowFunctions, StartFunction, Statement, StatementVisitor, StatementVisitorContext, - StatementVisitorResult, StopFunction, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, ShowFunctions, StartFunction, + Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, + StreamingTableStatement, }; use std::fmt; @@ -125,11 +125,13 @@ impl StatementVisitor for Analyzer<'_> { StatementVisitorResult::Analyze(Box::new(CreateTable::new(stmt.statement.clone()))) } - fn visit_insert_statement( + fn visit_streaming_table_statement( &self, - stmt: &InsertStatement, + stmt: &StreamingTableStatement, _context: &StatementVisitorContext, ) -> StatementVisitorResult { - StatementVisitorResult::Analyze(Box::new(InsertStatement::new(stmt.statement.clone()))) + StatementVisitorResult::Analyze(Box::new(StreamingTableStatement::new( + stmt.statement.clone(), + ))) } } diff --git a/src/coordinator/execution/executor.rs 
b/src/coordinator/execution/executor.rs index 1a8e042a..056f0236 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -13,8 +13,9 @@ use crate::coordinator::dataset::{ExecuteResult, ShowFunctionsResult, empty_record_batch}; use crate::coordinator::plan::{ CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, - InsertStatementPlan, PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult, - ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, + LookupTablePlan, PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult, + ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, + StreamingTableConnectorPlan, }; use crate::coordinator::statement::{ConfigSource, FunctionSource}; use crate::runtime::taskexecutor::TaskManager; @@ -215,17 +216,36 @@ impl PlanVisitor for Executor { PlanVisitorResult::Execute(result) } - fn visit_insert_statement_plan( + fn visit_streaming_table( &self, - plan: &InsertStatementPlan, + _plan: &StreamingTable, _context: &PlanVisitorContext, ) -> PlanVisitorResult { - // TODO: start streaming pipeline for INSERT / anonymous query - let result = Err(ExecuteError::Internal(format!( - "INSERT statement execution not yet implemented. 
Program graph has {} node(s), {} connection(s)", - plan.program.graph.node_count(), - plan.connection_ids.len(), - ))); + let result = Err(ExecuteError::Internal( + "StreamingTable execution not yet implemented".to_string(), + )); + PlanVisitorResult::Execute(result) + } + + fn visit_lookup_table( + &self, + _plan: &LookupTablePlan, + _context: &PlanVisitorContext, + ) -> PlanVisitorResult { + let result = Err(ExecuteError::Internal( + "LookupTable execution not yet implemented".to_string(), + )); + PlanVisitorResult::Execute(result) + } + + fn visit_streaming_connector_table( + &self, + _plan: &StreamingTableConnectorPlan, + _context: &PlanVisitorContext, + ) -> PlanVisitorResult { + let result = Err(ExecuteError::Internal( + "StreamingTableConnector execution not yet implemented".to_string(), + )); PlanVisitorResult::Execute(result) } } diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs index 500e8164..7791e8a8 100644 --- a/src/coordinator/mod.rs +++ b/src/coordinator/mod.rs @@ -18,10 +18,11 @@ mod execution; mod execution_context; mod plan; mod statement; +mod tool; pub use coordinator::Coordinator; pub use dataset::{DataSet, ShowFunctionsResult}; pub use statement::{ - CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, PythonModule, - ShowFunctions, StartFunction, Statement, StopFunction, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, PythonModule, ShowFunctions, + StartFunction, Statement, StopFunction, StreamingTableStatement, }; diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index 818d830f..fb8c8c82 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -10,38 +10,53 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::collections::HashSet; use std::sync::Arc; -use datafusion::common::tree_node::TreeNode; -use datafusion::common::{Result, plan_err}; -use datafusion::error::DataFusionError; -use datafusion::execution::SessionStateBuilder; -use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion::prelude::SessionConfig; -use datafusion::sql::TableReference; +use datafusion::common::{Result, plan_datafusion_err, plan_err}; +use datafusion::sql::sqlparser::ast::{SqlOption, Statement as DFStatement}; +use datafusion_common::TableReference; +use datafusion_expr::{Expr, Extension, LogicalPlan, col}; +use sqlparser::ast::Statement; use tracing::debug; use crate::coordinator::analyze::analysis::Analysis; use crate::coordinator::plan::{ - CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, - InsertStatementPlan, PlanNode, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, + CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, PlanNode, + ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, }; use crate::coordinator::statement::{ - CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, - ShowFunctions, StartFunction, StatementVisitor, StatementVisitorContext, - StatementVisitorResult, StopFunction, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, ShowFunctions, StartFunction, + StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, + StreamingTableStatement, }; -use crate::datastream::logical::{LogicalProgram, ProgramConfig}; -use crate::datastream::optimizers::ChainingOptimizer; -use crate::sql::catalog::insert::Insert; -use crate::sql::catalog::table::Table as CatalogTable; +use crate::coordinator::tool::ConnectorOptions; +use crate::sql::catalog::Table; +use crate::sql::catalog::connector::ConnectionType; +use crate::sql::catalog::connector_table::ConnectorTable; +use 
crate::sql::catalog::field_spec::FieldSpec; +use crate::sql::catalog::optimizer::produce_optimized_plan; use crate::sql::functions::{is_json_union, serialize_outgoing_json}; -use crate::sql::planner::StreamSchemaProvider; use crate::sql::planner::extension::sink::SinkExtension; -use crate::sql::planner::plan::rewrite_plan; -use crate::sql::planner::rewrite::SourceMetadataVisitor; -use crate::sql::planner::{physical_planner, rewrite_sinks}; +use crate::sql::planner::{StreamSchemaProvider, maybe_add_key_extension_to_sink}; +use crate::sql::rewrite_plan; + +const CONNECTOR: &str = "connector"; +const PARTITION_BY: &str = "partition_by"; +const IDLE_MICROS: &str = "idle_time"; + +/// 将 WITH 选项列表转为 key-value map,便于读取 connector 等配置。 +fn with_options_to_map(options: &[SqlOption]) -> std::collections::HashMap { + options + .iter() + .filter_map(|opt| match opt { + SqlOption::KeyValue { key, value } => Some(( + key.value.clone(), + value.to_string().trim_matches('\'').to_string(), + )), + _ => None, + }) + .collect() +} pub struct LogicalPlanVisitor { schema_provider: StreamSchemaProvider, @@ -63,121 +78,98 @@ impl LogicalPlanVisitor { _ => panic!("LogicalPlanVisitor should return Plan"), } } - - fn build_insert_plan(&self, stmt: &InsertStatement) -> Result> { - let insert = Insert::try_from_statement(&stmt.statement, &self.schema_provider)?; - - let (plan, sink_name) = match insert { - Insert::InsertQuery { - sink_name, - logical_plan, - } => (logical_plan, Some(sink_name)), - Insert::Anonymous { logical_plan } => (logical_plan, None), - }; - - let mut plan_rewrite = rewrite_plan(plan, &self.schema_provider)?; - - if plan_rewrite - .schema() - .fields() - .iter() - .any(|f| is_json_union(f.data_type())) - { - plan_rewrite = serialize_outgoing_json(&self.schema_provider, Arc::new(plan_rewrite)); - } - - debug!("Plan = {}", plan_rewrite.display_graphviz()); - - let mut used_connections = HashSet::new(); - let mut metadata = SourceMetadataVisitor::new(&self.schema_provider); 
- plan_rewrite.visit_with_subqueries(&mut metadata)?; - used_connections.extend(metadata.connection_ids.iter()); - - let sink = match sink_name { - Some(sink_name) => { - let table = self - .schema_provider - .get_catalog_table(&sink_name) - .ok_or_else(|| { - DataFusionError::Plan(format!("Connection {sink_name} not found")) - })?; - match &table { - CatalogTable::ConnectorTable(c) => { - if let Some(id) = c.id { - used_connections.insert(id); - } - SinkExtension::new( - TableReference::bare(sink_name), - table.clone(), - plan_rewrite.schema().clone(), - Arc::new(plan_rewrite), - ) - } - CatalogTable::MemoryTable { .. } => { - return plan_err!( - "INSERT into memory tables is not supported in single-statement mode" - ); - } - CatalogTable::LookupTable(_) => { - plan_err!("lookup (temporary) tables cannot be inserted into") - } - CatalogTable::TableFromQuery { .. } => { - plan_err!( - "shouldn't be inserting more data into a table made with CREATE TABLE AS" - ) - } - CatalogTable::PreviewSink { .. 
} => { - plan_err!("queries shouldn't be able insert into preview sink.") - } + fn build_create_streaming_table_plan( + &self, + stmt: &StreamingTableStatement, + ) -> Result> { + let statement = &stmt.statement; + match statement { + DFStatement::CreateStreamingTable { + name, + with_options, + comment, + query, + } => { + let name_str = name.to_string(); + + let mut connector_opts = ConnectorOptions::new(with_options, &None)?; + let connector_type = connector_opts.pull_opt_str(CONNECTOR)?.ok_or_else(|| { + plan_datafusion_err!( + "Streaming Table '{}' must specify '{}' option", + name_str, + CONNECTOR + ) + })?; + + let synthetic_statement = Statement::Query(query.clone()); + let base_plan = + produce_optimized_plan(&synthetic_statement, &self.schema_provider)?; + + let mut plan_rewrite = rewrite_plan(base_plan, &self.schema_provider)?; + + if plan_rewrite + .schema() + .fields() + .iter() + .any(|f| is_json_union(f.data_type())) + { + plan_rewrite = + serialize_outgoing_json(&self.schema_provider, Arc::new(plan_rewrite)); } - } - None => SinkExtension::new( - TableReference::parse_str("preview"), - CatalogTable::PreviewSink { - logical_plan: plan_rewrite.clone(), - }, - plan_rewrite.schema().clone(), - Arc::new(plan_rewrite), - ), - }; - - let extension = LogicalPlan::Extension(Extension { - node: Arc::new(sink?), - }); - - let extensions = rewrite_sinks(vec![extension])?; - - let mut config = SessionConfig::new(); - config - .options_mut() - .optimizer - .enable_round_robin_repartition = false; - config.options_mut().optimizer.repartition_aggregations = false; - config.options_mut().optimizer.repartition_windows = false; - config.options_mut().optimizer.repartition_sorts = false; - config.options_mut().optimizer.repartition_joins = false; - config.options_mut().execution.target_partitions = 1; - - let session_state = SessionStateBuilder::new() - .with_config(config) - .with_default_features() - .with_physical_optimizer_rules(vec![]) - .build(); - let mut 
plan_to_graph_visitor = - physical_planner::PlanToGraphVisitor::new(&self.schema_provider, &session_state); - for ext in extensions { - plan_to_graph_visitor.add_plan(ext)?; + let fields: Vec = plan_rewrite + .schema() + .fields() + .iter() + .map(|f| FieldSpec::Struct((**f).clone())) + .collect(); + + let partition_exprs = + if let Some(partition_cols) = connector_opts.pull_opt_str(PARTITION_BY)? { + let cols: Vec = + partition_cols.split(',').map(|c| col(c.trim())).collect(); + Some(cols) + } else { + None + }; + + let connector_table = ConnectorTable { + id: None, + connector: connector_type, + name: name_str.clone(), + connection_type: ConnectionType::Sink, + fields, + config: "".to_string(), + description: comment.clone().unwrap_or_default(), + event_time_field: None, + watermark_field: None, + idle_time: connector_opts.pull_opt_duration(IDLE_MICROS)?, + primary_keys: Arc::new(vec![]), + inferred_fields: None, + partition_exprs: Arc::new(partition_exprs), + }; + + let sink_extension = SinkExtension::new( + TableReference::bare(name_str.clone()), + Table::ConnectorTable(connector_table.clone()), + plan_rewrite.schema().clone(), + Arc::new(plan_rewrite), + )?; + + let final_plan = + maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension { + node: Arc::new(sink_extension), + }))?; + + Ok(Box::new(StreamingTable { + name: name_str, + comment: comment.clone(), + connector_table, + logical_plan: final_plan, + })) + } + _ => plan_err!("Only CREATE STREAMING TABLE supported"), } - let graph = plan_to_graph_visitor.into_graph(); - - let mut program = LogicalProgram::new(graph, ProgramConfig::default()); - program.optimize(&ChainingOptimizer {}); - - Ok(Box::new(InsertStatementPlan::new( - program, - used_connections, - ))) } } @@ -264,14 +256,14 @@ impl StatementVisitor for LogicalPlanVisitor { } } - fn visit_insert_statement( + fn visit_streaming_table_statement( &self, - stmt: &InsertStatement, + stmt: &StreamingTableStatement, _context: 
&StatementVisitorContext, ) -> StatementVisitorResult { - match self.build_insert_plan(stmt) { + match self.build_create_streaming_table_plan(stmt) { Ok(plan) => StatementVisitorResult::Plan(plan), - Err(e) => panic!("Failed to build INSERT plan: {e}"), + Err(e) => panic!("Failed to build CreateStreamingTable plan: {e}"), } } } diff --git a/src/coordinator/plan/insert_statement_plan.rs b/src/coordinator/plan/lookup_table_plan.rs similarity index 61% rename from src/coordinator/plan/insert_statement_plan.rs rename to src/coordinator/plan/lookup_table_plan.rs index 9c7e4b76..889f57e1 100644 --- a/src/coordinator/plan/insert_statement_plan.rs +++ b/src/coordinator/plan/lookup_table_plan.rs @@ -10,29 +10,18 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashSet; - -use crate::datastream::logical::LogicalProgram; +use crate::sql::catalog::connector_table::ConnectorTable; use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; +/// Plan node that exposes a lookup table config as a logical plan input. 
#[derive(Debug)] -pub struct InsertStatementPlan { - pub program: LogicalProgram, - pub connection_ids: HashSet, -} - -impl InsertStatementPlan { - pub fn new(program: LogicalProgram, connection_ids: HashSet) -> Self { - Self { - program, - connection_ids, - } - } +pub struct LookupTablePlan { + pub table: ConnectorTable, } -impl PlanNode for InsertStatementPlan { +impl PlanNode for LookupTablePlan { fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { - visitor.visit_insert_statement_plan(self, context) + visitor.visit_lookup_table(self, context) } } diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs index 1dbd75f6..d68320d8 100644 --- a/src/coordinator/plan/mod.rs +++ b/src/coordinator/plan/mod.rs @@ -14,24 +14,28 @@ mod create_function_plan; mod create_python_function_plan; mod create_table_plan; mod drop_function_plan; -mod insert_statement_plan; mod logical_plan_visitor; +mod lookup_table_plan; mod optimizer; mod show_functions_plan; mod start_function_plan; mod stop_function_plan; +mod streaming_table_connector_plan; +mod streaming_table_plan; mod visitor; pub use create_function_plan::CreateFunctionPlan; pub use create_python_function_plan::CreatePythonFunctionPlan; pub use create_table_plan::CreateTablePlan; pub use drop_function_plan::DropFunctionPlan; -pub use insert_statement_plan::InsertStatementPlan; pub use logical_plan_visitor::LogicalPlanVisitor; +pub use lookup_table_plan::LookupTablePlan; pub use optimizer::LogicalPlanner; pub use show_functions_plan::ShowFunctionsPlan; pub use start_function_plan::StartFunctionPlan; pub use stop_function_plan::StopFunctionPlan; +pub use streaming_table_connector_plan::StreamingTableConnectorPlan; +pub use streaming_table_plan::StreamingTable; pub use visitor::{PlanVisitor, PlanVisitorContext, PlanVisitorResult}; use std::fmt; diff --git a/src/coordinator/plan/streaming_table_connector_plan.rs b/src/coordinator/plan/streaming_table_connector_plan.rs 
new file mode 100644 index 00000000..be1cda31 --- /dev/null +++ b/src/coordinator/plan/streaming_table_connector_plan.rs @@ -0,0 +1,27 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::sql::catalog::connector_table::ConnectorTable; + +use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; + +/// Plan node that exposes a connector table config as a logical plan input. +#[derive(Debug)] +pub struct StreamingTableConnectorPlan { + pub table: ConnectorTable, +} + +impl PlanNode for StreamingTableConnectorPlan { + fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { + visitor.visit_streaming_connector_table(self, context) + } +} diff --git a/src/coordinator/plan/streaming_table_plan.rs b/src/coordinator/plan/streaming_table_plan.rs new file mode 100644 index 00000000..577e6494 --- /dev/null +++ b/src/coordinator/plan/streaming_table_plan.rs @@ -0,0 +1,30 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; +use crate::sql::catalog::connector_table::ConnectorTable; +use datafusion::logical_expr::LogicalPlan; + +/// Plan node representing a fully resolved streaming table (DDL). +#[derive(Debug)] +pub struct StreamingTable { + pub name: String, + pub comment: Option, + pub connector_table: ConnectorTable, + pub logical_plan: LogicalPlan, +} + +impl PlanNode for StreamingTable { + fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { + visitor.visit_streaming_table(self, context) + } +} diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs index 3964d645..fc764b2b 100644 --- a/src/coordinator/plan/visitor.rs +++ b/src/coordinator/plan/visitor.rs @@ -12,7 +12,8 @@ use super::{ CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, - InsertStatementPlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, + LookupTablePlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, + StreamingTableConnectorPlan, }; /// Context passed to PlanVisitor methods @@ -91,9 +92,21 @@ pub trait PlanVisitor { context: &PlanVisitorContext, ) -> PlanVisitorResult; - fn visit_insert_statement_plan( + fn visit_streaming_table( &self, - plan: &InsertStatementPlan, + plan: &StreamingTable, + context: &PlanVisitorContext, + ) -> PlanVisitorResult; + + fn visit_lookup_table( + &self, + plan: &LookupTablePlan, + context: &PlanVisitorContext, + ) -> PlanVisitorResult; + + fn visit_streaming_connector_table( + &self, + plan: &StreamingTableConnectorPlan, context: &PlanVisitorContext, ) -> PlanVisitorResult; } diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs index 627ebbaa..15880284 100644 --- a/src/coordinator/statement/mod.rs +++ b/src/coordinator/statement/mod.rs @@ 
-14,20 +14,20 @@ mod create_function; mod create_python_function; mod create_table; mod drop_function; -mod insert_statement; mod show_functions; mod start_function; mod stop_function; +mod streaming_table; mod visitor; pub use create_function::{ConfigSource, CreateFunction, FunctionSource}; pub use create_python_function::{CreatePythonFunction, PythonModule}; pub use create_table::CreateTable; pub use drop_function::DropFunction; -pub use insert_statement::InsertStatement; pub use show_functions::ShowFunctions; pub use start_function::StartFunction; pub use stop_function::StopFunction; +pub use streaming_table::StreamingTableStatement; pub use visitor::{StatementVisitor, StatementVisitorContext, StatementVisitorResult}; use std::fmt; diff --git a/src/coordinator/statement/insert_statement.rs b/src/coordinator/statement/streaming_table.rs similarity index 76% rename from src/coordinator/statement/insert_statement.rs rename to src/coordinator/statement/streaming_table.rs index 45785251..48fd25e9 100644 --- a/src/coordinator/statement/insert_statement.rs +++ b/src/coordinator/statement/streaming_table.rs @@ -14,28 +14,28 @@ use datafusion::sql::sqlparser::ast::Statement as DFStatement; use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; -/// Represents an INSERT INTO or standalone SELECT/query statement. +/// Represents an INSERT INTO or standalone SELECT/query that creates a streaming table/pipeline. /// /// In the streaming SQL context, both INSERT INTO (writing to a sink) /// and standalone SELECT (anonymous computation) are treated as -/// data-producing operations that feed into the streaming pipeline. +/// data-producing operations that create/feed into the streaming pipeline. 
#[derive(Debug)] -pub struct InsertStatement { +pub struct StreamingTableStatement { pub statement: DFStatement, } -impl InsertStatement { +impl StreamingTableStatement { pub fn new(statement: DFStatement) -> Self { Self { statement } } } -impl Statement for InsertStatement { +impl Statement for StreamingTableStatement { fn accept( &self, visitor: &dyn StatementVisitor, context: &StatementVisitorContext, ) -> StatementVisitorResult { - visitor.visit_insert_statement(self, context) + visitor.visit_streaming_table_statement(self, context) } } diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs index 27e5a512..1867b603 100644 --- a/src/coordinator/statement/visitor.rs +++ b/src/coordinator/statement/visitor.rs @@ -11,8 +11,8 @@ // limitations under the License. use super::{ - CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, - ShowFunctions, StartFunction, StopFunction, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, ShowFunctions, StartFunction, + StopFunction, StreamingTableStatement, }; use crate::coordinator::plan::PlanNode; use crate::coordinator::statement::Statement; @@ -95,9 +95,9 @@ pub trait StatementVisitor { context: &StatementVisitorContext, ) -> StatementVisitorResult; - fn visit_insert_statement( + fn visit_streaming_table_statement( &self, - stmt: &InsertStatement, + stmt: &StreamingTableStatement, context: &StatementVisitorContext, ) -> StatementVisitorResult; } diff --git a/src/coordinator/tool/connector_options.rs b/src/coordinator/tool/connector_options.rs new file mode 100644 index 00000000..de39872f --- /dev/null +++ b/src/coordinator/tool/connector_options.rs @@ -0,0 +1,360 @@ +use std::collections::HashMap; +use std::num::{NonZero, NonZeroU64}; +use std::str::FromStr; +use std::time::Duration; + +use datafusion::common::{Result as DFResult, plan_datafusion_err}; +use datafusion::error::DataFusionError; +use datafusion::sql::sqlparser::ast::{Expr, Ident, 
SqlOption, Value as SqlValue, ValueWithSpan}; +use tracing::warn; + +pub trait FromOpts: Sized { + fn from_opts(opts: &mut ConnectorOptions) -> DFResult; +} + +pub struct ConnectorOptions { + options: HashMap, + partitions: Vec, +} + +impl ConnectorOptions { + pub fn new(sql_opts: &[SqlOption], partition_by: &Option>) -> DFResult { + let mut options = HashMap::new(); + + for option in sql_opts { + let SqlOption::KeyValue { key, value } = option else { + return Err(plan_datafusion_err!( + "invalid with option: '{}'; expected an `=` delimited key-value pair", + option + )); + }; + + options.insert(key.value.clone(), value.clone()); + } + + Ok(Self { + options, + partitions: partition_by.clone().unwrap_or_default(), + }) + } + + pub fn partitions(&self) -> &[Expr] { + &self.partitions + } + + pub fn pull_struct(&mut self) -> DFResult { + T::from_opts(self) + } + + pub fn pull_opt_str(&mut self, name: &str) -> DFResult> { + match self.options.remove(name) { + Some(Expr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + span: _, + })) => Ok(Some(s)), + Some(e) => Err(plan_datafusion_err!( + "expected with option '{}' to be a single-quoted string, but it was `{:?}`", + name, + e + )), + None => Ok(None), + } + } + + pub fn pull_str(&mut self, name: &str) -> DFResult { + self.pull_opt_str(name)? 
+ .ok_or_else(|| plan_datafusion_err!("required option '{}' not set", name)) + } + + pub fn pull_opt_bool(&mut self, name: &str) -> DFResult> { + match self.options.remove(name) { + Some(Expr::Value(ValueWithSpan { + value: SqlValue::Boolean(b), + span: _, + })) => Ok(Some(b)), + Some(Expr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + span: _, + })) => match s.as_str() { + "true" | "yes" => Ok(Some(true)), + "false" | "no" => Ok(Some(false)), + _ => Err(plan_datafusion_err!( + "expected with option '{}' to be a boolean, but it was `'{}'`", + name, + s + )), + }, + Some(e) => Err(plan_datafusion_err!( + "expected with option '{}' to be a boolean, but it was `{:?}`", + name, + e + )), + None => Ok(None), + } + } + + pub fn pull_opt_u64(&mut self, name: &str) -> DFResult> { + match self.options.remove(name) { + Some(Expr::Value(ValueWithSpan { + value: SqlValue::Number(s, _), + span: _, + })) + | Some(Expr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + span: _, + })) => s.parse::().map(Some).map_err(|_| { + plan_datafusion_err!( + "expected with option '{}' to be an unsigned integer, but it was `{}`", + name, + s + ) + }), + Some(e) => Err(plan_datafusion_err!( + "expected with option '{}' to be an unsigned integer, but it was `{:?}`", + name, + e + )), + None => Ok(None), + } + } + + pub fn pull_opt_nonzero_u64(&mut self, name: &str) -> DFResult>> { + match self.pull_opt_u64(name)? { + Some(0) => Err(plan_datafusion_err!( + "expected with option '{name}' to be greater than 0, but it was 0" + )), + Some(i) => Ok(Some(NonZeroU64::new(i).unwrap())), + None => Ok(None), + } + } + + pub fn pull_opt_data_size_bytes(&mut self, name: &str) -> DFResult> { + self.pull_opt_str(name)? 
+ .map(|s| { + s.parse::().map_err(|_| { + plan_datafusion_err!( + "expected with option '{}' to be a size in bytes (unsigned integer), but it was `{}`", + name, + s + ) + }) + }) + .transpose() + } + + pub fn pull_opt_i64(&mut self, name: &str) -> DFResult> { + match self.options.remove(name) { + Some(Expr::Value(ValueWithSpan { + value: SqlValue::Number(s, _), + span: _, + })) + | Some(Expr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + span: _, + })) => s.parse::().map(Some).map_err(|_| { + plan_datafusion_err!( + "expected with option '{}' to be an integer, but it was `{}`", + name, + s + ) + }), + Some(e) => Err(plan_datafusion_err!( + "expected with option '{}' to be an integer, but it was `{:?}`", + name, + e + )), + None => Ok(None), + } + } + + pub fn pull_i64(&mut self, name: &str) -> DFResult { + self.pull_opt_i64(name)? + .ok_or_else(|| plan_datafusion_err!("required option '{}' not set", name)) + } + + pub fn pull_u64(&mut self, name: &str) -> DFResult { + self.pull_opt_u64(name)? + .ok_or_else(|| plan_datafusion_err!("required option '{}' not set", name)) + } + + pub fn pull_opt_f64(&mut self, name: &str) -> DFResult> { + match self.options.remove(name) { + Some(Expr::Value(ValueWithSpan { + value: SqlValue::Number(s, _), + span: _, + })) + | Some(Expr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + span: _, + })) => s.parse::().map(Some).map_err(|_| { + plan_datafusion_err!( + "expected with option '{}' to be a double, but it was `{}`", + name, + s + ) + }), + Some(e) => Err(plan_datafusion_err!( + "expected with option '{}' to be a double, but it was `{:?}`", + name, + e + )), + None => Ok(None), + } + } + + pub fn pull_f64(&mut self, name: &str) -> DFResult { + self.pull_opt_f64(name)? + .ok_or_else(|| plan_datafusion_err!("required option '{}' not set", name)) + } + + pub fn pull_bool(&mut self, name: &str) -> DFResult { + self.pull_opt_bool(name)? 
+ .ok_or_else(|| plan_datafusion_err!("required option '{}' not set", name)) + } + + pub fn pull_opt_duration(&mut self, name: &str) -> DFResult> { + match self.options.remove(name) { + Some(e) => Ok(Some(duration_from_sql_expr(&e).map_err(|e| { + plan_datafusion_err!("in with clause '{name}': {}", e) + })?)), + None => Ok(None), + } + } + + pub fn pull_opt_field(&mut self, name: &str) -> DFResult> { + match self.options.remove(name) { + Some(Expr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + span: _, + })) => { + warn!( + "Referred to a field in `{name}` with a string—this is deprecated and will be unsupported after Arroyo 0.14" + ); + Ok(Some(s)) + } + Some(Expr::Identifier(Ident { value, .. })) => Ok(Some(value)), + Some(e) => Err(plan_datafusion_err!( + "expected with option '{}' to be a field, but it was `{:?}`", + name, + e + )), + None => Ok(None), + } + } + + pub fn pull_opt_array(&mut self, name: &str) -> Option> { + Some(match self.options.remove(name)? { + Expr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + span, + }) => s + .split(',') + .map(|p| { + Expr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(p.to_string()), + span: span.clone(), + }) + }) + .collect(), + Expr::Array(a) => a.elem, + e => vec![e], + }) + } + + pub fn pull_opt_parsed(&mut self, name: &str) -> DFResult> { + Ok(match self.pull_opt_str(name)? 
{ + Some(s) => Some( + s.parse() + .map_err(|_| plan_datafusion_err!("invalid value '{s}' for {name}"))?, + ), + None => None, + }) + } + + pub fn keys(&self) -> impl Iterator { + self.options.keys() + } + + pub fn keys_with_prefix<'a, 'b>( + &'a self, + prefix: &'b str, + ) -> impl Iterator + 'b + where + 'a: 'b, + { + self.options.keys().filter(move |k| k.starts_with(prefix)) + } + + pub fn insert_str( + &mut self, + name: impl Into, + value: impl Into, + ) -> DFResult> { + let name = name.into(); + let value = value.into(); + let existing = self.pull_opt_str(&name)?; + self.options.insert( + name, + Expr::Value(SqlValue::SingleQuotedString(value).with_empty_span()), + ); + Ok(existing) + } + + pub fn is_empty(&self) -> bool { + self.options.is_empty() + } + + pub fn contains_key(&self, key: &str) -> bool { + self.options.contains_key(key) + } +} + +fn duration_from_sql_expr(expr: &Expr) -> Result { + match expr { + Expr::Interval(interval) => { + let s = match interval.value.as_ref() { + Expr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + .. + }) => s.clone(), + other => { + return Err(DataFusionError::Plan(format!( + "expected interval string literal, found {other}" + ))); + } + }; + parse_interval_to_duration(&s) + } + Expr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + .. 
+ }) => parse_interval_to_duration(s), + other => Err(DataFusionError::Plan(format!( + "expected an interval expression, found {other}" + ))), + } +} + +fn parse_interval_to_duration(s: &str) -> Result { + let parts: Vec<&str> = s.trim().split_whitespace().collect(); + if parts.len() != 2 { + return Err(DataFusionError::Plan(format!( + "invalid interval string '{s}'; expected ' '" + ))); + } + let value: u64 = parts[0] + .parse() + .map_err(|_| DataFusionError::Plan(format!("invalid interval number: {}", parts[0])))?; + let duration = match parts[1].to_lowercase().as_str() { + "second" | "seconds" | "s" => Duration::from_secs(value), + "minute" | "minutes" | "min" => Duration::from_secs(value * 60), + "hour" | "hours" | "h" => Duration::from_secs(value * 3600), + "day" | "days" | "d" => Duration::from_secs(value * 86400), + unit => { + return Err(DataFusionError::Plan(format!( + "unsupported interval unit '{unit}'" + ))); + } + }; + Ok(duration) +} diff --git a/src/coordinator/tool/mod.rs b/src/coordinator/tool/mod.rs new file mode 100644 index 00000000..95d6a7ed --- /dev/null +++ b/src/coordinator/tool/mod.rs @@ -0,0 +1,3 @@ +mod connector_options; + +pub use connector_options::{ConnectorOptions, FromOpts}; diff --git a/src/sql/catalog/table.rs b/src/sql/catalog/table.rs index b1d60028..a997680b 100644 --- a/src/sql/catalog/table.rs +++ b/src/sql/catalog/table.rs @@ -32,19 +32,11 @@ pub enum Table { LookupTable(ConnectorTable), /// A source/sink table backed by an external connector. ConnectorTable(ConnectorTable), - /// An in-memory table with an optional logical plan (for views). - MemoryTable { - name: String, - fields: Vec, - logical_plan: Option, - }, /// A table defined by a query (CREATE VIEW / CREATE TABLE AS SELECT). TableFromQuery { name: String, logical_plan: LogicalPlan, }, - /// A preview sink for debugging/inspection. 
- PreviewSink { logical_plan: LogicalPlan }, } impl Table { @@ -56,44 +48,10 @@ impl Table { use datafusion::logical_expr::{CreateMemoryTable, CreateView, DdlStatement}; use datafusion::sql::sqlparser::ast::CreateTable; - if let Statement::CreateTable(CreateTable { - name, - columns, - query: None, - .. - }) = statement - { - let name = name.to_string(); - - if columns.is_empty() { - return plan_err!("CREATE TABLE requires at least one column"); - } - - let fields: Vec = columns - .iter() - .map(|col| { - let data_type = crate::sql::types::convert_data_type(&col.data_type) - .map(|(dt, _)| dt) - .unwrap_or(datafusion::arrow::datatypes::DataType::Utf8); - let nullable = !col.options.iter().any(|opt| { - matches!( - opt.option, - datafusion::sql::sqlparser::ast::ColumnOption::NotNull - ) - }); - Arc::new(datafusion::arrow::datatypes::Field::new( - col.name.value.clone(), - data_type, - nullable, - )) - }) - .collect(); - - return Ok(Some(Table::MemoryTable { - name, - fields, - logical_plan: None, - })); + if let Statement::CreateTable(CreateTable { query: None, .. }) = statement { + return plan_err!( + "CREATE TABLE without AS SELECT is not supported; use CREATE TABLE ... AS SELECT or a connector table" + ); } match produce_optimized_plan(statement, schema_provider) { @@ -124,15 +82,13 @@ impl Table { pub fn name(&self) -> &str { match self { - Table::MemoryTable { name, .. } | Table::TableFromQuery { name, .. } => name.as_str(), + Table::TableFromQuery { name, .. } => name.as_str(), Table::ConnectorTable(c) | Table::LookupTable(c) => c.name.as_str(), - Table::PreviewSink { .. } => "preview", } } pub fn get_fields(&self) -> Vec { match self { - Table::MemoryTable { fields, .. } => fields.clone(), Table::ConnectorTable(ConnectorTable { fields, inferred_fields, @@ -151,9 +107,6 @@ impl Table { Table::TableFromQuery { logical_plan, .. 
} => { logical_plan.schema().fields().iter().cloned().collect() } - Table::PreviewSink { logical_plan } => { - logical_plan.schema().fields().iter().cloned().collect() - } } } @@ -187,9 +140,7 @@ impl Table { pub fn connector_op(&self) -> Result { match self { Table::ConnectorTable(c) | Table::LookupTable(c) => Ok(c.connector_op()), - Table::MemoryTable { .. } => plan_err!("can't write to a memory table"), Table::TableFromQuery { .. } => plan_err!("can't write to a query-defined table"), - Table::PreviewSink { .. } => Ok(super::connector::ConnectorOp::new("preview", "")), } } diff --git a/src/sql/planner/extension/sink.rs b/src/sql/planner/extension/sink.rs index 7820925f..e73a8383 100644 --- a/src/sql/planner/extension/sink.rs +++ b/src/sql/planner/extension/sink.rs @@ -41,11 +41,7 @@ impl SinkExtension { } } Table::LookupTable(..) => return plan_err!("cannot use a lookup table as a sink"), - Table::MemoryTable { .. } => return plan_err!("memory tables not supported as sinks"), Table::TableFromQuery { .. } => {} - Table::PreviewSink { .. } => { - // preview sinks may also need debezium wrapping for updating inputs - } } Self::add_remote_if_necessary(&schema, &mut input); @@ -123,10 +119,7 @@ impl UserDefinedLogicalNodeCore for SinkExtension { impl StreamExtension for SinkExtension { fn node_name(&self) -> Option { - match &self.table { - Table::PreviewSink { .. 
} => None, - _ => Some(NamedNode::Sink(self.name.clone())), - } + Some(NamedNode::Sink(self.name.clone())) } fn output_schema(&self) -> StreamSchema { diff --git a/src/sql/planner/mod.rs b/src/sql/planner/mod.rs index 6bf8d357..c85c0fb2 100644 --- a/src/sql/planner/mod.rs +++ b/src/sql/planner/mod.rs @@ -152,7 +152,7 @@ fn build_sink_inputs(extensions: &[LogicalPlan]) -> HashMap Result { +pub(crate) fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result { let LogicalPlan::Extension(ref ext) = plan else { return Ok(plan); }; @@ -307,13 +307,6 @@ pub async fn parse_and_get_arrow_program( Arc::new(plan_rewrite), ) } - CatalogTable::MemoryTable { logical_plan, .. } => { - if logical_plan.is_some() { - return plan_err!("Can only insert into a memory table once"); - } - logical_plan.replace(plan_rewrite); - continue; - } CatalogTable::LookupTable(_) => { plan_err!("lookup (temporary) tables cannot be inserted into") } @@ -322,19 +315,13 @@ pub async fn parse_and_get_arrow_program( "shouldn't be inserting more data into a table made with CREATE TABLE AS" ) } - CatalogTable::PreviewSink { .. } => { - plan_err!("queries shouldn't be able insert into preview sink.") - } } } - None => SinkExtension::new( - TableReference::parse_str("preview"), - CatalogTable::PreviewSink { - logical_plan: plan_rewrite.clone(), - }, - plan_rewrite.schema().clone(), - Arc::new(plan_rewrite), - ), + None => { + return plan_err!( + "Anonymous query is not supported; use INSERT INTO SELECT ..." 
+ ); + } }; extensions.push(LogicalPlan::Extension(Extension { node: Arc::new(sink?), diff --git a/src/sql/planner/parse.rs b/src/sql/planner/parse.rs index a3af2e89..bdb4d481 100644 --- a/src/sql/planner/parse.rs +++ b/src/sql/planner/parse.rs @@ -19,8 +19,8 @@ use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; use datafusion::sql::sqlparser::parser::Parser; use crate::coordinator::{ - CreateFunction, CreateTable, DropFunction, InsertStatement, ShowFunctions, StartFunction, - Statement as CoordinatorStatement, StopFunction, + CreateFunction, CreateTable, DropFunction, ShowFunctions, StartFunction, + Statement as CoordinatorStatement, StopFunction, StreamingTableStatement, }; /// Stage 1: String → Vec> @@ -45,13 +45,6 @@ pub fn parse_sql(query: &str) -> Result>> { statements.into_iter().map(classify_statement).collect() } -/// Classify a parsed DataFusion Statement into the coordinator's Statement type. -/// -/// Statement classification mirrors the analysis flow from `parse_and_get_arrow_program`: -/// - FunctionStream DDL → concrete coordinator types (CreateFunction, DropFunction, etc.) -/// - CREATE TABLE / CREATE VIEW → CreateTable (catalog registration) -/// - INSERT INTO → InsertStatement (streaming pipeline) -/// - Everything else → error (unsupported) fn classify_statement(stmt: DFStatement) -> Result> { match stmt { DFStatement::CreateFunctionWith { options } => { @@ -70,10 +63,10 @@ fn classify_statement(stmt: DFStatement) -> Result Ok(Box::new(DropFunction::new(name))) } DFStatement::ShowFunctions { .. } => Ok(Box::new(ShowFunctions::new())), - s @ DFStatement::CreateTable(_) | s @ DFStatement::CreateView { .. } => { - Ok(Box::new(CreateTable::new(s))) + s @ DFStatement::CreateTable(_) => Ok(Box::new(CreateTable::new(s))), + s @ DFStatement::CreateStreamingTable { .. 
} => { + Ok(Box::new(StreamingTableStatement::new(s))) } - s @ DFStatement::Insert(_) => Ok(Box::new(InsertStatement::new(s))), other => plan_err!("Unsupported SQL statement: {other}"), } } @@ -154,7 +147,7 @@ mod tests { #[test] fn test_parse_insert_statement() { let stmt = first_stmt("INSERT INTO sink SELECT * FROM source"); - assert!(is_type(stmt.as_ref(), "InsertStatement")); + assert!(is_type(stmt.as_ref(), "CreateStreamingTableStatement")); } #[test] @@ -179,7 +172,7 @@ mod tests { let stmts = parse_sql(sql).unwrap(); assert_eq!(stmts.len(), 2); assert!(is_type(stmts[0].as_ref(), "CreateTable")); - assert!(is_type(stmts[1].as_ref(), "InsertStatement")); + assert!(is_type(stmts[1].as_ref(), "CreateStreamingTableStatement")); } #[test] diff --git a/src/sql/planner/plan/mod.rs b/src/sql/planner/plan/mod.rs index d497ca65..83891731 100644 --- a/src/sql/planner/plan/mod.rs +++ b/src/sql/planner/plan/mod.rs @@ -1,39 +1,24 @@ -use std::collections::HashSet; -use std::sync::Arc; +use datafusion::common::Result; +use datafusion::common::tree_node::TreeNode; +use datafusion::logical_expr::LogicalPlan; -use datafusion::common::tree_node::{Transformed, TreeNodeRecursion}; -use datafusion::common::{ - Column, DataFusionError, Result, Spans, TableReference, plan_err, - tree_node::{TreeNode, TreeNodeRewriter, TreeNodeVisitor}, -}; -use datafusion::logical_expr::{ - Aggregate, Expr, Extension, Filter, LogicalPlan, SubqueryAlias, expr::Alias, -}; - -use crate::sql::planner::extension::StreamExtension; -use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension}; -use crate::sql::planner::extension::join::JOIN_NODE_NAME; -use crate::sql::planner::extension::remote_table::RemoteTableExtension; -use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field}; -use crate::sql::types::{ - DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window, -}; +use crate::sql::planner::StreamSchemaProvider; +use 
crate::sql::planner::rewrite::TimeWindowUdfChecker; use self::aggregate::AggregateRewriter; use self::join::JoinRewriter; +use self::stream_rewriter::StreamRewriter; +use self::window_detecting_visitor::{WindowDetectingVisitor, extract_column}; use self::window_fn::WindowFunctionRewriter; pub(crate) mod aggregate; pub(crate) mod join; +pub(crate) mod stream_rewriter; +pub(crate) mod window_detecting_visitor; pub(crate) mod window_fn; -use super::StreamSchemaProvider; use tracing::debug; -/// Stage 3: LogicalPlan → Streaming LogicalPlan -/// -/// Rewrites a standard DataFusion logical plan into one that supports -/// streaming semantics (timestamps, windows, watermarks). pub fn rewrite_plan( plan: LogicalPlan, schema_provider: &StreamSchemaProvider, @@ -51,399 +36,3 @@ pub fn rewrite_plan( Ok(rewritten_plan.data) } - -/// Visitor that detects window types in a logical plan -#[derive(Debug, Default)] -pub(crate) struct WindowDetectingVisitor { - pub(crate) window: Option, - pub(crate) fields: HashSet, -} - -impl WindowDetectingVisitor { - pub(crate) fn get_window(logical_plan: &LogicalPlan) -> Result> { - let mut visitor = WindowDetectingVisitor { - window: None, - fields: HashSet::new(), - }; - logical_plan.visit_with_subqueries(&mut visitor)?; - Ok(visitor.window.take()) - } -} - -fn extract_column(expr: &Expr) -> Option<&Column> { - match expr { - Expr::Column(column) => Some(column), - Expr::Alias(Alias { expr, .. 
}) => extract_column(expr), - _ => None, - } -} - -impl TreeNodeVisitor<'_> for WindowDetectingVisitor { - type Node = LogicalPlan; - - fn f_down(&mut self, node: &Self::Node) -> Result { - let LogicalPlan::Extension(Extension { node }) = node else { - return Ok(TreeNodeRecursion::Continue); - }; - - if node.name() == JOIN_NODE_NAME { - let input_windows: HashSet<_> = node - .inputs() - .iter() - .map(|input| Self::get_window(input)) - .collect::>>()?; - if input_windows.len() > 1 { - return Err(DataFusionError::Plan( - "can't handle mixed windowing between left and right".to_string(), - )); - } - self.window = input_windows - .into_iter() - .next() - .expect("join has at least one input"); - return Ok(TreeNodeRecursion::Jump); - } - Ok(TreeNodeRecursion::Continue) - } - - fn f_up(&mut self, node: &Self::Node) -> Result { - match node { - LogicalPlan::Projection(projection) => { - let window_expressions = projection - .expr - .iter() - .enumerate() - .filter_map(|(index, expr)| { - if let Some(column) = extract_column(expr) { - let input_field = projection - .input - .schema() - .field_with_name(column.relation.as_ref(), &column.name); - let input_field = match input_field { - Ok(field) => field, - Err(err) => return Some(Err(err)), - }; - if self.fields.contains( - &(column.relation.clone(), Arc::new(input_field.clone())).into(), - ) { - return self.window.clone().map(|window| Ok((index, window))); - } - } - find_window(expr) - .map(|option| option.map(|inner| (index, inner))) - .transpose() - }) - .collect::>>()?; - self.fields.clear(); - for (index, window) in window_expressions { - if let Some(existing_window) = &self.window { - if *existing_window != window { - return plan_err!( - "can't window by both {:?} and {:?}", - existing_window, - window - ); - } - self.fields - .insert(projection.schema.qualified_field(index).into()); - } else { - return plan_err!( - "can't call a windowing function without grouping by it in an aggregate" - ); - } - } - } - 
LogicalPlan::SubqueryAlias(subquery_alias) => { - self.fields = self - .fields - .drain() - .map(|field| { - Ok(subquery_alias - .schema - .qualified_field( - subquery_alias - .input - .schema() - .index_of_column(&field.qualified_column())?, - ) - .into()) - }) - .collect::>>()?; - } - LogicalPlan::Aggregate(Aggregate { - input, - group_expr, - aggr_expr: _, - schema, - .. - }) => { - let window_expressions = group_expr - .iter() - .enumerate() - .filter_map(|(index, expr)| { - if let Some(column) = extract_column(expr) { - let input_field = input - .schema() - .field_with_name(column.relation.as_ref(), &column.name); - let input_field = match input_field { - Ok(field) => field, - Err(err) => return Some(Err(err)), - }; - if self - .fields - .contains(&(column.relation.as_ref(), input_field).into()) - { - return self.window.clone().map(|window| Ok((index, window))); - } - } - find_window(expr) - .map(|option| option.map(|inner| (index, inner))) - .transpose() - }) - .collect::>>()?; - self.fields.clear(); - for (index, window) in window_expressions { - if let Some(existing_window) = &self.window { - if *existing_window != window { - return Err(DataFusionError::Plan( - "window expressions do not match".to_string(), - )); - } - } else { - self.window = Some(window); - } - self.fields.insert(schema.qualified_field(index).into()); - } - } - LogicalPlan::Extension(Extension { node }) => { - if node.name() == AGGREGATE_EXTENSION_NAME { - let aggregate_extension = node - .as_any() - .downcast_ref::() - .expect("should be aggregate extension"); - - match &aggregate_extension.window_behavior { - WindowBehavior::FromOperator { - window, - window_field, - window_index: _, - is_nested, - } => { - if self.window.is_some() && !*is_nested { - return Err(DataFusionError::Plan( - "aggregate node should not be recalculating window, as input is windowed.".to_string(), - )); - } - self.window = Some(window.clone()); - self.fields.insert(window_field.clone()); - } - 
WindowBehavior::InData => { - let input_fields = self.fields.clone(); - self.fields.clear(); - for field in fields_with_qualifiers(node.schema()) { - if input_fields.contains(&field) { - self.fields.insert(field); - } - } - if self.fields.is_empty() { - return Err(DataFusionError::Plan( - "must have window in aggregate. Make sure you are calling one of the windowing functions (hop, tumble, session) or using the window field of the input".to_string(), - )); - } - } - } - } - } - _ => {} - } - Ok(TreeNodeRecursion::Continue) - } -} - -/// Main rewriter for streaming SQL plans. -/// Rewrites standard logical plans into streaming-aware plans with -/// timestamp propagation, window detection, and streaming operator insertion. -pub struct StreamRewriter<'a> { - pub(crate) schema_provider: &'a StreamSchemaProvider, -} - -impl TreeNodeRewriter for StreamRewriter<'_> { - type Node = LogicalPlan; - - fn f_up(&mut self, mut node: Self::Node) -> Result> { - match node { - LogicalPlan::Projection(ref mut projection) => { - if !has_timestamp_field(&projection.schema) { - let timestamp_field: DFField = projection - .input - .schema() - .qualified_field_with_unqualified_name(TIMESTAMP_FIELD) - .map_err(|_| { - DataFusionError::Plan(format!( - "No timestamp field found in projection input ({})", - projection.input.display() - )) - })? 
- .into(); - projection.schema = add_timestamp_field( - projection.schema.clone(), - timestamp_field.qualifier().cloned(), - ) - .expect("in projection"); - projection.expr.push(Expr::Column(Column { - relation: timestamp_field.qualifier().cloned(), - name: TIMESTAMP_FIELD.to_string(), - spans: Spans::default(), - })); - } - - // Rewrite row_time() calls to _timestamp column references - let rewritten = projection - .expr - .iter() - .map(|expr| expr.clone().rewrite(&mut RowTimeRewriter {})) - .collect::>>()?; - if rewritten.iter().any(|r| r.transformed) { - projection.expr = rewritten.into_iter().map(|r| r.data).collect(); - } - return Ok(Transformed::yes(node)); - } - LogicalPlan::Aggregate(aggregate) => { - return AggregateRewriter { - schema_provider: self.schema_provider, - } - .f_up(LogicalPlan::Aggregate(aggregate)); - } - LogicalPlan::Join(join) => { - return JoinRewriter { - schema_provider: self.schema_provider, - } - .f_up(LogicalPlan::Join(join)); - } - LogicalPlan::Filter(f) => { - let expr = f - .predicate - .clone() - .rewrite(&mut TimeWindowNullCheckRemover {})?; - return Ok(if expr.transformed { - Transformed::yes(LogicalPlan::Filter(Filter::try_new(expr.data, f.input)?)) - } else { - Transformed::no(LogicalPlan::Filter(f)) - }); - } - LogicalPlan::Window(_) => { - return WindowFunctionRewriter {}.f_up(node); - } - LogicalPlan::Sort(_) => { - return plan_err!( - "ORDER BY is not currently supported in streaming SQL ({})", - node.display() - ); - } - LogicalPlan::Repartition(_) => { - return plan_err!( - "Repartitions are not currently supported ({})", - node.display() - ); - } - LogicalPlan::Union(mut union) => { - union.schema = union.inputs[0].schema().clone(); - for input in union.inputs.iter_mut() { - if let LogicalPlan::Extension(Extension { node }) = input.as_ref() { - let stream_extension: &dyn StreamExtension = node.try_into().unwrap(); - if !stream_extension.transparent() { - continue; - } - } - let remote_table_extension = 
Arc::new(RemoteTableExtension { - input: input.as_ref().clone(), - name: TableReference::bare("union_input"), - schema: union.schema.clone(), - materialize: false, - }); - *input = Arc::new(LogicalPlan::Extension(Extension { - node: remote_table_extension, - })); - } - return Ok(Transformed::yes(LogicalPlan::Union(union))); - } - LogicalPlan::SubqueryAlias(sa) => { - return Ok(Transformed::yes(LogicalPlan::SubqueryAlias( - SubqueryAlias::try_new(sa.input, sa.alias)?, - ))); - } - LogicalPlan::Limit(_) => { - return plan_err!( - "LIMIT is not currently supported in streaming SQL ({})", - node.display() - ); - } - LogicalPlan::Explain(_) => { - return plan_err!("EXPLAIN is not supported ({})", node.display()); - } - LogicalPlan::Analyze(_) => { - return plan_err!("ANALYZE is not supported ({})", node.display()); - } - _ => {} - } - Ok(Transformed::no(node)) - } -} - -/// Rewrites row_time() function calls to _timestamp column references -struct RowTimeRewriter; - -impl TreeNodeRewriter for RowTimeRewriter { - type Node = Expr; - - fn f_up(&mut self, node: Self::Node) -> Result> { - if let Expr::ScalarFunction(ref func) = node { - if func.func.name() == "row_time" { - return Ok(Transformed::yes(Expr::Column(Column::new_unqualified( - TIMESTAMP_FIELD.to_string(), - )))); - } - } - Ok(Transformed::no(node)) - } -} - -/// Removes IS NOT NULL checks on window expressions that get pushed down incorrectly -pub(crate) struct TimeWindowNullCheckRemover; - -impl TreeNodeRewriter for TimeWindowNullCheckRemover { - type Node = Expr; - - fn f_up(&mut self, node: Self::Node) -> Result> { - if let Expr::IsNotNull(ref inner) = node { - if find_window(inner)?.is_some() { - return Ok(Transformed::yes(Expr::Literal( - datafusion::common::ScalarValue::Boolean(Some(true)), - None, - ))); - } - } - Ok(Transformed::no(node)) - } -} - -/// Checks that window UDFs (tumble/hop/session) are not used outside aggregates -pub(crate) struct TimeWindowUdfChecker; - -impl TreeNodeVisitor<'_> for 
TimeWindowUdfChecker { - type Node = LogicalPlan; - - fn f_up(&mut self, node: &Self::Node) -> Result { - if let LogicalPlan::Projection(projection) = node { - for expr in &projection.expr { - if let Some(window) = find_window(expr)? { - return plan_err!( - "Window function {:?} can only be used as a GROUP BY expression in an aggregate", - window - ); - } - } - } - Ok(TreeNodeRecursion::Continue) - } -} diff --git a/src/sql/planner/plan/stream_rewriter.rs b/src/sql/planner/plan/stream_rewriter.rs new file mode 100644 index 00000000..53549af4 --- /dev/null +++ b/src/sql/planner/plan/stream_rewriter.rs @@ -0,0 +1,148 @@ +use std::sync::Arc; + +use crate::sql::planner::extension::StreamExtension; +use crate::sql::planner::extension::remote_table::RemoteTableExtension; +use crate::sql::planner::plan::{ + aggregate::AggregateRewriter, join::JoinRewriter, window_fn::WindowFunctionRewriter, +}; +use crate::sql::planner::rewrite::{RowTimeRewriter, TimeWindowNullCheckRemover}; +use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field}; +use crate::sql::types::{DFField, TIMESTAMP_FIELD}; +use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; +use datafusion::common::{Column, DataFusionError, Result, Spans, TableReference, plan_err}; +use datafusion::logical_expr::{Expr, Extension, Filter, LogicalPlan, SubqueryAlias}; +use datafusion_common::tree_node::TreeNode; + +use super::StreamSchemaProvider; + +pub struct StreamRewriter<'a> { + pub(crate) schema_provider: &'a StreamSchemaProvider, +} + +impl<'a> StreamRewriter<'a> { + pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self { + Self { schema_provider } + } +} + +impl TreeNodeRewriter for StreamRewriter<'_> { + type Node = LogicalPlan; + + fn f_up(&mut self, mut node: Self::Node) -> Result> { + match node { + LogicalPlan::Projection(ref mut projection) => { + if !has_timestamp_field(&projection.schema) { + let timestamp_field: DFField = projection + .input + .schema() + 
.qualified_field_with_unqualified_name(TIMESTAMP_FIELD) + .map_err(|_| { + DataFusionError::Plan(format!( + "No timestamp field found in projection input ({})", + projection.input.display() + )) + })? + .into(); + projection.schema = add_timestamp_field( + projection.schema.clone(), + timestamp_field.qualifier().cloned(), + ) + .expect("in projection"); + projection.expr.push(Expr::Column(Column { + relation: timestamp_field.qualifier().cloned(), + name: TIMESTAMP_FIELD.to_string(), + spans: Spans::default(), + })); + } + + let rewritten = projection + .expr + .iter() + .map(|expr| expr.clone().rewrite(&mut RowTimeRewriter {})) + .collect::>>()?; + if rewritten.iter().any(|r| r.transformed) { + projection.expr = rewritten.into_iter().map(|r| r.data).collect(); + } + return Ok(Transformed::yes(node)); + } + LogicalPlan::Aggregate(aggregate) => { + return AggregateRewriter { + schema_provider: self.schema_provider, + } + .f_up(LogicalPlan::Aggregate(aggregate)); + } + LogicalPlan::Join(join) => { + return JoinRewriter { + schema_provider: self.schema_provider, + } + .f_up(LogicalPlan::Join(join)); + } + LogicalPlan::Filter(f) => { + let expr = f + .predicate + .clone() + .rewrite(&mut TimeWindowNullCheckRemover {})?; + return Ok(if expr.transformed { + Transformed::yes(LogicalPlan::Filter(Filter::try_new(expr.data, f.input)?)) + } else { + Transformed::no(LogicalPlan::Filter(f)) + }); + } + LogicalPlan::Window(_) => { + return WindowFunctionRewriter {}.f_up(node); + } + LogicalPlan::Sort(_) => { + return plan_err!( + "ORDER BY is not currently supported in streaming SQL ({})", + node.display() + ); + } + LogicalPlan::Repartition(_) => { + return plan_err!( + "Repartitions are not currently supported ({})", + node.display() + ); + } + LogicalPlan::Union(mut union) => { + union.schema = union.inputs[0].schema().clone(); + for input in union.inputs.iter_mut() { + if let LogicalPlan::Extension(Extension { node }) = input.as_ref() { + let stream_extension: &dyn 
StreamExtension = node.try_into().unwrap(); + if !stream_extension.transparent() { + continue; + } + } + let remote_table_extension = Arc::new(RemoteTableExtension { + input: input.as_ref().clone(), + name: TableReference::bare("union_input"), + schema: union.schema.clone(), + materialize: false, + }); + *input = Arc::new(LogicalPlan::Extension(Extension { + node: remote_table_extension, + })); + } + return Ok(Transformed::yes(LogicalPlan::Union(union))); + } + LogicalPlan::SubqueryAlias(sa) => { + return Ok(Transformed::yes(LogicalPlan::SubqueryAlias( + SubqueryAlias::try_new(sa.input, sa.alias)?, + ))); + } + LogicalPlan::Limit(_) => { + return plan_err!( + "LIMIT is not currently supported in streaming SQL ({})", + node.display() + ); + } + LogicalPlan::Explain(_) => { + return plan_err!("EXPLAIN is not supported ({})", node.display()); + } + LogicalPlan::Analyze(_) => { + return plan_err!("ANALYZE is not supported ({})", node.display()); + } + _ => {} + } + Ok(Transformed::no(node)) + } +} diff --git a/src/sql/planner/plan/window_detecting_visitor.rs b/src/sql/planner/plan/window_detecting_visitor.rs new file mode 100644 index 00000000..0a0a0323 --- /dev/null +++ b/src/sql/planner/plan/window_detecting_visitor.rs @@ -0,0 +1,215 @@ +use std::collections::HashSet; +use std::sync::Arc; + +use datafusion::common::{ + Column, DataFusionError, Result, + tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}, +}; +use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, expr::Alias}; + +use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension}; +use crate::sql::planner::extension::join::JOIN_NODE_NAME; +use crate::sql::types::{DFField, WindowBehavior, WindowType, fields_with_qualifiers, find_window}; + +#[derive(Debug, Default)] +pub(crate) struct WindowDetectingVisitor { + pub(crate) window: Option, + pub(crate) fields: HashSet, +} + +impl WindowDetectingVisitor { + pub(crate) fn get_window(logical_plan: 
&LogicalPlan) -> Result> { + let mut visitor = WindowDetectingVisitor { + window: None, + fields: HashSet::new(), + }; + logical_plan.visit_with_subqueries(&mut visitor)?; + Ok(visitor.window.take()) + } +} + +pub(crate) fn extract_column(expr: &Expr) -> Option<&Column> { + match expr { + Expr::Column(column) => Some(column), + Expr::Alias(Alias { expr, .. }) => extract_column(expr), + _ => None, + } +} + +impl TreeNodeVisitor<'_> for WindowDetectingVisitor { + type Node = LogicalPlan; + + fn f_down(&mut self, node: &Self::Node) -> Result { + let LogicalPlan::Extension(Extension { node }) = node else { + return Ok(TreeNodeRecursion::Continue); + }; + + if node.name() == JOIN_NODE_NAME { + let input_windows: HashSet<_> = node + .inputs() + .iter() + .map(|input| Self::get_window(input)) + .collect::>>()?; + if input_windows.len() > 1 { + return Err(DataFusionError::Plan( + "can't handle mixed windowing between left and right".to_string(), + )); + } + self.window = input_windows + .into_iter() + .next() + .expect("join has at least one input"); + return Ok(TreeNodeRecursion::Jump); + } + Ok(TreeNodeRecursion::Continue) + } + + fn f_up(&mut self, node: &Self::Node) -> Result { + match node { + LogicalPlan::Projection(projection) => { + let window_expressions = projection + .expr + .iter() + .enumerate() + .filter_map(|(index, expr)| { + if let Some(column) = extract_column(expr) { + let input_field = projection + .input + .schema() + .field_with_name(column.relation.as_ref(), &column.name); + let input_field = match input_field { + Ok(field) => field, + Err(err) => return Some(Err(err)), + }; + if self.fields.contains( + &(column.relation.clone(), Arc::new(input_field.clone())).into(), + ) { + return self.window.clone().map(|window| Ok((index, window))); + } + } + find_window(expr) + .map(|option| option.map(|inner| (index, inner))) + .transpose() + }) + .collect::>>()?; + self.fields.clear(); + for (index, window) in window_expressions { + if let 
Some(existing_window) = &self.window { + if *existing_window != window { + return Err(DataFusionError::Plan( + "window expressions do not match".to_string(), + )); + } + } else { + self.window = Some(window); + } + self.fields + .insert(projection.schema.qualified_field(index).into()); + } + } + LogicalPlan::SubqueryAlias(subquery_alias) => { + self.fields = self + .fields + .drain() + .map(|field| { + Ok(subquery_alias + .schema + .qualified_field( + subquery_alias + .input + .schema() + .index_of_column(&field.qualified_column())?, + ) + .into()) + }) + .collect::>>()?; + } + LogicalPlan::Aggregate(Aggregate { + input, + group_expr, + aggr_expr: _, + schema, + .. + }) => { + let window_expressions = group_expr + .iter() + .enumerate() + .filter_map(|(index, expr)| { + if let Some(column) = extract_column(expr) { + let input_field = input + .schema() + .field_with_name(column.relation.as_ref(), &column.name); + let input_field = match input_field { + Ok(field) => field, + Err(err) => return Some(Err(err)), + }; + if self + .fields + .contains(&(column.relation.as_ref(), input_field).into()) + { + return self.window.clone().map(|window| Ok((index, window))); + } + } + find_window(expr) + .map(|option| option.map(|inner| (index, inner))) + .transpose() + }) + .collect::>>()?; + self.fields.clear(); + for (index, window) in window_expressions { + if let Some(existing_window) = &self.window { + if *existing_window != window { + return Err(DataFusionError::Plan( + "window expressions do not match".to_string(), + )); + } + } else { + self.window = Some(window); + } + self.fields.insert(schema.qualified_field(index).into()); + } + } + LogicalPlan::Extension(Extension { node }) => { + if node.name() == AGGREGATE_EXTENSION_NAME { + let aggregate_extension = node + .as_any() + .downcast_ref::() + .expect("should be aggregate extension"); + + match &aggregate_extension.window_behavior { + WindowBehavior::FromOperator { + window, + window_field, + window_index: _, + 
is_nested, + } => { + if self.window.is_some() && !*is_nested { + return Err(DataFusionError::Plan( + "aggregate node should not be recalculating window, as input is windowed.".to_string(), + )); + } + self.window = Some(window.clone()); + self.fields.insert(window_field.clone()); + } + WindowBehavior::InData => { + let input_fields = self.fields.clone(); + self.fields.clear(); + for field in fields_with_qualifiers(node.schema()) { + if input_fields.contains(&field) { + self.fields.insert(field); + } + } + if self.fields.is_empty() { + return Err(DataFusionError::Plan( + "must have window in aggregate. Make sure you are calling one of the windowing functions (hop, tumble, session) or using the window field of the input".to_string(), + )); + } + } + } + } + } + _ => {} + } + Ok(TreeNodeRecursion::Continue) + } +} diff --git a/src/sql/planner/rewrite/source_rewriter.rs b/src/sql/planner/rewrite/source_rewriter.rs index 209c3288..27281b41 100644 --- a/src/sql/planner/rewrite/source_rewriter.rs +++ b/src/sql/planner/rewrite/source_rewriter.rs @@ -23,7 +23,6 @@ use datafusion::logical_expr::{ use crate::sql::catalog::connector_table::ConnectorTable; use crate::sql::catalog::field_spec::FieldSpec; use crate::sql::catalog::table::Table; -use crate::sql::catalog::utils::add_timestamp_field; use crate::sql::planner::StreamSchemaProvider; use crate::sql::planner::extension::remote_table::RemoteTableExtension; use crate::sql::planner::extension::watermark_node::WatermarkNode; @@ -227,7 +226,7 @@ impl TreeNodeRewriter for SourceRewriter<'_> { type Node = LogicalPlan; fn f_up(&mut self, node: Self::Node) -> DFResult> { - let LogicalPlan::TableScan(mut table_scan) = node else { + let LogicalPlan::TableScan(table_scan) = node else { return Ok(Transformed::no(node)); }; @@ -243,30 +242,10 @@ impl TreeNodeRewriter for SourceRewriter<'_> { // TODO: implement LookupSource extension plan_err!("Lookup tables are not yet supported") } - Table::MemoryTable { - name, - fields: _, - 
logical_plan, - } => { - let Some(logical_plan) = logical_plan else { - return plan_err!( - "Can't query from memory table {} without first inserting into it", - name - ); - }; - table_scan.projected_schema = add_timestamp_field( - table_scan.projected_schema.clone(), - Some(table_scan.table_name.clone()), - )?; - self.mutate_table_from_query(&table_scan, logical_plan) - } Table::TableFromQuery { name: _, logical_plan, } => self.mutate_table_from_query(&table_scan, logical_plan), - Table::PreviewSink { .. } => Err(DataFusionError::Plan( - "can't select from a preview sink".to_string(), - )), } } } From 1821c0f50cd3686e88e41fec6b8d0d295cd8e5c4 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Thu, 19 Mar 2026 00:33:57 +0800 Subject: [PATCH 06/44] update --- src/coordinator/execution/executor.rs | 4 + src/coordinator/plan/logical_plan_visitor.rs | 199 ++++++----- src/sql/planner/plan/aggregate.rs | 275 ---------------- src/sql/planner/plan/aggregate_rewriter.rs | 262 +++++++++++++++ src/sql/planner/plan/join.rs | 240 -------------- src/sql/planner/plan/join_rewriter.rs | 224 +++++++++++++ src/sql/planner/plan/mod.rs | 68 ++-- src/sql/planner/plan/row_time_rewriter.rs | 36 ++ src/sql/planner/plan/stream_rewriter.rs | 311 +++++++++++------- .../planner/plan/streaming_window_analzer.rs | 203 ++++++++++++ .../planner/plan/window_detecting_visitor.rs | 215 ------------ src/sql/planner/plan/window_fn.rs | 178 ---------- .../planner/plan/window_function_rewriter.rs | 191 +++++++++++ src/sql/planner/rewrite/mod.rs | 2 - src/sql/planner/rewrite/row_time.rs | 39 --- 15 files changed, 1264 insertions(+), 1183 deletions(-) delete mode 100644 src/sql/planner/plan/aggregate.rs create mode 100644 src/sql/planner/plan/aggregate_rewriter.rs delete mode 100644 src/sql/planner/plan/join.rs create mode 100644 src/sql/planner/plan/join_rewriter.rs create mode 100644 src/sql/planner/plan/row_time_rewriter.rs create mode 100644 src/sql/planner/plan/streaming_window_analzer.rs delete mode 
100644 src/sql/planner/plan/window_detecting_visitor.rs delete mode 100644 src/sql/planner/plan/window_fn.rs create mode 100644 src/sql/planner/plan/window_function_rewriter.rs delete mode 100644 src/sql/planner/rewrite/row_time.rs diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 056f0236..2dfb6326 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -22,6 +22,10 @@ use crate::runtime::taskexecutor::TaskManager; use std::sync::Arc; use thiserror::Error; use tracing::{debug, info}; +use crate::datastream::logical::{LogicalProgram, ProgramConfig}; +use crate::datastream::optimizers::ChainingOptimizer; +use crate::sql::CompiledSql; +use crate::sql::planner::{physical_planner, rewrite_sinks}; #[derive(Error, Debug)] pub enum ExecuteError { diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index fb8c8c82..dfcf2e10 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -37,7 +37,7 @@ use crate::sql::catalog::field_spec::FieldSpec; use crate::sql::catalog::optimizer::produce_optimized_plan; use crate::sql::functions::{is_json_union, serialize_outgoing_json}; use crate::sql::planner::extension::sink::SinkExtension; -use crate::sql::planner::{StreamSchemaProvider, maybe_add_key_extension_to_sink}; +use crate::sql::planner::{StreamSchemaProvider, maybe_add_key_extension_to_sink, rewrite_sinks}; use crate::sql::rewrite_plan; const CONNECTOR: &str = "connector"; @@ -78,98 +78,121 @@ impl LogicalPlanVisitor { _ => panic!("LogicalPlanVisitor should return Plan"), } } + /// Builds the logical plan for 'CREATE STREAMING TABLE'. + /// This orchestrates the transformation from a SQL Query to a stateful Sink. 
fn build_create_streaming_table_plan( &self, stmt: &StreamingTableStatement, ) -> Result> { - let statement = &stmt.statement; - match statement { - DFStatement::CreateStreamingTable { - name, - with_options, - comment, - query, - } => { - let name_str = name.to_string(); - - let mut connector_opts = ConnectorOptions::new(with_options, &None)?; - let connector_type = connector_opts.pull_opt_str(CONNECTOR)?.ok_or_else(|| { - plan_datafusion_err!( - "Streaming Table '{}' must specify '{}' option", - name_str, - CONNECTOR - ) - })?; - - let synthetic_statement = Statement::Query(query.clone()); - let base_plan = - produce_optimized_plan(&synthetic_statement, &self.schema_provider)?; - - let mut plan_rewrite = rewrite_plan(base_plan, &self.schema_provider)?; - - if plan_rewrite - .schema() - .fields() - .iter() - .any(|f| is_json_union(f.data_type())) - { - plan_rewrite = - serialize_outgoing_json(&self.schema_provider, Arc::new(plan_rewrite)); - } - - let fields: Vec = plan_rewrite - .schema() - .fields() - .iter() - .map(|f| FieldSpec::Struct((**f).clone())) - .collect(); - - let partition_exprs = - if let Some(partition_cols) = connector_opts.pull_opt_str(PARTITION_BY)? 
{ - let cols: Vec = - partition_cols.split(',').map(|c| col(c.trim())).collect(); - Some(cols) - } else { - None - }; - - let connector_table = ConnectorTable { - id: None, - connector: connector_type, - name: name_str.clone(), - connection_type: ConnectionType::Sink, - fields, - config: "".to_string(), - description: comment.clone().unwrap_or_default(), - event_time_field: None, - watermark_field: None, - idle_time: connector_opts.pull_opt_duration(IDLE_MICROS)?, - primary_keys: Arc::new(vec![]), - inferred_fields: None, - partition_exprs: Arc::new(partition_exprs), - }; - - let sink_extension = SinkExtension::new( - TableReference::bare(name_str.clone()), - Table::ConnectorTable(connector_table.clone()), - plan_rewrite.schema().clone(), - Arc::new(plan_rewrite), - )?; - - let final_plan = - maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension { - node: Arc::new(sink_extension), - }))?; - - Ok(Box::new(StreamingTable { - name: name_str, - comment: comment.clone(), - connector_table, - logical_plan: final_plan, - })) - } - _ => plan_err!("Only CREATE STREAMING TABLE supported"), + let DFStatement::CreateStreamingTable { + name, + with_options, + comment, + query, + } = &stmt.statement + else { + return plan_err!("Only CREATE STREAMING TABLE is supported in this context"); + }; + + let table_name = name.to_string(); + debug!("Compiling Streaming Table Sink for: {}", table_name); + + // 1. Connector Options Extraction + // Extract 'connector' (Kafka, Postgres, etc.) and other physical properties. + let mut opts = ConnectorOptions::new(with_options, &None)?; + let connector = opts.pull_opt_str(CONNECTOR)?.ok_or_else(|| { + plan_datafusion_err!( + "Streaming Table '{}' must specify the '{}' option", + table_name, + CONNECTOR + ) + })?; + + // 2. Query Optimization & Streaming Rewrite + // Convert the standard SQL query into a streaming-aware logical plan. 
+ let base_plan = + produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?; + let mut plan = rewrite_plan(base_plan, &self.schema_provider)?; + + // 3. Outgoing Data Serialization + // If the query produces internal types (like JSON Union), inject a serialization layer. + if plan + .schema() + .fields() + .iter() + .any(|f| is_json_union(f.data_type())) + { + plan = serialize_outgoing_json(&self.schema_provider, Arc::new(plan)); } + + // 4. Sink Metadata & Partitioning Logic + // Determine how data should be partitioned before hitting the external system. + let partition_exprs = self.resolve_partition_expressions(&mut opts)?; + + // Map DataFusion fields to Arroyo FieldSpecs for the connector. + let fields: Vec = plan + .schema() + .fields() + .iter() + .map(|f| FieldSpec::Struct((**f).clone())) + .collect(); + + // 5. Connector Table Construction + // This object acts as the 'Identity Card' for the Sink in the physical cluster. + let connector_table = ConnectorTable { + id: None, + connector, + name: table_name.clone(), + connection_type: ConnectionType::Sink, + fields, + config: "".to_string(), // Filled by the coordinator later + description: comment.clone().unwrap_or_default(), + event_time_field: None, + watermark_field: None, + idle_time: opts.pull_opt_duration(IDLE_MICROS)?, + primary_keys: Arc::new(vec![]), // PKs are inferred or explicitly set here + inferred_fields: None, + partition_exprs: Arc::new(partition_exprs), + }; + + // 6. Sink Extension & Final Rewrites + // Wrap the plan in a SinkExtension and ensure Key/Partition alignment. 
+ let sink_extension = SinkExtension::new( + TableReference::bare(table_name.clone()), + Table::ConnectorTable(connector_table.clone()), + plan.schema().clone(), + Arc::new(plan), + )?; + + // Ensure the data distribution matches the Sink's requirements (e.g., Shuffle by Partition Key) + let plan_with_keys = maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension { + node: Arc::new(sink_extension), + }))?; + + // Global pass to wire inputs and handle shared sub-plans + let final_extensions = rewrite_sinks(vec![plan_with_keys])?; + let final_plan = final_extensions.into_iter().next().unwrap(); + + Ok(Box::new(StreamingTable { + name: table_name, + comment: comment.clone(), + connector_table, + logical_plan: final_plan, + })) + } + + fn resolve_partition_expressions( + &self, + opts: &mut ConnectorOptions, + ) -> Result>> { + opts.pull_opt_str(PARTITION_BY)? + .map(|cols| { + cols.split(',') + .map(|c| col(c.trim())) + .collect::>() + }) + .map(Ok) + .transpose() } } diff --git a/src/sql/planner/plan/aggregate.rs b/src/sql/planner/plan/aggregate.rs deleted file mode 100644 index aad17edb..00000000 --- a/src/sql/planner/plan/aggregate.rs +++ /dev/null @@ -1,275 +0,0 @@ -use std::sync::Arc; - -use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; -use datafusion::common::{DFSchema, Result, not_impl_err, plan_err}; -use datafusion::functions_aggregate::expr_fn::max; -use datafusion::logical_expr; -use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan}; -use datafusion::prelude::col; -use tracing::debug; - -use crate::sql::planner::StreamSchemaProvider; -use crate::sql::planner::extension::aggregate::AggregateExtension; -use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; -use crate::sql::planner::plan::WindowDetectingVisitor; -use crate::sql::types::{ - DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window, - schema_from_df_fields_with_metadata, -}; - 
-pub(crate) struct AggregateRewriter<'a> { - pub schema_provider: &'a StreamSchemaProvider, -} - -impl AggregateRewriter<'_> { - /// Rewrite a non-windowed aggregate into an updating aggregate with key calculation - pub fn rewrite_non_windowed_aggregate( - input: Arc, - mut key_fields: Vec, - group_expr: Vec, - mut aggr_expr: Vec, - schema: Arc, - _schema_provider: &StreamSchemaProvider, - ) -> Result> { - let key_count = key_fields.len(); - key_fields.extend(fields_with_qualifiers(input.schema())); - - let key_schema = Arc::new(schema_from_df_fields_with_metadata( - &key_fields, - schema.metadata().clone(), - )?); - - let mut key_projection_expressions: Vec<_> = group_expr - .iter() - .zip(key_fields.iter()) - .map(|(expr, f)| expr.clone().alias(f.name().to_string())) - .collect(); - - key_projection_expressions.extend( - fields_with_qualifiers(input.schema()) - .iter() - .map(|field| Expr::Column(field.qualified_column())), - ); - - let key_projection = - LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema( - key_projection_expressions, - input.clone(), - key_schema, - )?); - - let key_plan = LogicalPlan::Extension(Extension { - node: Arc::new(KeyCalculationExtension::new( - key_projection, - KeysOrExprs::Keys((0..key_count).collect()), - )), - }); - - let Ok(timestamp_field) = key_plan - .schema() - .qualified_field_with_unqualified_name(TIMESTAMP_FIELD) - else { - return plan_err!("no timestamp field found in schema"); - }; - - let timestamp_field: DFField = timestamp_field.into(); - let column = timestamp_field.qualified_column(); - aggr_expr.push(max(col(column.clone())).alias(TIMESTAMP_FIELD)); - - let mut output_schema_fields = fields_with_qualifiers(&schema); - output_schema_fields.push(timestamp_field.clone()); - let output_schema = Arc::new(schema_from_df_fields_with_metadata( - &output_schema_fields, - schema.metadata().clone(), - )?); - - let aggregate = Aggregate::try_new_with_schema( - Arc::new(key_plan), - group_expr, - aggr_expr, 
- output_schema, - )?; - - debug!( - "non-windowed aggregate field names: {:?}", - aggregate - .schema - .fields() - .iter() - .map(|f| f.name()) - .collect::>() - ); - - let final_plan = LogicalPlan::Aggregate(aggregate); - Ok(Transformed::yes(final_plan)) - } -} - -impl TreeNodeRewriter for AggregateRewriter<'_> { - type Node = LogicalPlan; - - fn f_up(&mut self, node: Self::Node) -> Result> { - let LogicalPlan::Aggregate(Aggregate { - input, - mut group_expr, - aggr_expr, - schema, - .. - }) = node - else { - return Ok(Transformed::no(node)); - }; - - let mut window_group_expr: Vec<_> = group_expr - .iter() - .enumerate() - .filter_map(|(i, expr)| { - find_window(expr) - .map(|option| option.map(|inner| (i, inner))) - .transpose() - }) - .collect::>>()?; - - if window_group_expr.len() > 1 { - return not_impl_err!( - "do not support {} window expressions in group by", - window_group_expr.len() - ); - } - - let mut key_fields: Vec = fields_with_qualifiers(&schema) - .iter() - .take(group_expr.len()) - .map(|field| { - DFField::new( - field.qualifier().cloned(), - format!("_key_{}", field.name()), - field.data_type().clone(), - field.is_nullable(), - ) - }) - .collect(); - - let mut window_detecting_visitor = WindowDetectingVisitor::default(); - input.visit_with_subqueries(&mut window_detecting_visitor)?; - - let window = window_detecting_visitor.window; - let window_behavior = match (window.is_some(), !window_group_expr.is_empty()) { - (true, true) => { - let input_window = window.unwrap(); - let (window_index, group_by_window_type) = window_group_expr.pop().unwrap(); - if group_by_window_type != input_window { - return Err(datafusion::error::DataFusionError::NotImplemented( - "window in group by does not match input window".to_string(), - )); - } - let matching_field = window_detecting_visitor.fields.iter().next(); - match matching_field { - Some(field) => { - group_expr[window_index] = Expr::Column(field.qualified_column()); - WindowBehavior::InData - } - None 
=> { - if matches!(input_window, WindowType::Session { .. }) { - return plan_err!("can't reinvoke session window in nested aggregates"); - } - group_expr.remove(window_index); - key_fields.remove(window_index); - let window_field = schema.qualified_field(window_index).into(); - WindowBehavior::FromOperator { - window: input_window, - window_field, - window_index, - is_nested: true, - } - } - } - } - (true, false) => WindowBehavior::InData, - (false, true) => { - let (window_index, window_type) = window_group_expr.pop().unwrap(); - group_expr.remove(window_index); - key_fields.remove(window_index); - let window_field = schema.qualified_field(window_index).into(); - WindowBehavior::FromOperator { - window: window_type, - window_field, - window_index, - is_nested: false, - } - } - (false, false) => { - return Self::rewrite_non_windowed_aggregate( - input, - key_fields, - group_expr, - aggr_expr, - schema, - self.schema_provider, - ); - } - }; - - let key_count = key_fields.len(); - key_fields.extend(fields_with_qualifiers(input.schema())); - - let key_schema = Arc::new(schema_from_df_fields_with_metadata( - &key_fields, - schema.metadata().clone(), - )?); - - let mut key_projection_expressions: Vec<_> = group_expr - .iter() - .zip(key_fields.iter()) - .map(|(expr, f)| expr.clone().alias(f.name().to_string())) - .collect(); - - key_projection_expressions.extend( - fields_with_qualifiers(input.schema()) - .iter() - .map(|field| Expr::Column(field.qualified_column())), - ); - - let key_projection = - LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema( - key_projection_expressions, - input.clone(), - key_schema, - )?); - - let key_plan = LogicalPlan::Extension(Extension { - node: Arc::new(KeyCalculationExtension::new( - key_projection, - KeysOrExprs::Keys((0..key_count).collect()), - )), - }); - - let mut aggregate_schema_fields = fields_with_qualifiers(&schema); - if let WindowBehavior::FromOperator { window_index, .. 
} = &window_behavior { - aggregate_schema_fields.remove(*window_index); - } - let internal_schema = Arc::new(schema_from_df_fields_with_metadata( - &aggregate_schema_fields, - schema.metadata().clone(), - )?); - - let rewritten_aggregate = Aggregate::try_new_with_schema( - Arc::new(key_plan), - group_expr, - aggr_expr, - internal_schema, - )?; - - let aggregate_extension = AggregateExtension::new( - window_behavior, - LogicalPlan::Aggregate(rewritten_aggregate), - (0..key_count).collect(), - ); - let final_plan = LogicalPlan::Extension(Extension { - node: Arc::new(aggregate_extension), - }); - - WindowDetectingVisitor::get_window(&final_plan)?; - Ok(Transformed::yes(final_plan)) - } -} diff --git a/src/sql/planner/plan/aggregate_rewriter.rs b/src/sql/planner/plan/aggregate_rewriter.rs new file mode 100644 index 00000000..802fa180 --- /dev/null +++ b/src/sql/planner/plan/aggregate_rewriter.rs @@ -0,0 +1,262 @@ +use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; +use datafusion::common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err}; +use datafusion::functions_aggregate::expr_fn::max; +use datafusion::logical_expr::{self, Aggregate, Expr, Extension, LogicalPlan, Projection}; +use datafusion::prelude::col; +use std::sync::Arc; + +use crate::sql::planner::StreamSchemaProvider; +use crate::sql::planner::extension::aggregate::AggregateExtension; +use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; +use crate::sql::planner::plan::streaming_window_analzer::StreamingWindowAnalzer; +use crate::sql::types::{ + DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window, + schema_from_df_fields_with_metadata, +}; + +/// AggregateRewriter transforms batch DataFusion aggregates into streaming stateful operators. +/// It handles windowing (Tumble/Hop/Session), watermarks, and continuous updating aggregates. 
+pub(crate) struct AggregateRewriter<'a> { + pub schema_provider: &'a StreamSchemaProvider, +} + +impl TreeNodeRewriter for AggregateRewriter<'_> { + type Node = LogicalPlan; + + fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> { + let LogicalPlan::Aggregate(mut agg) = node else { + return Ok(Transformed::no(node)); + }; + + // 1. Identify windowing functions (e.g., tumble, hop) in GROUP BY. + let mut window_exprs: Vec<_> = agg + .group_expr + .iter() + .enumerate() + .filter_map(|(i, e)| find_window(e).map(|opt| opt.map(|w| (i, w))).transpose()) + .collect::<Result<Vec<_>>>()?; + + if window_exprs.len() > 1 { + return not_impl_err!("Streaming aggregates support at most one window expression"); + } + + // 2. Prepare internal metadata for Key-based distribution. + let mut key_fields: Vec<DFField> = fields_with_qualifiers(&agg.schema) + .iter() + .take(agg.group_expr.len()) + .map(|f| { + DFField::new( + f.qualifier().cloned(), + format!("_key_{}", f.name()), + f.data_type().clone(), + f.is_nullable(), + ) + }) + .collect(); + + // 3. Dispatch to Updating Aggregate if no windowing is detected. + let input_window = StreamingWindowAnalzer::get_window(&agg.input)?; + if window_exprs.is_empty() && input_window.is_none() { + return self.rewrite_as_updating_aggregate( + agg.input, + key_fields, + agg.group_expr, + agg.aggr_expr, + agg.schema, + ); + } + + // 4. Resolve Windowing Strategy (InData vs FromOperator). + let behavior = self.resolve_window_context( + &agg.input, + &mut agg.group_expr, + &agg.schema, + &mut window_exprs, + )?; + + // Adjust keys if windowing is handled by the operator. + if let WindowBehavior::FromOperator { window_index, .. } = &behavior { + key_fields.remove(*window_index); + } + + let key_count = key_fields.len(); + let keyed_input = + self.build_keyed_input(agg.input.clone(), &agg.group_expr, &key_fields)?; + + // 5. Build the final AggregateExtension for the physical planner.
+ let mut internal_fields = fields_with_qualifiers(&agg.schema); + if let WindowBehavior::FromOperator { window_index, .. } = &behavior { + internal_fields.remove(*window_index); + } + let internal_schema = Arc::new(schema_from_df_fields_with_metadata( + &internal_fields, + agg.schema.metadata().clone(), + )?); + + let rewritten_agg = Aggregate::try_new_with_schema( + Arc::new(keyed_input), + agg.group_expr, + agg.aggr_expr, + internal_schema, + )?; + + let extension = AggregateExtension::new( + behavior, + LogicalPlan::Aggregate(rewritten_agg), + (0..key_count).collect(), + ); + + Ok(Transformed::yes(LogicalPlan::Extension(Extension { + node: Arc::new(extension), + }))) + } +} + +impl<'a> AggregateRewriter<'a> { + pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self { + Self { schema_provider } + } + + /// [Internal] Builds the physical Key Calculation layer required for distributed Shuffling. + /// This wraps the input in a Projection and a KeyCalculationExtension. + fn build_keyed_input( + &self, + input: Arc, + group_expr: &[Expr], + key_fields: &[DFField], + ) -> Result { + let key_count = group_expr.len(); + let mut projection_fields = key_fields.to_vec(); + projection_fields.extend(fields_with_qualifiers(input.schema())); + + let key_schema = Arc::new(schema_from_df_fields_with_metadata( + &projection_fields, + input.schema().metadata().clone(), + )?); + + // Map group expressions to '_key_' aliases while passing through all original columns. 
+ let mut exprs: Vec<_> = group_expr + .iter() + .zip(key_fields.iter()) + .map(|(expr, f)| expr.clone().alias(f.name().to_string())) + .collect(); + + exprs.extend( + fields_with_qualifiers(input.schema()) + .iter() + .map(|f| Expr::Column(f.qualified_column())), + ); + + let projection = + LogicalPlan::Projection(Projection::try_new_with_schema(exprs, input, key_schema)?); + + Ok(LogicalPlan::Extension(Extension { + node: Arc::new(KeyCalculationExtension::new( + projection, + KeysOrExprs::Keys((0..key_count).collect()), + )), + })) + } + + /// [Strategy] Rewrites standard GROUP BY into a non-windowed updating aggregate. + /// Injected max(_timestamp) ensures the streaming pulse (Watermark) continues to propagate. + fn rewrite_as_updating_aggregate( + &self, + input: Arc, + key_fields: Vec, + group_expr: Vec, + mut aggr_expr: Vec, + schema: Arc, + ) -> Result> { + let keyed_input = self.build_keyed_input(input, &group_expr, &key_fields)?; + + // Ensure the updating stream maintains time awareness. + let timestamp_col = keyed_input + .schema() + .qualified_field_with_unqualified_name(TIMESTAMP_FIELD) + .map_err(|_| { + DataFusionError::Plan( + "Required _timestamp field missing for updating aggregate".to_string(), + ) + })?; + + let timestamp_field: DFField = timestamp_col.into(); + aggr_expr.push(max(col(timestamp_field.qualified_column())).alias(TIMESTAMP_FIELD)); + + let mut output_fields = fields_with_qualifiers(&schema); + output_fields.push(timestamp_field); + + let output_schema = Arc::new(schema_from_df_fields_with_metadata( + &output_fields, + schema.metadata().clone(), + )?); + + let aggregate = Aggregate::try_new_with_schema( + Arc::new(keyed_input), + group_expr, + aggr_expr, + output_schema, + )?; + + Ok(Transformed::yes(LogicalPlan::Aggregate(aggregate))) + } + + /// [Strategy] Reconciles window definitions between the input stream and the current GROUP BY. 
+ fn resolve_window_context( + &self, + input: &LogicalPlan, + group_expr: &mut Vec, + schema: &DFSchema, + window_expr_info: &mut Vec<(usize, WindowType)>, + ) -> Result { + let mut visitor = StreamingWindowAnalzer::default(); + input.visit_with_subqueries(&mut visitor)?; + + let input_window = visitor.window; + let has_group_window = !window_expr_info.is_empty(); + + match (input_window, has_group_window) { + // Re-aggregation or subquery with an existing window. + (Some(i_win), true) => { + let (idx, g_win) = window_expr_info.pop().unwrap(); + if i_win != g_win { + return plan_err!( + "Inconsistent windowing: input is {:?}, but group by is {:?}", + i_win, + g_win + ); + } + + if let Some(field) = visitor.fields.iter().next() { + group_expr[idx] = Expr::Column(field.qualified_column()); + Ok(WindowBehavior::InData) + } else { + if matches!(i_win, WindowType::Session { .. }) { + return plan_err!("Nested session windows are not supported"); + } + group_expr.remove(idx); + Ok(WindowBehavior::FromOperator { + window: i_win, + window_field: schema.qualified_field(idx).into(), + window_index: idx, + is_nested: true, + }) + } + } + // First-time windowing defined in this aggregate. + (None, true) => { + let (idx, g_win) = window_expr_info.pop().unwrap(); + group_expr.remove(idx); + Ok(WindowBehavior::FromOperator { + window: g_win, + window_field: schema.qualified_field(idx).into(), + window_index: idx, + is_nested: false, + }) + } + // Passthrough: input is already windowed, no new window in group by. 
+ (Some(_), false) => Ok(WindowBehavior::InData), + _ => unreachable!("Dispatched to non-windowed path previously"), + } + } +} diff --git a/src/sql/planner/plan/join.rs b/src/sql/planner/plan/join.rs deleted file mode 100644 index 04a27e9b..00000000 --- a/src/sql/planner/plan/join.rs +++ /dev/null @@ -1,240 +0,0 @@ -use std::sync::Arc; - -use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; -use datafusion::common::{ - Column, DataFusionError, JoinConstraint, JoinType, Result, ScalarValue, Spans, TableReference, - not_impl_err, -}; -use datafusion::logical_expr; -use datafusion::logical_expr::expr::Alias; -use datafusion::logical_expr::{ - BinaryExpr, Case, Expr, Extension, Join, LogicalPlan, Projection, build_join_schema, -}; -use datafusion::prelude::coalesce; - -use crate::sql::planner::StreamSchemaProvider; -use crate::sql::planner::extension::join::JoinExtension; -use crate::sql::planner::extension::key_calculation::KeyCalculationExtension; -use crate::sql::planner::plan::WindowDetectingVisitor; -use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata}; - -pub(crate) struct JoinRewriter<'a> { - pub schema_provider: &'a StreamSchemaProvider, -} - -impl JoinRewriter<'_> { - fn check_join_windowing(join: &Join) -> Result { - let left_window = WindowDetectingVisitor::get_window(&join.left)?; - let right_window = WindowDetectingVisitor::get_window(&join.right)?; - match (left_window, right_window) { - (None, None) => { - if join.join_type == JoinType::Inner { - Ok(false) - } else { - Err(DataFusionError::NotImplemented( - "can't handle non-inner joins without windows".into(), - )) - } - } - (None, Some(_)) => Err(DataFusionError::NotImplemented( - "can't handle mixed windowing between left (non-windowed) and right (windowed)" - .into(), - )), - (Some(_), None) => Err(DataFusionError::NotImplemented( - "can't handle mixed windowing between left (windowed) and right (non-windowed)" - .into(), - )), - 
(Some(left_window), Some(right_window)) => { - if left_window != right_window { - return Err(DataFusionError::NotImplemented( - "can't handle mixed windowing between left and right".into(), - )); - } - if let WindowType::Session { .. } = left_window { - return Err(DataFusionError::NotImplemented( - "can't handle session windows in joins".into(), - )); - } - Ok(true) - } - } - } - - fn create_join_key_plan( - input: Arc, - join_expressions: Vec, - name: &'static str, - ) -> Result { - let key_count = join_expressions.len(); - - let join_expressions: Vec<_> = join_expressions - .into_iter() - .enumerate() - .map(|(index, expr)| { - expr.alias_qualified( - Some(TableReference::bare("_stream")), - format!("_key_{index}"), - ) - }) - .chain( - fields_with_qualifiers(input.schema()) - .iter() - .map(|field| Expr::Column(field.qualified_column())), - ) - .collect(); - - let projection = Projection::try_new(join_expressions, input)?; - let key_calculation_extension = KeyCalculationExtension::new_named_and_trimmed( - LogicalPlan::Projection(projection), - (0..key_count).collect(), - name.to_string(), - ); - Ok(LogicalPlan::Extension(Extension { - node: Arc::new(key_calculation_extension), - })) - } - - fn post_join_timestamp_projection(&mut self, input: LogicalPlan) -> Result { - let schema = input.schema().clone(); - let mut schema_with_timestamp = fields_with_qualifiers(&schema); - let timestamp_fields = schema_with_timestamp - .iter() - .filter(|field| field.name() == "_timestamp") - .cloned() - .collect::>(); - - if timestamp_fields.len() != 2 { - return not_impl_err!("join must have two timestamp fields"); - } - - schema_with_timestamp.retain(|field| field.name() != "_timestamp"); - let mut projection_expr = schema_with_timestamp - .iter() - .map(|field| { - Expr::Column(Column { - relation: field.qualifier().cloned(), - name: field.name().to_string(), - spans: Spans::default(), - }) - }) - .collect::>(); - - schema_with_timestamp.push(timestamp_fields[0].clone()); - - 
let output_schema = Arc::new(schema_from_df_fields_with_metadata( - &schema_with_timestamp, - schema.metadata().clone(), - )?); - - let left_field = &timestamp_fields[0]; - let left_column = Expr::Column(Column { - relation: left_field.qualifier().cloned(), - name: left_field.name().to_string(), - spans: Spans::default(), - }); - let right_field = &timestamp_fields[1]; - let right_column = Expr::Column(Column { - relation: right_field.qualifier().cloned(), - name: right_field.name().to_string(), - spans: Spans::default(), - }); - - let max_timestamp = Expr::Case(Case { - expr: Some(Box::new(Expr::BinaryExpr(BinaryExpr { - left: Box::new(left_column.clone()), - op: logical_expr::Operator::GtEq, - right: Box::new(right_column.clone()), - }))), - when_then_expr: vec![ - ( - Box::new(Expr::Literal(ScalarValue::Boolean(Some(true)), None)), - Box::new(left_column.clone()), - ), - ( - Box::new(Expr::Literal(ScalarValue::Boolean(Some(false)), None)), - Box::new(right_column.clone()), - ), - ], - else_expr: Some(Box::new(coalesce(vec![ - left_column.clone(), - right_column.clone(), - ]))), - }); - - projection_expr.push(Expr::Alias(Alias { - expr: Box::new(max_timestamp), - relation: timestamp_fields[0].qualifier().cloned(), - name: timestamp_fields[0].name().to_string(), - metadata: None, - })); - - Ok(LogicalPlan::Projection(Projection::try_new_with_schema( - projection_expr, - Arc::new(input), - output_schema, - )?)) - } -} - -impl TreeNodeRewriter for JoinRewriter<'_> { - type Node = LogicalPlan; - - fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> { - let LogicalPlan::Join(join) = node else { - return Ok(Transformed::no(node)); - }; - - let is_instant = Self::check_join_windowing(&join)?; - - let Join { - left, - right, - on, - filter, - join_type, - join_constraint: JoinConstraint::On, - schema: _, - null_equals_null: false, - } = join - else { - return not_impl_err!("can't handle join constraint other than ON"); - }; - - if on.is_empty() && !is_instant { - return
not_impl_err!("Updating joins must include an equijoin condition"); - } - - let (left_expressions, right_expressions): (Vec<_>, Vec<_>) = - on.clone().into_iter().unzip(); - - let left_input = Self::create_join_key_plan(left, left_expressions, "left")?; - let right_input = Self::create_join_key_plan(right, right_expressions, "right")?; - - let rewritten_join = LogicalPlan::Join(Join { - schema: Arc::new(build_join_schema( - left_input.schema(), - right_input.schema(), - &join_type, - )?), - left: Arc::new(left_input), - right: Arc::new(right_input), - on, - join_type, - join_constraint: JoinConstraint::On, - null_equals_null: false, - filter, - }); - - let final_logical_plan = self.post_join_timestamp_projection(rewritten_join)?; - - let join_extension = JoinExtension { - rewritten_join: final_logical_plan, - is_instant, - ttl: (!is_instant).then_some(self.schema_provider.planning_options.ttl), - }; - - Ok(Transformed::yes(LogicalPlan::Extension(Extension { - node: Arc::new(join_extension), - }))) - } -} diff --git a/src/sql/planner/plan/join_rewriter.rs b/src/sql/planner/plan/join_rewriter.rs new file mode 100644 index 00000000..f6031183 --- /dev/null +++ b/src/sql/planner/plan/join_rewriter.rs @@ -0,0 +1,224 @@ +use crate::sql::planner::StreamSchemaProvider; +use crate::sql::planner::extension::join::JoinExtension; +use crate::sql::planner::extension::key_calculation::KeyCalculationExtension; +use crate::sql::planner::plan::streaming_window_analzer::StreamingWindowAnalzer; +use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata}; +use crate::types::TIMESTAMP_FIELD; +use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; +use datafusion::common::{ + Column, DataFusionError, JoinConstraint, JoinType, Result, ScalarValue, Spans, TableReference, + not_impl_err, plan_err, +}; +use datafusion::logical_expr::{ + self, BinaryExpr, Case, Expr, Extension, Join, LogicalPlan, Projection, build_join_schema, +}; +use 
datafusion::prelude::coalesce; +use std::sync::Arc; + +/// JoinRewriter handles the transformation of standard SQL joins into streaming-capable joins. +/// It manages stateful "Updating Joins" and time-aligned "Instant Joins". +pub(crate) struct JoinRewriter<'a> { + pub schema_provider: &'a StreamSchemaProvider, +} + +impl<'a> JoinRewriter<'a> { + pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self { + Self { schema_provider } + } + + /// [Validation] Ensures left and right streams have compatible windowing strategies. + fn validate_join_windows(&self, join: &Join) -> Result { + let left_win = StreamingWindowAnalzer::get_window(&join.left)?; + let right_win = StreamingWindowAnalzer::get_window(&join.right)?; + + match (left_win, right_win) { + (None, None) => { + if join.join_type == JoinType::Inner { + Ok(false) // Standard Updating Join (Inner) + } else { + plan_err!( + "Non-inner joins (e.g., LEFT/RIGHT) require windowing to bound state." + ) + } + } + (Some(l), Some(r)) => { + if l != r { + return plan_err!( + "Join window mismatch: left={:?}, right={:?}. Windows must match exactly.", + l, + r + ); + } + if let WindowType::Session { .. } = l { + return plan_err!( + "Session windows are currently not supported in streaming joins." + ); + } + Ok(true) // Instant Windowed Join + } + _ => plan_err!( + "Mixed windowing detected. Both sides of a join must be either windowed or non-windowed." + ), + } + } + + /// [Internal] Wraps a join input in a KeyCalculation layer to facilitate Shuffle/KeyBy distribution. 
+ fn build_keyed_side( + &self, + input: Arc, + keys: Vec, + side: &str, + ) -> Result { + let key_count = keys.len(); + + let projection_exprs = keys + .into_iter() + .enumerate() + .map(|(i, e)| { + e.alias_qualified(Some(TableReference::bare("_stream")), format!("_key_{i}")) + }) + .chain( + fields_with_qualifiers(input.schema()) + .iter() + .map(|f| Expr::Column(f.qualified_column())), + ) + .collect(); + + let projection = Projection::try_new(projection_exprs, input)?; + let key_ext = KeyCalculationExtension::new_named_and_trimmed( + LogicalPlan::Projection(projection), + (0..key_count).collect(), + side.to_string(), + ); + + Ok(LogicalPlan::Extension(Extension { + node: Arc::new(key_ext), + })) + } + + /// [Strategy] Resolves the output timestamp of the join. + /// Streaming joins must output the 'max' of the two input timestamps to ensure Watermark progression. + fn apply_timestamp_resolution(&self, join_plan: LogicalPlan) -> Result { + let schema = join_plan.schema(); + let all_fields = fields_with_qualifiers(schema); + + let timestamp_fields: Vec<_> = all_fields + .iter() + .filter(|f| f.name() == "_timestamp") + .cloned() + .collect(); + + if timestamp_fields.len() != 2 { + return plan_err!( + "Streaming join requires exactly two input timestamp fields to resolve output time." 
+ ); + } + + // Project all fields except the two raw timestamps + let mut exprs: Vec<_> = all_fields + .iter() + .filter(|f| f.name() != "_timestamp") + .map(|f| Expr::Column(f.qualified_column())) + .collect(); + + // Calculate: GREATEST(left._timestamp, right._timestamp) + let left_ts = Expr::Column(timestamp_fields[0].qualified_column()); + let right_ts = Expr::Column(timestamp_fields[1].qualified_column()); + + let max_ts_expr = Expr::Case(Case { + expr: Some(Box::new(Expr::BinaryExpr(BinaryExpr { + left: Box::new(left_ts.clone()), + op: logical_expr::Operator::GtEq, + right: Box::new(right_ts.clone()), + }))), + when_then_expr: vec![ + ( + Box::new(Expr::Literal(ScalarValue::Boolean(Some(true)), None)), + Box::new(left_ts.clone()), + ), + ( + Box::new(Expr::Literal(ScalarValue::Boolean(Some(false)), None)), + Box::new(right_ts.clone()), + ), + ], + else_expr: Some(Box::new(coalesce(vec![left_ts, right_ts]))), + }) + .alias(TIMESTAMP_FIELD); + + exprs.push(max_ts_expr); + + let out_fields: Vec<_> = all_fields + .iter() + .filter(|f| f.name() != "_timestamp") + .cloned() + .chain(std::iter::once(timestamp_fields[0].clone())) + .collect(); + + let out_schema = Arc::new(schema_from_df_fields_with_metadata( + &out_fields, + schema.metadata().clone(), + )?); + + Ok(LogicalPlan::Projection(Projection::try_new_with_schema( + exprs, + Arc::new(join_plan), + out_schema, + )?)) + } +} + +impl TreeNodeRewriter for JoinRewriter<'_> { + type Node = LogicalPlan; + + fn f_up(&mut self, node: Self::Node) -> Result> { + let LogicalPlan::Join(join) = node else { + return Ok(Transformed::no(node)); + }; + + // 1. Validate Streaming Context + let is_instant = self.validate_join_windows(&join)?; + if join.join_constraint != JoinConstraint::On { + return not_impl_err!("Only 'ON' join constraints are supported in streaming SQL."); + } + if join.on.is_empty() && !is_instant { + return plan_err!("Updating joins require at least one equality condition (Equijoin)."); + } + + // 2. 
Prepare Keyed Inputs for Shuffle + let (left_on, right_on): (Vec<_>, Vec<_>) = join.on.clone().into_iter().unzip(); + let keyed_left = self.build_keyed_side(join.left, left_on, "left")?; + let keyed_right = self.build_keyed_side(join.right, right_on, "right")?; + + // 3. Assemble Rewritten Join Node + let join_schema = Arc::new(build_join_schema( + keyed_left.schema(), + keyed_right.schema(), + &join.join_type, + )?); + let rewritten_join = LogicalPlan::Join(Join { + left: Arc::new(keyed_left), + right: Arc::new(keyed_right), + on: join.on, + filter: join.filter, + join_type: join.join_type, + join_constraint: JoinConstraint::On, + schema: join_schema, + null_equals_null: false, + }); + + // 4. Resolve Output Watermark (Timestamp Projection) + let plan_with_timestamp = self.apply_timestamp_resolution(rewritten_join)?; + + // 5. Wrap in JoinExtension for Physical Planning + let ttl = (!is_instant).then_some(self.schema_provider.planning_options.ttl); + let extension = JoinExtension { + rewritten_join: plan_with_timestamp, + is_instant, + ttl, + }; + + Ok(Transformed::yes(LogicalPlan::Extension(Extension { + node: Arc::new(extension), + }))) + } +} diff --git a/src/sql/planner/plan/mod.rs b/src/sql/planner/plan/mod.rs index 83891731..c734a88b 100644 --- a/src/sql/planner/plan/mod.rs +++ b/src/sql/planner/plan/mod.rs @@ -1,38 +1,54 @@ use datafusion::common::Result; -use datafusion::common::tree_node::TreeNode; +use datafusion::common::tree_node::{Transformed, TreeNode}; use datafusion::logical_expr::LogicalPlan; +use tracing::{debug, info, instrument}; use crate::sql::planner::StreamSchemaProvider; +use crate::sql::planner::plan::stream_rewriter::StreamRewriter; use crate::sql::planner::rewrite::TimeWindowUdfChecker; -use self::aggregate::AggregateRewriter; -use self::join::JoinRewriter; -use self::stream_rewriter::StreamRewriter; -use self::window_detecting_visitor::{WindowDetectingVisitor, extract_column}; -use self::window_fn::WindowFunctionRewriter; - -pub(crate) 
mod aggregate; -pub(crate) mod join; +// Module declarations +pub(crate) mod aggregate_rewriter; +pub(crate) mod join_rewriter; +pub(crate) mod row_time_rewriter; pub(crate) mod stream_rewriter; -pub(crate) mod window_detecting_visitor; -pub(crate) mod window_fn; - -use tracing::debug; - +pub(crate) mod streaming_window_analzer; +pub(crate) mod window_function_rewriter; + +/// Entry point for transforming a standard DataFusion LogicalPlan into a +/// Streaming-aware LogicalPlan. +/// +/// This function coordinates multiple rewriting passes and ensures the +/// resulting plan satisfies streaming constraints. +#[instrument(skip_all, level = "debug")] pub fn rewrite_plan( plan: LogicalPlan, schema_provider: &StreamSchemaProvider, ) -> Result { - let rewritten_plan = plan.rewrite_with_subqueries(&mut StreamRewriter { schema_provider })?; - - rewritten_plan - .data - .visit_with_subqueries(&mut TimeWindowUdfChecker {})?; - - debug!( - "Streaming logical plan:\n{}", - rewritten_plan.data.display_graphviz() - ); - - Ok(rewritten_plan.data) + info!("Starting streaming plan rewrite pipeline"); + + // Phase 1: Core Transformation + // This pass handles the structural changes (Aggregates, Joins, Windows) + // using a Bottom-Up traversal. + let mut rewriter = StreamRewriter::new(schema_provider); + let Transformed { + data: rewritten_plan, + .. + } = plan.rewrite_with_subqueries(&mut rewriter)?; + + // Phase 2: Post-rewrite Validation + // Ensure that the rewritten plan doesn't violate specific streaming UDF rules. + rewritten_plan.visit_with_subqueries(&mut TimeWindowUdfChecker {})?; + + // Phase 3: Observability & Debugging + // Industrial engines use Graphviz or specialized Explain formats for plan diffs. 
+ if cfg!(debug_assertions) { + debug!( + "Streaming logical plan graphviz:\n{}", + rewritten_plan.display_graphviz() + ); + } + + info!("Streaming plan rewrite completed successfully"); + Ok(rewritten_plan) } diff --git a/src/sql/planner/plan/row_time_rewriter.rs b/src/sql/planner/plan/row_time_rewriter.rs new file mode 100644 index 00000000..0a31d9f8 --- /dev/null +++ b/src/sql/planner/plan/row_time_rewriter.rs @@ -0,0 +1,36 @@ +use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; +use datafusion::common::{Column, Result as DFResult}; +use datafusion::logical_expr::Expr; + +use crate::sql::types::TIMESTAMP_FIELD; + +/// Replaces the virtual `row_time()` scalar function with a physical reference to `_timestamp`. +/// +/// This is a critical mapping step that allows users to use a friendly SQL function +/// while the engine operates on the mandatory internal streaming timestamp. +pub struct RowTimeRewriter; + +impl TreeNodeRewriter for RowTimeRewriter { + type Node = Expr; + + fn f_down(&mut self, node: Self::Node) -> DFResult> { + // Use pattern matching to identify the 'row_time' scalar function. + if let Expr::ScalarFunction(func) = &node + && func.name() == "row_time" + { + // Map the virtual function to the physical internal timestamp column. + // We use .alias() to preserve the original name "row_time()" in the output schema, + // ensuring that user-facing column names do not change unexpectedly. 
+ let physical_col = Expr::Column(Column { + relation: None, + name: TIMESTAMP_FIELD.to_string(), + spans: Default::default(), + }) + .alias("row_time()"); + + return Ok(Transformed::yes(physical_col)); + } + + Ok(Transformed::no(node)) + } +} diff --git a/src/sql/planner/plan/stream_rewriter.rs b/src/sql/planner/plan/stream_rewriter.rs index 53549af4..c3caed0e 100644 --- a/src/sql/planner/plan/stream_rewriter.rs +++ b/src/sql/planner/plan/stream_rewriter.rs @@ -1,148 +1,219 @@ use std::sync::Arc; +use super::StreamSchemaProvider; use crate::sql::planner::extension::StreamExtension; use crate::sql::planner::extension::remote_table::RemoteTableExtension; +use crate::sql::planner::plan::row_time_rewriter::RowTimeRewriter; use crate::sql::planner::plan::{ - aggregate::AggregateRewriter, join::JoinRewriter, window_fn::WindowFunctionRewriter, + aggregate_rewriter::AggregateRewriter, join_rewriter::JoinRewriter, + window_function_rewriter::WindowFunctionRewriter, }; -use crate::sql::planner::rewrite::{RowTimeRewriter, TimeWindowNullCheckRemover}; +use crate::sql::planner::rewrite::TimeWindowNullCheckRemover; use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field}; use crate::sql::types::{DFField, TIMESTAMP_FIELD}; use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{Column, DataFusionError, Result, Spans, TableReference, plan_err}; -use datafusion::logical_expr::{Expr, Extension, Filter, LogicalPlan, SubqueryAlias}; +use datafusion::logical_expr::{ + Expr, Extension, Filter, LogicalPlan, Projection, SubqueryAlias, Union, +}; use datafusion_common::tree_node::TreeNode; - -use super::StreamSchemaProvider; +use datafusion_expr::{Aggregate, Join}; pub struct StreamRewriter<'a> { pub(crate) schema_provider: &'a StreamSchemaProvider, } +impl TreeNodeRewriter for StreamRewriter<'_> { + type Node = LogicalPlan; + + fn f_up(&mut self, node: Self::Node) -> Result> { + match node { + // Logic Delegation + 
LogicalPlan::Projection(p) => self.rewrite_projection(p), + LogicalPlan::Filter(f) => self.rewrite_filter(f), + LogicalPlan::Union(u) => self.rewrite_union(u), + + // Delegation to specialized sub-rewriters + LogicalPlan::Aggregate(agg) => self.rewrite_aggregate(agg), + LogicalPlan::Join(join) => self.rewrite_join(join), + LogicalPlan::Window(_) => self.rewrite_window(node), + LogicalPlan::SubqueryAlias(sa) => self.rewrite_subquery_alias(sa), + + // Explicitly Unsupported Operations + LogicalPlan::Sort(_) => self.unsupported_error("ORDER BY", &node), + LogicalPlan::Limit(_) => self.unsupported_error("LIMIT", &node), + LogicalPlan::Repartition(_) => self.unsupported_error("Repartitions", &node), + LogicalPlan::Explain(_) => self.unsupported_error("EXPLAIN", &node), + LogicalPlan::Analyze(_) => self.unsupported_error("ANALYZE", &node), + + _ => Ok(Transformed::no(node)), + } + } +} + impl<'a> StreamRewriter<'a> { pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self { Self { schema_provider } } -} -impl TreeNodeRewriter for StreamRewriter<'_> { - type Node = LogicalPlan; + /// Delegates to AggregateRewriter to transform batch aggregates into streaming stateful operators. + fn rewrite_aggregate(&self, agg: Aggregate) -> Result> { + AggregateRewriter { + schema_provider: self.schema_provider, + } + .f_up(LogicalPlan::Aggregate(agg)) + } - fn f_up(&mut self, mut node: Self::Node) -> Result> { - match node { - LogicalPlan::Projection(ref mut projection) => { - if !has_timestamp_field(&projection.schema) { - let timestamp_field: DFField = projection - .input - .schema() - .qualified_field_with_unqualified_name(TIMESTAMP_FIELD) - .map_err(|_| { - DataFusionError::Plan(format!( - "No timestamp field found in projection input ({})", - projection.input.display() - )) - })? 
- .into(); - projection.schema = add_timestamp_field( - projection.schema.clone(), - timestamp_field.qualifier().cloned(), - ) - .expect("in projection"); - projection.expr.push(Expr::Column(Column { - relation: timestamp_field.qualifier().cloned(), - name: TIMESTAMP_FIELD.to_string(), - spans: Spans::default(), - })); - } + /// Delegates to JoinRewriter to handle streaming join semantics (e.g., TTL, state management). + fn rewrite_join(&self, join: Join) -> Result> { + JoinRewriter { + schema_provider: self.schema_provider, + } + .f_up(LogicalPlan::Join(join)) + } - let rewritten = projection - .expr - .iter() - .map(|expr| expr.clone().rewrite(&mut RowTimeRewriter {})) - .collect::>>()?; - if rewritten.iter().any(|r| r.transformed) { - projection.expr = rewritten.into_iter().map(|r| r.data).collect(); - } - return Ok(Transformed::yes(node)); - } - LogicalPlan::Aggregate(aggregate) => { - return AggregateRewriter { - schema_provider: self.schema_provider, - } - .f_up(LogicalPlan::Aggregate(aggregate)); - } - LogicalPlan::Join(join) => { - return JoinRewriter { - schema_provider: self.schema_provider, - } - .f_up(LogicalPlan::Join(join)); - } - LogicalPlan::Filter(f) => { - let expr = f - .predicate - .clone() - .rewrite(&mut TimeWindowNullCheckRemover {})?; - return Ok(if expr.transformed { - Transformed::yes(LogicalPlan::Filter(Filter::try_new(expr.data, f.input)?)) - } else { - Transformed::no(LogicalPlan::Filter(f)) - }); - } - LogicalPlan::Window(_) => { - return WindowFunctionRewriter {}.f_up(node); - } - LogicalPlan::Sort(_) => { - return plan_err!( - "ORDER BY is not currently supported in streaming SQL ({})", - node.display() - ); - } - LogicalPlan::Repartition(_) => { - return plan_err!( - "Repartitions are not currently supported ({})", - node.display() - ); - } - LogicalPlan::Union(mut union) => { - union.schema = union.inputs[0].schema().clone(); - for input in union.inputs.iter_mut() { - if let LogicalPlan::Extension(Extension { node }) = 
input.as_ref() { - let stream_extension: &dyn StreamExtension = node.try_into().unwrap(); - if !stream_extension.transparent() { - continue; - } - } - let remote_table_extension = Arc::new(RemoteTableExtension { - input: input.as_ref().clone(), - name: TableReference::bare("union_input"), - schema: union.schema.clone(), - materialize: false, - }); - *input = Arc::new(LogicalPlan::Extension(Extension { - node: remote_table_extension, - })); + /// Delegates to WindowFunctionRewriter for stream-aware windowing logic. + fn rewrite_window(&self, node: LogicalPlan) -> Result> { + WindowFunctionRewriter {}.f_up(node) + } + + /// Refreshes SubqueryAlias metadata to align with potentially rewritten internal schemas. + fn rewrite_subquery_alias(&self, sa: SubqueryAlias) -> Result> { + // Since the inner 'sa.input' has been rewritten (bottom-up), we must re-create + // the alias node to ensure the outer schema correctly reflects internal changes. + let new_sa = SubqueryAlias::try_new(sa.input, sa.alias).map_err(|e| { + DataFusionError::Internal(format!("Failed to re-alias subquery: {}", e)) + })?; + + Ok(Transformed::yes(LogicalPlan::SubqueryAlias(new_sa))) + } + + /// Handles timestamp propagation and row_time() mapping for Projections + fn rewrite_projection(&self, mut projection: Projection) -> Result> { + // Check if the current projection already has a timestamp field; + // if not, we must inject it to maintain streaming heartbeats. + if !has_timestamp_field(&projection.schema) { + let input_schema = projection.input.schema(); + + // Resolve the timestamp field from the input schema using the global constant. + let timestamp_field: DFField = input_schema + .qualified_field_with_unqualified_name(TIMESTAMP_FIELD) + .map_err(|_| { + DataFusionError::Plan(format!( + "No timestamp field found in projection input ({})", + projection.input.display() + )) + })? + .into(); + + // Update the logical schema to include the newly injected timestamp. 
+ projection.schema = add_timestamp_field( + projection.schema.clone(), + timestamp_field.qualifier().cloned(), + ) + .expect("Failed to add timestamp to projection schema"); + + // Physically push the timestamp column into the expression list. + projection.expr.push(Expr::Column(Column { + relation: timestamp_field.qualifier().cloned(), + name: TIMESTAMP_FIELD.to_string(), + spans: Spans::default(), + })); + } + + // Map user-friendly row_time() function calls to internal _timestamp column references. + let rewritten = projection + .expr + .iter() + .map(|expr| expr.clone().rewrite(&mut RowTimeRewriter {})) + .collect::>>()?; + + // If any expressions were modified (e.g., row_time() was replaced), update the projection. + if rewritten.iter().any(|r| r.transformed) { + projection.expr = rewritten.into_iter().map(|r| r.data).collect(); + } + + // Return the updated plan node wrapped in a Transformed container. + Ok(Transformed::yes(LogicalPlan::Projection(projection))) + } + + /// Harmonizes schemas across Union branches and wraps them in RemoteTableExtensions. + /// + /// This ensures that all inputs to a UNION operation share the exact same schema metadata, + /// preventing "Schema Drift" where different branches have different field qualifiers. + fn rewrite_union(&self, mut union: Union) -> Result> { + // Industrial engines use the first branch as the "Master Schema" for the Union. + // We clone it once to ensure all subsequent branches are forced to comply. + let master_schema = union.inputs[0].schema().clone(); + union.schema = master_schema.clone(); + + for input in union.inputs.iter_mut() { + // Optimization: If the node is already a non-transparent Extension, + // we skip wrapping to avoid unnecessary nesting of logical nodes. 
+ if let LogicalPlan::Extension(Extension { node }) = input.as_ref() { + let stream_ext: &dyn StreamExtension = node.try_into().map_err(|e| { + DataFusionError::Internal(format!("Failed to resolve StreamExtension: {}", e)) + })?; + + if !stream_ext.transparent() { + continue; } - return Ok(Transformed::yes(LogicalPlan::Union(union))); - } - LogicalPlan::SubqueryAlias(sa) => { - return Ok(Transformed::yes(LogicalPlan::SubqueryAlias( - SubqueryAlias::try_new(sa.input, sa.alias)?, - ))); - } - LogicalPlan::Limit(_) => { - return plan_err!( - "LIMIT is not currently supported in streaming SQL ({})", - node.display() - ); - } - LogicalPlan::Explain(_) => { - return plan_err!("EXPLAIN is not supported ({})", node.display()); } - LogicalPlan::Analyze(_) => { - return plan_err!("ANALYZE is not supported ({})", node.display()); - } - _ => {} + + // Wrap each branch in a RemoteTableExtension. + // This acts as a logical "bridge" that forces the input to adopt the master_schema, + // effectively stripping away branch-specific qualifiers (e.g., table aliases). + let remote_ext = Arc::new(RemoteTableExtension { + input: input.as_ref().clone(), + name: TableReference::bare("union_input"), + schema: master_schema.clone(), + materialize: false, // Internal logical boundary only; does not require physical sink. + }); + + // Atomically replace the input with the wrapped version. + *input = Arc::new(LogicalPlan::Extension(Extension { node: remote_ext })); + } + + Ok(Transformed::yes(LogicalPlan::Union(union))) + } + + /// Optimizes Filter nodes by stripping redundant NULL checks on time window expressions. + /// + /// In streaming SQL, DataFusion often injects 'IS NOT NULL' guards for window functions + /// that are redundant or can interfere with watermark propagation. This rewriter + /// cleans those predicates to simplify the physical execution plan. + fn rewrite_filter(&self, filter: Filter) -> Result> { + // We attempt to rewrite the predicate using a specialized sub-rewriter. 
+ // The TimeWindowNullCheckRemover specifically targets expressions like + // `tumble(...) IS NOT NULL` and simplifies them to `TRUE`. + let rewritten_expr = filter + .predicate + .clone() + .rewrite(&mut TimeWindowNullCheckRemover {})?; + + if !rewritten_expr.transformed { + return Ok(Transformed::no(LogicalPlan::Filter(filter))); } - Ok(Transformed::no(node)) + + // Industrial Guard: Re-validate the predicate against the input schema. + // 'Filter::try_new' ensures that the transformed expression is still semantically + // valid for the underlying data stream. + let new_filter = Filter::try_new(rewritten_expr.data, filter.input).map_err(|e| { + DataFusionError::Internal(format!( + "Failed to re-validate filtered predicate after NULL-check removal: {}", + e + )) + })?; + + Ok(Transformed::yes(LogicalPlan::Filter(new_filter))) + } + + /// Centralized error handler for unsupported streaming operations + fn unsupported_error(&self, op: &str, node: &LogicalPlan) -> Result> { + plan_err!( + "{} is not currently supported in streaming SQL ({})", + op, + node.display() + ) } } diff --git a/src/sql/planner/plan/streaming_window_analzer.rs b/src/sql/planner/plan/streaming_window_analzer.rs new file mode 100644 index 00000000..db3506b7 --- /dev/null +++ b/src/sql/planner/plan/streaming_window_analzer.rs @@ -0,0 +1,203 @@ +use std::collections::HashSet; +use std::sync::Arc; + +use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; +use datafusion::common::{Column, DFSchema, DataFusionError, Result}; +use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, expr::Alias}; + +use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension}; +use crate::sql::planner::extension::join::JOIN_NODE_NAME; +use crate::sql::types::{DFField, WindowBehavior, WindowType, fields_with_qualifiers, find_window}; + +/// WindowDetectingVisitor identifies windowing strategies and tracks window-carrying fields +/// as they 
propagate upward through the logical plan tree. +#[derive(Debug, Default)] +pub(crate) struct StreamingWindowAnalzer { + /// The specific window type discovered (Tumble, Hop, etc.) + pub(crate) window: Option, + /// Set of fields in the current plan node that carry window semantics. + pub(crate) fields: HashSet, +} + +impl StreamingWindowAnalzer { + /// Entry point to resolve the WindowType of a given plan branch. + pub(crate) fn get_window(logical_plan: &LogicalPlan) -> Result> { + let mut visitor = Self::default(); + logical_plan.visit_with_subqueries(&mut visitor)?; + Ok(visitor.window) + } + + /// Resolves whether an expression is a reference to an existing window field + /// or a definition of a new window function. + fn resolve_window_from_expr( + &self, + expr: &Expr, + input_schema: &DFSchema, + ) -> Result> { + // 1. Check if the expression directly references a known window field. + if let Some(col) = extract_column(expr) { + let field = input_schema.field_with_name(col.relation.as_ref(), &col.name)?; + let df_field: DFField = (col.relation.clone(), Arc::new(field.clone())).into(); + + if self.fields.contains(&df_field) { + return Ok(self.window.clone()); + } + } + + // 2. Otherwise, check if it's a new window function call (e.g., tumble(), hop()). + find_window(expr) + } + + /// Updates the internal state with new window findings and maps them to the output schema. + fn update_state( + &mut self, + matched_windows: Vec<(usize, WindowType)>, + schema: &DFSchema, + ) -> Result<()> { + // Clear fields from the previous level to maintain schema strictly for the current node. 
+ self.fields.clear(); + + for (index, window) in matched_windows { + if let Some(existing) = &self.window { + if existing != &window { + return Err(DataFusionError::Plan(format!( + "Conflicting windows in the same operator: expected {:?}, found {:?}", + existing, window + ))); + } + } else { + self.window = Some(window); + } + // Record this specific index in the schema as a window carrier. + self.fields.insert(schema.qualified_field(index).into()); + } + Ok(()) + } +} + +pub(crate) fn extract_column(expr: &Expr) -> Option<&Column> { + match expr { + Expr::Column(column) => Some(column), + Expr::Alias(Alias { expr, .. }) => extract_column(expr), + _ => None, + } +} + +impl TreeNodeVisitor<'_> for StreamingWindowAnalzer { + type Node = LogicalPlan; + + fn f_down(&mut self, node: &Self::Node) -> Result { + // Joins require cross-branch validation to ensure left and right sides align on time. + if let LogicalPlan::Extension(Extension { node }) = node + && node.name() == JOIN_NODE_NAME + { + let mut branch_windows = HashSet::new(); + for input in node.inputs() { + if let Some(w) = Self::get_window(input)? { + branch_windows.insert(w); + } + } + + if branch_windows.len() > 1 { + return Err(DataFusionError::Plan( + "Join inputs have mismatched windowing strategies.".into(), + )); + } + self.window = branch_windows.into_iter().next(); + + // Optimization: No need to recurse manually if we've resolved the join boundary. 
+ return Ok(TreeNodeRecursion::Jump); + } + Ok(TreeNodeRecursion::Continue) + } + + fn f_up(&mut self, node: &Self::Node) -> Result { + match node { + LogicalPlan::Projection(p) => { + let windows = p + .expr + .iter() + .enumerate() + .filter_map(|(i, e)| { + self.resolve_window_from_expr(e, p.input.schema()) + .transpose() + .map(|res| res.map(|w| (i, w))) + }) + .collect::>>()?; + + self.update_state(windows, &p.schema)?; + } + + LogicalPlan::Aggregate(agg) => { + let windows = agg + .group_expr + .iter() + .enumerate() + .filter_map(|(i, e)| { + self.resolve_window_from_expr(e, agg.input.schema()) + .transpose() + .map(|res| res.map(|w| (i, w))) + }) + .collect::>>()?; + + self.update_state(windows, &agg.schema)?; + } + + LogicalPlan::SubqueryAlias(sa) => { + // Map fields through the alias layer by resolving column indices. + let input_schema = sa.input.schema(); + let mapped = self + .fields + .drain() + .map(|f| { + let idx = input_schema.index_of_column(&f.qualified_column())?; + Ok(sa.schema.qualified_field(idx).into()) + }) + .collect::>>()?; + + self.fields = mapped; + } + + LogicalPlan::Extension(Extension { node }) + if node.name() == AGGREGATE_EXTENSION_NAME => + { + let ext = node + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Internal("AggregateExtension node is malformed".into()) + })?; + + match &ext.window_behavior { + WindowBehavior::FromOperator { + window, + window_field, + is_nested, + .. 
+ } => { + if self.window.is_some() && !*is_nested { + return Err(DataFusionError::Plan( + "Redundant window definition on an already windowed stream.".into(), + )); + } + self.window = Some(window.clone()); + self.fields.insert(window_field.clone()); + } + WindowBehavior::InData => { + let current_schema_fields: HashSet<_> = + fields_with_qualifiers(node.schema()).into_iter().collect(); + self.fields.retain(|f| current_schema_fields.contains(f)); + + if self.fields.is_empty() { + return Err(DataFusionError::Plan( + "Windowed aggregate missing window metadata from its input.".into(), + )); + } + } + } + } + _ => {} + } + Ok(TreeNodeRecursion::Continue) + } +} diff --git a/src/sql/planner/plan/window_detecting_visitor.rs b/src/sql/planner/plan/window_detecting_visitor.rs deleted file mode 100644 index 0a0a0323..00000000 --- a/src/sql/planner/plan/window_detecting_visitor.rs +++ /dev/null @@ -1,215 +0,0 @@ -use std::collections::HashSet; -use std::sync::Arc; - -use datafusion::common::{ - Column, DataFusionError, Result, - tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}, -}; -use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, expr::Alias}; - -use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension}; -use crate::sql::planner::extension::join::JOIN_NODE_NAME; -use crate::sql::types::{DFField, WindowBehavior, WindowType, fields_with_qualifiers, find_window}; - -#[derive(Debug, Default)] -pub(crate) struct WindowDetectingVisitor { - pub(crate) window: Option, - pub(crate) fields: HashSet, -} - -impl WindowDetectingVisitor { - pub(crate) fn get_window(logical_plan: &LogicalPlan) -> Result> { - let mut visitor = WindowDetectingVisitor { - window: None, - fields: HashSet::new(), - }; - logical_plan.visit_with_subqueries(&mut visitor)?; - Ok(visitor.window.take()) - } -} - -pub(crate) fn extract_column(expr: &Expr) -> Option<&Column> { - match expr { - Expr::Column(column) => Some(column), - 
Expr::Alias(Alias { expr, .. }) => extract_column(expr), - _ => None, - } -} - -impl TreeNodeVisitor<'_> for WindowDetectingVisitor { - type Node = LogicalPlan; - - fn f_down(&mut self, node: &Self::Node) -> Result { - let LogicalPlan::Extension(Extension { node }) = node else { - return Ok(TreeNodeRecursion::Continue); - }; - - if node.name() == JOIN_NODE_NAME { - let input_windows: HashSet<_> = node - .inputs() - .iter() - .map(|input| Self::get_window(input)) - .collect::>>()?; - if input_windows.len() > 1 { - return Err(DataFusionError::Plan( - "can't handle mixed windowing between left and right".to_string(), - )); - } - self.window = input_windows - .into_iter() - .next() - .expect("join has at least one input"); - return Ok(TreeNodeRecursion::Jump); - } - Ok(TreeNodeRecursion::Continue) - } - - fn f_up(&mut self, node: &Self::Node) -> Result { - match node { - LogicalPlan::Projection(projection) => { - let window_expressions = projection - .expr - .iter() - .enumerate() - .filter_map(|(index, expr)| { - if let Some(column) = extract_column(expr) { - let input_field = projection - .input - .schema() - .field_with_name(column.relation.as_ref(), &column.name); - let input_field = match input_field { - Ok(field) => field, - Err(err) => return Some(Err(err)), - }; - if self.fields.contains( - &(column.relation.clone(), Arc::new(input_field.clone())).into(), - ) { - return self.window.clone().map(|window| Ok((index, window))); - } - } - find_window(expr) - .map(|option| option.map(|inner| (index, inner))) - .transpose() - }) - .collect::>>()?; - self.fields.clear(); - for (index, window) in window_expressions { - if let Some(existing_window) = &self.window { - if *existing_window != window { - return Err(DataFusionError::Plan( - "window expressions do not match".to_string(), - )); - } - } else { - self.window = Some(window); - } - self.fields - .insert(projection.schema.qualified_field(index).into()); - } - } - LogicalPlan::SubqueryAlias(subquery_alias) => { - 
self.fields = self - .fields - .drain() - .map(|field| { - Ok(subquery_alias - .schema - .qualified_field( - subquery_alias - .input - .schema() - .index_of_column(&field.qualified_column())?, - ) - .into()) - }) - .collect::>>()?; - } - LogicalPlan::Aggregate(Aggregate { - input, - group_expr, - aggr_expr: _, - schema, - .. - }) => { - let window_expressions = group_expr - .iter() - .enumerate() - .filter_map(|(index, expr)| { - if let Some(column) = extract_column(expr) { - let input_field = input - .schema() - .field_with_name(column.relation.as_ref(), &column.name); - let input_field = match input_field { - Ok(field) => field, - Err(err) => return Some(Err(err)), - }; - if self - .fields - .contains(&(column.relation.as_ref(), input_field).into()) - { - return self.window.clone().map(|window| Ok((index, window))); - } - } - find_window(expr) - .map(|option| option.map(|inner| (index, inner))) - .transpose() - }) - .collect::>>()?; - self.fields.clear(); - for (index, window) in window_expressions { - if let Some(existing_window) = &self.window { - if *existing_window != window { - return Err(DataFusionError::Plan( - "window expressions do not match".to_string(), - )); - } - } else { - self.window = Some(window); - } - self.fields.insert(schema.qualified_field(index).into()); - } - } - LogicalPlan::Extension(Extension { node }) => { - if node.name() == AGGREGATE_EXTENSION_NAME { - let aggregate_extension = node - .as_any() - .downcast_ref::() - .expect("should be aggregate extension"); - - match &aggregate_extension.window_behavior { - WindowBehavior::FromOperator { - window, - window_field, - window_index: _, - is_nested, - } => { - if self.window.is_some() && !*is_nested { - return Err(DataFusionError::Plan( - "aggregate node should not be recalculating window, as input is windowed.".to_string(), - )); - } - self.window = Some(window.clone()); - self.fields.insert(window_field.clone()); - } - WindowBehavior::InData => { - let input_fields = 
self.fields.clone(); - self.fields.clear(); - for field in fields_with_qualifiers(node.schema()) { - if input_fields.contains(&field) { - self.fields.insert(field); - } - } - if self.fields.is_empty() { - return Err(DataFusionError::Plan( - "must have window in aggregate. Make sure you are calling one of the windowing functions (hop, tumble, session) or using the window field of the input".to_string(), - )); - } - } - } - } - } - _ => {} - } - Ok(TreeNodeRecursion::Continue) - } -} diff --git a/src/sql/planner/plan/window_fn.rs b/src/sql/planner/plan/window_fn.rs deleted file mode 100644 index 66f673d1..00000000 --- a/src/sql/planner/plan/window_fn.rs +++ /dev/null @@ -1,178 +0,0 @@ -use std::sync::Arc; - -use datafusion::common::tree_node::Transformed; -use datafusion::common::{Result as DFResult, plan_err, tree_node::TreeNodeRewriter}; -use datafusion::logical_expr; -use datafusion::logical_expr::expr::WindowFunctionParams; -use datafusion::logical_expr::{ - Expr, Extension, LogicalPlan, Projection, Sort, Window, expr::WindowFunction, -}; -use tracing::debug; - -use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; -use crate::sql::planner::extension::window_fn::WindowFunctionExtension; -use crate::sql::planner::plan::{WindowDetectingVisitor, extract_column}; -use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields}; - -pub(crate) struct WindowFunctionRewriter; - -fn get_window_and_name(expr: &Expr) -> DFResult<(WindowFunction, String)> { - match expr { - Expr::Alias(alias) => { - let (window, _) = get_window_and_name(&alias.expr)?; - Ok((window, alias.name.clone())) - } - Expr::WindowFunction(window_function) => { - Ok((*window_function.clone(), expr.name_for_alias()?)) - } - _ => plan_err!("Expect a column or alias expression, not {:?}", expr), - } -} - -impl TreeNodeRewriter for WindowFunctionRewriter { - type Node = LogicalPlan; - - fn f_up(&mut self, node: Self::Node) -> DFResult> { - let 
LogicalPlan::Window(window) = node else { - return Ok(Transformed::no(node)); - }; - - debug!( - "Rewriting window function: {:?}", - LogicalPlan::Window(window.clone()) - ); - - let mut window_detecting_visitor = WindowDetectingVisitor::default(); - window - .input - .visit_with_subqueries(&mut window_detecting_visitor)?; - - let Some(input_window) = window_detecting_visitor.window else { - return plan_err!("Window functions require already windowed input"); - }; - if matches!(input_window, WindowType::Session { .. }) { - return plan_err!("Window functions do not support session windows"); - } - - let input_window_fields = window_detecting_visitor.fields; - - let Window { - input, window_expr, .. - } = window; - - if window_expr.len() != 1 { - return plan_err!("Window functions require exactly one window expression"); - } - - let (WindowFunction { fun, params }, original_name) = get_window_and_name(&window_expr[0])?; - - let mut window_field: Vec<_> = params - .partition_by - .iter() - .enumerate() - .filter_map(|(index, expr)| { - if let Some(column) = extract_column(expr) { - let Ok(input_field) = input - .schema() - .field_with_name(column.relation.as_ref(), &column.name) - else { - return Some(plan_err!( - "Column {} not found in input schema", - column.name - )); - }; - if input_window_fields.contains(&(column.relation.as_ref(), input_field).into()) - { - return Some(Ok((input_field.clone(), index))); - } - } - None - }) - .collect::>()?; - - if window_field.len() != 1 { - return plan_err!( - "Window function requires exactly one window expression in partition_by" - ); - } - - let (_window_field, index) = window_field.pop().unwrap(); - let mut additional_keys = params.partition_by.clone(); - additional_keys.remove(index); - let key_count = additional_keys.len(); - - let params = WindowFunctionParams { - args: params.args, - partition_by: additional_keys.clone(), - order_by: params.order_by, - window_frame: params.window_frame, - null_treatment: 
params.null_treatment, - }; - - let new_window_func = WindowFunction { fun, params }; - - let mut key_projection_expressions: Vec<_> = additional_keys - .iter() - .enumerate() - .map(|(index, expression)| expression.clone().alias(format!("_key_{index}"))) - .collect(); - - key_projection_expressions.extend( - fields_with_qualifiers(input.schema()) - .iter() - .map(|field| Expr::Column(field.qualified_column())), - ); - - let auto_schema = - Projection::try_new(key_projection_expressions.clone(), input.clone())?.schema; - let mut key_fields = fields_with_qualifiers(&auto_schema) - .iter() - .take(additional_keys.len()) - .cloned() - .collect::>(); - key_fields.extend(fields_with_qualifiers(input.schema())); - let key_schema = Arc::new(schema_from_df_fields(&key_fields)?); - - let key_projection = LogicalPlan::Projection(Projection::try_new_with_schema( - key_projection_expressions, - input.clone(), - key_schema, - )?); - - let key_plan = LogicalPlan::Extension(Extension { - node: Arc::new(KeyCalculationExtension::new( - key_projection, - KeysOrExprs::Keys((0..key_count).collect()), - )), - }); - - let mut sort_expressions: Vec<_> = additional_keys - .iter() - .map(|partition| logical_expr::expr::Sort { - expr: partition.clone(), - asc: true, - nulls_first: false, - }) - .collect(); - sort_expressions.extend(new_window_func.params.order_by.clone()); - - let shuffle = LogicalPlan::Sort(Sort { - expr: sort_expressions, - input: Arc::new(key_plan), - fetch: None, - }); - - let window_expr = - Expr::WindowFunction(Box::new(new_window_func)).alias_if_changed(original_name)?; - - let rewritten_window_plan = - LogicalPlan::Window(Window::try_new(vec![window_expr], Arc::new(shuffle))?); - - Ok(Transformed::yes(LogicalPlan::Extension(Extension { - node: Arc::new(WindowFunctionExtension::new( - rewritten_window_plan, - (0..key_count).collect(), - )), - }))) - } -} diff --git a/src/sql/planner/plan/window_function_rewriter.rs b/src/sql/planner/plan/window_function_rewriter.rs 
new file mode 100644 index 00000000..5c8e511b --- /dev/null +++ b/src/sql/planner/plan/window_function_rewriter.rs @@ -0,0 +1,191 @@ +use datafusion::common::tree_node::Transformed; +use datafusion::common::{Column, Result as DFResult, plan_err, tree_node::TreeNodeRewriter}; +use datafusion::logical_expr::{ + self, Expr, Extension, LogicalPlan, Projection, Sort, Window, expr::WindowFunction, + expr::WindowFunctionParams, +}; +use datafusion_common::DataFusionError; +use std::sync::Arc; +use tracing::debug; + +use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; +use crate::sql::planner::extension::window_fn::WindowFunctionExtension; +use crate::sql::planner::plan::streaming_window_analzer::{StreamingWindowAnalzer, extract_column}; +use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields}; + +/// WindowFunctionRewriter transforms standard SQL Window functions into streaming-compatible +/// stateful operators, ensuring proper data partitioning and sorting for distributed execution. +pub(crate) struct WindowFunctionRewriter; + +impl WindowFunctionRewriter { + /// Recursively unwraps Aliases to find the underlying WindowFunction. + fn resolve_window_function(&self, expr: &Expr) -> DFResult<(WindowFunction, String)> { + match expr { + Expr::Alias(alias) => { + let (func, _) = self.resolve_window_function(&alias.expr)?; + Ok((func, alias.name.clone())) + } + Expr::WindowFunction(wf) => Ok((wf.as_ref().clone(), expr.name_for_alias()?)), + _ => plan_err!("Expected WindowFunction or Alias, found: {:?}", expr), + } + } + + /// Identifies which field in the PARTITION BY clause corresponds to the streaming window. 
+ fn identify_window_partition( + &self, + params: &WindowFunctionParams, + input: &LogicalPlan, + input_window_fields: &std::collections::HashSet, + ) -> DFResult { + let matched: Vec<_> = params + .partition_by + .iter() + .enumerate() + .filter_map(|(i, e)| { + let col = extract_column(e)?; + let field = input + .schema() + .field_with_name(col.relation.as_ref(), &col.name) + .ok()?; + let df_field = (col.relation.clone(), Arc::new(field.clone())).into(); + + if input_window_fields.contains(&df_field) { + Some(i) + } else { + None + } + }) + .collect(); + + if matched.len() != 1 { + return plan_err!( + "Streaming window functions require exactly one window column in PARTITION BY. Found: {}", + matched.len() + ); + } + Ok(matched[0]) + } + + /// Wraps the input in a Projection and KeyCalculationExtension to handle data distribution. + fn build_keyed_input( + &self, + input: Arc, + partition_keys: &[Expr], + ) -> DFResult { + let key_count = partition_keys.len(); + + // 1. Build projection: [_key_0, _key_1, ..., original_columns] + let mut exprs: Vec<_> = partition_keys + .iter() + .enumerate() + .map(|(i, e)| e.clone().alias(format!("_key_{i}"))) + .collect(); + + exprs.extend( + fields_with_qualifiers(input.schema()) + .iter() + .map(|f| Expr::Column(f.qualified_column())), + ); + + // 2. Derive the keyed schema + let mut keyed_fields = + fields_with_qualifiers(&Projection::try_new(exprs.clone(), input.clone())?.schema) + .iter() + .take(key_count) + .cloned() + .collect::>(); + keyed_fields.extend(fields_with_qualifiers(input.schema())); + + let keyed_schema = Arc::new(schema_from_df_fields(&keyed_fields)?); + + let projection = + LogicalPlan::Projection(Projection::try_new_with_schema(exprs, input, keyed_schema)?); + + // 3. 
Wrap in KeyCalculationExtension for the physical planner + Ok(LogicalPlan::Extension(Extension { + node: Arc::new(KeyCalculationExtension::new( + projection, + KeysOrExprs::Keys((0..key_count).collect()), + )), + })) + } +} + +impl TreeNodeRewriter for WindowFunctionRewriter { + type Node = LogicalPlan; + + fn f_up(&mut self, node: Self::Node) -> DFResult> { + let LogicalPlan::Window(window) = node else { + return Ok(Transformed::no(node)); + }; + + debug!("Rewriting window function for streaming: {:?}", window); + + // 1. Analyze input windowing context + let mut analyzer = StreamingWindowAnalzer::default(); + window.input.visit_with_subqueries(&mut analyzer)?; + + let input_window = analyzer.window.ok_or_else(|| { + DataFusionError::Plan( + "Window functions require a windowed input stream (e.g., TUMBLE/HOP)".into(), + ) + })?; + + if matches!(input_window, WindowType::Session { .. }) { + return plan_err!( + "Streaming window functions (OVER) are not supported on Session windows." + ); + } + + // 2. Validate window expression constraints + if window.window_expr.len() != 1 { + return plan_err!( + "Arroyo currently supports exactly one window expression per OVER clause." + ); + } + + let (mut wf, original_name) = self.resolve_window_function(&window.window_expr[0])?; + + // 3. Identify and extract the window column from PARTITION BY + let window_part_idx = + self.identify_window_partition(&wf.params, &window.input, &analyzer.fields)?; + let mut partition_keys = wf.params.partition_by.clone(); + partition_keys.remove(window_part_idx); + + // Update function params to exclude the window column from internal partitioning + // as the streaming engine handles window boundaries natively. + wf.params.partition_by = partition_keys.clone(); + let key_count = partition_keys.len(); + + // 4. 
Build the data-shuffling pipeline (Projection -> KeyCalc -> Sort) + let keyed_plan = self.build_keyed_input(window.input.clone(), &partition_keys)?; + + let mut sort_exprs: Vec<_> = partition_keys + .iter() + .map(|e| logical_expr::expr::Sort { + expr: e.clone(), + asc: true, + nulls_first: false, + }) + .collect(); + sort_exprs.extend(wf.params.order_by.clone()); + + let sorted_plan = LogicalPlan::Sort(Sort { + expr: sort_exprs, + input: Arc::new(keyed_plan), + fetch: None, + }); + + // 5. Final Assembly + let final_wf_expr = Expr::WindowFunction(Box::new(wf)).alias_if_changed(original_name)?; + let rewritten_window = + LogicalPlan::Window(Window::try_new(vec![final_wf_expr], Arc::new(sorted_plan))?); + + Ok(Transformed::yes(LogicalPlan::Extension(Extension { + node: Arc::new(WindowFunctionExtension::new( + rewritten_window, + (0..key_count).collect(), + )), + }))) + } +} diff --git a/src/sql/planner/rewrite/mod.rs b/src/sql/planner/rewrite/mod.rs index 20b2e9bb..bfebae4c 100644 --- a/src/sql/planner/rewrite/mod.rs +++ b/src/sql/planner/rewrite/mod.rs @@ -11,7 +11,6 @@ // limitations under the License. pub mod async_udf_rewriter; -pub mod row_time; pub mod sink_input_rewriter; pub mod source_metadata_visitor; pub mod source_rewriter; @@ -19,7 +18,6 @@ pub mod time_window; pub mod unnest_rewriter; pub use async_udf_rewriter::{AsyncOptions, AsyncUdfRewriter}; -pub use row_time::RowTimeRewriter; pub use sink_input_rewriter::SinkInputRewriter; pub use source_metadata_visitor::SourceMetadataVisitor; pub use source_rewriter::SourceRewriter; diff --git a/src/sql/planner/rewrite/row_time.rs b/src/sql/planner/rewrite/row_time.rs deleted file mode 100644 index 51309feb..00000000 --- a/src/sql/planner/rewrite/row_time.rs +++ /dev/null @@ -1,39 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; -use datafusion::common::{Column, Result as DFResult}; -use datafusion::logical_expr::Expr; - -use crate::sql::types::TIMESTAMP_FIELD; - -/// Rewrites `row_time()` scalar function calls to a column reference on `_timestamp`. -pub struct RowTimeRewriter {} - -impl TreeNodeRewriter for RowTimeRewriter { - type Node = Expr; - - fn f_down(&mut self, node: Self::Node) -> DFResult> { - if let Expr::ScalarFunction(func) = &node - && func.name() == "row_time" - { - let transformed = Expr::Column(Column { - relation: None, - name: TIMESTAMP_FIELD.to_string(), - spans: Default::default(), - }) - .alias("row_time()"); - return Ok(Transformed::yes(transformed)); - } - Ok(Transformed::no(node)) - } -} From 3c94267334a879c351fc910a02e5cd6e3148cb44 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sat, 21 Mar 2026 18:13:55 +0800 Subject: [PATCH 07/44] update --- protocol/proto/fs_api.proto | 11 + src/coordinator/coordinator.rs | 2 +- src/coordinator/execution/executor.rs | 23 +- src/coordinator/plan/logical_plan_visitor.rs | 54 +- src/coordinator/plan/lookup_table_plan.rs | 2 +- .../plan/streaming_table_connector_plan.rs | 2 +- src/coordinator/plan/streaming_table_plan.rs | 2 +- src/datastream/mod.rs | 2 - src/lib.rs | 1 - src/main.rs | 1 - src/runtime/processor/wasm/wasm_processor.rs | 1 + src/server/handler.rs | 2 +- .../plan => analysis}/aggregate_rewriter.rs | 8 +- .../async_udf_rewriter.rs | 8 +- .../plan => analysis}/join_rewriter.rs | 8 +- src/sql/analysis/mod.rs | 227 +++++++ .../plan => 
analysis}/row_time_rewriter.rs | 0 .../sink_input_rewriter.rs | 5 +- .../source_metadata_visitor.rs | 8 +- .../rewrite => analysis}/source_rewriter.rs | 12 +- .../plan => analysis}/stream_rewriter.rs | 12 +- .../streaming_window_analzer.rs | 4 +- .../rewrite => analysis}/time_window.rs | 0 src/sql/{planner => analysis}/udafs.rs | 0 .../rewrite => analysis}/unnest_rewriter.rs | 0 .../window_function_rewriter.rs | 6 +- src/sql/extensions/aggregate.rs | 607 ++++++++++++++++++ .../extension => extensions}/debezium.rs | 129 ++-- src/sql/extensions/join.rs | 120 ++++ src/sql/extensions/key_calculation.rs | 242 +++++++ src/sql/extensions/lookup.rs | 194 ++++++ .../{planner/extension => extensions}/mod.rs | 251 +++++--- src/sql/extensions/projection.rs | 154 +++++ src/sql/extensions/remote_table.rs | 124 ++++ src/sql/extensions/sink.rs | 168 +++++ .../extension => extensions}/table_source.rs | 58 +- src/sql/extensions/updating_aggregate.rs | 165 +++++ .../watermark_node.rs | 64 +- src/sql/extensions/window_fn.rs | 123 ++++ src/sql/functions/mod.rs | 2 +- .../logical_node}/logical.rs | 77 ++- src/sql/logical_node/mod.rs | 1 + src/sql/{physical => logical_planner}/mod.rs | 7 +- .../logical_planner}/optimizers.rs | 14 +- .../planner.rs} | 24 +- src/sql/mod.rs | 18 +- src/sql/{planner => }/parse.rs | 0 src/sql/planner/extension/aggregate.rs | 348 ---------- src/sql/planner/extension/join.rs | 61 -- src/sql/planner/extension/key_calculation.rs | 138 ---- src/sql/planner/extension/lookup.rs | 127 ---- src/sql/planner/extension/projection.rs | 91 --- src/sql/planner/extension/remote_table.rs | 71 -- src/sql/planner/extension/sink.rs | 128 ---- .../planner/extension/updating_aggregate.rs | 89 --- src/sql/planner/extension/window_fn.rs | 62 -- src/sql/planner/mod.rs | 348 ---------- src/sql/planner/plan/mod.rs | 54 -- src/sql/planner/rewrite/mod.rs | 25 - src/sql/planner/schemas.rs | 5 - src/sql/planner/sql_to_plan.rs | 22 - src/sql/{catalog => schema}/connector.rs | 28 - 
.../{catalog => schema}/connector_table.rs | 36 +- src/sql/{catalog => schema}/field_spec.rs | 0 src/sql/{catalog => schema}/insert.rs | 2 +- src/sql/{catalog => schema}/mod.rs | 4 +- src/sql/{catalog => schema}/optimizer.rs | 2 +- .../{planner => schema}/schema_provider.rs | 11 +- src/sql/{catalog => schema}/table.rs | 23 +- src/sql/{catalog => schema}/utils.rs | 0 src/types/converter.rs | 83 +++ src/types/df.rs | 370 ++++++----- src/types/mod.rs | 3 +- 73 files changed, 3013 insertions(+), 2061 deletions(-) delete mode 100644 src/datastream/mod.rs rename src/sql/{planner/plan => analysis}/aggregate_rewriter.rs (97%) rename src/sql/{planner/rewrite => analysis}/async_udf_rewriter.rs (93%) rename src/sql/{planner/plan => analysis}/join_rewriter.rs (96%) create mode 100644 src/sql/analysis/mod.rs rename src/sql/{planner/plan => analysis}/row_time_rewriter.rs (100%) rename src/sql/{planner/rewrite => analysis}/sink_input_rewriter.rs (91%) rename src/sql/{planner/rewrite => analysis}/source_metadata_visitor.rs (86%) rename src/sql/{planner/rewrite => analysis}/source_rewriter.rs (96%) rename src/sql/{planner/plan => analysis}/stream_rewriter.rs (96%) rename src/sql/{planner/plan => analysis}/streaming_window_analzer.rs (98%) rename src/sql/{planner/rewrite => analysis}/time_window.rs (100%) rename src/sql/{planner => analysis}/udafs.rs (100%) rename src/sql/{planner/rewrite => analysis}/unnest_rewriter.rs (100%) rename src/sql/{planner/plan => analysis}/window_function_rewriter.rs (96%) create mode 100644 src/sql/extensions/aggregate.rs rename src/sql/{planner/extension => extensions}/debezium.rs (63%) create mode 100644 src/sql/extensions/join.rs create mode 100644 src/sql/extensions/key_calculation.rs create mode 100644 src/sql/extensions/lookup.rs rename src/sql/{planner/extension => extensions}/mod.rs (66%) create mode 100644 src/sql/extensions/projection.rs create mode 100644 src/sql/extensions/remote_table.rs create mode 100644 src/sql/extensions/sink.rs rename 
src/sql/{planner/extension => extensions}/table_source.rs (54%) create mode 100644 src/sql/extensions/updating_aggregate.rs rename src/sql/{planner/extension => extensions}/watermark_node.rs (57%) create mode 100644 src/sql/extensions/window_fn.rs rename src/{datastream => sql/logical_node}/logical.rs (80%) create mode 100644 src/sql/logical_node/mod.rs rename src/sql/{physical => logical_planner}/mod.rs (99%) rename src/{datastream => sql/logical_planner}/optimizers.rs (88%) rename src/sql/{planner/physical_planner.rs => logical_planner/planner.rs} (95%) rename src/sql/{planner => }/parse.rs (100%) delete mode 100644 src/sql/planner/extension/aggregate.rs delete mode 100644 src/sql/planner/extension/join.rs delete mode 100644 src/sql/planner/extension/key_calculation.rs delete mode 100644 src/sql/planner/extension/lookup.rs delete mode 100644 src/sql/planner/extension/projection.rs delete mode 100644 src/sql/planner/extension/remote_table.rs delete mode 100644 src/sql/planner/extension/sink.rs delete mode 100644 src/sql/planner/extension/updating_aggregate.rs delete mode 100644 src/sql/planner/extension/window_fn.rs delete mode 100644 src/sql/planner/mod.rs delete mode 100644 src/sql/planner/plan/mod.rs delete mode 100644 src/sql/planner/rewrite/mod.rs delete mode 100644 src/sql/planner/schemas.rs delete mode 100644 src/sql/planner/sql_to_plan.rs rename src/sql/{catalog => schema}/connector.rs (57%) rename src/sql/{catalog => schema}/connector_table.rs (91%) rename src/sql/{catalog => schema}/field_spec.rs (100%) rename src/sql/{catalog => schema}/insert.rs (97%) rename src/sql/{catalog => schema}/mod.rs (85%) rename src/sql/{catalog => schema}/optimizer.rs (98%) rename src/sql/{planner => schema}/schema_provider.rs (97%) rename src/sql/{catalog => schema}/table.rs (90%) rename src/sql/{catalog => schema}/utils.rs (100%) create mode 100644 src/types/converter.rs diff --git a/protocol/proto/fs_api.proto b/protocol/proto/fs_api.proto index 24525583..b178f6ea 100644 
--- a/protocol/proto/fs_api.proto +++ b/protocol/proto/fs_api.proto @@ -12,6 +12,17 @@ message ConnectorOp { string description = 3; } +message ValuePlanOperator { + string name = 1; + bytes physical_plan = 2; +} + +message KeyPlanOperator { + string name = 1; + bytes physical_plan = 2; + repeated uint64 key_fields = 3; +} + message ProjectionOperator { string name = 1; FsSchema input_schema = 2; diff --git a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs index 378c670b..8dc55c4d 100644 --- a/src/coordinator/coordinator.rs +++ b/src/coordinator/coordinator.rs @@ -20,7 +20,7 @@ use crate::coordinator::execution::Executor; use crate::coordinator::plan::{LogicalPlanVisitor, LogicalPlanner, PlanNode}; use crate::coordinator::statement::Statement; use crate::runtime::taskexecutor::TaskManager; -use crate::sql::planner::StreamSchemaProvider; +use crate::sql::schema::StreamSchemaProvider; use super::execution_context::ExecutionContext; diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 2dfb6326..8285a2c5 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -19,13 +19,11 @@ use crate::coordinator::plan::{ }; use crate::coordinator::statement::{ConfigSource, FunctionSource}; use crate::runtime::taskexecutor::TaskManager; +use crate::sql::schema::table::Table as CatalogTable; +use crate::sql::analysis::{ StreamSchemaProvider}; use std::sync::Arc; use thiserror::Error; use tracing::{debug, info}; -use crate::datastream::logical::{LogicalProgram, ProgramConfig}; -use crate::datastream::optimizers::ChainingOptimizer; -use crate::sql::CompiledSql; -use crate::sql::planner::{physical_planner, rewrite_sinks}; #[derive(Error, Debug)] pub enum ExecuteError { @@ -222,12 +220,21 @@ impl PlanVisitor for Executor { fn visit_streaming_table( &self, - _plan: &StreamingTable, + plan: &StreamingTable, _context: &PlanVisitorContext, ) -> PlanVisitorResult { - let result = 
Err(ExecuteError::Internal( - "StreamingTable execution not yet implemented".to_string(), - )); + let result = (|| -> Result { + let catalog_table = + CatalogTable::ConnectorTable(plan.connector_table.clone()); + let mut schema_provider = StreamSchemaProvider::new(); + schema_provider.insert_catalog_table(catalog_table.clone()); + + + Ok(ExecuteResult::ok_with_data( + format!("Streaming table '{}' compiled successfully", plan.name), + empty_record_batch(), + )) + })(); PlanVisitorResult::Execute(result) } diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index dfcf2e10..1daf5a16 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -13,8 +13,10 @@ use std::sync::Arc; use datafusion::common::{Result, plan_datafusion_err, plan_err}; +use datafusion::execution::SessionStateBuilder; use datafusion::sql::sqlparser::ast::{SqlOption, Statement as DFStatement}; use datafusion_common::TableReference; +use datafusion_execution::config::SessionConfig; use datafusion_expr::{Expr, Extension, LogicalPlan, col}; use sqlparser::ast::Statement; use tracing::debug; @@ -30,21 +32,24 @@ use crate::coordinator::statement::{ StreamingTableStatement, }; use crate::coordinator::tool::ConnectorOptions; -use crate::sql::catalog::Table; -use crate::sql::catalog::connector::ConnectionType; -use crate::sql::catalog::connector_table::ConnectorTable; -use crate::sql::catalog::field_spec::FieldSpec; -use crate::sql::catalog::optimizer::produce_optimized_plan; +use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig}; +use crate::sql::logical_planner::optimizers::ChainingOptimizer; +use crate::sql::schema::Table; +use crate::sql::schema::connector::ConnectionType; +use crate::sql::schema::connector_table::ConnectorTable; +use crate::sql::schema::field_spec::FieldSpec; +use crate::sql::schema::optimizer::produce_optimized_plan; use crate::sql::functions::{is_json_union, 
serialize_outgoing_json}; -use crate::sql::planner::extension::sink::SinkExtension; -use crate::sql::planner::{StreamSchemaProvider, maybe_add_key_extension_to_sink, rewrite_sinks}; +use crate::sql::extensions::sink::SinkExtension; +use crate::sql::logical_planner::planner; +use crate::sql::analysis::{StreamSchemaProvider, maybe_add_key_extension_to_sink, rewrite_sinks}; use crate::sql::rewrite_plan; const CONNECTOR: &str = "connector"; const PARTITION_BY: &str = "partition_by"; const IDLE_MICROS: &str = "idle_time"; -/// 将 WITH 选项列表转为 key-value map,便于读取 connector 等配置。 +/// Convert `WITH` option list to a key-value map (e.g. connector settings). fn with_options_to_map(options: &[SqlOption]) -> std::collections::HashMap { options .iter() @@ -153,6 +158,8 @@ impl LogicalPlanVisitor { primary_keys: Arc::new(vec![]), // PKs are inferred or explicitly set here inferred_fields: None, partition_exprs: Arc::new(partition_exprs), + lookup_cache_ttl:None, + lookup_cache_max_bytes:None, }; // 6. Sink Extension & Final Rewrites @@ -173,6 +180,37 @@ impl LogicalPlanVisitor { let final_extensions = rewrite_sinks(vec![plan_with_keys])?; let final_plan = final_extensions.into_iter().next().unwrap(); + + + let mut config = SessionConfig::new(); + config + .options_mut() + .optimizer + .enable_round_robin_repartition = false; + config.options_mut().optimizer.repartition_aggregations = false; + config.options_mut().optimizer.repartition_windows = false; + config.options_mut().optimizer.repartition_sorts = false; + config.options_mut().optimizer.repartition_joins = false; + config.options_mut().execution.target_partitions = 1; + + let session_state = SessionStateBuilder::new() + .with_config(config) + .with_default_features() + .with_physical_optimizer_rules(vec![]) + .build(); + + let mut plan_to_graph_visitor = + planner::PlanToGraphVisitor::new(&self.schema_provider, &session_state); + + plan_to_graph_visitor.add_plan(final_plan.clone())?; + + let graph = 
plan_to_graph_visitor.into_graph(); + + let mut program = LogicalProgram::new(graph, ProgramConfig::default()); + + program.optimize(&ChainingOptimizer {}); + + Ok(Box::new(StreamingTable { name: table_name, comment: comment.clone(), diff --git a/src/coordinator/plan/lookup_table_plan.rs b/src/coordinator/plan/lookup_table_plan.rs index 889f57e1..e0ea06ba 100644 --- a/src/coordinator/plan/lookup_table_plan.rs +++ b/src/coordinator/plan/lookup_table_plan.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::sql::catalog::connector_table::ConnectorTable; +use crate::sql::schema::connector_table::ConnectorTable; use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; diff --git a/src/coordinator/plan/streaming_table_connector_plan.rs b/src/coordinator/plan/streaming_table_connector_plan.rs index be1cda31..c2407ec8 100644 --- a/src/coordinator/plan/streaming_table_connector_plan.rs +++ b/src/coordinator/plan/streaming_table_connector_plan.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::sql::catalog::connector_table::ConnectorTable; +use crate::sql::schema::connector_table::ConnectorTable; use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; diff --git a/src/coordinator/plan/streaming_table_plan.rs b/src/coordinator/plan/streaming_table_plan.rs index 577e6494..30e519f8 100644 --- a/src/coordinator/plan/streaming_table_plan.rs +++ b/src/coordinator/plan/streaming_table_plan.rs @@ -11,7 +11,7 @@ // limitations under the License. use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; -use crate::sql::catalog::connector_table::ConnectorTable; +use crate::sql::schema::connector_table::ConnectorTable; use datafusion::logical_expr::LogicalPlan; /// Plan node representing a fully resolved streaming table (DDL). 
diff --git a/src/datastream/mod.rs b/src/datastream/mod.rs deleted file mode 100644 index 994a96b4..00000000 --- a/src/datastream/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod logical; -pub mod optimizers; diff --git a/src/lib.rs b/src/lib.rs index a41536c5..0a3c6dc6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,7 +17,6 @@ pub mod api; pub mod config; pub mod coordinator; -pub mod datastream; pub mod logging; pub mod runtime; pub mod server; diff --git a/src/main.rs b/src/main.rs index 29935d62..e847b16c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,7 +15,6 @@ mod api; mod config; mod coordinator; -mod datastream; mod logging; mod runtime; mod server; diff --git a/src/runtime/processor/wasm/wasm_processor.rs b/src/runtime/processor/wasm/wasm_processor.rs index 1afc9dcf..cd61be98 100644 --- a/src/runtime/processor/wasm/wasm_processor.rs +++ b/src/runtime/processor/wasm/wasm_processor.rs @@ -679,3 +679,4 @@ impl WasmProcessor for WasmProcessorImpl { Ok(()) } } + diff --git a/src/server/handler.rs b/src/server/handler.rs index bf9350e6..1920680c 100644 --- a/src/server/handler.rs +++ b/src/server/handler.rs @@ -29,7 +29,7 @@ use crate::coordinator::{ CreateFunction, CreatePythonFunction, DataSet, DropFunction, ShowFunctions, ShowFunctionsResult, StartFunction, Statement, StopFunction, }; -use crate::sql::planner::parse::parse_sql; +use crate::sql::parse::parse_sql; pub struct FunctionStreamServiceImpl { coordinator: Arc, diff --git a/src/sql/planner/plan/aggregate_rewriter.rs b/src/sql/analysis/aggregate_rewriter.rs similarity index 97% rename from src/sql/planner/plan/aggregate_rewriter.rs rename to src/sql/analysis/aggregate_rewriter.rs index 802fa180..04ac0896 100644 --- a/src/sql/planner/plan/aggregate_rewriter.rs +++ b/src/sql/analysis/aggregate_rewriter.rs @@ -5,10 +5,10 @@ use datafusion::logical_expr::{self, Aggregate, Expr, Extension, LogicalPlan, Pr use datafusion::prelude::col; use std::sync::Arc; -use crate::sql::planner::StreamSchemaProvider; -use 
crate::sql::planner::extension::aggregate::AggregateExtension; -use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; -use crate::sql::planner::plan::streaming_window_analzer::StreamingWindowAnalzer; +use crate::sql::schema::StreamSchemaProvider; +use crate::sql::extensions::aggregate::AggregateExtension; +use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs}; +use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer; use crate::sql::types::{ DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window, schema_from_df_fields_with_metadata, diff --git a/src/sql/planner/rewrite/async_udf_rewriter.rs b/src/sql/analysis/async_udf_rewriter.rs similarity index 93% rename from src/sql/planner/rewrite/async_udf_rewriter.rs rename to src/sql/analysis/async_udf_rewriter.rs index def3c4ef..9584c022 100644 --- a/src/sql/planner/rewrite/async_udf_rewriter.rs +++ b/src/sql/analysis/async_udf_rewriter.rs @@ -1,6 +1,6 @@ -use crate::sql::planner::extension::remote_table::RemoteTableExtension; -use crate::sql::planner::extension::{ASYNC_RESULT_FIELD, AsyncUDFExtension}; -use crate::sql::planner::mod_prelude::StreamSchemaProvider; +use crate::sql::extensions::remote_table::RemoteTableExtension; +use crate::sql::extensions::{ASYNC_RESULT_FIELD, AsyncUDFExtension}; +use crate::sql::schema::StreamSchemaProvider; use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion::common::{Column, Result as DFResult, TableReference, plan_err}; use datafusion::logical_expr::expr::ScalarFunction; @@ -88,6 +88,7 @@ impl TreeNodeRewriter for AsyncUdfRewriter<'_> { let Some((name, opts, arg_exprs)) = args else { return Ok(Transformed::no(LogicalPlan::Projection(projection))); }; + let udf = self.provider.dylib_udfs.get(&name).unwrap().clone(); let input = if matches!(*projection.input, LogicalPlan::Projection(..)) { 
Arc::new(LogicalPlan::Extension(Extension { @@ -106,6 +107,7 @@ impl TreeNodeRewriter for AsyncUdfRewriter<'_> { node: Arc::new(AsyncUDFExtension { input, name, + udf, arg_exprs, final_exprs: projection.expr, ordered: opts.ordered, diff --git a/src/sql/planner/plan/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs similarity index 96% rename from src/sql/planner/plan/join_rewriter.rs rename to src/sql/analysis/join_rewriter.rs index f6031183..465d4620 100644 --- a/src/sql/planner/plan/join_rewriter.rs +++ b/src/sql/analysis/join_rewriter.rs @@ -1,7 +1,7 @@ -use crate::sql::planner::StreamSchemaProvider; -use crate::sql::planner::extension::join::JoinExtension; -use crate::sql::planner::extension::key_calculation::KeyCalculationExtension; -use crate::sql::planner::plan::streaming_window_analzer::StreamingWindowAnalzer; +use crate::sql::schema::StreamSchemaProvider; +use crate::sql::extensions::join::JoinExtension; +use crate::sql::extensions::key_calculation::KeyCalculationExtension; +use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer; use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata}; use crate::types::TIMESTAMP_FIELD; use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs new file mode 100644 index 00000000..04230aa0 --- /dev/null +++ b/src/sql/analysis/mod.rs @@ -0,0 +1,227 @@ +#![allow(clippy::new_without_default)] + +pub(crate) mod aggregate_rewriter; +pub(crate) mod join_rewriter; +pub(crate) mod row_time_rewriter; +pub(crate) mod stream_rewriter; +pub(crate) mod streaming_window_analzer; +pub(crate) mod window_function_rewriter; + +pub mod async_udf_rewriter; +pub mod sink_input_rewriter; +pub mod source_metadata_visitor; +pub mod source_rewriter; +pub mod time_window; +pub mod unnest_rewriter; + +pub use async_udf_rewriter::{AsyncOptions, AsyncUdfRewriter}; +pub use sink_input_rewriter::SinkInputRewriter; +pub 
use source_metadata_visitor::SourceMetadataVisitor; +pub use source_rewriter::SourceRewriter; +pub use time_window::{TimeWindowNullCheckRemover, TimeWindowUdfChecker, is_time_window}; +pub use unnest_rewriter::{UNNESTED_COL, UnnestRewriter}; + +pub use crate::sql::schema::schema_provider::{ + LogicalBatchInput, StreamSchemaProvider, StreamTable, +}; + +pub(crate) mod mod_prelude { + pub use super::StreamSchemaProvider; +} + +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +use datafusion::common::tree_node::{Transformed, TreeNode}; +use datafusion::common::{Result, plan_err}; +use datafusion::error::DataFusionError; +use datafusion::execution::SessionStateBuilder; +use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore}; +use datafusion::prelude::SessionConfig; +use datafusion::sql::TableReference; +use datafusion::sql::sqlparser::ast::{OneOrManyWithParens, Statement}; +use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; +use datafusion::sql::sqlparser::parser::Parser; +use tracing::{debug, info, instrument}; + +use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig}; +use crate::sql::logical_planner::optimizers::ChainingOptimizer; +use crate::sql::schema::insert::Insert; +use crate::sql::schema::table::Table as CatalogTable; +use crate::sql::functions::{is_json_union, serialize_outgoing_json}; +use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs}; +use crate::sql::extensions::projection::ProjectionExtension; +use crate::sql::extensions::sink::SinkExtension; +use crate::sql::extensions::{ StreamExtension}; +use crate::sql::logical_planner::planner::NamedNode; +use crate::sql::types::SqlConfig; + +// ── Compilation pipeline ────────────────────────────────────────────── + +#[derive(Clone, Debug)] +pub struct CompiledSql { + pub program: LogicalProgram, + pub connection_ids: Vec, +} + +fn duration_from_sql_expr( + expr: &datafusion::sql::sqlparser::ast::Expr, +) -> 
Result { + use datafusion::sql::sqlparser::ast::Expr as SqlExpr; + use datafusion::sql::sqlparser::ast::Value as SqlValue; + use datafusion::sql::sqlparser::ast::ValueWithSpan; + + match expr { + SqlExpr::Interval(interval) => { + let value_str = match interval.value.as_ref() { + SqlExpr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + .. + }) => s.clone(), + other => return plan_err!("expected interval string literal, found {other}"), + }; + + parse_interval_to_duration(&value_str) + } + SqlExpr::Value(ValueWithSpan { + value: SqlValue::SingleQuotedString(s), + .. + }) => parse_interval_to_duration(s), + other => plan_err!("expected an interval expression, found {other}"), + } +} + +fn parse_interval_to_duration(s: &str) -> Result { + let parts: Vec<&str> = s.trim().split_whitespace().collect(); + if parts.len() != 2 { + return plan_err!("invalid interval string '{s}'; expected ' '"); + } + let value: u64 = parts[0] + .parse() + .map_err(|_| DataFusionError::Plan(format!("invalid interval number: {}", parts[0])))?; + match parts[1].to_lowercase().as_str() { + "second" | "seconds" | "s" => Ok(std::time::Duration::from_secs(value)), + "minute" | "minutes" | "min" => Ok(std::time::Duration::from_secs(value * 60)), + "hour" | "hours" | "h" => Ok(std::time::Duration::from_secs(value * 3600)), + "day" | "days" | "d" => Ok(std::time::Duration::from_secs(value * 86400)), + unit => plan_err!("unsupported interval unit '{unit}'"), + } +} + +fn build_sink_inputs(extensions: &[LogicalPlan]) -> HashMap> { + let mut sink_inputs = HashMap::>::new(); + for extension in extensions.iter() { + if let LogicalPlan::Extension(ext) = extension { + if let Some(sink_node) = ext.node.as_any().downcast_ref::() { + if let Some(named_node) = sink_node.node_name() { + let inputs = sink_node + .inputs() + .into_iter() + .cloned() + .collect::>(); + sink_inputs.entry(named_node).or_default().extend(inputs); + } + } + } + } + sink_inputs +} + +pub(crate) fn 
maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result { + let LogicalPlan::Extension(ref ext) = plan else { + return Ok(plan); + }; + + let Some(sink) = ext.node.as_any().downcast_ref::() else { + return Ok(plan); + }; + + let Some(partition_exprs) = sink.table.partition_exprs() else { + return Ok(plan); + }; + + if partition_exprs.is_empty() { + return Ok(plan); + } + + let inputs = plan + .inputs() + .into_iter() + .map(|input| { + Ok(LogicalPlan::Extension(Extension { + node: Arc::new(KeyCalculationExtension { + name: Some("key-calc-partition".to_string()), + schema: input.schema().clone(), + input: input.clone(), + keys: KeysOrExprs::Exprs(partition_exprs.clone()), + }), + })) + }) + .collect::>()?; + + use datafusion::prelude::col; + let unkey = LogicalPlan::Extension(Extension { + node: Arc::new( + ProjectionExtension::new( + inputs, + Some("unkey".to_string()), + sink.schema().iter().map(|(_, f)| col(f.name())).collect(), + ) + .shuffled(), + ), + }); + + let node = sink.with_exprs_and_inputs(vec![], vec![unkey])?; + Ok(LogicalPlan::Extension(Extension { + node: Arc::new(node), + })) +} + +pub fn rewrite_sinks(extensions: Vec) -> Result> { + let mut sink_inputs = build_sink_inputs(&extensions); + let mut new_extensions = vec![]; + for extension in extensions { + let mut rewriter = SinkInputRewriter::new(&mut sink_inputs); + let result = extension.rewrite(&mut rewriter)?; + if !rewriter.was_removed { + new_extensions.push(result.data); + } + } + + new_extensions + .into_iter() + .map(maybe_add_key_extension_to_sink) + .collect() + +} + +/// Entry point for transforming a standard DataFusion LogicalPlan into a +/// Streaming-aware LogicalPlan. +/// +/// This function coordinates multiple rewriting passes and ensures the +/// resulting plan satisfies streaming constraints. 
+#[instrument(skip_all, level = "debug")] +pub fn rewrite_plan( + plan: LogicalPlan, + schema_provider: &StreamSchemaProvider, +) -> Result { + info!("Starting streaming plan rewrite pipeline"); + + let mut rewriter = stream_rewriter::StreamRewriter::new(schema_provider); + let Transformed { + data: rewritten_plan, + .. + } = plan.rewrite_with_subqueries(&mut rewriter)?; + + rewritten_plan.visit_with_subqueries(&mut TimeWindowUdfChecker {})?; + + if cfg!(debug_assertions) { + debug!( + "Streaming logical plan graphviz:\n{}", + rewritten_plan.display_graphviz() + ); + } + + info!("Streaming plan rewrite completed successfully"); + Ok(rewritten_plan) +} diff --git a/src/sql/planner/plan/row_time_rewriter.rs b/src/sql/analysis/row_time_rewriter.rs similarity index 100% rename from src/sql/planner/plan/row_time_rewriter.rs rename to src/sql/analysis/row_time_rewriter.rs diff --git a/src/sql/planner/rewrite/sink_input_rewriter.rs b/src/sql/analysis/sink_input_rewriter.rs similarity index 91% rename from src/sql/planner/rewrite/sink_input_rewriter.rs rename to src/sql/analysis/sink_input_rewriter.rs index e6b6a0bd..b33ac647 100644 --- a/src/sql/planner/rewrite/sink_input_rewriter.rs +++ b/src/sql/analysis/sink_input_rewriter.rs @@ -1,10 +1,11 @@ -use crate::sql::planner::extension::sink::SinkExtension; -use crate::sql::planner::extension::{NamedNode, StreamExtension}; +use crate::sql::extensions::sink::SinkExtension; +use crate::sql::extensions::{StreamExtension}; use datafusion::common::Result as DFResult; use datafusion::common::tree_node::{Transformed, TreeNodeRecursion, TreeNodeRewriter}; use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore}; use std::collections::HashMap; use std::sync::Arc; +use crate::sql::logical_planner::planner::NamedNode; type SinkInputs = HashMap>; diff --git a/src/sql/planner/rewrite/source_metadata_visitor.rs b/src/sql/analysis/source_metadata_visitor.rs similarity index 86% rename from 
src/sql/planner/rewrite/source_metadata_visitor.rs rename to src/sql/analysis/source_metadata_visitor.rs index 168ff712..a49a7e72 100644 --- a/src/sql/planner/rewrite/source_metadata_visitor.rs +++ b/src/sql/analysis/source_metadata_visitor.rs @@ -1,6 +1,6 @@ -use crate::sql::planner::extension::sink::SinkExtension; -use crate::sql::planner::extension::table_source::TableSourceExtension; -use crate::sql::planner::mod_prelude::StreamSchemaProvider; +use crate::sql::extensions::sink::SinkExtension; +use crate::sql::extensions::table_source::TableSourceExtension; +use crate::sql::schema::StreamSchemaProvider; use datafusion::common::Result as DFResult; use datafusion::common::tree_node::{TreeNodeRecursion, TreeNodeVisitor}; use datafusion::logical_expr::{Extension, LogicalPlan}; @@ -39,7 +39,7 @@ impl<'a> SourceMetadataVisitor<'a> { let table = self.schema_provider.get_catalog_table(&table_name)?; match table { - crate::sql::catalog::table::Table::ConnectorTable(t) => t.id, + crate::sql::schema::table::Table::ConnectorTable(t) => t.id, _ => None, } } diff --git a/src/sql/planner/rewrite/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs similarity index 96% rename from src/sql/planner/rewrite/source_rewriter.rs rename to src/sql/analysis/source_rewriter.rs index 27281b41..1bba1551 100644 --- a/src/sql/planner/rewrite/source_rewriter.rs +++ b/src/sql/analysis/source_rewriter.rs @@ -20,12 +20,12 @@ use datafusion::logical_expr::{ self, BinaryExpr, Expr, Extension, LogicalPlan, Projection, TableScan, }; -use crate::sql::catalog::connector_table::ConnectorTable; -use crate::sql::catalog::field_spec::FieldSpec; -use crate::sql::catalog::table::Table; -use crate::sql::planner::StreamSchemaProvider; -use crate::sql::planner::extension::remote_table::RemoteTableExtension; -use crate::sql::planner::extension::watermark_node::WatermarkNode; +use crate::sql::schema::connector_table::ConnectorTable; +use crate::sql::schema::field_spec::FieldSpec; +use 
crate::sql::schema::table::Table; +use crate::sql::schema::StreamSchemaProvider; +use crate::sql::extensions::remote_table::RemoteTableExtension; +use crate::sql::extensions::watermark_node::WatermarkNode; use crate::sql::types::TIMESTAMP_FIELD; /// Rewrites table scans into proper source nodes with projections and watermarks. diff --git a/src/sql/planner/plan/stream_rewriter.rs b/src/sql/analysis/stream_rewriter.rs similarity index 96% rename from src/sql/planner/plan/stream_rewriter.rs rename to src/sql/analysis/stream_rewriter.rs index c3caed0e..999b1fb8 100644 --- a/src/sql/planner/plan/stream_rewriter.rs +++ b/src/sql/analysis/stream_rewriter.rs @@ -1,15 +1,15 @@ use std::sync::Arc; use super::StreamSchemaProvider; -use crate::sql::planner::extension::StreamExtension; -use crate::sql::planner::extension::remote_table::RemoteTableExtension; -use crate::sql::planner::plan::row_time_rewriter::RowTimeRewriter; -use crate::sql::planner::plan::{ +use crate::sql::extensions::StreamExtension; +use crate::sql::extensions::remote_table::RemoteTableExtension; +use crate::sql::analysis::row_time_rewriter::RowTimeRewriter; +use crate::sql::analysis::{ aggregate_rewriter::AggregateRewriter, join_rewriter::JoinRewriter, window_function_rewriter::WindowFunctionRewriter, }; -use crate::sql::planner::rewrite::TimeWindowNullCheckRemover; -use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field}; +use crate::sql::analysis::TimeWindowNullCheckRemover; +use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field}; use crate::sql::types::{DFField, TIMESTAMP_FIELD}; use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{Column, DataFusionError, Result, Spans, TableReference, plan_err}; diff --git a/src/sql/planner/plan/streaming_window_analzer.rs b/src/sql/analysis/streaming_window_analzer.rs similarity index 98% rename from src/sql/planner/plan/streaming_window_analzer.rs rename to 
src/sql/analysis/streaming_window_analzer.rs index db3506b7..59ded792 100644 --- a/src/sql/planner/plan/streaming_window_analzer.rs +++ b/src/sql/analysis/streaming_window_analzer.rs @@ -5,8 +5,8 @@ use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor use datafusion::common::{Column, DFSchema, DataFusionError, Result}; use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, expr::Alias}; -use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension}; -use crate::sql::planner::extension::join::JOIN_NODE_NAME; +use crate::sql::extensions::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension}; +use crate::sql::extensions::join::JOIN_NODE_NAME; use crate::sql::types::{DFField, WindowBehavior, WindowType, fields_with_qualifiers, find_window}; /// WindowDetectingVisitor identifies windowing strategies and tracks window-carrying fields diff --git a/src/sql/planner/rewrite/time_window.rs b/src/sql/analysis/time_window.rs similarity index 100% rename from src/sql/planner/rewrite/time_window.rs rename to src/sql/analysis/time_window.rs diff --git a/src/sql/planner/udafs.rs b/src/sql/analysis/udafs.rs similarity index 100% rename from src/sql/planner/udafs.rs rename to src/sql/analysis/udafs.rs diff --git a/src/sql/planner/rewrite/unnest_rewriter.rs b/src/sql/analysis/unnest_rewriter.rs similarity index 100% rename from src/sql/planner/rewrite/unnest_rewriter.rs rename to src/sql/analysis/unnest_rewriter.rs diff --git a/src/sql/planner/plan/window_function_rewriter.rs b/src/sql/analysis/window_function_rewriter.rs similarity index 96% rename from src/sql/planner/plan/window_function_rewriter.rs rename to src/sql/analysis/window_function_rewriter.rs index 5c8e511b..ce580eaf 100644 --- a/src/sql/planner/plan/window_function_rewriter.rs +++ b/src/sql/analysis/window_function_rewriter.rs @@ -8,9 +8,9 @@ use datafusion_common::DataFusionError; use std::sync::Arc; use tracing::debug; -use 
crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; -use crate::sql::planner::extension::window_fn::WindowFunctionExtension; -use crate::sql::planner::plan::streaming_window_analzer::{StreamingWindowAnalzer, extract_column}; +use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs}; +use crate::sql::extensions::window_fn::WindowFunctionExtension; +use crate::sql::analysis::streaming_window_analzer::{StreamingWindowAnalzer, extract_column}; use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields}; /// WindowFunctionRewriter transforms standard SQL Window functions into streaming-compatible diff --git a/src/sql/extensions/aggregate.rs b/src/sql/extensions/aggregate.rs new file mode 100644 index 00000000..c8c070f2 --- /dev/null +++ b/src/sql/extensions/aggregate.rs @@ -0,0 +1,607 @@ +use std::fmt::Formatter; +use std::sync::Arc; +use std::time::Duration; +use arrow_array::types::IntervalMonthDayNanoType; +use datafusion::common::{Column, DFSchemaRef, Result, ScalarValue, internal_err}; +use datafusion::logical_expr; +use datafusion::logical_expr::{ + BinaryExpr, Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, expr::ScalarFunction, +}; +use datafusion_common::{plan_err, DFSchema, DataFusionError}; +use datafusion_expr::Aggregate; +use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionCodec}; +use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; +use datafusion_proto::protobuf::PhysicalPlanNode; +use prost::Message; +use protocol::grpc::api::{ SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::multifield_partial_ord; +use crate::sql::logical_planner::{window, FsPhysicalExtensionCodec}; +use crate::sql::extensions::{ NodeWithIncomingEdges, StreamExtension, 
TimestampAppendExtension}; +use crate::sql::logical_planner::planner::{NamedNode, Planner, SplitPlanOutput}; +use crate::sql::types::{ + DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, + schema_from_df_fields, schema_from_df_fields_with_metadata, +}; +use crate::types::{FsSchema, FsSchemaRef}; + +pub(crate) const AGGREGATE_EXTENSION_NAME: &str = "AggregateExtension"; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct AggregateExtension { + pub(crate) window_behavior: WindowBehavior, + pub(crate) aggregate: LogicalPlan, + pub(crate) schema: DFSchemaRef, + pub(crate) key_fields: Vec, + pub(crate) final_calculation: LogicalPlan, +} + +multifield_partial_ord!(AggregateExtension, aggregate, key_fields, final_calculation); + +impl AggregateExtension { + pub fn new( + window_behavior: WindowBehavior, + aggregate: LogicalPlan, + key_fields: Vec, + ) -> Self { + let final_calculation = + Self::final_projection(&aggregate, window_behavior.clone()).unwrap(); + + Self { + window_behavior, + aggregate, + schema: final_calculation.schema().clone(), + key_fields, + final_calculation, + } + } + + pub fn tumbling_window_config( + &self, + planner: &Planner, + index: usize, + input_schema: DFSchemaRef, + width: Duration, + ) -> Result { + let binning_function_proto = planner.binning_function_proto(width, input_schema.clone())?; + let SplitPlanOutput { + partial_aggregation_plan, + partial_schema, + finish_plan, + } = planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?; + + let final_physical_plan = planner.sync_plan(&self.final_calculation)?; + let final_physical_plan_node = PhysicalPlanNode::try_from_physical_plan( + final_physical_plan, + &FsPhysicalExtensionCodec::default(), + )?; + + let config = TumblingWindowAggregateOperator { + name: "TumblingWindow".to_string(), + width_micros: width.as_micros() as u64, + binning_function: binning_function_proto.encode_to_vec(), + input_schema: Some( + 
FsSchema::from_schema_keys( + Arc::new(input_schema.as_ref().into()), + self.key_fields.clone(), + )?.into(), + ), + partial_schema: Some(partial_schema.into()), + partial_aggregation_plan: partial_aggregation_plan.encode_to_vec(), + final_aggregation_plan: finish_plan.encode_to_vec(), + final_projection: Some(final_physical_plan_node.encode_to_vec()), + }; + + Ok(LogicalNode::single( + index as u32, + format!("tumbling_{index}"), + OperatorName::TumblingWindowAggregate, + config.encode_to_vec(), + format!("TumblingWindow<{}>", config.name), + 1, + )) + } + + pub fn sliding_window_config( + &self, + planner: &Planner, + index: usize, + input_schema: DFSchemaRef, + width: Duration, + slide: Duration, + ) -> Result { + let binning_function_proto = planner.binning_function_proto(slide, input_schema.clone())?; + + let SplitPlanOutput { + partial_aggregation_plan, + partial_schema, + finish_plan, + } = planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?; + + let final_physical_plan = planner.sync_plan(&self.final_calculation)?; + let final_physical_plan_node = PhysicalPlanNode::try_from_physical_plan( + final_physical_plan, + &FsPhysicalExtensionCodec::default(), + )?; + + let config = SlidingWindowAggregateOperator { + name: format!("SlidingWindow<{width:?}>"), + width_micros: width.as_micros() as u64, + slide_micros: slide.as_micros() as u64, + binning_function: binning_function_proto.encode_to_vec(), + input_schema: Some( + FsSchema::from_schema_keys( + Arc::new(input_schema.as_ref().into()), + self.key_fields.clone(), + )?.into(), + ), + partial_schema: Some(partial_schema.into()), + partial_aggregation_plan: partial_aggregation_plan.encode_to_vec(), + final_aggregation_plan: finish_plan.encode_to_vec(), + final_projection: final_physical_plan_node.encode_to_vec(), + // TODO add final aggregation. 
+ }; + + Ok(LogicalNode::single( + index as u32, + format!("sliding_window_{index}"), + OperatorName::SlidingWindowAggregate, + config.encode_to_vec(), + "sliding window".to_string(), + 1, + )) + } + + pub fn session_window_config( + &self, + planner: &Planner, + index: usize, + input_schema: DFSchemaRef, + ) -> Result { + let WindowBehavior::FromOperator { + window: WindowType::Session { gap }, + window_index, + window_field, + is_nested: false, + } = &self.window_behavior + else { + return plan_err!("expected session window"); + }; + let output_schema = fields_with_qualifiers(self.aggregate.schema()); + let LogicalPlan::Aggregate(agg) = self.aggregate.clone() else { + return plan_err!("expected aggregate"); + }; + let key_count = self.key_fields.len(); + let unkeyed_aggregate_schema = Arc::new(schema_from_df_fields_with_metadata( + &output_schema[key_count..], + self.aggregate.schema().metadata().clone(), + )?); + + let unkeyed_aggregate = Aggregate::try_new_with_schema( + agg.input.clone(), + vec![], + agg.aggr_expr.clone(), + unkeyed_aggregate_schema.clone(), + )?; + let aggregate_plan = planner.sync_plan(&LogicalPlan::Aggregate(unkeyed_aggregate))?; + + let physical_plan_node = PhysicalPlanNode::try_from_physical_plan( + aggregate_plan, + &FsPhysicalExtensionCodec::default(), + )?; + let input_schema = FsSchema::from_schema_keys( + Arc::new(input_schema.as_ref().into()), + self.key_fields.clone(), + )?; + + let config = SessionWindowAggregateOperator { + name: format!("session_window_{index}"), + gap_micros: gap.as_micros() as u64, + window_field_name: window_field.name().to_string(), + window_index: *window_index as u64, + input_schema: Some(input_schema.into()), + unkeyed_aggregate_schema: None, + partial_aggregation_plan: vec![], + final_aggregation_plan: physical_plan_node.encode_to_vec(), + }; + + Ok(LogicalNode::single( + index as u32, + format!("SessionWindow<{gap:?}>"), + OperatorName::SessionWindowAggregate, + config.encode_to_vec(), +
config.name.clone(), + 1, + )) + } + + pub fn instant_window_config( + &self, + planner: &Planner, + index: usize, + input_schema: DFSchemaRef, + use_final_projection: bool, + ) -> Result { + let binning_function = planner.create_physical_expr( + &Expr::Column(Column::new_unqualified("_timestamp".to_string())), + &input_schema, + )?; + let binning_function_proto = + serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {})?; + + let final_projection = use_final_projection + .then(|| { + let final_physical_plan = planner.sync_plan(&self.final_calculation)?; + let final_physical_plan_node = PhysicalPlanNode::try_from_physical_plan( + final_physical_plan, + &FsPhysicalExtensionCodec::default(), + )?; + Ok::, DataFusionError>(final_physical_plan_node.encode_to_vec()) + }) + .transpose()?; + + let SplitPlanOutput { + partial_aggregation_plan, + partial_schema, + finish_plan, + } = planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?; + + let config = TumblingWindowAggregateOperator { + name: "InstantWindow".to_string(), + width_micros: 0, + binning_function: binning_function_proto.encode_to_vec(), + input_schema: Some( + FsSchema::from_schema_keys( + Arc::new(input_schema.as_ref().into()), + self.key_fields.clone(), + )?.into(), + ), + partial_schema: Some(partial_schema.into()), + partial_aggregation_plan: partial_aggregation_plan.encode_to_vec(), + final_aggregation_plan: finish_plan.encode_to_vec(), + final_projection, + }; + + Ok(LogicalNode::single( + index as u32, + format!("instant_window_{index}"), + OperatorName::TumblingWindowAggregate, + config.encode_to_vec(), + "instant window".to_string(), + 1, + )) + } + + // projection assuming that _timestamp has been populated with the start of the bin. 
+ pub fn final_projection( + aggregate_plan: &LogicalPlan, + window_behavior: WindowBehavior, + ) -> Result { + let timestamp_field: DFField = aggregate_plan.inputs()[0] + .schema() + .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)? + .into(); + let timestamp_append = LogicalPlan::Extension(Extension { + node: Arc::new(TimestampAppendExtension::new( + aggregate_plan.clone(), + timestamp_field.qualifier().cloned(), + )), + }); + let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema()); + let mut aggregate_expressions: Vec<_> = aggregate_fields + .iter() + .map(|field| Expr::Column(field.qualified_column())) + .collect(); + let (window_field, window_index, width, is_nested) = match window_behavior { + WindowBehavior::InData => return Ok(timestamp_append), + WindowBehavior::FromOperator { + window, + window_field, + window_index, + is_nested, + } => match window { + WindowType::Tumbling { width, .. } | WindowType::Sliding { width, .. } => { + (window_field, window_index, width, is_nested) + } + WindowType::Session { .. 
} => { + return Ok(LogicalPlan::Extension(Extension { + node: Arc::new(WindowAppendExtension::new( + timestamp_append, + window_field, + window_index, + )), + })); + } + WindowType::Instant => return Ok(timestamp_append), + }, + }; + if is_nested { + return Self::nested_final_projection( + timestamp_append, + window_field, + window_index, + width, + ); + } + let timestamp_column = + Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name()); + aggregate_fields.insert(window_index, window_field.clone()); + + let window_expression = Expr::ScalarFunction(ScalarFunction { + func: window(), + args: vec![ + // copy bin_start as first argument + Expr::Column(timestamp_column.clone()), + // add width interval to _timestamp for bin end + Expr::BinaryExpr(BinaryExpr { + left: Box::new(Expr::Column(timestamp_column.clone())), + op: logical_expr::Operator::Plus, + right: Box::new(Expr::Literal( + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, 0, width.as_nanos() as i64), + )), + None, + )), + }), + ], + }); + aggregate_expressions.insert( + window_index, + window_expression + .alias_qualified(window_field.qualifier().cloned(), window_field.name()), + ); + aggregate_fields.push(timestamp_field); + let bin_end_calculation = Expr::BinaryExpr(BinaryExpr { + left: Box::new(Expr::Column(timestamp_column.clone())), + op: logical_expr::Operator::Plus, + right: Box::new(Expr::Literal( + ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + 0, + 0, + (width.as_nanos() - 1) as i64, + ))), + None, + )), + }); + aggregate_expressions.push(bin_end_calculation); + Ok(LogicalPlan::Projection( + logical_expr::Projection::try_new_with_schema( + aggregate_expressions, + Arc::new(timestamp_append), + Arc::new(schema_from_df_fields(&aggregate_fields)?), + )?, + )) + } + + fn nested_final_projection( + aggregate_plan: LogicalPlan, + window_field: DFField, + window_index: usize, + width: Duration, + ) -> Result { + let 
timestamp_field: DFField = aggregate_plan + .schema() + .qualified_field_with_unqualified_name(TIMESTAMP_FIELD) + .unwrap() + .into(); + let timestamp_column = + Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name()); + + let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema()); + let mut aggregate_expressions: Vec<_> = aggregate_fields + .iter() + .map(|field| Expr::Column(field.qualified_column())) + .collect(); + aggregate_fields.insert(window_index, window_field.clone()); + let window_expression = Expr::ScalarFunction(ScalarFunction { + func: window(), + args: vec![ + // calculate the start of the bin + Expr::BinaryExpr(BinaryExpr { + left: Box::new(Expr::Column(timestamp_column.clone())), + op: logical_expr::Operator::Minus, + right: Box::new(Expr::Literal( + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, 0, width.as_nanos() as i64 - 1), + )), + None, + )), + }), + // add 1 nanosecond to the timestamp + Expr::BinaryExpr(BinaryExpr { + left: Box::new(Expr::Column(timestamp_column.clone())), + op: logical_expr::Operator::Plus, + right: Box::new(Expr::Literal( + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, 0, 1), + )), + None, + )), + }), + ], + }); + aggregate_expressions.insert( + window_index, + window_expression + .alias_qualified(window_field.qualifier().cloned(), window_field.name()), + ); + Ok(LogicalPlan::Projection( + logical_expr::Projection::try_new_with_schema( + aggregate_expressions, + Arc::new(aggregate_plan), + Arc::new(schema_from_df_fields(&aggregate_fields).unwrap()), + ) + .unwrap(), + )) + } +} + +impl UserDefinedLogicalNodeCore for AggregateExtension { + fn name(&self) -> &str { + AGGREGATE_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.aggregate] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> 
std::fmt::Result { + write!( + f, + "AggregateExtension: {} | window_behavior: {:?}", + self.schema(), + match &self.window_behavior { + WindowBehavior::InData => "InData".to_string(), + WindowBehavior::FromOperator { window, .. } => format!("FromOperator({window:?})"), + } + ) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!("input size inconsistent"); + } + + Ok(Self::new( + self.window_behavior.clone(), + inputs[0].clone(), + self.key_fields.clone(), + )) + } +} + +impl StreamExtension for AggregateExtension { + fn node_name(&self) -> Option { + None + } + + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + if input_schemas.len() != 1 { + return plan_err!("AggregateExtension should have exactly one input"); + } + let input_schema = input_schemas[0].clone(); + let input_df_schema = + Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone()).unwrap()); + let logical_node = match &self.window_behavior { + WindowBehavior::FromOperator { + window, + window_field: _, + window_index: _, + is_nested, + } => { + if *is_nested { + self.instant_window_config(planner, index, input_df_schema, true)? + } else { + match window { + WindowType::Tumbling { width } => { + self.tumbling_window_config(planner, index, input_df_schema, *width)? + } + WindowType::Sliding { width, slide } => self.sliding_window_config( + planner, + index, + input_df_schema, + *width, + *slide, + )?, + WindowType::Instant => { + return plan_err!( + "instant window not supported in aggregate extension" + ); + } + WindowType::Session { gap: _ } => { + self.session_window_config(planner, index, input_df_schema)? 
+ } + } + } + } + WindowBehavior::InData => self + .instant_window_config(planner, index, input_df_schema, false) + .map_err(|e| e.context("instant window"))?, + }; + let edge = LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schema).clone()); + Ok(NodeWithIncomingEdges { + node: logical_node, + edges: vec![edge], + }) + } + + fn output_schema(&self) -> FsSchema { + let output_schema = (*self.schema).clone().into(); + FsSchema::from_schema_keys(Arc::new(output_schema), vec![]).unwrap() + } +} + +/* +This is a plan node whose output schema is its input's schema with `window_field` inserted at `window_index`. + */ + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct WindowAppendExtension { + pub(crate) input: LogicalPlan, + pub(crate) window_field: DFField, + pub(crate) window_index: usize, + pub(crate) schema: DFSchemaRef, +} + +multifield_partial_ord!(WindowAppendExtension, input, window_index); + +impl WindowAppendExtension { + fn new(input: LogicalPlan, window_field: DFField, window_index: usize) -> Self { + let mut fields = fields_with_qualifiers(input.schema()); + fields.insert(window_index, window_field.clone()); + let metadata = input.schema().metadata().clone(); + Self { + input, + window_field, + window_index, + schema: Arc::new(schema_from_df_fields_with_metadata(&fields, metadata).unwrap()), + } + } +} + +impl UserDefinedLogicalNodeCore for WindowAppendExtension { + fn name(&self) -> &str { + "WindowAppendExtension" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "WindowAppendExtension: field {:?} at {}", + self.window_field, self.window_index + ) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Ok(Self::new( + inputs[0].clone(), + self.window_field.clone(), + self.window_index, + )) + } +} diff --git
a/src/sql/planner/extension/debezium.rs b/src/sql/extensions/debezium.rs similarity index 63% rename from src/sql/planner/extension/debezium.rs rename to src/sql/extensions/debezium.rs index 1760533c..184de88d 100644 --- a/src/sql/planner/extension/debezium.rs +++ b/src/sql/extensions/debezium.rs @@ -1,22 +1,28 @@ + +use super::{ StreamExtension}; +use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD}; use std::sync::Arc; -use datafusion::arrow::datatypes::{DataType, Field, Schema}; -use datafusion::common::{DFSchema, DFSchemaRef, Result, TableReference, plan_err}; +use arrow_schema::{DataType, Schema}; + +use datafusion::common::{DFSchema, DFSchemaRef, Result, TableReference, internal_err, plan_err}; +use datafusion::error::DataFusionError; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; +use datafusion::physical_plan::DisplayAs; -use super::{NamedNode, StreamExtension}; +use super::{NodeWithIncomingEdges}; use crate::multifield_partial_ord; -use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD}; +use crate::sql::logical_planner::updating_meta_field; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; pub(crate) const DEBEZIUM_UNROLLING_EXTENSION_NAME: &str = "DebeziumUnrollingExtension"; pub(crate) const TO_DEBEZIUM_EXTENSION_NAME: &str = "ToDebeziumExtension"; -/// Unrolls a Debezium-formatted (before/after/op) stream into individual rows -/// with an updating metadata column. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DebeziumUnrollingExtension { - pub(crate) input: LogicalPlan, - pub(crate) schema: DFSchemaRef, + input: LogicalPlan, + schema: DFSchemaRef, pub primary_keys: Vec, primary_key_names: Arc>, } @@ -45,23 +51,34 @@ impl DebeziumUnrollingExtension { let struct_schema: Vec<_> = input_schema .fields() .iter() - .filter(|field| field.name() != TIMESTAMP_FIELD) + .filter(|field| field.name() != TIMESTAMP_FIELD && field.name() != UPDATING_META_FIELD) .cloned() .collect(); let struct_type = DataType::Struct(struct_schema.into()); - let before = Arc::new(Field::new("before", struct_type.clone(), true)); - let after = Arc::new(Field::new("after", struct_type, true)); - let op = Arc::new(Field::new("op", DataType::Utf8, true)); + let before = Arc::new(arrow::datatypes::Field::new( + "before", + struct_type.clone(), + true, + )); + let after = Arc::new(arrow::datatypes::Field::new( + "after", + struct_type.clone(), + true, + )); + + let op = Arc::new(arrow::datatypes::Field::new("op", DataType::Utf8, true)); let mut fields = vec![before, after, op]; - if let Some(ts) = timestamp_field { - fields.push(Arc::new(ts)); + if let Some(timestamp_field) = timestamp_field { + fields.push(Arc::new(timestamp_field)); } let schema = match qualifier { - Some(q) => DFSchema::try_from_qualified_schema(q, &Schema::new(fields))?, + Some(qualifier) => { + DFSchema::try_from_qualified_schema(qualifier, &Schema::new(fields))? 
+ } None => DFSchema::try_from(Schema::new(fields))?, }; Ok(Arc::new(schema)) @@ -70,6 +87,7 @@ impl DebeziumUnrollingExtension { pub fn try_new(input: LogicalPlan, primary_keys: Arc>) -> Result { let input_schema = input.schema(); + // confirm that the input schema has before, after and op columns, and before and after match let Some(before_index) = input_schema.index_of_column_by_name(None, "before") else { return plan_err!("DebeziumUnrollingExtension requires a before column"); }; @@ -90,11 +108,13 @@ impl DebeziumUnrollingExtension { ); } + // check that op is a string let op_type = input_schema.field(op_index).data_type(); if *op_type != DataType::Utf8 { return plan_err!("op column must be a string, not {}", op_type); } + // create the output schema let DataType::Struct(fields) = before_type else { return plan_err!( "before and after columns must be structs, not {}", @@ -102,41 +122,44 @@ impl DebeziumUnrollingExtension { ); }; + // get the primary keys let primary_key_idx = primary_keys .iter() .map(|pk| fields.find(pk).map(|(i, _)| i)) .collect::>>() .ok_or_else(|| { - datafusion::error::DataFusionError::Plan( - "primary key field not found in Debezium schema".to_string(), - ) + DataFusionError::Plan("primary key field not found in Debezium schema".to_string()) })?; + // determine the qualifier from the before and after columns let qualifier = match ( input_schema.qualified_field(before_index).0, input_schema.qualified_field(after_index).0, ) { - (Some(bq), Some(aq)) => { - if bq != aq { + (Some(before_qualifier), Some(after_qualifier)) => { + if before_qualifier != after_qualifier { return plan_err!("before and after columns must have the same alias"); } - Some(bq.clone()) + Some(before_qualifier.clone()) } (None, None) => None, _ => return plan_err!("before and after columns must both have an alias or neither"), }; - let mut out_fields = fields.to_vec(); + let mut fields = fields.to_vec(); + fields.push(updating_meta_field()); - let Some(input_ts_index) = 
input_schema.index_of_column_by_name(None, TIMESTAMP_FIELD) + let Some(input_timestamp_field) = + input_schema.index_of_column_by_name(None, TIMESTAMP_FIELD) else { return plan_err!("DebeziumUnrollingExtension requires a timestamp field"); }; - out_fields.push(Arc::new(input_schema.field(input_ts_index).clone())); - let arrow_schema = Schema::new(out_fields); + fields.push(Arc::new(input_schema.field(input_timestamp_field).clone())); + let arrow_schema = Schema::new(fields); + let schema = match qualifier { - Some(q) => DFSchema::try_from_qualified_schema(q, &arrow_schema)?, + Some(qualifier) => DFSchema::try_from_qualified_schema(qualifier, &arrow_schema)?, None => DFSchema::try_from(arrow_schema)?, }; @@ -180,8 +203,17 @@ impl StreamExtension for DebeziumUnrollingExtension { None } - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() + fn plan_node( + &self, + _planner: &Planner, + _index: usize, + _input_schemas: Vec, + ) -> Result { + plan_err!("DebeziumUnrollingExtension should not be planned") + } + + fn output_schema(&self) -> FsSchema { + FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() } fn transparent(&self) -> bool { @@ -189,19 +221,19 @@ impl StreamExtension for DebeziumUnrollingExtension { } } -/// Wraps an input stream into Debezium format (before/after/op) for updating sinks. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub(crate) struct ToDebeziumExtension { - pub(crate) input: Arc, - pub(crate) schema: DFSchemaRef, + input: Arc, + schema: DFSchemaRef, } multifield_partial_ord!(ToDebeziumExtension, input); impl ToDebeziumExtension { pub(crate) fn try_new(input: LogicalPlan) -> Result { - let schema = DebeziumUnrollingExtension::as_debezium_schema(input.schema(), None) - .expect("should be able to create ToDebeziumExtension"); + let input_schema = input.schema(); + let schema = DebeziumUnrollingExtension::as_debezium_schema(input_schema, None) + .expect("should be able to create ToDebeziumExtension"); Ok(Self { input: Arc::new(input), schema, @@ -209,6 +241,16 @@ impl ToDebeziumExtension { } } +impl DisplayAs for ToDebeziumExtension { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "ToDebeziumExtension") + } +} + impl UserDefinedLogicalNodeCore for ToDebeziumExtension { fn name(&self) -> &str { TO_DEBEZIUM_EXTENSION_NAME @@ -222,7 +264,7 @@ impl UserDefinedLogicalNodeCore for ToDebeziumExtension { &self.schema } - fn expressions(&self) -> Vec { + fn expressions(&self) -> Vec { vec![] } @@ -230,7 +272,11 @@ impl UserDefinedLogicalNodeCore for ToDebeziumExtension { write!(f, "ToDebeziumExtension") } - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + inputs: Vec, + ) -> Result { Self::try_new(inputs[0].clone()) } } @@ -240,8 +286,17 @@ impl StreamExtension for ToDebeziumExtension { None } - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() + fn plan_node( + &self, + _planner: &Planner, + _index: usize, + _input_schemas: Vec, + ) -> Result { + internal_err!("ToDebeziumExtension should not be planned") + } + + fn output_schema(&self) -> FsSchema { + 
FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() } fn transparent(&self) -> bool { diff --git a/src/sql/extensions/join.rs b/src/sql/extensions/join.rs new file mode 100644 index 00000000..c28a6e01 --- /dev/null +++ b/src/sql/extensions/join.rs @@ -0,0 +1,120 @@ +use std::time::Duration; + +use datafusion::common::{DFSchemaRef, Result}; +use datafusion::logical_expr::expr::Expr; +use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore}; + +use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; +use crate::sql::types::StreamSchema; + +use std::sync::Arc; +use datafusion_common::plan_err; +use datafusion_proto::physical_plan::AsExecutionPlan; +use datafusion_proto::protobuf::PhysicalPlanNode; +use prost::Message; +use protocol::grpc::api::JoinOperator; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::sql::logical_planner::FsPhysicalExtensionCodec; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::types::{FsSchema, FsSchemaRef}; + +pub(crate) const JOIN_NODE_NAME: &str = "JoinNode"; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub struct JoinExtension { + pub(crate) rewritten_join: LogicalPlan, + pub(crate) is_instant: bool, + pub(crate) ttl: Option, +} + +impl StreamExtension for JoinExtension { + fn node_name(&self) -> Option { + None + } + + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + if input_schemas.len() != 2 { + return plan_err!("join should have exactly two inputs"); + } + let left_schema = input_schemas[0].clone(); + let right_schema = input_schemas[1].clone(); + + let join_plan = planner.sync_plan(&self.rewritten_join)?; + let physical_plan_node = PhysicalPlanNode::try_from_physical_plan( + join_plan.clone(), + &FsPhysicalExtensionCodec::default(), + )?; + + let operator_name = if self.is_instant { + OperatorName::InstantJoin + } else { + 
OperatorName::Join + }; + + let config = JoinOperator { + name: format!("join_{index}"), + left_schema: Some(left_schema.as_ref().clone().into()), + right_schema: Some(right_schema.as_ref().clone().into()), + output_schema: Some(self.output_schema().into()), + join_plan: physical_plan_node.encode_to_vec(), + ttl_micros: self.ttl.map(|t| t.as_micros() as u64), + }; + + let logical_node = LogicalNode::single( + index as u32, + format!("join_{index}"), + operator_name, + config.encode_to_vec(), + "join".to_string(), + 1, + ); + + let left_edge = + LogicalEdge::project_all(LogicalEdgeType::LeftJoin, left_schema.as_ref().clone()); + let right_edge = + LogicalEdge::project_all(LogicalEdgeType::RightJoin, right_schema.as_ref().clone()); + Ok(NodeWithIncomingEdges { + node: logical_node, + edges: vec![left_edge, right_edge], + }) + } + + fn output_schema(&self) -> FsSchema { + FsSchema::from_schema_unkeyed(self.schema().inner().clone()).unwrap() + } +} + +impl UserDefinedLogicalNodeCore for JoinExtension { + fn name(&self) -> &str { + JOIN_NODE_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.rewritten_join] + } + + fn schema(&self) -> &DFSchemaRef { + self.rewritten_join.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "JoinExtension: {}", self.schema()) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Ok(Self { + rewritten_join: inputs[0].clone(), + is_instant: self.is_instant, + ttl: self.ttl, + }) + } +} diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs new file mode 100644 index 00000000..e0edb67a --- /dev/null +++ b/src/sql/extensions/key_calculation.rs @@ -0,0 +1,242 @@ +use std::fmt::Formatter; +use std::sync::Arc; + +use datafusion::arrow::datatypes::{Field, Schema}; +use datafusion::common::{DFSchemaRef, Result, internal_err}; +use datafusion::logical_expr::{Expr, 
ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore}; +use datafusion_common::{plan_err, DFSchema}; +use datafusion_expr::col; +use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionCodec}; +use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; +use datafusion_proto::protobuf::PhysicalPlanNode; +use itertools::Itertools; +use prost::Message; +use protocol::grpc::api::{KeyPlanOperator, ProjectionOperator}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::multifield_partial_ord; +use crate::sql::logical_planner::FsPhysicalExtensionCodec; +use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::types::{ + StreamSchema, fields_with_qualifiers, schema_from_df_fields_with_metadata, +}; +use crate::types::{FsSchema, FsSchemaRef}; + +pub(crate) const KEY_CALCULATION_NAME: &str = "KeyCalculationExtension"; + +/// Two ways of specifying keys — either as col indexes in the existing data or as a set of +/// exprs to evaluate +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub enum KeysOrExprs { + Keys(Vec), + Exprs(Vec), +} + +/// Calculation for computing keyed data, with a vec of keys +/// that will be used for shuffling data to the correct nodes. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct KeyCalculationExtension { + pub(crate) name: Option, + pub(crate) input: LogicalPlan, + pub(crate) keys: KeysOrExprs, + pub(crate) schema: DFSchemaRef, +} + +multifield_partial_ord!(KeyCalculationExtension, name, input, keys); + +impl KeyCalculationExtension { + pub fn new_named_and_trimmed(input: LogicalPlan, keys: Vec, name: String) -> Self { + let output_fields: Vec<_> = fields_with_qualifiers(input.schema()) + .into_iter() + .enumerate() + .filter_map(|(index, field)| { + if !keys.contains(&index) { + Some(field.clone()) + } else { + None + } + }) + .collect(); + + let schema = + schema_from_df_fields_with_metadata(&output_fields, input.schema().metadata().clone()) + .unwrap(); + Self { + name: Some(name), + input, + keys: KeysOrExprs::Keys(keys), + schema: Arc::new(schema), + } + } + pub fn new(input: LogicalPlan, keys: KeysOrExprs) -> Self { + let schema = input.schema().clone(); + Self { + name: None, + input, + keys, + schema, + } + } +} + +impl StreamExtension for KeyCalculationExtension { + fn node_name(&self) -> Option { + None + } + + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + // check there's only one input + if input_schemas.len() != 1 { + return plan_err!("KeyCalculationExtension should have exactly one input"); + } + let input_schema = (*input_schemas[0]).clone(); + let input_df_schema = Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone())?); + + let physical_plan = planner.sync_plan(&self.input)?; + + let physical_plan_node: PhysicalPlanNode = PhysicalPlanNode::try_from_physical_plan( + physical_plan, + &FsPhysicalExtensionCodec::default(), + )?; + + let (config, name) = match &self.keys { + KeysOrExprs::Keys(keys) => ( + KeyPlanOperator { + name: "key".into(), + physical_plan: physical_plan_node.encode_to_vec(), + key_fields: keys.iter().map(|k| *k as u64).collect(), + } + .encode_to_vec(), + 
OperatorName::ArrowKey, + ), + KeysOrExprs::Exprs(key_exprs) => { + let mut exprs = vec![]; + for k in key_exprs { + exprs.push(k.clone()) + } + + for f in input_schema.schema.fields.iter() { + exprs.push(col(f.name())); + } + + let output_schema = self.output_schema(); + + // ensure that the exprs generate the output schema + for (expr, expected) in exprs.iter().zip(output_schema.schema.fields()) { + let (data_type, nullable) = expr.data_type_and_nullable(&input_df_schema)?; + assert_eq!(data_type, *expected.data_type()); + assert_eq!(nullable, expected.is_nullable()); + } + + let mut physical_exprs = vec![]; + + for e in exprs { + let phys = planner + .create_physical_expr(&e, &input_df_schema) + .map_err(|e| e.context("in PARTITION BY"))?; + physical_exprs.push( + serialize_physical_expr(&phys, &DefaultPhysicalExtensionCodec {})? + .encode_to_vec(), + ); + } + + let config = ProjectionOperator { + name: self.name.as_deref().unwrap_or("key").to_string(), + input_schema: Some(input_schema.clone().into()), + + output_schema: Some(self.output_schema().into()), + exprs: physical_exprs, + }; + + (config.encode_to_vec(), OperatorName::Projection) + } + }; + + let node = LogicalNode::single( + index as u32, + format!("key_{index}"), + name, + config, + format!("ArrowKey<{}>", self.name.as_deref().unwrap_or("_")), + 1, + ); + let edge = LogicalEdge::project_all(LogicalEdgeType::Forward, input_schema); + Ok(NodeWithIncomingEdges { + node, + edges: vec![edge], + }) + } + + fn output_schema(&self) -> FsSchema { + let arrow_schema = self.input.schema().as_ref(); + + match &self.keys { + KeysOrExprs::Keys(keys) => { + FsSchema::from_schema_keys(Arc::new(arrow_schema.into()), keys.clone()).unwrap() + } + KeysOrExprs::Exprs(exprs) => { + let mut fields = vec![]; + + for (i, e) in exprs.iter().enumerate() { + let (dt, nullable) = e.data_type_and_nullable(arrow_schema).unwrap(); + fields.push(Field::new(format!("__key_{i}"), dt, nullable).into()); + } + + for f in 
arrow_schema.fields().iter() { + fields.push(f.clone()); + } + + FsSchema::from_schema_keys( + Arc::new(Schema::new(fields)), + (1..=exprs.len()).collect_vec(), + ) + .unwrap() + } + } + } +} + +impl UserDefinedLogicalNodeCore for KeyCalculationExtension { + fn name(&self) -> &str { + KEY_CALCULATION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "KeyCalculationExtension: {}", self.schema()) + } + + fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!("input size inconsistent"); + } + + let keys = match &self.keys { + KeysOrExprs::Keys(k) => KeysOrExprs::Keys(k.clone()), + KeysOrExprs::Exprs(_) => KeysOrExprs::Exprs(exprs), + }; + + Ok(Self { + name: self.name.clone(), + input: inputs[0].clone(), + keys, + schema: self.schema.clone(), + }) + } +} diff --git a/src/sql/extensions/lookup.rs b/src/sql/extensions/lookup.rs new file mode 100644 index 00000000..2dc76265 --- /dev/null +++ b/src/sql/extensions/lookup.rs @@ -0,0 +1,194 @@ +use datafusion::common::{Column, DFSchemaRef, JoinType, internal_err, plan_err}; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; +use datafusion::sql::TableReference; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; +use prost::Message; +use std::fmt::Formatter; +use std::sync::Arc; +use protocol::grpc::api; +use protocol::grpc::api::{ConnectorOp, LookupJoinCondition, LookupJoinOperator}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::multifield_partial_ord; +use crate::sql::schema::ConnectorTable; +use crate::sql::schema::utils::add_timestamp_field_arrow; +use 
crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::types::{FsSchema, FsSchemaRef}; + +pub const SOURCE_EXTENSION_NAME: &str = "LookupSource"; +pub const JOIN_EXTENSION_NAME: &str = "LookupJoin"; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct LookupSource { + pub(crate) table: ConnectorTable, + pub(crate) schema: DFSchemaRef, +} + +multifield_partial_ord!(LookupSource, table); + +impl UserDefinedLogicalNodeCore for LookupSource { + fn name(&self) -> &str { + SOURCE_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "LookupSource: {}", self.schema) + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + inputs: Vec, + ) -> datafusion::common::Result { + if !inputs.is_empty() { + return internal_err!("LookupSource cannot have inputs"); + } + + Ok(Self { + table: self.table.clone(), + schema: self.schema.clone(), + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct LookupJoin { + pub(crate) input: LogicalPlan, + pub(crate) schema: DFSchemaRef, + pub(crate) connector: ConnectorTable, + pub(crate) on: Vec<(Expr, Column)>, + pub(crate) filter: Option, + pub(crate) alias: Option, + pub(crate) join_type: JoinType, +} + +multifield_partial_ord!(LookupJoin, input, connector, on, filter, alias); + +impl StreamExtension for LookupJoin { + fn node_name(&self) -> Option { + None + } + + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> datafusion::common::Result { + let schema = FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into()))?; + let lookup_schema = FsSchema::from_schema_unkeyed(add_timestamp_field_arrow( + self.connector.physical_schema(), + ))?; + let join_config = LookupJoinOperator 
{ + input_schema: Some(schema.into()), + lookup_schema: Some(lookup_schema.into()), + connector: Some(ConnectorOp { + connector: self.connector.connector.clone(), + config: self.connector.config.clone(), + description: self.connector.description.clone(), + }), + key_exprs: self + .on + .iter() + .map(|(l, r)| { + let expr = planner.create_physical_expr(l, &self.schema)?; + let expr = serialize_physical_expr(&expr, &DefaultPhysicalExtensionCodec {})?; + Ok(LookupJoinCondition { + left_expr: expr.encode_to_vec(), + right_key: r.name.clone(), + }) + }) + .collect::>>()?, + join_type: match self.join_type { + JoinType::Inner => api::JoinType::Inner as i32, + JoinType::Left => api::JoinType::Left as i32, + j => { + return plan_err!( + "unsupported join type '{j}' for lookup join; only inner and left joins are supported" + ); + } + }, + ttl_micros: self + .connector + .lookup_cache_ttl + .map(|t| t.as_micros() as u64), + max_capacity_bytes: self.connector.lookup_cache_max_bytes, + }; + + let incoming_edge = + LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schemas[0]).clone()); + + Ok(NodeWithIncomingEdges { + node: LogicalNode::single( + index as u32, + format!("lookupjoin_{index}"), + OperatorName::LookupJoin, + join_config.encode_to_vec(), + format!("LookupJoin<{}>", self.connector.name), + 1, + ), + edges: vec![incoming_edge], + }) + } + + fn output_schema(&self) -> FsSchema { + FsSchema::from_schema_unkeyed(self.schema.inner().clone()).unwrap() + } +} + +impl UserDefinedLogicalNodeCore for LookupJoin { + fn name(&self) -> &str { + JOIN_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + let mut e: Vec<_> = self.on.iter().map(|(l, _)| l.clone()).collect(); + + if let Some(filter) = &self.filter { + e.push(filter.clone()); + } + + e + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, 
"LookupJoinExtension: {}", self.schema) + } + + fn with_exprs_and_inputs( + &self, + _: Vec, + inputs: Vec, + ) -> datafusion::common::Result { + Ok(Self { + input: inputs[0].clone(), + schema: self.schema.clone(), + connector: self.connector.clone(), + on: self.on.clone(), + filter: self.filter.clone(), + alias: self.alias.clone(), + join_type: self.join_type, + }) + } +} \ No newline at end of file diff --git a/src/sql/planner/extension/mod.rs b/src/sql/extensions/mod.rs similarity index 66% rename from src/sql/planner/extension/mod.rs rename to src/sql/extensions/mod.rs index 4de1892e..25632930 100644 --- a/src/sql/planner/extension/mod.rs +++ b/src/sql/extensions/mod.rs @@ -7,13 +7,33 @@ use datafusion::common::{DFSchemaRef, DataFusionError, Result, TableReference}; use datafusion::logical_expr::{ Expr, LogicalPlan, UserDefinedLogicalNode, UserDefinedLogicalNodeCore, }; - -use crate::datastream::logical::{LogicalEdge, LogicalNode}; -use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field}; +use datafusion_common::internal_err; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; +use prost::Message; +use protocol::grpc::api::{AsyncUdfOperator, AsyncUdfOrdering}; +use crate::sql::logical_node::logical::{DylibUdfConfig, LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::sql::logical_planner::updating_meta_field; +use crate::sql::extensions::aggregate::AggregateExtension; +use crate::sql::extensions::debezium::{DebeziumUnrollingExtension, ToDebeziumExtension}; +use crate::sql::extensions::join::JoinExtension; +use crate::sql::extensions::key_calculation::KeyCalculationExtension; +use crate::sql::extensions::lookup::LookupJoin; +use crate::sql::extensions::projection::ProjectionExtension; +use crate::sql::extensions::remote_table::RemoteTableExtension; +use crate::sql::extensions::sink::SinkExtension; +use 
crate::sql::extensions::table_source::TableSourceExtension; +use crate::sql::extensions::updating_aggregate::UpdatingAggregateExtension; +use crate::sql::extensions::watermark_node::WatermarkNode; +use crate::sql::extensions::window_fn::WindowFunctionExtension; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field}; use crate::sql::types::{ DFField, StreamSchema, TIMESTAMP_FIELD, fields_with_qualifiers, schema_from_df_fields, }; -use crate::types::FsSchemaRef; +use crate::types::{FsSchema, FsSchemaRef}; + +pub const ASYNC_RESULT_FIELD: &str = "__async_result"; pub(crate) mod aggregate; pub(crate) mod debezium; @@ -28,43 +48,29 @@ pub(crate) mod updating_aggregate; pub(crate) mod watermark_node; pub(crate) mod window_fn; -pub(crate) struct NodeWithIncomingEdges { - pub node: LogicalNode, - pub edges: Vec, -} pub(crate) trait StreamExtension: Debug { fn node_name(&self) -> Option; - fn plan_node( &self, - _planner: &super::physical_planner::Planner, - _index: usize, - _input_schemas: Vec, - ) -> Result { - Err(DataFusionError::NotImplemented(format!( - "plan_node not yet implemented for {:?}", - self - ))) - } - - fn output_schema(&self) -> StreamSchema; + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result; + fn output_schema(&self) -> FsSchema; fn transparent(&self) -> bool { false } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum NamedNode { - Source(TableReference), - Watermark(TableReference), - RemoteTable(TableReference), - Sink(TableReference), +pub(crate) struct NodeWithIncomingEdges { + pub node: LogicalNode, + pub edges: Vec, } fn try_from_t( node: &dyn UserDefinedLogicalNode, -) -> std::result::Result<&dyn StreamExtension, ()> { +) -> Result<&dyn StreamExtension, ()> { node.as_any() .downcast_ref::() .map(|t| t as &dyn StreamExtension) @@ -75,19 +81,6 @@ impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension { type Error 
= DataFusionError; fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result { - use aggregate::AggregateExtension; - use debezium::{DebeziumUnrollingExtension, ToDebeziumExtension}; - use join::JoinExtension; - use key_calculation::KeyCalculationExtension; - use lookup::{LookupJoin, LookupSource}; - use projection::ProjectionExtension; - use remote_table::RemoteTableExtension; - use sink::SinkExtension; - use table_source::TableSourceExtension; - use updating_aggregate::UpdatingAggregateExtension; - use watermark_node::WatermarkNode; - use window_fn::WindowFunctionExtension; - try_from_t::(node) .or_else(|_| try_from_t::(node)) .or_else(|_| try_from_t::(node)) @@ -101,9 +94,7 @@ impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension { .or_else(|_| try_from_t::(node)) .or_else(|_| try_from_t::(node)) .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name()))) } } @@ -130,8 +121,8 @@ macro_rules! multifield_partial_ord { Some(std::cmp::Ordering::Equal) } } - } } + } #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub(crate) struct TimestampAppendExtension { @@ -141,10 +132,11 @@ pub(crate) struct TimestampAppendExtension { } impl TimestampAppendExtension { - pub(crate) fn new(input: LogicalPlan, qualifier: Option) -> Self { + fn new(input: LogicalPlan, qualifier: Option) -> Self { if has_timestamp_field(input.schema()) { unreachable!( - "shouldn't be adding timestamp to a plan that already has it: {:?}", + "shouldn't be adding timestamp to a plan that already has it: plan :\n {:?}\n schema: {:?}", + input, input.schema() ); } @@ -195,8 +187,111 @@ impl UserDefinedLogicalNodeCore for TimestampAppendExtension { } } -/// Appends an `_updating_meta` and properly qualified `_timestamp` field -/// to the output schema of an updating aggregate. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct AsyncUDFExtension { + pub(crate) input: Arc, + pub(crate) name: String, + pub(crate) udf: DylibUdfConfig, + pub(crate) arg_exprs: Vec, + pub(crate) final_exprs: Vec, + pub(crate) ordered: bool, + pub(crate) max_concurrency: usize, + pub(crate) timeout: Duration, + pub(crate) final_schema: DFSchemaRef, +} + +multifield_partial_ord!( + AsyncUDFExtension, + input, + name, + udf, + arg_exprs, + final_exprs, + ordered, + max_concurrency, + timeout +); + +impl StreamExtension for AsyncUDFExtension { + fn node_name(&self) -> Option { + None + } + + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + let arg_exprs = self + .arg_exprs + .iter() + .map(|e| { + let p = planner.create_physical_expr(e, self.input.schema())?; + Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec()) + }) + .collect::>>()?; + + let mut final_fields = fields_with_qualifiers(self.input.schema()); + final_fields.push(DFField::new( + None, + ASYNC_RESULT_FIELD, + self.udf.return_type.clone(), + true, + )); + let post_udf_schema = schema_from_df_fields(&final_fields)?; + + let final_exprs = self + .final_exprs + .iter() + .map(|e| { + let p = planner.create_physical_expr(e, &post_udf_schema)?; + Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec()) + }) + .collect::>>()?; + + let config = AsyncUdfOperator { + name: self.name.clone(), + udf: Some(self.udf.clone().into()), + arg_exprs, + final_exprs, + ordering: if self.ordered { + AsyncUdfOrdering::Ordered as i32 + } else { + AsyncUdfOrdering::Unordered as i32 + }, + max_concurrency: self.max_concurrency as u32, + timeout_micros: self.timeout.as_micros() as u64, + }; + + let node = LogicalNode::single( + index as u32, + format!("async_udf_{index}"), + OperatorName::AsyncUdf, + config.encode_to_vec(), + format!("async_udf<{}>", self.name), + 1, + ); + + let incoming_edge = + 
LogicalEdge::project_all(LogicalEdgeType::Forward, input_schemas[0].as_ref().clone()); + Ok(NodeWithIncomingEdges { + node, + edges: vec![incoming_edge], + }) + } + + fn output_schema(&self) -> FsSchema { + FsSchema::from_fields( + self.final_schema + .fields() + .iter() + .map(|f| (**f).clone()) + .collect(), + ) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub(crate) struct IsRetractExtension { pub(crate) input: LogicalPlan, @@ -217,6 +312,7 @@ impl IsRetractExtension { DataType::Timestamp(TimeUnit::Nanosecond, None), false, ); + output_fields.push((timestamp_qualifier.clone(), updating_meta_field()).into()); let schema = Arc::new(schema_from_df_fields(&output_fields).unwrap()); Self { input, @@ -255,42 +351,6 @@ impl UserDefinedLogicalNodeCore for IsRetractExtension { } } -impl StreamExtension for IsRetractExtension { - fn node_name(&self) -> Option { - None - } - - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() - } -} - -pub(crate) const ASYNC_RESULT_FIELD: &str = "__async_result"; - -/// Extension node for async UDF calls in streaming projections. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct AsyncUDFExtension { - pub(crate) input: Arc, - pub(crate) name: String, - pub(crate) arg_exprs: Vec, - pub(crate) final_exprs: Vec, - pub(crate) ordered: bool, - pub(crate) max_concurrency: usize, - pub(crate) timeout: Duration, - pub(crate) final_schema: DFSchemaRef, -} - -multifield_partial_ord!( - AsyncUDFExtension, - input, - name, - arg_exprs, - final_exprs, - ordered, - max_concurrency, - timeout -); - impl UserDefinedLogicalNodeCore for AsyncUDFExtension { fn name(&self) -> &str { "AsyncUDFNode" @@ -308,7 +368,7 @@ impl UserDefinedLogicalNodeCore for AsyncUDFExtension { self.arg_exprs .iter() .chain(self.final_exprs.iter()) - .cloned() + .map(|e| e.to_owned()) .collect() } @@ -318,17 +378,16 @@ impl UserDefinedLogicalNodeCore for AsyncUDFExtension { fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { if inputs.len() != 1 { - return Err(DataFusionError::Internal("input size inconsistent".into())); + return internal_err!("input size inconsistent"); } if UserDefinedLogicalNode::expressions(self) != exprs { - return Err(DataFusionError::Internal( - "Tried to recreate async UDF node with different expressions".into(), - )); + return internal_err!("Tried to recreate async UDF node with different expressions"); } Ok(Self { input: Arc::new(inputs[0].clone()), name: self.name.clone(), + udf: self.udf.clone(), arg_exprs: self.arg_exprs.clone(), final_exprs: self.final_exprs.clone(), ordered: self.ordered, @@ -338,19 +397,3 @@ impl UserDefinedLogicalNodeCore for AsyncUDFExtension { }) } } - -impl StreamExtension for AsyncUDFExtension { - fn node_name(&self) -> Option { - None - } - - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_fields( - self.final_schema - .fields() - .iter() - .map(|f| (**f).clone()) - .collect(), - ) - } -} diff --git a/src/sql/extensions/projection.rs b/src/sql/extensions/projection.rs new file mode 100644 index 00000000..fa0f118b --- /dev/null +++ 
b/src/sql/extensions/projection.rs @@ -0,0 +1,154 @@ + +use datafusion::common::{DFSchema, DFSchemaRef, Result, internal_err}; +use std::{fmt::Formatter, sync::Arc}; + +use super::{StreamExtension, NodeWithIncomingEdges}; +use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore}; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; +use itertools::Itertools; +use prost::Message; +use protocol::grpc::api::ProjectionOperator; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::multifield_partial_ord; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::types::{schema_from_df_fields, DFField}; +use crate::types::{FsSchema, FsSchemaRef}; + +pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension"; + +/// Projection operations +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct ProjectionExtension { + pub(crate) inputs: Vec, + pub(crate) name: Option, + pub(crate) exprs: Vec, + pub(crate) schema: DFSchemaRef, + pub(crate) shuffle: bool, +} + +multifield_partial_ord!(ProjectionExtension, name, exprs); + +impl ProjectionExtension { + pub(crate) fn new(inputs: Vec, name: Option, exprs: Vec) -> Self { + let input_schema = inputs.first().unwrap().schema(); + let fields = exprs + .iter() + .map(|e| DFField::from(e.to_field(input_schema).unwrap())) + .collect_vec(); + + let schema = Arc::new(schema_from_df_fields(&fields).unwrap()); + + Self { + inputs, + name, + exprs, + schema, + shuffle: false, + } + } + + pub(crate) fn shuffled(mut self) -> Self { + self.shuffle = true; + self + } +} + +impl StreamExtension for ProjectionExtension { + fn node_name(&self) -> Option { + None + } + + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + let input_schema = (*input_schemas[0]).clone(); + + // check that 
all inputs have the same schemas + for s in input_schemas.iter().skip(1) { + if **s != input_schema { + return internal_err!("all input schemas to a projection node must mast"); + } + } + + let input_df_schema = Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone())?); + let mut physical_exprs = vec![]; + + for e in &self.exprs { + let phys = planner + .create_physical_expr(e, &input_df_schema) + .map_err(|e| e.context("projection"))?; + physical_exprs.push( + serialize_physical_expr(&phys, &DefaultPhysicalExtensionCodec {})?.encode_to_vec(), + ); + } + + let config = ProjectionOperator { + name: self.name.as_deref().unwrap_or("projection").to_string(), + input_schema: Some(input_schema.clone().into()), + + output_schema: Some(self.output_schema().into()), + exprs: physical_exprs, + }; + + let node = LogicalNode::single( + index as u32, + format!("projection_{index}"), + OperatorName::Projection, + config.encode_to_vec(), + format!("ArrowProjection<{}>", self.name.as_deref().unwrap_or("_")), + 1, + ); + + let edge_type = if self.shuffle { + LogicalEdgeType::Shuffle + } else { + LogicalEdgeType::Forward + }; + + let edge = LogicalEdge::project_all(edge_type, input_schema); + Ok(NodeWithIncomingEdges { + node, + edges: vec![edge], + }) + } + + fn output_schema(&self) -> FsSchema { + FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_arrow().clone())).unwrap() + } +} + +impl UserDefinedLogicalNodeCore for ProjectionExtension { + fn name(&self) -> &str { + PROJECTION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + self.inputs.iter().collect() + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "KeyCalculationExtension: {}", self.schema()) + } + + fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { + Ok(Self { + name: self.name.clone(), + inputs, + exprs, + schema: self.schema.clone(), + 
shuffle: self.shuffle, + }) + } +} diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs new file mode 100644 index 00000000..91ef4d0e --- /dev/null +++ b/src/sql/extensions/remote_table.rs @@ -0,0 +1,124 @@ +use std::{fmt::Formatter, sync::Arc}; + +use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err, plan_err}; + +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; +use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode}; +use prost::Message; +use protocol::grpc::api::ValuePlanOperator; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::multifield_partial_ord; +use crate::sql::logical_planner::FsPhysicalExtensionCodec; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::types::{FsSchema, FsSchemaRef}; +use super::{StreamExtension, NodeWithIncomingEdges}; + +pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension"; + +/* Lightweight extension that allows us to segment the graph and merge nodes with the same name. + An Extension Planner will be used to isolate computation to individual nodes. 
+*/ +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct RemoteTableExtension { + pub(crate) input: LogicalPlan, + pub(crate) name: TableReference, + pub(crate) schema: DFSchemaRef, + pub(crate) materialize: bool, +} + +multifield_partial_ord!(RemoteTableExtension, input, name, materialize); + +impl StreamExtension for RemoteTableExtension { + fn node_name(&self) -> Option { + if self.materialize { + Some(NamedNode::RemoteTable(self.name.to_owned())) + } else { + None + } + } + + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + match input_schemas.len() { + 0 => return plan_err!("RemoteTableExtension should have exactly one input"), + 1 => {} + _multiple_inputs => { + // check they are all the same + let first = input_schemas[0].clone(); + for schema in input_schemas.iter().skip(1) { + if *schema != first { + return plan_err!( + "If a node has multiple inputs, they must all have the same schema" + ); + } + } + } + } + let physical_plan = planner.sync_plan(&self.input)?; + let physical_plan_node = PhysicalPlanNode::try_from_physical_plan( + physical_plan, + &FsPhysicalExtensionCodec::default(), + )?; + let config = ValuePlanOperator { + name: format!("value_calculation({})", self.name), + physical_plan: physical_plan_node.encode_to_vec(), + }; + let node = LogicalNode::single( + index as u32, + format!("value_{index}"), + OperatorName::ArrowValue, + config.encode_to_vec(), + self.name.to_string(), + 1, + ); + + let edges = input_schemas + .into_iter() + .map(|schema| LogicalEdge::project_all(LogicalEdgeType::Forward, (*schema).clone())) + .collect(); + Ok(NodeWithIncomingEdges { node, edges }) + } + + fn output_schema(&self) -> FsSchema { + FsSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap() + } +} + +impl UserDefinedLogicalNodeCore for RemoteTableExtension { + fn name(&self) -> &str { + REMOTE_TABLE_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + 
} + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "RemoteTableExtension: {}", self.schema) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!("input size inconsistent"); + } + + Ok(Self { + input: inputs[0].clone(), + name: self.name.clone(), + schema: self.schema.clone(), + materialize: self.materialize, + }) + } +} diff --git a/src/sql/extensions/sink.rs b/src/sql/extensions/sink.rs new file mode 100644 index 00000000..7b58a7b4 --- /dev/null +++ b/src/sql/extensions/sink.rs @@ -0,0 +1,168 @@ +use std::sync::Arc; + +use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err}; + +use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore}; + +use prost::Message; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::multifield_partial_ord; +use crate::sql::schema::Table; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; +use super::{ + StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension, + remote_table::RemoteTableExtension, +}; + +pub(crate) const SINK_NODE_NAME: &str = "SinkExtension"; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct SinkExtension { + pub(crate) name: TableReference, + pub(crate) table: Table, + pub(crate) schema: DFSchemaRef, + inputs: Arc>, +} + +multifield_partial_ord!(SinkExtension, name, inputs); + +impl SinkExtension { + pub fn new( + name: TableReference, + table: Table, + mut schema: DFSchemaRef, + mut input: Arc, + ) -> Result { + let input_is_updating = input + .schema() + .has_column_with_unqualified_name(UPDATING_META_FIELD); + match &table { + Table::ConnectorTable(connector_table) => { + match 
(input_is_updating, connector_table.is_updating()) { + (_, true) => { + let to_debezium_extension = + ToDebeziumExtension::try_new(input.as_ref().clone())?; + input = Arc::new(LogicalPlan::Extension(Extension { + node: Arc::new(to_debezium_extension), + })); + schema = input.schema().clone(); + } + (true, false) => { + return plan_err!( + "input is updating, but sink is not configured as an updating sink (hint: use `format = 'debezium_json'`)" + ); + } + (false, false) => {} + } + } + Table::LookupTable(..) => return plan_err!("cannot use a lookup table as a sink"), + Table::TableFromQuery { .. } => {} + + } + Self::add_remote_if_necessary(&schema, &mut input); + + let inputs = Arc::new(vec![(*input).clone()]); + Ok(Self { + name, + table, + schema, + inputs, + }) + } + + // The input to a sink needs to be a non-transparent logical plan extension. + // If it isn't, wrap the input in a RemoteTableExtension. + pub fn add_remote_if_necessary(schema: &DFSchemaRef, input: &mut Arc) { + if let LogicalPlan::Extension(node) = input.as_ref() { + let arroyo_extension: &dyn StreamExtension = (&node.node).try_into().unwrap(); + if !arroyo_extension.transparent() { + return; + } + } + let remote_table_extension = RemoteTableExtension { + input: input.as_ref().clone(), + name: TableReference::bare("sink projection"), + schema: schema.clone(), + materialize: false, + }; + *input = Arc::new(LogicalPlan::Extension(Extension { + node: Arc::new(remote_table_extension), + })); + } +} + +impl UserDefinedLogicalNodeCore for SinkExtension { + fn name(&self) -> &str { + SINK_NODE_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + self.inputs.iter().collect() + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "SinkExtension({:?}): {}", self.name, self.schema) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { 
+ Ok(Self { + name: self.name.clone(), + table: self.table.clone(), + schema: self.schema.clone(), + inputs: Arc::new(inputs), + }) + } +} + +impl StreamExtension for SinkExtension { + fn node_name(&self) -> Option { + match &self.table { + _ => Some(NamedNode::Sink(self.name.clone())), + } + } + + fn plan_node( + &self, + _planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + let operator_config = (self + .table + .connector_op() + .map_err(|e| e.context("connector op"))?) + .encode_to_vec(); + + let node = LogicalNode::single( + index as u32, + format!("sink_{}_{}", self.name, index), + OperatorName::ConnectorSink, + operator_config, + self.table.connector_op()?.description.clone(), + 1, + ); + + let edges = input_schemas + .into_iter() + .map(|input_schema| { + LogicalEdge::project_all(LogicalEdgeType::Forward, (*input_schema).clone()) + }) + .collect(); + Ok(NodeWithIncomingEdges { node, edges }) + } + + + + fn output_schema(&self) -> FsSchema { + FsSchema::from_fields(vec![]) + } +} \ No newline at end of file diff --git a/src/sql/planner/extension/table_source.rs b/src/sql/extensions/table_source.rs similarity index 54% rename from src/sql/planner/extension/table_source.rs rename to src/sql/extensions/table_source.rs index cab3ae3d..bdf470e2 100644 --- a/src/sql/planner/extension/table_source.rs +++ b/src/sql/extensions/table_source.rs @@ -1,15 +1,22 @@ use std::sync::Arc; -use datafusion::common::{DFSchemaRef, Result, TableReference}; -use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; +use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err}; -use super::{NamedNode, StreamExtension}; -use crate::multifield_partial_ord; -use crate::sql::catalog::connector_table::ConnectorTable; -use crate::sql::catalog::field_spec::FieldSpec; -use crate::sql::planner::schemas::add_timestamp_field; -use crate::sql::types::{StreamSchema, schema_from_df_fields}; +use datafusion::logical_expr::{Expr, Extension, 
LogicalPlan, UserDefinedLogicalNodeCore}; +use prost::Message; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::multifield_partial_ord; +use crate::sql::schema::{ConnectorTable, FieldSpec, Table}; +use crate::sql::schema::utils::add_timestamp_field; +use crate::sql::extensions::debezium::DebeziumUnrollingExtension; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::types::schema_from_df_fields; +use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; +use super::{ + StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension, + remote_table::RemoteTableExtension, +}; pub(crate) const TABLE_SOURCE_NAME: &str = "TableSourceExtension"; #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -36,11 +43,8 @@ impl TableSourceExtension { let base_schema = Arc::new(schema_from_df_fields(&physical_fields).unwrap()); let schema = if table.is_updating() { - super::debezium::DebeziumUnrollingExtension::as_debezium_schema( - &base_schema, - Some(name.clone()), - ) - .unwrap() + DebeziumUnrollingExtension::as_debezium_schema(&base_schema, Some(name.clone())) + .unwrap() } else { base_schema }; @@ -88,7 +92,31 @@ impl StreamExtension for TableSourceExtension { Some(NamedNode::Source(self.name.clone())) } - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap() + fn plan_node( + &self, + _planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + if !input_schemas.is_empty() { + return plan_err!("TableSourceExtension should not have inputs"); + } + let sql_source = self.table.as_sql_source()?; + let node = LogicalNode::single( + index as u32, + format!("source_{}_{}", self.name, index), + OperatorName::ConnectorSource, + sql_source.source.config.encode_to_vec(), + sql_source.source.config.description.clone(), + 1, + ); + Ok(NodeWithIncomingEdges { + node, + edges: vec![], + }) + } + + fn 
output_schema(&self) -> FsSchema { + FsSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap() } } diff --git a/src/sql/extensions/updating_aggregate.rs b/src/sql/extensions/updating_aggregate.rs new file mode 100644 index 00000000..fdb2bb1d --- /dev/null +++ b/src/sql/extensions/updating_aggregate.rs @@ -0,0 +1,165 @@ +use datafusion::common::{DFSchemaRef, Result, TableReference, ToDFSchema, plan_err}; +use datafusion::logical_expr::expr::ScalarFunction; +use datafusion::logical_expr::{ + Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, col, lit, +}; +use datafusion::prelude::named_struct; +use datafusion::scalar::ScalarValue; +use datafusion_proto::physical_plan::AsExecutionPlan; +use datafusion_proto::protobuf::PhysicalPlanNode; +use prost::Message; +use std::sync::Arc; +use std::time::Duration; +use protocol::grpc::api::UpdatingAggregateOperator; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::sql::functions::multi_hash; +use crate::sql::logical_planner::FsPhysicalExtensionCodec; +use crate::sql::extensions::{IsRetractExtension, NodeWithIncomingEdges, StreamExtension}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::types::{FsSchema, FsSchemaRef}; + +pub(crate) const UPDATING_AGGREGATE_EXTENSION_NAME: &str = "UpdatingAggregateExtension"; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub(crate) struct UpdatingAggregateExtension { + pub(crate) aggregate: LogicalPlan, + pub(crate) key_fields: Vec, + pub(crate) final_calculation: LogicalPlan, + pub(crate) timestamp_qualifier: Option, + pub(crate) ttl: Duration, +} + +impl UpdatingAggregateExtension { + pub fn new( + aggregate: LogicalPlan, + key_fields: Vec, + timestamp_qualifier: Option, + ttl: Duration, + ) -> Result { + let final_calculation = LogicalPlan::Extension(Extension { + node: Arc::new(IsRetractExtension::new( + aggregate.clone(), + timestamp_qualifier.clone(), + 
)), + }); + + Ok(Self { + aggregate, + key_fields, + final_calculation, + timestamp_qualifier, + ttl, + }) + } +} + +impl UserDefinedLogicalNodeCore for UpdatingAggregateExtension { + fn name(&self) -> &str { + UPDATING_AGGREGATE_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.aggregate] + } + + fn schema(&self) -> &DFSchemaRef { + self.final_calculation.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "UpdatingAggregateExtension") + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + inputs: Vec, + ) -> Result { + Self::new( + inputs[0].clone(), + self.key_fields.clone(), + self.timestamp_qualifier.clone(), + self.ttl, + ) + } +} + +impl StreamExtension for UpdatingAggregateExtension { + fn node_name(&self) -> Option { + None + } + + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + if input_schemas.len() != 1 { + return plan_err!( + "UpdatingAggregateExtension requires exactly one input schema, found {}", + input_schemas.len() + ); + } + + let input_schema = input_schemas[0].clone(); + let input_dfschema = input_schema.schema.clone().to_dfschema()?; + + let aggregate_exec = PhysicalPlanNode::try_from_physical_plan( + planner.sync_plan(&self.aggregate)?, + &FsPhysicalExtensionCodec::default(), + )?; + + let key_exprs: Vec = self + .key_fields + .iter() + .map(|&i| col(input_schema.schema.field(i).name())) + .collect(); + let hash_expr = if key_exprs.is_empty() { + Expr::Literal(ScalarValue::FixedSizeBinary(16, Some(vec![0; 16])), None) + } else { + Expr::ScalarFunction(ScalarFunction { + func: multi_hash(), + args: key_exprs, + }) + }; + + let updating_meta_expr = + named_struct(vec![lit("is_retract"), lit(false), lit("id"), hash_expr]); + + let config = UpdatingAggregateOperator { + name: "UpdatingAggregate".to_string(), + input_schema: Some((*input_schema).clone().into()), + 
final_schema: Some(self.output_schema().into()), + aggregate_exec: aggregate_exec.encode_to_vec(), + metadata_expr: planner + .serialize_as_physical_expr(&updating_meta_expr, &input_dfschema)?, + flush_interval_micros: 10_000_000, + ttl_micros: self.ttl.as_micros() as u64, + }; + + let node = LogicalNode::single( + index as u32, + format!("updating_aggregate_{index}"), + OperatorName::UpdatingAggregate, + config.encode_to_vec(), + "UpdatingAggregate".to_string(), + 1, + ); + + let edge = LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schema).clone()); + + Ok(NodeWithIncomingEdges { + node, + edges: vec![edge], + }) + } + + fn output_schema(&self) -> FsSchema { + FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap() + } +} diff --git a/src/sql/planner/extension/watermark_node.rs b/src/sql/extensions/watermark_node.rs similarity index 57% rename from src/sql/planner/extension/watermark_node.rs rename to src/sql/extensions/watermark_node.rs index a06bdb9a..f13b3472 100644 --- a/src/sql/planner/extension/watermark_node.rs +++ b/src/sql/extensions/watermark_node.rs @@ -1,19 +1,20 @@ -use std::fmt::Formatter; -use std::sync::Arc; - use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err}; use datafusion::error::DataFusionError; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; - +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; +use prost::Message; +use std::fmt::Formatter; +use std::sync::Arc; +use protocol::grpc::api::ExpressionWatermarkConfig; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; use crate::multifield_partial_ord; -use crate::sql::planner::extension::{NamedNode, StreamExtension}; -use crate::sql::planner::schemas::add_timestamp_field; -use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD}; +use 
crate::sql::schema::utils::add_timestamp_field; +use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::types::{FsSchema, FsSchemaRef}; pub(crate) const WATERMARK_NODE_NAME: &str = "WatermarkNode"; - -/// Represents a watermark node in the streaming query plan. -/// Watermarks track event-time progress and enable time-based operations. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct WatermarkNode { pub input: LogicalPlan, @@ -62,7 +63,7 @@ impl UserDefinedLogicalNodeCore for WatermarkNode { let timestamp_index = self .schema - .index_of_column_by_name(Some(&self.qualifier), TIMESTAMP_FIELD) + .index_of_column_by_name(Some(&self.qualifier), "_timestamp") .ok_or_else(|| DataFusionError::Plan("missing timestamp column".to_string()))?; Ok(Self { @@ -80,8 +81,38 @@ impl StreamExtension for WatermarkNode { Some(NamedNode::Watermark(self.qualifier.clone())) } - fn output_schema(&self) -> StreamSchema { - self.stream_schema() + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + let expression = planner.create_physical_expr(&self.watermark_expression, &self.schema)?; + let expression = serialize_physical_expr(&expression, &DefaultPhysicalExtensionCodec {})?; + let node = LogicalNode::single( + index as u32, + format!("watermark_{index}"), + OperatorName::ExpressionWatermark, + ExpressionWatermarkConfig { + period_micros: 1_000_000, + idle_time_micros: None, + expression: expression.encode_to_vec(), + input_schema: Some(self.arroyo_schema().into()), + } + .encode_to_vec(), + "watermark".to_string(), + 1, + ); + + let incoming_edge = + LogicalEdge::project_all(LogicalEdgeType::Forward, input_schemas[0].as_ref().clone()); + Ok(NodeWithIncomingEdges { + node, + edges: vec![incoming_edge], + }) + } + fn output_schema(&self) -> FsSchema { + self.arroyo_schema() } } @@ -93,7 +124,7 @@ impl WatermarkNode { ) -> Result { let schema = 
add_timestamp_field(input.schema().clone(), Some(qualifier.clone()))?; let timestamp_index = schema - .index_of_column_by_name(None, TIMESTAMP_FIELD) + .index_of_column_by_name(None, "_timestamp") .ok_or_else(|| DataFusionError::Plan("missing _timestamp column".to_string()))?; Ok(Self { input, @@ -103,8 +134,7 @@ impl WatermarkNode { timestamp_index, }) } - - pub(crate) fn stream_schema(&self) -> StreamSchema { - StreamSchema::new_unkeyed(Arc::new(self.schema.as_ref().into()), self.timestamp_index) + pub(crate) fn arroyo_schema(&self) -> FsSchema { + FsSchema::new_unkeyed(Arc::new(self.schema.as_ref().into()), self.timestamp_index) } } diff --git a/src/sql/extensions/window_fn.rs b/src/sql/extensions/window_fn.rs new file mode 100644 index 00000000..1c8b5687 --- /dev/null +++ b/src/sql/extensions/window_fn.rs @@ -0,0 +1,123 @@ +use std::sync::Arc; +use datafusion::common::{Column, DFSchema, DFSchemaRef, Result, plan_err}; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; +use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode}; +use prost::Message; +use protocol::grpc::api::WindowFunctionOperator; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::sql::logical_planner::FsPhysicalExtensionCodec; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::types::TIMESTAMP_FIELD; +use crate::types::{FsSchema, FsSchemaRef}; +use super::{ NodeWithIncomingEdges, StreamExtension}; + +pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension"; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub(crate) struct WindowFunctionExtension { + window_plan: LogicalPlan, + key_fields: Vec, +} + +impl WindowFunctionExtension { + pub fn new(window_plan: LogicalPlan, 
key_fields: Vec) -> Self { + Self { + window_plan, + key_fields, + } + } +} + +impl UserDefinedLogicalNodeCore for WindowFunctionExtension { + fn name(&self) -> &str { + WINDOW_FUNCTION_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.window_plan] + } + + fn schema(&self) -> &DFSchemaRef { + self.window_plan.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "WindowFunction: {}", self.schema()) + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + inputs: Vec, + ) -> Result { + Ok(Self::new(inputs[0].clone(), self.key_fields.clone())) + } +} + +impl StreamExtension for WindowFunctionExtension { + fn node_name(&self) -> Option { + None + } + + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + if input_schemas.len() != 1 { + return plan_err!("WindowFunctionExtension requires exactly one input"); + } + let input_schema = input_schemas[0].clone(); + let input_df_schema = + Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone()).unwrap()); + + let binning_function = planner.create_physical_expr( + &Expr::Column(Column::new_unqualified(TIMESTAMP_FIELD.to_string())), + &input_df_schema, + )?; + let binning_function_proto = + serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {})?; + + let window_plan = planner.sync_plan(&self.window_plan)?; + let codec = FsPhysicalExtensionCodec::default(); + let window_plan_proto = PhysicalPlanNode::try_from_physical_plan(window_plan, &codec)?; + + let config = WindowFunctionOperator { + name: "WindowFunction".to_string(), + input_schema: Some(input_schema.as_ref().clone().into()), + binning_function: binning_function_proto.encode_to_vec(), + window_function_plan: window_plan_proto.encode_to_vec(), + }; + + let logical_node = LogicalNode::single( + index as u32, + format!("window_function_{index}"), + 
OperatorName::WindowFunction, + config.encode_to_vec(), + "window function".to_string(), + 1, + ); + + let edge = LogicalEdge::project_all( + // TODO: detect when this shuffle is unnecessary + LogicalEdgeType::Shuffle, + input_schema.as_ref().clone(), + ); + + Ok(NodeWithIncomingEdges { + node: logical_node, + edges: vec![edge], + }) + } + + fn output_schema(&self) -> FsSchema { + FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().clone().into())).unwrap() + } +} diff --git a/src/sql/functions/mod.rs b/src/sql/functions/mod.rs index 84d3c7d4..bfd59654 100644 --- a/src/sql/functions/mod.rs +++ b/src/sql/functions/mod.rs @@ -1,4 +1,4 @@ -use crate::sql::planner::StreamSchemaProvider; +use crate::sql::schema::StreamSchemaProvider; use datafusion::arrow::array::{ Array, ArrayRef, StringArray, UnionArray, builder::{FixedSizeBinaryBuilder, ListBuilder, StringBuilder}, diff --git a/src/datastream/logical.rs b/src/sql/logical_node/logical.rs similarity index 80% rename from src/datastream/logical.rs rename to src/sql/logical_node/logical.rs index a6486760..13560a3e 100644 --- a/src/datastream/logical.rs +++ b/src/sql/logical_node/logical.rs @@ -1,7 +1,5 @@ use itertools::Itertools; -use crate::datastream::optimizers::Optimizer; -use crate::sql::types::StreamSchema; use datafusion::arrow::datatypes::DataType; use petgraph::Direction; use petgraph::dot::Dot; @@ -9,7 +7,11 @@ use petgraph::graph::DiGraph; use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Display, Formatter}; use std::sync::Arc; +use datafusion_proto::protobuf::ArrowType; +use prost::Message; use strum::{Display, EnumString}; +use protocol::grpc::api; +use crate::types::FsSchema; #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)] pub enum OperatorName { @@ -52,18 +54,18 @@ impl Display for LogicalEdgeType { #[derive(Clone, Debug, Eq, PartialEq)] pub struct LogicalEdge { pub edge_type: LogicalEdgeType, - pub schema: Arc, + pub schema: Arc, } impl LogicalEdge { - pub fn 
new(edge_type: LogicalEdgeType, schema: StreamSchema) -> Self { + pub fn new(edge_type: LogicalEdgeType, schema: FsSchema) -> Self { LogicalEdge { edge_type, schema: Arc::new(schema), } } - pub fn project_all(edge_type: LogicalEdgeType, schema: StreamSchema) -> Self { + pub fn project_all(edge_type: LogicalEdgeType, schema: FsSchema) -> Self { LogicalEdge { edge_type, schema: Arc::new(schema), @@ -81,7 +83,7 @@ pub struct ChainedLogicalOperator { #[derive(Clone, Debug)] pub struct OperatorChain { pub(crate) operators: Vec, - pub(crate) edges: Vec>, + pub(crate) edges: Vec>, } impl OperatorChain { @@ -94,7 +96,7 @@ impl OperatorChain { pub fn iter( &self, - ) -> impl Iterator>)> { + ) -> impl Iterator>)> { self.operators .iter() .zip_longest(self.edges.iter()) @@ -104,7 +106,7 @@ impl OperatorChain { pub fn iter_mut( &mut self, - ) -> impl Iterator>)> { + ) -> impl Iterator>)> { self.operators .iter_mut() .zip_longest(self.edges.iter()) @@ -190,6 +192,18 @@ impl Debug for LogicalNode { pub type LogicalGraph = DiGraph; +pub trait Optimizer { + fn optimize_once(&self, plan: &mut LogicalGraph) -> bool; + + fn optimize(&self, plan: &mut LogicalGraph) { + loop { + if !self.optimize_once(plan) { + break; + } + } + } +} + #[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd)] pub struct DylibUdfConfig { pub dylib_path: String, @@ -315,3 +329,50 @@ impl LogicalProgram { s } } + + +impl From for api::DylibUdfConfig { + fn from(from: DylibUdfConfig) -> Self { + api::DylibUdfConfig { + dylib_path: from.dylib_path, + arg_types: from + .arg_types + .iter() + .map(|t| { + ArrowType::try_from(t) + .expect("unsupported data type") + .encode_to_vec() + }) + .collect(), + return_type: ArrowType::try_from(&from.return_type) + .expect("unsupported data type") + .encode_to_vec(), + aggregate: from.aggregate, + is_async: from.is_async, + } + } +} + +impl From for DylibUdfConfig { + fn from(from: api::DylibUdfConfig) -> Self { + DylibUdfConfig { + dylib_path: from.dylib_path, + 
arg_types: from + .arg_types + .iter() + .map(|t| { + DataType::try_from( + &ArrowType::decode(&mut t.as_slice()).expect("invalid arrow type"), + ) + .expect("invalid arrow type") + }) + .collect(), + return_type: DataType::try_from( + &ArrowType::decode(&mut from.return_type.as_slice()).unwrap(), + ) + .expect("invalid arrow type"), + aggregate: from.aggregate, + is_async: from.is_async, + } + } +} \ No newline at end of file diff --git a/src/sql/logical_node/mod.rs b/src/sql/logical_node/mod.rs new file mode 100644 index 00000000..82d25f24 --- /dev/null +++ b/src/sql/logical_node/mod.rs @@ -0,0 +1 @@ +pub mod logical; diff --git a/src/sql/physical/mod.rs b/src/sql/logical_planner/mod.rs similarity index 99% rename from src/sql/physical/mod.rs rename to src/sql/logical_planner/mod.rs index bfb37f11..e4db07a0 100644 --- a/src/sql/physical/mod.rs +++ b/src/sql/logical_planner/mod.rs @@ -29,8 +29,8 @@ use std::{ use crate::make_udf_function; use crate::sql::functions::MultiHashFunction; -use crate::sql::planner::rewrite::UNNESTED_COL; -use crate::sql::planner::schemas::window_arrow_struct; +use crate::sql::analysis::UNNESTED_COL; +use crate::sql::schema::utils::window_arrow_struct; use crate::types::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; use datafusion::arrow::datatypes::{TimestampNanosecondType, UInt64Type}; use datafusion::catalog::memory::MemorySourceConfig; @@ -56,6 +56,9 @@ use std::fmt::Debug; use tokio::sync::mpsc::UnboundedReceiver; use tokio_stream::wrappers::UnboundedReceiverStream; +pub(crate) mod planner; +pub mod optimizers; + // ─────────────────── Updating Meta Helpers ─────────────────── pub fn updating_meta_fields() -> Fields { diff --git a/src/datastream/optimizers.rs b/src/sql/logical_planner/optimizers.rs similarity index 88% rename from src/datastream/optimizers.rs rename to src/sql/logical_planner/optimizers.rs index 2d258aff..bdf32657 100644 --- a/src/datastream/optimizers.rs +++ b/src/sql/logical_planner/optimizers.rs @@ -1,20 +1,8 @@ -use 
crate::datastream::logical::{LogicalEdgeType, LogicalGraph}; +use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer}; use petgraph::prelude::*; use petgraph::visit::NodeRef; use std::mem; -pub trait Optimizer { - fn optimize_once(&self, plan: &mut LogicalGraph) -> bool; - - fn optimize(&self, plan: &mut LogicalGraph) { - loop { - if !self.optimize_once(plan) { - break; - } - } - } -} - pub struct ChainingOptimizer {} fn remove_in_place(graph: &mut DiGraph, node: NodeIndex) { diff --git a/src/sql/planner/physical_planner.rs b/src/sql/logical_planner/planner.rs similarity index 95% rename from src/sql/planner/physical_planner.rs rename to src/sql/logical_planner/planner.rs index e7e1cf60..150b86f1 100644 --- a/src/sql/planner/physical_planner.rs +++ b/src/sql/logical_planner/planner.rs @@ -26,22 +26,32 @@ use tokio::runtime::Builder; use tokio::sync::oneshot; use async_trait::async_trait; +use datafusion_common::TableReference; use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; -use crate::datastream::logical::{LogicalEdge, LogicalGraph, LogicalNode}; -use crate::sql::physical::{ +use crate::sql::logical_node::logical::{LogicalEdge, LogicalGraph, LogicalNode}; +use crate::sql::logical_planner::{ DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec, }; -use crate::sql::planner::StreamSchemaProvider; -use crate::sql::planner::extension::debezium::{ +use crate::sql::extensions::debezium::{ DEBEZIUM_UNROLLING_EXTENSION_NAME, DebeziumUnrollingExtension, TO_DEBEZIUM_EXTENSION_NAME, }; -use crate::sql::planner::extension::key_calculation::KeyCalculationExtension; -use crate::sql::planner::extension::{NamedNode, NodeWithIncomingEdges, StreamExtension}; -use crate::sql::planner::schemas::add_timestamp_field_arrow; +use crate::sql::extensions::key_calculation::KeyCalculationExtension; +use 
crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; +use crate::sql::schema::utils::add_timestamp_field_arrow; +use crate::sql::schema::StreamSchemaProvider; use crate::types::{FsSchema, FsSchemaRef}; +#[derive(Eq, Hash, PartialEq)] +#[derive(Debug)] +pub(crate) enum NamedNode { + Source(TableReference), + Watermark(TableReference), + RemoteTable(TableReference), + Sink(TableReference), +} + pub(crate) struct PlanToGraphVisitor<'a> { graph: DiGraph, output_schemas: HashMap, diff --git a/src/sql/mod.rs b/src/sql/mod.rs index e0931530..be44d979 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -10,14 +10,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -pub mod catalog; +pub mod schema; pub mod functions; -pub mod physical; -pub mod planner; +pub mod parse; +pub mod logical_node; +pub mod logical_planner; +pub mod analysis; +pub(crate) mod extensions; pub mod types; -pub use planner::StreamSchemaProvider; -pub use planner::parse::parse_sql; -pub use planner::plan::rewrite_plan; -pub use planner::sql_to_plan::statement_to_plan; -pub use planner::{CompiledSql, parse_and_get_arrow_program, parse_sql_statements}; +pub use schema::StreamSchemaProvider; +pub use parse::parse_sql; +pub use analysis::rewrite_plan; +pub use analysis::{CompiledSql}; diff --git a/src/sql/planner/parse.rs b/src/sql/parse.rs similarity index 100% rename from src/sql/planner/parse.rs rename to src/sql/parse.rs diff --git a/src/sql/planner/extension/aggregate.rs b/src/sql/planner/extension/aggregate.rs deleted file mode 100644 index 878d3cc5..00000000 --- a/src/sql/planner/extension/aggregate.rs +++ /dev/null @@ -1,348 +0,0 @@ -use std::fmt::Formatter; -use std::sync::Arc; -use std::time::Duration; - -use datafusion::arrow::datatypes::DataType; -use datafusion::common::{Column, DFSchemaRef, Result, ScalarValue, internal_err}; -use datafusion::logical_expr; -use datafusion::logical_expr::{ - BinaryExpr, Expr, Extension, 
LogicalPlan, UserDefinedLogicalNodeCore, expr::ScalarFunction, -}; - -use crate::multifield_partial_ord; -use crate::sql::planner::extension::{NamedNode, StreamExtension, TimestampAppendExtension}; -use crate::sql::types::{ - DFField, StreamSchema, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, - schema_from_df_fields, schema_from_df_fields_with_metadata, -}; - -pub(crate) const AGGREGATE_EXTENSION_NAME: &str = "AggregateExtension"; - -/// Extension node for windowed aggregate operations in streaming SQL. -/// Supports tumbling, sliding, session, and instant window aggregations. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct AggregateExtension { - pub(crate) window_behavior: WindowBehavior, - pub(crate) aggregate: LogicalPlan, - pub(crate) schema: DFSchemaRef, - pub(crate) key_fields: Vec, - pub(crate) final_calculation: LogicalPlan, -} - -multifield_partial_ord!(AggregateExtension, aggregate, key_fields, final_calculation); - -impl AggregateExtension { - pub fn new( - window_behavior: WindowBehavior, - aggregate: LogicalPlan, - key_fields: Vec, - ) -> Self { - let final_calculation = - Self::final_projection(&aggregate, window_behavior.clone()).unwrap(); - Self { - window_behavior, - aggregate, - schema: final_calculation.schema().clone(), - key_fields, - final_calculation, - } - } - - /// Build the final projection after aggregation, which adds the window struct - /// and computes the output timestamp based on the window behavior. - pub fn final_projection( - aggregate_plan: &LogicalPlan, - window_behavior: WindowBehavior, - ) -> Result { - let timestamp_field: DFField = aggregate_plan.inputs()[0] - .schema() - .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)? 
- .into(); - let timestamp_append = LogicalPlan::Extension(Extension { - node: Arc::new(TimestampAppendExtension::new( - aggregate_plan.clone(), - timestamp_field.qualifier().cloned(), - )), - }); - let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema()); - let mut aggregate_expressions: Vec<_> = aggregate_fields - .iter() - .map(|field| Expr::Column(field.qualified_column())) - .collect(); - - let (window_field, window_index, width, is_nested) = match window_behavior { - WindowBehavior::InData => return Ok(timestamp_append), - WindowBehavior::FromOperator { - window, - window_field, - window_index, - is_nested, - } => match window { - WindowType::Tumbling { width, .. } | WindowType::Sliding { width, .. } => { - (window_field, window_index, width, is_nested) - } - WindowType::Session { .. } => { - return Ok(LogicalPlan::Extension(Extension { - node: Arc::new(WindowAppendExtension::new( - timestamp_append, - window_field, - window_index, - )), - })); - } - WindowType::Instant => return Ok(timestamp_append), - }, - }; - - if is_nested { - return Self::nested_final_projection( - timestamp_append, - window_field, - window_index, - width, - ); - } - - let timestamp_column = - Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name()); - aggregate_fields.insert(window_index, window_field.clone()); - - let window_expression = Self::build_window_struct_expr(×tamp_column, width); - aggregate_expressions.insert( - window_index, - window_expression - .alias_qualified(window_field.qualifier().cloned(), window_field.name()), - ); - aggregate_fields.push(timestamp_field); - - let bin_end_calculation = Expr::BinaryExpr(BinaryExpr { - left: Box::new(Expr::Column(timestamp_column.clone())), - op: logical_expr::Operator::Plus, - right: Box::new(Expr::Literal( - ScalarValue::IntervalMonthDayNano(Some( - datafusion::arrow::datatypes::IntervalMonthDayNanoType::make_value( - 0, - 0, - (width.as_nanos() - 1) as i64, - ), - )), - None, - )), - }); - 
aggregate_expressions.push(bin_end_calculation); - - Ok(LogicalPlan::Projection( - logical_expr::Projection::try_new_with_schema( - aggregate_expressions, - Arc::new(timestamp_append), - Arc::new(schema_from_df_fields(&aggregate_fields)?), - )?, - )) - } - - fn build_window_struct_expr(timestamp_column: &Column, width: Duration) -> Expr { - let start_expr = Expr::Column(timestamp_column.clone()); - let end_expr = Expr::BinaryExpr(BinaryExpr { - left: Box::new(Expr::Column(timestamp_column.clone())), - op: logical_expr::Operator::Plus, - right: Box::new(Expr::Literal( - ScalarValue::IntervalMonthDayNano(Some( - datafusion::arrow::datatypes::IntervalMonthDayNanoType::make_value( - 0, - 0, - width.as_nanos() as i64, - ), - )), - None, - )), - }); - - Expr::ScalarFunction(ScalarFunction { - func: Arc::new(datafusion::logical_expr::ScalarUDF::new_from_impl( - WindowStructUdf {}, - )), - args: vec![start_expr, end_expr], - }) - } - - fn nested_final_projection( - aggregate_plan: LogicalPlan, - window_field: DFField, - window_index: usize, - width: Duration, - ) -> Result { - let timestamp_field: DFField = aggregate_plan - .schema() - .qualified_field_with_unqualified_name(TIMESTAMP_FIELD) - .unwrap() - .into(); - let timestamp_column = - Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name()); - - let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema()); - let mut aggregate_expressions: Vec<_> = aggregate_fields - .iter() - .map(|field| Expr::Column(field.qualified_column())) - .collect(); - aggregate_fields.insert(window_index, window_field.clone()); - - let window_expression = Self::build_window_struct_expr(×tamp_column, width); - aggregate_expressions.insert( - window_index, - window_expression - .alias_qualified(window_field.qualifier().cloned(), window_field.name()), - ); - - Ok(LogicalPlan::Projection( - logical_expr::Projection::try_new_with_schema( - aggregate_expressions, - Arc::new(aggregate_plan), - 
Arc::new(schema_from_df_fields(&aggregate_fields).unwrap()), - ) - .unwrap(), - )) - } -} - -impl UserDefinedLogicalNodeCore for AggregateExtension { - fn name(&self) -> &str { - AGGREGATE_EXTENSION_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.aggregate] - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!( - f, - "AggregateExtension: {} | window_behavior: {:?}", - self.schema(), - match &self.window_behavior { - WindowBehavior::InData => "InData".to_string(), - WindowBehavior::FromOperator { window, .. } => format!("FromOperator({window:?})"), - } - ) - } - - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - if inputs.len() != 1 { - return internal_err!("input size inconsistent"); - } - Ok(Self::new( - self.window_behavior.clone(), - inputs[0].clone(), - self.key_fields.clone(), - )) - } -} - -impl StreamExtension for AggregateExtension { - fn node_name(&self) -> Option { - None - } - - fn output_schema(&self) -> StreamSchema { - let output_schema = (*self.schema).clone().into(); - StreamSchema::from_schema_keys(Arc::new(output_schema), vec![]).unwrap() - } -} - -/// Extension for appending window struct (start, end) to the output -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct WindowAppendExtension { - pub(crate) input: LogicalPlan, - pub(crate) window_field: DFField, - pub(crate) window_index: usize, - pub(crate) schema: DFSchemaRef, -} - -multifield_partial_ord!(WindowAppendExtension, input, window_index); - -impl WindowAppendExtension { - fn new(input: LogicalPlan, window_field: DFField, window_index: usize) -> Self { - let mut fields = fields_with_qualifiers(input.schema()); - fields.insert(window_index, window_field.clone()); - let metadata = input.schema().metadata().clone(); - Self { - input, - window_field, - window_index, - schema: 
Arc::new(schema_from_df_fields_with_metadata(&fields, metadata).unwrap()), - } - } -} - -impl UserDefinedLogicalNodeCore for WindowAppendExtension { - fn name(&self) -> &str { - "WindowAppendExtension" - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!( - f, - "WindowAppendExtension: field {:?} at {}", - self.window_field, self.window_index - ) - } - - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - Ok(Self::new( - inputs[0].clone(), - self.window_field.clone(), - self.window_index, - )) - } -} - -/// Placeholder UDF to construct the window struct at plan time -#[derive(Debug)] -struct WindowStructUdf; - -impl datafusion::logical_expr::ScalarUDFImpl for WindowStructUdf { - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn name(&self) -> &str { - "window" - } - - fn signature(&self) -> &datafusion::logical_expr::Signature { - &datafusion::logical_expr::Signature { - type_signature: datafusion::logical_expr::TypeSignature::Any(2), - volatility: datafusion::logical_expr::Volatility::Immutable, - } - } - - fn return_type(&self, _args: &[DataType]) -> Result { - Ok(crate::sql::planner::schemas::window_arrow_struct()) - } - - fn invoke_with_args( - &self, - _args: datafusion::logical_expr::ScalarFunctionArgs, - ) -> Result { - unimplemented!("WindowStructUdf is a plan-time-only function") - } -} diff --git a/src/sql/planner/extension/join.rs b/src/sql/planner/extension/join.rs deleted file mode 100644 index 3857fee7..00000000 --- a/src/sql/planner/extension/join.rs +++ /dev/null @@ -1,61 +0,0 @@ -use std::time::Duration; - -use datafusion::common::{DFSchemaRef, Result}; -use datafusion::logical_expr::expr::Expr; -use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore}; - -use 
crate::sql::planner::extension::{NamedNode, StreamExtension}; -use crate::sql::types::StreamSchema; - -use std::sync::Arc; - -pub(crate) const JOIN_NODE_NAME: &str = "JoinNode"; - -/// Extension node for streaming joins. -/// Supports instant joins (windowed, no state) and updating joins (with TTL-based state). -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub struct JoinExtension { - pub(crate) rewritten_join: LogicalPlan, - pub(crate) is_instant: bool, - pub(crate) ttl: Option, -} - -impl StreamExtension for JoinExtension { - fn node_name(&self) -> Option { - None - } - - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap() - } -} - -impl UserDefinedLogicalNodeCore for JoinExtension { - fn name(&self) -> &str { - JOIN_NODE_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.rewritten_join] - } - - fn schema(&self) -> &DFSchemaRef { - self.rewritten_join.schema() - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "JoinExtension: {}", self.schema()) - } - - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - Ok(Self { - rewritten_join: inputs[0].clone(), - is_instant: self.is_instant, - ttl: self.ttl, - }) - } -} diff --git a/src/sql/planner/extension/key_calculation.rs b/src/sql/planner/extension/key_calculation.rs deleted file mode 100644 index c90b6d1d..00000000 --- a/src/sql/planner/extension/key_calculation.rs +++ /dev/null @@ -1,138 +0,0 @@ -use std::fmt::Formatter; -use std::sync::Arc; - -use datafusion::arrow::datatypes::{Field, Schema}; -use datafusion::common::{DFSchemaRef, Result, internal_err}; -use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore}; - -use crate::multifield_partial_ord; -use crate::sql::planner::extension::{NamedNode, StreamExtension}; -use crate::sql::types::{ - StreamSchema, 
fields_with_qualifiers, schema_from_df_fields_with_metadata, -}; - -pub(crate) const KEY_CALCULATION_NAME: &str = "KeyCalculationExtension"; - -/// Two ways of specifying keys: column indices or expressions to evaluate -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub enum KeysOrExprs { - Keys(Vec), - Exprs(Vec), -} - -/// Calculation for computing keyed data, used for shuffling data to correct nodes -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct KeyCalculationExtension { - pub(crate) name: Option, - pub(crate) input: LogicalPlan, - pub(crate) keys: KeysOrExprs, - pub(crate) schema: DFSchemaRef, -} - -multifield_partial_ord!(KeyCalculationExtension, name, input, keys); - -impl KeyCalculationExtension { - pub fn new_named_and_trimmed(input: LogicalPlan, keys: Vec, name: String) -> Self { - let output_fields: Vec<_> = fields_with_qualifiers(input.schema()) - .into_iter() - .enumerate() - .filter_map(|(index, field)| { - if !keys.contains(&index) { - Some(field.clone()) - } else { - None - } - }) - .collect(); - - let schema = - schema_from_df_fields_with_metadata(&output_fields, input.schema().metadata().clone()) - .unwrap(); - Self { - name: Some(name), - input, - keys: KeysOrExprs::Keys(keys), - schema: Arc::new(schema), - } - } - - pub fn new(input: LogicalPlan, keys: KeysOrExprs) -> Self { - let schema = input.schema().clone(); - Self { - name: None, - input, - keys, - schema, - } - } -} - -impl StreamExtension for KeyCalculationExtension { - fn node_name(&self) -> Option { - None - } - - fn output_schema(&self) -> StreamSchema { - let input_schema = self.input.schema().as_ref(); - match &self.keys { - KeysOrExprs::Keys(keys) => { - StreamSchema::from_schema_keys(Arc::new(input_schema.into()), keys.clone()).unwrap() - } - KeysOrExprs::Exprs(exprs) => { - let mut fields = vec![]; - for (i, e) in exprs.iter().enumerate() { - let (dt, nullable) = e.data_type_and_nullable(input_schema).unwrap(); - 
fields.push(Field::new(format!("__key_{i}"), dt, nullable).into()); - } - for f in input_schema.fields().iter() { - fields.push(f.clone()); - } - StreamSchema::from_schema_keys( - Arc::new(Schema::new(fields)), - (1..=exprs.len()).collect(), - ) - .unwrap() - } - } - } -} - -impl UserDefinedLogicalNodeCore for KeyCalculationExtension { - fn name(&self) -> &str { - KEY_CALCULATION_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "KeyCalculationExtension: {}", self.schema()) - } - - fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { - if inputs.len() != 1 { - return internal_err!("input size inconsistent"); - } - - let keys = match &self.keys { - KeysOrExprs::Keys(k) => KeysOrExprs::Keys(k.clone()), - KeysOrExprs::Exprs(_) => KeysOrExprs::Exprs(exprs), - }; - - Ok(Self { - name: self.name.clone(), - input: inputs[0].clone(), - keys, - schema: self.schema.clone(), - }) - } -} diff --git a/src/sql/planner/extension/lookup.rs b/src/sql/planner/extension/lookup.rs deleted file mode 100644 index daa4b094..00000000 --- a/src/sql/planner/extension/lookup.rs +++ /dev/null @@ -1,127 +0,0 @@ -use std::fmt::Formatter; -use std::sync::Arc; - -use datafusion::common::{Column, DFSchemaRef, JoinType, Result, TableReference, internal_err}; -use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; - -use super::{NamedNode, StreamExtension}; -use crate::multifield_partial_ord; -use crate::sql::catalog::connector_table::ConnectorTable; -use crate::sql::types::StreamSchema; - -pub const SOURCE_EXTENSION_NAME: &str = "LookupSource"; -pub const JOIN_EXTENSION_NAME: &str = "LookupJoin"; - -/// Represents a lookup table source in the streaming plan. -/// Lookup sources provide point-query access to external state. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct LookupSource { - pub(crate) table: ConnectorTable, - pub(crate) schema: DFSchemaRef, -} - -multifield_partial_ord!(LookupSource, table); - -impl UserDefinedLogicalNodeCore for LookupSource { - fn name(&self) -> &str { - SOURCE_EXTENSION_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![] - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "LookupSource: {}", self.schema) - } - - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - if !inputs.is_empty() { - return internal_err!("LookupSource cannot have inputs"); - } - Ok(Self { - table: self.table.clone(), - schema: self.schema.clone(), - }) - } -} - -impl StreamExtension for LookupSource { - fn node_name(&self) -> Option { - None - } - - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() - } -} - -/// Represents a lookup join: a streaming input joined against a lookup table. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct LookupJoin { - pub(crate) input: LogicalPlan, - pub(crate) schema: DFSchemaRef, - pub(crate) connector: ConnectorTable, - pub(crate) on: Vec<(Expr, Column)>, - pub(crate) filter: Option, - pub(crate) alias: Option, - pub(crate) join_type: JoinType, -} - -multifield_partial_ord!(LookupJoin, input, connector, on, filter, alias); - -impl UserDefinedLogicalNodeCore for LookupJoin { - fn name(&self) -> &str { - JOIN_EXTENSION_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema - } - - fn expressions(&self) -> Vec { - let mut e: Vec<_> = self.on.iter().map(|(l, _)| l.clone()).collect(); - if let Some(filter) = &self.filter { - e.push(filter.clone()); - } - e - } - - fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "LookupJoinExtension: {}", self.schema) - } - - fn with_exprs_and_inputs(&self, _: Vec, inputs: Vec) -> Result { - Ok(Self { - input: inputs[0].clone(), - schema: self.schema.clone(), - connector: self.connector.clone(), - on: self.on.clone(), - filter: self.filter.clone(), - alias: self.alias.clone(), - join_type: self.join_type, - }) - } -} - -impl StreamExtension for LookupJoin { - fn node_name(&self) -> Option { - None - } - - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() - } -} diff --git a/src/sql/planner/extension/projection.rs b/src/sql/planner/extension/projection.rs deleted file mode 100644 index e6dc8ce7..00000000 --- a/src/sql/planner/extension/projection.rs +++ /dev/null @@ -1,91 +0,0 @@ -use std::fmt::Formatter; -use std::sync::Arc; - -use datafusion::common::{DFSchemaRef, Result}; -use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore}; - -use crate::multifield_partial_ord; -use crate::sql::planner::extension::{NamedNode, StreamExtension}; -use 
crate::sql::types::{DFField, StreamSchema, schema_from_df_fields}; - -pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension"; - -/// Projection operations for streaming SQL plans. -/// Handles column projections, shuffles for key-based operations, etc. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct ProjectionExtension { - pub(crate) inputs: Vec, - pub(crate) name: Option, - pub(crate) exprs: Vec, - pub(crate) schema: DFSchemaRef, - pub(crate) shuffle: bool, -} - -multifield_partial_ord!(ProjectionExtension, name, exprs); - -impl ProjectionExtension { - pub(crate) fn new(inputs: Vec, name: Option, exprs: Vec) -> Self { - let input_schema = inputs.first().unwrap().schema(); - let fields: Vec = exprs - .iter() - .map(|e| DFField::from(e.to_field(input_schema).unwrap())) - .collect(); - - let schema = Arc::new(schema_from_df_fields(&fields).unwrap()); - - Self { - inputs, - name, - exprs, - schema, - shuffle: false, - } - } - - pub(crate) fn shuffled(mut self) -> Self { - self.shuffle = true; - self - } -} - -impl StreamExtension for ProjectionExtension { - fn node_name(&self) -> Option { - None - } - - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_arrow().clone())).unwrap() - } -} - -impl UserDefinedLogicalNodeCore for ProjectionExtension { - fn name(&self) -> &str { - PROJECTION_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - self.inputs.iter().collect() - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "ProjectionExtension: {}", self.schema()) - } - - fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { - Ok(Self { - name: self.name.clone(), - inputs, - exprs, - schema: self.schema.clone(), - shuffle: self.shuffle, - }) - } -} diff --git a/src/sql/planner/extension/remote_table.rs 
b/src/sql/planner/extension/remote_table.rs deleted file mode 100644 index 2d81cafc..00000000 --- a/src/sql/planner/extension/remote_table.rs +++ /dev/null @@ -1,71 +0,0 @@ -use std::fmt::Formatter; -use std::sync::Arc; - -use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err}; -use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; - -use crate::multifield_partial_ord; -use crate::sql::planner::extension::{NamedNode, StreamExtension}; -use crate::sql::types::StreamSchema; - -pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension"; - -/// Lightweight extension that segments the execution graph and enables merging -/// nodes with the same name. Allows materializing intermediate results. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct RemoteTableExtension { - pub(crate) input: LogicalPlan, - pub(crate) name: TableReference, - pub(crate) schema: DFSchemaRef, - pub(crate) materialize: bool, -} - -multifield_partial_ord!(RemoteTableExtension, input, name, materialize); - -impl StreamExtension for RemoteTableExtension { - fn node_name(&self) -> Option { - if self.materialize { - Some(NamedNode::RemoteTable(self.name.to_owned())) - } else { - None - } - } - - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap() - } -} - -impl UserDefinedLogicalNodeCore for RemoteTableExtension { - fn name(&self) -> &str { - REMOTE_TABLE_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "RemoteTableExtension: {}", self.schema) - } - - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - if inputs.len() != 1 { - return internal_err!("input size inconsistent"); - } - Ok(Self { - input: inputs[0].clone(), - name: 
self.name.clone(), - schema: self.schema.clone(), - materialize: self.materialize, - }) - } -} diff --git a/src/sql/planner/extension/sink.rs b/src/sql/planner/extension/sink.rs deleted file mode 100644 index e73a8383..00000000 --- a/src/sql/planner/extension/sink.rs +++ /dev/null @@ -1,128 +0,0 @@ -use std::sync::Arc; - -use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err}; -use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore}; - -use super::debezium::ToDebeziumExtension; -use super::remote_table::RemoteTableExtension; -use super::{NamedNode, StreamExtension}; -use crate::multifield_partial_ord; -use crate::sql::catalog::table::Table; -use crate::sql::types::StreamSchema; - -pub(crate) const SINK_NODE_NAME: &str = "SinkExtension"; - -/// Extension node representing a sink (output) in the streaming plan. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct SinkExtension { - pub(crate) name: TableReference, - pub(crate) table: Table, - pub(crate) schema: DFSchemaRef, - pub(crate) inputs: Arc>, -} - -multifield_partial_ord!(SinkExtension, name, inputs); - -impl SinkExtension { - pub fn new( - name: TableReference, - table: Table, - mut schema: DFSchemaRef, - mut input: Arc, - ) -> Result { - match &table { - Table::ConnectorTable(connector_table) => { - if connector_table.is_updating() { - let to_debezium = ToDebeziumExtension::try_new(input.as_ref().clone())?; - input = Arc::new(LogicalPlan::Extension(Extension { - node: Arc::new(to_debezium), - })); - schema = input.schema().clone(); - } - } - Table::LookupTable(..) => return plan_err!("cannot use a lookup table as a sink"), - Table::TableFromQuery { .. 
} => {} - } - - Self::add_remote_if_necessary(&schema, &mut input); - - let inputs = Arc::new(vec![(*input).clone()]); - Ok(Self { - name, - table, - schema, - inputs, - }) - } - - pub fn add_remote_if_necessary(schema: &DFSchemaRef, input: &mut Arc) { - if let LogicalPlan::Extension(node) = input.as_ref() { - let Ok(ext): Result<&dyn StreamExtension, _> = (&node.node).try_into() else { - // not a StreamExtension, wrap it - let remote = RemoteTableExtension { - input: input.as_ref().clone(), - name: TableReference::bare("sink projection"), - schema: schema.clone(), - materialize: false, - }; - *input = Arc::new(LogicalPlan::Extension(Extension { - node: Arc::new(remote), - })); - return; - }; - if !ext.transparent() { - return; - } - } - let remote = RemoteTableExtension { - input: input.as_ref().clone(), - name: TableReference::bare("sink projection"), - schema: schema.clone(), - materialize: false, - }; - *input = Arc::new(LogicalPlan::Extension(Extension { - node: Arc::new(remote), - })); - } -} - -impl UserDefinedLogicalNodeCore for SinkExtension { - fn name(&self) -> &str { - SINK_NODE_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - self.inputs.iter().collect() - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "SinkExtension({:?}): {}", self.name, self.schema) - } - - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - Ok(Self { - name: self.name.clone(), - table: self.table.clone(), - schema: self.schema.clone(), - inputs: Arc::new(inputs), - }) - } -} - -impl StreamExtension for SinkExtension { - fn node_name(&self) -> Option { - Some(NamedNode::Sink(self.name.clone())) - } - - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_fields(vec![]) - } -} diff --git a/src/sql/planner/extension/updating_aggregate.rs b/src/sql/planner/extension/updating_aggregate.rs deleted 
file mode 100644 index 758edc67..00000000 --- a/src/sql/planner/extension/updating_aggregate.rs +++ /dev/null @@ -1,89 +0,0 @@ -use std::sync::Arc; -use std::time::Duration; - -use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err}; -use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore}; - -use super::{IsRetractExtension, NamedNode, StreamExtension}; -use crate::sql::types::StreamSchema; - -pub(crate) const UPDATING_AGGREGATE_EXTENSION_NAME: &str = "UpdatingAggregateExtension"; - -/// Extension node for updating (non-windowed) aggregations. -/// Maintains state with TTL and emits retraction/update pairs. -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub(crate) struct UpdatingAggregateExtension { - pub(crate) aggregate: LogicalPlan, - pub(crate) key_fields: Vec, - pub(crate) final_calculation: LogicalPlan, - pub(crate) timestamp_qualifier: Option, - pub(crate) ttl: Duration, -} - -impl UpdatingAggregateExtension { - pub fn new( - aggregate: LogicalPlan, - key_fields: Vec, - timestamp_qualifier: Option, - ttl: Duration, - ) -> Result { - let final_calculation = LogicalPlan::Extension(Extension { - node: Arc::new(IsRetractExtension::new( - aggregate.clone(), - timestamp_qualifier.clone(), - )), - }); - - Ok(Self { - aggregate, - key_fields, - final_calculation, - timestamp_qualifier, - ttl, - }) - } -} - -impl UserDefinedLogicalNodeCore for UpdatingAggregateExtension { - fn name(&self) -> &str { - UPDATING_AGGREGATE_EXTENSION_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.aggregate] - } - - fn schema(&self) -> &DFSchemaRef { - self.final_calculation.schema() - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "UpdatingAggregateExtension") - } - - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - if inputs.len() != 1 { - return plan_err!("UpdatingAggregateExtension expects 
exactly one input"); - } - Self::new( - inputs[0].clone(), - self.key_fields.clone(), - self.timestamp_qualifier.clone(), - self.ttl, - ) - } -} - -impl StreamExtension for UpdatingAggregateExtension { - fn node_name(&self) -> Option { - None - } - - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap() - } -} diff --git a/src/sql/planner/extension/window_fn.rs b/src/sql/planner/extension/window_fn.rs deleted file mode 100644 index 95832183..00000000 --- a/src/sql/planner/extension/window_fn.rs +++ /dev/null @@ -1,62 +0,0 @@ -use std::sync::Arc; - -use datafusion::common::{DFSchemaRef, Result}; -use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; - -use crate::sql::planner::extension::{NamedNode, StreamExtension}; -use crate::sql::types::StreamSchema; - -pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension"; - -/// Extension for window functions (e.g., ROW_NUMBER, RANK) over windowed input. -/// Window functions require already-windowed input and are evaluated per-window. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub(crate) struct WindowFunctionExtension { - pub(crate) window_plan: LogicalPlan, - pub(crate) key_fields: Vec, -} - -impl WindowFunctionExtension { - pub fn new(window_plan: LogicalPlan, key_fields: Vec) -> Self { - Self { - window_plan, - key_fields, - } - } -} - -impl UserDefinedLogicalNodeCore for WindowFunctionExtension { - fn name(&self) -> &str { - WINDOW_FUNCTION_EXTENSION_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.window_plan] - } - - fn schema(&self) -> &DFSchemaRef { - self.window_plan.schema() - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "WindowFunction: {}", self.schema()) - } - - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - Ok(Self::new(inputs[0].clone(), self.key_fields.clone())) - } -} - -impl StreamExtension for WindowFunctionExtension { - fn node_name(&self) -> Option { - None - } - - fn output_schema(&self) -> StreamSchema { - StreamSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().clone().into())).unwrap() - } -} diff --git a/src/sql/planner/mod.rs b/src/sql/planner/mod.rs deleted file mode 100644 index c85c0fb2..00000000 --- a/src/sql/planner/mod.rs +++ /dev/null @@ -1,348 +0,0 @@ -#![allow(clippy::new_without_default)] - -pub(crate) mod extension; -pub mod parse; -pub(crate) mod physical_planner; -pub mod plan; -pub mod rewrite; -pub mod schema_provider; -pub mod schemas; -pub mod sql_to_plan; - -pub(crate) mod mod_prelude { - pub use super::StreamSchemaProvider; -} - -pub use schema_provider::{LogicalBatchInput, StreamSchemaProvider, StreamTable}; - -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; - -use datafusion::common::tree_node::TreeNode; -use datafusion::common::{Result, plan_err}; -use datafusion::error::DataFusionError; -use datafusion::execution::SessionStateBuilder; -use 
datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion::prelude::SessionConfig; -use datafusion::sql::TableReference; -use datafusion::sql::sqlparser::ast::{OneOrManyWithParens, Statement}; -use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; -use datafusion::sql::sqlparser::parser::Parser; -use tracing::debug; - -use crate::datastream::logical::{LogicalProgram, ProgramConfig}; -use crate::datastream::optimizers::ChainingOptimizer; -use crate::sql::catalog::insert::Insert; -use crate::sql::catalog::table::Table as CatalogTable; -use crate::sql::functions::{is_json_union, serialize_outgoing_json}; -use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs}; -use crate::sql::planner::extension::projection::ProjectionExtension; -use crate::sql::planner::extension::sink::SinkExtension; -use crate::sql::planner::extension::{NamedNode, StreamExtension}; -use crate::sql::planner::plan::rewrite_plan; -use crate::sql::planner::rewrite::{SinkInputRewriter, SourceMetadataVisitor}; -use crate::sql::types::SqlConfig; - -// ── Compilation pipeline ────────────────────────────────────────────── - -#[derive(Clone, Debug)] -pub struct CompiledSql { - pub program: LogicalProgram, - pub connection_ids: Vec, -} - -pub fn parse_sql_statements( - sql: &str, -) -> std::result::Result, datafusion::sql::sqlparser::parser::ParserError> { - Parser::parse_sql(&FunctionStreamDialect {}, sql) -} - -fn try_handle_set_variable( - statement: &Statement, - schema_provider: &mut StreamSchemaProvider, -) -> Result { - if let Statement::SetVariable { - variables, value, .. 
- } = statement - { - let OneOrManyWithParens::One(opt) = variables else { - return plan_err!("invalid syntax for `SET` call"); - }; - - if opt.to_string() != "updating_ttl" { - return plan_err!( - "invalid option '{}'; supported options are 'updating_ttl'", - opt - ); - } - - if value.len() != 1 { - return plan_err!("invalid `SET updating_ttl` call; expected exactly one expression"); - } - - let duration = duration_from_sql_expr(&value[0])?; - schema_provider.planning_options.ttl = duration; - - return Ok(true); - } - - Ok(false) -} - -fn duration_from_sql_expr( - expr: &datafusion::sql::sqlparser::ast::Expr, -) -> Result { - use datafusion::sql::sqlparser::ast::Expr as SqlExpr; - use datafusion::sql::sqlparser::ast::Value as SqlValue; - use datafusion::sql::sqlparser::ast::ValueWithSpan; - - match expr { - SqlExpr::Interval(interval) => { - let value_str = match interval.value.as_ref() { - SqlExpr::Value(ValueWithSpan { - value: SqlValue::SingleQuotedString(s), - .. - }) => s.clone(), - other => return plan_err!("expected interval string literal, found {other}"), - }; - - parse_interval_to_duration(&value_str) - } - SqlExpr::Value(ValueWithSpan { - value: SqlValue::SingleQuotedString(s), - .. 
- }) => parse_interval_to_duration(s), - other => plan_err!("expected an interval expression, found {other}"), - } -} - -fn parse_interval_to_duration(s: &str) -> Result { - let parts: Vec<&str> = s.trim().split_whitespace().collect(); - if parts.len() != 2 { - return plan_err!("invalid interval string '{s}'; expected ' '"); - } - let value: u64 = parts[0] - .parse() - .map_err(|_| DataFusionError::Plan(format!("invalid interval number: {}", parts[0])))?; - match parts[1].to_lowercase().as_str() { - "second" | "seconds" | "s" => Ok(std::time::Duration::from_secs(value)), - "minute" | "minutes" | "min" => Ok(std::time::Duration::from_secs(value * 60)), - "hour" | "hours" | "h" => Ok(std::time::Duration::from_secs(value * 3600)), - "day" | "days" | "d" => Ok(std::time::Duration::from_secs(value * 86400)), - unit => plan_err!("unsupported interval unit '{unit}'"), - } -} - -fn build_sink_inputs(extensions: &[LogicalPlan]) -> HashMap> { - let mut sink_inputs = HashMap::>::new(); - for extension in extensions.iter() { - if let LogicalPlan::Extension(ext) = extension { - if let Some(sink_node) = ext.node.as_any().downcast_ref::() { - if let Some(named_node) = sink_node.node_name() { - let inputs = sink_node - .inputs() - .into_iter() - .cloned() - .collect::>(); - sink_inputs.entry(named_node).or_default().extend(inputs); - } - } - } - } - sink_inputs -} - -pub(crate) fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result { - let LogicalPlan::Extension(ref ext) = plan else { - return Ok(plan); - }; - - let Some(sink) = ext.node.as_any().downcast_ref::() else { - return Ok(plan); - }; - - let Some(partition_exprs) = sink.table.partition_exprs() else { - return Ok(plan); - }; - - if partition_exprs.is_empty() { - return Ok(plan); - } - - let inputs = plan - .inputs() - .into_iter() - .map(|input| { - Ok(LogicalPlan::Extension(Extension { - node: Arc::new(KeyCalculationExtension { - name: Some("key-calc-partition".to_string()), - schema: input.schema().clone(), - 
input: input.clone(), - keys: KeysOrExprs::Exprs(partition_exprs.clone()), - }), - })) - }) - .collect::>()?; - - use datafusion::prelude::col; - let unkey = LogicalPlan::Extension(Extension { - node: Arc::new( - ProjectionExtension::new( - inputs, - Some("unkey".to_string()), - sink.schema().iter().map(|(_, f)| col(f.name())).collect(), - ) - .shuffled(), - ), - }); - - let node = sink.with_exprs_and_inputs(vec![], vec![unkey])?; - Ok(LogicalPlan::Extension(Extension { - node: Arc::new(node), - })) -} - -pub fn rewrite_sinks(extensions: Vec) -> Result> { - let mut sink_inputs = build_sink_inputs(&extensions); - let mut new_extensions = vec![]; - for extension in extensions { - let mut rewriter = SinkInputRewriter::new(&mut sink_inputs); - let result = extension.rewrite(&mut rewriter)?; - if !rewriter.was_removed { - new_extensions.push(result.data); - } - } - - new_extensions - .into_iter() - .map(maybe_add_key_extension_to_sink) - .collect() -} - -pub async fn parse_and_get_arrow_program( - query: String, - mut schema_provider: StreamSchemaProvider, - _config: SqlConfig, -) -> Result { - let mut config = SessionConfig::new(); - config - .options_mut() - .optimizer - .enable_round_robin_repartition = false; - config.options_mut().optimizer.repartition_aggregations = false; - config.options_mut().optimizer.repartition_windows = false; - config.options_mut().optimizer.repartition_sorts = false; - config.options_mut().optimizer.repartition_joins = false; - config.options_mut().execution.target_partitions = 1; - - let session_state = SessionStateBuilder::new() - .with_config(config) - .with_default_features() - .with_physical_optimizer_rules(vec![]) - .build(); - - let mut inserts = vec![]; - for statement in parse_sql_statements(&query)? { - if try_handle_set_variable(&statement, &mut schema_provider)? { - continue; - } - - if let Some(table) = CatalogTable::try_from_statement(&statement, &schema_provider)? 
{ - schema_provider.insert_catalog_table(table); - } else { - inserts.push(Insert::try_from_statement(&statement, &schema_provider)?); - }; - } - - if inserts.is_empty() { - return plan_err!("The provided SQL does not contain a query"); - } - - let mut used_connections = HashSet::new(); - let mut extensions = vec![]; - - for insert in inserts { - let (plan, sink_name) = match insert { - Insert::InsertQuery { - sink_name, - logical_plan, - } => (logical_plan, Some(sink_name)), - Insert::Anonymous { logical_plan } => (logical_plan, None), - }; - - let mut plan_rewrite = rewrite_plan(plan, &schema_provider)?; - - if plan_rewrite - .schema() - .fields() - .iter() - .any(|f| is_json_union(f.data_type())) - { - plan_rewrite = serialize_outgoing_json(&schema_provider, Arc::new(plan_rewrite)); - } - - debug!("Plan = {}", plan_rewrite.display_graphviz()); - - let mut metadata = SourceMetadataVisitor::new(&schema_provider); - plan_rewrite.visit_with_subqueries(&mut metadata)?; - used_connections.extend(metadata.connection_ids.iter()); - - let sink = match sink_name { - Some(sink_name) => { - let table = schema_provider - .get_catalog_table_mut(&sink_name) - .ok_or_else(|| { - DataFusionError::Plan(format!("Connection {sink_name} not found")) - })?; - match table { - CatalogTable::ConnectorTable(c) => { - if let Some(id) = c.id { - used_connections.insert(id); - } - - SinkExtension::new( - TableReference::bare(sink_name), - table.clone(), - plan_rewrite.schema().clone(), - Arc::new(plan_rewrite), - ) - } - CatalogTable::LookupTable(_) => { - plan_err!("lookup (temporary) tables cannot be inserted into") - } - CatalogTable::TableFromQuery { .. } => { - plan_err!( - "shouldn't be inserting more data into a table made with CREATE TABLE AS" - ) - } - } - } - None => { - return plan_err!( - "Anonymous query is not supported; use INSERT INTO SELECT ..." 
- ); - } - }; - extensions.push(LogicalPlan::Extension(Extension { - node: Arc::new(sink?), - })); - } - - let extensions = rewrite_sinks(extensions)?; - - let mut plan_to_graph_visitor = - physical_planner::PlanToGraphVisitor::new(&schema_provider, &session_state); - for extension in extensions { - plan_to_graph_visitor.add_plan(extension)?; - } - let graph = plan_to_graph_visitor.into_graph(); - - let mut program = LogicalProgram::new(graph, ProgramConfig::default()); - - program.optimize(&ChainingOptimizer {}); - - Ok(CompiledSql { - program, - connection_ids: used_connections.into_iter().collect(), - }) -} diff --git a/src/sql/planner/plan/mod.rs b/src/sql/planner/plan/mod.rs deleted file mode 100644 index c734a88b..00000000 --- a/src/sql/planner/plan/mod.rs +++ /dev/null @@ -1,54 +0,0 @@ -use datafusion::common::Result; -use datafusion::common::tree_node::{Transformed, TreeNode}; -use datafusion::logical_expr::LogicalPlan; -use tracing::{debug, info, instrument}; - -use crate::sql::planner::StreamSchemaProvider; -use crate::sql::planner::plan::stream_rewriter::StreamRewriter; -use crate::sql::planner::rewrite::TimeWindowUdfChecker; - -// Module declarations -pub(crate) mod aggregate_rewriter; -pub(crate) mod join_rewriter; -pub(crate) mod row_time_rewriter; -pub(crate) mod stream_rewriter; -pub(crate) mod streaming_window_analzer; -pub(crate) mod window_function_rewriter; - -/// Entry point for transforming a standard DataFusion LogicalPlan into a -/// Streaming-aware LogicalPlan. -/// -/// This function coordinates multiple rewriting passes and ensures the -/// resulting plan satisfies streaming constraints. -#[instrument(skip_all, level = "debug")] -pub fn rewrite_plan( - plan: LogicalPlan, - schema_provider: &StreamSchemaProvider, -) -> Result { - info!("Starting streaming plan rewrite pipeline"); - - // Phase 1: Core Transformation - // This pass handles the structural changes (Aggregates, Joins, Windows) - // using a Bottom-Up traversal. 
- let mut rewriter = StreamRewriter::new(schema_provider); - let Transformed { - data: rewritten_plan, - .. - } = plan.rewrite_with_subqueries(&mut rewriter)?; - - // Phase 2: Post-rewrite Validation - // Ensure that the rewritten plan doesn't violate specific streaming UDF rules. - rewritten_plan.visit_with_subqueries(&mut TimeWindowUdfChecker {})?; - - // Phase 3: Observability & Debugging - // Industrial engines use Graphviz or specialized Explain formats for plan diffs. - if cfg!(debug_assertions) { - debug!( - "Streaming logical plan graphviz:\n{}", - rewritten_plan.display_graphviz() - ); - } - - info!("Streaming plan rewrite completed successfully"); - Ok(rewritten_plan) -} diff --git a/src/sql/planner/rewrite/mod.rs b/src/sql/planner/rewrite/mod.rs deleted file mode 100644 index bfebae4c..00000000 --- a/src/sql/planner/rewrite/mod.rs +++ /dev/null @@ -1,25 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -pub mod async_udf_rewriter; -pub mod sink_input_rewriter; -pub mod source_metadata_visitor; -pub mod source_rewriter; -pub mod time_window; -pub mod unnest_rewriter; - -pub use async_udf_rewriter::{AsyncOptions, AsyncUdfRewriter}; -pub use sink_input_rewriter::SinkInputRewriter; -pub use source_metadata_visitor::SourceMetadataVisitor; -pub use source_rewriter::SourceRewriter; -pub use time_window::{TimeWindowNullCheckRemover, TimeWindowUdfChecker, is_time_window}; -pub use unnest_rewriter::{UNNESTED_COL, UnnestRewriter}; diff --git a/src/sql/planner/schemas.rs b/src/sql/planner/schemas.rs deleted file mode 100644 index f903db83..00000000 --- a/src/sql/planner/schemas.rs +++ /dev/null @@ -1,5 +0,0 @@ -// Re-export schema utilities from catalog::utils. -// Kept for backward compatibility with existing planner imports. -pub use crate::sql::catalog::utils::{ - add_timestamp_field, add_timestamp_field_arrow, has_timestamp_field, window_arrow_struct, -}; diff --git a/src/sql/planner/sql_to_plan.rs b/src/sql/planner/sql_to_plan.rs deleted file mode 100644 index 049cd18e..00000000 --- a/src/sql/planner/sql_to_plan.rs +++ /dev/null @@ -1,22 +0,0 @@ -use datafusion::common::Result; -use datafusion::logical_expr::LogicalPlan; -use datafusion::sql::sqlparser::ast::Statement; -use tracing::debug; - -use crate::sql::planner::StreamSchemaProvider; - -/// Stage 2: Statement → LogicalPlan -/// -/// Converts a parsed SQL AST statement into a DataFusion logical plan -/// using the StreamSchemaProvider as the catalog context. 
-pub fn statement_to_plan( - statement: Statement, - schema_provider: &StreamSchemaProvider, -) -> Result { - let sql_to_rel = datafusion::sql::planner::SqlToRel::new(schema_provider); - let plan = sql_to_rel.sql_statement_to_plan(statement)?; - - debug!("Logical plan:\n{}", plan.display_graphviz()); - - Ok(plan) -} diff --git a/src/sql/catalog/connector.rs b/src/sql/schema/connector.rs similarity index 57% rename from src/sql/catalog/connector.rs rename to src/sql/schema/connector.rs index 01176d47..06a3df92 100644 --- a/src/sql/catalog/connector.rs +++ b/src/sql/schema/connector.rs @@ -29,31 +29,3 @@ impl fmt::Display for ConnectionType { } } } - -/// A connector operation that describes how to interact with an external system. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ConnectorOp { - pub connector: String, - pub config: String, - pub description: String, -} - -impl ConnectorOp { - pub fn new(connector: impl Into, config: impl Into) -> Self { - let connector = connector.into(); - let description = connector.clone(); - Self { - connector, - config: config.into(), - description, - } - } -} - -/// Configuration for a connection profile (e.g., Kafka broker, Pulsar endpoint). 
-#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ConnectionProfile { - pub name: String, - pub connector: String, - pub config: std::collections::HashMap, -} diff --git a/src/sql/catalog/connector_table.rs b/src/sql/schema/connector_table.rs similarity index 91% rename from src/sql/catalog/connector_table.rs rename to src/sql/schema/connector_table.rs index 8dae1745..25e37184 100644 --- a/src/sql/catalog/connector_table.rs +++ b/src/sql/schema/connector_table.rs @@ -16,10 +16,11 @@ use std::time::Duration; use datafusion::arrow::datatypes::{FieldRef, Schema}; use datafusion::common::{Result, plan_err}; use datafusion::logical_expr::Expr; - -use super::connector::{ConnectionType, ConnectorOp}; +use protocol::grpc::api::ConnectorOp; use super::field_spec::FieldSpec; use crate::multifield_partial_ord; +use crate::sql::schema::ConnectionType; +use crate::sql::schema::table::SqlSource; use crate::sql::types::ProcessingMode; /// Represents a table backed by an external connector (e.g., Kafka, Pulsar, NATS). 
@@ -38,6 +39,8 @@ pub struct ConnectorTable { pub primary_keys: Arc>, pub inferred_fields: Option>, pub partition_exprs: Arc>>, + pub lookup_cache_max_bytes: Option, + pub lookup_cache_ttl: Option, } multifield_partial_ord!( @@ -74,6 +77,8 @@ impl ConnectorTable { primary_keys: Arc::new(Vec::new()), inferred_fields: None, partition_exprs: Arc::new(None), + lookup_cache_max_bytes: None, + lookup_cache_ttl: None, } } @@ -160,40 +165,41 @@ impl ConnectorTable { ConnectionType::Sink | ConnectionType::Lookup => { return plan_err!("cannot read from sink"); } - } + }; if self.is_updating() && self.has_virtual_fields() { - return plan_err!("can't read from a source with virtual fields and update mode"); + return plan_err!("can't read from a source with virtual fields and update mode."); } let timestamp_override = self.timestamp_override()?; let watermark_column = self.watermark_column()?; - Ok(SourceOperator { - name: self.name.clone(), - connector_op: self.connector_op(), - processing_mode: self.processing_mode(), - idle_time: self.idle_time, - struct_fields: self + let source = SqlSource { + id: self.id, + struct_def: self .fields .iter() .filter(|f| !f.is_virtual()) .map(|f| Arc::new(f.field().clone())) .collect(), + config: self.connector_op(), + processing_mode: self.processing_mode(), + idle_time: self.idle_time, + }; + + Ok(SourceOperator { + name: self.name.clone(), + source, timestamp_override, watermark_column, }) } } -/// A fully resolved source operator ready for execution graph construction. 
#[derive(Debug, Clone)] pub struct SourceOperator { pub name: String, - pub connector_op: ConnectorOp, - pub processing_mode: ProcessingMode, - pub idle_time: Option, - pub struct_fields: Vec, + pub source: SqlSource, pub timestamp_override: Option, pub watermark_column: Option, } diff --git a/src/sql/catalog/field_spec.rs b/src/sql/schema/field_spec.rs similarity index 100% rename from src/sql/catalog/field_spec.rs rename to src/sql/schema/field_spec.rs diff --git a/src/sql/catalog/insert.rs b/src/sql/schema/insert.rs similarity index 97% rename from src/sql/catalog/insert.rs rename to src/sql/schema/insert.rs index a4a3814a..fe91325b 100644 --- a/src/sql/catalog/insert.rs +++ b/src/sql/schema/insert.rs @@ -15,7 +15,7 @@ use datafusion::logical_expr::{DmlStatement, LogicalPlan, WriteOp}; use datafusion::sql::sqlparser::ast::Statement; use super::optimizer::produce_optimized_plan; -use crate::sql::planner::StreamSchemaProvider; +use crate::sql::schema::StreamSchemaProvider; /// Represents an INSERT operation in a streaming SQL pipeline. 
#[derive(Debug)] diff --git a/src/sql/catalog/mod.rs b/src/sql/schema/mod.rs similarity index 85% rename from src/sql/catalog/mod.rs rename to src/sql/schema/mod.rs index 39c7bfcd..0bf7e4ea 100644 --- a/src/sql/catalog/mod.rs +++ b/src/sql/schema/mod.rs @@ -15,11 +15,13 @@ pub mod connector_table; pub mod field_spec; pub mod insert; pub mod optimizer; +pub mod schema_provider; pub mod table; pub mod utils; -pub use connector::{ConnectionType, ConnectorOp}; +pub use connector::{ConnectionType}; pub use connector_table::{ConnectorTable, SourceOperator}; pub use field_spec::FieldSpec; pub use insert::Insert; +pub use schema_provider::{LogicalBatchInput, StreamSchemaProvider, StreamTable}; pub use table::Table; diff --git a/src/sql/catalog/optimizer.rs b/src/sql/schema/optimizer.rs similarity index 98% rename from src/sql/catalog/optimizer.rs rename to src/sql/schema/optimizer.rs index 15abe61e..fbb64845 100644 --- a/src/sql/catalog/optimizer.rs +++ b/src/sql/schema/optimizer.rs @@ -41,7 +41,7 @@ use datafusion::optimizer::simplify_expressions::SimplifyExpressions; use datafusion::sql::planner::SqlToRel; use datafusion::sql::sqlparser::ast::Statement; -use crate::sql::planner::StreamSchemaProvider; +use crate::sql::schema::StreamSchemaProvider; /// Converts a SQL statement into an optimized DataFusion logical plan. 
/// diff --git a/src/sql/planner/schema_provider.rs b/src/sql/schema/schema_provider.rs similarity index 97% rename from src/sql/planner/schema_provider.rs rename to src/sql/schema/schema_provider.rs index d860fd6c..11c0d461 100644 --- a/src/sql/planner/schema_provider.rs +++ b/src/sql/schema/schema_provider.rs @@ -15,9 +15,9 @@ use datafusion::optimizer::Analyzer; use datafusion::sql::TableReference; use datafusion::sql::planner::ContextProvider; use unicase::UniCase; - -use crate::sql::catalog::table::Table as CatalogTable; -use crate::sql::planner::schemas::window_arrow_struct; +use crate::sql::logical_node::logical::DylibUdfConfig; +use crate::sql::schema::table::Table as CatalogTable; +use crate::sql::schema::utils::window_arrow_struct; use crate::sql::types::{PlaceholderUdf, PlanningOptions}; #[derive(Clone, Default)] @@ -28,6 +28,7 @@ pub struct StreamSchemaProvider { pub functions: HashMap>, pub aggregate_functions: HashMap>, pub window_functions: HashMap>, + pub dylib_udfs: HashMap, config_options: datafusion::config::ConfigOptions, pub expr_planners: Vec>, pub planning_options: PlanningOptions, @@ -97,7 +98,7 @@ impl datafusion::datasource::TableProvider for LogicalBatchInput { _filters: &[Expr], _limit: Option, ) -> Result> { - Ok(Arc::new(crate::sql::physical::FsMemExec::new( + Ok(Arc::new(crate::sql::logical_planner::FsMemExec::new( self.table_name.clone(), self.schema.clone(), ))) @@ -238,7 +239,7 @@ impl StreamSchemaProvider { pub fn get_async_udf_options( &self, _name: &str, - ) -> Option { + ) -> Option { // TODO: implement async UDF lookup None } diff --git a/src/sql/catalog/table.rs b/src/sql/schema/table.rs similarity index 90% rename from src/sql/catalog/table.rs rename to src/sql/schema/table.rs index a997680b..21f064fe 100644 --- a/src/sql/catalog/table.rs +++ b/src/sql/schema/table.rs @@ -11,18 +11,18 @@ // limitations under the License. 
use std::sync::Arc; - +use std::time::Duration; use datafusion::arrow::datatypes::FieldRef; use datafusion::common::{Result, plan_err}; use datafusion::logical_expr::{Extension, LogicalPlan}; use datafusion::sql::sqlparser::ast::Statement; - +use protocol::grpc::api::ConnectorOp; use super::connector_table::ConnectorTable; use super::optimizer::produce_optimized_plan; -use crate::sql::planner::StreamSchemaProvider; -use crate::sql::planner::extension::remote_table::RemoteTableExtension; -use crate::sql::planner::plan::rewrite_plan; -use crate::sql::types::DFField; +use crate::sql::schema::StreamSchemaProvider; +use crate::sql::extensions::remote_table::RemoteTableExtension; +use crate::sql::analysis::rewrite_plan; +use crate::sql::types::{DFField, ProcessingMode}; /// Represents all table types in the FunctionStream SQL catalog. #[allow(clippy::enum_variant_names)] @@ -137,7 +137,7 @@ impl Table { Ok(()) } - pub fn connector_op(&self) -> Result { + pub fn connector_op(&self) -> Result { match self { Table::ConnectorTable(c) | Table::LookupTable(c) => Ok(c.connector_op()), Table::TableFromQuery { .. 
} => plan_err!("can't write to a query-defined table"), @@ -151,3 +151,12 @@ impl Table { } } } + +#[derive(Clone, Debug)] +pub struct SqlSource { + pub id: Option, + pub struct_def: Vec, + pub config: ConnectorOp, + pub processing_mode: ProcessingMode, + pub idle_time: Option, +} diff --git a/src/sql/catalog/utils.rs b/src/sql/schema/utils.rs similarity index 100% rename from src/sql/catalog/utils.rs rename to src/sql/schema/utils.rs diff --git a/src/types/converter.rs b/src/types/converter.rs new file mode 100644 index 00000000..8f6a2ba8 --- /dev/null +++ b/src/types/converter.rs @@ -0,0 +1,83 @@ +use std::sync::Arc; +use arrow::row::{OwnedRow, RowConverter, RowParser, Rows, SortField}; +use arrow_array::{Array, ArrayRef, BooleanArray}; +use arrow_schema::{ArrowError, DataType}; + +// need to handle the empty case as a row converter without sort fields emits empty Rows. +#[derive(Debug)] +pub enum Converter { + RowConverter(RowConverter), + Empty(RowConverter, Arc), +} + +impl Converter { + pub fn new(sort_fields: Vec) -> Result { + if sort_fields.is_empty() { + let array = Arc::new(BooleanArray::from(vec![false])); + Ok(Self::Empty( + RowConverter::new(vec![SortField::new(DataType::Boolean)])?, + array, + )) + } else { + Ok(Self::RowConverter(RowConverter::new(sort_fields)?)) + } + } + + pub fn convert_columns(&self, columns: &[Arc]) -> Result { + match self { + Converter::RowConverter(row_converter) => { + Ok(row_converter.convert_columns(columns)?.row(0).owned()) + } + Converter::Empty(row_converter, array) => Ok(row_converter + .convert_columns(std::slice::from_ref(array))? + .row(0) + .owned()), + } + } + + pub fn convert_all_columns( + &self, + columns: &[Arc], + num_rows: usize, + ) -> Result { + match self { + Converter::RowConverter(row_converter) => Ok(row_converter.convert_columns(columns)?), + Converter::Empty(row_converter, _array) => { + let array = Arc::new(BooleanArray::from(vec![false; num_rows])); + Ok(row_converter.convert_columns(&[array])?) 
+ } + } + } + + pub fn convert_rows( + &self, + rows: Vec>, + ) -> Result, ArrowError> { + match self { + Converter::RowConverter(row_converter) => Ok(row_converter.convert_rows(rows)?), + Converter::Empty(_row_converter, _array) => Ok(vec![]), + } + } + + pub fn convert_raw_rows(&self, row_bytes: Vec<&[u8]>) -> Result, ArrowError> { + match self { + Converter::RowConverter(row_converter) => { + let parser = row_converter.parser(); + let mut row_list = vec![]; + for bytes in row_bytes { + let row = parser.parse(bytes); + row_list.push(row); + } + Ok(row_converter.convert_rows(row_list)?) + } + Converter::Empty(_row_converter, _array) => Ok(vec![]), + } + } + + pub fn parser(&self) -> Option { + match self { + Converter::RowConverter(r) => Some(r.parser()), + Converter::Empty(_, _) => None, + } + } +} \ No newline at end of file diff --git a/src/types/df.rs b/src/types/df.rs index 30b4eb9c..7266bb6b 100644 --- a/src/types/df.rs +++ b/src/types/df.rs @@ -4,22 +4,89 @@ use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaBuil use datafusion::arrow::error::ArrowError; use datafusion::common::{DataFusionError, Result as DFResult}; use std::sync::Arc; - -use super::TIMESTAMP_FIELD; -use crate::sql::types::StreamSchema; +use std::time::SystemTime; +use arrow::compute::{filter_record_batch, lexsort_to_indices, partition, take, SortColumn}; +use arrow::compute::kernels::cmp::gt_eq; +use arrow::compute::kernels::numeric::div; +use arrow::row::SortField; +use arrow_array::{PrimitiveArray, UInt64Array}; +use arrow_array::types::UInt64Type; +use protocol::grpc::api; +use super::{to_nanos, TIMESTAMP_FIELD}; +use std::ops::Range; +use crate::types::converter::Converter; pub type FsSchemaRef = Arc; -/// Core streaming schema with timestamp and key tracking. -/// Analogous to Arroyo's `ArroyoSchema`. 
#[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct FsSchema { pub schema: Arc, pub timestamp_index: usize, key_indices: Option>, + /// If defined, these indices are used for routing (i.e., which subtask gets which piece of data) routing_key_indices: Option>, } +impl TryFrom for FsSchema { + type Error = DataFusionError; + fn try_from(schema_proto: api::FsSchema) -> Result { + let schema: Schema = serde_json::from_str(&schema_proto.arrow_schema) + .map_err(|e| DataFusionError::Plan(format!("Invalid arrow schema: {e}")))?; + let timestamp_index = schema_proto.timestamp_index as usize; + + let key_indices = schema_proto.has_keys.then(|| { + schema_proto + .key_indices + .into_iter() + .map(|index| index as usize) + .collect() + }); + + let routing_key_indices = schema_proto.has_routing_keys.then(|| { + schema_proto + .routing_key_indices + .into_iter() + .map(|index| index as usize) + .collect() + }); + + Ok(Self { + schema: Arc::new(schema), + timestamp_index, + key_indices, + routing_key_indices, + }) + } +} + +impl From for api::FsSchema { + fn from(schema: FsSchema) -> Self { + let arrow_schema = serde_json::to_string(schema.schema.as_ref()).unwrap(); + let timestamp_index = schema.timestamp_index as u32; + + let has_keys = schema.key_indices.is_some(); + let key_indices = schema + .key_indices + .map(|ks| ks.into_iter().map(|index| index as u32).collect()) + .unwrap_or_default(); + + let has_routing_keys = schema.routing_key_indices.is_some(); + let routing_key_indices = schema + .routing_key_indices + .map(|ks| ks.into_iter().map(|index| index as u32).collect()) + .unwrap_or_default(); + + Self { + arrow_schema, + timestamp_index, + key_indices, + has_keys, + routing_key_indices, + has_routing_keys, + } + } +} + impl FsSchema { pub fn new( schema: Arc, @@ -34,7 +101,6 @@ impl FsSchema { routing_key_indices, } } - pub fn new_unkeyed(schema: Arc, timestamp_index: usize) -> Self { Self { schema, @@ -43,7 +109,6 @@ impl FsSchema { routing_key_indices: None, } } 
- pub fn new_keyed(schema: Arc, timestamp_index: usize, key_indices: Vec) -> Self { Self { schema, @@ -141,22 +206,100 @@ impl FsSchema { self.key_indices.as_ref() } - pub fn sort_field_indices(&self, with_timestamp: bool) -> Vec { - let mut indices = vec![]; + pub fn filter_by_time( + &self, + batch: RecordBatch, + cutoff: Option, + ) -> Result { + let Some(cutoff) = cutoff else { + // no watermark, so we just return the same batch. + return Ok(batch); + }; + // filter out late data + let timestamp_column = batch + .column(self.timestamp_index) + .as_any() + .downcast_ref::() + .ok_or_else(|| ArrowError::CastError( + format!("failed to downcast column {} of {:?} to timestamp. Schema is supposed to be {:?}", + self.timestamp_index, batch, self.schema)))?; + let cutoff_scalar = TimestampNanosecondArray::new_scalar(to_nanos(cutoff) as i64); + let on_time = gt_eq(timestamp_column, &cutoff_scalar)?; + filter_record_batch(&batch, &on_time) + } + + pub fn sort_columns(&self, batch: &RecordBatch, with_timestamp: bool) -> Vec { + let mut columns = vec![]; if let Some(keys) = &self.key_indices { - indices.extend(keys.iter().copied()); + columns.extend(keys.iter().map(|index| SortColumn { + values: batch.column(*index).clone(), + options: None, + })); } if with_timestamp { - indices.push(self.timestamp_index); + columns.push(SortColumn { + values: batch.column(self.timestamp_index).clone(), + options: None, + }); + } + columns + } + + pub fn sort_fields(&self, with_timestamp: bool) -> Vec { + let mut sort_fields = vec![]; + if let Some(keys) = &self.key_indices { + sort_fields.extend(keys.iter()); } + if with_timestamp { + sort_fields.push(self.timestamp_index); + } + self.sort_fields_by_indices(&sort_fields) + } + + fn sort_fields_by_indices(&self, indices: &[usize]) -> Vec { indices + .iter() + .map(|index| SortField::new(self.schema.field(*index).data_type().clone())) + .collect() + } + + pub fn converter(&self, with_timestamp: bool) -> Result { + 
Converter::new(self.sort_fields(with_timestamp)) + } + + pub fn value_converter( + &self, + with_timestamp: bool, + generation_index: usize, + ) -> Result { + match &self.key_indices { + None => { + let mut indices = (0..self.schema.fields().len()).collect::>(); + indices.remove(generation_index); + if !with_timestamp { + indices.remove(self.timestamp_index); + } + Converter::new(self.sort_fields_by_indices(&indices)) + } + Some(keys) => { + let indices = (0..self.schema.fields().len()) + .filter(|index| { + !keys.contains(index) + && (with_timestamp || *index != self.timestamp_index) + && *index != generation_index + }) + .collect::>(); + Converter::new(self.sort_fields_by_indices(&indices)) + } + } } pub fn value_indices(&self, with_timestamp: bool) -> Vec { let field_count = self.schema.fields().len(); match &self.key_indices { None => { - let mut indices: Vec = (0..field_count).collect(); + let mut indices = (0..field_count).collect::>(); + if !with_timestamp { indices.remove(self.timestamp_index); } @@ -166,10 +309,51 @@ impl FsSchema { .filter(|index| { !keys.contains(index) && (with_timestamp || *index != self.timestamp_index) }) - .collect(), + .collect::>(), } } + pub fn sort( + &self, + batch: RecordBatch, + with_timestamp: bool, + ) -> Result { + if self.key_indices.is_none() && !with_timestamp { + return Ok(batch); + } + let sort_columns = self.sort_columns(&batch, with_timestamp); + let sort_indices = lexsort_to_indices(&sort_columns, None).expect("should be able to sort"); + let columns = batch + .columns() + .iter() + .map(|c| take(c, &sort_indices, None).unwrap()) + .collect(); + + RecordBatch::try_new(batch.schema(), columns) + } + + pub fn partition( + &self, + batch: &RecordBatch, + with_timestamp: bool, + ) -> Result>, ArrowError> { + if self.key_indices.is_none() && !with_timestamp { + #[allow(clippy::single_range_in_vec_init)] + return Ok(vec![0..batch.num_rows()]); + } + + let mut partition_columns = vec![]; + + if let Some(keys) = 
&self.routing_keys() { + partition_columns.extend(keys.iter().map(|index| batch.column(*index).clone())); + } + if with_timestamp { + partition_columns.push(batch.column(self.timestamp_index).clone()); + } + + Ok(partition(&partition_columns)?.ranges()) + } + pub fn unkeyed_batch(&self, batch: &RecordBatch) -> Result { if self.key_indices.is_none() { return Ok(batch.clone()); @@ -190,7 +374,7 @@ impl FsSchema { .fields() .iter() .enumerate() - .filter(|(index, _)| !key_indices.contains(index)) + .filter(|(index, _field)| !key_indices.contains(index)) .map(|(_, field)| field.as_ref().clone()) .collect::>(), ); @@ -239,156 +423,18 @@ impl FsSchema { ) -> Result { let mut fields = self.schema.fields.to_vec(); fields.extend(new_fields.map(Arc::new)); - self.with_fields(fields) - } -} - -/// Proto serialization: convert between FsSchema and the proto `FsSchema` message. -/// -/// Schema is encoded as JSON using Arrow's `SchemaRef` JSON representation. -/// This approach avoids depending on serde for `arrow_schema::Schema` directly. 
-impl FsSchema { - pub fn to_proto(&self) -> protocol::grpc::api::FsSchema { - let arrow_schema = schema_to_json_string(&self.schema); - let timestamp_index = self.timestamp_index as u32; - - let has_keys = self.key_indices.is_some(); - let key_indices = self - .key_indices - .as_ref() - .map(|ks| ks.iter().map(|i| *i as u32).collect()) - .unwrap_or_default(); - - let has_routing_keys = self.routing_key_indices.is_some(); - let routing_key_indices = self - .routing_key_indices - .as_ref() - .map(|ks| ks.iter().map(|i| *i as u32).collect()) - .unwrap_or_default(); - protocol::grpc::api::FsSchema { - arrow_schema, - timestamp_index, - key_indices, - has_keys, - routing_key_indices, - has_routing_keys, - } - } - - pub fn from_proto(proto: protocol::grpc::api::FsSchema) -> Result { - let schema = schema_from_json_string(&proto.arrow_schema)?; - let timestamp_index = proto.timestamp_index as usize; - - let key_indices = proto - .has_keys - .then(|| proto.key_indices.into_iter().map(|i| i as usize).collect()); - - let routing_key_indices = proto.has_routing_keys.then(|| { - proto - .routing_key_indices - .into_iter() - .map(|i| i as usize) - .collect() - }); - - Ok(Self { - schema: Arc::new(schema), - timestamp_index, - key_indices, - routing_key_indices, - }) - } -} - -fn schema_to_json_string(schema: &Schema) -> String { - let json_fields: Vec = schema - .fields() - .iter() - .map(|f| { - serde_json::json!({ - "name": f.name(), - "data_type": format!("{:?}", f.data_type()), - "nullable": f.is_nullable(), - }) - }) - .collect(); - serde_json::to_string(&json_fields).unwrap() -} - -fn schema_from_json_string(s: &str) -> Result { - let json_fields: Vec = serde_json::from_str(s) - .map_err(|e| DataFusionError::Plan(format!("Invalid schema JSON: {e}")))?; - - let fields: Vec = json_fields - .into_iter() - .map(|v| { - let name = v["name"] - .as_str() - .ok_or_else(|| DataFusionError::Plan("missing field name".into()))? 
- .to_string(); - let nullable = v["nullable"].as_bool().unwrap_or(true); - let dt_str = v["data_type"] - .as_str() - .ok_or_else(|| DataFusionError::Plan("missing data_type".into()))?; - let data_type = parse_debug_data_type(dt_str)?; - Ok(Field::new(name, data_type, nullable)) - }) - .collect::>()?; - - Ok(Schema::new(fields)) -} - -fn parse_debug_data_type(s: &str) -> Result { - match s { - "Boolean" => Ok(DataType::Boolean), - "Int8" => Ok(DataType::Int8), - "Int16" => Ok(DataType::Int16), - "Int32" => Ok(DataType::Int32), - "Int64" => Ok(DataType::Int64), - "UInt8" => Ok(DataType::UInt8), - "UInt16" => Ok(DataType::UInt16), - "UInt32" => Ok(DataType::UInt32), - "UInt64" => Ok(DataType::UInt64), - "Float16" => Ok(DataType::Float16), - "Float32" => Ok(DataType::Float32), - "Float64" => Ok(DataType::Float64), - "Utf8" => Ok(DataType::Utf8), - "LargeUtf8" => Ok(DataType::LargeUtf8), - "Binary" => Ok(DataType::Binary), - "LargeBinary" => Ok(DataType::LargeBinary), - "Date32" => Ok(DataType::Date32), - "Date64" => Ok(DataType::Date64), - "Null" => Ok(DataType::Null), - s if s.starts_with("Timestamp(Nanosecond") => { - Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) - } - s if s.starts_with("Timestamp(Microsecond") => { - Ok(DataType::Timestamp(TimeUnit::Microsecond, None)) - } - s if s.starts_with("Timestamp(Millisecond") => { - Ok(DataType::Timestamp(TimeUnit::Millisecond, None)) - } - s if s.starts_with("Timestamp(Second") => Ok(DataType::Timestamp(TimeUnit::Second, None)), - _ => Err(DataFusionError::Plan(format!( - "Unsupported data type in schema JSON: {s}" - ))), - } -} - -impl From for FsSchema { - fn from(s: StreamSchema) -> Self { - FsSchema { - schema: s.schema, - timestamp_index: s.timestamp_index, - key_indices: s.key_indices, - routing_key_indices: None, - } + self.with_fields(fields) } } -impl From for Arc { - fn from(s: StreamSchema) -> Self { - Arc::new(FsSchema::from(s)) - } +pub fn server_for_hash_array( + hash: &PrimitiveArray, + n: usize, +) 
-> Result, ArrowError> { + let range_size = u64::MAX / (n as u64) + 1; + let range_scalar = UInt64Array::new_scalar(range_size); + let division = div(hash, &range_scalar)?; + let result: &PrimitiveArray = division.as_any().downcast_ref().unwrap(); + Ok(result.clone()) } diff --git a/src/types/mod.rs b/src/types/mod.rs index ddf7baca..4da0a030 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -28,12 +28,13 @@ pub mod operator_config; pub mod task_info; pub mod time_utils; pub mod worker; +mod converter; // ── Re-exports from existing modules ── pub use arrow_ext::{DisplayAsSql, FsExtensionType, GetArrowSchema, GetArrowType}; pub use date::{DatePart, DateTruncPrecision}; pub use debezium::{Debezium, DebeziumOp, UpdatingData}; -pub use hash::{HASH_SEEDS, range_for_server, server_for_hash}; +pub use hash::{range_for_server, server_for_hash, HASH_SEEDS}; pub use message::{ArrowMessage, CheckpointBarrier, SignalMessage, Watermark}; pub use task_info::{ChainInfo, TaskInfo}; pub use time_utils::{from_micros, from_millis, from_nanos, to_micros, to_millis, to_nanos}; From 94879daac37c8485c025efa18c372f9b4fe31fb6 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sat, 21 Mar 2026 19:15:11 +0800 Subject: [PATCH 08/44] update --- src/lib.rs | 2 - src/main.rs | 2 - src/sql/analysis/join_rewriter.rs | 2 +- src/sql/analysis/mod.rs | 13 - src/{ => sql}/api/checkpoints.rs | 2 +- src/{ => sql}/api/connections.rs | 4 +- src/{ => sql}/api/metrics.rs | 0 src/{ => sql}/api/mod.rs | 0 src/{ => sql}/api/pipelines.rs | 2 +- src/{ => sql}/api/public_ids.rs | 0 src/{ => sql}/api/schema_resolver.rs | 0 src/{ => sql}/api/udfs.rs | 0 src/{ => sql}/api/var_str.rs | 0 src/{types => sql/common}/arrow_ext.rs | 0 src/{types => sql/common}/control.rs | 0 src/{types => sql/common}/converter.rs | 0 src/{types => sql/common}/date.rs | 0 src/{types => sql/common}/debezium.rs | 0 src/{types => sql/common}/errors.rs | 2 +- src/{types => sql/common}/formats.rs | 0 src/{types/df.rs => 
sql/common/fs_schema.rs} | 6 +- src/{types => sql/common}/hash.rs | 0 src/{types => sql/common}/message.rs | 0 src/{types => sql/common}/mod.rs | 10 +- src/{types => sql/common}/operator_config.rs | 0 src/{types => sql/common}/task_info.rs | 0 src/{types => sql/common}/time_utils.rs | 0 src/{types => sql/common}/worker.rs | 0 src/sql/extensions/aggregate.rs | 2 +- src/sql/extensions/async_udf.rs | 187 +++++++++ src/sql/extensions/constants.rs | 13 + src/sql/extensions/debezium.rs | 2 +- src/sql/extensions/extension_try_from.rs | 70 ++++ src/sql/extensions/is_retract.rs | 80 ++++ src/sql/extensions/join.rs | 2 +- src/sql/extensions/key_calculation.rs | 2 +- src/sql/extensions/lookup.rs | 2 +- src/sql/extensions/macros.rs | 28 ++ src/sql/extensions/mod.rs | 408 ++----------------- src/sql/extensions/projection.rs | 2 +- src/sql/extensions/remote_table.rs | 2 +- src/sql/extensions/sink.rs | 2 +- src/sql/extensions/stream_extension.rs | 38 ++ src/sql/extensions/table_source.rs | 2 +- src/sql/extensions/timestamp_append.rs | 80 ++++ src/sql/extensions/updating_aggregate.rs | 2 +- src/sql/extensions/watermark_node.rs | 2 +- src/sql/extensions/window_fn.rs | 2 +- src/sql/logical_node/logical.rs | 2 +- src/sql/logical_planner/compiled_sql.rs | 21 + src/sql/logical_planner/mod.rs | 5 +- src/sql/logical_planner/planner.rs | 2 +- src/sql/mod.rs | 5 +- src/sql/parse.rs | 5 - src/sql/types/data_type.rs | 2 +- 55 files changed, 582 insertions(+), 433 deletions(-) rename src/{ => sql}/api/checkpoints.rs (98%) rename src/{ => sql}/api/connections.rs (99%) rename src/{ => sql}/api/metrics.rs (100%) rename src/{ => sql}/api/mod.rs (100%) rename src/{ => sql}/api/pipelines.rs (98%) rename src/{ => sql}/api/public_ids.rs (100%) rename src/{ => sql}/api/schema_resolver.rs (100%) rename src/{ => sql}/api/udfs.rs (100%) rename src/{ => sql}/api/var_str.rs (100%) rename src/{types => sql/common}/arrow_ext.rs (100%) rename src/{types => sql/common}/control.rs (100%) rename src/{types => 
sql/common}/converter.rs (100%) rename src/{types => sql/common}/date.rs (100%) rename src/{types => sql/common}/debezium.rs (100%) rename src/{types => sql/common}/errors.rs (96%) rename src/{types => sql/common}/formats.rs (100%) rename src/{types/df.rs => sql/common/fs_schema.rs} (98%) rename src/{types => sql/common}/hash.rs (100%) rename src/{types => sql/common}/message.rs (100%) rename src/{types => sql/common}/mod.rs (90%) rename src/{types => sql/common}/operator_config.rs (100%) rename src/{types => sql/common}/task_info.rs (100%) rename src/{types => sql/common}/time_utils.rs (100%) rename src/{types => sql/common}/worker.rs (100%) create mode 100644 src/sql/extensions/async_udf.rs create mode 100644 src/sql/extensions/constants.rs create mode 100644 src/sql/extensions/extension_try_from.rs create mode 100644 src/sql/extensions/is_retract.rs create mode 100644 src/sql/extensions/macros.rs create mode 100644 src/sql/extensions/stream_extension.rs create mode 100644 src/sql/extensions/timestamp_append.rs create mode 100644 src/sql/logical_planner/compiled_sql.rs diff --git a/src/lib.rs b/src/lib.rs index 0a3c6dc6..a6bb4d28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,6 @@ #![allow(dead_code)] -pub mod api; pub mod config; pub mod coordinator; pub mod logging; @@ -22,4 +21,3 @@ pub mod runtime; pub mod server; pub mod sql; pub mod storage; -pub mod types; diff --git a/src/main.rs b/src/main.rs index e847b16c..562b1526 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,7 +12,6 @@ #![allow(dead_code)] -mod api; mod config; mod coordinator; mod logging; @@ -20,7 +19,6 @@ mod runtime; mod server; mod sql; mod storage; -mod types; use anyhow::{Context, Result}; use std::thread; diff --git a/src/sql/analysis/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs index 465d4620..520af335 100644 --- a/src/sql/analysis/join_rewriter.rs +++ b/src/sql/analysis/join_rewriter.rs @@ -3,7 +3,7 @@ use crate::sql::extensions::join::JoinExtension; use 
crate::sql::extensions::key_calculation::KeyCalculationExtension; use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer; use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata}; -use crate::types::TIMESTAMP_FIELD; +use crate::sql::common::TIMESTAMP_FIELD; use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{ Column, DataFusionError, JoinConstraint, JoinType, Result, ScalarValue, Spans, TableReference, diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs index 04230aa0..e13e2b7e 100644 --- a/src/sql/analysis/mod.rs +++ b/src/sql/analysis/mod.rs @@ -25,10 +25,6 @@ pub use crate::sql::schema::schema_provider::{ LogicalBatchInput, StreamSchemaProvider, StreamTable, }; -pub(crate) mod mod_prelude { - pub use super::StreamSchemaProvider; -} - use std::collections::{HashMap, HashSet}; use std::sync::Arc; @@ -44,7 +40,6 @@ use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; use datafusion::sql::sqlparser::parser::Parser; use tracing::{debug, info, instrument}; -use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig}; use crate::sql::logical_planner::optimizers::ChainingOptimizer; use crate::sql::schema::insert::Insert; use crate::sql::schema::table::Table as CatalogTable; @@ -56,14 +51,6 @@ use crate::sql::extensions::{ StreamExtension}; use crate::sql::logical_planner::planner::NamedNode; use crate::sql::types::SqlConfig; -// ── Compilation pipeline ────────────────────────────────────────────── - -#[derive(Clone, Debug)] -pub struct CompiledSql { - pub program: LogicalProgram, - pub connection_ids: Vec, -} - fn duration_from_sql_expr( expr: &datafusion::sql::sqlparser::ast::Expr, ) -> Result { diff --git a/src/api/checkpoints.rs b/src/sql/api/checkpoints.rs similarity index 98% rename from src/api/checkpoints.rs rename to src/sql/api/checkpoints.rs index 8462f311..243cae40 100644 --- a/src/api/checkpoints.rs +++ 
b/src/sql/api/checkpoints.rs @@ -1,4 +1,4 @@ -use crate::types::to_micros; +use crate::sql::common::to_micros; use serde::{Deserialize, Serialize}; use std::time::SystemTime; diff --git a/src/api/connections.rs b/src/sql/api/connections.rs similarity index 99% rename from src/api/connections.rs rename to src/sql/api/connections.rs index eb69690e..d88dee75 100644 --- a/src/api/connections.rs +++ b/src/sql/api/connections.rs @@ -1,5 +1,5 @@ -use crate::types::formats::{BadData, Format, Framing}; -use crate::types::{FsExtensionType, FsSchema}; +use crate::sql::common::formats::{BadData, Format, Framing}; +use crate::sql::common::{FsExtensionType, FsSchema}; use datafusion::arrow::datatypes::{DataType, Field, Fields, TimeUnit}; use serde::ser::SerializeMap; use serde::{Deserialize, Serialize, Serializer}; diff --git a/src/api/metrics.rs b/src/sql/api/metrics.rs similarity index 100% rename from src/api/metrics.rs rename to src/sql/api/metrics.rs diff --git a/src/api/mod.rs b/src/sql/api/mod.rs similarity index 100% rename from src/api/mod.rs rename to src/sql/api/mod.rs diff --git a/src/api/pipelines.rs b/src/sql/api/pipelines.rs similarity index 98% rename from src/api/pipelines.rs rename to src/sql/api/pipelines.rs index 3c77ce7a..8b42036c 100644 --- a/src/api/pipelines.rs +++ b/src/sql/api/pipelines.rs @@ -1,5 +1,5 @@ use super::udfs::Udf; -use crate::types::control::ErrorDomain; +use crate::sql::common::control::ErrorDomain; use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Clone, Debug)] diff --git a/src/api/public_ids.rs b/src/sql/api/public_ids.rs similarity index 100% rename from src/api/public_ids.rs rename to src/sql/api/public_ids.rs diff --git a/src/api/schema_resolver.rs b/src/sql/api/schema_resolver.rs similarity index 100% rename from src/api/schema_resolver.rs rename to src/sql/api/schema_resolver.rs diff --git a/src/api/udfs.rs b/src/sql/api/udfs.rs similarity index 100% rename from src/api/udfs.rs rename to src/sql/api/udfs.rs diff 
--git a/src/api/var_str.rs b/src/sql/api/var_str.rs similarity index 100% rename from src/api/var_str.rs rename to src/sql/api/var_str.rs diff --git a/src/types/arrow_ext.rs b/src/sql/common/arrow_ext.rs similarity index 100% rename from src/types/arrow_ext.rs rename to src/sql/common/arrow_ext.rs diff --git a/src/types/control.rs b/src/sql/common/control.rs similarity index 100% rename from src/types/control.rs rename to src/sql/common/control.rs diff --git a/src/types/converter.rs b/src/sql/common/converter.rs similarity index 100% rename from src/types/converter.rs rename to src/sql/common/converter.rs diff --git a/src/types/date.rs b/src/sql/common/date.rs similarity index 100% rename from src/types/date.rs rename to src/sql/common/date.rs diff --git a/src/types/debezium.rs b/src/sql/common/debezium.rs similarity index 100% rename from src/types/debezium.rs rename to src/sql/common/debezium.rs diff --git a/src/types/errors.rs b/src/sql/common/errors.rs similarity index 96% rename from src/types/errors.rs rename to src/sql/common/errors.rs index 2c425c93..bcda8667 100644 --- a/src/types/errors.rs +++ b/src/sql/common/errors.rs @@ -42,7 +42,7 @@ impl From for DataflowError { #[macro_export] macro_rules! connector_err { ($($arg:tt)*) => { - $crate::types::errors::DataflowError::Connector(format!($($arg)*)) + $crate::sql::common::errors::DataflowError::Connector(format!($($arg)*)) }; } diff --git a/src/types/formats.rs b/src/sql/common/formats.rs similarity index 100% rename from src/types/formats.rs rename to src/sql/common/formats.rs diff --git a/src/types/df.rs b/src/sql/common/fs_schema.rs similarity index 98% rename from src/types/df.rs rename to src/sql/common/fs_schema.rs index 7266bb6b..e1507e3e 100644 --- a/src/types/df.rs +++ b/src/sql/common/fs_schema.rs @@ -1,3 +1,7 @@ +//! FunctionStream table/stream schema: Arrow [`Schema`] plus timestamp index and optional key columns. +//! +//! 
[`Schema`]: datafusion::arrow::datatypes::Schema + use datafusion::arrow::array::builder::{ArrayBuilder, make_builder}; use datafusion::arrow::array::{RecordBatch, TimestampNanosecondArray}; use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit}; @@ -14,7 +18,7 @@ use arrow_array::types::UInt64Type; use protocol::grpc::api; use super::{to_nanos, TIMESTAMP_FIELD}; use std::ops::Range; -use crate::types::converter::Converter; +use crate::sql::common::converter::Converter; pub type FsSchemaRef = Arc; diff --git a/src/types/hash.rs b/src/sql/common/hash.rs similarity index 100% rename from src/types/hash.rs rename to src/sql/common/hash.rs diff --git a/src/types/message.rs b/src/sql/common/message.rs similarity index 100% rename from src/types/message.rs rename to src/sql/common/message.rs diff --git a/src/types/mod.rs b/src/sql/common/mod.rs similarity index 90% rename from src/types/mod.rs rename to src/sql/common/mod.rs index 4da0a030..d03511c0 100644 --- a/src/types/mod.rs +++ b/src/sql/common/mod.rs @@ -10,16 +10,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Core types shared across the FunctionStream system. +//! Shared core types and constants for FunctionStream (`crate::sql::common`). //! -//! This module provides fundamental types used by the runtime, SQL planner, -//! coordinator, and other subsystems — analogous to `arroyo-types` + `arroyo-rpc` in Arroyo. +//! Used by the runtime, SQL planner, coordinator, and other subsystems — +//! analogous to `arroyo-types` + `arroyo-rpc` in Arroyo. 
pub mod arrow_ext; pub mod control; pub mod date; pub mod debezium; -pub mod df; +pub mod fs_schema; pub mod errors; pub mod formats; pub mod hash; @@ -45,7 +45,7 @@ pub use control::{ CheckpointCompleted, CheckpointEvent, CompactionResult, ControlMessage, ControlResp, ErrorDomain, RetryHint, StopMode, TaskCheckpointEventType, TaskError, }; -pub use df::{FsSchema, FsSchemaRef}; +pub use fs_schema::{FsSchema, FsSchemaRef}; pub use errors::DataflowError; pub use formats::{BadData, Format, Framing, JsonFormat}; pub use operator_config::{MetadataField, OperatorConfig, RateLimit}; diff --git a/src/types/operator_config.rs b/src/sql/common/operator_config.rs similarity index 100% rename from src/types/operator_config.rs rename to src/sql/common/operator_config.rs diff --git a/src/types/task_info.rs b/src/sql/common/task_info.rs similarity index 100% rename from src/types/task_info.rs rename to src/sql/common/task_info.rs diff --git a/src/types/time_utils.rs b/src/sql/common/time_utils.rs similarity index 100% rename from src/types/time_utils.rs rename to src/sql/common/time_utils.rs diff --git a/src/types/worker.rs b/src/sql/common/worker.rs similarity index 100% rename from src/types/worker.rs rename to src/sql/common/worker.rs diff --git a/src/sql/extensions/aggregate.rs b/src/sql/extensions/aggregate.rs index c8c070f2..12cde08c 100644 --- a/src/sql/extensions/aggregate.rs +++ b/src/sql/extensions/aggregate.rs @@ -23,7 +23,7 @@ use crate::sql::types::{ DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, schema_from_df_fields, schema_from_df_fields_with_metadata, }; -use crate::types::{FsSchema, FsSchemaRef}; +use crate::sql::common::{FsSchema, FsSchemaRef}; pub(crate) const AGGREGATE_EXTENSION_NAME: &str = "AggregateExtension"; diff --git a/src/sql/extensions/async_udf.rs b/src/sql/extensions/async_udf.rs new file mode 100644 index 00000000..da0bdff1 --- /dev/null +++ b/src/sql/extensions/async_udf.rs @@ -0,0 +1,187 @@ +// Licensed under the 
Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Formatter; +use std::sync::Arc; +use std::time::Duration; + +use datafusion::common::{DFSchemaRef, Result}; +use datafusion::logical_expr::{ + Expr, LogicalPlan, UserDefinedLogicalNode, UserDefinedLogicalNodeCore, +}; +use datafusion_common::internal_err; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; +use prost::Message; +use protocol::grpc::api::{AsyncUdfOperator, AsyncUdfOrdering}; + +use crate::multifield_partial_ord; +use crate::sql::extensions::constants::ASYNC_RESULT_FIELD; +use crate::sql::extensions::stream_extension::{NodeWithIncomingEdges, StreamExtension}; +use crate::sql::logical_node::logical::{ + DylibUdfConfig, LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName, +}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::types::{DFField, fields_with_qualifiers, schema_from_df_fields}; +use crate::sql::common::{FsSchema, FsSchemaRef}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct AsyncUDFExtension { + pub(crate) input: Arc, + pub(crate) name: String, + pub(crate) udf: DylibUdfConfig, + pub(crate) arg_exprs: Vec, + pub(crate) final_exprs: Vec, + pub(crate) ordered: bool, + pub(crate) max_concurrency: usize, + pub(crate) timeout: Duration, + pub(crate) final_schema: DFSchemaRef, +} + +multifield_partial_ord!( + AsyncUDFExtension, + input, + name, + udf, + 
arg_exprs, + final_exprs, + ordered, + max_concurrency, + timeout +); + +impl StreamExtension for AsyncUDFExtension { + fn node_name(&self) -> Option { + None + } + + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result { + let arg_exprs = self + .arg_exprs + .iter() + .map(|e| { + let p = planner.create_physical_expr(e, self.input.schema())?; + Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec()) + }) + .collect::>>()?; + + let mut final_fields = fields_with_qualifiers(self.input.schema()); + final_fields.push(DFField::new( + None, + ASYNC_RESULT_FIELD, + self.udf.return_type.clone(), + true, + )); + let post_udf_schema = schema_from_df_fields(&final_fields)?; + + let final_exprs = self + .final_exprs + .iter() + .map(|e| { + let p = planner.create_physical_expr(e, &post_udf_schema)?; + Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec()) + }) + .collect::>>()?; + + let config = AsyncUdfOperator { + name: self.name.clone(), + udf: Some(self.udf.clone().into()), + arg_exprs, + final_exprs, + ordering: if self.ordered { + AsyncUdfOrdering::Ordered as i32 + } else { + AsyncUdfOrdering::Unordered as i32 + }, + max_concurrency: self.max_concurrency as u32, + timeout_micros: self.timeout.as_micros() as u64, + }; + + let node = LogicalNode::single( + index as u32, + format!("async_udf_{index}"), + OperatorName::AsyncUdf, + config.encode_to_vec(), + format!("async_udf<{}>", self.name), + 1, + ); + + let incoming_edge = + LogicalEdge::project_all(LogicalEdgeType::Forward, input_schemas[0].as_ref().clone()); + Ok(NodeWithIncomingEdges { + node, + edges: vec![incoming_edge], + }) + } + + fn output_schema(&self) -> FsSchema { + FsSchema::from_fields( + self.final_schema + .fields() + .iter() + .map(|f| (**f).clone()) + .collect(), + ) + } +} + +impl UserDefinedLogicalNodeCore for AsyncUDFExtension { + fn name(&self) -> &str { + "AsyncUDFNode" + } + + fn inputs(&self) -> 
Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.final_schema + } + + fn expressions(&self) -> Vec { + self.arg_exprs + .iter() + .chain(self.final_exprs.iter()) + .map(|e| e.to_owned()) + .collect() + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "AsyncUdfExtension<{}>: {}", self.name, self.final_schema) + } + + fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!("input size inconsistent"); + } + if UserDefinedLogicalNode::expressions(self) != exprs { + return internal_err!("Tried to recreate async UDF node with different expressions"); + } + + Ok(Self { + input: Arc::new(inputs[0].clone()), + name: self.name.clone(), + udf: self.udf.clone(), + arg_exprs: self.arg_exprs.clone(), + final_exprs: self.final_exprs.clone(), + ordered: self.ordered, + max_concurrency: self.max_concurrency, + timeout: self.timeout, + final_schema: self.final_schema.clone(), + }) + } +} diff --git a/src/sql/extensions/constants.rs b/src/sql/extensions/constants.rs new file mode 100644 index 00000000..4f90ca6e --- /dev/null +++ b/src/sql/extensions/constants.rs @@ -0,0 +1,13 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +pub const ASYNC_RESULT_FIELD: &str = "__async_result"; diff --git a/src/sql/extensions/debezium.rs b/src/sql/extensions/debezium.rs index 184de88d..84407ee4 100644 --- a/src/sql/extensions/debezium.rs +++ b/src/sql/extensions/debezium.rs @@ -14,7 +14,7 @@ use super::{NodeWithIncomingEdges}; use crate::multifield_partial_ord; use crate::sql::logical_planner::updating_meta_field; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; +use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; pub(crate) const DEBEZIUM_UNROLLING_EXTENSION_NAME: &str = "DebeziumUnrollingExtension"; pub(crate) const TO_DEBEZIUM_EXTENSION_NAME: &str = "ToDebeziumExtension"; diff --git a/src/sql/extensions/extension_try_from.rs b/src/sql/extensions/extension_try_from.rs new file mode 100644 index 00000000..eb042a90 --- /dev/null +++ b/src/sql/extensions/extension_try_from.rs @@ -0,0 +1,70 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; + +use datafusion::common::{DataFusionError, Result}; +use datafusion::logical_expr::UserDefinedLogicalNode; + +use crate::sql::extensions::aggregate::AggregateExtension; +use crate::sql::extensions::async_udf::AsyncUDFExtension; +use crate::sql::extensions::debezium::{DebeziumUnrollingExtension, ToDebeziumExtension}; +use crate::sql::extensions::join::JoinExtension; +use crate::sql::extensions::key_calculation::KeyCalculationExtension; +use crate::sql::extensions::lookup::LookupJoin; +use crate::sql::extensions::projection::ProjectionExtension; +use crate::sql::extensions::remote_table::RemoteTableExtension; +use crate::sql::extensions::sink::SinkExtension; +use crate::sql::extensions::stream_extension::StreamExtension; +use crate::sql::extensions::table_source::TableSourceExtension; +use crate::sql::extensions::updating_aggregate::UpdatingAggregateExtension; +use crate::sql::extensions::watermark_node::WatermarkNode; +use crate::sql::extensions::window_fn::WindowFunctionExtension; + +fn try_from_t( + node: &dyn UserDefinedLogicalNode, +) -> std::result::Result<&dyn StreamExtension, ()> { + node.as_any() + .downcast_ref::() + .map(|t| t as &dyn StreamExtension) + .ok_or(()) +} + +impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension { + type Error = DataFusionError; + + fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result { + try_from_t::(node) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name()))) + } +} + +impl<'a> TryFrom<&'a Arc> for 
&'a dyn StreamExtension { + type Error = DataFusionError; + + fn try_from(node: &'a Arc) -> Result { + TryFrom::try_from(node.as_ref()) + } +} diff --git a/src/sql/extensions/is_retract.rs b/src/sql/extensions/is_retract.rs new file mode 100644 index 00000000..4375b716 --- /dev/null +++ b/src/sql/extensions/is_retract.rs @@ -0,0 +1,80 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use datafusion::arrow::datatypes::{DataType, TimeUnit}; +use datafusion::common::{DFSchemaRef, Result, TableReference}; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; + +use crate::multifield_partial_ord; +use crate::sql::logical_planner::updating_meta_field; +use crate::sql::types::{DFField, TIMESTAMP_FIELD, fields_with_qualifiers, schema_from_df_fields}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct IsRetractExtension { + pub(crate) input: LogicalPlan, + pub(crate) schema: DFSchemaRef, + pub(crate) timestamp_qualifier: Option, +} + +multifield_partial_ord!(IsRetractExtension, input, timestamp_qualifier); + +impl IsRetractExtension { + pub(crate) fn new(input: LogicalPlan, timestamp_qualifier: Option) -> Self { + let mut output_fields = fields_with_qualifiers(input.schema()); + + let timestamp_index = output_fields.len() - 1; + output_fields[timestamp_index] = DFField::new( + timestamp_qualifier.clone(), + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ); + 
output_fields.push((timestamp_qualifier.clone(), updating_meta_field()).into()); + let schema = Arc::new(schema_from_df_fields(&output_fields).unwrap()); + Self { + input, + schema, + timestamp_qualifier, + } + } +} + +impl UserDefinedLogicalNodeCore for IsRetractExtension { + fn name(&self) -> &str { + "IsRetractExtension" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "IsRetractExtension") + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Ok(Self::new( + inputs[0].clone(), + self.timestamp_qualifier.clone(), + )) + } +} diff --git a/src/sql/extensions/join.rs b/src/sql/extensions/join.rs index c28a6e01..74dcfde6 100644 --- a/src/sql/extensions/join.rs +++ b/src/sql/extensions/join.rs @@ -16,7 +16,7 @@ use protocol::grpc::api::JoinOperator; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; use crate::sql::logical_planner::FsPhysicalExtensionCodec; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::types::{FsSchema, FsSchemaRef}; +use crate::sql::common::{FsSchema, FsSchemaRef}; pub(crate) const JOIN_NODE_NAME: &str = "JoinNode"; diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs index e0edb67a..3a94f592 100644 --- a/src/sql/extensions/key_calculation.rs +++ b/src/sql/extensions/key_calculation.rs @@ -20,7 +20,7 @@ use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::types::{ StreamSchema, fields_with_qualifiers, schema_from_df_fields_with_metadata, }; -use crate::types::{FsSchema, FsSchemaRef}; +use crate::sql::common::{FsSchema, FsSchemaRef}; pub(crate) const KEY_CALCULATION_NAME: &str = "KeyCalculationExtension"; diff --git a/src/sql/extensions/lookup.rs 
b/src/sql/extensions/lookup.rs index 2dc76265..c2ef8f28 100644 --- a/src/sql/extensions/lookup.rs +++ b/src/sql/extensions/lookup.rs @@ -14,7 +14,7 @@ use crate::sql::schema::ConnectorTable; use crate::sql::schema::utils::add_timestamp_field_arrow; use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::types::{FsSchema, FsSchemaRef}; +use crate::sql::common::{FsSchema, FsSchemaRef}; pub const SOURCE_EXTENSION_NAME: &str = "LookupSource"; pub const JOIN_EXTENSION_NAME: &str = "LookupJoin"; diff --git a/src/sql/extensions/macros.rs b/src/sql/extensions/macros.rs new file mode 100644 index 00000000..4ce649c2 --- /dev/null +++ b/src/sql/extensions/macros.rs @@ -0,0 +1,28 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[macro_export] +macro_rules! 
multifield_partial_ord { + ($ty:ty, $($field:tt), *) => { + impl PartialOrd for $ty { + fn partial_cmp(&self, other: &Self) -> Option { + $( + let cmp = self.$field.partial_cmp(&other.$field)?; + if cmp != std::cmp::Ordering::Equal { + return Some(cmp); + } + )* + Some(std::cmp::Ordering::Equal) + } + } + }; +} diff --git a/src/sql/extensions/mod.rs b/src/sql/extensions/mod.rs index 25632930..a78ca419 100644 --- a/src/sql/extensions/mod.rs +++ b/src/sql/extensions/mod.rs @@ -1,39 +1,22 @@ -use std::fmt::{Debug, Formatter}; -use std::sync::Arc; -use std::time::Duration; - -use datafusion::arrow::datatypes::{DataType, TimeUnit}; -use datafusion::common::{DFSchemaRef, DataFusionError, Result, TableReference}; -use datafusion::logical_expr::{ - Expr, LogicalPlan, UserDefinedLogicalNode, UserDefinedLogicalNodeCore, -}; -use datafusion_common::internal_err; -use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; -use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; -use prost::Message; -use protocol::grpc::api::{AsyncUdfOperator, AsyncUdfOrdering}; -use crate::sql::logical_node::logical::{DylibUdfConfig, LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_planner::updating_meta_field; -use crate::sql::extensions::aggregate::AggregateExtension; -use crate::sql::extensions::debezium::{DebeziumUnrollingExtension, ToDebeziumExtension}; -use crate::sql::extensions::join::JoinExtension; -use crate::sql::extensions::key_calculation::KeyCalculationExtension; -use crate::sql::extensions::lookup::LookupJoin; -use crate::sql::extensions::projection::ProjectionExtension; -use crate::sql::extensions::remote_table::RemoteTableExtension; -use crate::sql::extensions::sink::SinkExtension; -use crate::sql::extensions::table_source::TableSourceExtension; -use crate::sql::extensions::updating_aggregate::UpdatingAggregateExtension; -use crate::sql::extensions::watermark_node::WatermarkNode; -use 
crate::sql::extensions::window_fn::WindowFunctionExtension; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field}; -use crate::sql::types::{ - DFField, StreamSchema, TIMESTAMP_FIELD, fields_with_qualifiers, schema_from_df_fields, -}; -use crate::types::{FsSchema, FsSchemaRef}; - -pub const ASYNC_RESULT_FIELD: &str = "__async_result"; +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod macros; + +pub(crate) mod constants; +pub(crate) use constants::ASYNC_RESULT_FIELD; + +pub(crate) mod stream_extension; +pub(crate) use stream_extension::{NodeWithIncomingEdges, StreamExtension}; pub(crate) mod aggregate; pub(crate) mod debezium; @@ -48,352 +31,13 @@ pub(crate) mod updating_aggregate; pub(crate) mod watermark_node; pub(crate) mod window_fn; +pub(crate) mod timestamp_append; +pub(crate) use timestamp_append::TimestampAppendExtension; -pub(crate) trait StreamExtension: Debug { - fn node_name(&self) -> Option; - fn plan_node( - &self, - planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> Result; - fn output_schema(&self) -> FsSchema; - fn transparent(&self) -> bool { - false - } -} - -pub(crate) struct NodeWithIncomingEdges { - pub node: LogicalNode, - pub edges: Vec, -} - -fn try_from_t( - node: &dyn UserDefinedLogicalNode, -) -> Result<&dyn StreamExtension, ()> { - node.as_any() - .downcast_ref::() - .map(|t| t as &dyn StreamExtension) - .ok_or(()) -} - -impl<'a> 
TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension { - type Error = DataFusionError; - - fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result { - try_from_t::(node) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name()))) - } -} - -impl<'a> TryFrom<&'a Arc> for &'a dyn StreamExtension { - type Error = DataFusionError; - - fn try_from(node: &'a Arc) -> Result { - TryFrom::try_from(node.as_ref()) - } -} - -#[macro_export] -macro_rules! multifield_partial_ord { - ($ty:ty, $($field:tt), *) => { - impl PartialOrd for $ty { - fn partial_cmp(&self, other: &Self) -> Option { - $( - let cmp = self.$field.partial_cmp(&other.$field)?; - if cmp != std::cmp::Ordering::Equal { - return Some(cmp); - } - )* - Some(std::cmp::Ordering::Equal) - } - } -} - } - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct TimestampAppendExtension { - pub(crate) input: LogicalPlan, - pub(crate) qualifier: Option, - pub(crate) schema: DFSchemaRef, -} - -impl TimestampAppendExtension { - fn new(input: LogicalPlan, qualifier: Option) -> Self { - if has_timestamp_field(input.schema()) { - unreachable!( - "shouldn't be adding timestamp to a plan that already has it: plan :\n {:?}\n schema: {:?}", - input, - input.schema() - ); - } - let schema = add_timestamp_field(input.schema().clone(), qualifier.clone()).unwrap(); - Self { - input, - qualifier, - schema, - } - } -} - -multifield_partial_ord!(TimestampAppendExtension, input, qualifier); - -impl UserDefinedLogicalNodeCore for 
TimestampAppendExtension { - fn name(&self) -> &str { - "TimestampAppendExtension" - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!( - f, - "TimestampAppendExtension({:?}): {}", - self.qualifier, - self.schema - .fields() - .iter() - .map(|f| f.name().to_string()) - .collect::>() - .join(", ") - ) - } - - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - Ok(Self::new(inputs[0].clone(), self.qualifier.clone())) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct AsyncUDFExtension { - pub(crate) input: Arc, - pub(crate) name: String, - pub(crate) udf: DylibUdfConfig, - pub(crate) arg_exprs: Vec, - pub(crate) final_exprs: Vec, - pub(crate) ordered: bool, - pub(crate) max_concurrency: usize, - pub(crate) timeout: Duration, - pub(crate) final_schema: DFSchemaRef, -} - -multifield_partial_ord!( - AsyncUDFExtension, - input, - name, - udf, - arg_exprs, - final_exprs, - ordered, - max_concurrency, - timeout -); - -impl StreamExtension for AsyncUDFExtension { - fn node_name(&self) -> Option { - None - } - - fn plan_node( - &self, - planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> Result { - let arg_exprs = self - .arg_exprs - .iter() - .map(|e| { - let p = planner.create_physical_expr(e, self.input.schema())?; - Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec()) - }) - .collect::>>()?; - - let mut final_fields = fields_with_qualifiers(self.input.schema()); - final_fields.push(DFField::new( - None, - ASYNC_RESULT_FIELD, - self.udf.return_type.clone(), - true, - )); - let post_udf_schema = schema_from_df_fields(&final_fields)?; - - let final_exprs = self - .final_exprs - .iter() - .map(|e| { - let p = planner.create_physical_expr(e, &post_udf_schema)?; - 
Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec()) - }) - .collect::>>()?; - - let config = AsyncUdfOperator { - name: self.name.clone(), - udf: Some(self.udf.clone().into()), - arg_exprs, - final_exprs, - ordering: if self.ordered { - AsyncUdfOrdering::Ordered as i32 - } else { - AsyncUdfOrdering::Unordered as i32 - }, - max_concurrency: self.max_concurrency as u32, - timeout_micros: self.timeout.as_micros() as u64, - }; - - let node = LogicalNode::single( - index as u32, - format!("async_udf_{index}"), - OperatorName::AsyncUdf, - config.encode_to_vec(), - format!("async_udf<{}>", self.name), - 1, - ); - - let incoming_edge = - LogicalEdge::project_all(LogicalEdgeType::Forward, input_schemas[0].as_ref().clone()); - Ok(NodeWithIncomingEdges { - node, - edges: vec![incoming_edge], - }) - } - - fn output_schema(&self) -> FsSchema { - FsSchema::from_fields( - self.final_schema - .fields() - .iter() - .map(|f| (**f).clone()) - .collect(), - ) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct IsRetractExtension { - pub(crate) input: LogicalPlan, - pub(crate) schema: DFSchemaRef, - pub(crate) timestamp_qualifier: Option, -} - -multifield_partial_ord!(IsRetractExtension, input, timestamp_qualifier); - -impl IsRetractExtension { - pub(crate) fn new(input: LogicalPlan, timestamp_qualifier: Option) -> Self { - let mut output_fields = fields_with_qualifiers(input.schema()); - - let timestamp_index = output_fields.len() - 1; - output_fields[timestamp_index] = DFField::new( - timestamp_qualifier.clone(), - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ); - output_fields.push((timestamp_qualifier.clone(), updating_meta_field()).into()); - let schema = Arc::new(schema_from_df_fields(&output_fields).unwrap()); - Self { - input, - schema, - timestamp_qualifier, - } - } -} - -impl UserDefinedLogicalNodeCore for IsRetractExtension { - fn name(&self) -> &str { - "IsRetractExtension" - } - - fn 
inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "IsRetractExtension") - } - - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - Ok(Self::new( - inputs[0].clone(), - self.timestamp_qualifier.clone(), - )) - } -} - -impl UserDefinedLogicalNodeCore for AsyncUDFExtension { - fn name(&self) -> &str { - "AsyncUDFNode" - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] - } - - fn schema(&self) -> &DFSchemaRef { - &self.final_schema - } - - fn expressions(&self) -> Vec { - self.arg_exprs - .iter() - .chain(self.final_exprs.iter()) - .map(|e| e.to_owned()) - .collect() - } - - fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "AsyncUdfExtension<{}>: {}", self.name, self.final_schema) - } +pub(crate) mod async_udf; +pub(crate) use async_udf::AsyncUDFExtension; - fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { - if inputs.len() != 1 { - return internal_err!("input size inconsistent"); - } - if UserDefinedLogicalNode::expressions(self) != exprs { - return internal_err!("Tried to recreate async UDF node with different expressions"); - } +pub(crate) mod is_retract; +pub(crate) use is_retract::IsRetractExtension; - Ok(Self { - input: Arc::new(inputs[0].clone()), - name: self.name.clone(), - udf: self.udf.clone(), - arg_exprs: self.arg_exprs.clone(), - final_exprs: self.final_exprs.clone(), - ordered: self.ordered, - max_concurrency: self.max_concurrency, - timeout: self.timeout, - final_schema: self.final_schema.clone(), - }) - } -} +mod extension_try_from; diff --git a/src/sql/extensions/projection.rs b/src/sql/extensions/projection.rs index fa0f118b..ff319d12 100644 --- a/src/sql/extensions/projection.rs +++ b/src/sql/extensions/projection.rs @@ -13,7 +13,7 @@ use 
crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNod use crate::multifield_partial_ord; use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::types::{schema_from_df_fields, DFField}; -use crate::types::{FsSchema, FsSchemaRef}; +use crate::sql::common::{FsSchema, FsSchemaRef}; pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension"; diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs index 91ef4d0e..570a3393 100644 --- a/src/sql/extensions/remote_table.rs +++ b/src/sql/extensions/remote_table.rs @@ -10,7 +10,7 @@ use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNod use crate::multifield_partial_ord; use crate::sql::logical_planner::FsPhysicalExtensionCodec; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::types::{FsSchema, FsSchemaRef}; +use crate::sql::common::{FsSchema, FsSchemaRef}; use super::{StreamExtension, NodeWithIncomingEdges}; pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension"; diff --git a/src/sql/extensions/sink.rs b/src/sql/extensions/sink.rs index 7b58a7b4..a1112c4b 100644 --- a/src/sql/extensions/sink.rs +++ b/src/sql/extensions/sink.rs @@ -9,7 +9,7 @@ use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNod use crate::multifield_partial_ord; use crate::sql::schema::Table; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; +use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; use super::{ StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension, remote_table::RemoteTableExtension, diff --git a/src/sql/extensions/stream_extension.rs b/src/sql/extensions/stream_extension.rs new file mode 100644 index 00000000..76954529 --- /dev/null +++ b/src/sql/extensions/stream_extension.rs @@ -0,0 +1,38 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// 
you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Debug; + +use datafusion::common::Result; + +use crate::sql::logical_node::logical::{LogicalEdge, LogicalNode}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::common::{FsSchema, FsSchemaRef}; + +pub(crate) trait StreamExtension: Debug { + fn node_name(&self) -> Option; + fn plan_node( + &self, + planner: &Planner, + index: usize, + input_schemas: Vec, + ) -> Result; + fn output_schema(&self) -> FsSchema; + fn transparent(&self) -> bool { + false + } +} + +pub(crate) struct NodeWithIncomingEdges { + pub node: LogicalNode, + pub edges: Vec, +} diff --git a/src/sql/extensions/table_source.rs b/src/sql/extensions/table_source.rs index bdf470e2..0b069bbf 100644 --- a/src/sql/extensions/table_source.rs +++ b/src/sql/extensions/table_source.rs @@ -12,7 +12,7 @@ use crate::sql::schema::utils::add_timestamp_field; use crate::sql::extensions::debezium::DebeziumUnrollingExtension; use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::types::schema_from_df_fields; -use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; +use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; use super::{ StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension, remote_table::RemoteTableExtension, diff --git a/src/sql/extensions/timestamp_append.rs b/src/sql/extensions/timestamp_append.rs new file mode 100644 index 00000000..069b288a --- /dev/null +++ b/src/sql/extensions/timestamp_append.rs @@ -0,0 
+1,80 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use datafusion::common::{DFSchemaRef, Result, TableReference}; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; + +use crate::multifield_partial_ord; +use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) struct TimestampAppendExtension { + pub(crate) input: LogicalPlan, + pub(crate) qualifier: Option, + pub(crate) schema: DFSchemaRef, +} + +impl TimestampAppendExtension { + pub(crate) fn new(input: LogicalPlan, qualifier: Option) -> Self { + if has_timestamp_field(input.schema()) { + unreachable!( + "shouldn't be adding timestamp to a plan that already has it: plan :\n {:?}\n schema: {:?}", + input, + input.schema() + ); + } + let schema = add_timestamp_field(input.schema().clone(), qualifier.clone()).unwrap(); + Self { + input, + qualifier, + schema, + } + } +} + +multifield_partial_ord!(TimestampAppendExtension, input, qualifier); + +impl UserDefinedLogicalNodeCore for TimestampAppendExtension { + fn name(&self) -> &str { + "TimestampAppendExtension" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "TimestampAppendExtension({:?}): {}", + self.qualifier, + self.schema 
+ .fields() + .iter() + .map(|f| f.name().to_string()) + .collect::>() + .join(", ") + ) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Ok(Self::new(inputs[0].clone(), self.qualifier.clone())) + } +} diff --git a/src/sql/extensions/updating_aggregate.rs b/src/sql/extensions/updating_aggregate.rs index fdb2bb1d..8220945b 100644 --- a/src/sql/extensions/updating_aggregate.rs +++ b/src/sql/extensions/updating_aggregate.rs @@ -16,7 +16,7 @@ use crate::sql::functions::multi_hash; use crate::sql::logical_planner::FsPhysicalExtensionCodec; use crate::sql::extensions::{IsRetractExtension, NodeWithIncomingEdges, StreamExtension}; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::types::{FsSchema, FsSchemaRef}; +use crate::sql::common::{FsSchema, FsSchemaRef}; pub(crate) const UPDATING_AGGREGATE_EXTENSION_NAME: &str = "UpdatingAggregateExtension"; diff --git a/src/sql/extensions/watermark_node.rs b/src/sql/extensions/watermark_node.rs index f13b3472..5ef8aa49 100644 --- a/src/sql/extensions/watermark_node.rs +++ b/src/sql/extensions/watermark_node.rs @@ -12,7 +12,7 @@ use crate::multifield_partial_ord; use crate::sql::schema::utils::add_timestamp_field; use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::types::{FsSchema, FsSchemaRef}; +use crate::sql::common::{FsSchema, FsSchemaRef}; pub(crate) const WATERMARK_NODE_NAME: &str = "WatermarkNode"; #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/src/sql/extensions/window_fn.rs b/src/sql/extensions/window_fn.rs index 1c8b5687..c2594546 100644 --- a/src/sql/extensions/window_fn.rs +++ b/src/sql/extensions/window_fn.rs @@ -10,7 +10,7 @@ use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNod use crate::sql::logical_planner::FsPhysicalExtensionCodec; use crate::sql::logical_planner::planner::{NamedNode, Planner}; use 
crate::sql::types::TIMESTAMP_FIELD; -use crate::types::{FsSchema, FsSchemaRef}; +use crate::sql::common::{FsSchema, FsSchemaRef}; use super::{ NodeWithIncomingEdges, StreamExtension}; pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension"; diff --git a/src/sql/logical_node/logical.rs b/src/sql/logical_node/logical.rs index 13560a3e..9fa139d1 100644 --- a/src/sql/logical_node/logical.rs +++ b/src/sql/logical_node/logical.rs @@ -11,7 +11,7 @@ use datafusion_proto::protobuf::ArrowType; use prost::Message; use strum::{Display, EnumString}; use protocol::grpc::api; -use crate::types::FsSchema; +use crate::sql::common::FsSchema; #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)] pub enum OperatorName { diff --git a/src/sql/logical_planner/compiled_sql.rs b/src/sql/logical_planner/compiled_sql.rs new file mode 100644 index 00000000..e0525097 --- /dev/null +++ b/src/sql/logical_planner/compiled_sql.rs @@ -0,0 +1,21 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::sql::logical_node::logical::LogicalProgram; + +// ── Compilation pipeline ────────────────────────────────────────────── + +#[derive(Clone, Debug)] +pub struct CompiledSql { + pub program: LogicalProgram, + pub connection_ids: Vec, +} diff --git a/src/sql/logical_planner/mod.rs b/src/sql/logical_planner/mod.rs index e4db07a0..8b7d9e76 100644 --- a/src/sql/logical_planner/mod.rs +++ b/src/sql/logical_planner/mod.rs @@ -31,7 +31,7 @@ use crate::make_udf_function; use crate::sql::functions::MultiHashFunction; use crate::sql::analysis::UNNESTED_COL; use crate::sql::schema::utils::window_arrow_struct; -use crate::types::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; +use crate::sql::common::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; use datafusion::arrow::datatypes::{TimestampNanosecondType, UInt64Type}; use datafusion::catalog::memory::MemorySourceConfig; use datafusion::datasource::memory::DataSourceExec; @@ -56,9 +56,12 @@ use std::fmt::Debug; use tokio::sync::mpsc::UnboundedReceiver; use tokio_stream::wrappers::UnboundedReceiverStream; +pub mod compiled_sql; pub(crate) mod planner; pub mod optimizers; +pub use compiled_sql::CompiledSql; + // ─────────────────── Updating Meta Helpers ─────────────────── pub fn updating_meta_fields() -> Fields { diff --git a/src/sql/logical_planner/planner.rs b/src/sql/logical_planner/planner.rs index 150b86f1..45d373c3 100644 --- a/src/sql/logical_planner/planner.rs +++ b/src/sql/logical_planner/planner.rs @@ -41,7 +41,7 @@ use crate::sql::extensions::key_calculation::KeyCalculationExtension; use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; use crate::sql::schema::utils::add_timestamp_field_arrow; use crate::sql::schema::StreamSchemaProvider; -use crate::types::{FsSchema, FsSchemaRef}; +use crate::sql::common::{FsSchema, FsSchemaRef}; #[derive(Eq, Hash, PartialEq)] #[derive(Debug)] diff --git a/src/sql/mod.rs b/src/sql/mod.rs index be44d979..32c0dce9 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -10,6 
+10,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub mod common; +pub mod api; + pub mod schema; pub mod functions; pub mod parse; @@ -22,4 +25,4 @@ pub mod types; pub use schema::StreamSchemaProvider; pub use parse::parse_sql; pub use analysis::rewrite_plan; -pub use analysis::{CompiledSql}; +pub use logical_planner::CompiledSql; diff --git a/src/sql/parse.rs b/src/sql/parse.rs index bdb4d481..1b4be38a 100644 --- a/src/sql/parse.rs +++ b/src/sql/parse.rs @@ -23,11 +23,6 @@ use crate::coordinator::{ Statement as CoordinatorStatement, StopFunction, StreamingTableStatement, }; -/// Stage 1: String → Vec> -/// -/// Parses SQL using FunctionStreamDialect (from sqlparser-rs), then classifies -/// each statement into a concrete coordinator Statement type. -/// A single SQL input may contain multiple statements (separated by `;`). pub fn parse_sql(query: &str) -> Result>> { let trimmed = query.trim(); if trimmed.is_empty() { diff --git a/src/sql/types/data_type.rs b/src/sql/types/data_type.rs index 57edc3c9..66076da3 100644 --- a/src/sql/types/data_type.rs +++ b/src/sql/types/data_type.rs @@ -5,7 +5,7 @@ use datafusion::arrow::datatypes::{ }; use datafusion::common::{Result, plan_datafusion_err, plan_err}; -use crate::types::FsExtensionType; +use crate::sql::common::FsExtensionType; pub fn convert_data_type( sql_type: &datafusion::sql::sqlparser::ast::DataType, From d647ea15f9b749e75f0b8e2ecda56f5f722c8c57 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sat, 21 Mar 2026 22:23:02 +0800 Subject: [PATCH 09/44] update --- src/coordinator/coordinator.rs | 378 ++++++++- src/coordinator/plan/logical_plan_visitor.rs | 76 +- src/coordinator/statement/streaming_table.rs | 7 +- src/sql/extensions/remote_table.rs | 90 ++- src/sql/frontend_sql_coverage_tests.rs | 807 +++++++++++++++++++ src/sql/logical_planner/planner.rs | 1 - src/sql/mod.rs | 3 + src/sql/parse.rs | 69 +- 8 files changed, 1370 insertions(+), 61 
deletions(-) create mode 100644 src/sql/frontend_sql_coverage_tests.rs diff --git a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs index 8dc55c4d..0ddca660 100644 --- a/src/coordinator/coordinator.rs +++ b/src/coordinator/coordinator.rs @@ -37,12 +37,28 @@ impl Coordinator { Self {} } - pub fn execute(&self, stmt: &dyn Statement) -> ExecuteResult { + pub fn compile_plan( + &self, + stmt: &dyn Statement, + schema_provider: StreamSchemaProvider, + ) -> Result, anyhow::Error> { + let context = ExecutionContext::new(); + let analysis = self.step_analyze(&context, stmt)?; + let plan = self.step_build_logical_plan(&analysis, schema_provider)?; + self.step_optimize(&analysis, plan) + } + + /// Same as [`Self::execute`], but uses the provided catalog / stream tables (e.g. tests). + pub fn execute_with_schema_provider( + &self, + stmt: &dyn Statement, + schema_provider: StreamSchemaProvider, + ) -> ExecuteResult { let start_time = Instant::now(); let context = ExecutionContext::new(); let execution_id = context.execution_id; - match self.execute_pipeline(&context, stmt) { + match self.execute_pipeline(&context, stmt, schema_provider) { Ok(result) => { log::debug!( "[{}] Execution completed in {}ms", @@ -63,13 +79,18 @@ impl Coordinator { } } + pub fn execute(&self, stmt: &dyn Statement) -> ExecuteResult { + self.execute_with_schema_provider(stmt, StreamSchemaProvider::new()) + } + fn execute_pipeline( &self, context: &ExecutionContext, stmt: &dyn Statement, + schema_provider: StreamSchemaProvider, ) -> Result { let analysis = self.step_analyze(context, stmt)?; - let plan = self.step_build_logical_plan(&analysis)?; + let plan = self.step_build_logical_plan(&analysis, schema_provider)?; let optimized_plan = self.step_optimize(&analysis, plan)?; self.step_execute(optimized_plan) } @@ -90,8 +111,11 @@ impl Coordinator { result } - fn step_build_logical_plan(&self, analysis: &Analysis) -> Result> { - let schema_provider = StreamSchemaProvider::new(); + fn 
step_build_logical_plan( + &self, + analysis: &Analysis, + schema_provider: StreamSchemaProvider, + ) -> Result> { let visitor = LogicalPlanVisitor::new(schema_provider); let plan = visitor.visit(analysis); Ok(plan) @@ -137,3 +161,347 @@ impl Coordinator { result } } + +#[cfg(test)] +mod create_streaming_table_coordinator_tests { + use std::sync::Arc; + + use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; + + use crate::sql::common::TIMESTAMP_FIELD; + use crate::sql::parse::parse_sql; + use crate::sql::schema::StreamSchemaProvider; + + use super::Coordinator; + + fn fake_stream_schema_provider() -> StreamSchemaProvider { + let mut provider = StreamSchemaProvider::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + ])); + provider.add_source_table( + "src".to_string(), + schema, + Some(TIMESTAMP_FIELD.to_string()), + None, + ); + provider + } + + fn fake_stream_schema_provider_with_v() -> StreamSchemaProvider { + let mut provider = StreamSchemaProvider::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("v", DataType::Utf8, true), + Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + ])); + provider.add_source_table( + "src".to_string(), + schema, + Some(TIMESTAMP_FIELD.to_string()), + None, + ); + provider + } + + fn fake_src_dim_provider() -> StreamSchemaProvider { + let mut provider = fake_stream_schema_provider_with_v(); + let dim = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + Field::new("amt", DataType::Float64, true), + Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + ])); + provider.add_source_table( + "dim".to_string(), + dim, + Some(TIMESTAMP_FIELD.to_string()), + None, + ); + 
provider + } + + fn assert_coordinator_streaming_build_ok( + sql: &str, + provider: StreamSchemaProvider, + expect_sink_substring: &str, + expect_connector_substring: &str, + ) { + let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}")); + assert_eq!(stmts.len(), 1); + let plan = Coordinator::new() + .compile_plan(stmts[0].as_ref(), provider) + .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}")); + let rendered = format!("{plan:?}"); + assert!(rendered.contains("StreamingTable"), "{rendered}"); + assert!( + rendered.contains(expect_sink_substring), + "expected sink name fragment {expect_sink_substring:?} in:\n{rendered}" + ); + assert!( + rendered.contains(expect_connector_substring), + "expected connector fragment {expect_connector_substring:?} in:\n{rendered}" + ); + } + + #[test] + fn coordinator_build_create_streaming_table_select_star_kafka() { + assert_coordinator_streaming_build_ok( + concat!( + "CREATE STREAMING TABLE my_sink ", + "WITH ('connector' = 'kafka') ", + "AS SELECT * FROM src", + ), + fake_stream_schema_provider(), + "my_sink", + "kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_memory_connector() { + assert_coordinator_streaming_build_ok( + "CREATE STREAMING TABLE mem_out WITH ('connector'='memory') AS SELECT * FROM src", + fake_stream_schema_provider(), + "mem_out", + "memory", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_postgres_connector() { + assert_coordinator_streaming_build_ok( + "CREATE STREAMING TABLE pg_out WITH ('connector'='postgres') AS SELECT id FROM src", + fake_stream_schema_provider(), + "pg_out", + "postgres", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_partition_by_and_idle_time() { + assert_coordinator_streaming_build_ok( + concat!( + "CREATE STREAMING TABLE part_idle ", + "WITH ('connector'='kafka', 'partition_by'='id', 'idle_time'='30 seconds') ", + "AS SELECT * FROM src", + ), + fake_stream_schema_provider(), + "part_idle", + 
"kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_project_timestamp_columns() { + let sql = format!( + "CREATE STREAMING TABLE ts_cols WITH ('connector'='kafka') AS SELECT id, {ts} FROM src", + ts = TIMESTAMP_FIELD + ); + assert_coordinator_streaming_build_ok( + &sql, + fake_stream_schema_provider(), + "ts_cols", + "kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_where_filters() { + let p = fake_stream_schema_provider_with_v(); + for (label, body) in [ + ("eq", "SELECT * FROM src WHERE id = 1"), + ("range", "SELECT * FROM src WHERE id > 0 AND id < 100"), + ("in_list", "SELECT * FROM src WHERE id IN (1, 2, 3)"), + ("between", "SELECT * FROM src WHERE id BETWEEN 1 AND 10"), + ("like", "SELECT * FROM src WHERE v LIKE 'a%'"), + ("null", "SELECT * FROM src WHERE v IS NULL"), + ] { + let sql = format!( + "CREATE STREAMING TABLE sink_w_{label} WITH ('connector'='kafka') AS {body}" + ); + assert_coordinator_streaming_build_ok(&sql, p.clone(), &format!("sink_w_{label}"), "kafka"); + } + } + + #[test] + fn coordinator_build_create_streaming_table_case_coalesce_cast() { + let ts = TIMESTAMP_FIELD; + let sql = format!( + "CREATE STREAMING TABLE sink_expr WITH ('connector'='kafka') AS \ + SELECT CASE WHEN id < 0 THEN 0 ELSE id END AS c, COALESCE(v, 'x') AS v2, \ + CAST(id AS DOUBLE) AS id_f, {ts} FROM src" + ); + assert_coordinator_streaming_build_ok( + &sql, + fake_stream_schema_provider_with_v(), + "sink_expr", + "kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_row_time_projection() { + let ts = TIMESTAMP_FIELD; + let sql = format!( + "CREATE STREAMING TABLE sink_rt WITH ('connector'='kafka') AS \ + SELECT row_time(), id, {ts} FROM src" + ); + assert_coordinator_streaming_build_ok( + &sql, + fake_stream_schema_provider(), + "sink_rt", + "kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_scalar_funcs_projection() { + let ts = TIMESTAMP_FIELD; + let sql = format!( + "CREATE 
STREAMING TABLE sink_scalar WITH ('connector'='kafka') AS \ + SELECT ABS(id), UPPER(v), LOWER(v), BTRIM(v), CHARACTER_LENGTH(v), {ts} FROM src" + ); + assert_coordinator_streaming_build_ok( + &sql, + fake_stream_schema_provider_with_v(), + "sink_scalar", + "kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_cte() { + let ts = TIMESTAMP_FIELD; + let sql = format!( + "CREATE STREAMING TABLE sink_cte WITH ('connector'='kafka') AS \ + WITH t AS (SELECT id, {ts} FROM src WHERE id > 0) SELECT * FROM t" + ); + assert_coordinator_streaming_build_ok( + &sql, + fake_stream_schema_provider(), + "sink_cte", + "kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_cte_chain() { + let sql = "CREATE STREAMING TABLE sink_cte2 WITH ('connector'='kafka') AS \ + WITH a AS (SELECT id FROM src), b AS (SELECT id FROM a WHERE id > 1) SELECT * FROM b"; + assert_coordinator_streaming_build_ok( + sql, + fake_stream_schema_provider(), + "sink_cte2", + "kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_sink_name_with_digits() { + assert_coordinator_streaming_build_ok( + "CREATE STREAMING TABLE out_sink_01 WITH ('connector'='kafka') AS SELECT * FROM src", + fake_stream_schema_provider(), + "out_sink_01", + "kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_subquery_in_from() { + let ts = TIMESTAMP_FIELD; + let sql = format!( + "CREATE STREAMING TABLE sink_sq WITH ('connector'='kafka') AS \ + SELECT * FROM (SELECT id, {ts} FROM src WHERE id >= 0) AS x" + ); + assert_coordinator_streaming_build_ok( + &sql, + fake_stream_schema_provider(), + "sink_sq", + "kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_nested_subqueries() { + let sql = "CREATE STREAMING TABLE sink_nest WITH ('connector'='kafka') AS \ + SELECT * FROM (SELECT * FROM (SELECT id FROM src) AS i2) AS i1"; + assert_coordinator_streaming_build_ok( + sql, + fake_stream_schema_provider(), + "sink_nest", + "kafka", + ); 
+ } + + #[test] + fn coordinator_build_create_streaming_table_union_all() { + let ts = TIMESTAMP_FIELD; + let sql = format!( + "CREATE STREAMING TABLE sink_union WITH ('connector'='kafka') AS \ + SELECT id, v, {ts} FROM src \ + UNION ALL \ + SELECT id, name AS v, {ts} FROM dim" + ); + assert_coordinator_streaming_build_ok( + &sql, + fake_src_dim_provider(), + "sink_union", + "kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_nullif_regexp() { + let ts = TIMESTAMP_FIELD; + let sql = format!( + "CREATE STREAMING TABLE sink_re WITH ('connector'='kafka') AS \ + SELECT id, NULLIF(v, ''), REGEXP_LIKE(v, '^x'), {ts} FROM src" + ); + assert_coordinator_streaming_build_ok( + &sql, + fake_stream_schema_provider_with_v(), + "sink_re", + "kafka", + ); + } + + #[test] + fn coordinator_build_create_streaming_table_not_and_or_where() { + let p = fake_stream_schema_provider_with_v(); + assert_coordinator_streaming_build_ok( + "CREATE STREAMING TABLE sink_bool WITH ('connector'='kafka') AS \ + SELECT * FROM src WHERE NOT (id = 0) AND (v IS NOT NULL OR id > 0)", + p, + "sink_bool", + "kafka", + ); + } + + #[test] + fn coordinator_sql_create_streaming_table_compiles_full_pipeline() { + assert_coordinator_streaming_build_ok( + concat!( + "CREATE STREAMING TABLE my_sink ", + "WITH ('connector' = 'kafka') ", + "AS SELECT * FROM src", + ), + fake_stream_schema_provider(), + "my_sink", + "kafka", + ); + } +} diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index 1daf5a16..93f8776a 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -49,7 +49,6 @@ const CONNECTOR: &str = "connector"; const PARTITION_BY: &str = "partition_by"; const IDLE_MICROS: &str = "idle_time"; -/// Convert `WITH` option list to a key-value map (e.g. connector settings). 
fn with_options_to_map(options: &[SqlOption]) -> std::collections::HashMap { options .iter() @@ -83,8 +82,6 @@ impl LogicalPlanVisitor { _ => panic!("LogicalPlanVisitor should return Plan"), } } - /// Builds the logical plan for 'CREATE STREAMING TABLE'. - /// This orchestrates the transformation from a SQL Query to a stateful Sink. fn build_create_streaming_table_plan( &self, stmt: &StreamingTableStatement, @@ -102,8 +99,6 @@ impl LogicalPlanVisitor { let table_name = name.to_string(); debug!("Compiling Streaming Table Sink for: {}", table_name); - // 1. Connector Options Extraction - // Extract 'connector' (Kafka, Postgres, etc.) and other physical properties. let mut opts = ConnectorOptions::new(with_options, &None)?; let connector = opts.pull_opt_str(CONNECTOR)?.ok_or_else(|| { plan_datafusion_err!( @@ -113,14 +108,10 @@ impl LogicalPlanVisitor { ) })?; - // 2. Query Optimization & Streaming Rewrite - // Convert the standard SQL query into a streaming-aware logical plan. let base_plan = produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?; let mut plan = rewrite_plan(base_plan, &self.schema_provider)?; - // 3. Outgoing Data Serialization - // If the query produces internal types (like JSON Union), inject a serialization layer. if plan .schema() .fields() @@ -130,11 +121,8 @@ impl LogicalPlanVisitor { plan = serialize_outgoing_json(&self.schema_provider, Arc::new(plan)); } - // 4. Sink Metadata & Partitioning Logic - // Determine how data should be partitioned before hitting the external system. let partition_exprs = self.resolve_partition_expressions(&mut opts)?; - // Map DataFusion fields to Arroyo FieldSpecs for the connector. let fields: Vec = plan .schema() .fields() @@ -142,28 +130,24 @@ impl LogicalPlanVisitor { .map(|f| FieldSpec::Struct((**f).clone())) .collect(); - // 5. Connector Table Construction - // This object acts as the 'Identity Card' for the Sink in the physical cluster. 
let connector_table = ConnectorTable { id: None, connector, name: table_name.clone(), connection_type: ConnectionType::Sink, fields, - config: "".to_string(), // Filled by the coordinator later + config: "".to_string(), description: comment.clone().unwrap_or_default(), event_time_field: None, watermark_field: None, idle_time: opts.pull_opt_duration(IDLE_MICROS)?, - primary_keys: Arc::new(vec![]), // PKs are inferred or explicitly set here + primary_keys: Arc::new(vec![]), inferred_fields: None, partition_exprs: Arc::new(partition_exprs), lookup_cache_ttl:None, lookup_cache_max_bytes:None, }; - // 6. Sink Extension & Final Rewrites - // Wrap the plan in a SinkExtension and ensure Key/Partition alignment. let sink_extension = SinkExtension::new( TableReference::bare(table_name.clone()), Table::ConnectorTable(connector_table.clone()), @@ -171,12 +155,10 @@ impl LogicalPlanVisitor { Arc::new(plan), )?; - // Ensure the data distribution matches the Sink's requirements (e.g., Shuffle by Partition Key) let plan_with_keys = maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension { node: Arc::new(sink_extension), }))?; - // Global pass to wire inputs and handle shared sub-plans let final_extensions = rewrite_sinks(vec![plan_with_keys])?; let final_plan = final_extensions.into_iter().next().unwrap(); @@ -328,3 +310,57 @@ impl StatementVisitor for LogicalPlanVisitor { } } } + +#[cfg(test)] +mod create_streaming_table_tests { + use std::sync::Arc; + + use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; + use datafusion::sql::sqlparser::ast::Statement as DFStatement; + use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; + use datafusion::sql::sqlparser::parser::Parser; + + use crate::sql::common::TIMESTAMP_FIELD; + use crate::sql::rewrite_plan; + use crate::sql::schema::optimizer::produce_optimized_plan; + use crate::sql::schema::StreamSchemaProvider; + + fn schema_provider_with_src() -> StreamSchemaProvider { + let mut provider = 
StreamSchemaProvider::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + ])); + provider.add_source_table( + "src".to_string(), + schema, + Some(TIMESTAMP_FIELD.to_string()), + None, + ); + provider + } + + #[test] + fn create_streaming_table_query_plans_and_rewrites() { + let sql = + "CREATE STREAMING TABLE my_sink WITH ('connector' = 'kafka') AS SELECT * FROM src"; + let dialect = FunctionStreamDialect {}; + let ast = Parser::parse_sql(&dialect, sql).expect("parse CREATE STREAMING TABLE"); + let DFStatement::CreateStreamingTable { query, .. } = &ast[0] else { + panic!("expected CreateStreamingTable, got {:?}", ast[0]); + }; + let provider = schema_provider_with_src(); + let base = produce_optimized_plan(&DFStatement::Query(query.clone()), &provider) + .expect("produce optimized logical plan for sink query"); + let rewritten = rewrite_plan(base, &provider).expect("streaming rewrite_plan"); + let dot = format!("{}", rewritten.display_graphviz()); + assert!( + dot.contains("src") || dot.contains("Src"), + "rewritten plan should reference source; got subgraph:\n{dot}" + ); + } +} diff --git a/src/coordinator/statement/streaming_table.rs b/src/coordinator/statement/streaming_table.rs index 48fd25e9..86ec1a85 100644 --- a/src/coordinator/statement/streaming_table.rs +++ b/src/coordinator/statement/streaming_table.rs @@ -14,11 +14,10 @@ use datafusion::sql::sqlparser::ast::Statement as DFStatement; use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; -/// Represents an INSERT INTO or standalone SELECT/query that creates a streaming table/pipeline. +/// Wrapper for **`CREATE STREAMING TABLE ... WITH (...) AS SELECT ...`** (parsed AST). 
/// -/// In the streaming SQL context, both INSERT INTO (writing to a sink) -/// and standalone SELECT (anonymous computation) are treated as -/// data-producing operations that create/feed into the streaming pipeline. +/// The coordinator `parse_sql` frontend does **not** support `INSERT`; streaming sinks are +/// defined only via **`CREATE STREAMING TABLE`** (and regular tables via **`CREATE TABLE`**). #[derive(Debug)] pub struct StreamingTableStatement { pub statement: DFStatement, diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs index 570a3393..0bd2706f 100644 --- a/src/sql/extensions/remote_table.rs +++ b/src/sql/extensions/remote_table.rs @@ -28,6 +28,67 @@ pub(crate) struct RemoteTableExtension { multifield_partial_ord!(RemoteTableExtension, input, name, materialize); +impl RemoteTableExtension { + fn plan_node_inlined( + planner: &Planner, + index: usize, + this: &RemoteTableExtension, + ) -> Result { + let physical_plan = planner.sync_plan(&this.input)?; + let physical_plan_node = PhysicalPlanNode::try_from_physical_plan( + physical_plan, + &FsPhysicalExtensionCodec::default(), + )?; + let config = ValuePlanOperator { + name: format!("value_calculation({})", this.name), + physical_plan: physical_plan_node.encode_to_vec(), + }; + let node = LogicalNode::single( + index as u32, + format!("value_{index}"), + OperatorName::ArrowValue, + config.encode_to_vec(), + this.name.to_string(), + 1, + ); + Ok(NodeWithIncomingEdges { + node, + edges: vec![], + }) + } + + fn plan_node_with_edges( + planner: &Planner, + index: usize, + this: &RemoteTableExtension, + input_schemas: Vec, + ) -> Result { + let physical_plan = planner.sync_plan(&this.input)?; + let physical_plan_node = PhysicalPlanNode::try_from_physical_plan( + physical_plan, + &FsPhysicalExtensionCodec::default(), + )?; + let config = ValuePlanOperator { + name: format!("value_calculation({})", this.name), + physical_plan: physical_plan_node.encode_to_vec(), + }; + let 
node = LogicalNode::single( + index as u32, + format!("value_{index}"), + OperatorName::ArrowValue, + config.encode_to_vec(), + this.name.to_string(), + 1, + ); + + let edges = input_schemas + .into_iter() + .map(|schema| LogicalEdge::project_all(LogicalEdgeType::Forward, (*schema).clone())) + .collect(); + Ok(NodeWithIncomingEdges { node, edges }) + } +} + impl StreamExtension for RemoteTableExtension { fn node_name(&self) -> Option { if self.materialize { @@ -44,10 +105,11 @@ impl StreamExtension for RemoteTableExtension { input_schemas: Vec, ) -> Result { match input_schemas.len() { - 0 => return plan_err!("RemoteTableExtension should have exactly one input"), + 0 => { + return Self::plan_node_inlined(planner, index, self); + } 1 => {} _multiple_inputs => { - // check they are all the same let first = input_schemas[0].clone(); for schema in input_schemas.iter().skip(1) { if *schema != first { @@ -58,29 +120,7 @@ impl StreamExtension for RemoteTableExtension { } } } - let physical_plan = planner.sync_plan(&self.input)?; - let physical_plan_node = PhysicalPlanNode::try_from_physical_plan( - physical_plan, - &FsPhysicalExtensionCodec::default(), - )?; - let config = ValuePlanOperator { - name: format!("value_calculation({})", self.name), - physical_plan: physical_plan_node.encode_to_vec(), - }; - let node = LogicalNode::single( - index as u32, - format!("value_{index}"), - OperatorName::ArrowValue, - config.encode_to_vec(), - self.name.to_string(), - 1, - ); - - let edges = input_schemas - .into_iter() - .map(|schema| LogicalEdge::project_all(LogicalEdgeType::Forward, (*schema).clone())) - .collect(); - Ok(NodeWithIncomingEdges { node, edges }) + Self::plan_node_with_edges(planner, index, self, input_schemas) } fn output_schema(&self) -> FsSchema { diff --git a/src/sql/frontend_sql_coverage_tests.rs b/src/sql/frontend_sql_coverage_tests.rs new file mode 100644 index 00000000..fa730614 --- /dev/null +++ b/src/sql/frontend_sql_coverage_tests.rs @@ -0,0 +1,807 @@ +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! SQL parse and streaming-related tests. + +use std::sync::Arc; + +use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; +use datafusion::sql::sqlparser::ast::Statement as DFStatement; +use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; +use datafusion::sql::sqlparser::parser::Parser; + +use crate::coordinator::Coordinator; +use crate::sql::common::TIMESTAMP_FIELD; +use crate::sql::parse::parse_sql; +use crate::sql::rewrite_plan; +use crate::sql::schema::optimizer::produce_optimized_plan; +use crate::sql::schema::StreamSchemaProvider; + +fn assert_parses_as(sql: &str, type_prefix: &str) { + let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse failed for {sql:?}: {e}")); + assert!(!stmts.is_empty(), "{sql}"); + let dbg = format!("{:?}", stmts[0]); + assert!( + dbg.starts_with(type_prefix), + "sql={sql:?} expected prefix {type_prefix}, got {dbg}" + ); +} + +fn assert_parse_fails(sql: &str) { + assert!( + parse_sql(sql).is_err(), + "expected parse/classify failure for {sql:?}" + ); +} + +fn fake_src_stream_provider() -> StreamSchemaProvider { + let mut provider = StreamSchemaProvider::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("v", DataType::Utf8, true), + Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + ])); + provider.add_source_table( + "src".to_string(), + schema, + 
Some(TIMESTAMP_FIELD.to_string()), + None, + ); + provider +} + +fn compile_first(coordinator: &Coordinator, sql: &str, provider: StreamSchemaProvider) { + let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}")); + coordinator + .compile_plan(stmts[0].as_ref(), provider) + .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}")); +} + +fn compile_first_streaming(sql: &str) { + compile_first( + &Coordinator::new(), + sql, + fake_src_stream_provider(), + ); +} + +fn fake_src_dim_stream_provider() -> StreamSchemaProvider { + let mut provider = fake_src_stream_provider(); + let dim_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + Field::new("amt", DataType::Float64, true), + Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + ])); + provider.add_source_table( + "dim".to_string(), + dim_schema, + Some(TIMESTAMP_FIELD.to_string()), + None, + ); + provider +} + +fn compile_streaming_select_body(body: &str, provider: StreamSchemaProvider) { + let sql = format!( + "CREATE STREAMING TABLE sink_shape_cov WITH ('connector'='kafka') AS {body}" + ); + compile_first(&Coordinator::new(), &sql, provider); +} + +fn assert_streaming_select_logical_rewrites(body: &str, provider: &StreamSchemaProvider) { + let sql = format!( + "CREATE STREAMING TABLE sink_lr WITH ('connector'='kafka') AS {body}" + ); + let dialect = FunctionStreamDialect {}; + let stmts = Parser::parse_sql(&dialect, &sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}")); + let DFStatement::CreateStreamingTable { query, .. 
} = &stmts[0] else { + panic!("expected CreateStreamingTable, got {:?}", stmts[0]); + }; + let plan = produce_optimized_plan(&DFStatement::Query(query.clone()), provider) + .unwrap_or_else(|e| panic!("produce_optimized_plan {sql:?}: {e:#}")); + rewrite_plan(plan, provider).unwrap_or_else(|e| panic!("rewrite_plan {sql:?}: {e:#}")); +} + +fn assert_streaming_select_logical_rewrite_err_contains( + body: &str, + provider: &StreamSchemaProvider, + needle: &str, +) { + let sql = format!( + "CREATE STREAMING TABLE sink_lr WITH ('connector'='kafka') AS {body}" + ); + let dialect = FunctionStreamDialect {}; + let stmts = Parser::parse_sql(&dialect, &sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}")); + let DFStatement::CreateStreamingTable { query, .. } = &stmts[0] else { + panic!("expected CreateStreamingTable, got {:?}", stmts[0]); + }; + let plan = produce_optimized_plan(&DFStatement::Query(query.clone()), provider) + .unwrap_or_else(|e| panic!("produce_optimized_plan {sql:?}: {e:#}")); + let err = rewrite_plan(plan, provider).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains(needle), + "expected '{needle}' in rewrite error, got: {msg}" + ); +} + +#[test] +fn parse_create_function_double_quoted_path_style() { + assert_parses_as( + r#"CREATE FUNCTION WITH ("function_path"='./a.wasm', "config_path"='./b.yml')"#, + "CreateFunction", + ); +} + +#[test] +fn parse_create_function_extra_numeric_and_bool_like_strings() { + assert_parses_as( + r#"CREATE FUNCTION WITH ( + 'function_path'='./f.wasm', + 'config_path'='./c.yml', + 'parallelism'='8', + 'dry_run'='false' + )"#, + "CreateFunction", + ); +} + +#[test] +fn parse_create_function_fails_without_function_path() { + let err = parse_sql("CREATE FUNCTION WITH ('config_path'='./only.yml')").unwrap_err(); + let s = err.to_string(); + assert!( + s.contains("function_path") || s.contains("CREATE FUNCTION"), + "{s}" + ); +} + +#[test] +fn parse_drop_function_quoted_name() { + assert_parses_as(r#"DROP FUNCTION 
"my-pipeline""#, "DropFunction"); +} + +#[test] +fn parse_start_stop_function_dotted_style_name() { + assert_parses_as("START FUNCTION job.v1.main", "StartFunction"); + assert_parses_as("STOP FUNCTION job.v1.main", "StopFunction"); +} + +#[test] +fn parse_show_functions_extra_whitespace() { + assert_parses_as(" SHOW FUNCTIONS ", "ShowFunctions"); +} + +#[test] +fn parse_create_table_multiple_columns_types() { + assert_parses_as( + "CREATE TABLE metrics (ts TIMESTAMP, name VARCHAR, val DOUBLE, ok BOOLEAN)", + "CreateTable", + ); +} + +#[test] +fn parse_create_table_with_not_null_and_precision() { + assert_parses_as( + "CREATE TABLE t (id BIGINT NOT NULL, code DECIMAL(10,2))", + "CreateTable", + ); +} + +#[test] +fn parse_create_table_if_not_exists_if_dialect_accepts() { + if let Ok(stmts) = parse_sql("CREATE TABLE IF NOT EXISTS guard (id INT)") { + assert!(format!("{:?}", stmts[0]).starts_with("CreateTable")); + } +} + +#[test] +fn parse_streaming_table_select_star() { + assert_parses_as( + "CREATE STREAMING TABLE s1 WITH ('connector'='kafka') AS SELECT * FROM src", + "StreamingTableStatement", + ); +} + +#[test] +fn parse_streaming_table_select_columns() { + assert_parses_as( + "CREATE STREAMING TABLE s2 WITH ('connector'='memory') AS SELECT id, v FROM src", + "StreamingTableStatement", + ); +} + +#[test] +fn parse_streaming_table_with_partition_by() { + let sql = format!( + "CREATE STREAMING TABLE s3 WITH ('connector' = 'kafka', 'partition_by' = 'id') AS SELECT id, {} FROM src", + TIMESTAMP_FIELD + ); + assert_parses_as(&sql, "StreamingTableStatement"); +} + +#[test] +fn parse_streaming_table_with_idle_time_option() { + assert_parses_as( + "CREATE STREAMING TABLE s4 WITH ('connector'='kafka', 'idle_time'='30s') AS SELECT * FROM src", + "StreamingTableStatement", + ); +} + +#[test] +fn parse_streaming_table_sink_name_snake_and_digits() { + assert_parses_as( + "CREATE STREAMING TABLE sink_01_out WITH ('connector'='memory') AS SELECT 1", + "StreamingTableStatement", 
+ ); +} + +#[test] +fn parse_streaming_table_comment_before_as_if_supported() { + let sql = "CREATE STREAMING TABLE c1 WITH ('connector'='kafka') COMMENT 'out' AS SELECT * FROM src"; + if let Ok(stmts) = parse_sql(sql) { + assert!( + format!("{:?}", stmts[0]).starts_with("StreamingTableStatement"), + "{stmts:?}" + ); + } +} + +#[test] +fn parse_three_semicolon_separated_statements() { + let sql = concat!( + "CREATE FUNCTION WITH ('function_path'='./x.wasm'); ", + "CREATE TABLE meta (id INT); ", + "CREATE STREAMING TABLE out1 WITH ('connector'='kafka') AS SELECT 1", + ); + let stmts = parse_sql(sql).unwrap(); + assert_eq!(stmts.len(), 3); + assert!(format!("{:?}", stmts[0]).starts_with("CreateFunction")); + assert!(format!("{:?}", stmts[1]).starts_with("CreateTable")); + assert!(format!("{:?}", stmts[2]).starts_with("StreamingTableStatement")); +} + +#[test] +fn parse_rejects_insert_with_columns_list() { + assert_parse_fails("INSERT INTO t (a,b) VALUES (1,2)"); +} + +#[test] +fn parse_rejects_update_delete() { + assert_parse_fails("UPDATE src SET id = 1"); + assert_parse_fails("DELETE FROM src WHERE id = 0"); +} + +#[test] +fn parse_rejects_merge_explain() { + assert_parse_fails("EXPLAIN SELECT 1"); + assert_parse_fails("MERGE INTO t USING s ON true WHEN MATCHED THEN UPDATE SET x=1"); +} + +#[test] +fn parse_rejects_create_schema_database() { + assert_parse_fails("CREATE SCHEMA s"); + assert_parse_fails("CREATE DATABASE d"); +} + +#[test] +fn compile_streaming_select_star_from_src() { + compile_first_streaming(concat!( + "CREATE STREAMING TABLE kafka_all ", + "WITH ('connector'='kafka') ", + "AS SELECT * FROM src", + )); +} + +#[test] +fn compile_streaming_select_id_v_from_src() { + let sql = format!( + "CREATE STREAMING TABLE kafka_cols WITH ('connector'='kafka') AS SELECT id, v, {} FROM src", + TIMESTAMP_FIELD + ); + compile_first_streaming(&sql); +} + +#[test] +fn compile_streaming_memory_connector() { + compile_first_streaming( + "CREATE STREAMING TABLE mem_sink 
WITH ('connector'='memory') AS SELECT * FROM src", + ); +} + +#[test] +fn compile_streaming_with_partition_by_id() { + compile_first_streaming(concat!( + "CREATE STREAMING TABLE part_sink ", + "WITH ('connector'='kafka', 'partition_by'='id') ", + "AS SELECT * FROM src", + )); +} + +#[test] +fn compile_streaming_connector_postgres_string() { + compile_first_streaming( + "CREATE STREAMING TABLE pg_sink WITH ('connector'='postgres') AS SELECT id FROM src", + ); +} + +#[test] +#[should_panic(expected = "connector")] +fn compile_streaming_fails_without_connector() { + let sql = "CREATE STREAMING TABLE bad WITH ('partition_by'='id') AS SELECT * FROM src"; + let stmts = parse_sql(sql).unwrap(); + let _ = Coordinator::new().compile_plan(stmts[0].as_ref(), fake_src_stream_provider()); +} + +#[test] +fn compile_plan_show_functions() { + let stmts = parse_sql("SHOW FUNCTIONS").unwrap(); + Coordinator::new() + .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) + .expect("ShowFunctions plan"); +} + +#[test] +fn compile_plan_start_stop_drop_function() { + for sql in [ + "START FUNCTION t1", + "STOP FUNCTION t1", + "DROP FUNCTION t1", + ] { + let stmts = parse_sql(sql).unwrap(); + Coordinator::new() + .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) + .unwrap_or_else(|e| panic!("{sql}: {e:#}")); + } +} + +#[test] +fn compile_plan_create_function() { + let sql = + "CREATE FUNCTION WITH ('function_path'='./x.wasm', 'config_path'='./c.yml')"; + let stmts = parse_sql(sql).unwrap(); + Coordinator::new() + .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) + .expect("CreateFunction plan"); +} + +#[test] +fn compile_plan_create_table_simple_ddl() { + let sql = "CREATE TABLE local_only (id INT, name VARCHAR)"; + let stmts = parse_sql(sql).unwrap(); + Coordinator::new() + .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) + .expect("CreateTable plan"); +} + +#[test] +fn streaming_where_eq_ne_and_or_not() { + let ts = TIMESTAMP_FIELD; + 
compile_streaming_select_body( + &format!("SELECT * FROM src WHERE id = 1 AND (v <> 'x' OR NOT (id < 0))"), + fake_src_stream_provider(), + ); + compile_streaming_select_body( + &format!("SELECT * FROM src WHERE id > 0 AND id <= 100 AND id >= 1"), + fake_src_stream_provider(), + ); + compile_streaming_select_body( + &format!("SELECT id, v, {ts} FROM src WHERE (id = 2 OR id = 3) AND v IS NOT NULL"), + fake_src_stream_provider(), + ); +} + +#[test] +fn streaming_where_in_between_like_null() { + let ts = TIMESTAMP_FIELD; + compile_streaming_select_body( + &format!("SELECT * FROM src WHERE id IN (1, 2, 3)"), + fake_src_stream_provider(), + ); + compile_streaming_select_body( + &format!("SELECT * FROM src WHERE id NOT IN (99, 100)"), + fake_src_stream_provider(), + ); + compile_streaming_select_body( + &format!("SELECT * FROM src WHERE id BETWEEN 1 AND 10"), + fake_src_stream_provider(), + ); + compile_streaming_select_body( + &format!("SELECT * FROM src WHERE v LIKE 'pre%'"), + fake_src_stream_provider(), + ); + compile_streaming_select_body( + &format!("SELECT * FROM src WHERE v IS NULL"), + fake_src_stream_provider(), + ); + compile_streaming_select_body( + &format!("SELECT id, v, {ts} FROM src WHERE v IS NOT NULL OR id = 0"), + fake_src_stream_provider(), + ); +} + +#[test] +fn streaming_where_scalar_subquery() { + let ts = TIMESTAMP_FIELD; + let p = fake_src_dim_stream_provider(); + assert_streaming_select_logical_rewrites( + &format!( + "SELECT src.id, src.v, src.{ts} FROM src \ + WHERE src.id = (SELECT MAX(dim.id) FROM dim)" + ), + &p, + ); +} + +#[test] +#[should_panic(expected = "window")] +fn streaming_where_in_subquery_currently_panics() { + let p = fake_src_dim_stream_provider(); + compile_streaming_select_body( + "SELECT * FROM src WHERE id IN (SELECT id FROM dim WHERE amt IS NOT NULL)", + p, + ); +} + +#[test] +#[should_panic(expected = "window")] +fn streaming_where_exists_correlated_currently_panics() { + let p = fake_src_dim_stream_provider(); + 
compile_streaming_select_body( + "SELECT * FROM src WHERE EXISTS (SELECT 1 FROM dim WHERE dim.id = src.id)", + p, + ); +} + +#[test] +fn streaming_select_case_coalesce_cast() { + let ts = TIMESTAMP_FIELD; + compile_streaming_select_body( + &format!( + "SELECT CASE WHEN id < 0 THEN 0 WHEN id > 1000 THEN 1000 ELSE id END AS c, v, {ts} FROM src" + ), + fake_src_stream_provider(), + ); + compile_streaming_select_body( + &format!("SELECT COALESCE(v, 'na') AS v2, id, {ts} FROM src"), + fake_src_stream_provider(), + ); + compile_streaming_select_body( + &format!("SELECT CAST(id AS DOUBLE) AS id_f, {ts} FROM src"), + fake_src_stream_provider(), + ); +} + +#[test] +fn streaming_select_row_time_distinct() { + let ts = TIMESTAMP_FIELD; + compile_streaming_select_body( + &format!("SELECT row_time(), id, v, {ts} FROM src"), + fake_src_stream_provider(), + ); + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites("SELECT DISTINCT id FROM src", &p); +} + +#[test] +fn streaming_from_subquery_nested() { + let ts = TIMESTAMP_FIELD; + compile_streaming_select_body( + &format!("SELECT * FROM (SELECT id, v, {ts} FROM src WHERE id > 0) AS t"), + fake_src_stream_provider(), + ); + compile_streaming_select_body( + &format!( + "SELECT * FROM (SELECT * FROM (SELECT id FROM src) AS i2) AS i1" + ), + fake_src_stream_provider(), + ); +} + +#[test] +fn streaming_with_cte_single_and_chain() { + let ts = TIMESTAMP_FIELD; + compile_streaming_select_body( + &format!( + "WITH a AS (SELECT id, v, {ts} FROM src WHERE id > 0) SELECT * FROM a" + ), + fake_src_stream_provider(), + ); + compile_streaming_select_body( + &format!( + "WITH a AS (SELECT id FROM src), b AS (SELECT id FROM a WHERE id > 1) SELECT * FROM b" + ), + fake_src_stream_provider(), + ); +} + +#[test] +fn streaming_group_by_updating_aggregate_bundle() { + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + "SELECT id, COUNT(*), SUM(id), AVG(id), MIN(v), MAX(v) FROM src GROUP BY id", 
+ &p, + ); +} + +#[test] +fn streaming_group_by_count_distinct_and_stats() { + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + "SELECT id, COUNT(DISTINCT v), STDDEV_POP(id), VAR_POP(id) FROM src GROUP BY id", + &p, + ); +} + +#[test] +fn streaming_group_by_having() { + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + "SELECT id, COUNT(*) AS c FROM src GROUP BY id HAVING COUNT(*) >= 0", + &p, + ); +} + +#[test] +fn streaming_group_by_tumble_window() { + let ts = TIMESTAMP_FIELD; + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + &format!( + "SELECT tumble(INTERVAL '1' MINUTE) AS w, id, COUNT(*) AS c, MAX({ts}) AS max_evt \ + FROM src GROUP BY tumble(INTERVAL '1' MINUTE), id" + ), + &p, + ); +} + +#[test] +fn streaming_group_by_hop_window() { + let ts = TIMESTAMP_FIELD; + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + &format!( + "SELECT hop(INTERVAL '1' MINUTE, INTERVAL '3' MINUTE) AS w, id, SUM(id), MAX({ts}) AS max_evt \ + FROM src GROUP BY hop(INTERVAL '1' MINUTE, INTERVAL '3' MINUTE), id" + ), + &p, + ); +} + +#[test] +fn streaming_window_row_number_over_tumble_aggregate() { + let ts = TIMESTAMP_FIELD; + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + &format!( + "SELECT ROW_NUMBER() OVER (PARTITION BY w ORDER BY max_evt) AS rn, id, w, max_evt \ + FROM ( \ + SELECT tumble(INTERVAL '1' MINUTE) AS w, id, MAX({ts}) AS max_evt \ + FROM src \ + GROUP BY tumble(INTERVAL '1' MINUTE), id \ + ) AS x" + ), + &p, + ); +} + +#[test] +fn streaming_inner_join_eq_and_compound_on() { + let ts = TIMESTAMP_FIELD; + let p = fake_src_dim_stream_provider(); + assert_streaming_select_logical_rewrites( + &format!( + "SELECT src.id, src.v, dim.name, src.{ts} \ + FROM src INNER JOIN dim ON src.id = dim.id" + ), + &p, + ); + assert_streaming_select_logical_rewrites( + &format!( + "SELECT src.id, dim.amt, src.{ts} \ + FROM 
src JOIN dim ON src.id = dim.id AND dim.amt > CAST(0 AS DOUBLE)" + ), + &p, + ); +} + +#[test] +#[ignore] +fn streaming_self_join_inner_ignored() { + let ts = TIMESTAMP_FIELD; + compile_streaming_select_body( + &format!( + "SELECT a.id, b.v, a.{ts} \ + FROM src AS a JOIN src AS b ON a.id = b.id AND a.v = b.v" + ), + fake_src_stream_provider(), + ); +} + +#[test] +fn streaming_join_subquery_branch() { + let ts = TIMESTAMP_FIELD; + let p = fake_src_dim_stream_provider(); + assert_streaming_select_logical_rewrites( + &format!( + "SELECT src.id, src.v, j.name, src.{ts} \ + FROM src JOIN (SELECT id, name FROM dim) AS j ON src.id = j.id" + ), + &p, + ); +} + +#[test] +fn streaming_union_all_compatible_schemas() { + let ts = TIMESTAMP_FIELD; + let p = fake_src_dim_stream_provider(); + compile_streaming_select_body( + &format!( + "SELECT id, v, {ts} FROM src \ + UNION ALL \ + SELECT id, name AS v, {ts} FROM dim" + ), + p, + ); +} + +#[test] +fn streaming_logical_group_by_two_keys_and_filter_agg() { + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + "SELECT id, v, COUNT(*) AS c FROM src GROUP BY id, v", + &p, + ); + assert_streaming_select_logical_rewrites( + "SELECT id, SUM(id) FILTER (WHERE v IS NOT NULL) AS s FROM src GROUP BY id", + &p, + ); +} + +#[test] +fn streaming_logical_more_builtin_aggregates() { + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + "SELECT id, STDDEV_POP(CAST(id AS DOUBLE)), COVAR_SAMP(CAST(id AS DOUBLE), CAST(id AS DOUBLE)), \ + COVAR_POP(CAST(id AS DOUBLE), CAST(id AS DOUBLE)) \ + FROM src GROUP BY id", + &p, + ); + assert_streaming_select_logical_rewrites( + "SELECT id, CORR(CAST(id AS DOUBLE), CAST(id AS DOUBLE)) FROM src GROUP BY id", + &p, + ); +} + +#[test] +fn streaming_logical_bit_and_bool_aggregates() { + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + "SELECT id, BIT_AND(id), BIT_OR(id), BIT_XOR(id) FROM src GROUP BY id", + &p, + ); + 
assert_streaming_select_logical_rewrites( + "SELECT id, BOOL_AND(id > 0), BOOL_OR(id < 100000) FROM src GROUP BY id", + &p, + ); +} + +#[test] +fn streaming_logical_array_agg_and_list_union() { + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + "SELECT id, ARRAY_AGG(v) FROM src GROUP BY id", + &p, + ); +} + +#[test] +fn streaming_logical_scalar_funcs_on_projection() { + let ts = TIMESTAMP_FIELD; + compile_streaming_select_body( + &format!( + "SELECT ABS(id), POWER(CAST(id AS DOUBLE), 2.0), UPPER(v), LOWER(v), BTRIM(v), \ + CHARACTER_LENGTH(v), CONCAT(v, '_x'), {ts} FROM src" + ), + fake_src_stream_provider(), + ); +} + +#[test] +fn streaming_logical_nullif_regexp() { + let ts = TIMESTAMP_FIELD; + compile_streaming_select_body( + &format!( + "SELECT id, NULLIF(v, ''), REGEXP_LIKE(v, '^a'), {ts} FROM src WHERE v IS NOT NULL OR id = 0" + ), + fake_src_stream_provider(), + ); +} + +#[test] +fn streaming_window_first_value_over_tumbled_subquery() { + let ts = TIMESTAMP_FIELD; + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + &format!( + "SELECT FIRST_VALUE(id) OVER (PARTITION BY w ORDER BY max_evt) AS fv, w, id \ + FROM ( \ + SELECT tumble(INTERVAL '1' MINUTE) AS w, id, MAX({ts}) AS max_evt \ + FROM src GROUP BY tumble(INTERVAL '1' MINUTE), id \ + ) AS x" + ), + &p, + ); +} + +#[test] +fn streaming_window_lag_over_tumbled_subquery() { + let ts = TIMESTAMP_FIELD; + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + &format!( + "SELECT LAG(id, 1) OVER (PARTITION BY w ORDER BY max_evt) AS prev_id, w, id \ + FROM ( \ + SELECT tumble(INTERVAL '2' MINUTE) AS w, id, MAX({ts}) AS max_evt \ + FROM src GROUP BY tumble(INTERVAL '2' MINUTE), id \ + ) AS x" + ), + &p, + ); +} + +#[test] +fn streaming_window_lead_over_tumbled_subquery() { + let ts = TIMESTAMP_FIELD; + let p = fake_src_stream_provider(); + assert_streaming_select_logical_rewrites( + &format!( + "SELECT LEAD(id, 1) OVER 
(PARTITION BY w ORDER BY max_evt) AS next_id, w \ + FROM ( \ + SELECT tumble(INTERVAL '2' MINUTE) AS w, id, MAX({ts}) AS max_evt \ + FROM src GROUP BY tumble(INTERVAL '2' MINUTE), id \ + ) AS x" + ), + &p, + ); +} + +#[test] +fn streaming_logical_full_outer_join_errors() { + let p = fake_src_dim_stream_provider(); + assert_streaming_select_logical_rewrite_err_contains( + "SELECT src.id, dim.name FROM src FULL OUTER JOIN dim ON src.id = dim.id", + &p, + "inner", + ); +} + +#[test] +#[should_panic(expected = "Non-inner")] +fn streaming_left_join_errors_without_window() { + let ts = TIMESTAMP_FIELD; + let sql = format!( + "CREATE STREAMING TABLE sink_left WITH ('connector'='kafka') AS \ + SELECT src.id, dim.name, src.{ts} FROM src LEFT JOIN dim ON src.id = dim.id" + ); + let stmts = parse_sql(&sql).unwrap(); + let _ = Coordinator::new().compile_plan(stmts[0].as_ref(), fake_src_dim_stream_provider()); +} diff --git a/src/sql/logical_planner/planner.rs b/src/sql/logical_planner/planner.rs index 45d373c3..0f2075c1 100644 --- a/src/sql/logical_planner/planner.rs +++ b/src/sql/logical_planner/planner.rs @@ -96,7 +96,6 @@ impl<'a> Planner<'a> { let fut = self.planner.create_physical_plan(plan, self.session_state); let (tx, mut rx) = oneshot::channel(); thread::scope(|s| { - let _handle = tokio::runtime::Handle::current(); let builder = thread::Builder::new(); let builder = if cfg!(debug_assertions) { builder.stack_size(10_000_000) diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 32c0dce9..6e17e0f2 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -26,3 +26,6 @@ pub use schema::StreamSchemaProvider; pub use parse::parse_sql; pub use analysis::rewrite_plan; pub use logical_planner::CompiledSql; + +#[cfg(test)] +mod frontend_sql_coverage_tests; diff --git a/src/sql/parse.rs b/src/sql/parse.rs index 1b4be38a..78c8bac0 100644 --- a/src/sql/parse.rs +++ b/src/sql/parse.rs @@ -10,6 +10,18 @@ // See the License for the specific language governing permissions and // limitations 
under the License. +//! Coordinator-facing SQL parsing (`parse_sql`). +//! +//! **Data-definition / pipeline shape (this entry point)** +//! Only these table-related forms are supported: +//! - **`CREATE TABLE ...`** (including `CREATE TABLE ... AS SELECT` where the planner accepts it) +//! - **`CREATE STREAMING TABLE ... WITH (...) AS SELECT ...`** (streaming sink DDL) +//! +//! **`INSERT` is not supported** here — use `CREATE TABLE ... AS SELECT` or +//! `CREATE STREAMING TABLE ... AS SELECT` to define the query shape instead. +//! +//! Other supported statements include function lifecycle (`CREATE FUNCTION WITH`, `START FUNCTION`, …). + use std::collections::HashMap; use datafusion::common::{Result, plan_err}; @@ -62,7 +74,14 @@ fn classify_statement(stmt: DFStatement) -> Result s @ DFStatement::CreateStreamingTable { .. } => { Ok(Box::new(StreamingTableStatement::new(s))) } - other => plan_err!("Unsupported SQL statement: {other}"), + DFStatement::Insert { .. } => plan_err!( + "INSERT is not supported; only CREATE TABLE and CREATE STREAMING TABLE (with AS SELECT) \ + are supported for defining table/query pipelines in this SQL frontend" + ), + other => plan_err!( + "Unsupported SQL statement: {other}. \ + For tables/pipelines use CREATE TABLE or CREATE STREAMING TABLE ... AS SELECT; INSERT is not supported." + ), } } @@ -139,10 +158,31 @@ mod tests { assert!(is_type(stmt.as_ref(), "CreateTable")); } + /// `CREATE STREAMING TABLE` is the sink DDL supported by FunctionStream (not `CREATE STREAM TABLE`). 
#[test] - fn test_parse_insert_statement() { - let stmt = first_stmt("INSERT INTO sink SELECT * FROM source"); - assert!(is_type(stmt.as_ref(), "CreateStreamingTableStatement")); + fn test_parse_create_streaming_table() { + let sql = concat!( + "CREATE STREAMING TABLE my_sink ", + "WITH ('connector' = 'kafka') ", + "AS SELECT id FROM src", + ); + let stmt = first_stmt(sql); + assert!( + is_type(stmt.as_ref(), "StreamingTableStatement"), + "expected StreamingTableStatement, got {:?}", + stmt + ); + } + + #[test] + fn test_parse_create_streaming_table_case_insensitive() { + let sql = concat!( + "create streaming table out_q ", + "with ('connector' = 'memory') ", + "as select 1 as x", + ); + let stmt = first_stmt(sql); + assert!(is_type(stmt.as_ref(), "StreamingTableStatement")); } #[test] @@ -163,11 +203,14 @@ mod tests { #[test] fn test_parse_multiple_statements() { - let sql = "CREATE TABLE t1 (id INT); INSERT INTO sink SELECT * FROM t1"; + let sql = concat!( + "CREATE TABLE t1 (id INT); ", + "CREATE STREAMING TABLE sk WITH ('connector' = 'kafka') AS SELECT id FROM t1", + ); let stmts = parse_sql(sql).unwrap(); assert_eq!(stmts.len(), 2); assert!(is_type(stmts[0].as_ref(), "CreateTable")); - assert!(is_type(stmts[1].as_ref(), "CreateStreamingTableStatement")); + assert!(is_type(stmts[1].as_ref(), "StreamingTableStatement")); } #[test] @@ -182,6 +225,20 @@ mod tests { assert!(result.is_err()); } + #[test] + fn test_insert_not_supported() { + let err = parse_sql("INSERT INTO sink SELECT * FROM src").unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("INSERT") && msg.contains("not supported"), + "expected explicit INSERT rejection, got: {msg}" + ); + assert!( + msg.contains("CREATE TABLE") || msg.contains("CREATE STREAMING TABLE"), + "error should mention supported alternatives, got: {msg}" + ); + } + #[test] fn test_parse_with_extra_properties() { let sql = r#"CREATE FUNCTION WITH ( From 13e1341db50cea310cf281ef73c24f7b74d3c129 Mon Sep 17 00:00:00 
2001 From: luoluoyuyu Date: Sun, 22 Mar 2026 01:45:15 +0800 Subject: [PATCH 10/44] update --- src/coordinator/execution/executor.rs | 2 +- src/coordinator/plan/logical_plan_visitor.rs | 59 +- src/coordinator/plan/lookup_table_plan.rs | 4 +- .../plan/streaming_table_connector_plan.rs | 4 +- src/coordinator/plan/streaming_table_plan.rs | 4 +- src/coordinator/tool/mod.rs | 4 +- src/sql/analysis/aggregate_rewriter.rs | 18 +- src/sql/analysis/async_udf_rewriter.rs | 34 +- src/sql/analysis/join_rewriter.rs | 22 +- src/sql/analysis/mod.rs | 35 +- src/sql/analysis/sink_input_rewriter.rs | 8 +- src/sql/analysis/source_metadata_visitor.rs | 18 +- src/sql/analysis/source_rewriter.rs | 105 ++- src/sql/analysis/stream_rewriter.rs | 24 +- src/sql/analysis/streaming_window_analzer.rs | 18 +- src/sql/analysis/window_function_rewriter.rs | 14 +- .../tool => sql/common}/connector_options.rs | 10 + src/sql/common/format_from_opts.rs | 162 ++++ src/sql/common/mod.rs | 5 +- src/sql/extensions/aggregate.rs | 776 +++++++++--------- src/sql/extensions/async_udf.rs | 259 +++--- src/sql/extensions/constants.rs | 1 + src/sql/extensions/debezium.rs | 442 ++++++---- src/sql/extensions/extension_try_from.rs | 66 +- src/sql/extensions/join.rs | 230 ++++-- src/sql/extensions/key_calculation.rs | 369 +++++---- src/sql/extensions/lookup.rs | 302 ++++--- src/sql/extensions/mod.rs | 10 +- src/sql/extensions/projection.rs | 263 ++++-- src/sql/extensions/remote_table.rs | 231 +++--- src/sql/extensions/sink.rs | 292 ++++--- src/sql/extensions/stream_extension.rs | 38 - .../streaming_operator_blueprint.rs | 65 ++ src/sql/extensions/table_source.rs | 195 +++-- src/sql/extensions/timestamp_append.rs | 104 ++- src/sql/extensions/updating_aggregate.rs | 264 +++--- src/sql/extensions/watermark_node.rs | 263 ++++-- src/sql/extensions/window_fn.rs | 123 --- src/sql/extensions/windows_function.rs | 197 +++++ src/sql/frontend_sql_coverage_tests.rs | 2 +- src/sql/logical_node/logical.rs | 378 --------- 
.../logical_node/logical/dylib_udf_config.rs | 71 ++ src/sql/logical_node/logical/logical_edge.rs | 57 ++ src/sql/logical_node/logical/logical_graph.rs | 30 + src/sql/logical_node/logical/logical_node.rs | 71 ++ .../logical_node/logical/logical_program.rs | 123 +++ src/sql/logical_node/logical/mod.rs | 30 + .../logical_node/logical/operator_chain.rs | 80 ++ src/sql/logical_node/logical/operator_name.rs | 32 + .../logical_node/logical/program_config.rs | 22 + .../logical_node/logical/python_udf_config.rs | 23 + .../{optimizers.rs => optimizers/chaining.rs} | 18 +- .../optimizers/datafusion_logical.rs} | 0 src/sql/logical_planner/optimizers/mod.rs | 20 + src/sql/logical_planner/planner.rs | 57 +- src/sql/mod.rs | 2 +- src/sql/schema/column_descriptor.rs | 136 +++ .../{connector.rs => connection_type.rs} | 0 src/sql/schema/connector_table.rs | 205 ----- src/sql/schema/data_encoding_format.rs | 82 ++ src/sql/schema/field_spec.rs | 52 -- src/sql/schema/insert.rs | 55 -- src/sql/schema/mod.rs | 36 +- src/sql/schema/schema_context.rs | 37 + src/sql/schema/schema_provider.rs | 417 ++++++---- src/sql/schema/source_table.rs | 564 +++++++++++++ src/sql/schema/table.rs | 36 +- src/sql/schema/table_execution_unit.rs | 33 + src/sql/schema/table_role.rs | 110 +++ src/sql/schema/temporal_pipeline_config.rs | 57 ++ 70 files changed, 4966 insertions(+), 2910 deletions(-) rename src/{coordinator/tool => sql/common}/connector_options.rs (96%) create mode 100644 src/sql/common/format_from_opts.rs delete mode 100644 src/sql/extensions/stream_extension.rs create mode 100644 src/sql/extensions/streaming_operator_blueprint.rs delete mode 100644 src/sql/extensions/window_fn.rs create mode 100644 src/sql/extensions/windows_function.rs delete mode 100644 src/sql/logical_node/logical.rs create mode 100644 src/sql/logical_node/logical/dylib_udf_config.rs create mode 100644 src/sql/logical_node/logical/logical_edge.rs create mode 100644 src/sql/logical_node/logical/logical_graph.rs create mode 
100644 src/sql/logical_node/logical/logical_node.rs create mode 100644 src/sql/logical_node/logical/logical_program.rs create mode 100644 src/sql/logical_node/logical/mod.rs create mode 100644 src/sql/logical_node/logical/operator_chain.rs create mode 100644 src/sql/logical_node/logical/operator_name.rs create mode 100644 src/sql/logical_node/logical/program_config.rs create mode 100644 src/sql/logical_node/logical/python_udf_config.rs rename src/sql/logical_planner/{optimizers.rs => optimizers/chaining.rs} (81%) rename src/sql/{schema/optimizer.rs => logical_planner/optimizers/datafusion_logical.rs} (100%) create mode 100644 src/sql/logical_planner/optimizers/mod.rs create mode 100644 src/sql/schema/column_descriptor.rs rename src/sql/schema/{connector.rs => connection_type.rs} (100%) delete mode 100644 src/sql/schema/connector_table.rs create mode 100644 src/sql/schema/data_encoding_format.rs delete mode 100644 src/sql/schema/field_spec.rs delete mode 100644 src/sql/schema/insert.rs create mode 100644 src/sql/schema/schema_context.rs create mode 100644 src/sql/schema/source_table.rs create mode 100644 src/sql/schema/table_execution_unit.rs create mode 100644 src/sql/schema/table_role.rs create mode 100644 src/sql/schema/temporal_pipeline_config.rs diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 8285a2c5..4dae91d5 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -225,7 +225,7 @@ impl PlanVisitor for Executor { ) -> PlanVisitorResult { let result = (|| -> Result { let catalog_table = - CatalogTable::ConnectorTable(plan.connector_table.clone()); + CatalogTable::ConnectorTable(plan.source_table.clone()); let mut schema_provider = StreamSchemaProvider::new(); schema_provider.insert_catalog_table(catalog_table.clone()); diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index 93f8776a..4a747fdf 100644 --- 
a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -33,21 +33,19 @@ use crate::coordinator::statement::{ }; use crate::coordinator::tool::ConnectorOptions; use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig}; -use crate::sql::logical_planner::optimizers::ChainingOptimizer; +use crate::sql::logical_planner::optimizers::{ChainingOptimizer, produce_optimized_plan}; use crate::sql::schema::Table; -use crate::sql::schema::connector::ConnectionType; -use crate::sql::schema::connector_table::ConnectorTable; -use crate::sql::schema::field_spec::FieldSpec; -use crate::sql::schema::optimizer::produce_optimized_plan; +use crate::sql::schema::ConnectionType; +use crate::sql::schema::source_table::SourceTable; +use crate::sql::schema::ColumnDescriptor; use crate::sql::functions::{is_json_union, serialize_outgoing_json}; -use crate::sql::extensions::sink::SinkExtension; +use crate::sql::extensions::sink::StreamEgressNode; use crate::sql::logical_planner::planner; use crate::sql::analysis::{StreamSchemaProvider, maybe_add_key_extension_to_sink, rewrite_sinks}; use crate::sql::rewrite_plan; const CONNECTOR: &str = "connector"; const PARTITION_BY: &str = "partition_by"; -const IDLE_MICROS: &str = "idle_time"; fn with_options_to_map(options: &[SqlOption]) -> std::collections::HashMap { options @@ -108,6 +106,8 @@ impl LogicalPlanVisitor { ) })?; + let partition_exprs = self.resolve_partition_expressions(&mut opts)?; + let base_plan = produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?; let mut plan = rewrite_plan(base_plan, &self.schema_provider)?; @@ -121,38 +121,33 @@ impl LogicalPlanVisitor { plan = serialize_outgoing_json(&self.schema_provider, Arc::new(plan)); } - let partition_exprs = self.resolve_partition_expressions(&mut opts)?; - - let fields: Vec = plan + let fields: Vec = plan .schema() .fields() .iter() - .map(|f| FieldSpec::Struct((**f).clone())) + .map(|f| 
ColumnDescriptor::from((**f).clone())) .collect(); - let connector_table = ConnectorTable { - id: None, - connector, - name: table_name.clone(), - connection_type: ConnectionType::Sink, + let mut source_table = SourceTable::from_options( + &table_name, + &connector, + false, fields, - config: "".to_string(), - description: comment.clone().unwrap_or_default(), - event_time_field: None, - watermark_field: None, - idle_time: opts.pull_opt_duration(IDLE_MICROS)?, - primary_keys: Arc::new(vec![]), - inferred_fields: None, - partition_exprs: Arc::new(partition_exprs), - lookup_cache_ttl:None, - lookup_cache_max_bytes:None, - }; + vec![], + None, + &mut opts, + None, + &self.schema_provider, + Some(ConnectionType::Sink), + comment.clone().unwrap_or_default(), + )?; + source_table.partition_exprs = Arc::new(partition_exprs); - let sink_extension = SinkExtension::new( + let sink_extension = StreamEgressNode::try_new( TableReference::bare(table_name.clone()), - Table::ConnectorTable(connector_table.clone()), + Table::ConnectorTable(source_table.clone()), plan.schema().clone(), - Arc::new(plan), + plan, )?; let plan_with_keys = maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension { @@ -196,7 +191,7 @@ impl LogicalPlanVisitor { Ok(Box::new(StreamingTable { name: table_name, comment: comment.clone(), - connector_table, + source_table, logical_plan: final_plan, })) } @@ -322,7 +317,7 @@ mod create_streaming_table_tests { use crate::sql::common::TIMESTAMP_FIELD; use crate::sql::rewrite_plan; - use crate::sql::schema::optimizer::produce_optimized_plan; + use crate::sql::logical_planner::optimizers::produce_optimized_plan; use crate::sql::schema::StreamSchemaProvider; fn schema_provider_with_src() -> StreamSchemaProvider { diff --git a/src/coordinator/plan/lookup_table_plan.rs b/src/coordinator/plan/lookup_table_plan.rs index e0ea06ba..65103b61 100644 --- a/src/coordinator/plan/lookup_table_plan.rs +++ b/src/coordinator/plan/lookup_table_plan.rs @@ -10,14 +10,14 @@ // 
See the License for the specific language governing permissions and // limitations under the License. -use crate::sql::schema::connector_table::ConnectorTable; +use crate::sql::schema::source_table::SourceTable; use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; /// Plan node that exposes a lookup table config as a logical plan input. #[derive(Debug)] pub struct LookupTablePlan { - pub table: ConnectorTable, + pub table: SourceTable, } impl PlanNode for LookupTablePlan { diff --git a/src/coordinator/plan/streaming_table_connector_plan.rs b/src/coordinator/plan/streaming_table_connector_plan.rs index c2407ec8..214e2e15 100644 --- a/src/coordinator/plan/streaming_table_connector_plan.rs +++ b/src/coordinator/plan/streaming_table_connector_plan.rs @@ -10,14 +10,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::sql::schema::connector_table::ConnectorTable; +use crate::sql::schema::source_table::SourceTable; use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; /// Plan node that exposes a connector table config as a logical plan input. #[derive(Debug)] pub struct StreamingTableConnectorPlan { - pub table: ConnectorTable, + pub table: SourceTable, } impl PlanNode for StreamingTableConnectorPlan { diff --git a/src/coordinator/plan/streaming_table_plan.rs b/src/coordinator/plan/streaming_table_plan.rs index 30e519f8..01b8dbb8 100644 --- a/src/coordinator/plan/streaming_table_plan.rs +++ b/src/coordinator/plan/streaming_table_plan.rs @@ -11,7 +11,7 @@ // limitations under the License. use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; -use crate::sql::schema::connector_table::ConnectorTable; +use crate::sql::schema::source_table::SourceTable; use datafusion::logical_expr::LogicalPlan; /// Plan node representing a fully resolved streaming table (DDL). 
@@ -19,7 +19,7 @@ use datafusion::logical_expr::LogicalPlan; pub struct StreamingTable { pub name: String, pub comment: Option, - pub connector_table: ConnectorTable, + pub source_table: SourceTable, pub logical_plan: LogicalPlan, } diff --git a/src/coordinator/tool/mod.rs b/src/coordinator/tool/mod.rs index 95d6a7ed..8ef77230 100644 --- a/src/coordinator/tool/mod.rs +++ b/src/coordinator/tool/mod.rs @@ -1,3 +1 @@ -mod connector_options; - -pub use connector_options::{ConnectorOptions, FromOpts}; +pub use crate::sql::common::ConnectorOptions; diff --git a/src/sql/analysis/aggregate_rewriter.rs b/src/sql/analysis/aggregate_rewriter.rs index 04ac0896..f11b53d0 100644 --- a/src/sql/analysis/aggregate_rewriter.rs +++ b/src/sql/analysis/aggregate_rewriter.rs @@ -1,13 +1,13 @@ use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err}; use datafusion::functions_aggregate::expr_fn::max; -use datafusion::logical_expr::{self, Aggregate, Expr, Extension, LogicalPlan, Projection}; +use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, Projection}; use datafusion::prelude::col; use std::sync::Arc; use crate::sql::schema::StreamSchemaProvider; -use crate::sql::extensions::aggregate::AggregateExtension; -use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs}; +use crate::sql::extensions::aggregate::StreamWindowAggregateNode; +use crate::sql::extensions::key_calculation::{KeyExtractionNode, KeyExtractionStrategy}; use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer; use crate::sql::types::{ DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window, @@ -83,7 +83,7 @@ impl TreeNodeRewriter for AggregateRewriter<'_> { let keyed_input = self.build_keyed_input(agg.input.clone(), &agg.group_expr, &key_fields)?; - // 5. Build the final AggregateExtension for the physical planner. + // 5. 
Build the final StreamWindowAggregateNode for the physical planner. let mut internal_fields = fields_with_qualifiers(&agg.schema); if let WindowBehavior::FromOperator { window_index, .. } = &behavior { internal_fields.remove(*window_index); @@ -100,11 +100,11 @@ impl TreeNodeRewriter for AggregateRewriter<'_> { internal_schema, )?; - let extension = AggregateExtension::new( + let extension = StreamWindowAggregateNode::try_new( behavior, LogicalPlan::Aggregate(rewritten_agg), (0..key_count).collect(), - ); + )?; Ok(Transformed::yes(LogicalPlan::Extension(Extension { node: Arc::new(extension), @@ -118,7 +118,7 @@ impl<'a> AggregateRewriter<'a> { } /// [Internal] Builds the physical Key Calculation layer required for distributed Shuffling. - /// This wraps the input in a Projection and a KeyCalculationExtension. + /// This wraps the input in a Projection and a KeyExtractionNode. fn build_keyed_input( &self, input: Arc, @@ -151,9 +151,9 @@ impl<'a> AggregateRewriter<'a> { LogicalPlan::Projection(Projection::try_new_with_schema(exprs, input, key_schema)?); Ok(LogicalPlan::Extension(Extension { - node: Arc::new(KeyCalculationExtension::new( + node: Arc::new(KeyExtractionNode::new( projection, - KeysOrExprs::Keys((0..key_count).collect()), + KeyExtractionStrategy::ColumnIndices((0..key_count).collect()), )), })) } diff --git a/src/sql/analysis/async_udf_rewriter.rs b/src/sql/analysis/async_udf_rewriter.rs index 9584c022..0ad4dfc2 100644 --- a/src/sql/analysis/async_udf_rewriter.rs +++ b/src/sql/analysis/async_udf_rewriter.rs @@ -1,5 +1,5 @@ -use crate::sql::extensions::remote_table::RemoteTableExtension; -use crate::sql::extensions::{ASYNC_RESULT_FIELD, AsyncUDFExtension}; +use crate::sql::extensions::remote_table::RemoteTableBoundaryNode; +use crate::sql::extensions::{ASYNC_RESULT_FIELD, AsyncFunctionExecutionNode}; use crate::sql::schema::StreamSchemaProvider; use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion::common::{Column, 
Result as DFResult, TableReference, plan_err}; @@ -92,11 +92,11 @@ impl TreeNodeRewriter for AsyncUdfRewriter<'_> { let input = if matches!(*projection.input, LogicalPlan::Projection(..)) { Arc::new(LogicalPlan::Extension(Extension { - node: Arc::new(RemoteTableExtension { - input: (*projection.input).clone(), - name: TableReference::bare("subquery_projection"), - schema: projection.input.schema().clone(), - materialize: false, + node: Arc::new(RemoteTableBoundaryNode { + upstream_plan: (*projection.input).clone(), + table_identifier: TableReference::bare("subquery_projection"), + resolved_schema: projection.input.schema().clone(), + requires_materialization: false, }), })) } else { @@ -104,16 +104,16 @@ impl TreeNodeRewriter for AsyncUdfRewriter<'_> { }; Ok(Transformed::yes(LogicalPlan::Extension(Extension { - node: Arc::new(AsyncUDFExtension { - input, - name, - udf, - arg_exprs, - final_exprs: projection.expr, - ordered: opts.ordered, - max_concurrency: opts.max_concurrency, - timeout: opts.timeout, - final_schema: projection.schema, + node: Arc::new(AsyncFunctionExecutionNode { + upstream_plan: input, + operator_name: name, + function_config: udf, + invocation_args: arg_exprs, + result_projections: projection.expr, + preserve_ordering: opts.ordered, + concurrency_limit: opts.max_concurrency, + execution_timeout: opts.timeout, + resolved_schema: projection.schema, }), }))) } diff --git a/src/sql/analysis/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs index 520af335..e9efe96b 100644 --- a/src/sql/analysis/join_rewriter.rs +++ b/src/sql/analysis/join_rewriter.rs @@ -1,6 +1,6 @@ use crate::sql::schema::StreamSchemaProvider; -use crate::sql::extensions::join::JoinExtension; -use crate::sql::extensions::key_calculation::KeyCalculationExtension; +use crate::sql::extensions::join::StreamingJoinNode; +use crate::sql::extensions::key_calculation::KeyExtractionNode; use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer; use 
crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata}; use crate::sql::common::TIMESTAMP_FIELD; @@ -62,7 +62,7 @@ impl<'a> JoinRewriter<'a> { } } - /// [Internal] Wraps a join input in a KeyCalculation layer to facilitate Shuffle/KeyBy distribution. + /// [Internal] Wraps a join input in a key-extraction layer to facilitate shuffle / key-by distribution. fn build_keyed_side( &self, input: Arc, @@ -85,11 +85,11 @@ impl<'a> JoinRewriter<'a> { .collect(); let projection = Projection::try_new(projection_exprs, input)?; - let key_ext = KeyCalculationExtension::new_named_and_trimmed( + let key_ext = KeyExtractionNode::try_new_with_projection( LogicalPlan::Projection(projection), (0..key_count).collect(), side.to_string(), - ); + )?; Ok(LogicalPlan::Extension(Extension { node: Arc::new(key_ext), @@ -209,13 +209,13 @@ impl TreeNodeRewriter for JoinRewriter<'_> { // 4. Resolve Output Watermark (Timestamp Projection) let plan_with_timestamp = self.apply_timestamp_resolution(rewritten_join)?; - // 5. Wrap in JoinExtension for Physical Planning - let ttl = (!is_instant).then_some(self.schema_provider.planning_options.ttl); - let extension = JoinExtension { - rewritten_join: plan_with_timestamp, + // 5. 
Wrap in StreamingJoinNode for physical planning + let state_retention_ttl = (!is_instant).then_some(self.schema_provider.planning_options.ttl); + let extension = StreamingJoinNode::new( + plan_with_timestamp, is_instant, - ttl, - }; + state_retention_ttl, + ); Ok(Transformed::yes(LogicalPlan::Extension(Extension { node: Arc::new(extension), diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs index e13e2b7e..697d8c97 100644 --- a/src/sql/analysis/mod.rs +++ b/src/sql/analysis/mod.rs @@ -41,13 +41,12 @@ use datafusion::sql::sqlparser::parser::Parser; use tracing::{debug, info, instrument}; use crate::sql::logical_planner::optimizers::ChainingOptimizer; -use crate::sql::schema::insert::Insert; use crate::sql::schema::table::Table as CatalogTable; use crate::sql::functions::{is_json_union, serialize_outgoing_json}; -use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs}; -use crate::sql::extensions::projection::ProjectionExtension; -use crate::sql::extensions::sink::SinkExtension; -use crate::sql::extensions::{ StreamExtension}; +use crate::sql::extensions::key_calculation::{KeyExtractionNode, KeyExtractionStrategy}; +use crate::sql::extensions::projection::StreamProjectionNode; +use crate::sql::extensions::sink::StreamEgressNode; +use crate::sql::extensions::StreamingOperatorBlueprint; use crate::sql::logical_planner::planner::NamedNode; use crate::sql::types::SqlConfig; @@ -99,8 +98,8 @@ fn build_sink_inputs(extensions: &[LogicalPlan]) -> HashMap>::new(); for extension in extensions.iter() { if let LogicalPlan::Extension(ext) = extension { - if let Some(sink_node) = ext.node.as_any().downcast_ref::() { - if let Some(named_node) = sink_node.node_name() { + if let Some(sink_node) = ext.node.as_any().downcast_ref::() { + if let Some(named_node) = sink_node.operator_identity() { let inputs = sink_node .inputs() .into_iter() @@ -119,11 +118,11 @@ pub(crate) fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result() else { + 
let Some(sink) = ext.node.as_any().downcast_ref::() else { return Ok(plan); }; - let Some(partition_exprs) = sink.table.partition_exprs() else { + let Some(partition_exprs) = sink.destination_table.partition_exprs() else { return Ok(plan); }; @@ -136,11 +135,13 @@ pub(crate) fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result Result { fn f_down(&mut self, node: Self::Node) -> DFResult> { if let LogicalPlan::Extension(extension) = &node { - if let Some(sink_node) = extension.node.as_any().downcast_ref::() { - if let Some(named_node) = sink_node.node_name() { + if let Some(sink_node) = extension.node.as_any().downcast_ref::() { + if let Some(named_node) = sink_node.operator_identity() { if let Some(inputs) = self.sink_inputs.remove(&named_node) { let new_node = LogicalPlan::Extension(Extension { node: Arc::new(sink_node.with_exprs_and_inputs(vec![], inputs)?), diff --git a/src/sql/analysis/source_metadata_visitor.rs b/src/sql/analysis/source_metadata_visitor.rs index a49a7e72..0d2e1455 100644 --- a/src/sql/analysis/source_metadata_visitor.rs +++ b/src/sql/analysis/source_metadata_visitor.rs @@ -1,5 +1,5 @@ -use crate::sql::extensions::sink::SinkExtension; -use crate::sql::extensions::table_source::TableSourceExtension; +use crate::sql::extensions::sink::{StreamEgressNode, STREAM_EGRESS_NODE_NAME}; +use crate::sql::extensions::table_source::{StreamIngestionNode, STREAM_INGESTION_NODE_NAME}; use crate::sql::schema::StreamSchemaProvider; use datafusion::common::Result as DFResult; use datafusion::common::tree_node::{TreeNodeRecursion, TreeNodeVisitor}; @@ -26,20 +26,20 @@ impl<'a> SourceMetadataVisitor<'a> { }; let table_name = match node.name() { - "TableSourceExtension" => { - let ext = node.as_any().downcast_ref::()?; - ext.name.to_string() + name if name == STREAM_INGESTION_NODE_NAME => { + let ext = node.as_any().downcast_ref::()?; + ext.source_identifier.to_string() } - "SinkExtension" => { - let ext = node.as_any().downcast_ref::()?; - 
ext.name.to_string() + name if name == STREAM_EGRESS_NODE_NAME => { + let ext = node.as_any().downcast_ref::()?; + ext.target_identifier.to_string() } _ => return None, }; let table = self.schema_provider.get_catalog_table(&table_name)?; match table { - crate::sql::schema::table::Table::ConnectorTable(t) => t.id, + crate::sql::schema::table::Table::ConnectorTable(t) => t.registry_id, _ => None, } } diff --git a/src/sql/analysis/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs index 1bba1551..d642afd5 100644 --- a/src/sql/analysis/source_rewriter.rs +++ b/src/sql/analysis/source_rewriter.rs @@ -20,12 +20,12 @@ use datafusion::logical_expr::{ self, BinaryExpr, Expr, Extension, LogicalPlan, Projection, TableScan, }; -use crate::sql::schema::connector_table::ConnectorTable; -use crate::sql::schema::field_spec::FieldSpec; +use crate::sql::schema::source_table::SourceTable; +use crate::sql::schema::ColumnDescriptor; use crate::sql::schema::table::Table; use crate::sql::schema::StreamSchemaProvider; -use crate::sql::extensions::remote_table::RemoteTableExtension; -use crate::sql::extensions::watermark_node::WatermarkNode; +use crate::sql::extensions::remote_table::RemoteTableBoundaryNode; +use crate::sql::extensions::watermark_node::EventTimeWatermarkNode; use crate::sql::types::TIMESTAMP_FIELD; /// Rewrites table scans into proper source nodes with projections and watermarks. 
@@ -34,22 +34,35 @@ pub struct SourceRewriter<'a> { } impl SourceRewriter<'_> { - fn watermark_expression(table: &ConnectorTable) -> DFResult { - match table.watermark_field.clone() { + fn projection_expr_for_column(col: &ColumnDescriptor, qualifier: &TableReference) -> Expr { + if let Some(logic) = col.computation_logic() { + logic + .clone() + .alias_qualified(Some(qualifier.clone()), col.arrow_field().name().to_string()) + } else { + Expr::Column(Column { + relation: Some(qualifier.clone()), + name: col.arrow_field().name().to_string(), + spans: Default::default(), + }) + } + } + + fn watermark_expression(table: &SourceTable) -> DFResult { + match table.temporal_config.watermark_strategy_column.clone() { Some(watermark_field) => table - .fields + .schema_specs .iter() - .find_map(|f| { - if f.field().name() == &watermark_field { - return match f { - FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => { - Some(Expr::Column(Column { - relation: None, - name: field.name().to_string(), - spans: Default::default(), - })) - } - FieldSpec::Virtual { expression, .. } => Some(*expression.clone()), + .find_map(|c| { + if c.arrow_field().name() == watermark_field.as_str() { + return if let Some(expr) = c.computation_logic() { + Some(expr.clone()) + } else { + Some(Expr::Column(Column { + relation: None, + name: c.arrow_field().name().to_string(), + spans: Default::default(), + })) }; } None @@ -73,47 +86,27 @@ impl SourceRewriter<'_> { } fn projection_expressions( - table: &ConnectorTable, + table: &SourceTable, qualifier: &TableReference, projection: &Option>, ) -> DFResult> { let mut expressions: Vec = table - .fields + .schema_specs .iter() - .map(|field| match field { - FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. 
} => { - Expr::Column(Column { - relation: Some(qualifier.clone()), - name: field.name().to_string(), - spans: Default::default(), - }) - } - FieldSpec::Virtual { field, expression } => expression - .clone() - .alias_qualified(Some(qualifier.clone()), field.name().to_string()), - }) + .map(|col| Self::projection_expr_for_column(col, qualifier)) .collect(); if let Some(proj) = projection { expressions = proj.iter().map(|i| expressions[*i].clone()).collect(); } - if let Some(event_time_field) = table.event_time_field.clone() { + if let Some(event_time_field) = table.temporal_config.event_column.clone() { let expr = table - .fields + .schema_specs .iter() - .find_map(|f| { - if f.field().name() == &event_time_field { - return match f { - FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => { - Some(Expr::Column(Column { - relation: Some(qualifier.clone()), - name: field.name().to_string(), - spans: Default::default(), - })) - } - FieldSpec::Virtual { expression, .. } => Some(*expression.clone()), - }; + .find_map(|c| { + if c.arrow_field().name() == event_time_field.as_str() { + return Some(Self::projection_expr_for_column(c, qualifier)); } None }) @@ -133,10 +126,10 @@ impl SourceRewriter<'_> { Ok(expressions) } - fn projection(&self, table_scan: &TableScan, table: &ConnectorTable) -> DFResult { + fn projection(&self, table_scan: &TableScan, table: &SourceTable) -> DFResult { let qualifier = table_scan.table_name.clone(); - // TODO: replace with TableSourceExtension when available + // TODO: replace with StreamIngestionNode when available let source_input = LogicalPlan::TableScan(table_scan.clone()); Ok(LogicalPlan::Projection(Projection::try_new( @@ -148,27 +141,27 @@ impl SourceRewriter<'_> { fn mutate_connector_table( &self, table_scan: &TableScan, - table: &ConnectorTable, + table: &SourceTable, ) -> DFResult> { let input = self.projection(table_scan, table)?; let schema = input.schema().clone(); let remote = LogicalPlan::Extension(Extension { - node: 
Arc::new(RemoteTableExtension { - input, - name: table_scan.table_name.to_owned(), - schema, - materialize: true, + node: Arc::new(RemoteTableBoundaryNode { + upstream_plan: input, + table_identifier: table_scan.table_name.to_owned(), + resolved_schema: schema, + requires_materialization: true, }), }); - let watermark_node = WatermarkNode::new( + let watermark_node = EventTimeWatermarkNode::try_new( remote, table_scan.table_name.clone(), Self::watermark_expression(table)?, ) .map_err(|err| { - DataFusionError::Internal(format!("failed to create watermark expression: {err}")) + DataFusionError::Internal(format!("failed to create watermark node: {err}")) })?; Ok(Transformed::yes(LogicalPlan::Extension(Extension { diff --git a/src/sql/analysis/stream_rewriter.rs b/src/sql/analysis/stream_rewriter.rs index 999b1fb8..22ed3c83 100644 --- a/src/sql/analysis/stream_rewriter.rs +++ b/src/sql/analysis/stream_rewriter.rs @@ -1,8 +1,8 @@ use std::sync::Arc; use super::StreamSchemaProvider; -use crate::sql::extensions::StreamExtension; -use crate::sql::extensions::remote_table::RemoteTableExtension; +use crate::sql::extensions::StreamingOperatorBlueprint; +use crate::sql::extensions::remote_table::RemoteTableBoundaryNode; use crate::sql::analysis::row_time_rewriter::RowTimeRewriter; use crate::sql::analysis::{ aggregate_rewriter::AggregateRewriter, join_rewriter::JoinRewriter, @@ -137,7 +137,7 @@ impl<'a> StreamRewriter<'a> { Ok(Transformed::yes(LogicalPlan::Projection(projection))) } - /// Harmonizes schemas across Union branches and wraps them in RemoteTableExtensions. + /// Harmonizes schemas across Union branches and wraps them in RemoteTableBoundaryNodes. /// /// This ensures that all inputs to a UNION operation share the exact same schema metadata, /// preventing "Schema Drift" where different branches have different field qualifiers. 
@@ -151,23 +151,23 @@ impl<'a> StreamRewriter<'a> { // Optimization: If the node is already a non-transparent Extension, // we skip wrapping to avoid unnecessary nesting of logical nodes. if let LogicalPlan::Extension(Extension { node }) = input.as_ref() { - let stream_ext: &dyn StreamExtension = node.try_into().map_err(|e| { - DataFusionError::Internal(format!("Failed to resolve StreamExtension: {}", e)) + let stream_ext: &dyn StreamingOperatorBlueprint = node.try_into().map_err(|e| { + DataFusionError::Internal(format!("Failed to resolve StreamingOperatorBlueprint: {}", e)) })?; - if !stream_ext.transparent() { + if !stream_ext.is_passthrough_boundary() { continue; } } - // Wrap each branch in a RemoteTableExtension. + // Wrap each branch in a RemoteTableBoundaryNode. // This acts as a logical "bridge" that forces the input to adopt the master_schema, // effectively stripping away branch-specific qualifiers (e.g., table aliases). - let remote_ext = Arc::new(RemoteTableExtension { - input: input.as_ref().clone(), - name: TableReference::bare("union_input"), - schema: master_schema.clone(), - materialize: false, // Internal logical boundary only; does not require physical sink. + let remote_ext = Arc::new(RemoteTableBoundaryNode { + upstream_plan: input.as_ref().clone(), + table_identifier: TableReference::bare("union_input"), + resolved_schema: master_schema.clone(), + requires_materialization: false, // Internal logical boundary only; does not require physical sink. }); // Atomically replace the input with the wrapped version. 
diff --git a/src/sql/analysis/streaming_window_analzer.rs b/src/sql/analysis/streaming_window_analzer.rs index 59ded792..5eed3d2b 100644 --- a/src/sql/analysis/streaming_window_analzer.rs +++ b/src/sql/analysis/streaming_window_analzer.rs @@ -1,12 +1,12 @@ use std::collections::HashSet; use std::sync::Arc; -use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; +use datafusion::common::tree_node::{TreeNodeRecursion, TreeNodeVisitor}; use datafusion::common::{Column, DFSchema, DataFusionError, Result}; -use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, expr::Alias}; +use datafusion::logical_expr::{Expr, Extension, LogicalPlan, expr::Alias}; -use crate::sql::extensions::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension}; -use crate::sql::extensions::join::JOIN_NODE_NAME; +use crate::sql::extensions::aggregate::{STREAM_AGG_EXTENSION_NAME, StreamWindowAggregateNode}; +use crate::sql::extensions::join::STREAM_JOIN_NODE_TYPE; use crate::sql::types::{DFField, WindowBehavior, WindowType, fields_with_qualifiers, find_window}; /// WindowDetectingVisitor identifies windowing strategies and tracks window-carrying fields @@ -89,7 +89,7 @@ impl TreeNodeVisitor<'_> for StreamingWindowAnalzer { fn f_down(&mut self, node: &Self::Node) -> Result { // Joins require cross-branch validation to ensure left and right sides align on time. 
if let LogicalPlan::Extension(Extension { node }) = node - && node.name() == JOIN_NODE_NAME + && node.name() == STREAM_JOIN_NODE_TYPE { let mut branch_windows = HashSet::new(); for input in node.inputs() { @@ -159,16 +159,16 @@ impl TreeNodeVisitor<'_> for StreamingWindowAnalzer { } LogicalPlan::Extension(Extension { node }) - if node.name() == AGGREGATE_EXTENSION_NAME => + if node.name() == STREAM_AGG_EXTENSION_NAME => { let ext = node .as_any() - .downcast_ref::() + .downcast_ref::() .ok_or_else(|| { - DataFusionError::Internal("AggregateExtension node is malformed".into()) + DataFusionError::Internal("StreamWindowAggregateNode is malformed".into()) })?; - match &ext.window_behavior { + match &ext.window_spec { WindowBehavior::FromOperator { window, window_field, diff --git a/src/sql/analysis/window_function_rewriter.rs b/src/sql/analysis/window_function_rewriter.rs index ce580eaf..8f195325 100644 --- a/src/sql/analysis/window_function_rewriter.rs +++ b/src/sql/analysis/window_function_rewriter.rs @@ -8,8 +8,8 @@ use datafusion_common::DataFusionError; use std::sync::Arc; use tracing::debug; -use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs}; -use crate::sql::extensions::window_fn::WindowFunctionExtension; +use crate::sql::extensions::key_calculation::{KeyExtractionNode, KeyExtractionStrategy}; +use crate::sql::extensions::windows_function::StreamingWindowFunctionNode; use crate::sql::analysis::streaming_window_analzer::{StreamingWindowAnalzer, extract_column}; use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields}; @@ -66,7 +66,7 @@ impl WindowFunctionRewriter { Ok(matched[0]) } - /// Wraps the input in a Projection and KeyCalculationExtension to handle data distribution. + /// Wraps the input in a Projection and KeyExtractionNode to handle data distribution. 
fn build_keyed_input( &self, input: Arc, @@ -101,11 +101,11 @@ impl WindowFunctionRewriter { let projection = LogicalPlan::Projection(Projection::try_new_with_schema(exprs, input, keyed_schema)?); - // 3. Wrap in KeyCalculationExtension for the physical planner + // 3. Wrap in KeyExtractionNode for the physical planner Ok(LogicalPlan::Extension(Extension { - node: Arc::new(KeyCalculationExtension::new( + node: Arc::new(KeyExtractionNode::new( projection, - KeysOrExprs::Keys((0..key_count).collect()), + KeyExtractionStrategy::ColumnIndices((0..key_count).collect()), )), })) } @@ -182,7 +182,7 @@ impl TreeNodeRewriter for WindowFunctionRewriter { LogicalPlan::Window(Window::try_new(vec![final_wf_expr], Arc::new(sorted_plan))?); Ok(Transformed::yes(LogicalPlan::Extension(Extension { - node: Arc::new(WindowFunctionExtension::new( + node: Arc::new(StreamingWindowFunctionNode::new( rewritten_window, (0..key_count).collect(), )), diff --git a/src/coordinator/tool/connector_options.rs b/src/sql/common/connector_options.rs similarity index 96% rename from src/coordinator/tool/connector_options.rs rename to src/sql/common/connector_options.rs index de39872f..308d5197 100644 --- a/src/coordinator/tool/connector_options.rs +++ b/src/sql/common/connector_options.rs @@ -307,6 +307,16 @@ impl ConnectorOptions { pub fn contains_key(&self, key: &str) -> bool { self.options.contains_key(key) } + + /// Drain all remaining options into string values (for connector runtime config). + pub fn drain_remaining_string_values(&mut self) -> DFResult> { + let taken = std::mem::take(&mut self.options); + let mut out = HashMap::with_capacity(taken.len()); + for (k, v) in taken { + out.insert(k, format!("{v}")); + } + Ok(out) + } } fn duration_from_sql_expr(expr: &Expr) -> Result { diff --git a/src/sql/common/format_from_opts.rs b/src/sql/common/format_from_opts.rs new file mode 100644 index 00000000..dc9a43da --- /dev/null +++ b/src/sql/common/format_from_opts.rs @@ -0,0 +1,162 @@ +//! 
Parse `WITH` clause format / framing / bad-data options (Arroyo-compatible keys). + +use std::str::FromStr; + +use datafusion::common::{Result as DFResult, plan_datafusion_err, plan_err}; + +use super::connector_options::ConnectorOptions; +use super::formats::{ + AvroFormat, BadData, DecimalEncoding, Format, Framing, JsonCompression, JsonFormat, + NewlineDelimitedFraming, ParquetCompression, ParquetFormat, ProtobufFormat, RawBytesFormat, + RawStringFormat, TimestampFormat, +}; + +impl JsonFormat { + pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult { + let mut j = JsonFormat::default(); + if let Some(v) = opts.pull_opt_bool("json.confluent_schema_registry")? { + j.confluent_schema_registry = v; + } + if let Some(v) = opts.pull_opt_u64("json.confluent_schema_version")? { + j.schema_id = Some(v as u32); + } + if let Some(v) = opts.pull_opt_bool("json.include_schema")? { + j.include_schema = v; + } + if let Some(v) = opts.pull_opt_bool("json.debezium")? { + j.debezium = v; + } + if let Some(v) = opts.pull_opt_bool("json.unstructured")? { + j.unstructured = v; + } + if let Some(s) = opts.pull_opt_str("json.timestamp_format")? { + j.timestamp_format = TimestampFormat::try_from(s.as_str()).map_err(|_| { + plan_datafusion_err!("invalid json.timestamp_format '{}'", s) + })?; + } + if let Some(s) = opts.pull_opt_str("json.decimal_encoding")? { + j.decimal_encoding = DecimalEncoding::try_from(s.as_str()).map_err(|_| { + plan_datafusion_err!("invalid json.decimal_encoding '{s}'") + })?; + } + if let Some(s) = opts.pull_opt_str("json.compression")? { + j.compression = JsonCompression::from_str(&s) + .map_err(|e| plan_datafusion_err!("invalid json.compression: {e}"))?; + } + Ok(j) + } +} + +impl Format { + pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult> { + let Some(name) = opts.pull_opt_str("format")? 
else { + return Ok(None); + }; + match name.to_lowercase().as_str() { + "json" => Ok(Some(Format::Json(JsonFormat::from_opts(opts)?))), + "debezium_json" => { + let mut j = JsonFormat::from_opts(opts)?; + j.debezium = true; + Ok(Some(Format::Json(j))) + } + "avro" => Ok(Some(Format::Avro(AvroFormat::from_opts(opts)?))), + "parquet" => Ok(Some(Format::Parquet(ParquetFormat::from_opts(opts)?))), + "protobuf" => Ok(Some(Format::Protobuf(ProtobufFormat::from_opts(opts)?))), + "raw_string" => Ok(Some(Format::RawString(RawStringFormat {}))), + "raw_bytes" => Ok(Some(Format::RawBytes(RawBytesFormat {}))), + _ => plan_err!("unknown format '{name}'"), + } + } +} + +impl AvroFormat { + fn from_opts(opts: &mut ConnectorOptions) -> DFResult { + let mut a = AvroFormat { + confluent_schema_registry: false, + raw_datums: false, + into_unstructured_json: false, + schema_id: None, + }; + if let Some(v) = opts.pull_opt_bool("avro.confluent_schema_registry")? { + a.confluent_schema_registry = v; + } + if let Some(v) = opts.pull_opt_bool("avro.raw_datums")? { + a.raw_datums = v; + } + if let Some(v) = opts.pull_opt_bool("avro.into_unstructured_json")? { + a.into_unstructured_json = v; + } + if let Some(v) = opts.pull_opt_u64("avro.schema_id")? { + a.schema_id = Some(v as u32); + } + Ok(a) + } +} + +impl ParquetFormat { + fn from_opts(opts: &mut ConnectorOptions) -> DFResult { + let mut p = ParquetFormat::default(); + if let Some(s) = opts.pull_opt_str("parquet.compression")? { + p.compression = ParquetCompression::from_str(&s) + .map_err(|e| plan_datafusion_err!("invalid parquet.compression: {e}"))?; + } + if let Some(v) = opts.pull_opt_u64("parquet.row_group_bytes")? 
{ + p.row_group_bytes = Some(v); + } + Ok(p) + } +} + +impl ProtobufFormat { + fn from_opts(opts: &mut ConnectorOptions) -> DFResult { + let mut p = ProtobufFormat { + into_unstructured_json: false, + message_name: None, + compiled_schema: None, + confluent_schema_registry: false, + length_delimited: false, + }; + if let Some(v) = opts.pull_opt_bool("protobuf.into_unstructured_json")? { + p.into_unstructured_json = v; + } + if let Some(s) = opts.pull_opt_str("protobuf.message_name")? { + p.message_name = Some(s); + } + if let Some(v) = opts.pull_opt_bool("protobuf.confluent_schema_registry")? { + p.confluent_schema_registry = v; + } + if let Some(v) = opts.pull_opt_bool("protobuf.length_delimited")? { + p.length_delimited = v; + } + Ok(p) + } +} + +impl Framing { + pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult> { + let method = opts.pull_opt_str("framing.method")?; + match method.as_deref() { + None => Ok(None), + Some("newline") | Some("newline_delimited") => { + let max = opts.pull_opt_u64("framing.max_line_length")?; + Ok(Some(Framing::Newline(NewlineDelimitedFraming { + max_line_length: max, + }))) + } + Some(other) => plan_err!("unknown framing.method '{other}'"), + } + } +} + +impl BadData { + pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult { + let Some(s) = opts.pull_opt_str("bad_data")? else { + return Ok(BadData::Fail {}); + }; + match s.to_lowercase().as_str() { + "fail" => Ok(BadData::Fail {}), + "drop" => Ok(BadData::Drop {}), + _ => plan_err!("invalid bad_data '{s}'"), + } + } +} diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs index d03511c0..730d6f37 100644 --- a/src/sql/common/mod.rs +++ b/src/sql/common/mod.rs @@ -16,11 +16,13 @@ //! analogous to `arroyo-types` + `arroyo-rpc` in Arroyo. 
pub mod arrow_ext; +pub mod connector_options; pub mod control; pub mod date; pub mod debezium; pub mod fs_schema; pub mod errors; +pub mod format_from_opts; pub mod formats; pub mod hash; pub mod message; @@ -46,8 +48,9 @@ pub use control::{ ErrorDomain, RetryHint, StopMode, TaskCheckpointEventType, TaskError, }; pub use fs_schema::{FsSchema, FsSchemaRef}; +pub use connector_options::{ConnectorOptions, FromOpts}; pub use errors::DataflowError; -pub use formats::{BadData, Format, Framing, JsonFormat}; +pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat}; pub use operator_config::{MetadataField, OperatorConfig, RateLimit}; // ── Well-known column names ── diff --git a/src/sql/extensions/aggregate.rs b/src/sql/extensions/aggregate.rs index 12cde08c..7ba16f7a 100644 --- a/src/sql/extensions/aggregate.rs +++ b/src/sql/extensions/aggregate.rs @@ -1,11 +1,24 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use std::fmt::Formatter; use std::sync::Arc; use std::time::Duration; + use arrow_array::types::IntervalMonthDayNanoType; use datafusion::common::{Column, DFSchemaRef, Result, ScalarValue, internal_err}; -use datafusion::logical_expr; use datafusion::logical_expr::{ - BinaryExpr, Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, expr::ScalarFunction, + self, expr::ScalarFunction, BinaryExpr, Expr, Extension, LogicalPlan, + UserDefinedLogicalNodeCore, }; use datafusion_common::{plan_err, DFSchema, DataFusionError}; use datafusion_expr::Aggregate; @@ -13,149 +26,164 @@ use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionC use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; use datafusion_proto::protobuf::PhysicalPlanNode; use prost::Message; -use protocol::grpc::api::{ SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use protocol::grpc::api::{ + SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator, +}; + use crate::multifield_partial_ord; -use crate::sql::logical_planner::{window, FsPhysicalExtensionCodec}; -use crate::sql::extensions::{ NodeWithIncomingEdges, StreamExtension, TimestampAppendExtension}; +use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::extensions::{ + CompiledTopologyNode, StreamingOperatorBlueprint, SystemTimestampInjectorNode, +}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; use crate::sql::logical_planner::planner::{NamedNode, Planner, SplitPlanOutput}; +use crate::sql::logical_planner::{window, FsPhysicalExtensionCodec}; use crate::sql::types::{ DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, schema_from_df_fields, schema_from_df_fields_with_metadata, }; -use crate::sql::common::{FsSchema, FsSchemaRef}; 
-pub(crate) const AGGREGATE_EXTENSION_NAME: &str = "AggregateExtension"; +pub(crate) const STREAM_AGG_EXTENSION_NAME: &str = "StreamWindowAggregateNode"; +const INTERNAL_TIMESTAMP_COL: &str = "_timestamp"; +/// Represents a streaming windowed aggregation node in the logical plan. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct AggregateExtension { - pub(crate) window_behavior: WindowBehavior, - pub(crate) aggregate: LogicalPlan, - pub(crate) schema: DFSchemaRef, - pub(crate) key_fields: Vec, - pub(crate) final_calculation: LogicalPlan, +pub(crate) struct StreamWindowAggregateNode { + pub(crate) window_spec: WindowBehavior, + pub(crate) base_agg_plan: LogicalPlan, + pub(crate) output_schema: DFSchemaRef, + pub(crate) partition_keys: Vec, + pub(crate) post_aggregation_plan: LogicalPlan, } -multifield_partial_ord!(AggregateExtension, aggregate, key_fields, final_calculation); - -impl AggregateExtension { - pub fn new( - window_behavior: WindowBehavior, - aggregate: LogicalPlan, - key_fields: Vec, - ) -> Self { - let final_calculation = - Self::final_projection(&aggregate, window_behavior.clone()).unwrap(); - - Self { - window_behavior, - aggregate, - schema: final_calculation.schema().clone(), - key_fields, - final_calculation, - } +multifield_partial_ord!( + StreamWindowAggregateNode, + base_agg_plan, + partition_keys, + post_aggregation_plan +); + +impl StreamWindowAggregateNode { + /// Safely constructs a new node, computing the final projection without panicking. 
+ pub fn try_new( + window_spec: WindowBehavior, + base_agg_plan: LogicalPlan, + partition_keys: Vec, + ) -> Result { + let post_aggregation_plan = + WindowBoundaryMath::build_post_aggregation(&base_agg_plan, window_spec.clone())?; + + Ok(Self { + window_spec, + base_agg_plan, + output_schema: post_aggregation_plan.schema().clone(), + partition_keys, + post_aggregation_plan, + }) } - pub fn tumbling_window_config( + fn build_tumbling_operator( &self, planner: &Planner, - index: usize, + node_id: usize, input_schema: DFSchemaRef, - width: Duration, + duration: Duration, ) -> Result { - let binning_function_proto = planner.binning_function_proto(width, input_schema.clone())?; + let binning_expr = planner.binning_function_proto(duration, input_schema.clone())?; + let SplitPlanOutput { partial_aggregation_plan, partial_schema, finish_plan, - } = planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?; + } = planner.split_physical_plan(self.partition_keys.clone(), &self.base_agg_plan, true)?; - let final_physical_plan = planner.sync_plan(&self.final_calculation)?; - let final_physical_plan_node = PhysicalPlanNode::try_from_physical_plan( - final_physical_plan, + let final_physical = planner.sync_plan(&self.post_aggregation_plan)?; + let final_physical_proto = PhysicalPlanNode::try_from_physical_plan( + final_physical, &FsPhysicalExtensionCodec::default(), )?; - let config = TumblingWindowAggregateOperator { + let operator_config = TumblingWindowAggregateOperator { name: "TumblingWindow".to_string(), - width_micros: width.as_micros() as u64, - binning_function: binning_function_proto.encode_to_vec(), + width_micros: duration.as_micros() as u64, + binning_function: binning_expr.encode_to_vec(), input_schema: Some( FsSchema::from_schema_keys( Arc::new(input_schema.as_ref().into()), - self.key_fields.clone(), - )?.into(), + self.partition_keys.clone(), + )? 
+ .into(), ), partial_schema: Some(partial_schema.into()), partial_aggregation_plan: partial_aggregation_plan.encode_to_vec(), final_aggregation_plan: finish_plan.encode_to_vec(), - final_projection: Some(final_physical_plan_node.encode_to_vec()), + final_projection: Some(final_physical_proto.encode_to_vec()), }; Ok(LogicalNode::single( - index as u32, - format!("tumbling_{index}"), + node_id as u32, + format!("tumbling_{node_id}"), OperatorName::TumblingWindowAggregate, - config.encode_to_vec(), - format!("TumblingWindow<{}>", config.name), + operator_config.encode_to_vec(), + format!("TumblingWindow<{}>", operator_config.name), 1, )) } - pub fn sliding_window_config( + fn build_sliding_operator( &self, planner: &Planner, - index: usize, + node_id: usize, input_schema: DFSchemaRef, - width: Duration, - slide: Duration, + duration: Duration, + slide_interval: Duration, ) -> Result { - let binning_function_proto = planner.binning_function_proto(slide, input_schema.clone())?; + let binning_expr = planner.binning_function_proto(slide_interval, input_schema.clone())?; let SplitPlanOutput { partial_aggregation_plan, partial_schema, finish_plan, - } = planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?; + } = planner.split_physical_plan(self.partition_keys.clone(), &self.base_agg_plan, true)?; - let final_physical_plan = planner.sync_plan(&self.final_calculation)?; - let final_physical_plan_node = PhysicalPlanNode::try_from_physical_plan( - final_physical_plan, + let final_physical = planner.sync_plan(&self.post_aggregation_plan)?; + let final_physical_proto = PhysicalPlanNode::try_from_physical_plan( + final_physical, &FsPhysicalExtensionCodec::default(), )?; - let config = SlidingWindowAggregateOperator { - name: format!("SlidingWindow<{width:?}>"), - width_micros: width.as_micros() as u64, - slide_micros: slide.as_micros() as u64, - binning_function: binning_function_proto.encode_to_vec(), + let operator_config = SlidingWindowAggregateOperator 
{ + name: format!("SlidingWindow<{duration:?}>"), + width_micros: duration.as_micros() as u64, + slide_micros: slide_interval.as_micros() as u64, + binning_function: binning_expr.encode_to_vec(), input_schema: Some( FsSchema::from_schema_keys( Arc::new(input_schema.as_ref().into()), - self.key_fields.clone(), - )?.into(), + self.partition_keys.clone(), + )? + .into(), ), partial_schema: Some(partial_schema.into()), partial_aggregation_plan: partial_aggregation_plan.encode_to_vec(), final_aggregation_plan: finish_plan.encode_to_vec(), - final_projection: final_physical_plan_node.encode_to_vec(), - // TODO add final aggregation. + final_projection: final_physical_proto.encode_to_vec(), }; Ok(LogicalNode::single( - index as u32, - format!("sliding_window_{index}"), + node_id as u32, + format!("sliding_window_{node_id}"), OperatorName::SlidingWindowAggregate, - config.encode_to_vec(), + operator_config.encode_to_vec(), "sliding window".to_string(), 1, )) } - pub fn session_window_config( + fn build_session_operator( &self, planner: &Planner, - index: usize, + node_id: usize, input_schema: DFSchemaRef, ) -> Result { let WindowBehavior::FromOperator { @@ -163,426 +191,421 @@ impl AggregateExtension { window_index, window_field, is_nested: false, - } = &self.window_behavior + } = &self.window_spec else { - return plan_err!("expected sliding window"); + return plan_err!("Expected standard session window configuration"); }; - let output_schema = fields_with_qualifiers(self.aggregate.schema()); - let LogicalPlan::Aggregate(agg) = self.aggregate.clone() else { - return plan_err!("expected aggregate"); + + let output_fields = fields_with_qualifiers(self.base_agg_plan.schema()); + let LogicalPlan::Aggregate(base_agg) = self.base_agg_plan.clone() else { + return plan_err!("Base plan must be an Aggregate node"); }; - let key_count = self.key_fields.len(); - let unkeyed_aggregate_schema = Arc::new(schema_from_df_fields_with_metadata( - &output_schema[key_count..], - 
self.aggregate.schema().metadata().clone(), + + let key_count = self.partition_keys.len(); + let unkeyed_schema = Arc::new(schema_from_df_fields_with_metadata( + &output_fields[key_count..], + self.base_agg_plan.schema().metadata().clone(), )?); - let unkeyed_aggregate = Aggregate::try_new_with_schema( - agg.input.clone(), + let unkeyed_agg_node = Aggregate::try_new_with_schema( + base_agg.input.clone(), vec![], - agg.aggr_expr.clone(), - unkeyed_aggregate_schema.clone(), + base_agg.aggr_expr.clone(), + unkeyed_schema, )?; - let aggregate_plan = planner.sync_plan(&LogicalPlan::Aggregate(unkeyed_aggregate))?; - let physical_plan_node = PhysicalPlanNode::try_from_physical_plan( - aggregate_plan, + let physical_agg = planner.sync_plan(&LogicalPlan::Aggregate(unkeyed_agg_node))?; + let physical_agg_proto = PhysicalPlanNode::try_from_physical_plan( + physical_agg, &FsPhysicalExtensionCodec::default(), )?; - let input_schema = FsSchema::from_schema_keys( - Arc::new(input_schema.as_ref().into()), - self.key_fields.clone(), - )?; - let config = SessionWindowAggregateOperator { - name: format!("session_window_{index}"), + let operator_config = SessionWindowAggregateOperator { + name: format!("session_window_{node_id}"), gap_micros: gap.as_micros() as u64, window_field_name: window_field.name().to_string(), window_index: *window_index as u64, - input_schema: Some(input_schema.into()), + input_schema: Some( + FsSchema::from_schema_keys( + Arc::new(input_schema.as_ref().into()), + self.partition_keys.clone(), + )? 
+ .into(), + ), unkeyed_aggregate_schema: None, partial_aggregation_plan: vec![], - final_aggregation_plan: physical_plan_node.encode_to_vec(), + final_aggregation_plan: physical_agg_proto.encode_to_vec(), }; Ok(LogicalNode::single( - index as u32, + node_id as u32, format!("SessionWindow<{gap:?}>"), OperatorName::SessionWindowAggregate, - config.encode_to_vec(), - config.name.clone(), + operator_config.encode_to_vec(), + operator_config.name.clone(), 1, )) } - pub fn instant_window_config( + fn build_instant_operator( &self, planner: &Planner, - index: usize, + node_id: usize, input_schema: DFSchemaRef, - use_final_projection: bool, + apply_final_projection: bool, ) -> Result { - let binning_function = planner.create_physical_expr( - &Expr::Column(Column::new_unqualified("_timestamp".to_string())), - &input_schema, - )?; - let binning_function_proto = - serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {})?; - - let final_projection = use_final_projection - .then(|| { - let final_physical_plan = planner.sync_plan(&self.final_calculation)?; - let final_physical_plan_node = PhysicalPlanNode::try_from_physical_plan( - final_physical_plan, - &FsPhysicalExtensionCodec::default(), - )?; - Ok::, DataFusionError>(final_physical_plan_node.encode_to_vec()) - }) - .transpose()?; + let ts_column_expr = + Expr::Column(Column::new_unqualified(INTERNAL_TIMESTAMP_COL.to_string())); + let binning_expr = planner.create_physical_expr(&ts_column_expr, &input_schema)?; + let binning_proto = serialize_physical_expr(&binning_expr, &DefaultPhysicalExtensionCodec {})?; + + let final_projection_payload = if apply_final_projection { + let physical_plan = planner.sync_plan(&self.post_aggregation_plan)?; + let proto_node = PhysicalPlanNode::try_from_physical_plan( + physical_plan, + &FsPhysicalExtensionCodec::default(), + )?; + Some(proto_node.encode_to_vec()) + } else { + None + }; let SplitPlanOutput { partial_aggregation_plan, partial_schema, finish_plan, - } = 
planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?; + } = planner.split_physical_plan(self.partition_keys.clone(), &self.base_agg_plan, true)?; - let config = TumblingWindowAggregateOperator { + let operator_config = TumblingWindowAggregateOperator { name: "InstantWindow".to_string(), width_micros: 0, - binning_function: binning_function_proto.encode_to_vec(), + binning_function: binning_proto.encode_to_vec(), input_schema: Some( FsSchema::from_schema_keys( Arc::new(input_schema.as_ref().into()), - self.key_fields.clone(), - )?.into(), + self.partition_keys.clone(), + )? + .into(), ), partial_schema: Some(partial_schema.into()), partial_aggregation_plan: partial_aggregation_plan.encode_to_vec(), final_aggregation_plan: finish_plan.encode_to_vec(), - final_projection, + final_projection: final_projection_payload, }; Ok(LogicalNode::single( - index as u32, - format!("instant_window_{index}"), + node_id as u32, + format!("instant_window_{node_id}"), OperatorName::TumblingWindowAggregate, - config.encode_to_vec(), + operator_config.encode_to_vec(), "instant window".to_string(), 1, )) } +} + +impl StreamingOperatorBlueprint for StreamWindowAggregateNode { + fn operator_identity(&self) -> Option { + None + } + + fn compile_to_graph_node( + &self, + planner: &Planner, + node_id: usize, + mut input_schemas: Vec, + ) -> Result { + if input_schemas.len() != 1 { + return plan_err!("StreamWindowAggregateNode requires exactly one input schema"); + } + + let raw_schema = input_schemas.remove(0); + let df_schema = Arc::new(DFSchema::try_from(raw_schema.schema.as_ref().clone())?); + + let logical_operator = match &self.window_spec { + WindowBehavior::FromOperator { window, is_nested, .. } => { + if *is_nested { + self.build_instant_operator(planner, node_id, df_schema, true)? + } else { + match window { + WindowType::Tumbling { width } => { + self.build_tumbling_operator(planner, node_id, df_schema, *width)? 
+ } + WindowType::Sliding { width, slide } => { + self.build_sliding_operator(planner, node_id, df_schema, *width, *slide)? + } + WindowType::Session { .. } => { + self.build_session_operator(planner, node_id, df_schema)? + } + WindowType::Instant => { + return plan_err!( + "Instant window is invalid within standard operator context" + ); + } + } + } + } + WindowBehavior::InData => self + .build_instant_operator(planner, node_id, df_schema, false) + .map_err(|e| e.context("Failed compiling instant window"))?, + }; + + let link = LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*raw_schema).clone()); + Ok(CompiledTopologyNode { + execution_unit: logical_operator, + routing_edges: vec![link], + }) + } + + fn yielded_schema(&self) -> FsSchema { + let schema_ref = (*self.output_schema).clone().into(); + FsSchema::from_schema_unkeyed(Arc::new(schema_ref)).expect( + "StreamWindowAggregateNode output schema must contain timestamp column", + ) + } +} + +impl UserDefinedLogicalNodeCore for StreamWindowAggregateNode { + fn name(&self) -> &str { + STREAM_AGG_EXTENSION_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.base_agg_plan] + } + + fn schema(&self) -> &DFSchemaRef { + &self.output_schema + } + + fn expressions(&self) -> Vec { + vec![] + } - // projection assuming that _timestamp has been populated with the start of the bin. - pub fn final_projection( - aggregate_plan: &LogicalPlan, - window_behavior: WindowBehavior, + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + let spec_desc = match &self.window_spec { + WindowBehavior::InData => "InData".to_string(), + WindowBehavior::FromOperator { window, .. 
} => format!("FromOperator({window:?})"), + }; + write!( + f, + "StreamWindowAggregate: {} | spec: {}", + self.schema(), + spec_desc + ) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!("StreamWindowAggregateNode expects exactly 1 input"); + } + Self::try_new( + self.window_spec.clone(), + inputs[0].clone(), + self.partition_keys.clone(), + ) + } +} + +// ----------------------------------------------------------------------------- +// Dedicated boundary math for window bin / post-aggregation projection +// ----------------------------------------------------------------------------- + +struct WindowBoundaryMath; + +impl WindowBoundaryMath { + fn interval_nanos(nanos: i64) -> Expr { + Expr::Literal( + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, 0, nanos), + )), + None, + ) + } + + fn build_post_aggregation( + agg_plan: &LogicalPlan, + window_spec: WindowBehavior, ) -> Result { - let timestamp_field: DFField = aggregate_plan.inputs()[0] + let ts_field: DFField = agg_plan + .inputs() + .first() + .ok_or_else(|| DataFusionError::Plan("Aggregate has no inputs".into()))? .schema() .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)? 
.into(); - let timestamp_append = LogicalPlan::Extension(Extension { - node: Arc::new(TimestampAppendExtension::new( - aggregate_plan.clone(), - timestamp_field.qualifier().cloned(), - )), + + let plan_with_ts = LogicalPlan::Extension(Extension { + node: Arc::new(SystemTimestampInjectorNode::try_new( + agg_plan.clone(), + ts_field.qualifier().cloned(), + )?), }); - let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema()); - let mut aggregate_expressions: Vec<_> = aggregate_fields - .iter() - .map(|field| Expr::Column(field.qualified_column())) - .collect(); - let (window_field, window_index, width, is_nested) = match window_behavior { - WindowBehavior::InData => return Ok(timestamp_append), + + let (win_field, win_index, duration, is_nested) = match window_spec { + WindowBehavior::InData => return Ok(plan_with_ts), WindowBehavior::FromOperator { window, window_field, window_index, is_nested, } => match window { - WindowType::Tumbling { width, .. } | WindowType::Sliding { width, .. } => { + WindowType::Tumbling { width } | WindowType::Sliding { width, .. } => { (window_field, window_index, width, is_nested) } WindowType::Session { .. 
} => { return Ok(LogicalPlan::Extension(Extension { - node: Arc::new(WindowAppendExtension::new( - timestamp_append, + node: Arc::new(InjectWindowFieldNode::try_new( + plan_with_ts, window_field, window_index, - )), + )?), })); } - WindowType::Instant => return Ok(timestamp_append), + WindowType::Instant => return Ok(plan_with_ts), }, }; + if is_nested { - return Self::nested_final_projection( - timestamp_append, - window_field, - window_index, - width, - ); + return Self::build_nested_projection(plan_with_ts, win_field, win_index, duration); } - let timestamp_column = - Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name()); - aggregate_fields.insert(window_index, window_field.clone()); - let window_expression = Expr::ScalarFunction(ScalarFunction { + let mut output_fields = fields_with_qualifiers(agg_plan.schema()); + let mut projections: Vec<_> = output_fields + .iter() + .map(|f| Expr::Column(f.qualified_column())) + .collect(); + + let ts_col_expr = Expr::Column(Column::new(ts_field.qualifier().cloned(), ts_field.name())); + + output_fields.insert(win_index, win_field.clone()); + + let win_func_expr = Expr::ScalarFunction(ScalarFunction { func: window(), args: vec![ - // copy bin_start as first argument - Expr::Column(timestamp_column.clone()), - // add width interval to _timestamp for bin end + ts_col_expr.clone(), Expr::BinaryExpr(BinaryExpr { - left: Box::new(Expr::Column(timestamp_column.clone())), + left: Box::new(ts_col_expr.clone()), op: logical_expr::Operator::Plus, - right: Box::new(Expr::Literal( - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, 0, width.as_nanos() as i64), - )), - None, - )), + right: Box::new(Self::interval_nanos(duration.as_nanos() as i64)), }), ], }); - aggregate_expressions.insert( - window_index, - window_expression - .alias_qualified(window_field.qualifier().cloned(), window_field.name()), + + projections.insert( + win_index, + 
win_func_expr.alias_qualified(win_field.qualifier().cloned(), win_field.name()), ); - aggregate_fields.push(timestamp_field); - let bin_end_calculation = Expr::BinaryExpr(BinaryExpr { - left: Box::new(Expr::Column(timestamp_column.clone())), + + output_fields.push(ts_field); + + let bin_end_expr = Expr::BinaryExpr(BinaryExpr { + left: Box::new(ts_col_expr), op: logical_expr::Operator::Plus, - right: Box::new(Expr::Literal( - ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( - 0, - 0, - (width.as_nanos() - 1) as i64, - ))), - None, - )), + right: Box::new(Self::interval_nanos((duration.as_nanos() - 1) as i64)), }); - aggregate_expressions.push(bin_end_calculation); - Ok(LogicalPlan::Projection( - logical_expr::Projection::try_new_with_schema( - aggregate_expressions, - Arc::new(timestamp_append), - Arc::new(schema_from_df_fields(&aggregate_fields)?), - )?, - )) + projections.push(bin_end_expr); + + Ok(LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema( + projections, + Arc::new(plan_with_ts), + Arc::new(schema_from_df_fields(&output_fields)?), + )?)) } - fn nested_final_projection( - aggregate_plan: LogicalPlan, - window_field: DFField, - window_index: usize, - width: Duration, + fn build_nested_projection( + plan: LogicalPlan, + win_field: DFField, + win_index: usize, + duration: Duration, ) -> Result { - let timestamp_field: DFField = aggregate_plan + let ts_field: DFField = plan .schema() - .qualified_field_with_unqualified_name(TIMESTAMP_FIELD) - .unwrap() + .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)? 
.into(); - let timestamp_column = - Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name()); + let ts_col_expr = Expr::Column(Column::new(ts_field.qualifier().cloned(), ts_field.name())); - let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema()); - let mut aggregate_expressions: Vec<_> = aggregate_fields + let mut output_fields = fields_with_qualifiers(plan.schema()); + let mut projections: Vec<_> = output_fields .iter() - .map(|field| Expr::Column(field.qualified_column())) + .map(|f| Expr::Column(f.qualified_column())) .collect(); - aggregate_fields.insert(window_index, window_field.clone()); - let window_expression = Expr::ScalarFunction(ScalarFunction { + + output_fields.insert(win_index, win_field.clone()); + + let win_func_expr = Expr::ScalarFunction(ScalarFunction { func: window(), args: vec![ - // calculate the start of the bin Expr::BinaryExpr(BinaryExpr { - left: Box::new(Expr::Column(timestamp_column.clone())), + left: Box::new(ts_col_expr.clone()), op: logical_expr::Operator::Minus, - right: Box::new(Expr::Literal( - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, 0, width.as_nanos() as i64 - 1), - )), - None, - )), + right: Box::new(Self::interval_nanos(duration.as_nanos() as i64 - 1)), }), - // add 1 nanosecond to the timestamp Expr::BinaryExpr(BinaryExpr { - left: Box::new(Expr::Column(timestamp_column.clone())), + left: Box::new(ts_col_expr), op: logical_expr::Operator::Plus, - right: Box::new(Expr::Literal( - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, 0, 1), - )), - None, - )), + right: Box::new(Self::interval_nanos(1)), }), ], }); - aggregate_expressions.insert( - window_index, - window_expression - .alias_qualified(window_field.qualifier().cloned(), window_field.name()), - ); - Ok(LogicalPlan::Projection( - logical_expr::Projection::try_new_with_schema( - aggregate_expressions, - Arc::new(aggregate_plan), - 
Arc::new(schema_from_df_fields(&aggregate_fields).unwrap()), - ) - .unwrap(), - )) - } -} - -impl UserDefinedLogicalNodeCore for AggregateExtension { - fn name(&self) -> &str { - AGGREGATE_EXTENSION_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.aggregate] - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!( - f, - "AggregateExtension: {} | window_behavior: {:?}", - self.schema(), - match &self.window_behavior { - WindowBehavior::InData => "InData".to_string(), - WindowBehavior::FromOperator { window, .. } => format!("FromOperator({window:?})"), - } - ) - } - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - if inputs.len() != 1 { - return internal_err!("input size inconsistent"); - } - - Ok(Self::new( - self.window_behavior.clone(), - inputs[0].clone(), - self.key_fields.clone(), - )) - } -} - -impl StreamExtension for AggregateExtension { - fn node_name(&self) -> Option { - None - } - - fn plan_node( - &self, - planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> Result { - if input_schemas.len() != 1 { - return plan_err!("AggregateExtension should have exactly one input"); - } - let input_schema = input_schemas[0].clone(); - let input_df_schema = - Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone()).unwrap()); - let logical_node = match &self.window_behavior { - WindowBehavior::FromOperator { - window, - window_field: _, - window_index: _, - is_nested, - } => { - if *is_nested { - self.instant_window_config(planner, index, input_df_schema, true)? - } else { - match window { - WindowType::Tumbling { width } => { - self.tumbling_window_config(planner, index, input_df_schema, *width)? 
- } - WindowType::Sliding { width, slide } => self.sliding_window_config( - planner, - index, - input_df_schema, - *width, - *slide, - )?, - WindowType::Instant => { - return plan_err!( - "instant window not supported in aggregate extension" - ); - } - WindowType::Session { gap: _ } => { - self.session_window_config(planner, index, input_df_schema)? - } - } - } - } - WindowBehavior::InData => self - .instant_window_config(planner, index, input_df_schema, false) - .map_err(|e| e.context("instant window"))?, - }; - let edge = LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schema).clone()); - Ok(NodeWithIncomingEdges { - node: logical_node, - edges: vec![edge], - }) - } + projections.insert( + win_index, + win_func_expr.alias_qualified(win_field.qualifier().cloned(), win_field.name()), + ); - fn output_schema(&self) -> FsSchema { - let output_schema = (*self.schema).clone().into(); - FsSchema::from_schema_keys(Arc::new(output_schema), vec![]).unwrap() + Ok(LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema( + projections, + Arc::new(plan), + Arc::new(schema_from_df_fields(&output_fields)?), + )?)) } } -/* -This is a plan used for appending a _timestamp field to an existing record batch. 
- */ +// ----------------------------------------------------------------------------- +// Field injection node (session window column placement) +// ----------------------------------------------------------------------------- #[derive(Debug, Clone, PartialEq, Eq, Hash)] -struct WindowAppendExtension { - pub(crate) input: LogicalPlan, - pub(crate) window_field: DFField, - pub(crate) window_index: usize, - pub(crate) schema: DFSchemaRef, +struct InjectWindowFieldNode { + pub(crate) upstream_plan: LogicalPlan, + pub(crate) target_field: DFField, + pub(crate) insertion_index: usize, + pub(crate) new_schema: DFSchemaRef, } -multifield_partial_ord!(WindowAppendExtension, input, window_index); - -impl WindowAppendExtension { - fn new(input: LogicalPlan, window_field: DFField, window_index: usize) -> Self { - let mut fields = fields_with_qualifiers(input.schema()); - fields.insert(window_index, window_field.clone()); - let metadata = input.schema().metadata().clone(); - Self { - input, - window_field, - window_index, - schema: Arc::new(schema_from_df_fields_with_metadata(&fields, metadata).unwrap()), - } +multifield_partial_ord!(InjectWindowFieldNode, upstream_plan, insertion_index); + +impl InjectWindowFieldNode { + fn try_new( + upstream_plan: LogicalPlan, + target_field: DFField, + insertion_index: usize, + ) -> Result { + let mut fields = fields_with_qualifiers(upstream_plan.schema()); + fields.insert(insertion_index, target_field.clone()); + let meta = upstream_plan.schema().metadata().clone(); + + Ok(Self { + upstream_plan, + target_field, + insertion_index, + new_schema: Arc::new(schema_from_df_fields_with_metadata(&fields, meta)?), + }) } } -impl UserDefinedLogicalNodeCore for WindowAppendExtension { +impl UserDefinedLogicalNodeCore for InjectWindowFieldNode { fn name(&self) -> &str { - "WindowAppendExtension" + "InjectWindowFieldNode" } fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] + vec![&self.upstream_plan] } fn schema(&self) -> &DFSchemaRef { - 
&self.schema + &self.new_schema } fn expressions(&self) -> Vec { @@ -592,16 +615,19 @@ impl UserDefinedLogicalNodeCore for WindowAppendExtension { fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!( f, - "WindowAppendExtension: field {:?} at {}", - self.window_field, self.window_index + "InjectWindowField: insert {:?} at offset {}", + self.target_field, self.insertion_index ) } fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - Ok(Self::new( + if inputs.len() != 1 { + return internal_err!("InjectWindowFieldNode expects exactly 1 input"); + } + Self::try_new( inputs[0].clone(), - self.window_field.clone(), - self.window_index, - )) + self.target_field.clone(), + self.insertion_index, + ) } } diff --git a/src/sql/extensions/async_udf.rs b/src/sql/extensions/async_udf.rs index da0bdff1..147e0f90 100644 --- a/src/sql/extensions/async_udf.rs +++ b/src/sql/extensions/async_udf.rs @@ -18,170 +18,225 @@ use datafusion::common::{DFSchemaRef, Result}; use datafusion::logical_expr::{ Expr, LogicalPlan, UserDefinedLogicalNode, UserDefinedLogicalNodeCore, }; -use datafusion_common::internal_err; +use datafusion_common::{internal_err, plan_err}; use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; use prost::Message; use protocol::grpc::api::{AsyncUdfOperator, AsyncUdfOrdering}; use crate::multifield_partial_ord; -use crate::sql::extensions::constants::ASYNC_RESULT_FIELD; -use crate::sql::extensions::stream_extension::{NodeWithIncomingEdges, StreamExtension}; +use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::extensions::streaming_operator_blueprint::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::logical_node::logical::{ DylibUdfConfig, LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName, }; use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::types::{DFField, 
fields_with_qualifiers, schema_from_df_fields}; -use crate::sql::common::{FsSchema, FsSchemaRef}; +pub(crate) const NODE_TYPE_NAME: &str = "AsyncFunctionExecutionNode"; +pub const ASYNC_RESULT_FIELD: &str = "__async_result"; + +/// Represents a logical node that executes an external asynchronous function (UDF) +/// and projects the final results into the streaming pipeline. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct AsyncUDFExtension { - pub(crate) input: Arc, - pub(crate) name: String, - pub(crate) udf: DylibUdfConfig, - pub(crate) arg_exprs: Vec, - pub(crate) final_exprs: Vec, - pub(crate) ordered: bool, - pub(crate) max_concurrency: usize, - pub(crate) timeout: Duration, - pub(crate) final_schema: DFSchemaRef, +pub(crate) struct AsyncFunctionExecutionNode { + pub(crate) upstream_plan: Arc, + pub(crate) operator_name: String, + pub(crate) function_config: DylibUdfConfig, + pub(crate) invocation_args: Vec, + pub(crate) result_projections: Vec, + pub(crate) preserve_ordering: bool, + pub(crate) concurrency_limit: usize, + pub(crate) execution_timeout: Duration, + pub(crate) resolved_schema: DFSchemaRef, } multifield_partial_ord!( - AsyncUDFExtension, - input, - name, - udf, - arg_exprs, - final_exprs, - ordered, - max_concurrency, - timeout + AsyncFunctionExecutionNode, + upstream_plan, + operator_name, + function_config, + invocation_args, + result_projections, + preserve_ordering, + concurrency_limit, + execution_timeout ); -impl StreamExtension for AsyncUDFExtension { - fn node_name(&self) -> Option { - None - } - - fn plan_node( +impl AsyncFunctionExecutionNode { + /// Compiles logical expressions into serialized physical protobuf bytes. 
+ fn compile_physical_expressions( &self, planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> Result { - let arg_exprs = self - .arg_exprs + expressions: &[Expr], + schema_context: &DFSchemaRef, + ) -> Result>> { + expressions .iter() - .map(|e| { - let p = planner.create_physical_expr(e, self.input.schema())?; - Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec()) + .map(|logical_expr| { + let physical_expr = planner.create_physical_expr(logical_expr, schema_context)?; + let serialized = + serialize_physical_expr(&physical_expr, &DefaultPhysicalExtensionCodec {})?; + Ok(serialized.encode_to_vec()) }) - .collect::>>()?; + .collect() + } + + /// Computes the intermediate schema which bridges the upstream output + /// and the raw asynchronous result injected by the UDF execution. + fn compute_intermediate_schema(&self) -> Result { + let mut fields = fields_with_qualifiers(self.upstream_plan.schema()); - let mut final_fields = fields_with_qualifiers(self.input.schema()); - final_fields.push(DFField::new( + let raw_result_field = DFField::new( None, ASYNC_RESULT_FIELD, - self.udf.return_type.clone(), + self.function_config.return_type.clone(), true, - )); - let post_udf_schema = schema_from_df_fields(&final_fields)?; + ); + fields.push(raw_result_field); - let final_exprs = self - .final_exprs - .iter() - .map(|e| { - let p = planner.create_physical_expr(e, &post_udf_schema)?; - Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec()) - }) - .collect::>>()?; - - let config = AsyncUdfOperator { - name: self.name.clone(), - udf: Some(self.udf.clone().into()), - arg_exprs, - final_exprs, - ordering: if self.ordered { - AsyncUdfOrdering::Ordered as i32 - } else { - AsyncUdfOrdering::Unordered as i32 - }, - max_concurrency: self.max_concurrency as u32, - timeout_micros: self.timeout.as_micros() as u64, + Ok(Arc::new(schema_from_df_fields(&fields)?)) + } + + fn to_protobuf_config( + &self, + compiled_args: 
Vec>, + compiled_projections: Vec>, + ) -> AsyncUdfOperator { + let ordering_strategy = if self.preserve_ordering { + AsyncUdfOrdering::Ordered + } else { + AsyncUdfOrdering::Unordered }; - let node = LogicalNode::single( - index as u32, - format!("async_udf_{index}"), + AsyncUdfOperator { + name: self.operator_name.clone(), + udf: Some(self.function_config.clone().into()), + arg_exprs: compiled_args, + final_exprs: compiled_projections, + ordering: ordering_strategy as i32, + max_concurrency: self.concurrency_limit as u32, + timeout_micros: self.execution_timeout.as_micros() as u64, + } + } +} + +impl StreamingOperatorBlueprint for AsyncFunctionExecutionNode { + fn operator_identity(&self) -> Option { + None + } + + fn compile_to_graph_node( + &self, + planner: &Planner, + node_index: usize, + mut input_schemas: Vec, + ) -> Result { + if input_schemas.len() != 1 { + return plan_err!("AsyncFunctionExecutionNode requires exactly one input schema"); + } + + let compiled_args = self.compile_physical_expressions( + planner, + &self.invocation_args, + self.upstream_plan.schema(), + )?; + + let intermediate_schema = self.compute_intermediate_schema()?; + let compiled_projections = self.compile_physical_expressions( + planner, + &self.result_projections, + &intermediate_schema, + )?; + + let operator_config = self.to_protobuf_config(compiled_args, compiled_projections); + + let logical_node = LogicalNode::single( + node_index as u32, + format!("async_udf_{node_index}"), OperatorName::AsyncUdf, - config.encode_to_vec(), - format!("async_udf<{}>", self.name), + operator_config.encode_to_vec(), + format!("AsyncUdf<{}>", self.operator_name), 1, ); - let incoming_edge = - LogicalEdge::project_all(LogicalEdgeType::Forward, input_schemas[0].as_ref().clone()); - Ok(NodeWithIncomingEdges { - node, - edges: vec![incoming_edge], + let upstream_schema = input_schemas.remove(0); + let data_edge = + LogicalEdge::project_all(LogicalEdgeType::Forward, (*upstream_schema).clone()); + + 
Ok(CompiledTopologyNode { + execution_unit: logical_node, + routing_edges: vec![data_edge], }) } - fn output_schema(&self) -> FsSchema { - FsSchema::from_fields( - self.final_schema - .fields() - .iter() - .map(|f| (**f).clone()) - .collect(), - ) + fn yielded_schema(&self) -> FsSchema { + let arrow_fields: Vec<_> = self + .resolved_schema + .fields() + .iter() + .map(|f| (**f).clone()) + .collect(); + + FsSchema::from_fields(arrow_fields) } } -impl UserDefinedLogicalNodeCore for AsyncUDFExtension { +impl UserDefinedLogicalNodeCore for AsyncFunctionExecutionNode { fn name(&self) -> &str { - "AsyncUDFNode" + NODE_TYPE_NAME } fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] + vec![&self.upstream_plan] } fn schema(&self) -> &DFSchemaRef { - &self.final_schema + &self.resolved_schema } fn expressions(&self) -> Vec { - self.arg_exprs + self.invocation_args .iter() - .chain(self.final_exprs.iter()) - .map(|e| e.to_owned()) + .chain(self.result_projections.iter()) + .cloned() .collect() } fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "AsyncUdfExtension<{}>: {}", self.name, self.final_schema) + write!( + f, + "AsyncFunctionExecution<{}>: Concurrency={}, Ordered={}", + self.operator_name, + self.concurrency_limit, + self.preserve_ordering + ) } - fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { + fn with_exprs_and_inputs(&self, exprs: Vec, mut inputs: Vec) -> Result { if inputs.len() != 1 { - return internal_err!("input size inconsistent"); + return internal_err!( + "AsyncFunctionExecutionNode expects exactly 1 input, but received {}", + inputs.len() + ); } + if UserDefinedLogicalNode::expressions(self) != exprs { - return internal_err!("Tried to recreate async UDF node with different expressions"); + return internal_err!( + "Attempted to mutate async UDF expressions during logical planning, which is not supported." 
+ ); } Ok(Self { - input: Arc::new(inputs[0].clone()), - name: self.name.clone(), - udf: self.udf.clone(), - arg_exprs: self.arg_exprs.clone(), - final_exprs: self.final_exprs.clone(), - ordered: self.ordered, - max_concurrency: self.max_concurrency, - timeout: self.timeout, - final_schema: self.final_schema.clone(), + upstream_plan: Arc::new(inputs.remove(0)), + operator_name: self.operator_name.clone(), + function_config: self.function_config.clone(), + invocation_args: self.invocation_args.clone(), + result_projections: self.result_projections.clone(), + preserve_ordering: self.preserve_ordering, + concurrency_limit: self.concurrency_limit, + execution_timeout: self.execution_timeout, + resolved_schema: self.resolved_schema.clone(), }) } } diff --git a/src/sql/extensions/constants.rs b/src/sql/extensions/constants.rs index 4f90ca6e..489af179 100644 --- a/src/sql/extensions/constants.rs +++ b/src/sql/extensions/constants.rs @@ -10,4 +10,5 @@ // See the License for the specific language governing permissions and // limitations under the License. +/// Column name substituted for an async UDF call after rewrite. pub const ASYNC_RESULT_FIELD: &str = "__async_result"; diff --git a/src/sql/extensions/debezium.rs b/src/sql/extensions/debezium.rs index 84407ee4..612c0d79 100644 --- a/src/sql/extensions/debezium.rs +++ b/src/sql/extensions/debezium.rs @@ -1,188 +1,250 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
-use super::{ StreamExtension}; -use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD}; use std::sync::Arc; -use arrow_schema::{DataType, Schema}; - -use datafusion::common::{DFSchema, DFSchemaRef, Result, TableReference, internal_err, plan_err}; -use datafusion::error::DataFusionError; +use arrow_schema::{DataType, Field, Schema}; +use datafusion::common::{ + internal_err, plan_err, DFSchema, DFSchemaRef, DataFusionError, Result, TableReference, +}; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use datafusion::physical_plan::DisplayAs; -use super::{NodeWithIncomingEdges}; use crate::multifield_partial_ord; -use crate::sql::logical_planner::updating_meta_field; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::logical_planner::updating_meta_field; +use crate::sql::types::TIMESTAMP_FIELD; -pub(crate) const DEBEZIUM_UNROLLING_EXTENSION_NAME: &str = "DebeziumUnrollingExtension"; -pub(crate) const TO_DEBEZIUM_EXTENSION_NAME: &str = "ToDebeziumExtension"; +use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DebeziumUnrollingExtension { - input: LogicalPlan, - schema: DFSchemaRef, - pub primary_keys: Vec, - primary_key_names: Arc>, -} +// ----------------------------------------------------------------------------- +// Constants & Identifiers +// ----------------------------------------------------------------------------- -multifield_partial_ord!( - DebeziumUnrollingExtension, - input, - primary_keys, - primary_key_names -); +pub(crate) const UNROLL_NODE_NAME: &str = "UnrollDebeziumPayloadNode"; +pub(crate) const PACK_NODE_NAME: &str = "PackDebeziumEnvelopeNode"; -impl DebeziumUnrollingExtension { - pub(crate) fn as_debezium_schema( - input_schema: &DFSchemaRef, - qualifier: Option, +const 
CDC_FIELD_BEFORE: &str = "before"; +const CDC_FIELD_AFTER: &str = "after"; +const CDC_FIELD_OP: &str = "op"; + +// ----------------------------------------------------------------------------- +// Core Schema Codec +// ----------------------------------------------------------------------------- + +/// Transforms between flat schemas and Debezium CDC envelopes. +pub(crate) struct DebeziumSchemaCodec; + +impl DebeziumSchemaCodec { + /// Wraps a flat physical schema into a Debezium CDC envelope structure. + pub(crate) fn wrap_into_envelope( + flat_schema: &DFSchemaRef, + qualifier_override: Option, ) -> Result { - let timestamp_field = if input_schema.has_column_with_unqualified_name(TIMESTAMP_FIELD) { - Some( - input_schema - .field_with_unqualified_name(TIMESTAMP_FIELD)? - .clone(), - ) + let ts_field = if flat_schema.has_column_with_unqualified_name(TIMESTAMP_FIELD) { + Some(flat_schema.field_with_unqualified_name(TIMESTAMP_FIELD)?.clone()) } else { None }; - let struct_schema: Vec<_> = input_schema + + let payload_fields: Vec<_> = flat_schema .fields() .iter() - .filter(|field| field.name() != TIMESTAMP_FIELD && field.name() != UPDATING_META_FIELD) + .filter(|f| f.name() != TIMESTAMP_FIELD && f.name() != UPDATING_META_FIELD) .cloned() .collect(); - let struct_type = DataType::Struct(struct_schema.into()); + let payload_struct_type = DataType::Struct(payload_fields.into()); - let before = Arc::new(arrow::datatypes::Field::new( - "before", - struct_type.clone(), - true, - )); - let after = Arc::new(arrow::datatypes::Field::new( - "after", - struct_type.clone(), - true, - )); + let mut envelope_fields = vec![ + Arc::new(Field::new( + CDC_FIELD_BEFORE, + payload_struct_type.clone(), + true, + )), + Arc::new(Field::new(CDC_FIELD_AFTER, payload_struct_type, true)), + Arc::new(Field::new(CDC_FIELD_OP, DataType::Utf8, true)), + ]; - let op = Arc::new(arrow::datatypes::Field::new("op", DataType::Utf8, true)); - let mut fields = vec![before, after, op]; - - if let 
Some(timestamp_field) = timestamp_field { - fields.push(Arc::new(timestamp_field)); + if let Some(ts) = ts_field { + envelope_fields.push(Arc::new(ts)); } - let schema = match qualifier { - Some(qualifier) => { - DFSchema::try_from_qualified_schema(qualifier, &Schema::new(fields))? - } - None => DFSchema::try_from(Schema::new(fields))?, + let arrow_schema = Schema::new(envelope_fields); + let final_schema = match qualifier_override { + Some(qualifier) => DFSchema::try_from_qualified_schema(qualifier, &arrow_schema)?, + None => DFSchema::try_from(arrow_schema)?, }; - Ok(Arc::new(schema)) + + Ok(Arc::new(final_schema)) } +} - pub fn try_new(input: LogicalPlan, primary_keys: Arc>) -> Result { - let input_schema = input.schema(); +// ----------------------------------------------------------------------------- +// Logical Node: Unroll Debezium Payload +// ----------------------------------------------------------------------------- - // confirm that the input schema has before, after and op columns, and before and after match - let Some(before_index) = input_schema.index_of_column_by_name(None, "before") else { - return plan_err!("DebeziumUnrollingExtension requires a before column"); - }; - let Some(after_index) = input_schema.index_of_column_by_name(None, "after") else { - return plan_err!("DebeziumUnrollingExtension requires an after column"); - }; - let Some(op_index) = input_schema.index_of_column_by_name(None, "op") else { - return plan_err!("DebeziumUnrollingExtension requires an op column"); - }; +/// Decodes an incoming Debezium envelope into a flat, updating stream representation. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct UnrollDebeziumPayloadNode { + upstream_plan: LogicalPlan, + resolved_schema: DFSchemaRef, + pub pk_indices: Vec, + pk_names: Arc>, +} + +multifield_partial_ord!( + UnrollDebeziumPayloadNode, + upstream_plan, + pk_indices, + pk_names +); + +impl UnrollDebeziumPayloadNode { + pub fn try_new(upstream_plan: LogicalPlan, pk_names: Arc>) -> Result { + let input_schema = upstream_plan.schema(); + + let (before_idx, after_idx) = Self::validate_envelope_structure(input_schema)?; + + let payload_fields = Self::extract_payload_fields(input_schema, before_idx)?; + + let pk_indices = Self::map_primary_keys(payload_fields, &pk_names)?; + + let qualifier = Self::resolve_schema_qualifier(input_schema, before_idx, after_idx)?; + + let resolved_schema = + Self::compile_unrolled_schema(input_schema, payload_fields, qualifier)?; + + Ok(Self { + upstream_plan, + resolved_schema, + pk_indices, + pk_names, + }) + } + + fn validate_envelope_structure(schema: &DFSchemaRef) -> Result<(usize, usize)> { + let before_idx = schema.index_of_column_by_name(None, CDC_FIELD_BEFORE).ok_or_else( + || DataFusionError::Plan("Missing 'before' state column in CDC stream".into()), + )?; + + let after_idx = schema.index_of_column_by_name(None, CDC_FIELD_AFTER).ok_or_else( + || DataFusionError::Plan("Missing 'after' state column in CDC stream".into()), + )?; + + let op_idx = schema.index_of_column_by_name(None, CDC_FIELD_OP).ok_or_else(|| { + DataFusionError::Plan("Missing 'op' operation column in CDC stream".into()) + })?; + + let before_type = schema.field(before_idx).data_type(); + let after_type = schema.field(after_idx).data_type(); - let before_type = input_schema.field(before_index).data_type(); - let after_type = input_schema.field(after_index).data_type(); if before_type != after_type { return plan_err!( - "before and after columns must have the same type, not {} and {}", - before_type, - after_type + "State column type mismatch: 'before' is 
{before_type}, but 'after' is {after_type}" ); } - // check that op is a string - let op_type = input_schema.field(op_index).data_type(); - if *op_type != DataType::Utf8 { - return plan_err!("op column must be a string, not {}", op_type); - } - - // create the output schema - let DataType::Struct(fields) = before_type else { + if *schema.field(op_idx).data_type() != DataType::Utf8 { return plan_err!( - "before and after columns must be structs, not {}", - before_type + "The '{}' column must be of type Utf8", + CDC_FIELD_OP ); - }; + } + + Ok((before_idx, after_idx)) + } - // get the primary keys - let primary_key_idx = primary_keys + fn extract_payload_fields<'a>( + schema: &'a DFSchemaRef, + state_idx: usize, + ) -> Result<&'a arrow_schema::Fields> { + match schema.field(state_idx).data_type() { + DataType::Struct(fields) => Ok(fields), + other => plan_err!("State columns must be of type Struct, found {other}"), + } + } + + fn map_primary_keys( + fields: &arrow_schema::Fields, + pk_names: &[String], + ) -> Result> { + pk_names .iter() - .map(|pk| fields.find(pk).map(|(i, _)| i)) + .map(|pk| fields.find(pk).map(|(idx, _)| idx)) .collect::>>() .ok_or_else(|| { - DataFusionError::Plan("primary key field not found in Debezium schema".to_string()) - })?; + DataFusionError::Plan("Specified primary key not found in payload schema".into()) + }) + } - // determine the qualifier from the before and after columns - let qualifier = match ( - input_schema.qualified_field(before_index).0, - input_schema.qualified_field(after_index).0, - ) { - (Some(before_qualifier), Some(after_qualifier)) => { - if before_qualifier != after_qualifier { - return plan_err!("before and after columns must have the same alias"); - } - Some(before_qualifier.clone()) - } - (None, None) => None, - _ => return plan_err!("before and after columns must both have an alias or neither"), - }; + fn resolve_schema_qualifier( + schema: &DFSchemaRef, + before_idx: usize, + after_idx: usize, + ) -> Result> { + 
let before_qualifier = schema.qualified_field(before_idx).0; + let after_qualifier = schema.qualified_field(after_idx).0; + + match (before_qualifier, after_qualifier) { + (Some(bq), Some(aq)) if bq == aq => Ok(Some(bq.clone())), + (None, None) => Ok(None), + _ => plan_err!( + "'before' and 'after' columns must share the same namespace/qualifier" + ), + } + } - let mut fields = fields.to_vec(); - fields.push(updating_meta_field()); + fn compile_unrolled_schema( + original_schema: &DFSchemaRef, + payload_fields: &arrow_schema::Fields, + qualifier: Option, + ) -> Result { + let mut flat_fields = payload_fields.to_vec(); - let Some(input_timestamp_field) = - input_schema.index_of_column_by_name(None, TIMESTAMP_FIELD) - else { - return plan_err!("DebeziumUnrollingExtension requires a timestamp field"); - }; + flat_fields.push(updating_meta_field()); - fields.push(Arc::new(input_schema.field(input_timestamp_field).clone())); - let arrow_schema = Schema::new(fields); + let ts_idx = original_schema + .index_of_column_by_name(None, TIMESTAMP_FIELD) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "Required event time field '{TIMESTAMP_FIELD}' is missing" + )) + })?; - let schema = match qualifier { - Some(qualifier) => DFSchema::try_from_qualified_schema(qualifier, &arrow_schema)?, + flat_fields.push(Arc::new(original_schema.field(ts_idx).clone())); + + let arrow_schema = Schema::new(flat_fields); + let compiled_schema = match qualifier { + Some(q) => DFSchema::try_from_qualified_schema(q, &arrow_schema)?, None => DFSchema::try_from(arrow_schema)?, }; - Ok(Self { - input, - schema: Arc::new(schema), - primary_keys: primary_key_idx, - primary_key_names: primary_keys, - }) + Ok(Arc::new(compiled_schema)) } } -impl UserDefinedLogicalNodeCore for DebeziumUnrollingExtension { +impl UserDefinedLogicalNodeCore for UnrollDebeziumPayloadNode { fn name(&self) -> &str { - DEBEZIUM_UNROLLING_EXTENSION_NAME + UNROLL_NODE_NAME } fn inputs(&self) -> Vec<&LogicalPlan> { - 
vec![&self.input] + vec![&self.upstream_plan] } fn schema(&self) -> &DFSchemaRef { - &self.schema + &self.resolved_schema } fn expressions(&self) -> Vec { @@ -190,116 +252,136 @@ impl UserDefinedLogicalNodeCore for DebeziumUnrollingExtension { } fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "DebeziumUnrollingExtension") + write!(f, "UnrollDebeziumPayload") } - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - Self::try_new(inputs[0].clone(), self.primary_key_names.clone()) + fn with_exprs_and_inputs(&self, _exprs: Vec, mut inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!( + "UnrollDebeziumPayloadNode expects exactly 1 input, got {}", + inputs.len() + ); + } + Self::try_new(inputs.remove(0), self.pk_names.clone()) } } -impl StreamExtension for DebeziumUnrollingExtension { - fn node_name(&self) -> Option { +impl StreamingOperatorBlueprint for UnrollDebeziumPayloadNode { + fn operator_identity(&self) -> Option { None } - fn plan_node( - &self, - _planner: &Planner, - _index: usize, - _input_schemas: Vec, - ) -> Result { - plan_err!("DebeziumUnrollingExtension should not be planned") + fn is_passthrough_boundary(&self) -> bool { + true } - fn output_schema(&self) -> FsSchema { - FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() + fn compile_to_graph_node( + &self, + _: &Planner, + _: usize, + _: Vec, + ) -> Result { + plan_err!("UnrollDebeziumPayloadNode is a logical boundary and should not be physically planned") } - fn transparent(&self) -> bool { - true + fn yielded_schema(&self) -> FsSchema { + FsSchema::from_schema_unkeyed(Arc::new(self.resolved_schema.as_ref().into())).unwrap_or_else( + |_| panic!("Failed to extract physical schema for {}", UNROLL_NODE_NAME), + ) } } +// ----------------------------------------------------------------------------- +// Logical Node: Pack Debezium Envelope +// 
----------------------------------------------------------------------------- + +/// Encodes a flat updating stream back into a Debezium CDC envelope representation. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct ToDebeziumExtension { - input: Arc, - schema: DFSchemaRef, +pub(crate) struct PackDebeziumEnvelopeNode { + upstream_plan: Arc, + envelope_schema: DFSchemaRef, } -multifield_partial_ord!(ToDebeziumExtension, input); +multifield_partial_ord!(PackDebeziumEnvelopeNode, upstream_plan); + +impl PackDebeziumEnvelopeNode { + pub(crate) fn try_new(upstream_plan: LogicalPlan) -> Result { + let envelope_schema = DebeziumSchemaCodec::wrap_into_envelope(upstream_plan.schema(), None) + .map_err(|e| { + DataFusionError::Plan(format!("Failed to compile Debezium envelope schema: {e}")) + })?; -impl ToDebeziumExtension { - pub(crate) fn try_new(input: LogicalPlan) -> Result { - let input_schema = input.schema(); - let schema = DebeziumUnrollingExtension::as_debezium_schema(input_schema, None) - .expect("should be able to create ToDebeziumExtenison"); Ok(Self { - input: Arc::new(input), - schema, + upstream_plan: Arc::new(upstream_plan), + envelope_schema, }) } } -impl DisplayAs for ToDebeziumExtension { +impl DisplayAs for PackDebeziumEnvelopeNode { fn fmt_as( &self, _t: datafusion::physical_plan::DisplayFormatType, f: &mut std::fmt::Formatter, ) -> std::fmt::Result { - write!(f, "ToDebeziumExtension") + write!(f, "PackDebeziumEnvelope") } } -impl UserDefinedLogicalNodeCore for ToDebeziumExtension { +impl UserDefinedLogicalNodeCore for PackDebeziumEnvelopeNode { fn name(&self) -> &str { - TO_DEBEZIUM_EXTENSION_NAME + PACK_NODE_NAME } fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] + vec![&self.upstream_plan] } fn schema(&self) -> &DFSchemaRef { - &self.schema + &self.envelope_schema } - fn expressions(&self) -> Vec { + fn expressions(&self) -> Vec { vec![] } fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, 
"ToDebeziumExtension") + write!(f, "PackDebeziumEnvelope") } - fn with_exprs_and_inputs( - &self, - _exprs: Vec, - inputs: Vec, - ) -> Result { - Self::try_new(inputs[0].clone()) + fn with_exprs_and_inputs(&self, _exprs: Vec, mut inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!( + "PackDebeziumEnvelopeNode expects exactly 1 input, got {}", + inputs.len() + ); + } + Self::try_new(inputs.remove(0)) } } -impl StreamExtension for ToDebeziumExtension { - fn node_name(&self) -> Option { +impl StreamingOperatorBlueprint for PackDebeziumEnvelopeNode { + fn operator_identity(&self) -> Option { None } - fn plan_node( - &self, - _planner: &Planner, - _index: usize, - _input_schemas: Vec, - ) -> Result { - internal_err!("ToDebeziumExtension should not be planned") + fn is_passthrough_boundary(&self) -> bool { + true } - fn output_schema(&self) -> FsSchema { - FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap() + fn compile_to_graph_node( + &self, + _: &Planner, + _: usize, + _: Vec, + ) -> Result { + internal_err!("PackDebeziumEnvelopeNode is a logical boundary and should not be physically planned") } - fn transparent(&self) -> bool { - true + fn yielded_schema(&self) -> FsSchema { + FsSchema::from_schema_unkeyed(Arc::new(self.envelope_schema.as_ref().into())) + .unwrap_or_else(|_| { + panic!("Failed to extract physical schema for {}", PACK_NODE_NAME) + }) } } diff --git a/src/sql/extensions/extension_try_from.rs b/src/sql/extensions/extension_try_from.rs index eb042a90..a64ac9cf 100644 --- a/src/sql/extensions/extension_try_from.rs +++ b/src/sql/extensions/extension_try_from.rs @@ -15,53 +15,53 @@ use std::sync::Arc; use datafusion::common::{DataFusionError, Result}; use datafusion::logical_expr::UserDefinedLogicalNode; -use crate::sql::extensions::aggregate::AggregateExtension; -use crate::sql::extensions::async_udf::AsyncUDFExtension; -use crate::sql::extensions::debezium::{DebeziumUnrollingExtension, ToDebeziumExtension}; 
-use crate::sql::extensions::join::JoinExtension; -use crate::sql::extensions::key_calculation::KeyCalculationExtension; -use crate::sql::extensions::lookup::LookupJoin; -use crate::sql::extensions::projection::ProjectionExtension; -use crate::sql::extensions::remote_table::RemoteTableExtension; -use crate::sql::extensions::sink::SinkExtension; -use crate::sql::extensions::stream_extension::StreamExtension; -use crate::sql::extensions::table_source::TableSourceExtension; -use crate::sql::extensions::updating_aggregate::UpdatingAggregateExtension; -use crate::sql::extensions::watermark_node::WatermarkNode; -use crate::sql::extensions::window_fn::WindowFunctionExtension; +use crate::sql::extensions::aggregate::StreamWindowAggregateNode; +use crate::sql::extensions::async_udf::AsyncFunctionExecutionNode; +use crate::sql::extensions::debezium::{PackDebeziumEnvelopeNode, UnrollDebeziumPayloadNode}; +use crate::sql::extensions::join::StreamingJoinNode; +use crate::sql::extensions::key_calculation::KeyExtractionNode; +use crate::sql::extensions::lookup::StreamReferenceJoinNode; +use crate::sql::extensions::projection::StreamProjectionNode; +use crate::sql::extensions::remote_table::RemoteTableBoundaryNode; +use crate::sql::extensions::sink::StreamEgressNode; +use crate::sql::extensions::streaming_operator_blueprint::StreamingOperatorBlueprint; +use crate::sql::extensions::table_source::StreamIngestionNode; +use crate::sql::extensions::updating_aggregate::ContinuousAggregateNode; +use crate::sql::extensions::watermark_node::EventTimeWatermarkNode; +use crate::sql::extensions::windows_function::StreamingWindowFunctionNode; -fn try_from_t( +fn try_from_t( node: &dyn UserDefinedLogicalNode, -) -> std::result::Result<&dyn StreamExtension, ()> { +) -> std::result::Result<&dyn StreamingOperatorBlueprint, ()> { node.as_any() .downcast_ref::() - .map(|t| t as &dyn StreamExtension) + .map(|t| t as &dyn StreamingOperatorBlueprint) .ok_or(()) } -impl<'a> TryFrom<&'a dyn 
UserDefinedLogicalNode> for &'a dyn StreamExtension { +impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamingOperatorBlueprint { type Error = DataFusionError; fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result { - try_from_t::(node) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) - .or_else(|_| try_from_t::(node)) + try_from_t::(node) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) + .or_else(|_| try_from_t::(node)) .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name()))) } } -impl<'a> TryFrom<&'a Arc> for &'a dyn StreamExtension { +impl<'a> TryFrom<&'a Arc> for &'a dyn StreamingOperatorBlueprint { type Error = DataFusionError; fn try_from(node: &'a Arc) -> Result { diff --git a/src/sql/extensions/join.rs b/src/sql/extensions/join.rs index 74dcfde6..70fbf3a3 100644 --- a/src/sql/extensions/join.rs +++ b/src/sql/extensions/join.rs @@ -1,120 +1,208 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Formatter; use std::time::Duration; use datafusion::common::{DFSchemaRef, Result}; use datafusion::logical_expr::expr::Expr; use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore}; - -use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; -use crate::sql::types::StreamSchema; - -use std::sync::Arc; use datafusion_common::plan_err; use datafusion_proto::physical_plan::AsExecutionPlan; use datafusion_proto::protobuf::PhysicalPlanNode; use prost::Message; use protocol::grpc::api::JoinOperator; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_planner::FsPhysicalExtensionCodec; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; + use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::sql::logical_node::logical::{ + LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName, +}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::logical_planner::FsPhysicalExtensionCodec; + +// ----------------------------------------------------------------------------- +// Constants +// ----------------------------------------------------------------------------- -pub(crate) const JOIN_NODE_NAME: &str = "JoinNode"; +pub(crate) const STREAM_JOIN_NODE_TYPE: &str = "StreamingJoinNode"; +// ----------------------------------------------------------------------------- +// Logical Node Definition +// 
----------------------------------------------------------------------------- + +/// A logical plan node representing a streaming join operation. +/// It bridges the DataFusion logical plan with the physical streaming execution engine. #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub struct JoinExtension { - pub(crate) rewritten_join: LogicalPlan, - pub(crate) is_instant: bool, - pub(crate) ttl: Option, +pub struct StreamingJoinNode { + pub(crate) underlying_plan: LogicalPlan, + pub(crate) instant_execution_mode: bool, + pub(crate) state_retention_ttl: Option, } -impl StreamExtension for JoinExtension { - fn node_name(&self) -> Option { - None +impl StreamingJoinNode { + /// Creates a new instance of the streaming join node. + pub fn new( + underlying_plan: LogicalPlan, + instant_execution_mode: bool, + state_retention_ttl: Option, + ) -> Self { + Self { + underlying_plan, + instant_execution_mode, + state_retention_ttl, + } } - fn plan_node( + /// Compiles the physical execution plan and serializes it into a Protobuf configuration payload. 
+ fn compile_operator_config( &self, planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> Result { - if input_schemas.len() != 2 { - return plan_err!("join should have exactly two inputs"); - } - let left_schema = input_schemas[0].clone(); - let right_schema = input_schemas[1].clone(); - - let join_plan = planner.sync_plan(&self.rewritten_join)?; - let physical_plan_node = PhysicalPlanNode::try_from_physical_plan( - join_plan.clone(), + node_identifier: &str, + left_schema: FsSchemaRef, + right_schema: FsSchemaRef, + ) -> Result { + let physical_plan = planner.sync_plan(&self.underlying_plan)?; + + let proto_node = PhysicalPlanNode::try_from_physical_plan( + physical_plan, &FsPhysicalExtensionCodec::default(), )?; - let operator_name = if self.is_instant { - OperatorName::InstantJoin - } else { - OperatorName::Join - }; - - let config = JoinOperator { - name: format!("join_{index}"), + Ok(JoinOperator { + name: node_identifier.to_string(), left_schema: Some(left_schema.as_ref().clone().into()), right_schema: Some(right_schema.as_ref().clone().into()), - output_schema: Some(self.output_schema().into()), - join_plan: physical_plan_node.encode_to_vec(), - ttl_micros: self.ttl.map(|t| t.as_micros() as u64), - }; - - let logical_node = LogicalNode::single( - index as u32, - format!("join_{index}"), - operator_name, - config.encode_to_vec(), - "join".to_string(), - 1, - ); - - let left_edge = - LogicalEdge::project_all(LogicalEdgeType::LeftJoin, left_schema.as_ref().clone()); - let right_edge = - LogicalEdge::project_all(LogicalEdgeType::RightJoin, right_schema.as_ref().clone()); - Ok(NodeWithIncomingEdges { - node: logical_node, - edges: vec![left_edge, right_edge], + output_schema: Some(self.extract_fs_schema().into()), + join_plan: proto_node.encode_to_vec(), + ttl_micros: self.state_retention_ttl.map(|ttl| ttl.as_micros() as u64), }) } - fn output_schema(&self) -> FsSchema { - FsSchema::from_schema_unkeyed(self.schema().inner().clone()).unwrap() + fn 
determine_operator_type(&self) -> OperatorName { + if self.instant_execution_mode { + OperatorName::InstantJoin + } else { + OperatorName::Join + } + } + + fn extract_fs_schema(&self) -> FsSchema { + FsSchema::from_schema_unkeyed(self.underlying_plan.schema().inner().clone()) + .expect("Fatal: Failed to convert internal join schema to FsSchema without keys") } } -impl UserDefinedLogicalNodeCore for JoinExtension { +// ----------------------------------------------------------------------------- +// DataFusion Logical Node Core Implementation +// ----------------------------------------------------------------------------- + +impl UserDefinedLogicalNodeCore for StreamingJoinNode { fn name(&self) -> &str { - JOIN_NODE_NAME + STREAM_JOIN_NODE_TYPE } fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.rewritten_join] + vec![&self.underlying_plan] } fn schema(&self) -> &DFSchemaRef { - self.rewritten_join.schema() + self.underlying_plan.schema() } fn expressions(&self) -> Vec { vec![] } - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "JoinExtension: {}", self.schema()) + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!( + f, + "StreamingJoinNode: Schema={}, InstantMode={}, TTL={:?}", + self.schema(), + self.instant_execution_mode, + self.state_retention_ttl + ) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, mut inputs: Vec) -> Result { + if inputs.len() != 1 { + return plan_err!( + "StreamingJoinNode expects exactly 1 underlying logical plan during recreation" + ); + } + + Ok(Self::new( + inputs.remove(0), + self.instant_execution_mode, + self.state_retention_ttl, + )) + } +} + +// ----------------------------------------------------------------------------- +// Streaming Graph Extension Implementation +// ----------------------------------------------------------------------------- + +impl StreamingOperatorBlueprint for StreamingJoinNode { + fn operator_identity(&self) -> Option { + None } - 
fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - Ok(Self { - rewritten_join: inputs[0].clone(), - is_instant: self.is_instant, - ttl: self.ttl, + fn compile_to_graph_node( + &self, + planner: &Planner, + node_index: usize, + mut input_schemas: Vec, + ) -> Result { + if input_schemas.len() != 2 { + return plan_err!( + "Invalid topology: StreamingJoinNode requires exactly two upstream inputs, received {}", + input_schemas.len() + ); + } + + let right_schema = input_schemas.pop().unwrap(); + let left_schema = input_schemas.pop().unwrap(); + + let node_identifier = format!("stream_join_{node_index}"); + + let operator_config = self.compile_operator_config( + planner, + &node_identifier, + left_schema.clone(), + right_schema.clone(), + )?; + + let logical_node = LogicalNode::single( + node_index as u32, + node_identifier.clone(), + self.determine_operator_type(), + operator_config.encode_to_vec(), + "streaming_join".to_string(), + 1, + ); + + let left_edge = + LogicalEdge::project_all(LogicalEdgeType::LeftJoin, left_schema.as_ref().clone()); + let right_edge = + LogicalEdge::project_all(LogicalEdgeType::RightJoin, right_schema.as_ref().clone()); + + Ok(CompiledTopologyNode { + execution_unit: logical_node, + routing_edges: vec![left_edge, right_edge], }) } + + fn yielded_schema(&self) -> FsSchema { + self.extract_fs_schema() + } } diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs index 3a94f592..484d464c 100644 --- a/src/sql/extensions/key_calculation.rs +++ b/src/sql/extensions/key_calculation.rs @@ -1,217 +1,263 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::fmt::Formatter; use std::sync::Arc; use datafusion::arrow::datatypes::{Field, Schema}; -use datafusion::common::{DFSchemaRef, Result, internal_err}; +use datafusion::common::{DFSchemaRef, Result, internal_err, plan_err}; use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion_common::{plan_err, DFSchema}; +use datafusion_common::DFSchema; use datafusion_expr::col; use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionCodec}; use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; use datafusion_proto::protobuf::PhysicalPlanNode; use itertools::Itertools; use prost::Message; + use protocol::grpc::api::{KeyPlanOperator, ProjectionOperator}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; + use crate::multifield_partial_ord; +use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; use crate::sql::logical_planner::FsPhysicalExtensionCodec; -use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::types::{ - StreamSchema, fields_with_qualifiers, schema_from_df_fields_with_metadata, -}; -use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::types::{fields_with_qualifiers, schema_from_df_fields_with_metadata}; -pub(crate) const 
KEY_CALCULATION_NAME: &str = "KeyCalculationExtension"; +pub(crate) const EXTENSION_NODE_IDENTIFIER: &str = "KeyExtractionNode"; -/// Two ways of specifying keys — either as col indexes in the existing data or as a set of -/// exprs to evaluate +/// Routing strategy for shuffling data across the stream topology. #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub enum KeysOrExprs { - Keys(Vec), - Exprs(Vec), +pub enum KeyExtractionStrategy { + ColumnIndices(Vec), + CalculatedExpressions(Vec), } -/// Calculation for computing keyed data, with a vec of keys -/// that will be used for shuffling data to the correct nodes. +/// Logical node that computes or extracts routing keys. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct KeyCalculationExtension { - pub(crate) name: Option, - pub(crate) input: LogicalPlan, - pub(crate) keys: KeysOrExprs, - pub(crate) schema: DFSchemaRef, +pub(crate) struct KeyExtractionNode { + pub(crate) operator_label: Option, + pub(crate) upstream_plan: LogicalPlan, + pub(crate) extraction_strategy: KeyExtractionStrategy, + pub(crate) resolved_schema: DFSchemaRef, } -multifield_partial_ord!(KeyCalculationExtension, name, input, keys); - -impl KeyCalculationExtension { - pub fn new_named_and_trimmed(input: LogicalPlan, keys: Vec, name: String) -> Self { - let output_fields: Vec<_> = fields_with_qualifiers(input.schema()) +multifield_partial_ord!( + KeyExtractionNode, + operator_label, + upstream_plan, + extraction_strategy +); + +impl KeyExtractionNode { + /// Extracts keys and hides them from the downstream projection. 
+ pub fn try_new_with_projection( + upstream_plan: LogicalPlan, + target_indices: Vec, + label: String, + ) -> Result { + let projected_fields: Vec<_> = fields_with_qualifiers(upstream_plan.schema()) .into_iter() .enumerate() - .filter_map(|(index, field)| { - if !keys.contains(&index) { - Some(field.clone()) - } else { - None - } - }) + .filter(|(idx, _)| !target_indices.contains(idx)) + .map(|(_, field)| field) .collect(); - let schema = - schema_from_df_fields_with_metadata(&output_fields, input.schema().metadata().clone()) - .unwrap(); - Self { - name: Some(name), - input, - keys: KeysOrExprs::Keys(keys), - schema: Arc::new(schema), - } + let metadata = upstream_plan.schema().metadata().clone(); + let resolved_schema = schema_from_df_fields_with_metadata(&projected_fields, metadata)?; + + Ok(Self { + operator_label: Some(label), + upstream_plan, + extraction_strategy: KeyExtractionStrategy::ColumnIndices(target_indices), + resolved_schema: Arc::new(resolved_schema), + }) } - pub fn new(input: LogicalPlan, keys: KeysOrExprs) -> Self { - let schema = input.schema().clone(); + + /// Creates a node using an explicit strategy without changing the visible schema. 
+ pub fn new(upstream_plan: LogicalPlan, strategy: KeyExtractionStrategy) -> Self { + let resolved_schema = upstream_plan.schema().clone(); Self { - name: None, - input, - keys, - schema, + operator_label: None, + upstream_plan, + extraction_strategy: strategy, + resolved_schema, } } -} -impl StreamExtension for KeyCalculationExtension { - fn node_name(&self) -> Option { - None + fn compile_index_router( + &self, + physical_plan_proto: PhysicalPlanNode, + indices: &[usize], + ) -> (Vec, OperatorName) { + let operator_config = KeyPlanOperator { + name: "key".into(), + physical_plan: physical_plan_proto.encode_to_vec(), + key_fields: indices.iter().map(|&idx| idx as u64).collect(), + }; + + (operator_config.encode_to_vec(), OperatorName::ArrowKey) } - fn plan_node( + fn compile_expression_router( &self, planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> Result { - // check there's only one input - if input_schemas.len() != 1 { - return plan_err!("KeyCalculationExtension should have exactly one input"); + expressions: &[Expr], + input_schema_ref: &FsSchemaRef, + input_df_schema: &DFSchemaRef, + ) -> Result<(Vec, OperatorName)> { + let mut target_exprs = expressions.to_vec(); + + for field in input_schema_ref.schema.fields.iter() { + target_exprs.push(col(field.name())); } - let input_schema = (*input_schemas[0]).clone(); - let input_df_schema = Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone())?); - let physical_plan = planner.sync_plan(&self.input)?; + let output_fs_schema = self.generate_fs_schema()?; + + for (compiled_expr, expected_field) in target_exprs + .iter() + .zip(output_fs_schema.schema.fields()) + { + let (expr_type, expr_nullable) = compiled_expr.data_type_and_nullable(input_df_schema)?; + if expr_type != *expected_field.data_type() || expr_nullable != expected_field.is_nullable() + { + return plan_err!( + "Type mismatch in key calculation: Expected {} (nullable: {}), got {} (nullable: {})", + expected_field.data_type(), + 
expected_field.is_nullable(), + expr_type, + expr_nullable + ); + } + } - let physical_plan_node: PhysicalPlanNode = PhysicalPlanNode::try_from_physical_plan( - physical_plan, - &FsPhysicalExtensionCodec::default(), - )?; + let mut physical_expr_payloads = Vec::with_capacity(target_exprs.len()); + for logical_expr in target_exprs { + let physical_expr = planner + .create_physical_expr(&logical_expr, input_df_schema) + .map_err(|e| e.context("Failed to physicalize PARTITION BY expression"))?; - let (config, name) = match &self.keys { - KeysOrExprs::Keys(keys) => ( - KeyPlanOperator { - name: "key".into(), - physical_plan: physical_plan_node.encode_to_vec(), - key_fields: keys.iter().map(|k| *k as u64).collect(), - } - .encode_to_vec(), - OperatorName::ArrowKey, - ), - KeysOrExprs::Exprs(key_exprs) => { - let mut exprs = vec![]; - for k in key_exprs { - exprs.push(k.clone()) - } + let serialized_expr = + serialize_physical_expr(&physical_expr, &DefaultPhysicalExtensionCodec {})?; + physical_expr_payloads.push(serialized_expr.encode_to_vec()); + } - for f in input_schema.schema.fields.iter() { - exprs.push(col(f.name())); - } + let operator_config = ProjectionOperator { + name: self.operator_label.as_deref().unwrap_or("key").to_string(), + input_schema: Some(input_schema_ref.as_ref().clone().into()), + output_schema: Some(output_fs_schema.into()), + exprs: physical_expr_payloads, + }; - let output_schema = self.output_schema(); + Ok((operator_config.encode_to_vec(), OperatorName::Projection)) + } - // ensure that the exprs generate the output schema - for (expr, expected) in exprs.iter().zip(output_schema.schema.fields()) { - let (data_type, nullable) = expr.data_type_and_nullable(&input_df_schema)?; - assert_eq!(data_type, *expected.data_type()); - assert_eq!(nullable, expected.is_nullable()); - } + fn generate_fs_schema(&self) -> Result { + let base_arrow_schema = self.upstream_plan.schema().as_ref(); - let mut physical_exprs = vec![]; + match 
&self.extraction_strategy { + KeyExtractionStrategy::ColumnIndices(indices) => { + FsSchema::from_schema_keys(Arc::new(base_arrow_schema.into()), indices.clone()) + } + KeyExtractionStrategy::CalculatedExpressions(expressions) => { + let mut composite_fields = + Vec::with_capacity(expressions.len() + base_arrow_schema.fields().len()); - for e in exprs { - let phys = planner - .create_physical_expr(&e, &input_df_schema) - .map_err(|e| e.context("in PARTITION BY"))?; - physical_exprs.push( - serialize_physical_expr(&phys, &DefaultPhysicalExtensionCodec {})? - .encode_to_vec(), - ); + for (idx, expr) in expressions.iter().enumerate() { + let (data_type, nullable) = expr.data_type_and_nullable(base_arrow_schema)?; + composite_fields.push(Field::new(format!("__key_{idx}"), data_type, nullable).into()); } - let config = ProjectionOperator { - name: self.name.as_deref().unwrap_or("key").to_string(), - input_schema: Some(input_schema.clone().into()), - - output_schema: Some(self.output_schema().into()), - exprs: physical_exprs, - }; + for field in base_arrow_schema.fields().iter() { + composite_fields.push(field.clone()); + } - (config.encode_to_vec(), OperatorName::Projection) + let final_schema = Arc::new(Schema::new(composite_fields)); + let key_mapping = (1..=expressions.len()).collect_vec(); + FsSchema::from_schema_keys(final_schema, key_mapping) } - }; + } + } +} - let node = LogicalNode::single( - index as u32, - format!("key_{index}"), - name, - config, - format!("ArrowKey<{}>", self.name.as_deref().unwrap_or("_")), - 1, - ); - let edge = LogicalEdge::project_all(LogicalEdgeType::Forward, input_schema); - Ok(NodeWithIncomingEdges { - node, - edges: vec![edge], - }) +impl StreamingOperatorBlueprint for KeyExtractionNode { + fn operator_identity(&self) -> Option { + None } - fn output_schema(&self) -> FsSchema { - let arrow_schema = self.input.schema().as_ref(); + fn compile_to_graph_node( + &self, + planner: &Planner, + node_index: usize, + mut input_schemas: Vec, + 
) -> Result { + if input_schemas.len() != 1 { + return plan_err!("KeyExtractionNode requires exactly one upstream input schema"); + } + + let input_schema_ref = input_schemas.remove(0); + let input_df_schema = Arc::new(DFSchema::try_from(input_schema_ref.schema.as_ref().clone())?); - match &self.keys { - KeysOrExprs::Keys(keys) => { - FsSchema::from_schema_keys(Arc::new(arrow_schema.into()), keys.clone()).unwrap() + let physical_plan = planner.sync_plan(&self.upstream_plan)?; + let physical_plan_proto = PhysicalPlanNode::try_from_physical_plan( + physical_plan, + &FsPhysicalExtensionCodec::default(), + )?; + + let (protobuf_payload, engine_operator_name) = match &self.extraction_strategy { + KeyExtractionStrategy::ColumnIndices(indices) => { + self.compile_index_router(physical_plan_proto, indices) + } + KeyExtractionStrategy::CalculatedExpressions(exprs) => { + self.compile_expression_router(planner, exprs, &input_schema_ref, &input_df_schema)? } - KeysOrExprs::Exprs(exprs) => { - let mut fields = vec![]; + }; - for (i, e) in exprs.iter().enumerate() { - let (dt, nullable) = e.data_type_and_nullable(arrow_schema).unwrap(); - fields.push(Field::new(format!("__key_{i}"), dt, nullable).into()); - } + let logical_node = LogicalNode::single( + node_index as u32, + format!("key_{node_index}"), + engine_operator_name, + protobuf_payload, + format!("ArrowKey<{}>", self.operator_label.as_deref().unwrap_or("_")), + 1, + ); - for f in arrow_schema.fields().iter() { - fields.push(f.clone()); - } + let data_edge = + LogicalEdge::project_all(LogicalEdgeType::Forward, (*input_schema_ref).clone()); - FsSchema::from_schema_keys( - Arc::new(Schema::new(fields)), - (1..=exprs.len()).collect_vec(), - ) - .unwrap() - } - } + Ok(CompiledTopologyNode { + execution_unit: logical_node, + routing_edges: vec![data_edge], + }) + } + + fn yielded_schema(&self) -> FsSchema { + self.generate_fs_schema() + .expect("Fatal: Failed to generate output schema for KeyExtractionNode") } } -impl 
UserDefinedLogicalNodeCore for KeyCalculationExtension { +impl UserDefinedLogicalNodeCore for KeyExtractionNode { fn name(&self) -> &str { - KEY_CALCULATION_NAME + EXTENSION_NODE_IDENTIFIER } fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] + vec![&self.upstream_plan] } fn schema(&self) -> &DFSchemaRef { - &self.schema + &self.resolved_schema } fn expressions(&self) -> Vec { @@ -219,24 +265,33 @@ impl UserDefinedLogicalNodeCore for KeyCalculationExtension { } fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "KeyCalculationExtension: {}", self.schema()) + write!( + f, + "KeyExtractionNode: Strategy={:?} | Schema={}", + self.extraction_strategy, + self.resolved_schema + ) } - fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { + fn with_exprs_and_inputs(&self, exprs: Vec, mut inputs: Vec) -> Result { if inputs.len() != 1 { - return internal_err!("input size inconsistent"); + return internal_err!("KeyExtractionNode requires exactly 1 input logical plan"); } - let keys = match &self.keys { - KeysOrExprs::Keys(k) => KeysOrExprs::Keys(k.clone()), - KeysOrExprs::Exprs(_) => KeysOrExprs::Exprs(exprs), + let strategy = match &self.extraction_strategy { + KeyExtractionStrategy::ColumnIndices(indices) => { + KeyExtractionStrategy::ColumnIndices(indices.clone()) + } + KeyExtractionStrategy::CalculatedExpressions(_) => { + KeyExtractionStrategy::CalculatedExpressions(exprs) + } }; Ok(Self { - name: self.name.clone(), - input: inputs[0].clone(), - keys, - schema: self.schema.clone(), + operator_label: self.operator_label.clone(), + upstream_plan: inputs.remove(0), + extraction_strategy: strategy, + resolved_schema: self.resolved_schema.clone(), }) } } diff --git a/src/sql/extensions/lookup.rs b/src/sql/extensions/lookup.rs index c2ef8f28..c34c5b10 100644 --- a/src/sql/extensions/lookup.rs +++ b/src/sql/extensions/lookup.rs @@ -1,35 +1,59 @@ -use datafusion::common::{Column, DFSchemaRef, JoinType, internal_err, plan_err}; 
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Formatter; +use std::sync::Arc; + +use datafusion::common::{Column, DFSchemaRef, JoinType, Result, internal_err, plan_err}; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use datafusion::sql::TableReference; use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; use prost::Message; -use std::fmt::Formatter; -use std::sync::Arc; + use protocol::grpc::api; use protocol::grpc::api::{ConnectorOp, LookupJoinCondition, LookupJoinOperator}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; + use crate::multifield_partial_ord; -use crate::sql::schema::ConnectorTable; -use crate::sql::schema::utils::add_timestamp_field_arrow; -use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::schema::SourceTable; +use crate::sql::schema::utils::add_timestamp_field_arrow; + +// ----------------------------------------------------------------------------- +// Constants & 
Identifiers +// ----------------------------------------------------------------------------- -pub const SOURCE_EXTENSION_NAME: &str = "LookupSource"; -pub const JOIN_EXTENSION_NAME: &str = "LookupJoin"; +pub const DICTIONARY_SOURCE_NODE_NAME: &str = "ReferenceTableSource"; +pub const STREAM_DICTIONARY_JOIN_NODE_NAME: &str = "StreamReferenceJoin"; +// ----------------------------------------------------------------------------- +// Logical Node: Reference Table Source +// ----------------------------------------------------------------------------- + +/// Static or periodically updated reference table used for lookups. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct LookupSource { - pub(crate) table: ConnectorTable, - pub(crate) schema: DFSchemaRef, +pub struct ReferenceTableSourceNode { + pub(crate) source_definition: SourceTable, + pub(crate) resolved_schema: DFSchemaRef, } -multifield_partial_ord!(LookupSource, table); +multifield_partial_ord!(ReferenceTableSourceNode, source_definition); -impl UserDefinedLogicalNodeCore for LookupSource { +impl UserDefinedLogicalNodeCore for ReferenceTableSourceNode { fn name(&self) -> &str { - SOURCE_EXTENSION_NAME + DICTIONARY_SOURCE_NODE_NAME } fn inputs(&self) -> Vec<&LogicalPlan> { @@ -37,7 +61,7 @@ impl UserDefinedLogicalNodeCore for LookupSource { } fn schema(&self) -> &DFSchemaRef { - &self.schema + &self.resolved_schema } fn expressions(&self) -> Vec { @@ -45,150 +69,206 @@ impl UserDefinedLogicalNodeCore for LookupSource { } fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "LookupSource: {}", self.schema) + write!(f, "ReferenceTableSource: Schema={}", self.resolved_schema) } fn with_exprs_and_inputs( &self, _exprs: Vec, inputs: Vec, - ) -> datafusion::common::Result { + ) -> Result { if !inputs.is_empty() { - return internal_err!("LookupSource cannot have inputs"); + return internal_err!( + "ReferenceTableSource is a leaf node and cannot accept upstream inputs" + ); } Ok(Self { - 
table: self.table.clone(), - schema: self.schema.clone(), + source_definition: self.source_definition.clone(), + resolved_schema: self.resolved_schema.clone(), }) } } +// ----------------------------------------------------------------------------- +// Logical Node: Stream to Reference Join +// ----------------------------------------------------------------------------- + +/// Join between an unbounded stream and a reference (lookup) table. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct LookupJoin { - pub(crate) input: LogicalPlan, - pub(crate) schema: DFSchemaRef, - pub(crate) connector: ConnectorTable, - pub(crate) on: Vec<(Expr, Column)>, - pub(crate) filter: Option, - pub(crate) alias: Option, - pub(crate) join_type: JoinType, +pub struct StreamReferenceJoinNode { + pub(crate) upstream_stream_plan: LogicalPlan, + pub(crate) output_schema: DFSchemaRef, + pub(crate) external_dictionary: SourceTable, + pub(crate) equijoin_conditions: Vec<(Expr, Column)>, + pub(crate) post_join_filter: Option, + pub(crate) namespace_alias: Option, + pub(crate) join_semantics: JoinType, } -multifield_partial_ord!(LookupJoin, input, connector, on, filter, alias); +multifield_partial_ord!( + StreamReferenceJoinNode, + upstream_stream_plan, + external_dictionary, + equijoin_conditions, + post_join_filter, + namespace_alias +); -impl StreamExtension for LookupJoin { - fn node_name(&self) -> Option { - None +impl StreamReferenceJoinNode { + fn compile_join_conditions(&self, planner: &Planner) -> Result> { + self.equijoin_conditions + .iter() + .map(|(logical_left_expr, right_column)| { + let physical_expr = + planner.create_physical_expr(logical_left_expr, &self.output_schema)?; + let serialized_expr = + serialize_physical_expr(&physical_expr, &DefaultPhysicalExtensionCodec {})?; + + Ok(LookupJoinCondition { + left_expr: serialized_expr.encode_to_vec(), + right_key: right_column.name.clone(), + }) + }) + .collect() + } + + fn map_api_join_type(&self) -> Result { + match 
self.join_semantics { + JoinType::Inner => Ok(api::JoinType::Inner as i32), + JoinType::Left => Ok(api::JoinType::Left as i32), + unsupported => plan_err!( + "Unsupported join type '{unsupported}' for dictionary lookups. Only INNER and LEFT joins are permitted." + ), + } } - fn plan_node( + fn build_engine_operator( &self, planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> datafusion::common::Result { - let schema = FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into()))?; - let lookup_schema = FsSchema::from_schema_unkeyed(add_timestamp_field_arrow( - self.connector.physical_schema(), + _upstream_schema: &FsSchemaRef, + ) -> Result { + let internal_input_schema = FsSchema::from_schema_unkeyed(Arc::new( + self.output_schema.as_ref().into(), ))?; - let join_config = LookupJoinOperator { - input_schema: Some(schema.into()), - lookup_schema: Some(lookup_schema.into()), + let dictionary_physical_schema = self.external_dictionary.produce_physical_schema(); + let lookup_fs_schema = + FsSchema::from_schema_unkeyed(add_timestamp_field_arrow(dictionary_physical_schema))?; + + Ok(LookupJoinOperator { + input_schema: Some(internal_input_schema.into()), + lookup_schema: Some(lookup_fs_schema.into()), connector: Some(ConnectorOp { - connector: self.connector.connector.clone(), - config: self.connector.config.clone(), - description: self.connector.description.clone(), + connector: self.external_dictionary.adapter_type.clone(), + config: self.external_dictionary.opaque_config.clone(), + description: self.external_dictionary.description.clone(), }), - key_exprs: self - .on - .iter() - .map(|(l, r)| { - let expr = planner.create_physical_expr(l, &self.schema)?; - let expr = serialize_physical_expr(&expr, &DefaultPhysicalExtensionCodec {})?; - Ok(LookupJoinCondition { - left_expr: expr.encode_to_vec(), - right_key: r.name.clone(), - }) - }) - .collect::>>()?, - join_type: match self.join_type { - JoinType::Inner => api::JoinType::Inner as i32, - 
JoinType::Left => api::JoinType::Left as i32, - j => { - return plan_err!( - "unsupported join type '{j}' for lookup join; only inner and left joins are supported" - ); - } - }, + key_exprs: self.compile_join_conditions(planner)?, + join_type: self.map_api_join_type()?, ttl_micros: self - .connector + .external_dictionary .lookup_cache_ttl .map(|t| t.as_micros() as u64), - max_capacity_bytes: self.connector.lookup_cache_max_bytes, - }; - - let incoming_edge = - LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schemas[0]).clone()); - - Ok(NodeWithIncomingEdges { - node: LogicalNode::single( - index as u32, - format!("lookupjoin_{index}"), - OperatorName::LookupJoin, - join_config.encode_to_vec(), - format!("LookupJoin<{}>", self.connector.name), - 1, - ), - edges: vec![incoming_edge], + max_capacity_bytes: self.external_dictionary.lookup_cache_max_bytes, + }) + } +} + +impl StreamingOperatorBlueprint for StreamReferenceJoinNode { + fn operator_identity(&self) -> Option { + None + } + + fn compile_to_graph_node( + &self, + planner: &Planner, + node_index: usize, + mut input_schemas: Vec, + ) -> Result { + if input_schemas.len() != 1 { + return plan_err!( + "StreamReferenceJoinNode requires exactly one upstream stream input" + ); + } + let upstream_schema = input_schemas.remove(0); + + let operator_config = self.build_engine_operator(planner, &upstream_schema)?; + + let logical_node = LogicalNode::single( + node_index as u32, + format!("lookup_join_{node_index}"), + OperatorName::LookupJoin, + operator_config.encode_to_vec(), + format!("DictionaryJoin<{}>", self.external_dictionary.table_identifier), + 1, + ); + + let incoming_edge = LogicalEdge::project_all( + LogicalEdgeType::Shuffle, + (*upstream_schema).clone(), + ); + + Ok(CompiledTopologyNode { + execution_unit: logical_node, + routing_edges: vec![incoming_edge], }) } - fn output_schema(&self) -> FsSchema { - FsSchema::from_schema_unkeyed(self.schema.inner().clone()).unwrap() + fn yielded_schema(&self) 
-> FsSchema { + FsSchema::from_schema_unkeyed(self.output_schema.inner().clone()) + .expect("Failed to convert lookup join output schema to FsSchema") } } -impl UserDefinedLogicalNodeCore for LookupJoin { +impl UserDefinedLogicalNodeCore for StreamReferenceJoinNode { fn name(&self) -> &str { - JOIN_EXTENSION_NAME + STREAM_DICTIONARY_JOIN_NODE_NAME } fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] + vec![&self.upstream_stream_plan] } fn schema(&self) -> &DFSchemaRef { - &self.schema + &self.output_schema } fn expressions(&self) -> Vec { - let mut e: Vec<_> = self.on.iter().map(|(l, _)| l.clone()).collect(); - - if let Some(filter) = &self.filter { - e.push(filter.clone()); + let mut exprs: Vec<_> = self + .equijoin_conditions + .iter() + .map(|(l, _)| l.clone()) + .collect(); + if let Some(filter) = &self.post_join_filter { + exprs.push(filter.clone()); } - - e + exprs } fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "LookupJoinExtension: {}", self.schema) + write!( + f, + "StreamReferenceJoin: join_type={:?} | {}", + self.join_semantics, + self.output_schema + ) } - fn with_exprs_and_inputs( - &self, - _: Vec, - inputs: Vec, - ) -> datafusion::common::Result { + fn with_exprs_and_inputs(&self, _: Vec, inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!( + "StreamReferenceJoinNode expects exactly 1 upstream plan, got {}", + inputs.len() + ); + } Ok(Self { - input: inputs[0].clone(), - schema: self.schema.clone(), - connector: self.connector.clone(), - on: self.on.clone(), - filter: self.filter.clone(), - alias: self.alias.clone(), - join_type: self.join_type, + upstream_stream_plan: inputs[0].clone(), + output_schema: self.output_schema.clone(), + external_dictionary: self.external_dictionary.clone(), + equijoin_conditions: self.equijoin_conditions.clone(), + post_join_filter: self.post_join_filter.clone(), + namespace_alias: self.namespace_alias.clone(), + join_semantics: self.join_semantics, }) } -} 
\ No newline at end of file +} diff --git a/src/sql/extensions/mod.rs b/src/sql/extensions/mod.rs index a78ca419..eab2443b 100644 --- a/src/sql/extensions/mod.rs +++ b/src/sql/extensions/mod.rs @@ -15,8 +15,8 @@ mod macros; pub(crate) mod constants; pub(crate) use constants::ASYNC_RESULT_FIELD; -pub(crate) mod stream_extension; -pub(crate) use stream_extension::{NodeWithIncomingEdges, StreamExtension}; +pub(crate) mod streaming_operator_blueprint; +pub(crate) use streaming_operator_blueprint::{CompiledTopologyNode, StreamingOperatorBlueprint}; pub(crate) mod aggregate; pub(crate) mod debezium; @@ -29,13 +29,13 @@ pub(crate) mod sink; pub(crate) mod table_source; pub(crate) mod updating_aggregate; pub(crate) mod watermark_node; -pub(crate) mod window_fn; +pub(crate) mod windows_function; pub(crate) mod timestamp_append; -pub(crate) use timestamp_append::TimestampAppendExtension; +pub(crate) use timestamp_append::SystemTimestampInjectorNode; pub(crate) mod async_udf; -pub(crate) use async_udf::AsyncUDFExtension; +pub(crate) use async_udf::AsyncFunctionExecutionNode; pub(crate) mod is_retract; pub(crate) use is_retract::IsRetractExtension; diff --git a/src/sql/extensions/projection.rs b/src/sql/extensions/projection.rs index ff319d12..2175bddf 100644 --- a/src/sql/extensions/projection.rs +++ b/src/sql/extensions/projection.rs @@ -1,137 +1,213 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::fmt::Formatter; +use std::sync::Arc; use datafusion::common::{DFSchema, DFSchemaRef, Result, internal_err}; -use std::{fmt::Formatter, sync::Arc}; - -use super::{StreamExtension, NodeWithIncomingEdges}; use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore}; use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; -use itertools::Itertools; use prost::Message; + use protocol::grpc::api::ProjectionOperator; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; + use crate::multifield_partial_ord; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::types::{schema_from_df_fields, DFField}; use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::types::{DFField, schema_from_df_fields}; + +// ----------------------------------------------------------------------------- +// Constants & Identifiers +// ----------------------------------------------------------------------------- + +pub(crate) const STREAM_PROJECTION_NODE_NAME: &str = "StreamProjectionNode"; +const DEFAULT_PROJECTION_LABEL: &str = "projection"; -pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension"; +// ----------------------------------------------------------------------------- +// Logical Node Definition +// ----------------------------------------------------------------------------- -/// Projection operations +/// Projection within a streaming execution topology. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct ProjectionExtension { - pub(crate) inputs: Vec, - pub(crate) name: Option, - pub(crate) exprs: Vec, - pub(crate) schema: DFSchemaRef, - pub(crate) shuffle: bool, +pub(crate) struct StreamProjectionNode { + pub(crate) upstream_plans: Vec, + pub(crate) operator_label: Option, + pub(crate) projection_exprs: Vec, + pub(crate) resolved_schema: DFSchemaRef, + pub(crate) requires_shuffle: bool, } -multifield_partial_ord!(ProjectionExtension, name, exprs); +multifield_partial_ord!(StreamProjectionNode, operator_label, projection_exprs); -impl ProjectionExtension { - pub(crate) fn new(inputs: Vec, name: Option, exprs: Vec) -> Self { - let input_schema = inputs.first().unwrap().schema(); - let fields = exprs - .iter() - .map(|e| DFField::from(e.to_field(input_schema).unwrap())) - .collect_vec(); - - let schema = Arc::new(schema_from_df_fields(&fields).unwrap()); +impl StreamProjectionNode { + pub(crate) fn try_new( + upstream_plans: Vec, + operator_label: Option, + projection_exprs: Vec, + ) -> Result { + if upstream_plans.is_empty() { + return internal_err!("StreamProjectionNode requires at least one upstream plan"); + } + let primary_input = &upstream_plans[0]; + let upstream_schema = primary_input.schema(); - Self { - inputs, - name, - exprs, - schema, - shuffle: false, + let mut projected_fields = Vec::with_capacity(projection_exprs.len()); + for logical_expr in &projection_exprs { + let arrow_field = logical_expr.to_field(upstream_schema)?; + projected_fields.push(DFField::from(arrow_field)); } - } - pub(crate) fn shuffled(mut self) -> Self { - self.shuffle = true; - self + let resolved_schema = Arc::new(schema_from_df_fields(&projected_fields)?); + + Ok(Self { + upstream_plans, + operator_label, + projection_exprs, + resolved_schema, + requires_shuffle: false, + }) } -} -impl StreamExtension for ProjectionExtension { - fn node_name(&self) -> Option { - None + pub(crate) fn with_shuffle_routing(mut self) -> 
Self { + self.requires_shuffle = true; + self } - fn plan_node( - &self, - planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> Result { - let input_schema = (*input_schemas[0]).clone(); + fn validate_uniform_schemas(input_schemas: &[FsSchemaRef]) -> Result { + if input_schemas.is_empty() { + return internal_err!("No input schemas provided to projection planner"); + } + let primary_schema = input_schemas[0].clone(); - // check that all inputs have the same schemas - for s in input_schemas.iter().skip(1) { - if **s != input_schema { - return internal_err!("all input schemas to a projection node must mast"); + for schema in input_schemas.iter().skip(1) { + if **schema != *primary_schema { + return internal_err!( + "Schema mismatch: All upstream inputs to a projection node must share the identical schema topology." + ); } } - let input_df_schema = Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone())?); - let mut physical_exprs = vec![]; + Ok(primary_schema) + } - for e in &self.exprs { - let phys = planner - .create_physical_expr(e, &input_df_schema) - .map_err(|e| e.context("projection"))?; - physical_exprs.push( - serialize_physical_expr(&phys, &DefaultPhysicalExtensionCodec {})?.encode_to_vec(), - ); - } + fn compile_physical_expressions( + &self, + planner: &Planner, + input_df_schema: &DFSchemaRef, + ) -> Result>> { + self.projection_exprs + .iter() + .map(|logical_expr| { + let physical_expr = planner + .create_physical_expr(logical_expr, input_df_schema) + .map_err(|e| e.context("Failed to compile physical projection expression"))?; + + let serialized_expr = serialize_physical_expr( + &physical_expr, + &DefaultPhysicalExtensionCodec {}, + )?; + + Ok(serialized_expr.encode_to_vec()) + }) + .collect() + } +} + +// ----------------------------------------------------------------------------- +// Stream Extension Trait Implementation +// ----------------------------------------------------------------------------- - let config = 
ProjectionOperator { - name: self.name.as_deref().unwrap_or("projection").to_string(), - input_schema: Some(input_schema.clone().into()), +impl StreamingOperatorBlueprint for StreamProjectionNode { + fn operator_identity(&self) -> Option { + None + } - output_schema: Some(self.output_schema().into()), - exprs: physical_exprs, + fn compile_to_graph_node( + &self, + planner: &Planner, + node_index: usize, + input_schemas: Vec, + ) -> Result { + let unified_input_schema = Self::validate_uniform_schemas(&input_schemas)?; + let input_df_schema = + Arc::new(DFSchema::try_from(unified_input_schema.schema.as_ref().clone())?); + + let compiled_expr_payloads = self.compile_physical_expressions(planner, &input_df_schema)?; + + let operator_config = ProjectionOperator { + name: self + .operator_label + .as_deref() + .unwrap_or(DEFAULT_PROJECTION_LABEL) + .to_string(), + input_schema: Some(unified_input_schema.as_ref().clone().into()), + output_schema: Some(self.yielded_schema().into()), + exprs: compiled_expr_payloads, }; - let node = LogicalNode::single( - index as u32, - format!("projection_{index}"), + let node_identifier = format!("projection_{node_index}"); + let label = format!( + "ArrowProjection<{}>", + self.operator_label.as_deref().unwrap_or("_") + ); + + let logical_node = LogicalNode::single( + node_index as u32, + node_identifier, OperatorName::Projection, - config.encode_to_vec(), - format!("ArrowProjection<{}>", self.name.as_deref().unwrap_or("_")), + operator_config.encode_to_vec(), + label, 1, ); - let edge_type = if self.shuffle { + let routing_strategy = if self.requires_shuffle { LogicalEdgeType::Shuffle } else { LogicalEdgeType::Forward }; - let edge = LogicalEdge::project_all(edge_type, input_schema); - Ok(NodeWithIncomingEdges { - node, - edges: vec![edge], + let outgoing_edge = + LogicalEdge::project_all(routing_strategy, (*unified_input_schema).clone()); + + Ok(CompiledTopologyNode { + execution_unit: logical_node, + routing_edges: vec![outgoing_edge], 
}) } - fn output_schema(&self) -> FsSchema { - FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_arrow().clone())).unwrap() + fn yielded_schema(&self) -> FsSchema { + FsSchema::from_schema_unkeyed(Arc::new(self.resolved_schema.as_arrow().clone())) + .expect("Fatal: Failed to generate unkeyed output schema for projection") } } -impl UserDefinedLogicalNodeCore for ProjectionExtension { +// ----------------------------------------------------------------------------- +// DataFusion Logical Node Hooks +// ----------------------------------------------------------------------------- + +impl UserDefinedLogicalNodeCore for StreamProjectionNode { fn name(&self) -> &str { - PROJECTION_NAME + STREAM_PROJECTION_NODE_NAME } fn inputs(&self) -> Vec<&LogicalPlan> { - self.inputs.iter().collect() + self.upstream_plans.iter().collect() } fn schema(&self) -> &DFSchemaRef { - &self.schema + &self.resolved_schema } fn expressions(&self) -> Vec { @@ -139,16 +215,25 @@ impl UserDefinedLogicalNodeCore for ProjectionExtension { } fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "KeyCalculationExtension: {}", self.schema()) + write!( + f, + "StreamProjectionNode: RequiresShuffle={}, Schema={}", + self.requires_shuffle, + self.resolved_schema + ) } - fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { - Ok(Self { - name: self.name.clone(), + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + let mut new_node = Self::try_new( inputs, - exprs, - schema: self.schema.clone(), - shuffle: self.shuffle, - }) + self.operator_label.clone(), + self.projection_exprs.clone(), + )?; + + if self.requires_shuffle { + new_node = new_node.with_shuffle_routing(); + } + + Ok(new_node) } } diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs index 0bd2706f..5011bb4c 100644 --- a/src/sql/extensions/remote_table.rs +++ b/src/sql/extensions/remote_table.rs @@ -1,144 +1,158 @@ -use std::{fmt::Formatter, 
sync::Arc}; +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Formatter; +use std::sync::Arc; use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err, plan_err}; - use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode}; +use datafusion_proto::physical_plan::AsExecutionPlan; +use datafusion_proto::protobuf::PhysicalPlanNode; use prost::Message; + use protocol::grpc::api::ValuePlanOperator; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; + use crate::multifield_partial_ord; +use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; use crate::sql::logical_planner::FsPhysicalExtensionCodec; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use super::{StreamExtension, NodeWithIncomingEdges}; -pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension"; +// ----------------------------------------------------------------------------- +// Constants & Identifiers +// ----------------------------------------------------------------------------- -/* Lightweight extension that allows us to segment the graph and merge nodes with the same name. 
- An Extension Planner will be used to isolate computation to individual nodes. -*/ +pub(crate) const REMOTE_TABLE_NODE_NAME: &str = "RemoteTableBoundaryNode"; + +// ----------------------------------------------------------------------------- +// Logical Node Definition +// ----------------------------------------------------------------------------- + +/// Segments the execution graph and merges nodes sharing the same identifier; acts as a boundary. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct RemoteTableExtension { - pub(crate) input: LogicalPlan, - pub(crate) name: TableReference, - pub(crate) schema: DFSchemaRef, - pub(crate) materialize: bool, +pub(crate) struct RemoteTableBoundaryNode { + pub(crate) upstream_plan: LogicalPlan, + pub(crate) table_identifier: TableReference, + pub(crate) resolved_schema: DFSchemaRef, + pub(crate) requires_materialization: bool, } -multifield_partial_ord!(RemoteTableExtension, input, name, materialize); +multifield_partial_ord!( + RemoteTableBoundaryNode, + upstream_plan, + table_identifier, + requires_materialization +); -impl RemoteTableExtension { - fn plan_node_inlined( - planner: &Planner, - index: usize, - this: &RemoteTableExtension, - ) -> Result { - let physical_plan = planner.sync_plan(&this.input)?; - let physical_plan_node = PhysicalPlanNode::try_from_physical_plan( +impl RemoteTableBoundaryNode { + fn compile_engine_operator(&self, planner: &Planner) -> Result> { + let physical_plan = planner.sync_plan(&self.upstream_plan)?; + + let physical_plan_proto = PhysicalPlanNode::try_from_physical_plan( physical_plan, &FsPhysicalExtensionCodec::default(), )?; - let config = ValuePlanOperator { - name: format!("value_calculation({})", this.name), - physical_plan: physical_plan_node.encode_to_vec(), + + let operator_config = ValuePlanOperator { + name: format!("value_calculation({})", self.table_identifier), + physical_plan: physical_plan_proto.encode_to_vec(), }; - let node = LogicalNode::single( - index 
as u32, - format!("value_{index}"), - OperatorName::ArrowValue, - config.encode_to_vec(), - this.name.to_string(), - 1, - ); - Ok(NodeWithIncomingEdges { - node, - edges: vec![], - }) + + Ok(operator_config.encode_to_vec()) } - fn plan_node_with_edges( - planner: &Planner, - index: usize, - this: &RemoteTableExtension, - input_schemas: Vec, - ) -> Result { - let physical_plan = planner.sync_plan(&this.input)?; - let physical_plan_node = PhysicalPlanNode::try_from_physical_plan( - physical_plan, - &FsPhysicalExtensionCodec::default(), - )?; - let config = ValuePlanOperator { - name: format!("value_calculation({})", this.name), - physical_plan: physical_plan_node.encode_to_vec(), - }; - let node = LogicalNode::single( - index as u32, - format!("value_{index}"), - OperatorName::ArrowValue, - config.encode_to_vec(), - this.name.to_string(), - 1, - ); + fn validate_uniform_schemas(input_schemas: &[FsSchemaRef]) -> Result<()> { + if input_schemas.len() <= 1 { + return Ok(()); + } - let edges = input_schemas - .into_iter() - .map(|schema| LogicalEdge::project_all(LogicalEdgeType::Forward, (*schema).clone())) - .collect(); - Ok(NodeWithIncomingEdges { node, edges }) + let primary_schema = &input_schemas[0]; + for schema in input_schemas.iter().skip(1) { + if *schema != *primary_schema { + return plan_err!( + "Topology error: Multiple input streams routed to the same remote table must share an identical schema structure." 
+ ); + } + } + + Ok(()) } } -impl StreamExtension for RemoteTableExtension { - fn node_name(&self) -> Option { - if self.materialize { - Some(NamedNode::RemoteTable(self.name.to_owned())) +// ----------------------------------------------------------------------------- +// Stream Extension Trait Implementation +// ----------------------------------------------------------------------------- + +impl StreamingOperatorBlueprint for RemoteTableBoundaryNode { + fn operator_identity(&self) -> Option { + if self.requires_materialization { + Some(NamedNode::RemoteTable(self.table_identifier.clone())) } else { None } } - fn plan_node( + fn compile_to_graph_node( &self, planner: &Planner, - index: usize, + node_index: usize, input_schemas: Vec, - ) -> Result { - match input_schemas.len() { - 0 => { - return Self::plan_node_inlined(planner, index, self); - } - 1 => {} - _multiple_inputs => { - let first = input_schemas[0].clone(); - for schema in input_schemas.iter().skip(1) { - if *schema != first { - return plan_err!( - "If a node has multiple inputs, they must all have the same schema" - ); - } - } - } - } - Self::plan_node_with_edges(planner, index, self, input_schemas) + ) -> Result { + Self::validate_uniform_schemas(&input_schemas)?; + + let operator_payload = self.compile_engine_operator(planner)?; + + let logical_node = LogicalNode::single( + node_index as u32, + format!("value_{node_index}"), + OperatorName::ArrowValue, + operator_payload, + self.table_identifier.to_string(), + 1, + ); + + let routing_edges: Vec = input_schemas + .into_iter() + .map(|schema| LogicalEdge::project_all(LogicalEdgeType::Forward, (*schema).clone())) + .collect(); + + Ok(CompiledTopologyNode { + execution_unit: logical_node, + routing_edges: routing_edges, + }) } - fn output_schema(&self) -> FsSchema { - FsSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap() + fn yielded_schema(&self) -> FsSchema { + 
FsSchema::from_schema_keys(Arc::new(self.resolved_schema.as_ref().into()), vec![]) + .expect("Fatal: Failed to generate output schema for remote table boundary") } } -impl UserDefinedLogicalNodeCore for RemoteTableExtension { +// ----------------------------------------------------------------------------- +// DataFusion Logical Node Hooks +// ----------------------------------------------------------------------------- + +impl UserDefinedLogicalNodeCore for RemoteTableBoundaryNode { fn name(&self) -> &str { - REMOTE_TABLE_NAME + REMOTE_TABLE_NODE_NAME } fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] + vec![&self.upstream_plan] } fn schema(&self) -> &DFSchemaRef { - &self.schema + &self.resolved_schema } fn expressions(&self) -> Vec { @@ -146,19 +160,28 @@ impl UserDefinedLogicalNodeCore for RemoteTableExtension { } fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "RemoteTableExtension: {}", self.schema) + write!( + f, + "RemoteTableBoundaryNode: Identifier={}, Materialized={}, Schema={}", + self.table_identifier, + self.requires_materialization, + self.resolved_schema + ) } - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + fn with_exprs_and_inputs(&self, _exprs: Vec, mut inputs: Vec) -> Result { if inputs.len() != 1 { - return internal_err!("input size inconsistent"); + return internal_err!( + "RemoteTableBoundaryNode expects exactly 1 upstream logical plan, but received {}", + inputs.len() + ); } Ok(Self { - input: inputs[0].clone(), - name: self.name.clone(), - schema: self.schema.clone(), - materialize: self.materialize, + upstream_plan: inputs.remove(0), + table_identifier: self.table_identifier.clone(), + resolved_schema: self.resolved_schema.clone(), + requires_materialization: self.requires_materialization, }) } } diff --git a/src/sql/extensions/sink.rs b/src/sql/extensions/sink.rs index a1112c4b..8fc31aac 100644 --- a/src/sql/extensions/sink.rs +++ b/src/sql/extensions/sink.rs @@ -1,168 
+1,228 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Formatter; use std::sync::Arc; use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err}; - use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore}; - use prost::Message; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; + use crate::multifield_partial_ord; -use crate::sql::schema::Table; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; -use super::{ - StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension, - remote_table::RemoteTableExtension, -}; +use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::schema::Table; + +use super::debezium::PackDebeziumEnvelopeNode; +use super::remote_table::RemoteTableBoundaryNode; -pub(crate) const SINK_NODE_NAME: &str = "SinkExtension"; +// ----------------------------------------------------------------------------- +// Constants & Identifiers +// ----------------------------------------------------------------------------- +pub(crate) const STREAM_EGRESS_NODE_NAME: &str = "StreamEgressNode"; + +// 
----------------------------------------------------------------------------- +// Logical Node Definition +// ----------------------------------------------------------------------------- + +/// Terminal node routing processed data into an external sink (e.g. Kafka, PostgreSQL). #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct SinkExtension { - pub(crate) name: TableReference, - pub(crate) table: Table, - pub(crate) schema: DFSchemaRef, - inputs: Arc>, +pub(crate) struct StreamEgressNode { + pub(crate) target_identifier: TableReference, + pub(crate) destination_table: Table, + pub(crate) egress_schema: DFSchemaRef, + upstream_plans: Arc>, } -multifield_partial_ord!(SinkExtension, name, inputs); +multifield_partial_ord!(StreamEgressNode, target_identifier, upstream_plans); -impl SinkExtension { - pub fn new( - name: TableReference, - table: Table, - mut schema: DFSchemaRef, - mut input: Arc, +impl StreamEgressNode { + pub fn try_new( + target_identifier: TableReference, + destination_table: Table, + initial_schema: DFSchemaRef, + upstream_plan: LogicalPlan, ) -> Result { - let input_is_updating = input + let (mut processed_plan, mut resolved_schema) = Self::apply_cdc_transformations( + upstream_plan, + initial_schema, + &destination_table, + )?; + + Self::enforce_computational_boundary(&mut resolved_schema, &mut processed_plan); + + Ok(Self { + target_identifier, + destination_table, + egress_schema: resolved_schema, + upstream_plans: Arc::new(vec![processed_plan]), + }) + } + + fn apply_cdc_transformations( + plan: LogicalPlan, + schema: DFSchemaRef, + destination: &Table, + ) -> Result<(LogicalPlan, DFSchemaRef)> { + let is_upstream_updating = plan .schema() .has_column_with_unqualified_name(UPDATING_META_FIELD); - match &table { - Table::ConnectorTable(connector_table) => { - match (input_is_updating, connector_table.is_updating()) { + + match destination { + Table::ConnectorTable(connector) => { + let is_sink_updating = connector.is_updating(); + 
+ match (is_upstream_updating, is_sink_updating) { (_, true) => { - let to_debezium_extension = - ToDebeziumExtension::try_new(input.as_ref().clone())?; - input = Arc::new(LogicalPlan::Extension(Extension { - node: Arc::new(to_debezium_extension), - })); - schema = input.schema().clone(); + let debezium_encoder = PackDebeziumEnvelopeNode::try_new(plan)?; + let wrapped_plan = LogicalPlan::Extension(Extension { + node: Arc::new(debezium_encoder), + }); + let new_schema = wrapped_plan.schema().clone(); + + Ok((wrapped_plan, new_schema)) } (true, false) => { - return plan_err!( - "input is updating, but sink is not configured as an updating sink (hint: use `format = 'debezium_json'`)" - ); + plan_err!( + "Topology Mismatch: The upstream is producing an updating stream (CDC), \ + but the target sink '{}' is not configured to accept updates. \ + Hint: set `format = 'debezium_json'` in the WITH clause.", + connector.name() + ) } - (false, false) => {} + (false, false) => Ok((plan, schema)), } } - Table::LookupTable(..) => return plan_err!("cannot use a lookup table as a sink"), - Table::TableFromQuery { .. } => {} - - } - Self::add_remote_if_necessary(&schema, &mut input); - - let inputs = Arc::new(vec![(*input).clone()]); - Ok(Self { - name, - table, - schema, - inputs, - }) - } - - // The input to a sink needs to be a non-transparent logical plan extension. - // If it isn't, wrap the input in a RemoteTableExtension. - pub fn add_remote_if_necessary(schema: &DFSchemaRef, input: &mut Arc) { - if let LogicalPlan::Extension(node) = input.as_ref() { - let arroyo_extension: &dyn StreamExtension = (&node.node).try_into().unwrap(); - if !arroyo_extension.transparent() { - return; + Table::LookupTable(..) => { + plan_err!("Topology Violation: A Lookup Table cannot be used as a streaming data sink.") } + Table::TableFromQuery { .. 
} => Ok((plan, schema)), } - let remote_table_extension = RemoteTableExtension { - input: input.as_ref().clone(), - name: TableReference::bare("sink projection"), - schema: schema.clone(), - materialize: false, - }; - *input = Arc::new(LogicalPlan::Extension(Extension { - node: Arc::new(remote_table_extension), - })); - } -} - -impl UserDefinedLogicalNodeCore for SinkExtension { - fn name(&self) -> &str { - SINK_NODE_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - self.inputs.iter().collect() - } - - fn schema(&self) -> &DFSchemaRef { - &self.schema } - fn expressions(&self) -> Vec { - vec![] - } + fn enforce_computational_boundary(schema: &mut DFSchemaRef, plan: &mut LogicalPlan) { + let requires_boundary = if let LogicalPlan::Extension(extension) = plan { + let stream_ext: &dyn StreamingOperatorBlueprint = (&extension.node) + .try_into() + .expect("Fatal: Egress node encountered an extension that does not implement StreamingOperatorBlueprint"); - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "SinkExtension({:?}): {}", self.name, self.schema) - } + stream_ext.is_passthrough_boundary() + } else { + true + }; - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - Ok(Self { - name: self.name.clone(), - table: self.table.clone(), - schema: self.schema.clone(), - inputs: Arc::new(inputs), - }) + if requires_boundary { + let boundary_node = RemoteTableBoundaryNode { + upstream_plan: plan.clone(), + table_identifier: TableReference::bare("sink projection"), + resolved_schema: schema.clone(), + requires_materialization: false, + }; + + *plan = LogicalPlan::Extension(Extension { + node: Arc::new(boundary_node), + }); + } } } -impl StreamExtension for SinkExtension { - fn node_name(&self) -> Option { - match &self.table { - _ => Some(NamedNode::Sink(self.name.clone())), - } +// ----------------------------------------------------------------------------- +// Stream Extension Trait Implementation +// 
----------------------------------------------------------------------------- + +impl StreamingOperatorBlueprint for StreamEgressNode { + fn operator_identity(&self) -> Option { + Some(NamedNode::Sink(self.target_identifier.clone())) } - fn plan_node( + fn compile_to_graph_node( &self, _planner: &Planner, - index: usize, + node_index: usize, input_schemas: Vec, - ) -> Result { - let operator_config = (self - .table + ) -> Result { + let connector_operator = self + .destination_table .connector_op() - .map_err(|e| e.context("connector op"))?) - .encode_to_vec(); + .map_err(|e| e.context("Failed to generate connector operation payload"))?; + + let operator_description = connector_operator.description.clone(); + let operator_payload = connector_operator.encode_to_vec(); - let node = LogicalNode::single( - index as u32, - format!("sink_{}_{}", self.name, index), + let logical_node = LogicalNode::single( + node_index as u32, + format!("sink_{}_{node_index}", self.target_identifier), OperatorName::ConnectorSink, - operator_config, - self.table.connector_op()?.description.clone(), + operator_payload, + operator_description, 1, ); - let edges = input_schemas + let routing_edges: Vec = input_schemas .into_iter() .map(|input_schema| { LogicalEdge::project_all(LogicalEdgeType::Forward, (*input_schema).clone()) }) .collect(); - Ok(NodeWithIncomingEdges { node, edges }) + + Ok(CompiledTopologyNode { + execution_unit: logical_node, + routing_edges: routing_edges, + }) } + fn yielded_schema(&self) -> FsSchema { + FsSchema::from_fields(vec![]) + } +} +// ----------------------------------------------------------------------------- +// DataFusion Logical Node Hooks +// ----------------------------------------------------------------------------- - fn output_schema(&self) -> FsSchema { - FsSchema::from_fields(vec![]) +impl UserDefinedLogicalNodeCore for StreamEgressNode { + fn name(&self) -> &str { + STREAM_EGRESS_NODE_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + 
self.upstream_plans.iter().collect() + } + + fn schema(&self) -> &DFSchemaRef { + &self.egress_schema } -} \ No newline at end of file + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!( + f, + "StreamEgressNode({:?}): Schema={}", + self.target_identifier, self.egress_schema + ) + } + + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + Ok(Self { + target_identifier: self.target_identifier.clone(), + destination_table: self.destination_table.clone(), + egress_schema: self.egress_schema.clone(), + upstream_plans: Arc::new(inputs), + }) + } +} diff --git a/src/sql/extensions/stream_extension.rs b/src/sql/extensions/stream_extension.rs deleted file mode 100644 index 76954529..00000000 --- a/src/sql/extensions/stream_extension.rs +++ /dev/null @@ -1,38 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::fmt::Debug; - -use datafusion::common::Result; - -use crate::sql::logical_node::logical::{LogicalEdge, LogicalNode}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::common::{FsSchema, FsSchemaRef}; - -pub(crate) trait StreamExtension: Debug { - fn node_name(&self) -> Option; - fn plan_node( - &self, - planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> Result; - fn output_schema(&self) -> FsSchema; - fn transparent(&self) -> bool { - false - } -} - -pub(crate) struct NodeWithIncomingEdges { - pub node: LogicalNode, - pub edges: Vec, -} diff --git a/src/sql/extensions/streaming_operator_blueprint.rs b/src/sql/extensions/streaming_operator_blueprint.rs new file mode 100644 index 00000000..d3f9d459 --- /dev/null +++ b/src/sql/extensions/streaming_operator_blueprint.rs @@ -0,0 +1,65 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Debug; + +use datafusion::common::Result; + +use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalNode}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; + +// ----------------------------------------------------------------------------- +// Core Execution Blueprint +// ----------------------------------------------------------------------------- + +/// Atomic unit within a streaming execution topology: translates streaming SQL into graph nodes. 
+pub(crate) trait StreamingOperatorBlueprint: Debug { + /// Canonical named identity for this operator, if any (sources, sinks, etc.). + fn operator_identity(&self) -> Option; + + /// Compiles this operator into a graph vertex and its incoming routing edges. + fn compile_to_graph_node( + &self, + compiler_context: &Planner, + node_id_sequence: usize, + upstream_schemas: Vec, + ) -> Result; + + /// Schema of records this operator yields downstream. + fn yielded_schema(&self) -> FsSchema; + + /// Logical passthrough boundary (no physical state change); default is stateful / materializing. + fn is_passthrough_boundary(&self) -> bool { + false + } +} + +// ----------------------------------------------------------------------------- +// Graph Topology Structures +// ----------------------------------------------------------------------------- + +/// Compiled vertex: execution unit plus upstream routing edges. +#[derive(Debug, Clone)] +pub(crate) struct CompiledTopologyNode { + pub execution_unit: LogicalNode, + pub routing_edges: Vec, +} + +impl CompiledTopologyNode { + pub fn new(execution_unit: LogicalNode, routing_edges: Vec) -> Self { + Self { + execution_unit, + routing_edges, + } + } +} diff --git a/src/sql/extensions/table_source.rs b/src/sql/extensions/table_source.rs index 0b069bbf..292284ba 100644 --- a/src/sql/extensions/table_source.rs +++ b/src/sql/extensions/table_source.rs @@ -1,65 +1,98 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::fmt::Formatter; use std::sync::Arc; use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err}; - -use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore}; - +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use prost::Message; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; + use crate::multifield_partial_ord; -use crate::sql::schema::{ConnectorTable, FieldSpec, Table}; -use crate::sql::schema::utils::add_timestamp_field; -use crate::sql::extensions::debezium::DebeziumUnrollingExtension; +use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::extensions::debezium::DebeziumSchemaCodec; +use crate::sql::logical_node::logical::{LogicalNode, OperatorName}; use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::schema::SourceTable; +use crate::sql::schema::utils::add_timestamp_field; use crate::sql::types::schema_from_df_fields; -use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; -use super::{ - StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension, - remote_table::RemoteTableExtension, -}; -pub(crate) const TABLE_SOURCE_NAME: &str = "TableSourceExtension"; +use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; + +// ----------------------------------------------------------------------------- +// Constants & Identifiers +// ----------------------------------------------------------------------------- + +pub(crate) const STREAM_INGESTION_NODE_NAME: &str = "StreamIngestionNode"; + +// ----------------------------------------------------------------------------- +// Logical Node Definition +// ----------------------------------------------------------------------------- + +/// Foundational ingestion point: connects to external systems and injects raw or CDC data. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct TableSourceExtension { - pub(crate) name: TableReference, - pub(crate) table: ConnectorTable, - pub(crate) schema: DFSchemaRef, +pub(crate) struct StreamIngestionNode { + pub(crate) source_identifier: TableReference, + pub(crate) source_definition: SourceTable, + pub(crate) resolved_schema: DFSchemaRef, } -multifield_partial_ord!(TableSourceExtension, name, table); +multifield_partial_ord!(StreamIngestionNode, source_identifier, source_definition); + +impl StreamIngestionNode { + pub fn try_new( + source_identifier: TableReference, + source_definition: SourceTable, + ) -> Result { + let resolved_schema = + Self::build_ingestion_schema(&source_identifier, &source_definition)?; + + Ok(Self { + source_identifier, + source_definition, + resolved_schema, + }) + } -impl TableSourceExtension { - pub fn new(name: TableReference, table: ConnectorTable) -> Self { - let physical_fields = table - .fields + fn build_ingestion_schema( + identifier: &TableReference, + definition: &SourceTable, + ) -> Result { + let physical_fields: Vec<_> = definition + .schema_specs .iter() - .filter_map(|field| match field { - FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => { - Some((Some(name.clone()), Arc::new(field.clone())).into()) - } - FieldSpec::Virtual { .. } => None, - }) - .collect::>(); - let base_schema = Arc::new(schema_from_df_fields(&physical_fields).unwrap()); - - let schema = if table.is_updating() { - DebeziumUnrollingExtension::as_debezium_schema(&base_schema, Some(name.clone())) - .unwrap() + .filter(|col| !col.is_computed()) + .map(|col| (Some(identifier.clone()), Arc::new(col.arrow_field().clone())).into()) + .collect(); + + let base_schema = Arc::new(schema_from_df_fields(&physical_fields)?); + + let enveloped_schema = if definition.is_updating() { + DebeziumSchemaCodec::wrap_into_envelope(&base_schema, Some(identifier.clone()))? 
} else { base_schema }; - let schema = add_timestamp_field(schema, Some(name.clone())).unwrap(); - Self { - name, - table, - schema, - } + + add_timestamp_field(enveloped_schema, Some(identifier.clone())) } } -impl UserDefinedLogicalNodeCore for TableSourceExtension { +// ----------------------------------------------------------------------------- +// DataFusion Logical Node Hooks +// ----------------------------------------------------------------------------- + +impl UserDefinedLogicalNodeCore for StreamIngestionNode { fn name(&self) -> &str { - TABLE_SOURCE_NAME + STREAM_INGESTION_NODE_NAME } fn inputs(&self) -> Vec<&LogicalPlan> { @@ -67,56 +100,76 @@ impl UserDefinedLogicalNodeCore for TableSourceExtension { } fn schema(&self) -> &DFSchemaRef { - &self.schema + &self.resolved_schema } fn expressions(&self) -> Vec { vec![] } - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "TableSourceExtension: {}", self.schema) + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!( + f, + "StreamIngestionNode({}): Schema={}", + self.source_identifier, self.resolved_schema + ) } - fn with_exprs_and_inputs(&self, _exprs: Vec, _inputs: Vec) -> Result { + fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { + if !inputs.is_empty() { + return plan_err!( + "StreamIngestionNode acts as a leaf boundary and cannot accept upstream inputs." 
+ ); + } + Ok(Self { - name: self.name.clone(), - table: self.table.clone(), - schema: self.schema.clone(), + source_identifier: self.source_identifier.clone(), + source_definition: self.source_definition.clone(), + resolved_schema: self.resolved_schema.clone(), }) } } -impl StreamExtension for TableSourceExtension { - fn node_name(&self) -> Option { - Some(NamedNode::Source(self.name.clone())) +// ----------------------------------------------------------------------------- +// Core Execution Blueprint Implementation +// ----------------------------------------------------------------------------- + +impl StreamingOperatorBlueprint for StreamIngestionNode { + fn operator_identity(&self) -> Option { + Some(NamedNode::Source(self.source_identifier.clone())) } - fn plan_node( + fn compile_to_graph_node( &self, - _planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> Result { - if !input_schemas.is_empty() { - return plan_err!("TableSourceExtension should not have inputs"); + _compiler_context: &Planner, + node_id_sequence: usize, + upstream_schemas: Vec, + ) -> Result { + if !upstream_schemas.is_empty() { + return plan_err!( + "Topology Violation: StreamIngestionNode is a source origin and cannot process upstream routing edges." 
+ ); } - let sql_source = self.table.as_sql_source()?; - let node = LogicalNode::single( - index as u32, - format!("source_{}_{}", self.name, index), + + let sql_source = self.source_definition.as_sql_source()?; + let connector_payload = sql_source.source.config.encode_to_vec(); + let operator_description = sql_source.source.config.description.clone(); + + let execution_unit = LogicalNode::single( + node_id_sequence as u32, + format!("source_{}_{node_id_sequence}", self.source_identifier), OperatorName::ConnectorSource, - sql_source.source.config.encode_to_vec(), - sql_source.source.config.description.clone(), + connector_payload, + operator_description, 1, ); - Ok(NodeWithIncomingEdges { - node, - edges: vec![], - }) + + Ok(CompiledTopologyNode::new(execution_unit, vec![])) } - fn output_schema(&self) -> FsSchema { - FsSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap() + fn yielded_schema(&self) -> FsSchema { + FsSchema::from_schema_keys(Arc::new(self.resolved_schema.as_ref().into()), vec![]).expect( + "Fatal: Failed to generate output schema for stream ingestion", + ) } } diff --git a/src/sql/extensions/timestamp_append.rs b/src/sql/extensions/timestamp_append.rs index 069b288a..7a3a07e9 100644 --- a/src/sql/extensions/timestamp_append.rs +++ b/src/sql/extensions/timestamp_append.rs @@ -10,71 +10,107 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use datafusion::common::{DFSchemaRef, Result, TableReference}; +use std::fmt::Formatter; + +use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err}; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use crate::multifield_partial_ord; use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field}; +// ----------------------------------------------------------------------------- +// Constants & Identifiers +// ----------------------------------------------------------------------------- + +pub(crate) const TIMESTAMP_INJECTOR_NODE_NAME: &str = "SystemTimestampInjectorNode"; + +// ----------------------------------------------------------------------------- +// Logical Node Definition +// ----------------------------------------------------------------------------- + +/// Injects the mandatory system `_timestamp` field into the upstream streaming schema. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct TimestampAppendExtension { - pub(crate) input: LogicalPlan, - pub(crate) qualifier: Option, - pub(crate) schema: DFSchemaRef, +pub(crate) struct SystemTimestampInjectorNode { + pub(crate) upstream_plan: LogicalPlan, + pub(crate) target_qualifier: Option, + pub(crate) resolved_schema: DFSchemaRef, } -impl TimestampAppendExtension { - pub(crate) fn new(input: LogicalPlan, qualifier: Option) -> Self { - if has_timestamp_field(input.schema()) { - unreachable!( - "shouldn't be adding timestamp to a plan that already has it: plan :\n {:?}\n schema: {:?}", - input, - input.schema() +multifield_partial_ord!(SystemTimestampInjectorNode, upstream_plan, target_qualifier); + +impl SystemTimestampInjectorNode { + pub(crate) fn try_new( + upstream_plan: LogicalPlan, + target_qualifier: Option, + ) -> Result { + let upstream_schema = upstream_plan.schema(); + + if has_timestamp_field(upstream_schema) { + return internal_err!( + "Topology Violation: Attempted to inject a system timestamp into an upstream plan \ 
+ that already contains one. \ + \nPlan:\n {:?} \nSchema:\n {:?}", + upstream_plan, + upstream_schema ); } - let schema = add_timestamp_field(input.schema().clone(), qualifier.clone()).unwrap(); - Self { - input, - qualifier, - schema, - } + + let resolved_schema = + add_timestamp_field(upstream_schema.clone(), target_qualifier.clone())?; + + Ok(Self { + upstream_plan, + target_qualifier, + resolved_schema, + }) } } -multifield_partial_ord!(TimestampAppendExtension, input, qualifier); +// ----------------------------------------------------------------------------- +// DataFusion Logical Node Hooks +// ----------------------------------------------------------------------------- -impl UserDefinedLogicalNodeCore for TimestampAppendExtension { +impl UserDefinedLogicalNodeCore for SystemTimestampInjectorNode { fn name(&self) -> &str { - "TimestampAppendExtension" + TIMESTAMP_INJECTOR_NODE_NAME } fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] + vec![&self.upstream_plan] } fn schema(&self) -> &DFSchemaRef { - &self.schema + &self.resolved_schema } fn expressions(&self) -> Vec { vec![] } - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + let field_names = self + .resolved_schema + .fields() + .iter() + .map(|field| field.name().to_string()) + .collect::>() + .join(", "); + write!( f, - "TimestampAppendExtension({:?}): {}", - self.qualifier, - self.schema - .fields() - .iter() - .map(|f| f.name().to_string()) - .collect::>() - .join(", ") + "SystemTimestampInjector(Qualifier={:?}): [{}]", + self.target_qualifier, field_names ) } - fn with_exprs_and_inputs(&self, _exprs: Vec, inputs: Vec) -> Result { - Ok(Self::new(inputs[0].clone(), self.qualifier.clone())) + fn with_exprs_and_inputs(&self, _exprs: Vec, mut inputs: Vec) -> Result { + if inputs.len() != 1 { + return internal_err!( + "SystemTimestampInjectorNode requires exactly 1 upstream logical plan, but 
received {}", + inputs.len() + ); + } + + Self::try_new(inputs.remove(0), self.target_qualifier.clone()) } } diff --git a/src/sql/extensions/updating_aggregate.rs b/src/sql/extensions/updating_aggregate.rs index 8220945b..1671fb13 100644 --- a/src/sql/extensions/updating_aggregate.rs +++ b/src/sql/extensions/updating_aggregate.rs @@ -1,165 +1,241 @@ -use datafusion::common::{DFSchemaRef, Result, TableReference, ToDFSchema, plan_err}; +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; +use std::time::Duration; + +use datafusion::common::{DFSchemaRef, Result, TableReference, ToDFSchema, internal_err, plan_err}; use datafusion::logical_expr::expr::ScalarFunction; use datafusion::logical_expr::{ - Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, col, lit, + col, lit, Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, }; use datafusion::prelude::named_struct; use datafusion::scalar::ScalarValue; use datafusion_proto::physical_plan::AsExecutionPlan; use datafusion_proto::protobuf::PhysicalPlanNode; use prost::Message; -use std::sync::Arc; -use std::time::Duration; use protocol::grpc::api::UpdatingAggregateOperator; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; + +use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::extensions::{CompiledTopologyNode, IsRetractExtension, StreamingOperatorBlueprint}; use crate::sql::functions::multi_hash; +use 
crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; use crate::sql::logical_planner::FsPhysicalExtensionCodec; -use crate::sql::extensions::{IsRetractExtension, NodeWithIncomingEdges, StreamExtension}; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::common::{FsSchema, FsSchemaRef}; -pub(crate) const UPDATING_AGGREGATE_EXTENSION_NAME: &str = "UpdatingAggregateExtension"; +// ----------------------------------------------------------------------------- +// Constants & Configuration +// ----------------------------------------------------------------------------- + +pub(crate) const CONTINUOUS_AGGREGATE_NODE_NAME: &str = "ContinuousAggregateNode"; + +const DEFAULT_FLUSH_INTERVAL_MICROS: u64 = 10_000_000; + +const STATIC_HASH_SIZE_BYTES: i32 = 16; +// ----------------------------------------------------------------------------- +// Logical Node Definition +// ----------------------------------------------------------------------------- + +/// Stateful continuous aggregation: running aggregates with updating / retraction semantics. 
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub(crate) struct UpdatingAggregateExtension { - pub(crate) aggregate: LogicalPlan, - pub(crate) key_fields: Vec, - pub(crate) final_calculation: LogicalPlan, - pub(crate) timestamp_qualifier: Option, - pub(crate) ttl: Duration, +pub(crate) struct ContinuousAggregateNode { + pub(crate) base_aggregate_plan: LogicalPlan, + pub(crate) partition_key_indices: Vec, + pub(crate) retract_injected_plan: LogicalPlan, + pub(crate) namespace_qualifier: Option, + pub(crate) state_retention_ttl: Duration, } -impl UpdatingAggregateExtension { - pub fn new( - aggregate: LogicalPlan, - key_fields: Vec, - timestamp_qualifier: Option, - ttl: Duration, +impl ContinuousAggregateNode { + pub fn try_new( + base_aggregate_plan: LogicalPlan, + partition_key_indices: Vec, + namespace_qualifier: Option, + state_retention_ttl: Duration, ) -> Result { - let final_calculation = LogicalPlan::Extension(Extension { + let retract_injected_plan = LogicalPlan::Extension(Extension { node: Arc::new(IsRetractExtension::new( - aggregate.clone(), - timestamp_qualifier.clone(), + base_aggregate_plan.clone(), + namespace_qualifier.clone(), )), }); Ok(Self { - aggregate, - key_fields, - final_calculation, - timestamp_qualifier, - ttl, + base_aggregate_plan, + partition_key_indices, + retract_injected_plan, + namespace_qualifier, + state_retention_ttl, + }) + } + + fn construct_state_metadata_expr(&self, upstream_schema: &FsSchemaRef) -> Expr { + let routing_keys: Vec = self + .partition_key_indices + .iter() + .map(|&idx| col(upstream_schema.schema.field(idx).name())) + .collect(); + + let state_id_hash = if routing_keys.is_empty() { + Expr::Literal( + ScalarValue::FixedSizeBinary( + STATIC_HASH_SIZE_BYTES, + Some(vec![0; STATIC_HASH_SIZE_BYTES as usize]), + ), + None, + ) + } else { + Expr::ScalarFunction(ScalarFunction { + func: multi_hash(), + args: routing_keys, + }) + }; + + named_struct(vec![ + lit("is_retract"), + lit(false), + lit("id"), + 
state_id_hash, + ]) + } + + fn compile_operator_config( + &self, + planner: &Planner, + upstream_schema: &FsSchemaRef, + ) -> Result { + let upstream_df_schema = upstream_schema.schema.clone().to_dfschema()?; + + let physical_agg_plan = planner.sync_plan(&self.base_aggregate_plan)?; + let compiled_agg_payload = PhysicalPlanNode::try_from_physical_plan( + physical_agg_plan, + &FsPhysicalExtensionCodec::default(), + )? + .encode_to_vec(); + + let meta_expr = self.construct_state_metadata_expr(upstream_schema); + let compiled_meta_expr = + planner.serialize_as_physical_expr(&meta_expr, &upstream_df_schema)?; + + Ok(UpdatingAggregateOperator { + name: "UpdatingAggregate".to_string(), + input_schema: Some((**upstream_schema).clone().into()), + final_schema: Some(self.yielded_schema().into()), + aggregate_exec: compiled_agg_payload, + metadata_expr: compiled_meta_expr, + flush_interval_micros: DEFAULT_FLUSH_INTERVAL_MICROS, + ttl_micros: self.state_retention_ttl.as_micros() as u64, }) } } -impl UserDefinedLogicalNodeCore for UpdatingAggregateExtension { +// ----------------------------------------------------------------------------- +// DataFusion Logical Node Hooks +// ----------------------------------------------------------------------------- + +impl UserDefinedLogicalNodeCore for ContinuousAggregateNode { fn name(&self) -> &str { - UPDATING_AGGREGATE_EXTENSION_NAME + CONTINUOUS_AGGREGATE_NODE_NAME } fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.aggregate] + vec![&self.base_aggregate_plan] } fn schema(&self) -> &DFSchemaRef { - self.final_calculation.schema() + self.retract_injected_plan.schema() } - fn expressions(&self) -> Vec { + fn expressions(&self) -> Vec { vec![] } fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "UpdatingAggregateExtension") + write!( + f, + "ContinuousAggregateNode(TTL={:?})", + self.state_retention_ttl + ) } fn with_exprs_and_inputs( &self, - _exprs: Vec, - inputs: Vec, + _exprs: Vec, + mut 
inputs: Vec, ) -> Result { - Self::new( - inputs[0].clone(), - self.key_fields.clone(), - self.timestamp_qualifier.clone(), - self.ttl, + if inputs.len() != 1 { + return internal_err!( + "ContinuousAggregateNode requires exactly 1 upstream input, got {}", + inputs.len() + ); + } + + Self::try_new( + inputs.remove(0), + self.partition_key_indices.clone(), + self.namespace_qualifier.clone(), + self.state_retention_ttl, ) } } -impl StreamExtension for UpdatingAggregateExtension { - fn node_name(&self) -> Option { +// ----------------------------------------------------------------------------- +// Core Execution Blueprint Implementation +// ----------------------------------------------------------------------------- + +impl StreamingOperatorBlueprint for ContinuousAggregateNode { + fn operator_identity(&self) -> Option { None } - fn plan_node( + fn compile_to_graph_node( &self, planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> Result { - if input_schemas.len() != 1 { + node_index: usize, + mut upstream_schemas: Vec, + ) -> Result { + if upstream_schemas.len() != 1 { return plan_err!( - "UpdatingAggregateExtension requires exactly one input schema, found {}", - input_schemas.len() + "Topology Violation: ContinuousAggregateNode requires exactly 1 upstream input, received {}", + upstream_schemas.len() ); } - let input_schema = input_schemas[0].clone(); - let input_dfschema = input_schema.schema.clone().to_dfschema()?; + let upstream_schema = upstream_schemas.remove(0); - let aggregate_exec = PhysicalPlanNode::try_from_physical_plan( - planner.sync_plan(&self.aggregate)?, - &FsPhysicalExtensionCodec::default(), - )?; - - let key_exprs: Vec = self - .key_fields - .iter() - .map(|&i| col(input_schema.schema.field(i).name())) - .collect(); - let hash_expr = if key_exprs.is_empty() { - Expr::Literal(ScalarValue::FixedSizeBinary(16, Some(vec![0; 16])), None) - } else { - Expr::ScalarFunction(ScalarFunction { - func: multi_hash(), - args: key_exprs, - }) - }; + 
let operator_config = self.compile_operator_config(planner, &upstream_schema)?; - let updating_meta_expr = - named_struct(vec![lit("is_retract"), lit(false), lit("id"), hash_expr]); - - let config = UpdatingAggregateOperator { - name: "UpdatingAggregate".to_string(), - input_schema: Some((*input_schema).clone().into()), - final_schema: Some(self.output_schema().into()), - aggregate_exec: aggregate_exec.encode_to_vec(), - metadata_expr: planner - .serialize_as_physical_expr(&updating_meta_expr, &input_dfschema)?, - flush_interval_micros: 10_000_000, - ttl_micros: self.ttl.as_micros() as u64, - }; - - let node = LogicalNode::single( - index as u32, - format!("updating_aggregate_{index}"), + let logical_node = LogicalNode::single( + node_index as u32, + format!("updating_aggregate_{node_index}"), OperatorName::UpdatingAggregate, - config.encode_to_vec(), + operator_config.encode_to_vec(), "UpdatingAggregate".to_string(), 1, ); - let edge = LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schema).clone()); + let shuffle_edge = + LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*upstream_schema).clone()); - Ok(NodeWithIncomingEdges { - node, - edges: vec![edge], + Ok(CompiledTopologyNode { + execution_unit: logical_node, + routing_edges: vec![shuffle_edge], }) } - fn output_schema(&self) -> FsSchema { - FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap() + fn yielded_schema(&self) -> FsSchema { + FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).expect( + "Fatal: Failed to generate unkeyed output schema for continuous aggregate", + ) } } diff --git a/src/sql/extensions/watermark_node.rs b/src/sql/extensions/watermark_node.rs index 5ef8aa49..7cdb9b67 100644 --- a/src/sql/extensions/watermark_node.rs +++ b/src/sql/extensions/watermark_node.rs @@ -1,140 +1,231 @@ -use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err}; +// Licensed under the Apache License, Version 2.0 (the "License"); +// 
you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Formatter; +use std::sync::Arc; + +use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err, plan_err}; use datafusion::error::DataFusionError; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; use prost::Message; -use std::fmt::Formatter; -use std::sync::Arc; use protocol::grpc::api::ExpressionWatermarkConfig; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; + use crate::multifield_partial_ord; -use crate::sql::schema::utils::add_timestamp_field; -use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::schema::utils::add_timestamp_field; + +// ----------------------------------------------------------------------------- +// Constants & Identifiers +// ----------------------------------------------------------------------------- + +pub(crate) const EVENT_TIME_WATERMARK_NODE_NAME: &str = "EventTimeWatermarkNode"; + +const INTERNAL_TIMESTAMP_COLUMN: &str 
= "_timestamp"; + +const DEFAULT_WATERMARK_EMISSION_PERIOD_MICROS: u64 = 1_000_000; + +// ----------------------------------------------------------------------------- +// Logical Node Definition +// ----------------------------------------------------------------------------- -pub(crate) const WATERMARK_NODE_NAME: &str = "WatermarkNode"; +/// Event-time watermark from a user strategy; drives time progress in stateful operators. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct WatermarkNode { - pub input: LogicalPlan, - pub qualifier: TableReference, - pub watermark_expression: Expr, - pub schema: DFSchemaRef, - timestamp_index: usize, +pub(crate) struct EventTimeWatermarkNode { + pub(crate) upstream_plan: LogicalPlan, + pub(crate) namespace_qualifier: TableReference, + pub(crate) watermark_strategy_expr: Expr, + pub(crate) resolved_schema: DFSchemaRef, + pub(crate) internal_timestamp_offset: usize, } multifield_partial_ord!( - WatermarkNode, - input, - qualifier, - watermark_expression, - timestamp_index + EventTimeWatermarkNode, + upstream_plan, + namespace_qualifier, + watermark_strategy_expr, + internal_timestamp_offset ); -impl UserDefinedLogicalNodeCore for WatermarkNode { +impl EventTimeWatermarkNode { + pub(crate) fn try_new( + upstream_plan: LogicalPlan, + namespace_qualifier: TableReference, + watermark_strategy_expr: Expr, + ) -> Result { + let resolved_schema = add_timestamp_field( + upstream_plan.schema().clone(), + Some(namespace_qualifier.clone()), + )?; + + let internal_timestamp_offset = resolved_schema + .index_of_column_by_name(None, INTERNAL_TIMESTAMP_COLUMN) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "Fatal: Failed to resolve mandatory temporal column '{}'", + INTERNAL_TIMESTAMP_COLUMN + )) + })?; + + Ok(Self { + upstream_plan, + namespace_qualifier, + watermark_strategy_expr, + resolved_schema, + internal_timestamp_offset, + }) + } + + pub(crate) fn generate_fs_schema(&self) -> FsSchema { + FsSchema::new_unkeyed( + 
Arc::new(self.resolved_schema.as_ref().into()), + self.internal_timestamp_offset, + ) + } + + fn compile_operator_config(&self, planner: &Planner) -> Result { + let physical_expr = planner.create_physical_expr( + &self.watermark_strategy_expr, + &self.resolved_schema, + )?; + + let serialized_expr = + serialize_physical_expr(&physical_expr, &DefaultPhysicalExtensionCodec {})?; + + Ok(ExpressionWatermarkConfig { + period_micros: DEFAULT_WATERMARK_EMISSION_PERIOD_MICROS, + idle_time_micros: None, + expression: serialized_expr.encode_to_vec(), + input_schema: Some(self.generate_fs_schema().into()), + }) + } +} + +// ----------------------------------------------------------------------------- +// DataFusion Logical Node Hooks +// ----------------------------------------------------------------------------- + +impl UserDefinedLogicalNodeCore for EventTimeWatermarkNode { fn name(&self) -> &str { - WATERMARK_NODE_NAME + EVENT_TIME_WATERMARK_NODE_NAME } fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.input] + vec![&self.upstream_plan] } fn schema(&self) -> &DFSchemaRef { - &self.schema + &self.resolved_schema } fn expressions(&self) -> Vec { - vec![self.watermark_expression.clone()] + vec![self.watermark_strategy_expr.clone()] } fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "WatermarkNode({}): {}", self.qualifier, self.schema) + write!( + f, + "EventTimeWatermarkNode({}): Schema={}", + self.namespace_qualifier, self.resolved_schema + ) } - fn with_exprs_and_inputs(&self, exprs: Vec, inputs: Vec) -> Result { + fn with_exprs_and_inputs( + &self, + mut exprs: Vec, + mut inputs: Vec, + ) -> Result { if inputs.len() != 1 { - return internal_err!("input size inconsistent"); + return internal_err!( + "EventTimeWatermarkNode requires exactly 1 upstream logical plan, but received {}", + inputs.len() + ); } if exprs.len() != 1 { - return internal_err!("expected one expression; found {}", exprs.len()); + return internal_err!( + 
"EventTimeWatermarkNode requires exactly 1 watermark strategy expression, but received {}", + exprs.len() + ); } - let timestamp_index = self - .schema - .index_of_column_by_name(Some(&self.qualifier), "_timestamp") - .ok_or_else(|| DataFusionError::Plan("missing timestamp column".to_string()))?; + let internal_timestamp_offset = self + .resolved_schema + .index_of_column_by_name(Some(&self.namespace_qualifier), INTERNAL_TIMESTAMP_COLUMN) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "Optimizer Error: Lost tracking of temporal column '{}'", + INTERNAL_TIMESTAMP_COLUMN + )) + })?; Ok(Self { - input: inputs[0].clone(), - qualifier: self.qualifier.clone(), - watermark_expression: exprs.into_iter().next().unwrap(), - schema: self.schema.clone(), - timestamp_index, + upstream_plan: inputs.remove(0), + namespace_qualifier: self.namespace_qualifier.clone(), + watermark_strategy_expr: exprs.remove(0), + resolved_schema: self.resolved_schema.clone(), + internal_timestamp_offset, }) } } -impl StreamExtension for WatermarkNode { - fn node_name(&self) -> Option { - Some(NamedNode::Watermark(self.qualifier.clone())) +// ----------------------------------------------------------------------------- +// Core Execution Blueprint Implementation +// ----------------------------------------------------------------------------- + +impl StreamingOperatorBlueprint for EventTimeWatermarkNode { + fn operator_identity(&self) -> Option { + Some(NamedNode::Watermark(self.namespace_qualifier.clone())) } - fn plan_node( + fn compile_to_graph_node( &self, planner: &Planner, - index: usize, - input_schemas: Vec, - ) -> Result { - let expression = planner.create_physical_expr(&self.watermark_expression, &self.schema)?; - let expression = serialize_physical_expr(&expression, &DefaultPhysicalExtensionCodec {})?; - let node = LogicalNode::single( - index as u32, - format!("watermark_{index}"), + node_index: usize, + mut upstream_schemas: Vec, + ) -> Result { + if upstream_schemas.len() != 1 { 
+ return plan_err!( + "Topology Violation: EventTimeWatermarkNode requires exactly 1 upstream input, received {}", + upstream_schemas.len() + ); + } + + let operator_config = self.compile_operator_config(planner)?; + + let execution_unit = LogicalNode::single( + node_index as u32, + format!("watermark_{node_index}"), OperatorName::ExpressionWatermark, - ExpressionWatermarkConfig { - period_micros: 1_000_000, - idle_time_micros: None, - expression: expression.encode_to_vec(), - input_schema: Some(self.arroyo_schema().into()), - } - .encode_to_vec(), - "watermark".to_string(), + operator_config.encode_to_vec(), + "watermark_generator".to_string(), 1, ); - let incoming_edge = - LogicalEdge::project_all(LogicalEdgeType::Forward, input_schemas[0].as_ref().clone()); - Ok(NodeWithIncomingEdges { - node, - edges: vec![incoming_edge], - }) - } - fn output_schema(&self) -> FsSchema { - self.arroyo_schema() - } -} + let incoming_edge = LogicalEdge::project_all( + LogicalEdgeType::Forward, + (*upstream_schemas.remove(0)).clone(), + ); -impl WatermarkNode { - pub(crate) fn new( - input: LogicalPlan, - qualifier: TableReference, - watermark_expression: Expr, - ) -> Result { - let schema = add_timestamp_field(input.schema().clone(), Some(qualifier.clone()))?; - let timestamp_index = schema - .index_of_column_by_name(None, "_timestamp") - .ok_or_else(|| DataFusionError::Plan("missing _timestamp column".to_string()))?; - Ok(Self { - input, - qualifier, - watermark_expression, - schema, - timestamp_index, + Ok(CompiledTopologyNode { + execution_unit, + routing_edges: vec![incoming_edge], }) } - pub(crate) fn arroyo_schema(&self) -> FsSchema { - FsSchema::new_unkeyed(Arc::new(self.schema.as_ref().into()), self.timestamp_index) + + fn yielded_schema(&self) -> FsSchema { + self.generate_fs_schema() } } diff --git a/src/sql/extensions/window_fn.rs b/src/sql/extensions/window_fn.rs deleted file mode 100644 index c2594546..00000000 --- a/src/sql/extensions/window_fn.rs +++ /dev/null @@ 
-1,123 +0,0 @@ -use std::sync::Arc; -use datafusion::common::{Column, DFSchema, DFSchemaRef, Result, plan_err}; -use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; -use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; -use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode}; -use prost::Message; -use protocol::grpc::api::WindowFunctionOperator; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_planner::FsPhysicalExtensionCodec; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::types::TIMESTAMP_FIELD; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use super::{ NodeWithIncomingEdges, StreamExtension}; - -pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension"; - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub(crate) struct WindowFunctionExtension { - window_plan: LogicalPlan, - key_fields: Vec, -} - -impl WindowFunctionExtension { - pub fn new(window_plan: LogicalPlan, key_fields: Vec) -> Self { - Self { - window_plan, - key_fields, - } - } -} - -impl UserDefinedLogicalNodeCore for WindowFunctionExtension { - fn name(&self) -> &str { - WINDOW_FUNCTION_EXTENSION_NAME - } - - fn inputs(&self) -> Vec<&LogicalPlan> { - vec![&self.window_plan] - } - - fn schema(&self) -> &DFSchemaRef { - self.window_plan.schema() - } - - fn expressions(&self) -> Vec { - vec![] - } - - fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "WindowFunction: {}", self.schema()) - } - - fn with_exprs_and_inputs( - &self, - _exprs: Vec, - inputs: Vec, - ) -> Result { - Ok(Self::new(inputs[0].clone(), self.key_fields.clone())) - } -} - -impl StreamExtension for WindowFunctionExtension { - fn node_name(&self) -> Option { - None - } - - fn plan_node( - &self, - planner: 
&Planner, - index: usize, - input_schemas: Vec, - ) -> Result { - if input_schemas.len() != 1 { - return plan_err!("WindowFunctionExtension requires exactly one input"); - } - let input_schema = input_schemas[0].clone(); - let input_df_schema = - Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone()).unwrap()); - - let binning_function = planner.create_physical_expr( - &Expr::Column(Column::new_unqualified(TIMESTAMP_FIELD.to_string())), - &input_df_schema, - )?; - let binning_function_proto = - serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {})?; - - let window_plan = planner.sync_plan(&self.window_plan)?; - let codec = FsPhysicalExtensionCodec::default(); - let window_plan_proto = PhysicalPlanNode::try_from_physical_plan(window_plan, &codec)?; - - let config = WindowFunctionOperator { - name: "WindowFunction".to_string(), - input_schema: Some(input_schema.as_ref().clone().into()), - binning_function: binning_function_proto.encode_to_vec(), - window_function_plan: window_plan_proto.encode_to_vec(), - }; - - let logical_node = LogicalNode::single( - index as u32, - format!("window_function_{index}"), - OperatorName::WindowFunction, - config.encode_to_vec(), - "window function".to_string(), - 1, - ); - - let edge = LogicalEdge::project_all( - // TODO: detect when this shuffle is unnecessary - LogicalEdgeType::Shuffle, - input_schema.as_ref().clone(), - ); - - Ok(NodeWithIncomingEdges { - node: logical_node, - edges: vec![edge], - }) - } - - fn output_schema(&self) -> FsSchema { - FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().clone().into())).unwrap() - } -} diff --git a/src/sql/extensions/windows_function.rs b/src/sql/extensions/windows_function.rs new file mode 100644 index 00000000..e53e2ee9 --- /dev/null +++ b/src/sql/extensions/windows_function.rs @@ -0,0 +1,197 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Formatter; +use std::sync::Arc; + +use datafusion::common::{Column, DFSchema, DFSchemaRef, Result, internal_err, plan_err}; +use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; +use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode}; +use prost::Message; +use protocol::grpc::api::WindowFunctionOperator; + +use crate::sql::common::{FsSchema, FsSchemaRef}; +use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::sql::logical_planner::FsPhysicalExtensionCodec; +use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::sql::types::TIMESTAMP_FIELD; + +use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; + +// ----------------------------------------------------------------------------- +// Constants & Identifiers +// ----------------------------------------------------------------------------- + +pub(crate) const STREAMING_WINDOW_NODE_NAME: &str = "StreamingWindowFunctionNode"; + +// ----------------------------------------------------------------------------- +// Logical Node Definition +// ----------------------------------------------------------------------------- + +/// Stateful streaming window: temporal binning plus underlying window evaluation plan. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub(crate) struct StreamingWindowFunctionNode { + pub(crate) underlying_evaluation_plan: LogicalPlan, + pub(crate) partition_key_indices: Vec, +} + +impl StreamingWindowFunctionNode { + pub fn new( + underlying_evaluation_plan: LogicalPlan, + partition_key_indices: Vec, + ) -> Self { + Self { + underlying_evaluation_plan, + partition_key_indices, + } + } + + fn compile_temporal_binning_function( + &self, + planner: &Planner, + input_df_schema: &DFSchema, + ) -> Result> { + let timestamp_column = Expr::Column(Column::new_unqualified(TIMESTAMP_FIELD.to_string())); + + let physical_binning_expr = + planner.create_physical_expr(×tamp_column, input_df_schema)?; + + let serialized_expr = + serialize_physical_expr(&physical_binning_expr, &DefaultPhysicalExtensionCodec {})?; + + Ok(serialized_expr.encode_to_vec()) + } + + fn compile_physical_evaluation_plan(&self, planner: &Planner) -> Result> { + let physical_window_plan = planner.sync_plan(&self.underlying_evaluation_plan)?; + + let proto_plan_node = PhysicalPlanNode::try_from_physical_plan( + physical_window_plan, + &FsPhysicalExtensionCodec::default(), + )?; + + Ok(proto_plan_node.encode_to_vec()) + } +} + +// ----------------------------------------------------------------------------- +// DataFusion Logical Node Hooks +// ----------------------------------------------------------------------------- + +impl UserDefinedLogicalNodeCore for StreamingWindowFunctionNode { + fn name(&self) -> &str { + STREAMING_WINDOW_NODE_NAME + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.underlying_evaluation_plan] + } + + fn schema(&self) -> &DFSchemaRef { + self.underlying_evaluation_plan.schema() + } + + fn expressions(&self) -> Vec { + vec![] + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!( + f, + "StreamingWindowFunction: Schema={}", + self.schema() + ) + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + mut inputs: 
Vec, + ) -> Result { + if inputs.len() != 1 { + return internal_err!( + "StreamingWindowFunctionNode requires exactly 1 upstream input, got {}", + inputs.len() + ); + } + + Ok(Self::new( + inputs.remove(0), + self.partition_key_indices.clone(), + )) + } +} + +// ----------------------------------------------------------------------------- +// Core Execution Blueprint Implementation +// ----------------------------------------------------------------------------- + +impl StreamingOperatorBlueprint for StreamingWindowFunctionNode { + fn operator_identity(&self) -> Option { + None + } + + fn compile_to_graph_node( + &self, + planner: &Planner, + node_index: usize, + mut input_schemas: Vec, + ) -> Result { + if input_schemas.len() != 1 { + return plan_err!( + "Topology Violation: StreamingWindowFunctionNode requires exactly 1 upstream input schema, received {}", + input_schemas.len() + ); + } + + let input_schema = input_schemas.remove(0); + + let input_df_schema = DFSchema::try_from(input_schema.schema.as_ref().clone())?; + + let binning_payload = self.compile_temporal_binning_function(planner, &input_df_schema)?; + let evaluation_plan_payload = self.compile_physical_evaluation_plan(planner)?; + + let operator_config = WindowFunctionOperator { + name: "WindowFunction".to_string(), + input_schema: Some(input_schema.as_ref().clone().into()), + binning_function: binning_payload, + window_function_plan: evaluation_plan_payload, + }; + + let logical_node = LogicalNode::single( + node_index as u32, + format!("window_function_{node_index}"), + OperatorName::WindowFunction, + operator_config.encode_to_vec(), + "streaming_window_evaluator".to_string(), + 1, + ); + + let routing_edge = LogicalEdge::project_all( + LogicalEdgeType::Shuffle, + (*input_schema).clone(), + ); + + Ok(CompiledTopologyNode { + execution_unit: logical_node, + routing_edges: vec![routing_edge], + }) + } + + fn yielded_schema(&self) -> FsSchema { + 
FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().clone().into())).expect( + "Fatal: Failed to generate unkeyed output schema for StreamingWindowFunctionNode", + ) + } +} diff --git a/src/sql/frontend_sql_coverage_tests.rs b/src/sql/frontend_sql_coverage_tests.rs index fa730614..cee4d82e 100644 --- a/src/sql/frontend_sql_coverage_tests.rs +++ b/src/sql/frontend_sql_coverage_tests.rs @@ -23,7 +23,7 @@ use crate::coordinator::Coordinator; use crate::sql::common::TIMESTAMP_FIELD; use crate::sql::parse::parse_sql; use crate::sql::rewrite_plan; -use crate::sql::schema::optimizer::produce_optimized_plan; +use crate::sql::logical_planner::optimizers::produce_optimized_plan; use crate::sql::schema::StreamSchemaProvider; fn assert_parses_as(sql: &str, type_prefix: &str) { diff --git a/src/sql/logical_node/logical.rs b/src/sql/logical_node/logical.rs deleted file mode 100644 index 9fa139d1..00000000 --- a/src/sql/logical_node/logical.rs +++ /dev/null @@ -1,378 +0,0 @@ -use itertools::Itertools; - -use datafusion::arrow::datatypes::DataType; -use petgraph::Direction; -use petgraph::dot::Dot; -use petgraph::graph::DiGraph; -use std::collections::{HashMap, HashSet}; -use std::fmt::{Debug, Display, Formatter}; -use std::sync::Arc; -use datafusion_proto::protobuf::ArrowType; -use prost::Message; -use strum::{Display, EnumString}; -use protocol::grpc::api; -use crate::sql::common::FsSchema; - -#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)] -pub enum OperatorName { - ExpressionWatermark, - ArrowValue, - ArrowKey, - Projection, - AsyncUdf, - Join, - InstantJoin, - LookupJoin, - WindowFunction, - TumblingWindowAggregate, - SlidingWindowAggregate, - SessionWindowAggregate, - UpdatingAggregate, - ConnectorSource, - ConnectorSink, -} - -#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] -pub enum LogicalEdgeType { - Forward, - Shuffle, - LeftJoin, - RightJoin, -} - -impl Display for LogicalEdgeType { - fn fmt(&self, f: &mut Formatter<'_>) -> 
std::fmt::Result { - match self { - LogicalEdgeType::Forward => write!(f, "→"), - LogicalEdgeType::Shuffle => write!(f, "⤨"), - LogicalEdgeType::LeftJoin => write!(f, "-[left]⤨"), - LogicalEdgeType::RightJoin => write!(f, "-[right]⤨"), - } - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct LogicalEdge { - pub edge_type: LogicalEdgeType, - pub schema: Arc, -} - -impl LogicalEdge { - pub fn new(edge_type: LogicalEdgeType, schema: FsSchema) -> Self { - LogicalEdge { - edge_type, - schema: Arc::new(schema), - } - } - - pub fn project_all(edge_type: LogicalEdgeType, schema: FsSchema) -> Self { - LogicalEdge { - edge_type, - schema: Arc::new(schema), - } - } -} - -#[derive(Clone, Debug)] -pub struct ChainedLogicalOperator { - pub operator_id: String, - pub operator_name: OperatorName, - pub operator_config: Vec, -} - -#[derive(Clone, Debug)] -pub struct OperatorChain { - pub(crate) operators: Vec, - pub(crate) edges: Vec>, -} - -impl OperatorChain { - pub fn new(operator: ChainedLogicalOperator) -> Self { - Self { - operators: vec![operator], - edges: vec![], - } - } - - pub fn iter( - &self, - ) -> impl Iterator>)> { - self.operators - .iter() - .zip_longest(self.edges.iter()) - .map(|e| e.left_and_right()) - .map(|(l, r)| (l.unwrap(), r)) - } - - pub fn iter_mut( - &mut self, - ) -> impl Iterator>)> { - self.operators - .iter_mut() - .zip_longest(self.edges.iter()) - .map(|e| e.left_and_right()) - .map(|(l, r)| (l.unwrap(), r)) - } - - pub fn first(&self) -> &ChainedLogicalOperator { - &self.operators[0] - } - - pub fn len(&self) -> usize { - self.operators.len() - } - - pub fn is_empty(&self) -> bool { - self.operators.is_empty() - } - - pub fn is_source(&self) -> bool { - self.operators[0].operator_name == OperatorName::ConnectorSource - } - - pub fn is_sink(&self) -> bool { - self.operators[0].operator_name == OperatorName::ConnectorSink - } -} - -#[derive(Clone)] -pub struct LogicalNode { - pub node_id: u32, - pub description: String, - pub 
operator_chain: OperatorChain, - pub parallelism: usize, -} - -impl LogicalNode { - pub fn single( - id: u32, - operator_id: String, - name: OperatorName, - config: Vec, - description: String, - parallelism: usize, - ) -> Self { - Self { - node_id: id, - description, - operator_chain: OperatorChain { - operators: vec![ChainedLogicalOperator { - operator_id, - operator_name: name, - operator_config: config, - }], - edges: vec![], - }, - parallelism, - } - } -} - -impl Display for LogicalNode { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.description) - } -} - -impl Debug for LogicalNode { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}[{}]", - self.operator_chain - .operators - .iter() - .map(|op| op.operator_id.clone()) - .collect::>() - .join(" -> "), - self.parallelism - ) - } -} - -pub type LogicalGraph = DiGraph; - -pub trait Optimizer { - fn optimize_once(&self, plan: &mut LogicalGraph) -> bool; - - fn optimize(&self, plan: &mut LogicalGraph) { - loop { - if !self.optimize_once(plan) { - break; - } - } - } -} - -#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd)] -pub struct DylibUdfConfig { - pub dylib_path: String, - pub arg_types: Vec, - pub return_type: DataType, - pub aggregate: bool, - pub is_async: bool, -} - -#[derive(Clone, Debug, Eq, PartialEq, Hash)] -pub struct PythonUdfConfig { - pub arg_types: Vec, - pub return_type: DataType, - pub name: Arc, - pub definition: Arc, -} - -#[derive(Clone, Debug, Default)] -pub struct ProgramConfig { - pub udf_dylibs: HashMap, - pub python_udfs: HashMap, -} - -#[derive(Clone, Debug, Default)] -pub struct LogicalProgram { - pub graph: LogicalGraph, - pub program_config: ProgramConfig, -} - -impl LogicalProgram { - pub fn new(graph: LogicalGraph, program_config: ProgramConfig) -> Self { - Self { - graph, - program_config, - } - } - - pub fn optimize(&mut self, optimizer: &dyn Optimizer) { - optimizer.optimize(&mut self.graph); - } - - pub 
fn update_parallelism(&mut self, overrides: &HashMap) { - for node in self.graph.node_weights_mut() { - if let Some(p) = overrides.get(&node.node_id) { - node.parallelism = *p; - } - } - } - - pub fn dot(&self) -> String { - format!("{:?}", Dot::with_config(&self.graph, &[])) - } - - pub fn task_count(&self) -> usize { - self.graph.node_weights().map(|nw| nw.parallelism).sum() - } - - pub fn sources(&self) -> HashSet { - self.graph - .externals(Direction::Incoming) - .map(|t| self.graph.node_weight(t).unwrap().node_id) - .collect() - } - - pub fn tasks_per_operator(&self) -> HashMap { - let mut tasks_per_operator = HashMap::new(); - for node in self.graph.node_weights() { - for op in &node.operator_chain.operators { - tasks_per_operator.insert(op.operator_id.clone(), node.parallelism); - } - } - tasks_per_operator - } - - pub fn operator_names_by_id(&self) -> HashMap { - let mut m = HashMap::new(); - for node in self.graph.node_weights() { - for op in &node.operator_chain.operators { - m.insert(op.operator_id.clone(), op.operator_name.to_string()); - } - } - m - } - - pub fn tasks_per_node(&self) -> HashMap { - let mut tasks_per_node = HashMap::new(); - for node in self.graph.node_weights() { - tasks_per_node.insert(node.node_id, node.parallelism); - } - tasks_per_node - } - - pub fn features(&self) -> HashSet { - let mut s = HashSet::new(); - for n in self.graph.node_weights() { - for t in &n.operator_chain.operators { - let feature = match &t.operator_name { - OperatorName::AsyncUdf => "async-udf".to_string(), - OperatorName::ExpressionWatermark - | OperatorName::ArrowValue - | OperatorName::ArrowKey - | OperatorName::Projection => continue, - OperatorName::Join => "join-with-expiration".to_string(), - OperatorName::InstantJoin => "windowed-join".to_string(), - OperatorName::WindowFunction => "sql-window-function".to_string(), - OperatorName::LookupJoin => "lookup-join".to_string(), - OperatorName::TumblingWindowAggregate => { - 
"sql-tumbling-window-aggregate".to_string() - } - OperatorName::SlidingWindowAggregate => { - "sql-sliding-window-aggregate".to_string() - } - OperatorName::SessionWindowAggregate => { - "sql-session-window-aggregate".to_string() - } - OperatorName::UpdatingAggregate => "sql-updating-aggregate".to_string(), - OperatorName::ConnectorSource => "connector-source".to_string(), - OperatorName::ConnectorSink => "connector-sink".to_string(), - }; - s.insert(feature); - } - } - s - } -} - - -impl From for api::DylibUdfConfig { - fn from(from: DylibUdfConfig) -> Self { - api::DylibUdfConfig { - dylib_path: from.dylib_path, - arg_types: from - .arg_types - .iter() - .map(|t| { - ArrowType::try_from(t) - .expect("unsupported data type") - .encode_to_vec() - }) - .collect(), - return_type: ArrowType::try_from(&from.return_type) - .expect("unsupported data type") - .encode_to_vec(), - aggregate: from.aggregate, - is_async: from.is_async, - } - } -} - -impl From for DylibUdfConfig { - fn from(from: api::DylibUdfConfig) -> Self { - DylibUdfConfig { - dylib_path: from.dylib_path, - arg_types: from - .arg_types - .iter() - .map(|t| { - DataType::try_from( - &ArrowType::decode(&mut t.as_slice()).expect("invalid arrow type"), - ) - .expect("invalid arrow type") - }) - .collect(), - return_type: DataType::try_from( - &ArrowType::decode(&mut from.return_type.as_slice()).unwrap(), - ) - .expect("invalid arrow type"), - aggregate: from.aggregate, - is_async: from.is_async, - } - } -} \ No newline at end of file diff --git a/src/sql/logical_node/logical/dylib_udf_config.rs b/src/sql/logical_node/logical/dylib_udf_config.rs new file mode 100644 index 00000000..6c88054f --- /dev/null +++ b/src/sql/logical_node/logical/dylib_udf_config.rs @@ -0,0 +1,71 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use datafusion::arrow::datatypes::DataType; +use datafusion_proto::protobuf::ArrowType; +use prost::Message; +use protocol::grpc::api; + +#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd)] +pub struct DylibUdfConfig { + pub dylib_path: String, + pub arg_types: Vec, + pub return_type: DataType, + pub aggregate: bool, + pub is_async: bool, +} + +impl From for api::DylibUdfConfig { + fn from(from: DylibUdfConfig) -> Self { + api::DylibUdfConfig { + dylib_path: from.dylib_path, + arg_types: from + .arg_types + .iter() + .map(|t| { + ArrowType::try_from(t) + .expect("unsupported data type") + .encode_to_vec() + }) + .collect(), + return_type: ArrowType::try_from(&from.return_type) + .expect("unsupported data type") + .encode_to_vec(), + aggregate: from.aggregate, + is_async: from.is_async, + } + } +} + +impl From for DylibUdfConfig { + fn from(from: api::DylibUdfConfig) -> Self { + DylibUdfConfig { + dylib_path: from.dylib_path, + arg_types: from + .arg_types + .iter() + .map(|t| { + DataType::try_from( + &ArrowType::decode(&mut t.as_slice()).expect("invalid arrow type"), + ) + .expect("invalid arrow type") + }) + .collect(), + return_type: DataType::try_from( + &ArrowType::decode(&mut from.return_type.as_slice()).unwrap(), + ) + .expect("invalid arrow type"), + aggregate: from.aggregate, + is_async: from.is_async, + } + } +} diff --git a/src/sql/logical_node/logical/logical_edge.rs b/src/sql/logical_node/logical/logical_edge.rs new file mode 100644 index 00000000..2f850988 --- /dev/null +++ b/src/sql/logical_node/logical/logical_edge.rs @@ 
-0,0 +1,57 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::{Display, Formatter}; +use std::sync::Arc; + +use crate::sql::common::FsSchema; + +#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +pub enum LogicalEdgeType { + Forward, + Shuffle, + LeftJoin, + RightJoin, +} + +impl Display for LogicalEdgeType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + LogicalEdgeType::Forward => write!(f, "→"), + LogicalEdgeType::Shuffle => write!(f, "⤨"), + LogicalEdgeType::LeftJoin => write!(f, "-[left]⤨"), + LogicalEdgeType::RightJoin => write!(f, "-[right]⤨"), + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct LogicalEdge { + pub edge_type: LogicalEdgeType, + pub schema: Arc, +} + +impl LogicalEdge { + pub fn new(edge_type: LogicalEdgeType, schema: FsSchema) -> Self { + LogicalEdge { + edge_type, + schema: Arc::new(schema), + } + } + + pub fn project_all(edge_type: LogicalEdgeType, schema: FsSchema) -> Self { + LogicalEdge { + edge_type, + schema: Arc::new(schema), + } + } +} diff --git a/src/sql/logical_node/logical/logical_graph.rs b/src/sql/logical_node/logical/logical_graph.rs new file mode 100644 index 00000000..b877e2a0 --- /dev/null +++ b/src/sql/logical_node/logical/logical_graph.rs @@ -0,0 +1,30 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use petgraph::graph::DiGraph; + +use super::logical_edge::LogicalEdge; +use super::logical_node::LogicalNode; + +pub type LogicalGraph = DiGraph; + +pub trait Optimizer { + fn optimize_once(&self, plan: &mut LogicalGraph) -> bool; + + fn optimize(&self, plan: &mut LogicalGraph) { + loop { + if !self.optimize_once(plan) { + break; + } + } + } +} diff --git a/src/sql/logical_node/logical/logical_node.rs b/src/sql/logical_node/logical/logical_node.rs new file mode 100644 index 00000000..492eae26 --- /dev/null +++ b/src/sql/logical_node/logical/logical_node.rs @@ -0,0 +1,71 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::fmt::{Debug, Display, Formatter}; + +use super::operator_chain::{ChainedLogicalOperator, OperatorChain}; +use super::operator_name::OperatorName; + +#[derive(Clone)] +pub struct LogicalNode { + pub node_id: u32, + pub description: String, + pub operator_chain: OperatorChain, + pub parallelism: usize, +} + +impl LogicalNode { + pub fn single( + id: u32, + operator_id: String, + name: OperatorName, + config: Vec, + description: String, + parallelism: usize, + ) -> Self { + Self { + node_id: id, + description, + operator_chain: OperatorChain { + operators: vec![ChainedLogicalOperator { + operator_id, + operator_name: name, + operator_config: config, + }], + edges: vec![], + }, + parallelism, + } + } +} + +impl Display for LogicalNode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.description) + } +} + +impl Debug for LogicalNode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}[{}]", + self.operator_chain + .operators + .iter() + .map(|op| op.operator_id.clone()) + .collect::>() + .join(" -> "), + self.parallelism + ) + } +} diff --git a/src/sql/logical_node/logical/logical_program.rs b/src/sql/logical_node/logical/logical_program.rs new file mode 100644 index 00000000..db6883b8 --- /dev/null +++ b/src/sql/logical_node/logical/logical_program.rs @@ -0,0 +1,123 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::{HashMap, HashSet}; + +use petgraph::Direction; +use petgraph::dot::Dot; + +use super::logical_graph::{LogicalGraph, Optimizer}; +use super::operator_name::OperatorName; +use super::program_config::ProgramConfig; + +#[derive(Clone, Debug, Default)] +pub struct LogicalProgram { + pub graph: LogicalGraph, + pub program_config: ProgramConfig, +} + +impl LogicalProgram { + pub fn new(graph: LogicalGraph, program_config: ProgramConfig) -> Self { + Self { + graph, + program_config, + } + } + + pub fn optimize(&mut self, optimizer: &dyn Optimizer) { + optimizer.optimize(&mut self.graph); + } + + pub fn update_parallelism(&mut self, overrides: &HashMap) { + for node in self.graph.node_weights_mut() { + if let Some(p) = overrides.get(&node.node_id) { + node.parallelism = *p; + } + } + } + + pub fn dot(&self) -> String { + format!("{:?}", Dot::with_config(&self.graph, &[])) + } + + pub fn task_count(&self) -> usize { + self.graph.node_weights().map(|nw| nw.parallelism).sum() + } + + pub fn sources(&self) -> HashSet { + self.graph + .externals(Direction::Incoming) + .map(|t| self.graph.node_weight(t).unwrap().node_id) + .collect() + } + + pub fn tasks_per_operator(&self) -> HashMap { + let mut tasks_per_operator = HashMap::new(); + for node in self.graph.node_weights() { + for op in &node.operator_chain.operators { + tasks_per_operator.insert(op.operator_id.clone(), node.parallelism); + } + } + tasks_per_operator + } + + pub fn operator_names_by_id(&self) -> HashMap { + let mut m = HashMap::new(); + for node in self.graph.node_weights() { + for op in &node.operator_chain.operators { + m.insert(op.operator_id.clone(), op.operator_name.to_string()); + } + } + m + } + + pub fn tasks_per_node(&self) -> HashMap { + let mut tasks_per_node = HashMap::new(); + for node in self.graph.node_weights() { + tasks_per_node.insert(node.node_id, node.parallelism); + } + tasks_per_node + } + + pub fn features(&self) -> HashSet { + let mut s = HashSet::new(); + for n in 
self.graph.node_weights() { + for t in &n.operator_chain.operators { + let feature = match &t.operator_name { + OperatorName::AsyncUdf => "async-udf".to_string(), + OperatorName::ExpressionWatermark + | OperatorName::ArrowValue + | OperatorName::ArrowKey + | OperatorName::Projection => continue, + OperatorName::Join => "join-with-expiration".to_string(), + OperatorName::InstantJoin => "windowed-join".to_string(), + OperatorName::WindowFunction => "sql-window-function".to_string(), + OperatorName::LookupJoin => "lookup-join".to_string(), + OperatorName::TumblingWindowAggregate => { + "sql-tumbling-window-aggregate".to_string() + } + OperatorName::SlidingWindowAggregate => { + "sql-sliding-window-aggregate".to_string() + } + OperatorName::SessionWindowAggregate => { + "sql-session-window-aggregate".to_string() + } + OperatorName::UpdatingAggregate => "sql-updating-aggregate".to_string(), + OperatorName::ConnectorSource => "connector-source".to_string(), + OperatorName::ConnectorSink => "connector-sink".to_string(), + }; + s.insert(feature); + } + } + s + } +} diff --git a/src/sql/logical_node/logical/mod.rs b/src/sql/logical_node/logical/mod.rs new file mode 100644 index 00000000..96dd2ce5 --- /dev/null +++ b/src/sql/logical_node/logical/mod.rs @@ -0,0 +1,30 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +mod dylib_udf_config; +mod logical_edge; +mod logical_graph; +mod logical_node; +mod logical_program; +mod operator_chain; +mod operator_name; +mod program_config; +mod python_udf_config; + +pub use dylib_udf_config::DylibUdfConfig; +pub use logical_edge::{LogicalEdge, LogicalEdgeType}; +pub use logical_graph::{LogicalGraph, Optimizer}; +pub use logical_node::LogicalNode; +pub use logical_program::LogicalProgram; +pub use operator_name::OperatorName; +pub use program_config::ProgramConfig; +pub use python_udf_config::PythonUdfConfig; diff --git a/src/sql/logical_node/logical/operator_chain.rs b/src/sql/logical_node/logical/operator_chain.rs new file mode 100644 index 00000000..e3db96b2 --- /dev/null +++ b/src/sql/logical_node/logical/operator_chain.rs @@ -0,0 +1,80 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; + +use itertools::Itertools; + +use super::operator_name::OperatorName; +use crate::sql::common::FsSchema; + +#[derive(Clone, Debug)] +pub struct ChainedLogicalOperator { + pub operator_id: String, + pub operator_name: OperatorName, + pub operator_config: Vec, +} + +#[derive(Clone, Debug)] +pub struct OperatorChain { + pub(crate) operators: Vec, + pub(crate) edges: Vec>, +} + +impl OperatorChain { + pub fn new(operator: ChainedLogicalOperator) -> Self { + Self { + operators: vec![operator], + edges: vec![], + } + } + + pub fn iter( + &self, + ) -> impl Iterator>)> { + self.operators + .iter() + .zip_longest(self.edges.iter()) + .map(|e| e.left_and_right()) + .map(|(l, r)| (l.unwrap(), r)) + } + + pub fn iter_mut( + &mut self, + ) -> impl Iterator>)> { + self.operators + .iter_mut() + .zip_longest(self.edges.iter()) + .map(|e| e.left_and_right()) + .map(|(l, r)| (l.unwrap(), r)) + } + + pub fn first(&self) -> &ChainedLogicalOperator { + &self.operators[0] + } + + pub fn len(&self) -> usize { + self.operators.len() + } + + pub fn is_empty(&self) -> bool { + self.operators.is_empty() + } + + pub fn is_source(&self) -> bool { + self.operators[0].operator_name == OperatorName::ConnectorSource + } + + pub fn is_sink(&self) -> bool { + self.operators[0].operator_name == OperatorName::ConnectorSink + } +} diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs new file mode 100644 index 00000000..057d8e82 --- /dev/null +++ b/src/sql/logical_node/logical/operator_name.rs @@ -0,0 +1,32 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use strum::{Display, EnumString}; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)] +pub enum OperatorName { + ExpressionWatermark, + ArrowValue, + ArrowKey, + Projection, + AsyncUdf, + Join, + InstantJoin, + LookupJoin, + WindowFunction, + TumblingWindowAggregate, + SlidingWindowAggregate, + SessionWindowAggregate, + UpdatingAggregate, + ConnectorSource, + ConnectorSink, +} diff --git a/src/sql/logical_node/logical/program_config.rs b/src/sql/logical_node/logical/program_config.rs new file mode 100644 index 00000000..38c76e66 --- /dev/null +++ b/src/sql/logical_node/logical/program_config.rs @@ -0,0 +1,22 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::HashMap; + +use super::dylib_udf_config::DylibUdfConfig; +use super::python_udf_config::PythonUdfConfig; + +#[derive(Clone, Debug, Default)] +pub struct ProgramConfig { + pub udf_dylibs: HashMap, + pub python_udfs: HashMap, +} diff --git a/src/sql/logical_node/logical/python_udf_config.rs b/src/sql/logical_node/logical/python_udf_config.rs new file mode 100644 index 00000000..6e7d5c66 --- /dev/null +++ b/src/sql/logical_node/logical/python_udf_config.rs @@ -0,0 +1,23 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use datafusion::arrow::datatypes::DataType; + +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct PythonUdfConfig { + pub arg_types: Vec, + pub return_type: DataType, + pub name: Arc, + pub definition: Arc, +} diff --git a/src/sql/logical_planner/optimizers.rs b/src/sql/logical_planner/optimizers/chaining.rs similarity index 81% rename from src/sql/logical_planner/optimizers.rs rename to src/sql/logical_planner/optimizers/chaining.rs index bdf32657..5935c985 100644 --- a/src/sql/logical_planner/optimizers.rs +++ b/src/sql/logical_planner/optimizers/chaining.rs @@ -1,7 +1,21 @@ -use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer}; +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::mem; + use petgraph::prelude::*; use petgraph::visit::NodeRef; -use std::mem; + +use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer}; pub struct ChainingOptimizer {} diff --git a/src/sql/schema/optimizer.rs b/src/sql/logical_planner/optimizers/datafusion_logical.rs similarity index 100% rename from src/sql/schema/optimizer.rs rename to src/sql/logical_planner/optimizers/datafusion_logical.rs diff --git a/src/sql/logical_planner/optimizers/mod.rs b/src/sql/logical_planner/optimizers/mod.rs new file mode 100644 index 00000000..0e0de6a2 --- /dev/null +++ b/src/sql/logical_planner/optimizers/mod.rs @@ -0,0 +1,20 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Logical planner optimizers: graph-level chaining ([`ChainingOptimizer`]) and +//! DataFusion SQL logical-plan rules ([`produce_optimized_plan`]). 
+ +mod chaining; +mod datafusion_logical; + +pub use chaining::ChainingOptimizer; +pub use datafusion_logical::produce_optimized_plan; diff --git a/src/sql/logical_planner/planner.rs b/src/sql/logical_planner/planner.rs index 0f2075c1..bd25423c 100644 --- a/src/sql/logical_planner/planner.rs +++ b/src/sql/logical_planner/planner.rs @@ -34,11 +34,9 @@ use crate::sql::logical_node::logical::{LogicalEdge, LogicalGraph, LogicalNode}; use crate::sql::logical_planner::{ DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec, }; -use crate::sql::extensions::debezium::{ - DEBEZIUM_UNROLLING_EXTENSION_NAME, DebeziumUnrollingExtension, TO_DEBEZIUM_EXTENSION_NAME, -}; -use crate::sql::extensions::key_calculation::KeyCalculationExtension; -use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension}; +use crate::sql::extensions::debezium::{PACK_NODE_NAME, UNROLL_NODE_NAME, UnrollDebeziumPayloadNode}; +use crate::sql::extensions::key_calculation::KeyExtractionNode; +use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::schema::utils::add_timestamp_field_arrow; use crate::sql::schema::StreamSchemaProvider; use crate::sql::common::{FsSchema, FsSchemaRef}; @@ -238,21 +236,21 @@ impl ExtensionPlanner for FsExtensionPlanner { _session_state: &SessionState, ) -> Result>> { let schema = node.schema().as_ref().into(); - if let Ok::<&dyn StreamExtension, _>(stream_extension) = node.try_into() { - if stream_extension.transparent() { + if let Ok::<&dyn StreamingOperatorBlueprint, _>(stream_extension) = node.try_into() { + if stream_extension.is_passthrough_boundary() { match node.name() { - DEBEZIUM_UNROLLING_EXTENSION_NAME => { + UNROLL_NODE_NAME => { let node = node .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); let input = physical_inputs[0].clone(); return Ok(Some(Arc::new(DebeziumUnrollingExec::try_new( input, - node.primary_keys.clone(), + node.pk_indices.clone(), )?))); } - 
TO_DEBEZIUM_EXTENSION_NAME => { + PACK_NODE_NAME => { let input = physical_inputs[0].clone(); return Ok(Some(Arc::new(ToDebeziumExec::try_new(input)?))); } @@ -261,8 +259,8 @@ impl ExtensionPlanner for FsExtensionPlanner { } }; let name = - if let Some(key_extension) = node.as_any().downcast_ref::() { - key_extension.name.clone() + if let Some(key_extension) = node.as_any().downcast_ref::() { + key_extension.operator_label.clone() } else { None }; @@ -293,9 +291,9 @@ impl PlanToGraphVisitor<'_> { pub fn build_extension( &mut self, input_nodes: Vec, - extension: &dyn StreamExtension, + extension: &dyn StreamingOperatorBlueprint, ) -> Result<()> { - if let Some(node_name) = extension.node_name() { + if let Some(node_name) = extension.operator_identity() { if self.named_nodes.contains_key(&node_name) { return plan_err!( "extension {:?} has already been planned, shouldn't try again.", @@ -315,21 +313,24 @@ impl PlanToGraphVisitor<'_> { }) .collect::>>()?; - let NodeWithIncomingEdges { node, edges } = extension - .plan_node(&self.planner, self.graph.node_count(), input_schemas) + let CompiledTopologyNode { + execution_unit, + routing_edges, + } = extension + .compile_to_graph_node(&self.planner, self.graph.node_count(), input_schemas) .map_err(|e| e.context(format!("planning operator {extension:?}")))?; - let node_index = self.graph.add_node(node); + let node_index = self.graph.add_node(execution_unit); self.add_index_to_traversal(node_index); - for (source, edge) in input_nodes.into_iter().zip(edges.into_iter()) { + for (source, edge) in input_nodes.into_iter().zip(routing_edges.into_iter()) { self.graph.add_edge(source, node_index, edge); } self.output_schemas - .insert(node_index, extension.output_schema().into()); + .insert(node_index, extension.yielded_schema().into()); - if let Some(node_name) = extension.node_name() { + if let Some(node_name) = extension.operator_identity() { self.named_nodes.insert(node_name, node_index); } Ok(()) @@ -344,14 +345,14 @@ impl 
TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> { return Ok(TreeNodeRecursion::Continue); }; - let stream_extension: &dyn StreamExtension = node + let stream_extension: &dyn StreamingOperatorBlueprint = node .try_into() .map_err(|e: DataFusionError| e.context("converting extension"))?; - if stream_extension.transparent() { + if stream_extension.is_passthrough_boundary() { return Ok(TreeNodeRecursion::Continue); } - if let Some(name) = stream_extension.node_name() { + if let Some(name) = stream_extension.operator_identity() { if let Some(node_index) = self.named_nodes.get(&name) { self.add_index_to_traversal(*node_index); return Ok(TreeNodeRecursion::Jump); @@ -370,15 +371,15 @@ impl TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> { return Ok(TreeNodeRecursion::Continue); }; - let stream_extension: &dyn StreamExtension = node + let stream_extension: &dyn StreamingOperatorBlueprint = node .try_into() .map_err(|e: DataFusionError| e.context("planning extension"))?; - if stream_extension.transparent() { + if stream_extension.is_passthrough_boundary() { return Ok(TreeNodeRecursion::Continue); } - if let Some(name) = stream_extension.node_name() { + if let Some(name) = stream_extension.operator_identity() { if self.named_nodes.contains_key(&name) { return Ok(TreeNodeRecursion::Continue); } @@ -389,7 +390,7 @@ impl TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> { } else { vec![] }; - let stream_extension: &dyn StreamExtension = node + let stream_extension: &dyn StreamingOperatorBlueprint = node .try_into() .map_err(|e: DataFusionError| e.context("converting extension"))?; self.build_extension(input_nodes, stream_extension)?; diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 6e17e0f2..fc89787a 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -22,7 +22,7 @@ pub mod analysis; pub(crate) mod extensions; pub mod types; -pub use schema::StreamSchemaProvider; +pub use schema::{StreamPlanningContext, StreamSchemaProvider}; pub use parse::parse_sql; pub use 
analysis::rewrite_plan; pub use logical_planner::CompiledSql; diff --git a/src/sql/schema/column_descriptor.rs b/src/sql/schema/column_descriptor.rs new file mode 100644 index 00000000..941a7500 --- /dev/null +++ b/src/sql/schema/column_descriptor.rs @@ -0,0 +1,136 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use datafusion::arrow::datatypes::{DataType, Field, TimeUnit}; +use datafusion::logical_expr::Expr; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ColumnDescriptor { + Physical(Field), + SystemMeta { + field: Field, + meta_key: String, + }, + Computed { + field: Field, + logic: Box, + }, +} + +impl ColumnDescriptor { + #[inline] + pub fn new_physical(field: Field) -> Self { + Self::Physical(field) + } + + #[inline] + pub fn new_system_meta(field: Field, meta_key: impl Into) -> Self { + Self::SystemMeta { + field, + meta_key: meta_key.into(), + } + } + + #[inline] + pub fn new_computed(field: Field, logic: Expr) -> Self { + Self::Computed { + field, + logic: Box::new(logic), + } + } + + #[inline] + pub fn arrow_field(&self) -> &Field { + match self { + Self::Physical(f) => f, + Self::SystemMeta { field: f, .. } => f, + Self::Computed { field: f, .. } => f, + } + } + + #[inline] + pub fn into_arrow_field(self) -> Field { + match self { + Self::Physical(f) => f, + Self::SystemMeta { field: f, .. } => f, + Self::Computed { field: f, .. } => f, + } + } + + #[inline] + pub fn is_computed(&self) -> bool { + matches!(self, Self::Computed { .. 
}) + } + + #[inline] + pub fn is_physical(&self) -> bool { + matches!(self, Self::Physical(_)) + } + + #[inline] + pub fn system_meta_key(&self) -> Option<&str> { + if let Self::SystemMeta { meta_key, .. } = self { + Some(meta_key.as_str()) + } else { + None + } + } + + #[inline] + pub fn computation_logic(&self) -> Option<&Expr> { + if let Self::Computed { logic, .. } = self { + Some(logic) + } else { + None + } + } + + #[inline] + pub fn data_type(&self) -> &DataType { + self.arrow_field().data_type() + } + + pub fn force_precision(&mut self, unit: TimeUnit) { + match self { + Self::Physical(f) => { + if let DataType::Timestamp(_, tz) = f.data_type() { + *f = Field::new(f.name(), DataType::Timestamp(unit, tz.clone()), f.is_nullable()); + } + } + Self::SystemMeta { field, .. } => { + if let DataType::Timestamp(_, tz) = field.data_type() { + *field = Field::new( + field.name(), + DataType::Timestamp(unit, tz.clone()), + field.is_nullable(), + ); + } + } + Self::Computed { field, .. } => { + if let DataType::Timestamp(_, tz) = field.data_type() { + *field = Field::new( + field.name(), + DataType::Timestamp(unit, tz.clone()), + field.is_nullable(), + ); + } + } + } + } +} + +impl From for ColumnDescriptor { + #[inline] + fn from(field: Field) -> Self { + Self::Physical(field) + } +} diff --git a/src/sql/schema/connector.rs b/src/sql/schema/connection_type.rs similarity index 100% rename from src/sql/schema/connector.rs rename to src/sql/schema/connection_type.rs diff --git a/src/sql/schema/connector_table.rs b/src/sql/schema/connector_table.rs deleted file mode 100644 index 25e37184..00000000 --- a/src/sql/schema/connector_table.rs +++ /dev/null @@ -1,205 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; -use std::time::Duration; - -use datafusion::arrow::datatypes::{FieldRef, Schema}; -use datafusion::common::{Result, plan_err}; -use datafusion::logical_expr::Expr; -use protocol::grpc::api::ConnectorOp; -use super::field_spec::FieldSpec; -use crate::multifield_partial_ord; -use crate::sql::schema::ConnectionType; -use crate::sql::schema::table::SqlSource; -use crate::sql::types::ProcessingMode; - -/// Represents a table backed by an external connector (e.g., Kafka, Pulsar, NATS). -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ConnectorTable { - pub id: Option, - pub connector: String, - pub name: String, - pub connection_type: ConnectionType, - pub fields: Vec, - pub config: String, - pub description: String, - pub event_time_field: Option, - pub watermark_field: Option, - pub idle_time: Option, - pub primary_keys: Arc>, - pub inferred_fields: Option>, - pub partition_exprs: Arc>>, - pub lookup_cache_max_bytes: Option, - pub lookup_cache_ttl: Option, -} - -multifield_partial_ord!( - ConnectorTable, - id, - connector, - name, - connection_type, - config, - description, - event_time_field, - watermark_field, - idle_time, - primary_keys -); - -impl ConnectorTable { - pub fn new( - name: impl Into, - connector: impl Into, - connection_type: ConnectionType, - ) -> Self { - Self { - id: None, - connector: connector.into(), - name: name.into(), - connection_type, - fields: Vec::new(), - config: String::new(), - description: String::new(), - event_time_field: None, - watermark_field: None, - idle_time: None, - 
primary_keys: Arc::new(Vec::new()), - inferred_fields: None, - partition_exprs: Arc::new(None), - lookup_cache_max_bytes: None, - lookup_cache_ttl: None, - } - } - - pub fn has_virtual_fields(&self) -> bool { - self.fields.iter().any(|f| f.is_virtual()) - } - - pub fn is_updating(&self) -> bool { - // TODO: check format for debezium/update mode - false - } - - pub fn physical_schema(&self) -> Schema { - Schema::new( - self.fields - .iter() - .filter(|f| !f.is_virtual()) - .map(|f| f.field().clone()) - .collect::>(), - ) - } - - pub fn connector_op(&self) -> ConnectorOp { - ConnectorOp { - connector: self.connector.clone(), - config: self.config.clone(), - description: self.description.clone(), - } - } - - pub fn processing_mode(&self) -> ProcessingMode { - if self.is_updating() { - ProcessingMode::Update - } else { - ProcessingMode::Append - } - } - - pub fn timestamp_override(&self) -> Result> { - if let Some(field_name) = &self.event_time_field { - if self.is_updating() { - return plan_err!("can't use event_time_field with update mode"); - } - let _field = self.get_time_field(field_name)?; - Ok(Some(Expr::Column(datafusion::common::Column::from_name( - field_name, - )))) - } else { - Ok(None) - } - } - - fn get_time_field(&self, field_name: &str) -> Result<&FieldSpec> { - self.fields - .iter() - .find(|f| { - f.field().name() == field_name - && matches!( - f.field().data_type(), - datafusion::arrow::datatypes::DataType::Timestamp(..) 
- ) - }) - .ok_or_else(|| { - datafusion::error::DataFusionError::Plan(format!( - "field {field_name} not found or not a timestamp" - )) - }) - } - - pub fn watermark_column(&self) -> Result> { - if let Some(field_name) = &self.watermark_field { - let _field = self.get_time_field(field_name)?; - Ok(Some(Expr::Column(datafusion::common::Column::from_name( - field_name, - )))) - } else { - Ok(None) - } - } - - pub fn as_sql_source(&self) -> Result { - match self.connection_type { - ConnectionType::Source => {} - ConnectionType::Sink | ConnectionType::Lookup => { - return plan_err!("cannot read from sink"); - } - }; - - if self.is_updating() && self.has_virtual_fields() { - return plan_err!("can't read from a source with virtual fields and update mode."); - } - - let timestamp_override = self.timestamp_override()?; - let watermark_column = self.watermark_column()?; - - let source = SqlSource { - id: self.id, - struct_def: self - .fields - .iter() - .filter(|f| !f.is_virtual()) - .map(|f| Arc::new(f.field().clone())) - .collect(), - config: self.connector_op(), - processing_mode: self.processing_mode(), - idle_time: self.idle_time, - }; - - Ok(SourceOperator { - name: self.name.clone(), - source, - timestamp_override, - watermark_column, - }) - } -} - -#[derive(Debug, Clone)] -pub struct SourceOperator { - pub name: String, - pub source: SqlSource, - pub timestamp_override: Option, - pub watermark_column: Option, -} diff --git a/src/sql/schema/data_encoding_format.rs b/src/sql/schema/data_encoding_format.rs new file mode 100644 index 00000000..5b93c90a --- /dev/null +++ b/src/sql/schema/data_encoding_format.rs @@ -0,0 +1,82 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; + +use datafusion::arrow::datatypes::{DataType, Field}; +use datafusion::common::{Result, plan_err}; + +use super::column_descriptor::ColumnDescriptor; +use crate::sql::common::Format; + +/// High-level payload encoding (orthogonal to `Format` wire details in `ConnectionSchema`). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum DataEncodingFormat { + StandardJson, + DebeziumJson, + Avro, + Parquet, + Raw, +} + +impl DataEncodingFormat { + pub fn extract_from_map(opts: &HashMap) -> Result { + let format_str = opts.get("format").map(|s| s.as_str()).unwrap_or("json"); + let is_debezium = opts + .get("format.debezium") + .or_else(|| opts.get("json.debezium")) + .map(|s| s == "true") + .unwrap_or(false); + + match (format_str, is_debezium) { + ("json", true) | ("debezium_json", _) => Ok(Self::DebeziumJson), + ("json", false) => Ok(Self::StandardJson), + ("avro", _) => Ok(Self::Avro), + ("parquet", _) => Ok(Self::Parquet), + _ => Ok(Self::Raw), + } + } + + pub fn from_connection_format(format: &Format) -> Self { + match format { + Format::Json(j) if j.debezium => Self::DebeziumJson, + Format::Json(_) => Self::StandardJson, + Format::Avro(_) => Self::Avro, + Format::Parquet(_) => Self::Parquet, + Format::Protobuf(_) | Format::RawString(_) | Format::RawBytes(_) => Self::Raw, + } + } + + pub fn supports_delta_updates(&self) -> bool { + matches!(self, Self::DebeziumJson) + } + + pub fn apply_envelope(self, columns: Vec) -> Result> { + if !self.supports_delta_updates() { + return Ok(columns); + } + if 
columns.iter().any(|c| c.is_computed()) { + return plan_err!("Virtual fields are not supported with CDC envelope"); + } + if columns.is_empty() { + return Ok(columns); + } + let fields: Vec = columns.into_iter().map(|c| c.into_arrow_field()).collect(); + let struct_type = DataType::Struct(fields.into()); + + Ok(vec![ + ColumnDescriptor::new_physical(Field::new("before", struct_type.clone(), true)), + ColumnDescriptor::new_physical(Field::new("after", struct_type.clone(), true)), + ColumnDescriptor::new_physical(Field::new("op", DataType::Utf8, true)), + ]) + } +} diff --git a/src/sql/schema/field_spec.rs b/src/sql/schema/field_spec.rs deleted file mode 100644 index 2fe8a50e..00000000 --- a/src/sql/schema/field_spec.rs +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use datafusion::arrow::datatypes::Field; -use datafusion::logical_expr::Expr; - -/// Describes how a field in a connector table should be interpreted. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum FieldSpec { - /// A regular struct field that maps to a column in the data. - Struct(Field), - /// A metadata field extracted from message metadata (e.g., Kafka headers). - Metadata { field: Field, key: String }, - /// A virtual field computed from an expression over other fields. - Virtual { field: Field, expression: Box }, -} - -impl FieldSpec { - pub fn is_virtual(&self) -> bool { - matches!(self, FieldSpec::Virtual { .. 
}) - } - - pub fn field(&self) -> &Field { - match self { - FieldSpec::Struct(f) => f, - FieldSpec::Metadata { field, .. } => field, - FieldSpec::Virtual { field, .. } => field, - } - } - - pub fn metadata_key(&self) -> Option<&str> { - match self { - FieldSpec::Metadata { key, .. } => Some(key.as_str()), - _ => None, - } - } -} - -impl From for FieldSpec { - fn from(value: Field) -> Self { - FieldSpec::Struct(value) - } -} diff --git a/src/sql/schema/insert.rs b/src/sql/schema/insert.rs deleted file mode 100644 index fe91325b..00000000 --- a/src/sql/schema/insert.rs +++ /dev/null @@ -1,55 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use datafusion::common::Result; -use datafusion::logical_expr::{DmlStatement, LogicalPlan, WriteOp}; -use datafusion::sql::sqlparser::ast::Statement; - -use super::optimizer::produce_optimized_plan; -use crate::sql::schema::StreamSchemaProvider; - -/// Represents an INSERT operation in a streaming SQL pipeline. -#[derive(Debug)] -pub enum Insert { - /// Insert into a named sink table. - InsertQuery { - sink_name: String, - logical_plan: LogicalPlan, - }, - /// An anonymous query (no explicit INSERT target). 
- Anonymous { logical_plan: LogicalPlan }, -} - -impl Insert { - pub fn try_from_statement( - statement: &Statement, - schema_provider: &StreamSchemaProvider, - ) -> Result { - let logical_plan = produce_optimized_plan(statement, schema_provider)?; - - match &logical_plan { - LogicalPlan::Dml(DmlStatement { - table_name, - op: WriteOp::Insert(_), - input, - .. - }) => { - let sink_name = table_name.to_string(); - Ok(Insert::InsertQuery { - sink_name, - logical_plan: (**input).clone(), - }) - } - _ => Ok(Insert::Anonymous { logical_plan }), - } - } -} diff --git a/src/sql/schema/mod.rs b/src/sql/schema/mod.rs index 0bf7e4ea..cac86d52 100644 --- a/src/sql/schema/mod.rs +++ b/src/sql/schema/mod.rs @@ -10,18 +10,34 @@ // See the License for the specific language governing permissions and // limitations under the License. -pub mod connector; -pub mod connector_table; -pub mod field_spec; -pub mod insert; -pub mod optimizer; +pub mod column_descriptor; +pub mod connection_type; +pub mod source_table; +pub mod data_encoding_format; +pub mod schema_context; pub mod schema_provider; pub mod table; +pub mod table_execution_unit; +pub mod table_role; +pub mod temporal_pipeline_config; pub mod utils; -pub use connector::{ConnectionType}; -pub use connector_table::{ConnectorTable, SourceOperator}; -pub use field_spec::FieldSpec; -pub use insert::Insert; -pub use schema_provider::{LogicalBatchInput, StreamSchemaProvider, StreamTable}; +pub use column_descriptor::ColumnDescriptor; +pub use connection_type::ConnectionType; +pub use source_table::{SourceOperator, SourceTable}; + +/// Back-compat alias for [`SourceTable`]. 
+pub type ConnectorTable = SourceTable; +pub use data_encoding_format::DataEncodingFormat; +pub use schema_context::{DfSchemaContext, SchemaContext}; +pub use schema_provider::{ + FunctionCatalog, LogicalBatchInput, ObjectName, StreamPlanningContext, + StreamPlanningContextBuilder, StreamSchemaProvider, StreamTable, TableCatalog, +}; pub use table::Table; +pub use table_execution_unit::{EngineDescriptor, SyncMode, TableExecutionUnit}; +pub use table_role::{ + apply_adapter_specific_rules, deduce_role, serialize_backend_params, validate_adapter_availability, + TableRole, +}; +pub use temporal_pipeline_config::{resolve_temporal_logic, TemporalPipelineConfig, TemporalSpec}; diff --git a/src/sql/schema/schema_context.rs b/src/sql/schema/schema_context.rs new file mode 100644 index 00000000..232fd9e7 --- /dev/null +++ b/src/sql/schema/schema_context.rs @@ -0,0 +1,37 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use datafusion::arrow::datatypes::{DataType, Schema}; +use datafusion::common::{Result, DFSchema}; +use datafusion::logical_expr::Expr; +use datafusion_expr::ExprSchemable; + +pub trait SchemaContext { + fn resolve_expression(&self, expr: &Expr, schema: &Schema) -> Result; + fn extract_datatype(&self, expr: &Expr, schema: &Schema) -> Result; +} + +/// [`SchemaContext`] backed by a [`DFSchema`] built from the physical Arrow schema. 
+pub struct DfSchemaContext; + +impl SchemaContext for DfSchemaContext { + fn resolve_expression(&self, expr: &Expr, schema: &Schema) -> Result { + let df = DFSchema::try_from(schema.clone())?; + let _ = expr.get_type(&df)?; + Ok(expr.clone()) + } + + fn extract_datatype(&self, expr: &Expr, schema: &Schema) -> Result { + let df = DFSchema::try_from(schema.clone())?; + expr.get_type(&df) + } +} diff --git a/src/sql/schema/schema_provider.rs b/src/sql/schema/schema_provider.rs index 11c0d461..5e34991a 100644 --- a/src/sql/schema/schema_provider.rs +++ b/src/sql/schema/schema_provider.rs @@ -1,10 +1,21 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use datafusion::arrow::datatypes::{self as datatypes, DataType, Field, Schema}; -use datafusion::common::{Result, plan_err}; -use datafusion::datasource::DefaultTableSource; -use datafusion::error::DataFusionError; +use datafusion::common::{DataFusionError, Result}; +use datafusion::datasource::{DefaultTableSource, TableProvider, TableType}; use datafusion::execution::{FunctionRegistry, SessionStateDefaults}; use datafusion::logical_expr::expr_rewriter::FunctionRewrite; use datafusion::logical_expr::planner::ExprPlanner; @@ -12,27 +23,20 @@ use datafusion::logical_expr::{ AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableSource, WindowUDF, }; use datafusion::optimizer::Analyzer; -use datafusion::sql::TableReference; use datafusion::sql::planner::ContextProvider; +use datafusion::sql::TableReference; use unicase::UniCase; + use crate::sql::logical_node::logical::DylibUdfConfig; use crate::sql::schema::table::Table as CatalogTable; use crate::sql::schema::utils::window_arrow_struct; use crate::sql::types::{PlaceholderUdf, PlanningOptions}; -#[derive(Clone, Default)] -pub struct StreamSchemaProvider { - pub source_defs: HashMap, - tables: HashMap, StreamTable>, - catalog_tables: HashMap, CatalogTable>, - pub functions: HashMap>, - pub aggregate_functions: HashMap>, - pub window_functions: HashMap>, - pub dylib_udfs: HashMap, - config_options: datafusion::config::ConfigOptions, - pub expr_planners: Vec>, - pub planning_options: PlanningOptions, - pub analyzer: Analyzer, +pub type ObjectName = UniCase; + +#[inline] +fn object_name(s: impl Into) -> ObjectName { + UniCase::new(s.into()) } #[derive(Clone, Debug)] @@ -56,39 +60,36 @@ pub enum StreamTable { impl StreamTable { pub fn name(&self) -> &str { match self { - StreamTable::Source { name, .. } => name, - StreamTable::Sink { name, .. } => name, - StreamTable::Memory { name, .. } => name, + Self::Source { name, .. } | Self::Sink { name, .. 
} | Self::Memory { name, .. } => name, } } - pub fn get_fields(&self) -> Vec> { + pub fn schema(&self) -> Arc { match self { - StreamTable::Source { schema, .. } => schema.fields().to_vec(), - StreamTable::Sink { schema, .. } => schema.fields().to_vec(), - StreamTable::Memory { .. } => vec![], + Self::Source { schema, .. } | Self::Sink { schema, .. } => Arc::clone(schema), + Self::Memory { .. } => Arc::new(Schema::empty()), } } } -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone)] pub struct LogicalBatchInput { pub table_name: String, pub schema: Arc, } #[async_trait::async_trait] -impl datafusion::datasource::TableProvider for LogicalBatchInput { +impl TableProvider for LogicalBatchInput { fn as_any(&self) -> &dyn std::any::Any { self } fn schema(&self) -> Arc { - self.schema.clone() + Arc::clone(&self.schema) } - fn table_type(&self) -> datafusion::datasource::TableType { - datafusion::datasource::TableType::Temporary + fn table_type(&self) -> TableType { + TableType::Temporary } async fn scan( @@ -100,85 +101,96 @@ impl datafusion::datasource::TableProvider for LogicalBatchInput { ) -> Result> { Ok(Arc::new(crate::sql::logical_planner::FsMemExec::new( self.table_name.clone(), - self.schema.clone(), + Arc::clone(&self.schema), ))) } } -fn create_table(table_name: String, schema: Arc) -> Arc { - let table_provider = LogicalBatchInput { table_name, schema }; - let wrapped = Arc::new(table_provider); - let provider = DefaultTableSource::new(wrapped); - Arc::new(provider) +#[derive(Clone, Default)] +pub struct FunctionCatalog { + pub scalars: HashMap>, + pub aggregates: HashMap>, + pub windows: HashMap>, + pub planners: Vec>, } -impl StreamSchemaProvider { - pub fn new() -> Self { - let mut registry = Self { - ..Default::default() - }; +#[derive(Clone, Default)] +pub struct TableCatalog { + pub streams: HashMap>, + pub catalogs: HashMap>, + pub source_defs: HashMap, +} - registry - .register_udf(PlaceholderUdf::with_return( - 
"hop", - vec![ - DataType::Interval(datatypes::IntervalUnit::MonthDayNano), - DataType::Interval(datatypes::IntervalUnit::MonthDayNano), - ], - window_arrow_struct(), - )) - .unwrap(); +#[derive(Clone)] +pub struct StreamPlanningContext { + pub tables: TableCatalog, + pub functions: FunctionCatalog, + pub dylib_udfs: HashMap, + pub config_options: datafusion::config::ConfigOptions, + pub planning_options: PlanningOptions, + pub analyzer: Analyzer, +} - registry - .register_udf(PlaceholderUdf::with_return( - "tumble", - vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], - window_arrow_struct(), - )) - .unwrap(); +impl Default for StreamPlanningContext { + fn default() -> Self { + Self { + tables: TableCatalog::default(), + functions: FunctionCatalog::default(), + dylib_udfs: HashMap::new(), + config_options: datafusion::config::ConfigOptions::default(), + planning_options: PlanningOptions::default(), + analyzer: Analyzer::default(), + } + } +} - registry - .register_udf(PlaceholderUdf::with_return( - "session", - vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], - window_arrow_struct(), - )) - .unwrap(); +/// Back-compat name for [`StreamPlanningContext`]. +pub type StreamSchemaProvider = StreamPlanningContext; - registry - .register_udf(PlaceholderUdf::with_return( - "unnest", - vec![DataType::List(Arc::new(Field::new( - "field", - DataType::Utf8, - true, - )))], - DataType::Utf8, - )) - .unwrap(); +impl StreamPlanningContext { + pub fn builder() -> StreamPlanningContextBuilder { + StreamPlanningContextBuilder::default() + } - registry - .register_udf(PlaceholderUdf::with_return( - "row_time", - vec![], - DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None), - )) - .unwrap(); + /// Same registration order as the historical `StreamSchemaProvider::new` (placeholders, then DataFusion defaults). 
+ pub fn new() -> Self { + Self::builder() + .with_streaming_extensions() + .expect("streaming extensions") + .with_default_functions() + .expect("default functions") + .build() + } - for p in SessionStateDefaults::default_scalar_functions() { - registry.register_udf(p).unwrap(); - } - for p in SessionStateDefaults::default_aggregate_functions() { - registry.register_udaf(p).unwrap(); - } - for p in SessionStateDefaults::default_window_functions() { - registry.register_udwf(p).unwrap(); - } - for p in SessionStateDefaults::default_expr_planners() { - registry.register_expr_planner(p).unwrap(); - } + pub fn register_stream_table(&mut self, table: StreamTable) { + let key = object_name(table.name().to_string()); + self.tables.streams.insert(key, Arc::new(table)); + } + + pub fn get_stream_table(&self, name: &str) -> Option> { + self.tables.streams.get(&object_name(name.to_string())).cloned() + } + + pub fn register_catalog_table(&mut self, table: CatalogTable) { + let key = object_name(table.name().to_string()); + self.tables.catalogs.insert(key, Arc::new(table)); + } - registry + pub fn get_catalog_table(&self, table_name: impl AsRef) -> Option<&CatalogTable> { + self.tables + .catalogs + .get(&object_name(table_name.as_ref().to_string())) + .map(|t| t.as_ref()) + } + + pub fn get_catalog_table_mut( + &mut self, + table_name: impl AsRef, + ) -> Option<&mut CatalogTable> { + self.tables + .catalogs + .get_mut(&object_name(table_name.as_ref().to_string())) + .map(|t| Arc::make_mut(t)) } pub fn add_source_table( @@ -188,86 +200,70 @@ impl StreamSchemaProvider { event_time_field: Option, watermark_field: Option, ) { - self.tables.insert( - UniCase::new(name.clone()), - StreamTable::Source { - name, - schema, - event_time_field, - watermark_field, - }, - ); + self.register_stream_table(StreamTable::Source { + name, + schema, + event_time_field, + watermark_field, + }); } pub fn add_sink_table(&mut self, name: String, schema: Arc) { - self.tables.insert( - 
UniCase::new(name.clone()), - StreamTable::Sink { name, schema }, - ); + self.register_stream_table(StreamTable::Sink { name, schema }); } pub fn insert_table(&mut self, table: StreamTable) { - self.tables - .insert(UniCase::new(table.name().to_string()), table); - } - - pub fn get_table(&self, table_name: impl Into) -> Option<&StreamTable> { - self.tables.get(&UniCase::new(table_name.into())) - } - - pub fn get_table_mut(&mut self, table_name: impl Into) -> Option<&mut StreamTable> { - self.tables.get_mut(&UniCase::new(table_name.into())) + self.register_stream_table(table); } + /// Alias for [`Self::register_catalog_table`]. pub fn insert_catalog_table(&mut self, table: CatalogTable) { - self.catalog_tables - .insert(UniCase::new(table.name().to_string()), table); + self.register_catalog_table(table); } - pub fn get_catalog_table(&self, table_name: impl Into) -> Option<&CatalogTable> { - self.catalog_tables.get(&UniCase::new(table_name.into())) + pub fn get_table(&self, table_name: impl AsRef) -> Option<&StreamTable> { + self.tables + .streams + .get(&object_name(table_name.as_ref().to_string())) + .map(|a| a.as_ref()) } - pub fn get_catalog_table_mut( - &mut self, - table_name: impl Into, - ) -> Option<&mut CatalogTable> { - self.catalog_tables - .get_mut(&UniCase::new(table_name.into())) + pub fn get_table_mut(&mut self, table_name: impl AsRef) -> Option<&mut StreamTable> { + self.tables + .streams + .get_mut(&object_name(table_name.as_ref().to_string())) + .map(|a| Arc::make_mut(a)) } - pub fn get_async_udf_options( - &self, - _name: &str, - ) -> Option { - // TODO: implement async UDF lookup + pub fn get_async_udf_options(&self, _name: &str) -> Option { None } + + fn create_table_source(name: String, schema: Arc) -> Arc { + let provider = LogicalBatchInput { table_name: name, schema }; + Arc::new(DefaultTableSource::new(Arc::new(provider))) + } } -impl ContextProvider for StreamSchemaProvider { +impl ContextProvider for StreamPlanningContext { fn 
get_table_source(&self, name: TableReference) -> Result> { let table = self - .get_table(name.to_string()) - .ok_or_else(|| DataFusionError::Plan(format!("Table {name} not found")))?; + .get_stream_table(name.table()) + .ok_or_else(|| DataFusionError::Plan(format!("Table {} not found", name)))?; - let fields = table.get_fields(); - let schema = Arc::new(Schema::new_with_metadata( - fields - .iter() - .map(|f| f.as_ref().clone()) - .collect::>(), - HashMap::new(), - )); - Ok(create_table(name.to_string(), schema)) + Ok(Self::create_table_source(name.to_string(), table.schema())) } fn get_function_meta(&self, name: &str) -> Option> { - self.functions.get(name).cloned() + self.functions.scalars.get(name).cloned() } fn get_aggregate_meta(&self, name: &str) -> Option> { - self.aggregate_functions.get(name).cloned() + self.functions.aggregates.get(name).cloned() + } + + fn get_window_meta(&self, name: &str) -> Option> { + self.functions.windows.get(name).cloned() } fn get_variable_type(&self, _variable_names: &[String]) -> Option { @@ -278,54 +274,50 @@ impl ContextProvider for StreamSchemaProvider { &self.config_options } - fn get_window_meta(&self, name: &str) -> Option> { - self.window_functions.get(name).cloned() - } - fn udf_names(&self) -> Vec { - self.functions.keys().cloned().collect() + self.functions.scalars.keys().cloned().collect() } fn udaf_names(&self) -> Vec { - self.aggregate_functions.keys().cloned().collect() + self.functions.aggregates.keys().cloned().collect() } fn udwf_names(&self) -> Vec { - self.window_functions.keys().cloned().collect() + self.functions.windows.keys().cloned().collect() } fn get_expr_planners(&self) -> &[Arc] { - &self.expr_planners + &self.functions.planners } } -impl FunctionRegistry for StreamSchemaProvider { +impl FunctionRegistry for StreamPlanningContext { fn udfs(&self) -> HashSet { - self.functions.keys().cloned().collect() + self.functions.scalars.keys().cloned().collect() } fn udf(&self, name: &str) -> Result> { - if let 
Some(f) = self.functions.get(name) { - Ok(Arc::clone(f)) - } else { - plan_err!("No UDF with name {name}") - } + self.functions + .scalars + .get(name) + .cloned() + .ok_or_else(|| DataFusionError::Plan(format!("No UDF with name {name}"))) } fn udaf(&self, name: &str) -> Result> { - if let Some(f) = self.aggregate_functions.get(name) { - Ok(Arc::clone(f)) - } else { - plan_err!("No UDAF with name {name}") - } + self.functions + .aggregates + .get(name) + .cloned() + .ok_or_else(|| DataFusionError::Plan(format!("No UDAF with name {name}"))) } fn udwf(&self, name: &str) -> Result> { - if let Some(f) = self.window_functions.get(name) { - Ok(Arc::clone(f)) - } else { - plan_err!("No UDWF with name {name}") - } + self.functions + .windows + .get(name) + .cloned() + .ok_or_else(|| DataFusionError::Plan(format!("No UDWF with name {name}"))) } fn register_function_rewrite( @@ -337,25 +329,96 @@ impl FunctionRegistry for StreamSchemaProvider { } fn register_udf(&mut self, udf: Arc) -> Result>> { - Ok(self.functions.insert(udf.name().to_string(), udf)) + Ok(self.functions.scalars.insert(udf.name().to_string(), udf)) } fn register_udaf(&mut self, udaf: Arc) -> Result>> { Ok(self - .aggregate_functions + .functions + .aggregates .insert(udaf.name().to_string(), udaf)) } fn register_udwf(&mut self, udwf: Arc) -> Result>> { - Ok(self.window_functions.insert(udwf.name().to_string(), udwf)) + Ok(self.functions.windows.insert(udwf.name().to_string(), udwf)) } fn register_expr_planner(&mut self, expr_planner: Arc) -> Result<()> { - self.expr_planners.push(expr_planner); + self.functions.planners.push(expr_planner); Ok(()) } fn expr_planners(&self) -> Vec> { - self.expr_planners.clone() + self.functions.planners.clone() + } +} + +#[derive(Default)] +pub struct StreamPlanningContextBuilder { + context: StreamPlanningContext, +} + +impl StreamPlanningContextBuilder { + pub fn new() -> Self { + Self::default() + } + + pub fn with_default_functions(mut self) -> Result { + for p in 
SessionStateDefaults::default_scalar_functions() { + self.context.register_udf(p)?; + } + for p in SessionStateDefaults::default_aggregate_functions() { + self.context.register_udaf(p)?; + } + for p in SessionStateDefaults::default_window_functions() { + self.context.register_udwf(p)?; + } + for p in SessionStateDefaults::default_expr_planners() { + self.context.register_expr_planner(p)?; + } + Ok(self) + } + + pub fn with_streaming_extensions(mut self) -> Result { + let extensions = vec![ + PlaceholderUdf::with_return( + "hop", + vec![ + DataType::Interval(datatypes::IntervalUnit::MonthDayNano), + DataType::Interval(datatypes::IntervalUnit::MonthDayNano), + ], + window_arrow_struct(), + ), + PlaceholderUdf::with_return( + "tumble", + vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], + window_arrow_struct(), + ), + PlaceholderUdf::with_return( + "session", + vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], + window_arrow_struct(), + ), + PlaceholderUdf::with_return( + "unnest", + vec![DataType::List(Arc::new(Field::new("field", DataType::Utf8, true)))], + DataType::Utf8, + ), + PlaceholderUdf::with_return( + "row_time", + vec![], + DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None), + ), + ]; + + for ext in extensions { + self.context.register_udf(ext)?; + } + + Ok(self) + } + + pub fn build(self) -> StreamPlanningContext { + self.context } } diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs new file mode 100644 index 00000000..dd962e34 --- /dev/null +++ b/src/sql/schema/source_table.rs @@ -0,0 +1,564 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; +use std::time::Duration; + +use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema}; +use datafusion::common::{Column, DFSchema, Result, plan_datafusion_err, plan_err}; +use datafusion::error::DataFusionError; +use datafusion::logical_expr::Expr; +use datafusion_expr::ExprSchemable; +use datafusion::sql::planner::{PlannerContext, SqlToRel}; +use datafusion::sql::sqlparser::ast; +use datafusion::sql::TableReference; +use protocol::grpc::api::ConnectorOp; +use tracing::warn; + +use super::column_descriptor::ColumnDescriptor; +use super::data_encoding_format::DataEncodingFormat; +use super::schema_context::SchemaContext; +use super::table_execution_unit::{EngineDescriptor, SyncMode, TableExecutionUnit}; +use super::table_role::{ + apply_adapter_specific_rules, deduce_role, serialize_backend_params, + validate_adapter_availability, TableRole, +}; +use super::temporal_pipeline_config::{resolve_temporal_logic, TemporalPipelineConfig, TemporalSpec}; +use super::StreamSchemaProvider; +use crate::multifield_partial_ord; +use crate::sql::api::{ConnectionProfile, ConnectionSchema, SourceField}; +use crate::sql::common::connector_options::ConnectorOptions; +use crate::sql::common::{BadData, Format, Framing, JsonCompression, JsonFormat}; +use crate::sql::schema::ConnectionType; +use crate::sql::schema::table::SqlSource; +use crate::sql::types::ProcessingMode; + +/// Connector-backed catalog table (adapter / source-sink model). 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SourceTable { + pub registry_id: Option, + pub adapter_type: String, + pub table_identifier: String, + pub role: TableRole, + pub schema_specs: Vec, + /// Serialized runtime payload (e.g. JSON: connector + `connection_schema` + options). + pub opaque_config: String, + pub temporal_config: TemporalPipelineConfig, + pub key_constraints: Vec, + pub payload_format: Option, + /// Wire [`Format`] when built from SQL `WITH` (updating mode, `ConnectionSchema`). + pub connection_format: Option, + pub description: String, + pub partition_exprs: Arc>>, + pub lookup_cache_max_bytes: Option, + pub lookup_cache_ttl: Option, + pub inferred_fields: Option>, +} + +multifield_partial_ord!( + SourceTable, + registry_id, + adapter_type, + table_identifier, + role, + opaque_config, + description, + key_constraints, + connection_format +); + +impl SourceTable { + #[inline] + pub fn name(&self) -> &str { + self.table_identifier.as_str() + } + + pub fn new( + table_identifier: impl Into, + connector: impl Into, + connection_type: ConnectionType, + ) -> Self { + Self { + registry_id: None, + adapter_type: connector.into(), + table_identifier: table_identifier.into(), + role: connection_type.into(), + schema_specs: Vec::new(), + opaque_config: String::new(), + temporal_config: TemporalPipelineConfig::default(), + key_constraints: Vec::new(), + payload_format: None, + connection_format: None, + description: String::new(), + partition_exprs: Arc::new(None), + lookup_cache_max_bytes: None, + lookup_cache_ttl: None, + inferred_fields: None, + } + } + + #[inline] + pub fn connector(&self) -> &str { + self.adapter_type.as_str() + } + + #[inline] + pub fn connection_type(&self) -> ConnectionType { + self.role.into() + } + + pub fn event_time_field(&self) -> Option<&str> { + self.temporal_config.event_column.as_deref() + } + + pub fn watermark_field(&self) -> Option<&str> { + self.temporal_config.watermark_strategy_column.as_deref() + } + + 
pub fn idle_time(&self) -> Option { + self.temporal_config.liveness_timeout + } + + pub fn initialize_from_params( + id: &str, + adapter: &str, + raw_columns: Vec, + pk_list: Vec, + time_meta: Option, + options: &mut HashMap, + _schema_ctx: &dyn SchemaContext, + ) -> Result { + validate_adapter_availability(adapter)?; + + let encoding = DataEncodingFormat::extract_from_map(options)?; + + let mut refined_columns = apply_adapter_specific_rules(adapter, raw_columns); + refined_columns = encoding.apply_envelope(refined_columns)?; + + let temporal_settings = resolve_temporal_logic(&refined_columns, time_meta)?; + let finalized_config = serialize_backend_params(adapter, options)?; + let role = deduce_role(options)?; + + if role == TableRole::Ingestion && encoding.supports_delta_updates() && pk_list.is_empty() { + return plan_err!("CDC source requires at least one primary key"); + } + + Ok(Self { + registry_id: None, + adapter_type: adapter.to_string(), + table_identifier: id.to_string(), + role, + schema_specs: refined_columns, + opaque_config: finalized_config, + temporal_config: temporal_settings, + key_constraints: pk_list, + payload_format: Some(encoding), + connection_format: None, + description: String::new(), + partition_exprs: Arc::new(None), + lookup_cache_max_bytes: None, + lookup_cache_ttl: None, + inferred_fields: None, + }) + } + + pub fn produce_physical_schema(&self) -> Schema { + Schema::new( + self.schema_specs + .iter() + .filter(|c| !c.is_computed()) + .map(|c| c.arrow_field().clone()) + .collect::>(), + ) + } + + #[inline] + pub fn physical_schema(&self) -> Schema { + self.produce_physical_schema() + } + + pub fn convert_to_execution_unit(&self) -> Result { + if self.role == TableRole::Egress { + return plan_err!("Target [{}] is write-only", self.table_identifier); + } + + if self.is_cdc_enabled() && self.schema_specs.iter().any(|c| c.is_computed()) { + return plan_err!("CDC cannot be mixed with computed columns natively"); + } + + let mode = if 
self.is_cdc_enabled() { + SyncMode::Incremental + } else { + SyncMode::AppendOnly + }; + + Ok(TableExecutionUnit { + label: self.table_identifier.clone(), + engine_meta: EngineDescriptor { + engine_type: self.adapter_type.clone(), + raw_payload: self.opaque_config.clone(), + }, + sync_mode: mode, + temporal_offset: self.temporal_config.clone(), + }) + } + + #[inline] + pub fn to_execution_unit(&self) -> Result { + self.convert_to_execution_unit() + } + + fn is_cdc_enabled(&self) -> bool { + self.payload_format + .as_ref() + .is_some_and(|f| f.supports_delta_updates()) + } + + #[allow(clippy::too_many_arguments)] + pub fn from_options( + table_identifier: &str, + connector_name: &str, + temporary: bool, + fields: Vec, + primary_keys: Vec, + watermark: Option<(String, Option)>, + options: &mut ConnectorOptions, + connection_profile: Option<&ConnectionProfile>, + schema_provider: &StreamSchemaProvider, + connection_type_override: Option, + description: String, + ) -> Result { + let _ = connection_profile; + + if let Some(c) = options.pull_opt_str("connector")? { + if c != connector_name { + return plan_err!( + "WITH option `connector` is '{c}' but table uses connector '{connector_name}'" + ); + } + } + + validate_adapter_availability(connector_name)?; + + let inferred_empty = fields.is_empty(); + let mut columns = fields; + columns = apply_adapter_specific_rules(connector_name, columns); + + let format = Format::from_opts(options) + .map_err(|e| DataFusionError::Plan(format!("invalid format: '{e}'")))?; + + if let Some(Format::Json(JsonFormat { compression, .. })) = &format + && !matches!(compression, JsonCompression::Uncompressed) + && connector_name != "filesystem" + { + return plan_err!("'json.compression' is only supported for the filesystem connector"); + } + + let framing = Framing::from_opts(options) + .map_err(|e| DataFusionError::Plan(format!("invalid framing: '{e}'")))?; + + if temporary + && let Some(t) = options.insert_str("type", "lookup")? 
+ && t != "lookup" + { + return plan_err!( + "Cannot have a temporary table with type '{t}'; temporary tables must be type 'lookup'" + ); + } + + let payload_format = format.as_ref().map(DataEncodingFormat::from_connection_format); + let encoding = payload_format.unwrap_or(DataEncodingFormat::Raw); + columns = encoding.apply_envelope(columns)?; + + let schema_fields: Vec = columns + .iter() + .filter(|c| !c.is_computed()) + .map(|c| { + let mut sf: SourceField = c.arrow_field().clone().try_into().map_err(|_| { + DataFusionError::Plan(format!( + "field '{}' has a type '{:?}' that cannot be used in a connection table", + c.arrow_field().name(), + c.arrow_field().data_type() + )) + })?; + if let Some(key) = c.system_meta_key() { + sf.metadata_key = Some(key.to_string()); + } + Ok(sf) + }) + .collect::>()?; + + let bad_data = BadData::from_opts(options) + .map_err(|e| DataFusionError::Plan(format!("Invalid bad_data: '{e}'")))?; + + let connection_schema = ConnectionSchema::try_new( + format.clone(), + Some(bad_data), + framing, + schema_fields, + None, + Some(inferred_empty), + primary_keys.iter().cloned().collect::>(), + ) + .map_err(|e| DataFusionError::Plan(format!("could not create connection schema: {e}")))?; + + let role = if let Some(t) = connection_type_override { + t.into() + } else { + match options.pull_opt_str("type")?.as_deref() { + None | Some("source") => TableRole::Ingestion, + Some("sink") => TableRole::Egress, + Some("lookup") => TableRole::Reference, + Some(other) => { + return plan_err!("invalid connection type '{other}' in WITH options"); + } + } + }; + + let mut table = SourceTable { + registry_id: None, + adapter_type: connector_name.to_string(), + table_identifier: table_identifier.to_string(), + role, + schema_specs: columns, + opaque_config: String::new(), + temporal_config: TemporalPipelineConfig::default(), + key_constraints: Vec::new(), + payload_format, + connection_format: format.clone(), + description, + partition_exprs: Arc::new(None), 
+ lookup_cache_max_bytes: None, + lookup_cache_ttl: None, + inferred_fields: None, + }; + + if let Some(event_time_field) = options.pull_opt_field("event_time_field")? { + warn!("`event_time_field` WITH option is deprecated; use WATERMARK FOR syntax"); + table.temporal_config.event_column = Some(event_time_field); + } + + if let Some(watermark_field) = options.pull_opt_field("watermark_field")? { + warn!("`watermark_field` WITH option is deprecated; use WATERMARK FOR syntax"); + table.temporal_config.watermark_strategy_column = Some(watermark_field); + } + + if let Some((time_field, watermark_expr)) = watermark { + let table_ref = TableReference::bare(table.table_identifier.as_str()); + let df_schema = + DFSchema::try_from_qualified_schema(table_ref, &table.produce_physical_schema())?; + + let field = table + .schema_specs + .iter() + .find(|c| c.arrow_field().name().as_str() == time_field.as_str()) + .ok_or_else(|| { + plan_datafusion_err!( + "WATERMARK FOR field `{}` does not exist in table", + time_field + ) + })?; + + if !matches!(field.arrow_field().data_type(), DataType::Timestamp(_, None)) { + return plan_err!( + "WATERMARK FOR field `{time_field}` has type {}, but expected TIMESTAMP", + field.arrow_field().data_type() + ); + } + + table.temporal_config.event_column = Some(time_field.clone()); + + if let Some(expr) = watermark_expr { + let logical_expr = plan_generating_expr(&expr, &df_schema, schema_provider) + .map_err(|e| { + DataFusionError::Plan(format!("could not plan watermark expression: {e}")) + })?; + + let (data_type, nullable) = logical_expr.data_type_and_nullable(&df_schema)?; + if !matches!(data_type, DataType::Timestamp(_, _)) { + return plan_err!( + "the type of the WATERMARK FOR expression must be TIMESTAMP, but was {data_type}" + ); + } + if nullable { + return plan_err!( + "the type of the WATERMARK FOR expression must be NOT NULL" + ); + } + + table.schema_specs.push(ColumnDescriptor::new_computed( + Field::new( + "__watermark", + 
logical_expr.get_type(&df_schema)?, + false, + ), + logical_expr, + )); + table.temporal_config.watermark_strategy_column = Some("__watermark".to_string()); + } else { + table.temporal_config.watermark_strategy_column = Some(time_field); + } + } + + let idle_from_micros = options + .pull_opt_i64("idle_micros")? + .filter(|t| *t > 0) + .map(|t| Duration::from_micros(t as u64)); + let idle_from_duration = options.pull_opt_duration("idle_time")?; + table.temporal_config.liveness_timeout = idle_from_micros.or(idle_from_duration); + + table.lookup_cache_max_bytes = options.pull_opt_u64("lookup.cache.max_bytes")?; + + table.lookup_cache_ttl = options.pull_opt_duration("lookup.cache.ttl")?; + + let extra_opts = options.drain_remaining_string_values()?; + let mut config_root = serde_json::json!({ + "connector": connector_name, + "connection_schema": connection_schema, + }); + if let serde_json::Value::Object(ref mut map) = config_root { + for (k, v) in extra_opts { + map.insert(k, serde_json::Value::String(v)); + } + } + table.opaque_config = serde_json::to_string(&config_root).map_err(|e| { + DataFusionError::Plan(format!("failed to serialize connector config: {e}")) + })?; + + if role == TableRole::Ingestion && encoding.supports_delta_updates() && primary_keys.is_empty() + { + return plan_err!("Debezium source must have at least one PRIMARY KEY field"); + } + + table.key_constraints = primary_keys; + + Ok(table) + } + + pub fn has_virtual_fields(&self) -> bool { + self.schema_specs.iter().any(|c| c.is_computed()) + } + + pub fn is_updating(&self) -> bool { + self.connection_format + .as_ref() + .is_some_and(|f| f.is_updating()) + || self.payload_format == Some(DataEncodingFormat::DebeziumJson) + } + + pub fn connector_op(&self) -> ConnectorOp { + ConnectorOp { + connector: self.adapter_type.clone(), + config: self.opaque_config.clone(), + description: self.description.clone(), + } + } + + pub fn processing_mode(&self) -> ProcessingMode { + if self.is_updating() { + 
ProcessingMode::Update + } else { + ProcessingMode::Append + } + } + + pub fn timestamp_override(&self) -> Result> { + if let Some(field_name) = self.temporal_config.event_column.clone() { + if self.is_updating() { + return plan_err!("can't use event_time_field with update mode"); + } + let _field = self.get_time_column(&field_name)?; + Ok(Some(Expr::Column(Column::from_name(field_name.as_str())))) + } else { + Ok(None) + } + } + + fn get_time_column(&self, field_name: &str) -> Result<&ColumnDescriptor> { + self.schema_specs + .iter() + .find(|c| { + c.arrow_field().name() == field_name + && matches!(c.arrow_field().data_type(), DataType::Timestamp(..)) + }) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "field {field_name} not found or not a timestamp" + )) + }) + } + + pub fn watermark_column(&self) -> Result> { + if let Some(field_name) = self.temporal_config.watermark_strategy_column.clone() { + let _field = self.get_time_column(&field_name)?; + Ok(Some(Expr::Column(Column::from_name(field_name.as_str())))) + } else { + Ok(None) + } + } + + pub fn as_sql_source(&self) -> Result { + match self.role { + TableRole::Ingestion => {} + TableRole::Egress | TableRole::Reference => { + return plan_err!("cannot read from sink"); + } + }; + + if self.is_updating() && self.has_virtual_fields() { + return plan_err!("can't read from a source with virtual fields and update mode."); + } + + let timestamp_override = self.timestamp_override()?; + let watermark_column = self.watermark_column()?; + + let source = SqlSource { + id: self.registry_id, + struct_def: self + .schema_specs + .iter() + .filter(|c| !c.is_computed()) + .map(|c| Arc::new(c.arrow_field().clone())) + .collect(), + config: self.connector_op(), + processing_mode: self.processing_mode(), + idle_time: self.temporal_config.liveness_timeout, + }; + + Ok(SourceOperator { + name: self.table_identifier.clone(), + source, + timestamp_override, + watermark_column, + }) + } +} + +/// Plan a SQL scalar expression 
against a table-qualified schema (e.g. watermark `AS` clause). +fn plan_generating_expr( + ast: &ast::Expr, + df_schema: &DFSchema, + schema_provider: &StreamSchemaProvider, +) -> Result { + let planner = SqlToRel::new(schema_provider); + let mut ctx = PlannerContext::new(); + planner.sql_to_expr(ast.clone(), df_schema, &mut ctx) +} + +#[derive(Debug, Clone)] +pub struct SourceOperator { + pub name: String, + pub source: SqlSource, + pub timestamp_override: Option, + pub watermark_column: Option, +} diff --git a/src/sql/schema/table.rs b/src/sql/schema/table.rs index 21f064fe..156e8ffd 100644 --- a/src/sql/schema/table.rs +++ b/src/sql/schema/table.rs @@ -17,10 +17,10 @@ use datafusion::common::{Result, plan_err}; use datafusion::logical_expr::{Extension, LogicalPlan}; use datafusion::sql::sqlparser::ast::Statement; use protocol::grpc::api::ConnectorOp; -use super::connector_table::ConnectorTable; -use super::optimizer::produce_optimized_plan; +use super::source_table::SourceTable; +use crate::sql::logical_planner::optimizers::produce_optimized_plan; use crate::sql::schema::StreamSchemaProvider; -use crate::sql::extensions::remote_table::RemoteTableExtension; +use crate::sql::extensions::remote_table::RemoteTableBoundaryNode; use crate::sql::analysis::rewrite_plan; use crate::sql::types::{DFField, ProcessingMode}; @@ -29,9 +29,9 @@ use crate::sql::types::{DFField, ProcessingMode}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Table { /// A lookup table backed by an external connector. - LookupTable(ConnectorTable), + LookupTable(SourceTable), /// A source/sink table backed by an external connector. - ConnectorTable(ConnectorTable), + ConnectorTable(SourceTable), /// A table defined by a query (CREATE VIEW / CREATE TABLE AS SELECT). 
TableFromQuery { name: String, @@ -63,11 +63,11 @@ impl Table { }))) => { let rewritten = rewrite_plan(input.as_ref().clone(), schema_provider)?; let schema = rewritten.schema().clone(); - let remote = RemoteTableExtension { - input: rewritten, - name: name.to_owned(), - schema, - materialize: true, + let remote = RemoteTableBoundaryNode { + upstream_plan: rewritten, + table_identifier: name.to_owned(), + resolved_schema: schema, + requires_materialization: true, }; Ok(Some(Table::TableFromQuery { name: name.to_string(), @@ -83,25 +83,25 @@ impl Table { pub fn name(&self) -> &str { match self { Table::TableFromQuery { name, .. } => name.as_str(), - Table::ConnectorTable(c) | Table::LookupTable(c) => c.name.as_str(), + Table::ConnectorTable(c) | Table::LookupTable(c) => c.name(), } } pub fn get_fields(&self) -> Vec { match self { - Table::ConnectorTable(ConnectorTable { - fields, + Table::ConnectorTable(SourceTable { + schema_specs, inferred_fields, .. }) - | Table::LookupTable(ConnectorTable { - fields, + | Table::LookupTable(SourceTable { + schema_specs, inferred_fields, .. }) => inferred_fields.clone().unwrap_or_else(|| { - fields + schema_specs .iter() - .map(|field| field.field().clone().into()) + .map(|c| Arc::new(c.arrow_field().clone())) .collect() }), Table::TableFromQuery { logical_plan, .. } => { @@ -115,7 +115,7 @@ impl Table { return Ok(()); }; - if !t.fields.is_empty() { + if !t.schema_specs.is_empty() { return Ok(()); } diff --git a/src/sql/schema/table_execution_unit.rs b/src/sql/schema/table_execution_unit.rs new file mode 100644 index 00000000..c23dda7a --- /dev/null +++ b/src/sql/schema/table_execution_unit.rs @@ -0,0 +1,33 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::temporal_pipeline_config::TemporalPipelineConfig; + +#[derive(Debug, Clone)] +pub struct EngineDescriptor { + pub engine_type: String, + pub raw_payload: String, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum SyncMode { + AppendOnly, + Incremental, +} + +#[derive(Debug, Clone)] +pub struct TableExecutionUnit { + pub label: String, + pub engine_meta: EngineDescriptor, + pub sync_mode: SyncMode, + pub temporal_offset: TemporalPipelineConfig, +} diff --git a/src/sql/schema/table_role.rs b/src/sql/schema/table_role.rs new file mode 100644 index 00000000..31629ad8 --- /dev/null +++ b/src/sql/schema/table_role.rs @@ -0,0 +1,110 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::HashMap; + +use datafusion::arrow::datatypes::{DataType, TimeUnit}; +use datafusion::common::{Result, plan_err}; +use datafusion::error::DataFusionError; + +use super::column_descriptor::ColumnDescriptor; +use super::connection_type::ConnectionType; + +/// Role of a connector-backed table in the pipeline (ingest / egress / lookup). +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum TableRole { + Ingestion, + Egress, + Reference, +} + +impl From for ConnectionType { + fn from(r: TableRole) -> Self { + match r { + TableRole::Ingestion => ConnectionType::Source, + TableRole::Egress => ConnectionType::Sink, + TableRole::Reference => ConnectionType::Lookup, + } + } +} + +impl From for TableRole { + fn from(c: ConnectionType) -> Self { + match c { + ConnectionType::Source => TableRole::Ingestion, + ConnectionType::Sink => TableRole::Egress, + ConnectionType::Lookup => TableRole::Reference, + } + } +} + +pub fn validate_adapter_availability(adapter: &str) -> Result<()> { + let supported = [ + "kafka", + "kinesis", + "filesystem", + "delta", + "iceberg", + "pulsar", + "nats", + "redis", + "mqtt", + "websocket", + "sse", + "nexmark", + "blackhole", + "lookup", + "memory", + "postgres", + ]; + if !supported.contains(&adapter) { + return Err(DataFusionError::Plan(format!("Unknown adapter '{adapter}'"))); + } + Ok(()) +} + +pub fn apply_adapter_specific_rules(adapter: &str, mut cols: Vec) -> Vec { + match adapter { + "delta" | "iceberg" => { + for c in &mut cols { + if matches!(c.data_type(), DataType::Timestamp(_, _)) { + c.force_precision(TimeUnit::Microsecond); + } + } + cols + } + _ => cols, + } +} + +pub fn deduce_role(options: &HashMap) -> Result { + match options.get("type").map(|s| s.as_str()) { + None | Some("source") => Ok(TableRole::Ingestion), + Some("sink") => Ok(TableRole::Egress), + Some("lookup") => Ok(TableRole::Reference), + Some(other) => plan_err!("Invalid role '{other}'"), + } +} + +pub fn 
serialize_backend_params(adapter: &str, options: &HashMap) -> Result { + let mut payload = serde_json::Map::new(); + payload.insert( + "adapter".to_string(), + serde_json::Value::String(adapter.to_string()), + ); + + for (k, v) in options { + payload.insert(k.clone(), serde_json::Value::String(v.clone())); + } + + serde_json::to_string(&payload).map_err(|e| DataFusionError::Plan(e.to_string())) +} diff --git a/src/sql/schema/temporal_pipeline_config.rs b/src/sql/schema/temporal_pipeline_config.rs new file mode 100644 index 00000000..eb29f71c --- /dev/null +++ b/src/sql/schema/temporal_pipeline_config.rs @@ -0,0 +1,57 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::time::Duration; + +use datafusion::common::{Result, plan_err}; +use datafusion::logical_expr::Expr; + +use super::column_descriptor::ColumnDescriptor; + +/// Event-time and watermark configuration for streaming tables. 
+#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] +pub struct TemporalPipelineConfig { + pub event_column: Option, + pub watermark_strategy_column: Option, + pub liveness_timeout: Option, +} + +#[derive(Debug, Clone)] +pub struct TemporalSpec { + pub time_field: String, + pub watermark_expr: Option, +} + +pub fn resolve_temporal_logic( + columns: &[ColumnDescriptor], + time_meta: Option, +) -> Result { + let mut config = TemporalPipelineConfig::default(); + + if let Some(meta) = time_meta { + let field_exists = columns + .iter() + .any(|c| c.arrow_field().name() == meta.time_field.as_str()); + if !field_exists { + return plan_err!("Temporal field {} does not exist", meta.time_field); + } + config.event_column = Some(meta.time_field.clone()); + + if meta.watermark_expr.is_some() { + config.watermark_strategy_column = Some("__watermark".to_string()); + } else { + config.watermark_strategy_column = Some(meta.time_field); + } + } + + Ok(config) +} From c830cbb0f037f6b6545054e2862ee75ed09e59fe Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 22 Mar 2026 02:25:22 +0800 Subject: [PATCH 11/44] update --- src/common/fs_schema.rs | 444 +++++++++++++++++++++++++++ src/common/mod.rs | 72 +++++ src/sql/datastream/logical.rs | 378 +++++++++++++++++++++++ src/sql/datastream/mod.rs | 1 + src/sql/physical/physical_planner.rs | 406 ++++++++++++++++++++++++ 5 files changed, 1301 insertions(+) create mode 100644 src/common/fs_schema.rs create mode 100644 src/common/mod.rs create mode 100644 src/sql/datastream/logical.rs create mode 100644 src/sql/datastream/mod.rs create mode 100644 src/sql/physical/physical_planner.rs diff --git a/src/common/fs_schema.rs b/src/common/fs_schema.rs new file mode 100644 index 00000000..e9ce6586 --- /dev/null +++ b/src/common/fs_schema.rs @@ -0,0 +1,444 @@ +//! FunctionStream table/stream schema: Arrow [`Schema`] plus timestamp index and optional key columns. +//! +//! 
[`Schema`]: datafusion::arrow::datatypes::Schema + +use datafusion::arrow::array::builder::{ArrayBuilder, make_builder}; +use datafusion::arrow::array::{RecordBatch, TimestampNanosecondArray}; +use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit}; +use datafusion::arrow::error::ArrowError; +use datafusion::common::{DataFusionError, Result as DFResult}; +use std::sync::Arc; +use std::time::SystemTime; +use arrow::compute::{filter_record_batch, lexsort_to_indices, partition, take, SortColumn}; +use arrow::compute::kernels::cmp::gt_eq; +use arrow::compute::kernels::numeric::div; +use arrow::row::SortField; +use arrow_array::{PrimitiveArray, UInt64Array}; +use arrow_array::types::UInt64Type; +use protocol::grpc::api; +use super::{to_nanos, TIMESTAMP_FIELD}; +use std::ops::Range; +use crate::common::converter::Converter; + +pub type FsSchemaRef = Arc; + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct FsSchema { + pub schema: Arc, + pub timestamp_index: usize, + key_indices: Option>, + /// If defined, these indices are used for routing (i.e., which subtask gets which piece of data) + routing_key_indices: Option>, +} + +impl TryFrom for FsSchema { + type Error = DataFusionError; + fn try_from(schema_proto: api::FsSchema) -> Result { + let schema: Schema = serde_json::from_str(&schema_proto.arrow_schema) + .map_err(|e| DataFusionError::Plan(format!("Invalid arrow schema: {e}")))?; + let timestamp_index = schema_proto.timestamp_index as usize; + + let key_indices = schema_proto.has_keys.then(|| { + schema_proto + .key_indices + .into_iter() + .map(|index| index as usize) + .collect() + }); + + let routing_key_indices = schema_proto.has_routing_keys.then(|| { + schema_proto + .routing_key_indices + .into_iter() + .map(|index| index as usize) + .collect() + }); + + Ok(Self { + schema: Arc::new(schema), + timestamp_index, + key_indices, + routing_key_indices, + }) + } +} + +impl From for api::FsSchema { + fn from(schema: 
FsSchema) -> Self { + let arrow_schema = serde_json::to_string(schema.schema.as_ref()).unwrap(); + let timestamp_index = schema.timestamp_index as u32; + + let has_keys = schema.key_indices.is_some(); + let key_indices = schema + .key_indices + .map(|ks| ks.into_iter().map(|index| index as u32).collect()) + .unwrap_or_default(); + + let has_routing_keys = schema.routing_key_indices.is_some(); + let routing_key_indices = schema + .routing_key_indices + .map(|ks| ks.into_iter().map(|index| index as u32).collect()) + .unwrap_or_default(); + + Self { + arrow_schema, + timestamp_index, + key_indices, + has_keys, + routing_key_indices, + has_routing_keys, + } + } +} + +impl FsSchema { + pub fn new( + schema: Arc, + timestamp_index: usize, + key_indices: Option>, + routing_key_indices: Option>, + ) -> Self { + Self { + schema, + timestamp_index, + key_indices, + routing_key_indices, + } + } + pub fn new_unkeyed(schema: Arc, timestamp_index: usize) -> Self { + Self { + schema, + timestamp_index, + key_indices: None, + routing_key_indices: None, + } + } + pub fn new_keyed(schema: Arc, timestamp_index: usize, key_indices: Vec) -> Self { + Self { + schema, + timestamp_index, + key_indices: Some(key_indices), + routing_key_indices: None, + } + } + + pub fn from_fields(mut fields: Vec) -> Self { + if !fields.iter().any(|f| f.name() == TIMESTAMP_FIELD) { + fields.push(Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )); + } + + Self::from_schema_keys(Arc::new(Schema::new(fields)), vec![]).unwrap() + } + + pub fn from_schema_unkeyed(schema: Arc) -> DFResult { + let timestamp_index = schema + .column_with_name(TIMESTAMP_FIELD) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}" + )) + })? 
+ .0; + + Ok(Self { + schema, + timestamp_index, + key_indices: None, + routing_key_indices: None, + }) + } + + pub fn from_schema_keys(schema: Arc, key_indices: Vec) -> DFResult { + let timestamp_index = schema + .column_with_name(TIMESTAMP_FIELD) + .ok_or_else(|| { + DataFusionError::Plan(format!( + "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}" + )) + })? + .0; + + Ok(Self { + schema, + timestamp_index, + key_indices: Some(key_indices), + routing_key_indices: None, + }) + } + + pub fn schema_without_timestamp(&self) -> Schema { + let mut builder = SchemaBuilder::from(self.schema.fields()); + builder.remove(self.timestamp_index); + builder.finish() + } + + pub fn remove_timestamp_column(&self, batch: &mut RecordBatch) { + batch.remove_column(self.timestamp_index); + } + + pub fn builders(&self) -> Vec> { + self.schema + .fields + .iter() + .map(|f| make_builder(f.data_type(), 8)) + .collect() + } + + pub fn timestamp_column<'a>(&self, batch: &'a RecordBatch) -> &'a TimestampNanosecondArray { + batch + .column(self.timestamp_index) + .as_any() + .downcast_ref::() + .unwrap() + } + + pub fn has_routing_keys(&self) -> bool { + self.routing_keys().map(|k| !k.is_empty()).unwrap_or(false) + } + + pub fn routing_keys(&self) -> Option<&Vec> { + self.routing_key_indices + .as_ref() + .or(self.key_indices.as_ref()) + } + + pub fn storage_keys(&self) -> Option<&Vec> { + self.key_indices.as_ref() + } + + pub fn filter_by_time( + &self, + batch: RecordBatch, + cutoff: Option, + ) -> Result { + let Some(cutoff) = cutoff else { + // no watermark, so we just return the same batch. + return Ok(batch); + }; + // filter out late data + let timestamp_column = batch + .column(self.timestamp_index) + .as_any() + .downcast_ref::() + .ok_or_else(|| ArrowError::CastError( + format!("failed to downcast column {} of {:?} to timestamp. 
Schema is supposed to be {:?}", + self.timestamp_index, batch, self.schema)))?; + let cutoff_scalar = TimestampNanosecondArray::new_scalar(to_nanos(cutoff) as i64); + let on_time = gt_eq(timestamp_column, &cutoff_scalar)?; + filter_record_batch(&batch, &on_time) + } + + pub fn sort_columns(&self, batch: &RecordBatch, with_timestamp: bool) -> Vec { + let mut columns = vec![]; + if let Some(keys) = &self.key_indices { + columns.extend(keys.iter().map(|index| SortColumn { + values: batch.column(*index).clone(), + options: None, + })); + } + if with_timestamp { + columns.push(SortColumn { + values: batch.column(self.timestamp_index).clone(), + options: None, + }); + } + columns + } + + pub fn sort_fields(&self, with_timestamp: bool) -> Vec { + let mut sort_fields = vec![]; + if let Some(keys) = &self.key_indices { + sort_fields.extend(keys.iter()); + } + if with_timestamp { + sort_fields.push(self.timestamp_index); + } + self.sort_fields_by_indices(&sort_fields) + } + + fn sort_fields_by_indices(&self, indices: &[usize]) -> Vec { + indices + .iter() + .map(|index| SortField::new(self.schema.field(*index).data_type().clone())) + .collect() + } + + pub fn converter(&self, with_timestamp: bool) -> Result { + Converter::new(self.sort_fields(with_timestamp)) + } + + pub fn value_converter( + &self, + with_timestamp: bool, + generation_index: usize, + ) -> Result { + match &self.key_indices { + None => { + let mut indices = (0..self.schema.fields().len()).collect::>(); + indices.remove(generation_index); + if !with_timestamp { + indices.remove(self.timestamp_index); + } + Converter::new(self.sort_fields_by_indices(&indices)) + } + Some(keys) => { + let indices = (0..self.schema.fields().len()) + .filter(|index| { + !keys.contains(index) + && (with_timestamp || *index != self.timestamp_index) + && *index != generation_index + }) + .collect::>(); + Converter::new(self.sort_fields_by_indices(&indices)) + } + } + } + + pub fn value_indices(&self, with_timestamp: bool) -> Vec { + 
let field_count = self.schema.fields().len(); + match &self.key_indices { + None => { + let mut indices = (0..field_count).collect::>(); + + if !with_timestamp { + indices.remove(self.timestamp_index); + } + indices + } + Some(keys) => (0..field_count) + .filter(|index| { + !keys.contains(index) && (with_timestamp || *index != self.timestamp_index) + }) + .collect::>(), + } + } + + pub fn sort( + &self, + batch: RecordBatch, + with_timestamp: bool, + ) -> Result { + if self.key_indices.is_none() && !with_timestamp { + return Ok(batch); + } + let sort_columns = self.sort_columns(&batch, with_timestamp); + let sort_indices = lexsort_to_indices(&sort_columns, None).expect("should be able to sort"); + let columns = batch + .columns() + .iter() + .map(|c| take(c, &sort_indices, None).unwrap()) + .collect(); + + RecordBatch::try_new(batch.schema(), columns) + } + + pub fn partition( + &self, + batch: &RecordBatch, + with_timestamp: bool, + ) -> Result>, ArrowError> { + if self.key_indices.is_none() && !with_timestamp { + #[allow(clippy::single_range_in_vec_init)] + return Ok(vec![0..batch.num_rows()]); + } + + let mut partition_columns = vec![]; + + if let Some(keys) = &self.routing_keys() { + partition_columns.extend(keys.iter().map(|index| batch.column(*index).clone())); + } + if with_timestamp { + partition_columns.push(batch.column(self.timestamp_index).clone()); + } + + Ok(partition(&partition_columns)?.ranges()) + } + + pub fn unkeyed_batch(&self, batch: &RecordBatch) -> Result { + if self.key_indices.is_none() { + return Ok(batch.clone()); + } + let columns: Vec<_> = (0..batch.num_columns()) + .filter(|index| !self.key_indices.as_ref().unwrap().contains(index)) + .collect(); + batch.project(&columns) + } + + pub fn schema_without_keys(&self) -> Result { + if self.key_indices.is_none() { + return Ok(self.clone()); + } + let key_indices = self.key_indices.as_ref().unwrap(); + let unkeyed_schema = Schema::new( + self.schema + .fields() + .iter() + .enumerate() + 
.filter(|(index, _field)| !key_indices.contains(index)) + .map(|(_, field)| field.as_ref().clone()) + .collect::>(), + ); + let timestamp_index = unkeyed_schema.index_of(TIMESTAMP_FIELD)?; + Ok(Self { + schema: Arc::new(unkeyed_schema), + timestamp_index, + key_indices: None, + routing_key_indices: None, + }) + } + + pub fn with_fields(&self, fields: Vec) -> Result { + let schema = Arc::new(Schema::new_with_metadata( + fields, + self.schema.metadata.clone(), + )); + + let timestamp_index = schema.index_of(TIMESTAMP_FIELD)?; + let max_index = *[&self.key_indices, &self.routing_key_indices] + .iter() + .map(|indices| indices.as_ref().and_then(|k| k.iter().max())) + .max() + .flatten() + .unwrap_or(&0); + + if schema.fields.len() - 1 < max_index { + return Err(ArrowError::InvalidArgumentError(format!( + "expected at least {} fields, but were only {}", + max_index + 1, + schema.fields.len() + ))); + } + + Ok(Self { + schema, + timestamp_index, + key_indices: self.key_indices.clone(), + routing_key_indices: self.routing_key_indices.clone(), + }) + } + + pub fn with_additional_fields( + &self, + new_fields: impl Iterator, + ) -> Result { + let mut fields = self.schema.fields.to_vec(); + fields.extend(new_fields.map(Arc::new)); + + self.with_fields(fields) + } +} + +pub fn server_for_hash_array( + hash: &PrimitiveArray, + n: usize, +) -> Result, ArrowError> { + let range_size = u64::MAX / (n as u64) + 1; + let range_scalar = UInt64Array::new_scalar(range_size); + let division = div(hash, &range_scalar)?; + let result: &PrimitiveArray = division.as_any().downcast_ref().unwrap(); + Ok(result.clone()) +} diff --git a/src/common/mod.rs b/src/common/mod.rs new file mode 100644 index 00000000..e3c103a2 --- /dev/null +++ b/src/common/mod.rs @@ -0,0 +1,72 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Shared core types and constants for FunctionStream (`crate::common`). +//! +//! Used by the runtime, SQL planner, coordinator, and other subsystems — +//! analogous to `arroyo-types` + `arroyo-rpc` in Arroyo. + +pub mod arrow_ext; +pub mod control; +pub mod date; +pub mod debezium; +pub mod fs_schema; +pub mod errors; +pub mod formats; +pub mod hash; +pub mod message; +pub mod operator_config; +pub mod task_info; +pub mod time_utils; +pub mod worker; +mod converter; + +// ── Re-exports from existing modules ── +pub use arrow_ext::{DisplayAsSql, FsExtensionType, GetArrowSchema, GetArrowType}; +pub use date::{DatePart, DateTruncPrecision}; +pub use debezium::{Debezium, DebeziumOp, UpdatingData}; +pub use hash::{range_for_server, server_for_hash, HASH_SEEDS}; +pub use message::{ArrowMessage, CheckpointBarrier, SignalMessage, Watermark}; +pub use task_info::{ChainInfo, TaskInfo}; +pub use time_utils::{from_micros, from_millis, from_nanos, to_micros, to_millis, to_nanos}; +pub use worker::{MachineId, WorkerId}; + +// ── Re-exports from new modules ── +pub use control::{ + CheckpointCompleted, CheckpointEvent, CompactionResult, ControlMessage, ControlResp, + ErrorDomain, RetryHint, StopMode, TaskCheckpointEventType, TaskError, +}; +pub use fs_schema::{FsSchema, FsSchemaRef}; +pub use errors::DataflowError; +pub use formats::{BadData, Format, Framing, JsonFormat}; +pub use operator_config::{MetadataField, OperatorConfig, RateLimit}; + +// ── Well-known column names ── +pub const TIMESTAMP_FIELD: &str = "_timestamp"; +pub const UPDATING_META_FIELD: 
&str = "_updating_meta"; + +// ── Environment variables ── +pub const JOB_ID_ENV: &str = "JOB_ID"; +pub const RUN_ID_ENV: &str = "RUN_ID"; + +// ── Metric names ── +pub const MESSAGES_RECV: &str = "fs_worker_messages_recv"; +pub const MESSAGES_SENT: &str = "fs_worker_messages_sent"; +pub const BYTES_RECV: &str = "fs_worker_bytes_recv"; +pub const BYTES_SENT: &str = "fs_worker_bytes_sent"; +pub const BATCHES_RECV: &str = "fs_worker_batches_recv"; +pub const BATCHES_SENT: &str = "fs_worker_batches_sent"; +pub const TX_QUEUE_SIZE: &str = "fs_worker_tx_queue_size"; +pub const TX_QUEUE_REM: &str = "fs_worker_tx_queue_rem"; +pub const DESERIALIZATION_ERRORS: &str = "fs_worker_deserialization_errors"; + +pub const LOOKUP_KEY_INDEX_FIELD: &str = "__lookup_key_index"; diff --git a/src/sql/datastream/logical.rs b/src/sql/datastream/logical.rs new file mode 100644 index 00000000..13560a3e --- /dev/null +++ b/src/sql/datastream/logical.rs @@ -0,0 +1,378 @@ +use itertools::Itertools; + +use datafusion::arrow::datatypes::DataType; +use petgraph::Direction; +use petgraph::dot::Dot; +use petgraph::graph::DiGraph; +use std::collections::{HashMap, HashSet}; +use std::fmt::{Debug, Display, Formatter}; +use std::sync::Arc; +use datafusion_proto::protobuf::ArrowType; +use prost::Message; +use strum::{Display, EnumString}; +use protocol::grpc::api; +use crate::types::FsSchema; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)] +pub enum OperatorName { + ExpressionWatermark, + ArrowValue, + ArrowKey, + Projection, + AsyncUdf, + Join, + InstantJoin, + LookupJoin, + WindowFunction, + TumblingWindowAggregate, + SlidingWindowAggregate, + SessionWindowAggregate, + UpdatingAggregate, + ConnectorSource, + ConnectorSink, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +pub enum LogicalEdgeType { + Forward, + Shuffle, + LeftJoin, + RightJoin, +} + +impl Display for LogicalEdgeType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + 
LogicalEdgeType::Forward => write!(f, "→"), + LogicalEdgeType::Shuffle => write!(f, "⤨"), + LogicalEdgeType::LeftJoin => write!(f, "-[left]⤨"), + LogicalEdgeType::RightJoin => write!(f, "-[right]⤨"), + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct LogicalEdge { + pub edge_type: LogicalEdgeType, + pub schema: Arc, +} + +impl LogicalEdge { + pub fn new(edge_type: LogicalEdgeType, schema: FsSchema) -> Self { + LogicalEdge { + edge_type, + schema: Arc::new(schema), + } + } + + pub fn project_all(edge_type: LogicalEdgeType, schema: FsSchema) -> Self { + LogicalEdge { + edge_type, + schema: Arc::new(schema), + } + } +} + +#[derive(Clone, Debug)] +pub struct ChainedLogicalOperator { + pub operator_id: String, + pub operator_name: OperatorName, + pub operator_config: Vec, +} + +#[derive(Clone, Debug)] +pub struct OperatorChain { + pub(crate) operators: Vec, + pub(crate) edges: Vec>, +} + +impl OperatorChain { + pub fn new(operator: ChainedLogicalOperator) -> Self { + Self { + operators: vec![operator], + edges: vec![], + } + } + + pub fn iter( + &self, + ) -> impl Iterator>)> { + self.operators + .iter() + .zip_longest(self.edges.iter()) + .map(|e| e.left_and_right()) + .map(|(l, r)| (l.unwrap(), r)) + } + + pub fn iter_mut( + &mut self, + ) -> impl Iterator>)> { + self.operators + .iter_mut() + .zip_longest(self.edges.iter()) + .map(|e| e.left_and_right()) + .map(|(l, r)| (l.unwrap(), r)) + } + + pub fn first(&self) -> &ChainedLogicalOperator { + &self.operators[0] + } + + pub fn len(&self) -> usize { + self.operators.len() + } + + pub fn is_empty(&self) -> bool { + self.operators.is_empty() + } + + pub fn is_source(&self) -> bool { + self.operators[0].operator_name == OperatorName::ConnectorSource + } + + pub fn is_sink(&self) -> bool { + self.operators[0].operator_name == OperatorName::ConnectorSink + } +} + +#[derive(Clone)] +pub struct LogicalNode { + pub node_id: u32, + pub description: String, + pub operator_chain: OperatorChain, + pub parallelism: 
usize, +} + +impl LogicalNode { + pub fn single( + id: u32, + operator_id: String, + name: OperatorName, + config: Vec, + description: String, + parallelism: usize, + ) -> Self { + Self { + node_id: id, + description, + operator_chain: OperatorChain { + operators: vec![ChainedLogicalOperator { + operator_id, + operator_name: name, + operator_config: config, + }], + edges: vec![], + }, + parallelism, + } + } +} + +impl Display for LogicalNode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.description) + } +} + +impl Debug for LogicalNode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}[{}]", + self.operator_chain + .operators + .iter() + .map(|op| op.operator_id.clone()) + .collect::>() + .join(" -> "), + self.parallelism + ) + } +} + +pub type LogicalGraph = DiGraph; + +pub trait Optimizer { + fn optimize_once(&self, plan: &mut LogicalGraph) -> bool; + + fn optimize(&self, plan: &mut LogicalGraph) { + loop { + if !self.optimize_once(plan) { + break; + } + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd)] +pub struct DylibUdfConfig { + pub dylib_path: String, + pub arg_types: Vec, + pub return_type: DataType, + pub aggregate: bool, + pub is_async: bool, +} + +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct PythonUdfConfig { + pub arg_types: Vec, + pub return_type: DataType, + pub name: Arc, + pub definition: Arc, +} + +#[derive(Clone, Debug, Default)] +pub struct ProgramConfig { + pub udf_dylibs: HashMap, + pub python_udfs: HashMap, +} + +#[derive(Clone, Debug, Default)] +pub struct LogicalProgram { + pub graph: LogicalGraph, + pub program_config: ProgramConfig, +} + +impl LogicalProgram { + pub fn new(graph: LogicalGraph, program_config: ProgramConfig) -> Self { + Self { + graph, + program_config, + } + } + + pub fn optimize(&mut self, optimizer: &dyn Optimizer) { + optimizer.optimize(&mut self.graph); + } + + pub fn update_parallelism(&mut self, overrides: 
&HashMap) { + for node in self.graph.node_weights_mut() { + if let Some(p) = overrides.get(&node.node_id) { + node.parallelism = *p; + } + } + } + + pub fn dot(&self) -> String { + format!("{:?}", Dot::with_config(&self.graph, &[])) + } + + pub fn task_count(&self) -> usize { + self.graph.node_weights().map(|nw| nw.parallelism).sum() + } + + pub fn sources(&self) -> HashSet { + self.graph + .externals(Direction::Incoming) + .map(|t| self.graph.node_weight(t).unwrap().node_id) + .collect() + } + + pub fn tasks_per_operator(&self) -> HashMap { + let mut tasks_per_operator = HashMap::new(); + for node in self.graph.node_weights() { + for op in &node.operator_chain.operators { + tasks_per_operator.insert(op.operator_id.clone(), node.parallelism); + } + } + tasks_per_operator + } + + pub fn operator_names_by_id(&self) -> HashMap { + let mut m = HashMap::new(); + for node in self.graph.node_weights() { + for op in &node.operator_chain.operators { + m.insert(op.operator_id.clone(), op.operator_name.to_string()); + } + } + m + } + + pub fn tasks_per_node(&self) -> HashMap { + let mut tasks_per_node = HashMap::new(); + for node in self.graph.node_weights() { + tasks_per_node.insert(node.node_id, node.parallelism); + } + tasks_per_node + } + + pub fn features(&self) -> HashSet { + let mut s = HashSet::new(); + for n in self.graph.node_weights() { + for t in &n.operator_chain.operators { + let feature = match &t.operator_name { + OperatorName::AsyncUdf => "async-udf".to_string(), + OperatorName::ExpressionWatermark + | OperatorName::ArrowValue + | OperatorName::ArrowKey + | OperatorName::Projection => continue, + OperatorName::Join => "join-with-expiration".to_string(), + OperatorName::InstantJoin => "windowed-join".to_string(), + OperatorName::WindowFunction => "sql-window-function".to_string(), + OperatorName::LookupJoin => "lookup-join".to_string(), + OperatorName::TumblingWindowAggregate => { + "sql-tumbling-window-aggregate".to_string() + } + 
OperatorName::SlidingWindowAggregate => { + "sql-sliding-window-aggregate".to_string() + } + OperatorName::SessionWindowAggregate => { + "sql-session-window-aggregate".to_string() + } + OperatorName::UpdatingAggregate => "sql-updating-aggregate".to_string(), + OperatorName::ConnectorSource => "connector-source".to_string(), + OperatorName::ConnectorSink => "connector-sink".to_string(), + }; + s.insert(feature); + } + } + s + } +} + + +impl From for api::DylibUdfConfig { + fn from(from: DylibUdfConfig) -> Self { + api::DylibUdfConfig { + dylib_path: from.dylib_path, + arg_types: from + .arg_types + .iter() + .map(|t| { + ArrowType::try_from(t) + .expect("unsupported data type") + .encode_to_vec() + }) + .collect(), + return_type: ArrowType::try_from(&from.return_type) + .expect("unsupported data type") + .encode_to_vec(), + aggregate: from.aggregate, + is_async: from.is_async, + } + } +} + +impl From for DylibUdfConfig { + fn from(from: api::DylibUdfConfig) -> Self { + DylibUdfConfig { + dylib_path: from.dylib_path, + arg_types: from + .arg_types + .iter() + .map(|t| { + DataType::try_from( + &ArrowType::decode(&mut t.as_slice()).expect("invalid arrow type"), + ) + .expect("invalid arrow type") + }) + .collect(), + return_type: DataType::try_from( + &ArrowType::decode(&mut from.return_type.as_slice()).unwrap(), + ) + .expect("invalid arrow type"), + aggregate: from.aggregate, + is_async: from.is_async, + } + } +} \ No newline at end of file diff --git a/src/sql/datastream/mod.rs b/src/sql/datastream/mod.rs new file mode 100644 index 00000000..82d25f24 --- /dev/null +++ b/src/sql/datastream/mod.rs @@ -0,0 +1 @@ +pub mod logical; diff --git a/src/sql/physical/physical_planner.rs b/src/sql/physical/physical_planner.rs new file mode 100644 index 00000000..963fa76f --- /dev/null +++ b/src/sql/physical/physical_planner.rs @@ -0,0 +1,406 @@ +use std::collections::HashMap; +use std::sync::Arc; +use std::thread; +use std::time::Duration; + +use 
datafusion::arrow::datatypes::IntervalMonthDayNanoType; +use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; +use datafusion::common::{ + DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, Spans, plan_err, +}; +use datafusion::execution::context::SessionState; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::functions::datetime::date_bin; +use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNode}; +use datafusion::physical_expr::PhysicalExpr; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}; +use datafusion_proto::protobuf::{PhysicalExprNode, PhysicalPlanNode}; +use datafusion_proto::{ + physical_plan::AsExecutionPlan, + protobuf::{AggregateMode, physical_plan_node::PhysicalPlanType}, +}; +use petgraph::graph::{DiGraph, NodeIndex}; +use prost::Message; +use tokio::runtime::Builder; +use tokio::sync::oneshot; + +use async_trait::async_trait; +use datafusion_common::TableReference; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; + +use crate::sql::datastream::logical::{LogicalEdge, LogicalGraph, LogicalNode}; +use crate::sql::physical::{ + DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec, +}; +use crate::sql::logical_node::debezium::{ + DEBEZIUM_UNROLLING_EXTENSION_NAME, DebeziumUnrollingExtension, TO_DEBEZIUM_EXTENSION_NAME, +}; +use crate::sql::logical_node::key_calculation::KeyCalculationExtension; +use crate::sql::logical_node::{NodeWithIncomingEdges, StreamExtension}; +use crate::sql::schema::utils::add_timestamp_field_arrow; +use crate::sql::schema::StreamSchemaProvider; +use crate::types::{FsSchema, FsSchemaRef}; + +#[derive(Eq, Hash, PartialEq)] +#[derive(Debug)] +pub(crate) enum NamedNode { + Source(TableReference), + 
Watermark(TableReference), + RemoteTable(TableReference), + Sink(TableReference), +} + +pub(crate) struct PlanToGraphVisitor<'a> { + graph: DiGraph, + output_schemas: HashMap, + named_nodes: HashMap, + traversal: Vec>, + planner: Planner<'a>, +} + +impl<'a> PlanToGraphVisitor<'a> { + pub fn new(schema_provider: &'a StreamSchemaProvider, session_state: &'a SessionState) -> Self { + Self { + graph: Default::default(), + output_schemas: Default::default(), + named_nodes: Default::default(), + traversal: vec![], + planner: Planner::new(schema_provider, session_state), + } + } +} + +pub(crate) struct Planner<'a> { + schema_provider: &'a StreamSchemaProvider, + planner: DefaultPhysicalPlanner, + session_state: &'a SessionState, +} + +impl<'a> Planner<'a> { + pub(crate) fn new( + schema_provider: &'a StreamSchemaProvider, + session_state: &'a SessionState, + ) -> Self { + let planner = + DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(FsExtensionPlanner {})]); + Self { + schema_provider, + planner, + session_state, + } + } + + pub(crate) fn sync_plan(&self, plan: &LogicalPlan) -> Result> { + let fut = self.planner.create_physical_plan(plan, self.session_state); + let (tx, mut rx) = oneshot::channel(); + thread::scope(|s| { + let _handle = tokio::runtime::Handle::current(); + let builder = thread::Builder::new(); + let builder = if cfg!(debug_assertions) { + builder.stack_size(10_000_000) + } else { + builder + }; + builder + .spawn_scoped(s, move || { + let rt = Builder::new_current_thread().enable_all().build().unwrap(); + rt.block_on(async { + let plan = fut.await; + tx.send(plan).unwrap(); + }); + }) + .unwrap(); + }); + + rx.try_recv().unwrap() + } + + pub(crate) fn create_physical_expr( + &self, + expr: &Expr, + input_dfschema: &DFSchema, + ) -> Result> { + self.planner + .create_physical_expr(expr, input_dfschema, self.session_state) + } + + pub(crate) fn serialize_as_physical_expr( + &self, + expr: &Expr, + schema: &DFSchema, + ) -> Result> { + let 
physical = self.create_physical_expr(expr, schema)?; + let proto = serialize_physical_expr(&physical, &DefaultPhysicalExtensionCodec {})?; + Ok(proto.encode_to_vec()) + } + + pub(crate) fn split_physical_plan( + &self, + key_indices: Vec, + aggregate: &LogicalPlan, + add_timestamp_field: bool, + ) -> Result { + let physical_plan = self.sync_plan(aggregate)?; + let codec = FsPhysicalExtensionCodec { + context: DecodingContext::Planning, + }; + let mut physical_plan_node = + PhysicalPlanNode::try_from_physical_plan(physical_plan.clone(), &codec)?; + let PhysicalPlanType::Aggregate(mut final_aggregate_proto) = physical_plan_node + .physical_plan_type + .take() + .ok_or_else(|| DataFusionError::Plan("missing physical plan type".to_string()))? + else { + return plan_err!("unexpected physical plan type"); + }; + let AggregateMode::Final = final_aggregate_proto.mode() else { + return plan_err!("unexpected physical plan type"); + }; + + let partial_aggregation_plan = *final_aggregate_proto + .input + .take() + .ok_or_else(|| DataFusionError::Plan("missing input".to_string()))?; + + let partial_aggregation_exec_plan = partial_aggregation_plan.try_into_physical_plan( + self.schema_provider, + &RuntimeEnvBuilder::new().build().unwrap(), + &codec, + )?; + + let partial_schema = partial_aggregation_exec_plan.schema(); + let final_input_table_provider = FsMemExec::new("partial".into(), partial_schema.clone()); + + final_aggregate_proto.input = Some(Box::new(PhysicalPlanNode::try_from_physical_plan( + Arc::new(final_input_table_provider), + &codec, + )?)); + + let finish_plan = PhysicalPlanNode { + physical_plan_type: Some(PhysicalPlanType::Aggregate(final_aggregate_proto)), + }; + + let (partial_schema, timestamp_index) = if add_timestamp_field { + ( + add_timestamp_field_arrow((*partial_schema).clone()), + partial_schema.fields().len(), + ) + } else { + (partial_schema.clone(), partial_schema.fields().len() - 1) + }; + + let partial_schema = FsSchema::new_keyed(partial_schema, 
timestamp_index, key_indices); + + Ok(SplitPlanOutput { + partial_aggregation_plan, + partial_schema, + finish_plan, + }) + } + + pub fn binning_function_proto( + &self, + width: Duration, + input_schema: DFSchemaRef, + ) -> Result { + let date_bin = date_bin().call(vec![ + Expr::Literal( + ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + 0, + 0, + width.as_nanos() as i64, + ))), + None, + ), + Expr::Column(datafusion::common::Column { + relation: None, + name: "_timestamp".into(), + spans: Spans::new(), + }), + ]); + + let binning_function = self.create_physical_expr(&date_bin, &input_schema)?; + serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {}) + } +} + +struct FsExtensionPlanner {} + +#[async_trait] +impl ExtensionPlanner for FsExtensionPlanner { + async fn plan_extension( + &self, + _planner: &dyn PhysicalPlanner, + node: &dyn UserDefinedLogicalNode, + _logical_inputs: &[&LogicalPlan], + physical_inputs: &[Arc], + _session_state: &SessionState, + ) -> Result>> { + let schema = node.schema().as_ref().into(); + if let Ok::<&dyn StreamExtension, _>(stream_extension) = node.try_into() { + if stream_extension.transparent() { + match node.name() { + DEBEZIUM_UNROLLING_EXTENSION_NAME => { + let node = node + .as_any() + .downcast_ref::() + .unwrap(); + let input = physical_inputs[0].clone(); + return Ok(Some(Arc::new(DebeziumUnrollingExec::try_new( + input, + node.primary_keys.clone(), + )?))); + } + TO_DEBEZIUM_EXTENSION_NAME => { + let input = physical_inputs[0].clone(); + return Ok(Some(Arc::new(ToDebeziumExec::try_new(input)?))); + } + _ => return Ok(None), + } + } + }; + let name = + if let Some(key_extension) = node.as_any().downcast_ref::() { + key_extension.name.clone() + } else { + None + }; + Ok(Some(Arc::new(FsMemExec::new( + name.unwrap_or("memory".to_string()), + Arc::new(schema), + )))) + } +} + +impl PlanToGraphVisitor<'_> { + fn add_index_to_traversal(&mut self, index: NodeIndex) { + if let 
Some(last) = self.traversal.last_mut() { + last.push(index); + } + } + + pub(crate) fn add_plan(&mut self, plan: LogicalPlan) -> Result<()> { + self.traversal.clear(); + plan.visit(self)?; + Ok(()) + } + + pub fn into_graph(self) -> LogicalGraph { + self.graph + } + + pub fn build_extension( + &mut self, + input_nodes: Vec, + extension: &dyn StreamExtension, + ) -> Result<()> { + if let Some(node_name) = extension.node_name() { + if self.named_nodes.contains_key(&node_name) { + return plan_err!( + "extension {:?} has already been planned, shouldn't try again.", + node_name + ); + } + } + + let input_schemas = input_nodes + .iter() + .map(|index| { + Ok(self + .output_schemas + .get(index) + .ok_or_else(|| DataFusionError::Plan("missing input node".to_string()))? + .clone()) + }) + .collect::>>()?; + + let NodeWithIncomingEdges { node, edges } = extension + .plan_node(&self.planner, self.graph.node_count(), input_schemas) + .map_err(|e| e.context(format!("planning operator {extension:?}")))?; + + let node_index = self.graph.add_node(node); + self.add_index_to_traversal(node_index); + + for (source, edge) in input_nodes.into_iter().zip(edges.into_iter()) { + self.graph.add_edge(source, node_index, edge); + } + + self.output_schemas + .insert(node_index, extension.output_schema().into()); + + if let Some(node_name) = extension.node_name() { + self.named_nodes.insert(node_name, node_index); + } + Ok(()) + } +} + +impl TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> { + type Node = LogicalPlan; + + fn f_down(&mut self, node: &Self::Node) -> Result { + let LogicalPlan::Extension(Extension { node }) = node else { + return Ok(TreeNodeRecursion::Continue); + }; + + let stream_extension: &dyn StreamExtension = node + .try_into() + .map_err(|e: DataFusionError| e.context("converting extension"))?; + if stream_extension.transparent() { + return Ok(TreeNodeRecursion::Continue); + } + + if let Some(name) = stream_extension.node_name() { + if let Some(node_index) = 
self.named_nodes.get(&name) { + self.add_index_to_traversal(*node_index); + return Ok(TreeNodeRecursion::Jump); + } + } + + if !node.inputs().is_empty() { + self.traversal.push(vec![]); + } + + Ok(TreeNodeRecursion::Continue) + } + + fn f_up(&mut self, node: &Self::Node) -> Result { + let LogicalPlan::Extension(Extension { node }) = node else { + return Ok(TreeNodeRecursion::Continue); + }; + + let stream_extension: &dyn StreamExtension = node + .try_into() + .map_err(|e: DataFusionError| e.context("planning extension"))?; + + if stream_extension.transparent() { + return Ok(TreeNodeRecursion::Continue); + } + + if let Some(name) = stream_extension.node_name() { + if self.named_nodes.contains_key(&name) { + return Ok(TreeNodeRecursion::Continue); + } + } + + let input_nodes = if !node.inputs().is_empty() { + self.traversal.pop().unwrap_or_default() + } else { + vec![] + }; + let stream_extension: &dyn StreamExtension = node + .try_into() + .map_err(|e: DataFusionError| e.context("converting extension"))?; + self.build_extension(input_nodes, stream_extension)?; + + Ok(TreeNodeRecursion::Continue) + } +} + +pub(crate) struct SplitPlanOutput { + pub(crate) partial_aggregation_plan: PhysicalPlanNode, + pub(crate) partial_schema: FsSchema, + pub(crate) finish_plan: PhysicalPlanNode, +} From e768a48979b56667154d554776074c93ff1d7bc6 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 22 Mar 2026 14:43:45 +0800 Subject: [PATCH 12/44] update --- protocol/build.rs | 11 +- protocol/proto/storage.proto | 91 ++++ protocol/src/lib.rs | 9 + src/coordinator/analyze/analyzer.rs | 14 +- src/coordinator/coordinator.rs | 383 ++++++++--------- src/coordinator/execution/executor.rs | 185 +++++--- src/coordinator/mod.rs | 6 +- src/coordinator/plan/create_table_plan.rs | 29 +- src/coordinator/plan/drop_table_plan.rs | 34 ++ src/coordinator/plan/logical_plan_visitor.rs | 420 ++++++++++++------- src/coordinator/plan/mod.rs | 4 +- src/coordinator/plan/visitor.rs | 8 +- 
src/coordinator/runtime_context.rs | 64 +++ src/coordinator/statement/create_table.rs | 4 + src/coordinator/statement/drop_table.rs | 41 ++ src/coordinator/statement/mod.rs | 14 + src/coordinator/statement/streaming_table.rs | 4 + src/coordinator/statement/visitor.rs | 10 +- src/main.rs | 4 +- src/server/handler.rs | 398 +++++++----------- src/server/initializer.rs | 142 +++---- src/server/mod.rs | 2 +- src/sql/parse.rs | 74 +++- src/storage/mod.rs | 1 + src/storage/stream_catalog/codec.rs | 57 +++ src/storage/stream_catalog/manager.rs | 333 +++++++++++++++ src/storage/stream_catalog/meta_store.rs | 70 ++++ src/storage/stream_catalog/mod.rs | 23 + src/storage/task/mod.rs | 1 + src/storage/task/proto_codec.rs | 271 ++++++++++++ src/storage/task/rocksdb_storage.rs | 83 ++-- src/storage/task/storage.rs | 2 +- 32 files changed, 2003 insertions(+), 789 deletions(-) create mode 100644 protocol/proto/storage.proto create mode 100644 src/coordinator/plan/drop_table_plan.rs create mode 100644 src/coordinator/runtime_context.rs create mode 100644 src/coordinator/statement/drop_table.rs create mode 100644 src/storage/stream_catalog/codec.rs create mode 100644 src/storage/stream_catalog/manager.rs create mode 100644 src/storage/stream_catalog/meta_store.rs create mode 100644 src/storage/stream_catalog/mod.rs create mode 100644 src/storage/task/proto_codec.rs diff --git a/protocol/build.rs b/protocol/build.rs index e258f456..d3943f53 100644 --- a/protocol/build.rs +++ b/protocol/build.rs @@ -39,7 +39,6 @@ fn main() -> Result<(), Box> { .build_server(true) .compile_protos(&["proto/function_stream.proto"], &["proto"])?; - // 2. 
fs_api.proto → with file descriptor set + serde for REST/JSON let api_dir = out_dir.join("api"); std::fs::create_dir_all(&api_dir)?; @@ -56,10 +55,20 @@ fn main() -> Result<(), Box> { .build_server(false) .compile_protos(&["proto/fs_api.proto"], &["proto"])?; + let storage_dir = out_dir.join("storage"); + std::fs::create_dir_all(&storage_dir)?; + tonic_build::configure() + .out_dir(&storage_dir) + .protoc_arg("--experimental_allow_proto3_optional") + .build_client(false) + .build_server(false) + .compile_protos(&["proto/storage.proto"], &["proto"])?; + log::info!("Protocol Buffers code generated successfully"); println!("cargo:rustc-env=PROTO_GEN_DIR={}", out_dir.display()); println!("cargo:rerun-if-changed=proto/function_stream.proto"); println!("cargo:rerun-if-changed=proto/fs_api.proto"); + println!("cargo:rerun-if-changed=proto/storage.proto"); Ok(()) } diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto new file mode 100644 index 00000000..b11037a2 --- /dev/null +++ b/protocol/proto/storage.proto @@ -0,0 +1,91 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// All durable / persisted payloads for FunctionStream (single source of truth for storage wire format). +// - Stream table catalog (MetaStore KV) +// - Task rows (RocksDB task_meta / task_payload; values may be prefixed — see runtime codec) + +syntax = "proto3"; + +package function_stream.storage; + +// ============================================================================= +// Stream catalog (coordinator stream tables: source / sink / memory) +// ============================================================================= + +// Top-level persisted record for one stream table. 
+message TableDefinition { + string table_name = 1; + int64 updated_at_millis = 2; + oneof table_type { + StreamSource source = 3; + StreamSink sink = 4; + StreamMemory memory = 5; + } +} + +message StreamSource { + bytes arrow_schema_ipc = 1; + optional string event_time_field = 2; + optional string watermark_field = 3; +} + +message StreamSink { + bytes arrow_schema_ipc = 1; +} + +message StreamMemory { + optional bytes logical_plan_bytes = 1; +} + +// ============================================================================= +// Task storage (RocksDB metadata + module payload) +// ============================================================================= + +// Lifecycle state persisted for task recovery. New enum values MUST be appended +// with new numbers (never renumber) for forward compatibility. +enum ComponentStateKind { + COMPONENT_STATE_KIND_UNSPECIFIED = 0; + UNINITIALIZED = 1; + INITIALIZED = 2; + STARTING = 3; + RUNNING = 4; + CHECKPOINTING = 5; + STOPPING = 6; + STOPPED = 7; + CLOSING = 8; + CLOSED = 9; + ERROR = 10; +} + +message ComponentStateProto { + ComponentStateKind kind = 1; + // Set when kind == ERROR + string error_message = 2; +} + +// Stored in CF task_meta (after magic prefix FSP1). +message TaskMetadataProto { + string task_type = 1; + ComponentStateProto state = 2; + uint64 created_at = 3; + optional uint64 checkpoint_id = 4; +} + +message TaskModuleWasm { + bytes wasm_binary = 1; +} + +message TaskModulePython { + string class_name = 1; + string module_path = 2; + optional bytes embedded_code = 3; +} + +// Stored in CF task_payload (after magic prefix FSP1). +message TaskModulePayloadProto { + oneof payload { + TaskModuleWasm wasm = 1; + TaskModulePython python = 2; + } +} diff --git a/protocol/src/lib.rs b/protocol/src/lib.rs index f924a5c6..d1bdfff9 100644 --- a/protocol/src/lib.rs +++ b/protocol/src/lib.rs @@ -37,3 +37,12 @@ pub mod grpc { /// File descriptor set for fs_api.proto (for gRPC reflection / REST gateway). 
pub const FS_API_FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("fs_api_descriptor"); + +// ─────────────── Durable storage (storage.proto: catalog + task rows) ─────────────── + +/// Prost types for persisted stream catalog and task storage (`proto/storage.proto`). +pub mod storage { + #![allow(clippy::all)] + #![allow(warnings)] + include!("../generated/storage/function_stream.storage.rs"); +} diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs index c351f3ae..3889431e 100644 --- a/src/coordinator/analyze/analyzer.rs +++ b/src/coordinator/analyze/analyzer.rs @@ -13,9 +13,9 @@ use super::Analysis; use crate::coordinator::execution_context::ExecutionContext; use crate::coordinator::statement::{ - CreateFunction, CreatePythonFunction, CreateTable, DropFunction, ShowFunctions, StartFunction, - Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, - StreamingTableStatement, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, + ShowFunctions, StartFunction, Statement, StatementVisitor, StatementVisitorContext, + StatementVisitorResult, StopFunction, StreamingTableStatement, }; use std::fmt; @@ -134,4 +134,12 @@ impl StatementVisitor for Analyzer<'_> { stmt.statement.clone(), ))) } + + fn visit_drop_table_statement( + &self, + stmt: &DropTableStatement, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Analyze(Box::new(DropTableStatement::new(stmt.statement.clone()))) + } } diff --git a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs index 0ddca660..ec81132a 100644 --- a/src/coordinator/coordinator.rs +++ b/src/coordinator/coordinator.rs @@ -10,252 +10,248 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::sync::Arc; use std::time::Instant; use anyhow::{Context, Result}; -use crate::coordinator::analyze::{Analysis, Analyzer}; +use crate::coordinator::analyze::Analyzer; use crate::coordinator::dataset::ExecuteResult; use crate::coordinator::execution::Executor; use crate::coordinator::plan::{LogicalPlanVisitor, LogicalPlanner, PlanNode}; use crate::coordinator::statement::Statement; -use crate::runtime::taskexecutor::TaskManager; use crate::sql::schema::StreamSchemaProvider; use super::execution_context::ExecutionContext; +use super::runtime_context::CoordinatorRuntimeContext; +#[derive(Default)] pub struct Coordinator {} -impl Default for Coordinator { - fn default() -> Self { - Self::new() - } -} - impl Coordinator { pub fn new() -> Self { Self {} } - pub fn compile_plan( - &self, - stmt: &dyn Statement, - schema_provider: StreamSchemaProvider, - ) -> Result, anyhow::Error> { - let context = ExecutionContext::new(); - let analysis = self.step_analyze(&context, stmt)?; - let plan = self.step_build_logical_plan(&analysis, schema_provider)?; - self.step_optimize(&analysis, plan) - } + // ======================================================================== + // Plan compilation + // ======================================================================== - /// Same as [`Self::execute`], but uses the provided catalog / stream tables (e.g. tests). - pub fn execute_with_schema_provider( + pub fn compile_plan( &self, stmt: &dyn Statement, schema_provider: StreamSchemaProvider, - ) -> ExecuteResult { - let start_time = Instant::now(); - let context = ExecutionContext::new(); - let execution_id = context.execution_id; - - match self.execute_pipeline(&context, stmt, schema_provider) { - Ok(result) => { - log::debug!( - "[{}] Execution completed in {}ms", - execution_id, - start_time.elapsed().as_millis() - ); - result - } - Err(e) => { - log::error!( - "[{}] Execution failed after {}ms. 
Error: {:#}", - execution_id, - start_time.elapsed().as_millis(), - e - ); - ExecuteResult::err(format!("Execution failed: {:#}", e)) - } - } - } - - pub fn execute(&self, stmt: &dyn Statement) -> ExecuteResult { - self.execute_with_schema_provider(stmt, StreamSchemaProvider::new()) + ) -> Result> { + self.compile_plan_internal(&ExecutionContext::new(), stmt, schema_provider) } - fn execute_pipeline( + /// Internal pipeline: Analyze → build logical plan → optimize. + fn compile_plan_internal( &self, context: &ExecutionContext, stmt: &dyn Statement, schema_provider: StreamSchemaProvider, - ) -> Result { - let analysis = self.step_analyze(context, stmt)?; - let plan = self.step_build_logical_plan(&analysis, schema_provider)?; - let optimized_plan = self.step_optimize(&analysis, plan)?; - self.step_execute(optimized_plan) - } - - fn step_analyze(&self, context: &ExecutionContext, stmt: &dyn Statement) -> Result { + ) -> Result> { + let exec_id = context.execution_id; let start = Instant::now(); - let analyzer = Analyzer::new(context); - let result = analyzer + + let analysis = Analyzer::new(context) .analyze(stmt) .map_err(|e| anyhow::anyhow!(e)) - .context("Analyzer phase failed"); - + .context("Analyzer phase failed")?; log::debug!( "[{}] Analyze phase finished in {}ms", - context.execution_id, + exec_id, start.elapsed().as_millis() ); - result - } - fn step_build_logical_plan( - &self, - analysis: &Analysis, - schema_provider: StreamSchemaProvider, - ) -> Result> { - let visitor = LogicalPlanVisitor::new(schema_provider); - let plan = visitor.visit(analysis); - Ok(plan) - } - - fn step_optimize( - &self, - analysis: &Analysis, - plan: Box, - ) -> Result> { - let start = Instant::now(); - let planner = LogicalPlanner::new(); - let optimized = planner.optimize(plan, analysis); + let plan = LogicalPlanVisitor::new(schema_provider).visit(&analysis); + let opt_start = Instant::now(); + let optimized = LogicalPlanner::new().optimize(plan, &analysis); log::debug!( - 
"Optimizer phase finished in {}ms", - start.elapsed().as_millis() + "[{}] Optimizer phase finished in {}ms", + exec_id, + opt_start.elapsed().as_millis() ); + Ok(optimized) } - fn step_execute(&self, plan: Box) -> Result { + // ======================================================================== + // Execution + // ======================================================================== + + pub fn execute(&self, stmt: &dyn Statement) -> ExecuteResult { + match CoordinatorRuntimeContext::try_from_globals() { + Ok(ctx) => self.execute_with_runtime_context(stmt, &ctx), + Err(e) => ExecuteResult::err(e.to_string()), + } + } + + pub async fn execute_with_stream_catalog(&self, stmt: &dyn Statement) -> ExecuteResult { + self.execute(stmt) + } + + /// Same as [`Self::execute`], but uses an explicit [`CoordinatorRuntimeContext`] (e.g. tests or custom wiring). + pub fn execute_with_runtime_context( + &self, + stmt: &dyn Statement, + runtime: &CoordinatorRuntimeContext, + ) -> ExecuteResult { let start = Instant::now(); - let task_manager = match TaskManager::get() { - Ok(tm) => tm, - Err(e) => { - return Ok(ExecuteResult::err(format!( - "Failed to get TaskManager: {}", - e - ))); - } - }; - let executor = Executor::new(task_manager.clone()); - let result = executor + let context = ExecutionContext::new(); + let exec_id = context.execution_id; + let schema_provider = runtime.planning_schema_provider(); + + let result = (|| -> Result { + let plan = self.compile_plan_internal(&context, stmt, schema_provider)?; + + let exec_start = Instant::now(); + let res = Executor::new( + Arc::clone(&runtime.task_manager), + runtime.catalog_manager.clone(), + ) .execute(plan.as_ref()) .map_err(|e| anyhow::anyhow!(e)) - .context("Executor phase failed"); + .context("Executor phase failed")?; - log::debug!( - "Executor phase finished in {}ms", - start.elapsed().as_millis() - ); - result + log::debug!( + "[{}] Executor phase finished in {}ms", + exec_id, + exec_start.elapsed().as_millis() + 
); + Ok(res) + })(); + + match result { + Ok(res) => { + log::debug!( + "[{}] Execution completed in {}ms", + exec_id, + start.elapsed().as_millis() + ); + res + } + Err(e) => { + log::error!( + "[{}] Execution failed after {}ms. Error: {:#}", + exec_id, + start.elapsed().as_millis(), + e + ); + ExecuteResult::err(format!("Execution failed: {:#}", e)) + } + } } } +// --------------------------------------------------------------------------- +// Test-only helpers (used by `create_streaming_table_coordinator_tests` below) +// --------------------------------------------------------------------------- + #[cfg(test)] -mod create_streaming_table_coordinator_tests { - use std::sync::Arc; +use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; + +#[cfg(test)] +use crate::sql::common::TIMESTAMP_FIELD; +#[cfg(test)] +use crate::sql::parse::parse_sql; - use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; +#[cfg(test)] +fn fake_stream_schema_provider() -> StreamSchemaProvider { + let mut provider = StreamSchemaProvider::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + ])); + provider.add_source_table( + "src".to_string(), + schema, + Some(TIMESTAMP_FIELD.to_string()), + None, + ); + provider +} - use crate::sql::common::TIMESTAMP_FIELD; - use crate::sql::parse::parse_sql; - use crate::sql::schema::StreamSchemaProvider; - - use super::Coordinator; - - fn fake_stream_schema_provider() -> StreamSchemaProvider { - let mut provider = StreamSchemaProvider::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new( - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ), - ])); - provider.add_source_table( - "src".to_string(), - schema, - Some(TIMESTAMP_FIELD.to_string()), - None, - ); - provider - } +#[cfg(test)] +fn 
fake_stream_schema_provider_with_v() -> StreamSchemaProvider { + let mut provider = StreamSchemaProvider::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("v", DataType::Utf8, true), + Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + ])); + provider.add_source_table( + "src".to_string(), + schema, + Some(TIMESTAMP_FIELD.to_string()), + None, + ); + provider +} - fn fake_stream_schema_provider_with_v() -> StreamSchemaProvider { - let mut provider = StreamSchemaProvider::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new("v", DataType::Utf8, true), - Field::new( - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ), - ])); - provider.add_source_table( - "src".to_string(), - schema, - Some(TIMESTAMP_FIELD.to_string()), - None, - ); - provider - } +#[cfg(test)] +fn fake_src_dim_provider() -> StreamSchemaProvider { + let mut provider = fake_stream_schema_provider_with_v(); + let dim = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + Field::new("amt", DataType::Float64, true), + Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ), + ])); + provider.add_source_table( + "dim".to_string(), + dim, + Some(TIMESTAMP_FIELD.to_string()), + None, + ); + provider +} - fn fake_src_dim_provider() -> StreamSchemaProvider { - let mut provider = fake_stream_schema_provider_with_v(); - let dim = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new("name", DataType::Utf8, true), - Field::new("amt", DataType::Float64, true), - Field::new( - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ), - ])); - provider.add_source_table( - "dim".to_string(), - dim, - Some(TIMESTAMP_FIELD.to_string()), - None, - ); - provider - } +#[cfg(test)] 
+fn assert_coordinator_streaming_build_ok( + sql: &str, + provider: StreamSchemaProvider, + expect_sink_substring: &str, + expect_connector_substring: &str, +) { + let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}")); + assert_eq!(stmts.len(), 1); + let plan = Coordinator::new() + .compile_plan(stmts[0].as_ref(), provider) + .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}")); + let rendered = format!("{plan:?}"); + assert!(rendered.contains("StreamingTable"), "{rendered}"); + assert!( + rendered.contains(expect_sink_substring), + "expected sink name fragment {expect_sink_substring:?} in:\n{rendered}" + ); + assert!( + rendered.contains(expect_connector_substring), + "expected connector fragment {expect_connector_substring:?} in:\n{rendered}" + ); +} - fn assert_coordinator_streaming_build_ok( - sql: &str, - provider: StreamSchemaProvider, - expect_sink_substring: &str, - expect_connector_substring: &str, - ) { - let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}")); - assert_eq!(stmts.len(), 1); - let plan = Coordinator::new() - .compile_plan(stmts[0].as_ref(), provider) - .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}")); - let rendered = format!("{plan:?}"); - assert!(rendered.contains("StreamingTable"), "{rendered}"); - assert!( - rendered.contains(expect_sink_substring), - "expected sink name fragment {expect_sink_substring:?} in:\n{rendered}" - ); - assert!( - rendered.contains(expect_connector_substring), - "expected connector fragment {expect_connector_substring:?} in:\n{rendered}" - ); - } +#[cfg(test)] +mod create_streaming_table_coordinator_tests { + use super::{ + assert_coordinator_streaming_build_ok, fake_src_dim_provider, + fake_stream_schema_provider, fake_stream_schema_provider_with_v, + }; + use crate::sql::common::TIMESTAMP_FIELD; #[test] fn coordinator_build_create_streaming_table_select_star_kafka() { @@ -333,7 +329,12 @@ mod create_streaming_table_coordinator_tests { let sql = 
format!( "CREATE STREAMING TABLE sink_w_{label} WITH ('connector'='kafka') AS {body}" ); - assert_coordinator_streaming_build_ok(&sql, p.clone(), &format!("sink_w_{label}"), "kafka"); + assert_coordinator_streaming_build_ok( + &sql, + p.clone(), + &format!("sink_w_{label}"), + "kafka", + ); } } diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 4dae91d5..b8fbb3a5 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -10,20 +10,22 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::coordinator::dataset::{ExecuteResult, ShowFunctionsResult, empty_record_batch}; +use std::sync::Arc; + +use thiserror::Error; +use tracing::{debug, info}; + +use crate::coordinator::dataset::{empty_record_batch, ExecuteResult, ShowFunctionsResult}; use crate::coordinator::plan::{ - CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, - LookupTablePlan, PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult, - ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, + CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, CreateTablePlanBody, + DropFunctionPlan, DropTablePlan, LookupTablePlan, PlanNode, PlanVisitor, PlanVisitorContext, + PlanVisitorResult, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, StreamingTableConnectorPlan, }; use crate::coordinator::statement::{ConfigSource, FunctionSource}; use crate::runtime::taskexecutor::TaskManager; -use crate::sql::schema::table::Table as CatalogTable; -use crate::sql::analysis::{ StreamSchemaProvider}; -use std::sync::Arc; -use thiserror::Error; -use tracing::{debug, info}; +use crate::sql::schema::StreamTable; +use crate::storage::stream_catalog::CatalogManager; #[derive(Error, Debug)] pub enum ExecuteError { @@ -39,11 +41,15 @@ pub enum ExecuteError { pub struct Executor { task_manager: Arc, + 
catalog_manager: Arc, } impl Executor { - pub fn new(task_manager: Arc) -> Self { - Self { task_manager } + pub fn new(task_manager: Arc, catalog_manager: Arc) -> Self { + Self { + task_manager, + catalog_manager, + } } pub fn execute(&self, plan: &dyn PlanNode) -> Result { @@ -54,8 +60,11 @@ impl Executor { match visitor_result { PlanVisitorResult::Execute(result) => { - let elapsed = timer.elapsed(); - debug!(target: "executor", elapsed_ms = elapsed.as_millis(), "Execution completed"); + debug!( + target: "executor", + elapsed_ms = timer.elapsed().as_millis(), + "Execution completed" + ); result } } @@ -63,23 +72,22 @@ impl Executor { } impl PlanVisitor for Executor { - #[allow(clippy::redundant_closure_call)] fn visit_create_function( &self, plan: &CreateFunctionPlan, _context: &PlanVisitorContext, ) -> PlanVisitorResult { - let result = (|| -> Result { + let execute = || -> Result { let function_bytes = match &plan.function_source { FunctionSource::Path(path) => std::fs::read(path).map_err(|e| { - ExecuteError::Validation(format!("Failed to read function at {}: {}", path, e)) + ExecuteError::Validation(format!("Failed to read function at {path}: {e}")) })?, FunctionSource::Bytes(bytes) => bytes.clone(), }; let config_bytes = match &plan.config_source { Some(ConfigSource::Path(path)) => std::fs::read(path).map_err(|e| { - ExecuteError::Validation(format!("Failed to read config at {}: {}", path, e)) + ExecuteError::Validation(format!("Failed to read config at {path}: {e}")) })?, Some(ConfigSource::Bytes(bytes)) => bytes.clone(), None => { @@ -92,35 +100,34 @@ impl PlanVisitor for Executor { info!(config_size = config_bytes.len(), "Registering Wasm task"); self.task_manager .register_task(&config_bytes, &function_bytes) - .map_err(|e| ExecuteError::Task(format!("Registration failed: {:?}", e)))?; + .map_err(|e| ExecuteError::Task(format!("Registration failed: {e:?}")))?; Ok(ExecuteResult::ok_with_data( "Function registered successfully", empty_record_batch(), )) - 
})(); + }; - PlanVisitorResult::Execute(result) + PlanVisitorResult::Execute(execute()) } - #[allow(clippy::redundant_closure_call)] fn visit_drop_function( &self, plan: &DropFunctionPlan, _context: &PlanVisitorContext, ) -> PlanVisitorResult { - let result = (|| -> Result { + let execute = || -> Result { self.task_manager .remove_task(&plan.name) - .map_err(|e| ExecuteError::Task(format!("Removal failed: {}", e)))?; + .map_err(|e| ExecuteError::Task(format!("Removal failed: {e}")))?; Ok(ExecuteResult::ok_with_data( format!("Function '{}' dropped", plan.name), empty_record_batch(), )) - })(); + }; - PlanVisitorResult::Execute(result) + PlanVisitorResult::Execute(execute()) } fn visit_start_function( @@ -142,48 +149,43 @@ impl PlanVisitor for Executor { PlanVisitorResult::Execute(result) } - #[allow(clippy::redundant_closure_call)] fn visit_show_functions( &self, _plan: &ShowFunctionsPlan, _context: &PlanVisitorContext, ) -> PlanVisitorResult { - let result = { - let functions = self.task_manager.list_all_functions(); - - Ok(ExecuteResult::ok_with_data( - format!("Found {} task(s)", functions.len()), - ShowFunctionsResult::new(functions), - )) - }; + let functions = self.task_manager.list_all_functions(); + let result = ExecuteResult::ok_with_data( + format!("Found {} task(s)", functions.len()), + ShowFunctionsResult::new(functions), + ); - PlanVisitorResult::Execute(result) + PlanVisitorResult::Execute(Ok(result)) } - #[allow(clippy::redundant_closure_call)] fn visit_create_python_function( &self, plan: &CreatePythonFunctionPlan, _context: &PlanVisitorContext, ) -> PlanVisitorResult { - let result = (|| -> Result { - let modules: Vec<(String, Vec)> = plan + let execute = || -> Result { + let modules = plan .modules .iter() .map(|m| (m.name.clone(), m.bytes.clone())) - .collect(); + .collect::>(); self.task_manager .register_python_task(plan.config_content.as_bytes(), &modules) - .map_err(|e| ExecuteError::Task(format!("Python registration failed: {}", e)))?; + 
.map_err(|e| ExecuteError::Task(format!("Python registration failed: {e}")))?; Ok(ExecuteResult::ok_with_data( format!("Python function '{}' deployed", plan.class_name), empty_record_batch(), )) - })(); + }; - PlanVisitorResult::Execute(result) + PlanVisitorResult::Execute(execute()) } fn visit_stop_function( @@ -210,12 +212,50 @@ impl PlanVisitor for Executor { plan: &CreateTablePlan, _context: &PlanVisitorContext, ) -> PlanVisitorResult { - // TODO: register table in catalog and execute DDL - let result = Err(ExecuteError::Internal(format!( - "CREATE TABLE execution not yet implemented. LogicalPlan:\n{}", - plan.logical_plan.display_indent() - ))); - PlanVisitorResult::Execute(result) + let execute = || -> Result { + let (table_name, if_not_exists, stream_table) = match &plan.body { + CreateTablePlanBody::ConnectorSource { + source_table, + if_not_exists, + } => { + let table_name = source_table.name().to_string(); + let schema = Arc::new(source_table.produce_physical_schema()); + let table_instance = StreamTable::Source { + name: table_name.clone(), + schema, + event_time_field: source_table.event_time_field().map(str::to_string), + watermark_field: source_table.watermark_field().map(str::to_string), + }; + (table_name, *if_not_exists, table_instance) + } + CreateTablePlanBody::DataFusion(_) => { + return Err(ExecuteError::Internal( + "Operation not supported: Currently, the system strictly supports creating tables backed by an external Connector Source (e.g., Kafka, Postgres). In-memory tables, Views, or CTAS (Create Table As Select) are not supported." 
+ .into(), + )); + } + }; + + if if_not_exists && self.catalog_manager.has_stream_table(&table_name) { + return Ok(ExecuteResult::ok(format!( + "Table '{table_name}' already exists (skipped)" + ))); + } + + self.catalog_manager + .add_table(stream_table) + .map_err(|e| { + ExecuteError::Internal(format!( + "Failed to register connector source table '{table_name}': {e}" + )) + })?; + + Ok(ExecuteResult::ok(format!( + "Created connector source table '{table_name}'" + ))) + }; + + PlanVisitorResult::Execute(execute()) } fn visit_streaming_table( @@ -223,19 +263,23 @@ impl PlanVisitor for Executor { plan: &StreamingTable, _context: &PlanVisitorContext, ) -> PlanVisitorResult { - let result = (|| -> Result { - let catalog_table = - CatalogTable::ConnectorTable(plan.source_table.clone()); - let mut schema_provider = StreamSchemaProvider::new(); - schema_provider.insert_catalog_table(catalog_table.clone()); + let execute = || -> Result { + let sink = StreamTable::Sink { + name: plan.name.clone(), + schema: Arc::new(plan.logical_plan.schema().as_arrow().clone()), + }; + self.catalog_manager + .add_table(sink) + .map_err(|e| ExecuteError::Internal(e.to_string()))?; Ok(ExecuteResult::ok_with_data( - format!("Streaming table '{}' compiled successfully", plan.name), + format!("Registered streaming table '{}'", plan.name), empty_record_batch(), )) - })(); - PlanVisitorResult::Execute(result) + }; + + PlanVisitorResult::Execute(execute()) } fn visit_lookup_table( @@ -243,10 +287,9 @@ impl PlanVisitor for Executor { _plan: &LookupTablePlan, _context: &PlanVisitorContext, ) -> PlanVisitorResult { - let result = Err(ExecuteError::Internal( + PlanVisitorResult::Execute(Err(ExecuteError::Internal( "LookupTable execution not yet implemented".to_string(), - )); - PlanVisitorResult::Execute(result) + ))) } fn visit_streaming_connector_table( @@ -254,9 +297,27 @@ impl PlanVisitor for Executor { _plan: &StreamingTableConnectorPlan, _context: &PlanVisitorContext, ) -> PlanVisitorResult { - 
let result = Err(ExecuteError::Internal( + PlanVisitorResult::Execute(Err(ExecuteError::Internal( "StreamingTableConnector execution not yet implemented".to_string(), - )); - PlanVisitorResult::Execute(result) + ))) + } + + fn visit_drop_table_plan( + &self, + plan: &DropTablePlan, + _context: &PlanVisitorContext, + ) -> PlanVisitorResult { + let execute = || -> Result { + self.catalog_manager + .drop_table(&plan.table_name, plan.if_exists) + .map_err(|e| ExecuteError::Internal(e.to_string()))?; + + Ok(ExecuteResult::ok(format!( + "Dropped table '{}'", + plan.table_name + ))) + }; + + PlanVisitorResult::Execute(execute()) } } diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs index 7791e8a8..a781f1e1 100644 --- a/src/coordinator/mod.rs +++ b/src/coordinator/mod.rs @@ -17,12 +17,14 @@ mod dataset; mod execution; mod execution_context; mod plan; +mod runtime_context; mod statement; mod tool; pub use coordinator::Coordinator; +pub use runtime_context::CoordinatorRuntimeContext; pub use dataset::{DataSet, ShowFunctionsResult}; pub use statement::{ - CreateFunction, CreatePythonFunction, CreateTable, DropFunction, PythonModule, ShowFunctions, - StartFunction, Statement, StopFunction, StreamingTableStatement, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, + PythonModule, ShowFunctions, StartFunction, Statement, StopFunction, StreamingTableStatement, }; diff --git a/src/coordinator/plan/create_table_plan.rs b/src/coordinator/plan/create_table_plan.rs index 450c8813..7ad82bb3 100644 --- a/src/coordinator/plan/create_table_plan.rs +++ b/src/coordinator/plan/create_table_plan.rs @@ -12,16 +12,39 @@ use datafusion::logical_expr::LogicalPlan; +use crate::sql::schema::SourceTable; + use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; -#[derive(Debug)] +/// Payload for [`CreateTablePlan`]: either a DataFusion DDL plan or a connector `CREATE TABLE` (no `AS SELECT`). 
+#[derive(Debug, Clone)] +pub enum CreateTablePlanBody { + DataFusion(LogicalPlan), + ConnectorSource { + source_table: SourceTable, + if_not_exists: bool, + }, +} + +#[derive(Debug, Clone)] pub struct CreateTablePlan { - pub logical_plan: LogicalPlan, + pub body: CreateTablePlanBody, } impl CreateTablePlan { pub fn new(logical_plan: LogicalPlan) -> Self { - Self { logical_plan } + Self { + body: CreateTablePlanBody::DataFusion(logical_plan), + } + } + + pub fn connector_source(source_table: SourceTable, if_not_exists: bool) -> Self { + Self { + body: CreateTablePlanBody::ConnectorSource { + source_table, + if_not_exists, + }, + } } } diff --git a/src/coordinator/plan/drop_table_plan.rs b/src/coordinator/plan/drop_table_plan.rs new file mode 100644 index 00000000..7d80a7b7 --- /dev/null +++ b/src/coordinator/plan/drop_table_plan.rs @@ -0,0 +1,34 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; + +#[derive(Debug, Clone)] +pub struct DropTablePlan { + pub table_name: String, + pub if_exists: bool, +} + +impl DropTablePlan { + pub fn new(table_name: String, if_exists: bool) -> Self { + Self { + table_name, + if_exists, + } + } +} + +impl PlanNode for DropTablePlan { + fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { + visitor.visit_drop_table_plan(self, context) + } +} diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index 4a747fdf..aa8364ef 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -12,54 +12,45 @@ use std::sync::Arc; -use datafusion::common::{Result, plan_datafusion_err, plan_err}; +use datafusion::common::{plan_datafusion_err, plan_err, Result}; use datafusion::execution::SessionStateBuilder; -use datafusion::sql::sqlparser::ast::{SqlOption, Statement as DFStatement}; +use datafusion::sql::sqlparser::ast::{ + CreateTable as SqlCreateTable, Expr as SqlExpr, ObjectType, SqlOption, Statement as DFStatement, + TableConstraint, +}; use datafusion_common::TableReference; use datafusion_execution::config::SessionConfig; -use datafusion_expr::{Expr, Extension, LogicalPlan, col}; +use datafusion_expr::{col, Extension, Expr, LogicalPlan}; use sqlparser::ast::Statement; use tracing::debug; use crate::coordinator::analyze::analysis::Analysis; use crate::coordinator::plan::{ - CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, PlanNode, - ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, + CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan, + PlanNode, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, }; use crate::coordinator::statement::{ - CreateFunction, CreatePythonFunction, CreateTable, 
DropFunction, ShowFunctions, StartFunction, - StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, - StreamingTableStatement, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, + ShowFunctions, StartFunction, StatementVisitor, StatementVisitorContext, + StatementVisitorResult, StopFunction, StreamingTableStatement, }; use crate::coordinator::tool::ConnectorOptions; -use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig}; -use crate::sql::logical_planner::optimizers::{ChainingOptimizer, produce_optimized_plan}; -use crate::sql::schema::Table; -use crate::sql::schema::ConnectionType; -use crate::sql::schema::source_table::SourceTable; -use crate::sql::schema::ColumnDescriptor; -use crate::sql::functions::{is_json_union, serialize_outgoing_json}; +use crate::sql::analysis::{ + maybe_add_key_extension_to_sink, rewrite_sinks, StreamSchemaProvider, +}; use crate::sql::extensions::sink::StreamEgressNode; -use crate::sql::logical_planner::planner; -use crate::sql::analysis::{StreamSchemaProvider, maybe_add_key_extension_to_sink, rewrite_sinks}; +use crate::sql::functions::{is_json_union, serialize_outgoing_json}; +use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig}; +use crate::sql::logical_planner::optimizers::{produce_optimized_plan, ChainingOptimizer}; +use crate::sql::logical_planner::planner::PlanToGraphVisitor; use crate::sql::rewrite_plan; +use crate::sql::schema::source_table::SourceTable; +use crate::sql::schema::{ColumnDescriptor, ConnectionType, Table}; -const CONNECTOR: &str = "connector"; -const PARTITION_BY: &str = "partition_by"; - -fn with_options_to_map(options: &[SqlOption]) -> std::collections::HashMap { - options - .iter() - .filter_map(|opt| match opt { - SqlOption::KeyValue { key, value } => Some(( - key.value.clone(), - value.to_string().trim_matches('\'').to_string(), - )), - _ => None, - }) - .collect() -} +const OPT_CONNECTOR: &str = "connector"; 
+const OPT_PARTITION_BY: &str = "partition_by"; +#[derive(Clone)] pub struct LogicalPlanVisitor { schema_provider: StreamSchemaProvider, } @@ -70,20 +61,26 @@ impl LogicalPlanVisitor { } pub fn visit(&self, analysis: &Analysis) -> Box { - let context = StatementVisitorContext::Empty; let stmt = analysis.statement(); + let context = StatementVisitorContext::Empty; - let result = stmt.accept(self, &context); - - match result { + match stmt.accept(self, &context) { StatementVisitorResult::Plan(plan) => plan, - _ => panic!("LogicalPlanVisitor should return Plan"), + _ => panic!("Fatal: LogicalPlanVisitor must yield a PlanNode variant"), } } - fn build_create_streaming_table_plan( + + pub fn build_streaming_table( + schema_provider: &StreamSchemaProvider, + stmt: &StreamingTableStatement, + ) -> Result { + Self::new(schema_provider.clone()).compile_streaming_sink(stmt) + } + + fn compile_streaming_sink( &self, stmt: &StreamingTableStatement, - ) -> Result> { + ) -> Result { let DFStatement::CreateStreamingTable { name, with_options, @@ -91,123 +88,233 @@ impl LogicalPlanVisitor { query, } = &stmt.statement else { - return plan_err!("Only CREATE STREAMING TABLE is supported in this context"); + return plan_err!("Statement mismatch: Expected CREATE STREAMING TABLE AST node"); }; - let table_name = name.to_string(); - debug!("Compiling Streaming Table Sink for: {}", table_name); + let target_name = name.to_string(); + debug!( + "Initiating streaming sink compilation for identifier: {}", + target_name + ); - let mut opts = ConnectorOptions::new(with_options, &None)?; - let connector = opts.pull_opt_str(CONNECTOR)?.ok_or_else(|| { + let mut connector_options = ConnectorOptions::new(with_options, &None)?; + let adapter_type = connector_options.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| { plan_datafusion_err!( - "Streaming Table '{}' must specify the '{}' option", - table_name, - CONNECTOR + "Validation Error: Streaming table '{}' requires the '{}' property", + target_name, + 
OPT_CONNECTOR ) })?; - let partition_exprs = self.resolve_partition_expressions(&mut opts)?; + let routing_exprs = Self::extract_partitioning_keys(&mut connector_options)?; - let base_plan = - produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?; - let mut plan = rewrite_plan(base_plan, &self.schema_provider)?; + let mut logical_plan = rewrite_plan( + produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?, + &self.schema_provider, + )?; - if plan + if logical_plan .schema() .fields() .iter() .any(|f| is_json_union(f.data_type())) { - plan = serialize_outgoing_json(&self.schema_provider, Arc::new(plan)); + logical_plan = serialize_outgoing_json(&self.schema_provider, Arc::new(logical_plan)); } - let fields: Vec = plan + let output_descriptors = logical_plan .schema() .fields() .iter() .map(|f| ColumnDescriptor::from((**f).clone())) - .collect(); + .collect::>(); - let mut source_table = SourceTable::from_options( - &table_name, - &connector, + let mut source_definition = SourceTable::from_options( + &target_name, + &adapter_type, false, - fields, + output_descriptors, vec![], None, - &mut opts, + &mut connector_options, None, &self.schema_provider, Some(ConnectionType::Sink), comment.clone().unwrap_or_default(), )?; - source_table.partition_exprs = Arc::new(partition_exprs); - - let sink_extension = StreamEgressNode::try_new( - TableReference::bare(table_name.clone()), - Table::ConnectorTable(source_table.clone()), - plan.schema().clone(), - plan, + source_definition.partition_exprs = Arc::new(routing_exprs); + + let sink_schema = logical_plan.schema().clone(); + let egress_node = StreamEgressNode::try_new( + TableReference::bare(target_name.clone()), + Table::ConnectorTable(source_definition.clone()), + sink_schema, + logical_plan, )?; - let plan_with_keys = maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension { - node: Arc::new(sink_extension), - }))?; + let mut plan_topology = 
rewrite_sinks(vec![maybe_add_key_extension_to_sink( + LogicalPlan::Extension(Extension { + node: Arc::new(egress_node), + }), + )?])?; - let final_extensions = rewrite_sinks(vec![plan_with_keys])?; - let final_plan = final_extensions.into_iter().next().unwrap(); + let final_execution_plan = plan_topology.remove(0); + self.validate_graph_topology(&final_execution_plan)?; + Ok(StreamingTable { + name: target_name, + comment: comment.clone(), + source_table: source_definition, + logical_plan: final_execution_plan, + }) + } - let mut config = SessionConfig::new(); - config - .options_mut() - .optimizer - .enable_round_robin_repartition = false; - config.options_mut().optimizer.repartition_aggregations = false; - config.options_mut().optimizer.repartition_windows = false; - config.options_mut().optimizer.repartition_sorts = false; - config.options_mut().optimizer.repartition_joins = false; - config.options_mut().execution.target_partitions = 1; + fn validate_graph_topology(&self, logical_plan: &LogicalPlan) -> Result<()> { + let mut session_config = SessionConfig::new(); + let opts = session_config.options_mut(); + opts.optimizer.enable_round_robin_repartition = false; + opts.optimizer.repartition_aggregations = false; + opts.optimizer.repartition_windows = false; + opts.optimizer.repartition_sorts = false; + opts.optimizer.repartition_joins = false; + opts.execution.target_partitions = 1; let session_state = SessionStateBuilder::new() - .with_config(config) + .with_config(session_config) .with_default_features() .with_physical_optimizer_rules(vec![]) .build(); - let mut plan_to_graph_visitor = - planner::PlanToGraphVisitor::new(&self.schema_provider, &session_state); + let mut graph_compiler = PlanToGraphVisitor::new(&self.schema_provider, &session_state); + graph_compiler.add_plan(logical_plan.clone())?; - plan_to_graph_visitor.add_plan(final_plan.clone())?; + let mut executable_program = + LogicalProgram::new(graph_compiler.into_graph(), ProgramConfig::default()); + 
executable_program.optimize(&ChainingOptimizer {}); - let graph = plan_to_graph_visitor.into_graph(); + Ok(()) + } - let mut program = LogicalProgram::new(graph, ProgramConfig::default()); + fn extract_partitioning_keys( + options: &mut ConnectorOptions, + ) -> Result>> { + options + .pull_opt_str(OPT_PARTITION_BY)? + .map(|raw_cols| raw_cols.split(',').map(|c| col(c.trim())).collect()) + .map(Ok) + .transpose() + } - program.optimize(&ChainingOptimizer {}); + fn contains_connector_property(options: &[SqlOption]) -> bool { + options.iter().any(|opt| match opt { + SqlOption::KeyValue { key, .. } => key.value.eq_ignore_ascii_case(OPT_CONNECTOR), + _ => false, + }) + } + fn parse_primary_keys(constraints: &[TableConstraint]) -> Result> { + let mut keys = None; + for constraint in constraints { + if let TableConstraint::PrimaryKey { columns, .. } = constraint { + if keys.is_some() { + return plan_err!( + "Constraint Violation: Multiple PRIMARY KEY constraints are forbidden" + ); + } + keys = Some(columns.iter().map(|ident| ident.value.clone()).collect()); + } + } + Ok(keys.unwrap_or_default()) + } - Ok(Box::new(StreamingTable { - name: table_name, - comment: comment.clone(), - source_table, - logical_plan: final_plan, - })) + fn parse_watermark_strategy( + constraints: &[TableConstraint], + ) -> Result)>> { + let mut strategy = None; + for constraint in constraints { + if let TableConstraint::Watermark { + column_name, + watermark_expr, + } = constraint + { + if strategy.is_some() { + return plan_err!( + "Constraint Violation: Only a single WATERMARK FOR clause is permitted" + ); + } + strategy = Some((column_name.value.clone(), watermark_expr.clone())); + } + } + Ok(strategy) } - fn resolve_partition_expressions( + fn compile_connector_source_plan( &self, - opts: &mut ConnectorOptions, - ) -> Result>> { - opts.pull_opt_str(PARTITION_BY)? 
- .map(|cols| { - cols.split(',') - .map(|c| col(c.trim())) - .collect::>() - }) - .map(Ok) - .transpose() + stmt: &SqlCreateTable, + ) -> Result { + if stmt.query.is_some() { + return plan_err!("Syntax Error: CREATE TABLE ... AS SELECT combined with WITH ('connector'=...) is invalid. Use CREATE STREAMING TABLE instead."); + } + if stmt.or_replace { + return plan_err!( + "Syntax Error: OR REPLACE is not supported for external connector tables." + ); + } + if stmt.temporary { + return plan_err!( + "Syntax Error: TEMPORARY is not supported for external connector tables." + ); + } + if stmt.external { + return plan_err!("Syntax Error: EXTERNAL keyword is redundant and unsupported for connector configurations."); + } + + let target_name = stmt.name.to_string(); + let table_description = stmt + .comment + .clone() + .map(|c| c.to_string()) + .unwrap_or_default(); + + let schema_compiler = datafusion::sql::planner::SqlToRel::new(&self.schema_provider); + let arrow_schema = schema_compiler.build_schema(stmt.columns.clone())?; + + let schema_descriptors = arrow_schema + .fields() + .iter() + .map(|f| ColumnDescriptor::from((**f).clone())) + .collect::>(); + + let mut connector_options = ConnectorOptions::new(&stmt.with_options, &None)?; + let adapter_type = connector_options.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| { + plan_datafusion_err!( + "Configuration Error: Missing required property '{}' in WITH clause", + OPT_CONNECTOR + ) + })?; + + let pk_constraints = Self::parse_primary_keys(&stmt.constraints)?; + let watermark_strategy = Self::parse_watermark_strategy(&stmt.constraints)?; + + let source_definition = SourceTable::from_options( + &target_name, + &adapter_type, + false, + schema_descriptors, + pk_constraints, + watermark_strategy, + &mut connector_options, + None, + &self.schema_provider, + Some(ConnectionType::Source), + table_description, + )?; + + Ok(CreateTablePlan::connector_source( + source_definition, + stmt.if_not_exists, + )) } } @@ -215,23 +322,19 @@ 
impl StatementVisitor for LogicalPlanVisitor { fn visit_create_function( &self, stmt: &CreateFunction, - _context: &StatementVisitorContext, + _ctx: &StatementVisitorContext, ) -> StatementVisitorResult { - let function_source = stmt.get_function_source().clone(); - let config_source = stmt.get_config_source().cloned(); - let extra_props = stmt.get_extra_properties().clone(); - StatementVisitorResult::Plan(Box::new(CreateFunctionPlan::new( - function_source, - config_source, - extra_props, + stmt.get_function_source().clone(), + stmt.get_config_source().cloned(), + stmt.get_extra_properties().clone(), ))) } fn visit_drop_function( &self, stmt: &DropFunction, - _context: &StatementVisitorContext, + _ctx: &StatementVisitorContext, ) -> StatementVisitorResult { StatementVisitorResult::Plan(Box::new(DropFunctionPlan::new(stmt.name.clone()))) } @@ -239,7 +342,7 @@ impl StatementVisitor for LogicalPlanVisitor { fn visit_start_function( &self, stmt: &StartFunction, - _context: &StatementVisitorContext, + _ctx: &StatementVisitorContext, ) -> StatementVisitorResult { StatementVisitorResult::Plan(Box::new(StartFunctionPlan::new(stmt.name.clone()))) } @@ -247,7 +350,7 @@ impl StatementVisitor for LogicalPlanVisitor { fn visit_stop_function( &self, stmt: &StopFunction, - _context: &StatementVisitorContext, + _ctx: &StatementVisitorContext, ) -> StatementVisitorResult { StatementVisitorResult::Plan(Box::new(StopFunctionPlan::new(stmt.name.clone()))) } @@ -255,7 +358,7 @@ impl StatementVisitor for LogicalPlanVisitor { fn visit_show_functions( &self, _stmt: &ShowFunctions, - _context: &StatementVisitorContext, + _ctx: &StatementVisitorContext, ) -> StatementVisitorResult { StatementVisitorResult::Plan(Box::new(ShowFunctionsPlan::new())) } @@ -263,46 +366,83 @@ impl StatementVisitor for LogicalPlanVisitor { fn visit_create_python_function( &self, stmt: &CreatePythonFunction, - _context: &StatementVisitorContext, + _ctx: &StatementVisitorContext, ) -> StatementVisitorResult { - let 
class_name = stmt.get_class_name().to_string(); - let modules = stmt.get_modules().to_vec(); - let config_content = stmt.get_config_content().to_string(); - StatementVisitorResult::Plan(Box::new(CreatePythonFunctionPlan::new( - class_name, - modules, - config_content, + stmt.get_class_name().to_string(), + stmt.get_modules().to_vec(), + stmt.get_config_content().to_string(), ))) } fn visit_create_table( &self, stmt: &CreateTable, - _context: &StatementVisitorContext, + _ctx: &StatementVisitorContext, ) -> StatementVisitorResult { - let sql_to_rel = datafusion::sql::planner::SqlToRel::new(&self.schema_provider); - - match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) { - Ok(plan) => { - debug!("Create table plan:\n{}", plan.display_graphviz()); - StatementVisitorResult::Plan(Box::new(CreateTablePlan::new(plan))) + if let Statement::CreateTable(ast_node) = &stmt.statement { + if ast_node.query.is_none() + && Self::contains_connector_property(&ast_node.with_options) + { + let execution_plan = self.compile_connector_source_plan(ast_node).unwrap_or_else( + |err| { + panic!("Fatal Compiler Error: Connector source resolution failed - {err:#}"); + }, + ); + return StatementVisitorResult::Plan(Box::new(execution_plan)); } - Err(e) => { - panic!("Failed to convert CREATE TABLE to logical plan: {e}"); + } + + let schema_compiler = datafusion::sql::planner::SqlToRel::new(&self.schema_provider); + match schema_compiler.sql_statement_to_plan(stmt.statement.clone()) { + Ok(logical_plan) => { + debug!( + "Successfully compiled logical DDL topology:\n{}", + logical_plan.display_graphviz() + ); + StatementVisitorResult::Plan(Box::new(CreateTablePlan::new(logical_plan))) } + Err(err) => panic!("Fatal Compiler Error: Logical plan translation failed - {err}"), } } fn visit_streaming_table_statement( &self, stmt: &StreamingTableStatement, - _context: &StatementVisitorContext, + _ctx: &StatementVisitorContext, + ) -> StatementVisitorResult { + let execution_plan = 
self.compile_streaming_sink(stmt).unwrap_or_else(|err| { + panic!("Fatal Compiler Error: Streaming sink compilation aborted - {err}"); + }); + StatementVisitorResult::Plan(Box::new(execution_plan)) + } + + fn visit_drop_table_statement( + &self, + stmt: &DropTableStatement, + _ctx: &StatementVisitorContext, ) -> StatementVisitorResult { - match self.build_create_streaming_table_plan(stmt) { - Ok(plan) => StatementVisitorResult::Plan(plan), - Err(e) => panic!("Failed to build CreateStreamingTable plan: {e}"), + let DFStatement::Drop { + object_type, + if_exists, + names, + .. + } = &stmt.statement + else { + panic!("Fatal Compiler Error: AST mismatch on DropTableStatement"); + }; + + if *object_type != ObjectType::Table { + panic!("Fatal Compiler Error: Drop target must be of type TABLE"); + } + if names.len() != 1 { + panic!("Fatal Compiler Error: Bulk drop operations are not supported. Specify exactly one table."); } + + StatementVisitorResult::Plan(Box::new(DropTablePlan::new( + names[0].to_string(), + *if_exists, + ))) } } @@ -316,8 +456,8 @@ mod create_streaming_table_tests { use datafusion::sql::sqlparser::parser::Parser; use crate::sql::common::TIMESTAMP_FIELD; - use crate::sql::rewrite_plan; use crate::sql::logical_planner::optimizers::produce_optimized_plan; + use crate::sql::rewrite_plan; use crate::sql::schema::StreamSchemaProvider; fn schema_provider_with_src() -> StreamSchemaProvider { diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs index d68320d8..2dbbab77 100644 --- a/src/coordinator/plan/mod.rs +++ b/src/coordinator/plan/mod.rs @@ -14,6 +14,7 @@ mod create_function_plan; mod create_python_function_plan; mod create_table_plan; mod drop_function_plan; +mod drop_table_plan; mod logical_plan_visitor; mod lookup_table_plan; mod optimizer; @@ -26,8 +27,9 @@ mod visitor; pub use create_function_plan::CreateFunctionPlan; pub use create_python_function_plan::CreatePythonFunctionPlan; -pub use create_table_plan::CreateTablePlan; +pub use 
create_table_plan::{CreateTablePlan, CreateTablePlanBody}; pub use drop_function_plan::DropFunctionPlan; +pub use drop_table_plan::DropTablePlan; pub use logical_plan_visitor::LogicalPlanVisitor; pub use lookup_table_plan::LookupTablePlan; pub use optimizer::LogicalPlanner; diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs index fc764b2b..e8efcf32 100644 --- a/src/coordinator/plan/visitor.rs +++ b/src/coordinator/plan/visitor.rs @@ -11,7 +11,7 @@ // limitations under the License. use super::{ - CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, + CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan, LookupTablePlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, StreamingTableConnectorPlan, }; @@ -109,4 +109,10 @@ pub trait PlanVisitor { plan: &StreamingTableConnectorPlan, context: &PlanVisitorContext, ) -> PlanVisitorResult; + + fn visit_drop_table_plan( + &self, + plan: &DropTablePlan, + context: &PlanVisitorContext, + ) -> PlanVisitorResult; } diff --git a/src/coordinator/runtime_context.rs b/src/coordinator/runtime_context.rs new file mode 100644 index 00000000..7b1d82dc --- /dev/null +++ b/src/coordinator/runtime_context.rs @@ -0,0 +1,64 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Runtime resources for a single coordinator run: [`TaskManager`] and [`CatalogManager`]. 
+ +use std::sync::Arc; + +use anyhow::Result; + +use crate::runtime::taskexecutor::TaskManager; +use crate::sql::schema::StreamSchemaProvider; +use crate::storage::stream_catalog::CatalogManager; + +/// Dependencies shared by analyze / plan / execute, analogous to installing globals in +/// [`TaskManager`] and [`CatalogManager`]. +#[derive(Clone)] +pub struct CoordinatorRuntimeContext { + pub task_manager: Arc, + pub catalog_manager: Arc, + /// When set (e.g. unit tests), used for SQL planning instead of a catalog snapshot. + planning_schema_override: Option, +} + +impl CoordinatorRuntimeContext { + /// Resolve [`TaskManager`] and global stream catalog (same pattern as server startup). + pub fn try_from_globals() -> Result { + Ok(Self { + task_manager: TaskManager::get() + .map_err(|e| anyhow::anyhow!("Failed to get TaskManager: {}", e))?, + catalog_manager: CatalogManager::global() + .map_err(|e| anyhow::anyhow!("Failed to get CatalogManager: {}", e))?, + planning_schema_override: None, + }) + } + + pub fn new( + task_manager: Arc, + catalog_manager: Arc, + planning_schema_override: Option, + ) -> Self { + Self { + task_manager, + catalog_manager, + planning_schema_override, + } + } + + /// Schema provider for [`LogicalPlanVisitor`] / [`SqlToRel`]: override if set, else catalog snapshot. 
+ pub fn planning_schema_provider(&self) -> StreamSchemaProvider { + if let Some(ref p) = self.planning_schema_override { + return p.clone(); + } + self.catalog_manager.acquire_planning_context() + } +} diff --git a/src/coordinator/statement/create_table.rs b/src/coordinator/statement/create_table.rs index 8aa16bf0..67a500d1 100644 --- a/src/coordinator/statement/create_table.rs +++ b/src/coordinator/statement/create_table.rs @@ -37,4 +37,8 @@ impl Statement for CreateTable { ) -> StatementVisitorResult { visitor.visit_create_table(self, context) } + + fn as_create_table(&self) -> Option<&CreateTable> { + Some(self) + } } diff --git a/src/coordinator/statement/drop_table.rs b/src/coordinator/statement/drop_table.rs new file mode 100644 index 00000000..fa547dca --- /dev/null +++ b/src/coordinator/statement/drop_table.rs @@ -0,0 +1,41 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use datafusion::sql::sqlparser::ast::Statement as DFStatement; + +use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; + +/// `DROP TABLE` / `DROP TABLE IF EXISTS` (and `DROP STREAMING TABLE`, normalized at parse time). 
+#[derive(Debug, Clone)] +pub struct DropTableStatement { + pub statement: DFStatement, +} + +impl DropTableStatement { + pub fn new(statement: DFStatement) -> Self { + Self { statement } + } +} + +impl Statement for DropTableStatement { + fn accept( + &self, + visitor: &dyn StatementVisitor, + context: &StatementVisitorContext, + ) -> StatementVisitorResult { + visitor.visit_drop_table_statement(self, context) + } + + fn as_drop_table_statement(&self) -> Option<&DropTableStatement> { + Some(self) + } +} diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs index 15880284..7b39787d 100644 --- a/src/coordinator/statement/mod.rs +++ b/src/coordinator/statement/mod.rs @@ -14,6 +14,7 @@ mod create_function; mod create_python_function; mod create_table; mod drop_function; +mod drop_table; mod show_functions; mod start_function; mod stop_function; @@ -24,6 +25,7 @@ pub use create_function::{ConfigSource, CreateFunction, FunctionSource}; pub use create_python_function::{CreatePythonFunction, PythonModule}; pub use create_table::CreateTable; pub use drop_function::DropFunction; +pub use drop_table::DropTableStatement; pub use show_functions::ShowFunctions; pub use start_function::StartFunction; pub use stop_function::StopFunction; @@ -38,4 +40,16 @@ pub trait Statement: fmt::Debug + Send + Sync { visitor: &dyn StatementVisitor, context: &StatementVisitorContext, ) -> StatementVisitorResult; + + fn as_create_table(&self) -> Option<&CreateTable> { + None + } + + fn as_drop_table_statement(&self) -> Option<&DropTableStatement> { + None + } + + fn as_streaming_table_statement(&self) -> Option<&StreamingTableStatement> { + None + } } diff --git a/src/coordinator/statement/streaming_table.rs b/src/coordinator/statement/streaming_table.rs index 86ec1a85..bfef3503 100644 --- a/src/coordinator/statement/streaming_table.rs +++ b/src/coordinator/statement/streaming_table.rs @@ -37,4 +37,8 @@ impl Statement for StreamingTableStatement { ) -> 
StatementVisitorResult { visitor.visit_streaming_table_statement(self, context) } + + fn as_streaming_table_statement(&self) -> Option<&StreamingTableStatement> { + Some(self) + } } diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs index 1867b603..641abf98 100644 --- a/src/coordinator/statement/visitor.rs +++ b/src/coordinator/statement/visitor.rs @@ -11,8 +11,8 @@ // limitations under the License. use super::{ - CreateFunction, CreatePythonFunction, CreateTable, DropFunction, ShowFunctions, StartFunction, - StopFunction, StreamingTableStatement, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, + ShowFunctions, StartFunction, StopFunction, StreamingTableStatement, }; use crate::coordinator::plan::PlanNode; use crate::coordinator::statement::Statement; @@ -100,4 +100,10 @@ pub trait StatementVisitor { stmt: &StreamingTableStatement, context: &StatementVisitorContext, ) -> StatementVisitorResult; + + fn visit_drop_table_statement( + &self, + stmt: &DropTableStatement, + context: &StatementVisitorContext, + ) -> StatementVisitorResult; } diff --git a/src/main.rs b/src/main.rs index 562b1526..1faf45f1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -179,9 +179,7 @@ fn main() -> Result<()> { ); // 2. Component Initialization - let registry = server::register_components(); - registry - .initialize_all(&config) + server::bootstrap_system(&config) .context("Component initialization failed")?; // 3. 
Server Startup diff --git a/src/server/handler.rs b/src/server/handler.rs index 1920680c..8ed484d6 100644 --- a/src/server/handler.rs +++ b/src/server/handler.rs @@ -14,20 +14,19 @@ use std::sync::Arc; use std::time::Instant; use arrow_ipc::writer::StreamWriter; -use log::{error, info}; use tonic::{Request, Response as TonicResponse, Status}; +use tracing::{debug, error, info, warn}; use protocol::service::FunctionInfo as ProtoFunctionInfo; use protocol::service::{ - CreateFunctionRequest, CreatePythonFunctionRequest, DropFunctionRequest, Response, - ShowFunctionsRequest, ShowFunctionsResponse, SqlRequest, StartFunctionRequest, StatusCode, - StopFunctionRequest, function_stream_service_server::FunctionStreamService, + function_stream_service_server::FunctionStreamService, CreateFunctionRequest, + CreatePythonFunctionRequest, DropFunctionRequest, Response, ShowFunctionsRequest, + ShowFunctionsResponse, SqlRequest, StartFunctionRequest, StatusCode, StopFunctionRequest, }; -use crate::coordinator::Coordinator; use crate::coordinator::{ - CreateFunction, CreatePythonFunction, DataSet, DropFunction, ShowFunctions, - ShowFunctionsResult, StartFunction, Statement, StopFunction, + Coordinator, CreateFunction, CreatePythonFunction, DataSet, DropFunction, PythonModule, + ShowFunctions, ShowFunctionsResult, StartFunction, Statement, StopFunction, }; use crate::sql::parse::parse_sql; @@ -40,23 +39,66 @@ impl FunctionStreamServiceImpl { Self { coordinator } } - fn build_response(status_code: StatusCode, message: String, data: Option>) -> Response { + fn serialize_dataset(ds: &dyn DataSet) -> Result, String> { + let batch = ds.to_record_batch(); + let mut buf = Vec::new(); + + let mut writer = StreamWriter::try_new(&mut buf, &batch.schema()) + .map_err(|e| format!("IPC writer initialization failed: {e}"))?; + + writer + .write(&batch) + .map_err(|e| format!("IPC write failed: {e}"))?; + + writer + .finish() + .map_err(|e| format!("IPC finish failed: {e}"))?; + + Ok(buf) + } + + 
fn build_success_response( + status: StatusCode, + message: String, + data: Option>, + ) -> Response { + let payload = match data { + Some(ds) => match Self::serialize_dataset(ds.as_ref()) { + Ok(bytes) => Some(bytes), + Err(e) => { + error!("Data serialization error: {}", e); + return Self::build_error_response( + StatusCode::InternalServerError, + "Internal data serialization error".to_string(), + ); + } + }, + None => None, + }; + Response { - status_code: status_code as i32, + status_code: status as i32, message, - data, + data: payload, } } - fn data_set_to_ipc_bytes(ds: &dyn DataSet) -> Option> { - let batch = ds.to_record_batch(); - let mut buf = Vec::new(); - { - let mut writer = StreamWriter::try_new(&mut buf, &batch.schema()).ok()?; - writer.write(&batch).ok()?; - writer.finish().ok()?; + fn build_error_response(status: StatusCode, message: String) -> Response { + Response { + status_code: status as i32, + message, + data: None, + } + } + + async fn execute_statement(&self, stmt: &dyn Statement, success_status: StatusCode) -> Response { + let result = self.coordinator.execute_with_stream_catalog(stmt).await; + + if result.success { + Self::build_success_response(success_status, result.message, result.data) + } else { + Self::build_error_response(StatusCode::InternalServerError, result.message) } - Some(buf) } } @@ -66,236 +108,133 @@ impl FunctionStreamService for FunctionStreamServiceImpl { &self, request: Request, ) -> Result, Status> { - let start_time = Instant::now(); + let timer = Instant::now(); let req = request.into_inner(); - let parse_start = Instant::now(); - let statements = match parse_sql(&req.sql) { - Ok(stmts) => { - log::debug!( - "SQL parsed {} statement(s) in {}ms", - stmts.len(), - parse_start.elapsed().as_millis() - ); - stmts - } - Err(e) => { - return Ok(TonicResponse::new(Self::build_response( - StatusCode::BadRequest, - format!("Parse error: {}", e), - None, + let statements = parse_sql(&req.sql).map_err(|e| { + warn!("SQL parse 
rejection: {}", e); + Status::invalid_argument("Provided SQL syntax is invalid") + })?; + + if statements.is_empty() { + return Ok(TonicResponse::new(Self::build_success_response( + StatusCode::Ok, + "No statements executed".to_string(), + None, + ))); + } + + let mut final_response = None; + + for stmt in statements { + let result = self + .coordinator + .execute_with_stream_catalog(stmt.as_ref()) + .await; + + if !result.success { + error!("SQL execution aborted: {}", result.message); + return Ok(TonicResponse::new(Self::build_error_response( + StatusCode::InternalServerError, + result.message, ))); } - }; - let exec_start = Instant::now(); - let mut last_result = self.coordinator.execute(statements[0].as_ref()); - for stmt in &statements[1..] { - if !last_result.success { - break; - } - last_result = self.coordinator.execute(stmt.as_ref()); + final_response = Some(result); } - let result = last_result; - log::debug!( - "Coordinator execution finished in {}ms", - exec_start.elapsed().as_millis() - ); - let status_code = if result.success { - StatusCode::Ok - } else { - error!("Execution failed: {}", result.message); - StatusCode::InternalServerError - }; - - log::debug!( - "Total SQL request cost: {}ms", - start_time.elapsed().as_millis() - ); + let result = final_response.unwrap(); + let response = Self::build_success_response(StatusCode::Ok, result.message, result.data); - Ok(TonicResponse::new(Self::build_response( - status_code, - result.message, - result - .data - .as_ref() - .and_then(|ds| Self::data_set_to_ipc_bytes(ds.as_ref())), - ))) + debug!("execute_sql completed in {}ms", timer.elapsed().as_millis()); + Ok(TonicResponse::new(response)) } async fn create_function( &self, request: Request, ) -> Result, Status> { - let start_time = Instant::now(); + let timer = Instant::now(); let req = request.into_inner(); - info!( - "Received CreateFunction request. 
Config size: {}, Function size: {}", - req.config_bytes.len(), - req.function_bytes.len() - ); - - let config_bytes = if !req.config_bytes.is_empty() { - Some(req.config_bytes) - } else { - None - }; + let config_bytes = (!req.config_bytes.is_empty()).then_some(req.config_bytes); let stmt = CreateFunction::from_bytes(req.function_bytes, config_bytes); - let exec_start = Instant::now(); - let result = self.coordinator.execute(&stmt as &dyn Statement); - info!( - "Coordinator execution finished in {}ms", - exec_start.elapsed().as_millis() - ); - - let status_code = if result.success { - StatusCode::Created - } else { - error!("CreateFunction failed: {}", result.message); - StatusCode::InternalServerError - }; - - info!( - "Total CreateFunction request cost: {}ms", - start_time.elapsed().as_millis() - ); + let response = self.execute_statement(&stmt, StatusCode::Created).await; - Ok(TonicResponse::new(Self::build_response( - status_code, - result.message, - result - .data - .as_ref() - .and_then(|ds| Self::data_set_to_ipc_bytes(ds.as_ref())), - ))) + info!("create_function completed in {}ms", timer.elapsed().as_millis()); + Ok(TonicResponse::new(response)) } async fn create_python_function( &self, request: Request, ) -> Result, Status> { - let start_time = Instant::now(); + let timer = Instant::now(); let req = request.into_inner(); - info!( - "Received CreatePythonFunction request. 
Class name: {}, Modules: {}", - req.class_name, - req.modules.len() - ); - // Convert proto modules to PythonModule - let modules: Vec = req + if req.modules.is_empty() { + return Ok(TonicResponse::new(Self::build_error_response( + StatusCode::BadRequest, + "Python function creation requires at least one module".to_string(), + ))); + } + + let modules: Vec = req .modules .into_iter() - .map(|m| crate::coordinator::PythonModule { + .map(|m| PythonModule { name: m.module_name, bytes: m.module_bytes, }) .collect(); - if modules.is_empty() { - return Ok(TonicResponse::new(Self::build_response( - StatusCode::BadRequest, - "At least one module is required".to_string(), - None, - ))); - } - let stmt = CreatePythonFunction::new(req.class_name, modules, req.config_content); + let response = self.execute_statement(&stmt, StatusCode::Created).await; - let exec_start = Instant::now(); - let result = self.coordinator.execute(&stmt as &dyn Statement); info!( - "Coordinator execution finished in {}ms", - exec_start.elapsed().as_millis() + "create_python_function completed in {}ms", + timer.elapsed().as_millis() ); - - let status_code = if result.success { - StatusCode::Created - } else { - error!("CreatePythonFunction failed: {}", result.message); - StatusCode::InternalServerError - }; - - info!( - "Total CreatePythonFunction request cost: {}ms", - start_time.elapsed().as_millis() - ); - - Ok(TonicResponse::new(Self::build_response( - status_code, - result.message, - result - .data - .as_ref() - .and_then(|ds| Self::data_set_to_ipc_bytes(ds.as_ref())), - ))) + Ok(TonicResponse::new(response)) } async fn drop_function( &self, request: Request, ) -> Result, Status> { - let start_time = Instant::now(); + let timer = Instant::now(); let req = request.into_inner(); - info!( - "Received DropFunction request: function_name={}", - req.function_name - ); let stmt = DropFunction::new(req.function_name); - let exec_start = Instant::now(); - let result = self.coordinator.execute(&stmt as 
&dyn Statement); - info!( - "Coordinator execution finished in {}ms", - exec_start.elapsed().as_millis() - ); + let response = self.execute_statement(&stmt, StatusCode::Ok).await; - let status_code = if result.success { - StatusCode::Ok - } else { - error!("DropFunction failed: {}", result.message); - StatusCode::InternalServerError - }; - - info!( - "Total DropFunction request cost: {}ms", - start_time.elapsed().as_millis() - ); - - Ok(TonicResponse::new(Self::build_response( - status_code, - result.message, - None, - ))) + info!("drop_function completed in {}ms", timer.elapsed().as_millis()); + Ok(TonicResponse::new(response)) } async fn show_functions( &self, - request: Request, + _request: Request, ) -> Result, Status> { - let start_time = Instant::now(); - let _req = request.into_inner(); - info!("Received ShowFunctions request"); - + let timer = Instant::now(); let stmt = ShowFunctions::new(); - let exec_start = Instant::now(); - let result = self.coordinator.execute(&stmt as &dyn Statement); - info!( - "Coordinator execution finished in {}ms", - exec_start.elapsed().as_millis() - ); - let (status_code, message) = if result.success { - (StatusCode::Ok as i32, result.message) - } else { - error!("ShowFunctions failed: {}", result.message); - (StatusCode::InternalServerError as i32, result.message) - }; + let result = self + .coordinator + .execute_with_stream_catalog(&stmt) + .await; + + if !result.success { + error!("show_functions execution failed: {}", result.message); + return Ok(TonicResponse::new(ShowFunctionsResponse { + status_code: StatusCode::InternalServerError as i32, + message: "Failed to retrieve function definitions".to_string(), + functions: vec![], + })); + } - let functions: Vec = result + let functions = result .data .as_ref() .and_then(|arc_ds| { @@ -313,15 +252,10 @@ impl FunctionStreamService for FunctionStreamServiceImpl { }) .unwrap_or_default(); - info!( - "Total ShowFunctions request cost: {}ms, count={}", - 
start_time.elapsed().as_millis(), - functions.len() - ); - + info!("show_functions completed in {}ms", timer.elapsed().as_millis()); Ok(TonicResponse::new(ShowFunctionsResponse { - status_code, - message, + status_code: StatusCode::Ok as i32, + message: result.message, functions, })) } @@ -330,76 +264,28 @@ impl FunctionStreamService for FunctionStreamServiceImpl { &self, request: Request, ) -> Result, Status> { - let start_time = Instant::now(); + let timer = Instant::now(); let req = request.into_inner(); - info!( - "Received StartFunction request: function_name={}", - req.function_name - ); let stmt = StartFunction::new(req.function_name); - let exec_start = Instant::now(); - let result = self.coordinator.execute(&stmt as &dyn Statement); - info!( - "Coordinator execution finished in {}ms", - exec_start.elapsed().as_millis() - ); - - let status_code = if result.success { - StatusCode::Ok - } else { - error!("StartFunction failed: {}", result.message); - StatusCode::InternalServerError - }; + let response = self.execute_statement(&stmt, StatusCode::Ok).await; - info!( - "Total StartFunction request cost: {}ms", - start_time.elapsed().as_millis() - ); - - Ok(TonicResponse::new(Self::build_response( - status_code, - result.message, - None, - ))) + info!("start_function completed in {}ms", timer.elapsed().as_millis()); + Ok(TonicResponse::new(response)) } async fn stop_function( &self, request: Request, ) -> Result, Status> { - let start_time = Instant::now(); + let timer = Instant::now(); let req = request.into_inner(); - info!( - "Received StopFunction request: function_name={}", - req.function_name - ); let stmt = StopFunction::new(req.function_name); - let exec_start = Instant::now(); - let result = self.coordinator.execute(&stmt as &dyn Statement); - info!( - "Coordinator execution finished in {}ms", - exec_start.elapsed().as_millis() - ); - - let status_code = if result.success { - StatusCode::Ok - } else { - error!("StopFunction failed: {}", result.message); 
- StatusCode::InternalServerError - }; - - info!( - "Total StopFunction request cost: {}ms", - start_time.elapsed().as_millis() - ); + let response = self.execute_statement(&stmt, StatusCode::Ok).await; - Ok(TonicResponse::new(Self::build_response( - status_code, - result.message, - None, - ))) + info!("stop_function completed in {}ms", timer.elapsed().as_millis()); + Ok(TonicResponse::new(response)) } } diff --git a/src/server/initializer.rs b/src/server/initializer.rs index ccb02788..46eca375 100644 --- a/src/server/initializer.rs +++ b/src/server/initializer.rs @@ -10,15 +10,23 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::config::GlobalConfig; +use std::time::Instant; + use anyhow::{Context, Result}; +use tracing::{debug, info, warn}; + +use crate::config::GlobalConfig; -type InitializerFn = fn(&GlobalConfig) -> Result<()>; +pub type InitializerFn = fn(&GlobalConfig) -> Result<()>; #[derive(Clone)] -struct Component { - name: &'static str, - initializer: InitializerFn, +pub struct Component { + pub name: &'static str, + pub initializer: InitializerFn, +} + +pub struct ComponentRegistry { + components: Vec, } #[derive(Default)] @@ -27,25 +35,17 @@ pub struct ComponentRegistryBuilder { } impl ComponentRegistryBuilder { - #[inline] pub fn new() -> Self { - Self::with_capacity(8) - } - - #[inline] - pub fn with_capacity(capacity: usize) -> Self { Self { - components: Vec::with_capacity(capacity), + components: Vec::with_capacity(8), } } - #[inline] pub fn register(mut self, name: &'static str, initializer: InitializerFn) -> Self { self.components.push(Component { name, initializer }); self } - #[inline] pub fn build(self) -> ComponentRegistry { ComponentRegistry { components: self.components, @@ -53,57 +53,69 @@ impl ComponentRegistryBuilder { } } -pub struct ComponentRegistry { - components: Vec, -} - impl ComponentRegistry { pub fn initialize_all(&self, config: &GlobalConfig) -> Result<()> 
{ if self.components.is_empty() { - log::warn!("No components registered for initialization"); + warn!("Component registry is empty; no components to initialize"); return Ok(()); } - log::info!("Initializing {} components...", self.components.len()); + let total = self.components.len(); + info!(total_components = total, "Commencing system initialization sequence"); + + for (index, component) in self.components.iter().enumerate() { + let start_time = Instant::now(); - for (idx, component) in self.components.iter().enumerate() { - let start = std::time::Instant::now(); - log::debug!( - "[{}/{}] Initializing component: {}", - idx + 1, - self.components.len(), - component.name + debug!( + component = component.name, + step = format!("{}/{}", index + 1, total), + "Initializing component" ); - (component.initializer)(config) - .with_context(|| format!("Component '{}' initialization failed", component.name))?; + (component.initializer)(config).with_context(|| { + format!("Fatal error initializing component: {}", component.name) + })?; - let elapsed = start.elapsed(); - log::debug!( - "[{}/{}] Component '{}' initialized successfully in {:?}", - idx + 1, - self.components.len(), - component.name, - elapsed + debug!( + component = component.name, + elapsed_ms = start_time.elapsed().as_millis(), + "Component initialized successfully" ); } - log::info!( - "All {} components initialized successfully", - self.components.len() - ); + info!("System initialization sequence completed successfully"); Ok(()) } +} - #[inline] - pub fn len(&self) -> usize { - self.components.len() - } +pub fn build_core_registry() -> ComponentRegistry { + let builder = { + let b = ComponentRegistryBuilder::new() + .register("WasmCache", initialize_wasm_cache) + .register("TaskManager", initialize_task_manager); + #[cfg(feature = "python")] + let b = b.register("PythonService", initialize_python_service); + b + }; - #[inline] - pub fn is_empty(&self) -> bool { - self.components.is_empty() - } + builder + 
.register( + "StreamCatalog", + crate::storage::stream_catalog::initialize_stream_catalog, + ) + .register("Coordinator", initialize_coordinator) + .build() +} + +pub fn bootstrap_system(config: &GlobalConfig) -> Result<()> { + let registry = build_core_registry(); + + registry.initialize_all(config)?; + + crate::storage::stream_catalog::restore_global_catalog_from_store(); + + info!("System bootstrap finished. Node is ready to accept traffic."); + Ok(()) } fn initialize_wasm_cache(config: &GlobalConfig) -> Result<()> { @@ -114,18 +126,20 @@ fn initialize_wasm_cache(config: &GlobalConfig) -> Result<()> { max_size: config.wasm.max_cache_size, }, ); - log::info!( - "WASM cache configuration: enabled={}, dir={}, max_size={} bytes", - config.wasm.enable_cache, - config.wasm.cache_dir, - config.wasm.max_cache_size + + debug!( + enabled = config.wasm.enable_cache, + dir = %config.wasm.cache_dir, + max_size = config.wasm.max_cache_size, + "WASM cache configured" ); + Ok(()) } fn initialize_task_manager(config: &GlobalConfig) -> Result<()> { crate::runtime::taskexecutor::TaskManager::init(config) - .context("TaskManager initialization failed")?; + .context("TaskManager service failed to start")?; Ok(()) } @@ -138,22 +152,10 @@ fn initialize_python_service(config: &GlobalConfig) -> Result<()> { fn initialize_coordinator(_config: &GlobalConfig) -> Result<()> { crate::runtime::taskexecutor::TaskManager::get() - .context("Coordinator requires TaskManager to be initialized first")?; - log::info!("Coordinator verified and ready"); - Ok(()) -} + .context("Dependency violation: Coordinator requires TaskManager")?; -pub fn register_components() -> ComponentRegistry { - let builder = { - let b = ComponentRegistryBuilder::new() - .register("WasmCache", initialize_wasm_cache) - .register("TaskManager", initialize_task_manager); - #[cfg(feature = "python")] - let b = b.register("PythonService", initialize_python_service); - b - }; + 
crate::storage::stream_catalog::CatalogManager::global() + .context("Dependency violation: Coordinator requires StreamCatalog")?; - builder - .register("Coordinator", initialize_coordinator) - .build() + Ok(()) } diff --git a/src/server/mod.rs b/src/server/mod.rs index 03254af3..7795f29b 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -17,5 +17,5 @@ mod initializer; mod service; pub use handler::FunctionStreamServiceImpl; -pub use initializer::register_components; +pub use initializer::{bootstrap_system, build_core_registry}; pub use service::start_server_with_shutdown; diff --git a/src/sql/parse.rs b/src/sql/parse.rs index 78c8bac0..1feff64a 100644 --- a/src/sql/parse.rs +++ b/src/sql/parse.rs @@ -14,8 +14,11 @@ //! //! **Data-definition / pipeline shape (this entry point)** //! Only these table-related forms are supported: -//! - **`CREATE TABLE ...`** (including `CREATE TABLE ... AS SELECT` where the planner accepts it) +//! - **`CREATE TABLE ... (cols [, WATERMARK FOR ...]) WITH ('connector' = '...', 'format' = '...', ...)`** +//! connector-backed **source** DDL (no `AS SELECT`; `connector` in `WITH` selects this path) +//! - **`CREATE TABLE ...`** other forms (including `CREATE TABLE ... AS SELECT` where DataFusion accepts it) //! - **`CREATE STREAMING TABLE ... WITH (...) AS SELECT ...`** (streaming sink DDL) +//! - **`DROP TABLE`** / **`DROP TABLE IF EXISTS`** / **`DROP STREAMING TABLE`** (alias for `DROP TABLE` on the stream catalog) //! //! **`INSERT` is not supported** here — use `CREATE TABLE ... AS SELECT` or //! `CREATE STREAMING TABLE ... AS SELECT` to define the query shape instead. 
@@ -26,15 +29,30 @@ use std::collections::HashMap; use datafusion::common::{Result, plan_err}; use datafusion::error::DataFusionError; -use datafusion::sql::sqlparser::ast::{SqlOption, Statement as DFStatement}; +use datafusion::sql::sqlparser::ast::{ObjectType, SqlOption, Statement as DFStatement}; use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; use datafusion::sql::sqlparser::parser::Parser; use crate::coordinator::{ - CreateFunction, CreateTable, DropFunction, ShowFunctions, StartFunction, + CreateFunction, CreateTable, DropFunction, DropTableStatement, ShowFunctions, StartFunction, Statement as CoordinatorStatement, StopFunction, StreamingTableStatement, }; +/// `DROP STREAMING TABLE t` is accepted as sugar for `DROP TABLE t` against the same catalog. +fn rewrite_drop_streaming_table(sql: &str) -> String { + let trimmed = sql.trim_start(); + let tokens: Vec<&str> = trimmed.split_whitespace().collect(); + if tokens.len() >= 4 + && tokens[0].eq_ignore_ascii_case("drop") + && tokens[1].eq_ignore_ascii_case("streaming") + && tokens[2].eq_ignore_ascii_case("table") + { + let rest = tokens[3..].join(" "); + return format!("DROP TABLE {rest}"); + } + sql.to_string() +} + pub fn parse_sql(query: &str) -> Result>> { let trimmed = query.trim(); if trimmed.is_empty() { @@ -42,7 +60,8 @@ pub fn parse_sql(query: &str) -> Result>> { } let dialect = FunctionStreamDialect {}; - let statements = Parser::parse_sql(&dialect, trimmed) + let to_parse = rewrite_drop_streaming_table(trimmed); + let statements = Parser::parse_sql(&dialect, &to_parse) .map_err(|e| DataFusionError::Plan(format!("SQL parse error: {e}")))?; if statements.is_empty() { @@ -74,6 +93,25 @@ fn classify_statement(stmt: DFStatement) -> Result s @ DFStatement::CreateStreamingTable { .. } => { Ok(Box::new(StreamingTableStatement::new(s))) } + stmt @ DFStatement::Drop { .. } => { + { + let DFStatement::Drop { + object_type, + names, + .. 
+ } = &stmt + else { + unreachable!() + }; + if *object_type != ObjectType::Table { + return plan_err!("Only DROP TABLE is supported in this SQL frontend"); + } + if names.len() != 1 { + return plan_err!("DROP TABLE supports exactly one table name per statement"); + } + } + Ok(Box::new(DropTableStatement::new(stmt))) + } DFStatement::Insert { .. } => plan_err!( "INSERT is not supported; only CREATE TABLE and CREATE STREAMING TABLE (with AS SELECT) \ are supported for defining table/query pipelines in this SQL frontend" @@ -158,6 +196,34 @@ mod tests { assert!(is_type(stmt.as_ref(), "CreateTable")); } + #[test] + fn test_parse_create_table_connector_source_ddl() { + let sql = concat!( + "CREATE TABLE kafka_src (id BIGINT, ts TIMESTAMP NOT NULL, WATERMARK FOR ts) ", + "WITH ('connector' = 'kafka', 'format' = 'json', 'topic' = 'events')", + ); + let stmt = first_stmt(sql); + assert!(is_type(stmt.as_ref(), "CreateTable")); + } + + #[test] + fn test_parse_drop_table() { + let stmt = first_stmt("DROP TABLE foo"); + assert!(is_type(stmt.as_ref(), "DropTableStatement")); + } + + #[test] + fn test_parse_drop_table_if_exists() { + let stmt = first_stmt("DROP TABLE IF EXISTS foo"); + assert!(is_type(stmt.as_ref(), "DropTableStatement")); + } + + #[test] + fn test_parse_drop_streaming_table_rewritten() { + let stmt = first_stmt("DROP STREAMING TABLE my_sink"); + assert!(is_type(stmt.as_ref(), "DropTableStatement")); + } + /// `CREATE STREAMING TABLE` is the sink DDL supported by FunctionStream (not `CREATE STREAM TABLE`). #[test] fn test_parse_create_streaming_table() { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index a4898619..823425d2 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -11,4 +11,5 @@ // limitations under the License. 
pub mod state_backend; +pub mod stream_catalog; pub mod task; diff --git a/src/storage/stream_catalog/codec.rs b/src/storage/stream_catalog/codec.rs new file mode 100644 index 00000000..dacaebf8 --- /dev/null +++ b/src/storage/stream_catalog/codec.rs @@ -0,0 +1,57 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Arrow Schema IPC and DataFusion logical plan serialization. + +use std::io::Cursor; +use std::sync::Arc; + +use datafusion::arrow::datatypes::Schema; +use datafusion::arrow::ipc::reader::StreamReader; +use datafusion::arrow::ipc::writer::StreamWriter; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::common::{DataFusionError, Result}; +use datafusion::execution::context::SessionContext; +use datafusion::logical_expr::LogicalPlan; + +pub struct CatalogCodec; + +impl CatalogCodec { + pub fn encode_schema(schema: &Arc) -> Result> { + let mut buffer = Vec::new(); + let empty_batch = RecordBatch::new_empty(Arc::clone(schema)); + let mut writer = StreamWriter::try_new(&mut buffer, schema.as_ref()) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + writer + .write(&empty_batch) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + writer + .finish() + .map_err(|e| DataFusionError::External(Box::new(e)))?; + Ok(buffer) + } + + pub fn decode_schema(bytes: &[u8]) -> Result> { + let cursor = Cursor::new(bytes); + let reader = StreamReader::try_new(cursor, None) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + 
Ok(reader.schema()) + } + + pub fn encode_logical_plan(plan: &LogicalPlan) -> Result> { + datafusion_proto::bytes::logical_plan_to_bytes(plan).map(|b| b.to_vec()) + } + + pub fn decode_logical_plan(bytes: &[u8], ctx: &SessionContext) -> Result { + datafusion_proto::bytes::logical_plan_from_bytes(bytes, ctx) + } +} diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs new file mode 100644 index 00000000..7e61b20e --- /dev/null +++ b/src/storage/stream_catalog/manager.rs @@ -0,0 +1,333 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::HashMap; +use std::sync::{Arc, OnceLock}; + +use anyhow::{anyhow, bail, Context}; +use datafusion::common::{internal_err, plan_err, Result as DFResult}; +use datafusion::execution::context::SessionContext; +use parking_lot::RwLock; +use prost::Message; +use protocol::storage::{self as pb, table_definition}; +use tracing::warn; +use unicase::UniCase; + +use crate::sql::schema::{ObjectName, StreamPlanningContext, StreamTable}; + +use super::codec::CatalogCodec; +use super::meta_store::MetaStore; + +const CATALOG_KEY_PREFIX: &str = "catalog:stream_table:"; + +#[derive(Clone, Default, Debug)] +pub struct StreamTableCatalogCache { + pub streams: HashMap>, +} + +pub struct CatalogManager { + store: Arc, + cache: RwLock, + session_ctx: Arc, +} + +static GLOBAL_CATALOG: OnceLock> = OnceLock::new(); + +impl CatalogManager { + pub fn new(store: Arc, session_ctx: Arc) -> Self { + Self { + store, + cache: RwLock::new(StreamTableCatalogCache::default()), + session_ctx, + } + } + + pub fn init_global_in_memory() -> anyhow::Result<()> { + Self::init_global( + Arc::new(super::InMemoryMetaStore::new()), + Arc::new(SessionContext::new()), + ) + } + + pub fn init_global( + store: Arc, + session_ctx: Arc, + ) -> anyhow::Result<()> { + if GLOBAL_CATALOG.get().is_some() { + bail!("CatalogManager already initialized"); + } + + let mgr = Arc::new(CatalogManager::new(store, session_ctx)); + GLOBAL_CATALOG + .set(mgr) + .map_err(|_| anyhow!("CatalogManager global install failed"))?; + + Ok(()) + } + + pub fn try_global() -> Option> { + GLOBAL_CATALOG.get().cloned() + } + + pub fn global() -> anyhow::Result> { + Self::try_global().ok_or_else(|| anyhow!("CatalogManager not initialized")) + } + + #[inline] + fn build_store_key(table_name: &str) -> String { + format!("{CATALOG_KEY_PREFIX}{}", table_name.to_lowercase()) + } + + pub fn add_table(&self, table: StreamTable) -> DFResult<()> { + let proto_def = self.encode_table(&table)?; + let payload = 
proto_def.encode_to_vec(); + let key = Self::build_store_key(table.name()); + + self.store.put(&key, payload)?; + + let object_name = UniCase::new(table.name().to_string()); + self.cache.write().streams.insert(object_name, Arc::new(table)); + + Ok(()) + } + + pub fn has_stream_table(&self, name: &str) -> bool { + let object_name = UniCase::new(name.to_string()); + self.cache.read().streams.contains_key(&object_name) + } + + pub fn drop_table(&self, table_name: &str, if_exists: bool) -> DFResult<()> { + let object_name = UniCase::new(table_name.to_string()); + + let exists = self.cache.read().streams.contains_key(&object_name); + + if !exists { + if if_exists { + return Ok(()); + } + return plan_err!("Table '{table_name}' not found"); + } + + let key = Self::build_store_key(table_name); + self.store.delete(&key)?; + + self.cache.write().streams.remove(&object_name); + + Ok(()) + } + + pub fn restore_from_store(&self) -> DFResult<()> { + let records = self.store.scan_prefix(CATALOG_KEY_PREFIX)?; + let mut restored = StreamTableCatalogCache::default(); + + for (_key, payload) in records { + let proto_def = pb::TableDefinition::decode(payload.as_slice()).map_err(|e| { + datafusion::common::DataFusionError::Execution(format!( + "Failed to decode stream catalog protobuf: {e}" + )) + })?; + + let table = self.decode_table(proto_def)?; + let object_name = UniCase::new(table.name().to_string()); + restored.streams.insert(object_name, Arc::new(table)); + } + + *self.cache.write() = restored; + + Ok(()) + } + + pub fn acquire_planning_context(&self) -> StreamPlanningContext { + let mut ctx = StreamPlanningContext::new(); + ctx.tables.streams = self.cache.read().streams.clone(); + ctx + } + + fn encode_table(&self, table: &StreamTable) -> DFResult { + let table_type = match table { + StreamTable::Source { + schema, + event_time_field, + watermark_field, + .. 
+ } => table_definition::TableType::Source(pb::StreamSource { + arrow_schema_ipc: CatalogCodec::encode_schema(schema)?, + event_time_field: event_time_field.clone(), + watermark_field: watermark_field.clone(), + }), + StreamTable::Sink { schema, .. } => table_definition::TableType::Sink(pb::StreamSink { + arrow_schema_ipc: CatalogCodec::encode_schema(schema)?, + }), + StreamTable::Memory { logical_plan, .. } => { + let logical_plan_bytes = logical_plan + .as_ref() + .map(|plan| CatalogCodec::encode_logical_plan(plan)) + .transpose()?; + + table_definition::TableType::Memory(pb::StreamMemory { logical_plan_bytes }) + } + }; + + Ok(pb::TableDefinition { + table_name: table.name().to_string(), + updated_at_millis: chrono::Utc::now().timestamp_millis(), + table_type: Some(table_type), + }) + } + + fn decode_table(&self, proto_def: pb::TableDefinition) -> DFResult { + let Some(table_type) = proto_def.table_type else { + return internal_err!( + "Corrupted catalog row: missing table_type for {}", + proto_def.table_name + ); + }; + + match table_type { + table_definition::TableType::Source(src) => Ok(StreamTable::Source { + name: proto_def.table_name, + schema: CatalogCodec::decode_schema(&src.arrow_schema_ipc)?, + event_time_field: src.event_time_field, + watermark_field: src.watermark_field, + }), + table_definition::TableType::Sink(sink) => Ok(StreamTable::Sink { + name: proto_def.table_name, + schema: CatalogCodec::decode_schema(&sink.arrow_schema_ipc)?, + }), + table_definition::TableType::Memory(mem) => { + let logical_plan = mem + .logical_plan_bytes + .map(|bytes| CatalogCodec::decode_logical_plan(&bytes, &self.session_ctx)) + .transpose()?; + + Ok(StreamTable::Memory { + name: proto_def.table_name, + logical_plan, + }) + } + } + } +} + +pub fn restore_global_catalog_from_store() { + let Some(mgr) = CatalogManager::try_global() else { + return; + }; + if let Err(e) = mgr.restore_from_store() { + warn!("Stream catalog restore_from_store skipped or failed: {e:#}"); + 
} +} + +pub fn initialize_stream_catalog(_config: &crate::config::GlobalConfig) -> anyhow::Result<()> { + CatalogManager::init_global_in_memory().context("Stream catalog (CatalogManager) init failed") +} + +pub fn planning_schema_provider() -> StreamPlanningContext { + CatalogManager::try_global() + .map(|m| m.acquire_planning_context()) + .unwrap_or_else(StreamPlanningContext::new) +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use datafusion::arrow::datatypes::{DataType, Field, Schema}; + use datafusion::execution::context::SessionContext; + + use crate::sql::schema::StreamTable; + use crate::storage::stream_catalog::{InMemoryMetaStore, MetaStore}; + + use super::CatalogManager; + + fn create_test_manager() -> CatalogManager { + CatalogManager::new( + Arc::new(InMemoryMetaStore::new()), + Arc::new(SessionContext::new()), + ) + } + + #[test] + fn add_table_roundtrip_snapshot() { + let mgr = create_test_manager(); + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); + + let table = StreamTable::Source { + name: "t1".into(), + schema: Arc::clone(&schema), + event_time_field: Some("ts".into()), + watermark_field: None, + }; + + mgr.add_table(table).unwrap(); + + let ctx = mgr.acquire_planning_context(); + let got = ctx.get_stream_table("t1").expect("table present"); + + assert_eq!(got.name(), "t1"); + + if let StreamTable::Source { + event_time_field, + watermark_field, + .. 
+ } = got.as_ref() + { + assert_eq!(event_time_field.as_deref(), Some("ts")); + assert!(watermark_field.is_none()); + } else { + panic!("expected Source"); + } + } + + #[test] + fn drop_table_if_exists() { + let mgr = create_test_manager(); + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); + + mgr.add_table(StreamTable::Source { + name: "t_drop".into(), + schema, + event_time_field: None, + watermark_field: None, + }) + .unwrap(); + + mgr.drop_table("t_drop", false).unwrap(); + assert!(!mgr.has_stream_table("t_drop")); + + mgr.drop_table("t_drop", true).unwrap(); + assert!(mgr.drop_table("nope", false).is_err()); + mgr.drop_table("nope", true).unwrap(); + } + + #[test] + fn restore_from_store_rebuilds_cache() { + let store: Arc = Arc::new(InMemoryMetaStore::new()); + let session = Arc::new(SessionContext::new()); + + let mgr_a = CatalogManager::new(Arc::clone(&store), Arc::clone(&session)); + let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Utf8, true)])); + + mgr_a + .add_table(StreamTable::Sink { + name: "sink1".into(), + schema, + }) + .unwrap(); + + let mgr_b = CatalogManager::new(store, session); + mgr_b.restore_from_store().unwrap(); + + let ctx = mgr_b.acquire_planning_context(); + assert!(ctx.get_stream_table("sink1").is_some()); + } +} diff --git a/src/storage/stream_catalog/meta_store.rs b/src/storage/stream_catalog/meta_store.rs new file mode 100644 index 00000000..6f61b3f7 --- /dev/null +++ b/src/storage/stream_catalog/meta_store.rs @@ -0,0 +1,70 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +//! Pluggable metadata KV backend (memory, etcd, Redis, …). + +use std::collections::HashMap; + +use datafusion::common::Result; +use parking_lot::RwLock; + +/// Synchronous metadata store for catalog records. +pub trait MetaStore: Send + Sync { + fn put(&self, key: &str, value: Vec) -> Result<()>; + fn get(&self, key: &str) -> Result>>; + fn delete(&self, key: &str) -> Result<()>; + fn scan_prefix(&self, prefix: &str) -> Result)>>; +} + +/// In-process KV store for single-node deployments and tests. +pub struct InMemoryMetaStore { + db: RwLock>>, +} + +impl InMemoryMetaStore { + pub fn new() -> Self { + Self { + db: RwLock::new(HashMap::new()), + } + } +} + +impl Default for InMemoryMetaStore { + fn default() -> Self { + Self::new() + } +} + +impl MetaStore for InMemoryMetaStore { + fn put(&self, key: &str, value: Vec) -> Result<()> { + self.db.write().insert(key.to_string(), value); + Ok(()) + } + + fn get(&self, key: &str) -> Result>> { + Ok(self.db.read().get(key).cloned()) + } + + fn delete(&self, key: &str) -> Result<()> { + self.db.write().remove(key); + Ok(()) + } + + fn scan_prefix(&self, prefix: &str) -> Result)>> { + let db = self.db.read(); + Ok(db + .iter() + .filter(|(k, _)| k.starts_with(prefix)) + .map(|(k, v)| (k.clone(), v.clone())) + .collect()) + } +} diff --git a/src/storage/stream_catalog/mod.rs b/src/storage/stream_catalog/mod.rs new file mode 100644 index 00000000..f4f84469 --- /dev/null +++ b/src/storage/stream_catalog/mod.rs @@ -0,0 +1,23 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Stream table catalog: protobuf persistence, MVCC-style planning snapshots for the coordinator. + +mod codec; +mod manager; +mod meta_store; + +pub use manager::{ + CatalogManager, initialize_stream_catalog, planning_schema_provider, + restore_global_catalog_from_store, +}; +pub use meta_store::{InMemoryMetaStore, MetaStore}; diff --git a/src/storage/task/mod.rs b/src/storage/task/mod.rs index b4b3680f..3123415a 100644 --- a/src/storage/task/mod.rs +++ b/src/storage/task/mod.rs @@ -16,6 +16,7 @@ pub mod factory; mod function_info; +mod proto_codec; mod rocksdb_storage; pub mod storage; diff --git a/src/storage/task/proto_codec.rs b/src/storage/task/proto_codec.rs new file mode 100644 index 00000000..1e0bedb3 --- /dev/null +++ b/src/storage/task/proto_codec.rs @@ -0,0 +1,271 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Protobuf wire format for RocksDB task rows, with legacy bincode read support. 
+ +use anyhow::{Context, Result, anyhow}; +use prost::Message; +use protocol::storage::{ + ComponentStateKind, ComponentStateProto, TaskMetadataProto, TaskModulePayloadProto, + TaskModulePython, TaskModuleWasm, task_module_payload_proto, +}; +use serde::{Deserialize, Serialize}; + +use crate::runtime::common::ComponentState; + +use super::storage::TaskModuleBytes; + +/// Magic prefix for protobuf-encoded task values (meta + payload). Legacy rows have no prefix. +pub const TASK_STORAGE_PROTO_MAGIC: &[u8; 4] = b"FSP1"; + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct LegacyTaskMetadata { + task_type: String, + state: ComponentState, + created_at: u64, + checkpoint_id: Option, +} + +fn component_state_to_proto(state: &ComponentState) -> ComponentStateProto { + let (kind, error_message) = match state { + ComponentState::Uninitialized => (ComponentStateKind::Uninitialized, String::new()), + ComponentState::Initialized => (ComponentStateKind::Initialized, String::new()), + ComponentState::Starting => (ComponentStateKind::Starting, String::new()), + ComponentState::Running => (ComponentStateKind::Running, String::new()), + ComponentState::Checkpointing => (ComponentStateKind::Checkpointing, String::new()), + ComponentState::Stopping => (ComponentStateKind::Stopping, String::new()), + ComponentState::Stopped => (ComponentStateKind::Stopped, String::new()), + ComponentState::Closing => (ComponentStateKind::Closing, String::new()), + ComponentState::Closed => (ComponentStateKind::Closed, String::new()), + ComponentState::Error { error } => (ComponentStateKind::Error, error.clone()), + }; + ComponentStateProto { + kind: kind as i32, + error_message, + } +} + +fn component_state_from_proto(p: &ComponentStateProto) -> ComponentState { + let kind = ComponentStateKind::try_from(p.kind).unwrap_or(ComponentStateKind::Unspecified); + match kind { + ComponentStateKind::Unspecified | ComponentStateKind::Uninitialized => { + ComponentState::Uninitialized + } + 
ComponentStateKind::Initialized => ComponentState::Initialized, + ComponentStateKind::Starting => ComponentState::Starting, + ComponentStateKind::Running => ComponentState::Running, + ComponentStateKind::Checkpointing => ComponentState::Checkpointing, + ComponentStateKind::Stopping => ComponentState::Stopping, + ComponentStateKind::Stopped => ComponentState::Stopped, + ComponentStateKind::Closing => ComponentState::Closing, + ComponentStateKind::Closed => ComponentState::Closed, + ComponentStateKind::Error => ComponentState::Error { + error: if p.error_message.is_empty() { + "unknown error".to_string() + } else { + p.error_message.clone() + }, + }, + } +} + +/// Encode task metadata for `task_meta` column family (always protobuf + magic). +pub fn encode_task_metadata_bytes( + task_type: &str, + state: &ComponentState, + created_at: u64, + checkpoint_id: Option, +) -> Result> { + let proto = TaskMetadataProto { + task_type: task_type.to_string(), + state: Some(component_state_to_proto(state)), + created_at, + checkpoint_id, + }; + let mut out = TASK_STORAGE_PROTO_MAGIC.to_vec(); + proto + .encode(&mut out) + .context("encode TaskMetadataProto")?; + Ok(out) +} + +pub struct DecodedTaskMetadata { + pub task_type: String, + pub state: ComponentState, + pub created_at: u64, + pub checkpoint_id: Option, +} + +/// Decode metadata written by this version (protobuf) or legacy bincode+serde. 
+pub fn decode_task_metadata_bytes(raw: &[u8]) -> Result { + if raw.len() >= TASK_STORAGE_PROTO_MAGIC.len() + && &raw[..TASK_STORAGE_PROTO_MAGIC.len()] == TASK_STORAGE_PROTO_MAGIC.as_slice() + { + let proto = TaskMetadataProto::decode(&raw[TASK_STORAGE_PROTO_MAGIC.len()..]) + .context("decode TaskMetadataProto")?; + let state = proto + .state + .as_ref() + .map(component_state_from_proto) + .unwrap_or_default(); + return Ok(DecodedTaskMetadata { + task_type: proto.task_type, + state, + created_at: proto.created_at, + checkpoint_id: proto.checkpoint_id, + }); + } + + let (legacy, _): (LegacyTaskMetadata, _) = bincode::serde::decode_from_slice( + raw, + bincode::config::standard(), + ) + .map_err(|e| anyhow!("legacy task metadata bincode decode failed: {e}"))?; + Ok(DecodedTaskMetadata { + task_type: legacy.task_type, + state: legacy.state, + created_at: legacy.created_at, + checkpoint_id: legacy.checkpoint_id, + }) +} + +fn module_to_proto(module: &TaskModuleBytes) -> TaskModulePayloadProto { + match module { + TaskModuleBytes::Wasm(bytes) => TaskModulePayloadProto { + payload: Some(task_module_payload_proto::Payload::Wasm(TaskModuleWasm { + wasm_binary: bytes.clone(), + })), + }, + TaskModuleBytes::Python { + class_name, + module, + bytes, + } => TaskModulePayloadProto { + payload: Some(task_module_payload_proto::Payload::Python(TaskModulePython { + class_name: class_name.clone(), + module_path: module.clone(), + embedded_code: bytes.clone(), + })), + }, + } +} + +/// Encode module payload for `task_payload` column family (always protobuf + magic). +pub fn encode_task_module_bytes(module: &TaskModuleBytes) -> Result> { + let proto = module_to_proto(module); + let mut out = TASK_STORAGE_PROTO_MAGIC.to_vec(); + proto + .encode(&mut out) + .context("encode TaskModulePayloadProto")?; + Ok(out) +} + +/// Decode module payload: protobuf+magic or legacy bincode+serde [`TaskModuleBytes`]. 
+pub fn decode_task_module_bytes(raw: &[u8]) -> Result { + if raw.len() >= TASK_STORAGE_PROTO_MAGIC.len() + && &raw[..TASK_STORAGE_PROTO_MAGIC.len()] == TASK_STORAGE_PROTO_MAGIC.as_slice() + { + let proto = TaskModulePayloadProto::decode(&raw[TASK_STORAGE_PROTO_MAGIC.len()..]) + .context("decode TaskModulePayloadProto")?; + return proto.try_into_task_module(); + } + + let (legacy, _): (TaskModuleBytes, _) = bincode::serde::decode_from_slice( + raw, + bincode::config::standard(), + ) + .map_err(|e| anyhow!("legacy task module bincode decode failed: {e}"))?; + Ok(legacy) +} + +trait TryIntoTaskModule { + fn try_into_task_module(self) -> Result; +} + +impl TryIntoTaskModule for TaskModulePayloadProto { + fn try_into_task_module(self) -> Result { + match self.payload { + Some(task_module_payload_proto::Payload::Wasm(w)) => { + Ok(TaskModuleBytes::Wasm(w.wasm_binary)) + } + Some(task_module_payload_proto::Payload::Python(p)) => Ok(TaskModuleBytes::Python { + class_name: p.class_name, + module: p.module_path, + bytes: p.embedded_code, + }), + None => Err(anyhow!("TaskModulePayloadProto missing payload")), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn metadata_roundtrip_proto() { + let enc = encode_task_metadata_bytes( + "wasm", + &ComponentState::Running, + 42, + Some(7), + ) + .unwrap(); + let dec = decode_task_metadata_bytes(&enc).unwrap(); + assert_eq!(dec.task_type, "wasm"); + assert_eq!(dec.state, ComponentState::Running); + assert_eq!(dec.created_at, 42); + assert_eq!(dec.checkpoint_id, Some(7)); + } + + #[test] + fn module_roundtrip_wasm_proto() { + let m = TaskModuleBytes::Wasm(vec![1, 2, 3]); + let enc = encode_task_module_bytes(&m).unwrap(); + let dec = decode_task_module_bytes(&enc).unwrap(); + assert_eq!(dec, m); + } + + #[test] + fn module_roundtrip_python_proto() { + let m = TaskModuleBytes::Python { + class_name: "C".into(), + module: "m".into(), + bytes: Some(vec![9]), + }; + let enc = encode_task_module_bytes(&m).unwrap(); + 
let dec = decode_task_module_bytes(&enc).unwrap(); + assert_eq!(dec, m); + } + + #[test] + fn legacy_bincode_metadata_still_decodes() { + let legacy = LegacyTaskMetadata { + task_type: "legacy".into(), + state: ComponentState::Stopped, + created_at: 99, + checkpoint_id: None, + }; + let raw = bincode::serde::encode_to_vec(&legacy, bincode::config::standard()).unwrap(); + let dec = decode_task_metadata_bytes(&raw).unwrap(); + assert_eq!(dec.task_type, "legacy"); + assert_eq!(dec.state, ComponentState::Stopped); + assert_eq!(dec.created_at, 99); + } + + #[test] + fn legacy_bincode_module_still_decodes() { + let m = TaskModuleBytes::Wasm(vec![8, 9]); + let raw = bincode::serde::encode_to_vec(&m, bincode::config::standard()).unwrap(); + assert_eq!(decode_task_module_bytes(&raw).unwrap(), m); + } +} diff --git a/src/storage/task/rocksdb_storage.rs b/src/storage/task/rocksdb_storage.rs index 714a9143..cea0ceb9 100644 --- a/src/storage/task/rocksdb_storage.rs +++ b/src/storage/task/rocksdb_storage.rs @@ -14,12 +14,15 @@ //! //! Uses three column families: task_meta, task_config, task_payload. 
-use super::storage::{StoredTaskInfo, TaskModuleBytes, TaskStorage}; +use super::proto_codec::{ + decode_task_metadata_bytes, decode_task_module_bytes, encode_task_metadata_bytes, + encode_task_module_bytes, +}; +use super::storage::{StoredTaskInfo, TaskStorage}; use crate::config::storage::RocksDBStorageConfig; use crate::runtime::common::ComponentState; use anyhow::{Context, Result, anyhow}; use rocksdb::{ColumnFamilyDescriptor, DB, IteratorMode, Options, WriteBatch}; -use serde::{Deserialize, Serialize}; use std::path::Path; use std::sync::Arc; @@ -27,14 +30,6 @@ const CF_METADATA: &str = "task_meta"; const CF_CONFIG: &str = "task_config"; const CF_PAYLOAD: &str = "task_payload"; -#[derive(Debug, Clone, Serialize, Deserialize)] -struct TaskMetadata { - task_type: String, - state: ComponentState, - created_at: u64, - checkpoint_id: Option, -} - pub struct RocksDBTaskStorage { db: Arc, } @@ -95,27 +90,19 @@ impl TaskStorage for RocksDBTaskStorage { return Err(anyhow!("Task uniqueness violation: {}", task_info.name)); } - let meta = TaskMetadata { - task_type: task_info.task_type.clone(), - state: task_info.state.clone(), - created_at: task_info.created_at, - checkpoint_id: task_info.checkpoint_id, - }; + let meta_bytes = encode_task_metadata_bytes( + &task_info.task_type, + &task_info.state, + task_info.created_at, + task_info.checkpoint_id, + )?; let mut batch = WriteBatch::default(); - batch.put_cf( - &cf_meta, - key, - bincode::serde::encode_to_vec(&meta, bincode::config::standard())?, - ); + batch.put_cf(&cf_meta, key, meta_bytes); batch.put_cf(&cf_conf, key, &task_info.config_bytes); if let Some(ref module) = task_info.module_bytes { - batch.put_cf( - &cf_payl, - key, - bincode::serde::encode_to_vec(module, bincode::config::standard())?, - ); + batch.put_cf(&cf_payl, key, encode_task_module_bytes(module)?); } self.db @@ -132,14 +119,18 @@ impl TaskStorage for RocksDBTaskStorage { .get_cf(&cf, key)? 
.ok_or_else(|| anyhow!("Task {} not found", task_name))?; - let (mut meta, _): (TaskMetadata, _) = - bincode::serde::decode_from_slice(&raw, bincode::config::standard())?; - meta.state = new_state; + let mut decoded = decode_task_metadata_bytes(&raw)?; + decoded.state = new_state; self.db.put_cf( &cf, key, - bincode::serde::encode_to_vec(&meta, bincode::config::standard())?, + encode_task_metadata_bytes( + &decoded.task_type, + &decoded.state, + decoded.created_at, + decoded.checkpoint_id, + )?, )?; Ok(()) } @@ -153,14 +144,18 @@ impl TaskStorage for RocksDBTaskStorage { .get_cf(&cf, key)? .ok_or_else(|| anyhow!("Task {} not found", task_name))?; - let (mut meta, _): (TaskMetadata, _) = - bincode::serde::decode_from_slice(&raw, bincode::config::standard())?; - meta.checkpoint_id = checkpoint_id; + let mut decoded = decode_task_metadata_bytes(&raw)?; + decoded.checkpoint_id = checkpoint_id; self.db.put_cf( &cf, key, - bincode::serde::encode_to_vec(&meta, bincode::config::standard())?, + encode_task_metadata_bytes( + &decoded.task_type, + &decoded.state, + decoded.created_at, + decoded.checkpoint_id, + )?, )?; Ok(()) } @@ -189,20 +184,12 @@ impl TaskStorage for RocksDBTaskStorage { .get_cf(&self.get_cf(CF_CONFIG)?, key)? .ok_or_else(|| anyhow!("Config missing: {}", task_name))?; - let module_bytes = self - .db - .get_cf(&self.get_cf(CF_PAYLOAD)?, key)? - .and_then(|b| { - bincode::serde::decode_from_slice::( - &b, - bincode::config::standard(), - ) - .ok() - .map(|(v, _)| v) - }); - - let (meta, _): (TaskMetadata, _) = - bincode::serde::decode_from_slice(&meta_raw, bincode::config::standard())?; + let module_bytes = match self.db.get_cf(&self.get_cf(CF_PAYLOAD)?, key)? 
{ + None => None, + Some(b) => Some(decode_task_module_bytes(&b)?), + }; + + let meta = decode_task_metadata_bytes(&meta_raw)?; Ok(StoredTaskInfo { name: task_name.to_string(), diff --git a/src/storage/task/storage.rs b/src/storage/task/storage.rs index 3c9e4080..156ee5d8 100644 --- a/src/storage/task/storage.rs +++ b/src/storage/task/storage.rs @@ -15,7 +15,7 @@ use anyhow::Result; use serde::{Deserialize, Serialize}; #[allow(dead_code)] -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum TaskModuleBytes { Wasm(Vec), Python { From cdb6ddb157682433a544041785bbffc231a82987 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 22 Mar 2026 15:25:24 +0800 Subject: [PATCH 13/44] update --- .../logical_planner/optimizers/chaining.rs | 140 +++++++++++------- 1 file changed, 84 insertions(+), 56 deletions(-) diff --git a/src/sql/logical_planner/optimizers/chaining.rs b/src/sql/logical_planner/optimizers/chaining.rs index 5935c985..11c072d3 100644 --- a/src/sql/logical_planner/optimizers/chaining.rs +++ b/src/sql/logical_planner/optimizers/chaining.rs @@ -10,93 +10,121 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::mem; - -use petgraph::prelude::*; -use petgraph::visit::NodeRef; +use petgraph::graph::{EdgeIndex, NodeIndex}; +use petgraph::visit::EdgeRef; +use petgraph::Direction::{Incoming, Outgoing}; use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer}; -pub struct ChainingOptimizer {} - -fn remove_in_place(graph: &mut DiGraph, node: NodeIndex) { - let incoming = graph.edges_directed(node, Incoming).next().unwrap(); - - let parent = incoming.source().id(); - let incoming = incoming.id(); - graph.remove_edge(incoming); +pub type NodeId = NodeIndex; +pub type EdgeId = EdgeIndex; - let outgoing: Vec<_> = graph - .edges_directed(node, Outgoing) - .map(|e| (e.id(), e.target().id())) - .collect(); +pub struct ChainingOptimizer; - for (edge, target) in outgoing { - let weight = graph.remove_edge(edge).unwrap(); - graph.add_edge(parent, target, weight); - } +impl ChainingOptimizer { + fn find_fusion_candidate(plan: &LogicalGraph) -> Option<(NodeId, NodeId, EdgeId)> { + let node_ids: Vec = plan.node_indices().collect(); - graph.remove_node(node); -} + for upstream_id in node_ids { + let upstream_node = plan.node_weight(upstream_id)?; -impl Optimizer for ChainingOptimizer { - fn optimize_once(&self, plan: &mut LogicalGraph) -> bool { - let node_indices: Vec = plan.node_indices().collect(); - - for &node_idx in &node_indices { - let cur = plan.node_weight(node_idx).unwrap(); - - if cur.operator_chain.is_source() { + if upstream_node.operator_chain.is_source() { continue; } - let mut successors = plan.edges_directed(node_idx, Outgoing).collect::>(); + let outgoing_edges: Vec<_> = plan.edges_directed(upstream_id, Outgoing).collect(); - if successors.len() != 1 { + if outgoing_edges.len() != 1 { continue; } - let edge = successors.remove(0); - let edge_type = edge.weight().edge_type; + let bridging_edge = &outgoing_edges[0]; - if edge_type != LogicalEdgeType::Forward { + if bridging_edge.weight().edge_type != LogicalEdgeType::Forward { continue; } - 
let successor_idx = edge.target(); + let downstream_id = bridging_edge.target(); + let downstream_node = plan.node_weight(downstream_id)?; - let successor_node = plan.node_weight(successor_idx).unwrap(); + if downstream_node.operator_chain.is_sink() { + continue; + } - if cur.parallelism != successor_node.parallelism - || successor_node.operator_chain.is_sink() - { + if upstream_node.parallelism != downstream_node.parallelism { continue; } - if plan.edges_directed(successor_idx, Incoming).count() > 1 { + let incoming_edges: Vec<_> = plan.edges_directed(downstream_id, Incoming).collect(); + if incoming_edges.len() != 1 { continue; } - let mut new_cur = cur.clone(); + return Some((upstream_id, downstream_id, bridging_edge.id())); + } + + None + } - new_cur.description = format!("{} -> {}", cur.description, successor_node.description); + fn apply_fusion( + plan: &mut LogicalGraph, + upstream_id: NodeId, + downstream_id: NodeId, + bridging_edge_id: EdgeId, + ) { + let bridging_edge = plan + .remove_edge(bridging_edge_id) + .expect("Graph Integrity Violation: Bridging edge missing"); + + let propagated_schema = bridging_edge.schema.clone(); + + let downstream_outgoing: Vec<_> = plan + .edges_directed(downstream_id, Outgoing) + .map(|e| (e.id(), e.target())) + .collect(); + + for (edge_id, target_id) in downstream_outgoing { + let edge_weight = plan + .remove_edge(edge_id) + .expect("Graph Integrity Violation: Outgoing edge missing"); + + plan.add_edge(upstream_id, target_id, edge_weight); + } - new_cur - .operator_chain - .operators - .extend(successor_node.operator_chain.operators.clone()); + let downstream_node = plan + .remove_node(downstream_id) + .expect("Graph Integrity Violation: Downstream node missing"); - new_cur - .operator_chain - .edges - .push(edge.weight().schema.clone()); + let upstream_node = plan + .node_weight_mut(upstream_id) + .expect("Graph Integrity Violation: Upstream node missing"); - mem::swap(&mut new_cur, 
plan.node_weight_mut(node_idx).unwrap()); + upstream_node.description = format!( + "{} -> {}", + upstream_node.description, downstream_node.description + ); - remove_in_place(plan, successor_idx); - return true; - } + upstream_node + .operator_chain + .operators + .extend(downstream_node.operator_chain.operators); - false + upstream_node + .operator_chain + .edges + .push(propagated_schema); + } +} + +impl Optimizer for ChainingOptimizer { + fn optimize_once(&self, plan: &mut LogicalGraph) -> bool { + if let Some((upstream_id, downstream_id, bridging_edge_id)) = + Self::find_fusion_candidate(plan) + { + Self::apply_fusion(plan, upstream_id, downstream_id, bridging_edge_id); + true + } else { + false + } } } From 27bd75c2a04f6787a735f7b6c2d926412904f2d6 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 22 Mar 2026 16:49:12 +0800 Subject: [PATCH 14/44] update --- Cargo.lock | 1 + Cargo.toml | 1 + protocol/proto/storage.proto | 8 +- src/coordinator/execution/executor.rs | 2 +- src/coordinator/plan/logical_plan_visitor.rs | 78 +++---- src/coordinator/plan/streaming_table_plan.rs | 4 +- src/sql/common/fs_schema.rs | 3 +- .../logical/fs_program_convert.rs | 201 ++++++++++++++++++ src/sql/logical_node/logical/logical_edge.rs | 59 ++++- src/sql/logical_node/logical/logical_node.rs | 42 ++-- .../logical_node/logical/logical_program.rs | 133 +++++++----- src/sql/logical_node/logical/mod.rs | 1 + .../logical_node/logical/operator_chain.rs | 72 +++++-- src/sql/logical_node/logical/operator_name.rs | 22 ++ .../logical_node/logical/program_config.rs | 25 ++- src/sql/logical_planner/compiled_sql.rs | 21 -- src/sql/logical_planner/mod.rs | 3 - src/sql/mod.rs | 1 - src/sql/schema/schema_provider.rs | 24 +-- src/storage/stream_catalog/codec.rs | 14 +- src/storage/stream_catalog/manager.rs | 74 +++---- 21 files changed, 557 insertions(+), 232 deletions(-) create mode 100644 src/sql/logical_node/logical/fs_program_convert.rs delete mode 100644 
src/sql/logical_planner/compiled_sql.rs diff --git a/Cargo.lock b/Cargo.lock index f39d5d3e..7cd510f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2375,6 +2375,7 @@ dependencies = [ "proctitle", "prost", "protocol", + "rand 0.8.5", "rdkafka", "rocksdb", "serde", diff --git a/Cargo.toml b/Cargo.toml index 8b38dfe4..cee98282 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,7 @@ xxhash-rust = { version = "0.8", features = ["xxh3"] } proctitle = "0.1" unicase = "2.7" petgraph = "0.7" +rand = { version = "0.8", features = ["small_rng"] } itertools = "0.14" strum = { version = "0.26", features = ["derive"] } datafusion-functions-aggregate = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto index b11037a2..cace3107 100644 --- a/protocol/proto/storage.proto +++ b/protocol/proto/storage.proto @@ -10,7 +10,7 @@ syntax = "proto3"; package function_stream.storage; // ============================================================================= -// Stream catalog (coordinator stream tables: source / sink / memory) +// Stream catalog (coordinator stream tables: source / sink) // ============================================================================= // Top-level persisted record for one stream table. 
@@ -20,7 +20,6 @@ message TableDefinition { oneof table_type { StreamSource source = 3; StreamSink sink = 4; - StreamMemory memory = 5; } } @@ -32,10 +31,7 @@ message StreamSource { message StreamSink { bytes arrow_schema_ipc = 1; -} - -message StreamMemory { - optional bytes logical_plan_bytes = 1; + bytes logical_program_bincode = 2; } // ============================================================================= diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index b8fbb3a5..3639ee7a 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -266,7 +266,7 @@ impl PlanVisitor for Executor { let execute = || -> Result { let sink = StreamTable::Sink { name: plan.name.clone(), - schema: Arc::new(plan.logical_plan.schema().as_arrow().clone()), + program: plan.program.clone(), }; self.catalog_manager diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index aa8364ef..14ed01b8 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -91,86 +91,78 @@ impl LogicalPlanVisitor { return plan_err!("Statement mismatch: Expected CREATE STREAMING TABLE AST node"); }; - let target_name = name.to_string(); - debug!( - "Initiating streaming sink compilation for identifier: {}", - target_name - ); + let sink_table_name = name.to_string(); + debug!("Initiating streaming sink compilation for identifier: {}", sink_table_name); - let mut connector_options = ConnectorOptions::new(with_options, &None)?; - let adapter_type = connector_options.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| { + let mut sink_properties = ConnectorOptions::new(with_options, &None)?; + let connector_type = sink_properties.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| { plan_datafusion_err!( - "Validation Error: Streaming table '{}' requires the '{}' property", - target_name, - OPT_CONNECTOR - ) + "Validation Error: Streaming 
table '{}' requires the '{}' property", + sink_table_name, + OPT_CONNECTOR + ) })?; - let routing_exprs = Self::extract_partitioning_keys(&mut connector_options)?; + let partition_keys = Self::extract_partitioning_keys(&mut sink_properties)?; - let mut logical_plan = rewrite_plan( + let mut query_logical_plan = rewrite_plan( produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?, &self.schema_provider, )?; - if logical_plan - .schema() - .fields() - .iter() - .any(|f| is_json_union(f.data_type())) - { - logical_plan = serialize_outgoing_json(&self.schema_provider, Arc::new(logical_plan)); + if query_logical_plan.schema().fields().iter().any(|f| is_json_union(f.data_type())) { + query_logical_plan = serialize_outgoing_json(&self.schema_provider, Arc::new(query_logical_plan)); } - let output_descriptors = logical_plan + let output_schema_fields = query_logical_plan .schema() .fields() .iter() .map(|f| ColumnDescriptor::from((**f).clone())) .collect::>(); - let mut source_definition = SourceTable::from_options( - &target_name, - &adapter_type, + let mut sink_definition = SourceTable::from_options( + &sink_table_name, + &connector_type, false, - output_descriptors, + output_schema_fields, vec![], None, - &mut connector_options, + &mut sink_properties, None, &self.schema_provider, Some(ConnectionType::Sink), comment.clone().unwrap_or_default(), )?; - source_definition.partition_exprs = Arc::new(routing_exprs); - - let sink_schema = logical_plan.schema().clone(); - let egress_node = StreamEgressNode::try_new( - TableReference::bare(target_name.clone()), - Table::ConnectorTable(source_definition.clone()), - sink_schema, - logical_plan, + sink_definition.partition_exprs = Arc::new(partition_keys); + + let output_schema = query_logical_plan.schema().clone(); + let sink_plan_node = StreamEgressNode::try_new( + TableReference::bare(sink_table_name.clone()), + Table::ConnectorTable(sink_definition.clone()), + output_schema, + query_logical_plan, )?; - 
let mut plan_topology = rewrite_sinks(vec![maybe_add_key_extension_to_sink( + let mut rewritten_plans = rewrite_sinks(vec![maybe_add_key_extension_to_sink( LogicalPlan::Extension(Extension { - node: Arc::new(egress_node), + node: Arc::new(sink_plan_node), }), )?])?; - let final_execution_plan = plan_topology.remove(0); + let final_logical_plan = rewritten_plans.remove(0); - self.validate_graph_topology(&final_execution_plan)?; + let validated_program = self.validate_graph_topology(&final_logical_plan)?; Ok(StreamingTable { - name: target_name, + name: sink_table_name, comment: comment.clone(), - source_table: source_definition, - logical_plan: final_execution_plan, + source_table: sink_definition, + program: validated_program, }) } - fn validate_graph_topology(&self, logical_plan: &LogicalPlan) -> Result<()> { + fn validate_graph_topology(&self, logical_plan: &LogicalPlan) -> Result { let mut session_config = SessionConfig::new(); let opts = session_config.options_mut(); opts.optimizer.enable_round_robin_repartition = false; @@ -193,7 +185,7 @@ impl LogicalPlanVisitor { LogicalProgram::new(graph_compiler.into_graph(), ProgramConfig::default()); executable_program.optimize(&ChainingOptimizer {}); - Ok(()) + Ok(executable_program) } fn extract_partitioning_keys( diff --git a/src/coordinator/plan/streaming_table_plan.rs b/src/coordinator/plan/streaming_table_plan.rs index 01b8dbb8..c7b09c26 100644 --- a/src/coordinator/plan/streaming_table_plan.rs +++ b/src/coordinator/plan/streaming_table_plan.rs @@ -11,8 +11,8 @@ // limitations under the License. use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; +use crate::sql::logical_node::logical::LogicalProgram; use crate::sql::schema::source_table::SourceTable; -use datafusion::logical_expr::LogicalPlan; /// Plan node representing a fully resolved streaming table (DDL). 
#[derive(Debug)] @@ -20,7 +20,7 @@ pub struct StreamingTable { pub name: String, pub comment: Option, pub source_table: SourceTable, - pub logical_plan: LogicalPlan, + pub program: LogicalProgram, } impl PlanNode for StreamingTable { diff --git a/src/sql/common/fs_schema.rs b/src/sql/common/fs_schema.rs index e1507e3e..5233bd0c 100644 --- a/src/sql/common/fs_schema.rs +++ b/src/sql/common/fs_schema.rs @@ -7,6 +7,7 @@ use datafusion::arrow::array::{RecordBatch, TimestampNanosecondArray}; use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit}; use datafusion::arrow::error::ArrowError; use datafusion::common::{DataFusionError, Result as DFResult}; +use serde::{Deserialize, Serialize}; use std::sync::Arc; use std::time::SystemTime; use arrow::compute::{filter_record_batch, lexsort_to_indices, partition, take, SortColumn}; @@ -22,7 +23,7 @@ use crate::sql::common::converter::Converter; pub type FsSchemaRef = Arc; -#[derive(Debug, Clone, Eq, PartialEq, Hash)] +#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] pub struct FsSchema { pub schema: Arc, pub timestamp_index: usize, diff --git a/src/sql/logical_node/logical/fs_program_convert.rs b/src/sql/logical_node/logical/fs_program_convert.rs new file mode 100644 index 00000000..a8ac20b1 --- /dev/null +++ b/src/sql/logical_node/logical/fs_program_convert.rs @@ -0,0 +1,201 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
Conversions between [`LogicalProgram`] and `protocol::grpc::api::FsProgram` / pipeline API types. + +use std::collections::HashMap; +use std::str::FromStr; +use std::sync::Arc; + +use datafusion::common::{DataFusionError, Result as DFResult}; +use petgraph::graph::DiGraph; +use petgraph::prelude::EdgeRef; +use protocol::grpc::api::{ + ChainedOperator, EdgeType as ProtoEdgeType, FsEdge, FsNode, FsProgram, FsSchema as ProtoFsSchema, +}; + +use crate::sql::api::pipelines::{PipelineEdge, PipelineGraph, PipelineNode}; +use crate::sql::common::FsSchema; + +use super::logical_edge::logical_edge_type_from_proto_i32; +use super::operator_chain::{ChainedLogicalOperator, OperatorChain}; +use super::operator_name::OperatorName; +use super::{LogicalEdge, LogicalNode, LogicalProgram, ProgramConfig}; + +impl TryFrom for LogicalProgram { + type Error = DataFusionError; + + fn try_from(value: FsProgram) -> DFResult { + let mut graph = DiGraph::new(); + let mut id_map = HashMap::with_capacity(value.nodes.len()); + + for node in value.nodes { + let operators = node + .operators + .into_iter() + .map(|op| { + let ChainedOperator { + operator_id, + operator_name: name_str, + operator_config, + } = op; + let operator_name = OperatorName::from_str(&name_str).map_err(|_| { + DataFusionError::Plan(format!("Invalid operator name: {name_str}")) + })?; + Ok(ChainedLogicalOperator { + operator_id, + operator_name, + operator_config, + }) + }) + .collect::>>()?; + + let edges = node + .edges + .into_iter() + .map(|e| { + let fs: FsSchema = e.try_into()?; + Ok(Arc::new(fs)) + }) + .collect::>>()?; + + let logical_node = LogicalNode { + node_id: node.node_id, + description: node.description, + operator_chain: OperatorChain { operators, edges }, + parallelism: node.parallelism as usize, + }; + + id_map.insert(node.node_index, graph.add_node(logical_node)); + } + + for edge in value.edges { + let source = *id_map.get(&edge.source).ok_or_else(|| { + DataFusionError::Plan("Graph integrity error: 
Missing source node".into()) + })?; + let target = *id_map.get(&edge.target).ok_or_else(|| { + DataFusionError::Plan("Graph integrity error: Missing target node".into()) + })?; + let schema = edge + .schema + .ok_or_else(|| DataFusionError::Plan("Graph integrity error: Missing edge schema".into()))?; + let edge_type = logical_edge_type_from_proto_i32(edge.edge_type)?; + + graph.add_edge( + source, + target, + LogicalEdge { + edge_type, + schema: Arc::new(FsSchema::try_from(schema)?), + }, + ); + } + + let program_config = value + .program_config + .map(ProgramConfig::from) + .unwrap_or_default(); + + Ok(LogicalProgram::new(graph, program_config)) + } +} + +impl From for FsProgram { + fn from(value: LogicalProgram) -> Self { + let nodes = value + .graph + .node_indices() + .filter_map(|idx| value.graph.node_weight(idx).map(|node| (idx, node))) + .map(|(idx, node)| FsNode { + node_index: idx.index() as i32, + node_id: node.node_id, + parallelism: node.parallelism as u32, + description: node.description.clone(), + operators: node + .operator_chain + .operators + .iter() + .map(|op| ChainedOperator { + operator_id: op.operator_id.clone(), + operator_name: op.operator_name.to_string(), + operator_config: op.operator_config.clone(), + }) + .collect(), + edges: node + .operator_chain + .edges + .iter() + .map(|edge| ProtoFsSchema::from((**edge).clone())) + .collect(), + }) + .collect(); + + let edges = value + .graph + .edge_indices() + .filter_map(|eidx| { + let edge = value.graph.edge_weight(eidx)?; + let (source, target) = value.graph.edge_endpoints(eidx)?; + Some(FsEdge { + source: source.index() as i32, + target: target.index() as i32, + schema: Some(ProtoFsSchema::from((*edge.schema).clone())), + edge_type: ProtoEdgeType::from(edge.edge_type) as i32, + }) + }) + .collect(); + + FsProgram { + nodes, + edges, + program_config: Some(value.program_config.into()), + } + } +} + +impl TryFrom for PipelineGraph { + type Error = DataFusionError; + + fn try_from(value: 
LogicalProgram) -> DFResult { + let nodes = value + .graph + .node_weights() + .map(|node| { + Ok(PipelineNode { + node_id: node.node_id, + operator: node.resolve_pipeline_operator_name()?, + description: node.description.clone(), + parallelism: node.parallelism as u32, + }) + }) + .collect::>>()?; + + let edges = value + .graph + .edge_references() + .filter_map(|edge| { + let src = value.graph.node_weight(edge.source())?; + let target = value.graph.node_weight(edge.target())?; + Some(PipelineEdge { + src_id: src.node_id, + dest_id: target.node_id, + key_type: "()".to_string(), + value_type: "()".to_string(), + edge_type: format!("{:?}", edge.weight().edge_type), + }) + }) + .collect(); + + Ok(PipelineGraph { nodes, edges }) + } +} diff --git a/src/sql/logical_node/logical/logical_edge.rs b/src/sql/logical_node/logical/logical_edge.rs index 2f850988..1a169c1d 100644 --- a/src/sql/logical_node/logical/logical_edge.rs +++ b/src/sql/logical_node/logical/logical_edge.rs @@ -13,9 +13,13 @@ use std::fmt::{Display, Formatter}; use std::sync::Arc; +use datafusion::common::{DataFusionError, Result}; +use protocol::grpc::api::EdgeType as ProtoEdgeType; +use serde::{Deserialize, Serialize}; + use crate::sql::common::FsSchema; -#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] pub enum LogicalEdgeType { Forward, Shuffle, @@ -25,16 +29,57 @@ pub enum LogicalEdgeType { impl Display for LogicalEdgeType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - LogicalEdgeType::Forward => write!(f, "→"), - LogicalEdgeType::Shuffle => write!(f, "⤨"), - LogicalEdgeType::LeftJoin => write!(f, "-[left]⤨"), - LogicalEdgeType::RightJoin => write!(f, "-[right]⤨"), + let symbol = match self { + LogicalEdgeType::Forward => "→", + LogicalEdgeType::Shuffle => "⤨", + LogicalEdgeType::LeftJoin => "-[left]⤨", + LogicalEdgeType::RightJoin => "-[right]⤨", + }; + write!(f, 
"{symbol}") + } +} + +impl From for LogicalEdgeType { + fn from(value: ProtoEdgeType) -> Self { + match value { + ProtoEdgeType::Unused => { + panic!("Critical: Invalid EdgeType 'Unused' encountered") + } + ProtoEdgeType::Forward => Self::Forward, + ProtoEdgeType::Shuffle => Self::Shuffle, + ProtoEdgeType::LeftJoin => Self::LeftJoin, + ProtoEdgeType::RightJoin => Self::RightJoin, } } } -#[derive(Clone, Debug, Eq, PartialEq)] +impl From for ProtoEdgeType { + fn from(value: LogicalEdgeType) -> Self { + match value { + LogicalEdgeType::Forward => Self::Forward, + LogicalEdgeType::Shuffle => Self::Shuffle, + LogicalEdgeType::LeftJoin => Self::LeftJoin, + LogicalEdgeType::RightJoin => Self::RightJoin, + } + } +} + +pub(crate) fn logical_edge_type_from_proto_i32(i: i32) -> Result { + let e = ProtoEdgeType::try_from(i).map_err(|_| { + DataFusionError::Plan(format!("invalid protobuf EdgeType discriminant {i}")) + })?; + match e { + ProtoEdgeType::Unused => Err(DataFusionError::Plan( + "Critical: Invalid EdgeType 'Unused' encountered".into(), + )), + ProtoEdgeType::Forward => Ok(LogicalEdgeType::Forward), + ProtoEdgeType::Shuffle => Ok(LogicalEdgeType::Shuffle), + ProtoEdgeType::LeftJoin => Ok(LogicalEdgeType::LeftJoin), + ProtoEdgeType::RightJoin => Ok(LogicalEdgeType::RightJoin), + } +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct LogicalEdge { pub edge_type: LogicalEdgeType, pub schema: Arc, diff --git a/src/sql/logical_node/logical/logical_node.rs b/src/sql/logical_node/logical/logical_node.rs index 492eae26..26129b26 100644 --- a/src/sql/logical_node/logical/logical_node.rs +++ b/src/sql/logical_node/logical/logical_node.rs @@ -12,10 +12,14 @@ use std::fmt::{Debug, Display, Formatter}; +use datafusion::common::{DataFusionError, Result}; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; + use super::operator_chain::{ChainedLogicalOperator, OperatorChain}; use super::operator_name::OperatorName; -#[derive(Clone)] 
+#[derive(Clone, Serialize, Deserialize)] pub struct LogicalNode { pub node_id: u32, pub description: String, @@ -46,6 +50,24 @@ impl LogicalNode { parallelism, } } + + pub fn resolve_pipeline_operator_name(&self) -> Result { + let first_op = self + .operator_chain + .operators + .first() + .ok_or_else(|| DataFusionError::Plan("Invalid LogicalNode: Operator chain is empty".into()))?; + + if let Some(connector_name) = first_op.extract_connector_name() { + return Ok(connector_name); + } + + if self.operator_chain.len() == 1 { + return Ok(first_op.operator_id.clone()); + } + + Ok("chained_op".to_string()) + } } impl Display for LogicalNode { @@ -56,16 +78,12 @@ impl Display for LogicalNode { impl Debug for LogicalNode { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}[{}]", - self.operator_chain - .operators - .iter() - .map(|op| op.operator_id.clone()) - .collect::>() - .join(" -> "), - self.parallelism - ) + let chain_path = self + .operator_chain + .operators + .iter() + .map(|op| op.operator_id.as_str()) + .join(" -> "); + write!(f, "{chain_path}[{}]", self.parallelism) } } diff --git a/src/sql/logical_node/logical/logical_program.rs b/src/sql/logical_node/logical/logical_program.rs index db6883b8..888f4292 100644 --- a/src/sql/logical_node/logical/logical_program.rs +++ b/src/sql/logical_node/logical/logical_program.rs @@ -1,5 +1,6 @@ // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. +// // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 @@ -10,10 +11,20 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::collections::hash_map::DefaultHasher; use std::collections::{HashMap, HashSet}; +use std::hash::Hasher; +use std::sync::Arc; +use datafusion::arrow::datatypes::Schema; +use datafusion::common::{DataFusionError, Result as DFResult}; use petgraph::Direction; use petgraph::dot::Dot; +use prost::Message; +use protocol::grpc::api::FsProgram; +use rand::distributions::Alphanumeric; +use rand::rngs::SmallRng; +use rand::{Rng, SeedableRng}; use super::logical_graph::{LogicalGraph, Optimizer}; use super::operator_name::OperatorName; @@ -39,8 +50,8 @@ impl LogicalProgram { pub fn update_parallelism(&mut self, overrides: &HashMap) { for node in self.graph.node_weights_mut() { - if let Some(p) = overrides.get(&node.node_id) { - node.parallelism = *p; + if let Some(&p) = overrides.get(&node.node_id) { + node.parallelism = p; } } } @@ -56,68 +67,90 @@ impl LogicalProgram { pub fn sources(&self) -> HashSet { self.graph .externals(Direction::Incoming) - .map(|t| self.graph.node_weight(t).unwrap().node_id) + .filter_map(|idx| self.graph.node_weight(idx)) + .map(|node| node.node_id) + .collect() + } + + pub fn get_hash(&self) -> String { + let mut hasher = DefaultHasher::new(); + let program_bytes = FsProgram::from(self.clone()).encode_to_vec(); + hasher.write(&program_bytes); + let rng = SmallRng::seed_from_u64(hasher.finish()); + rng.sample_iter(&Alphanumeric) + .take(16) + .map(|c| (c as char).to_ascii_lowercase()) .collect() } pub fn tasks_per_operator(&self) -> HashMap { - let mut tasks_per_operator = HashMap::new(); - for node in self.graph.node_weights() { - for op in &node.operator_chain.operators { - tasks_per_operator.insert(op.operator_id.clone(), node.parallelism); - } - } - tasks_per_operator + self.graph + .node_weights() + .flat_map(|node| { + node.operator_chain + .operators + .iter() + .map(move |op| (op.operator_id.clone(), node.parallelism)) + }) + .collect() } pub fn operator_names_by_id(&self) -> HashMap { - let mut m = HashMap::new(); - for node in 
self.graph.node_weights() { - for op in &node.operator_chain.operators { - m.insert(op.operator_id.clone(), op.operator_name.to_string()); - } - } - m + self.graph + .node_weights() + .flat_map(|node| &node.operator_chain.operators) + .map(|op| { + let resolved_name = op + .extract_connector_name() + .unwrap_or_else(|| op.operator_name.to_string()); + (op.operator_id.clone(), resolved_name) + }) + .collect() } pub fn tasks_per_node(&self) -> HashMap { - let mut tasks_per_node = HashMap::new(); - for node in self.graph.node_weights() { - tasks_per_node.insert(node.node_id, node.parallelism); - } - tasks_per_node + self.graph + .node_weights() + .map(|node| (node.node_id, node.parallelism)) + .collect() } pub fn features(&self) -> HashSet { - let mut s = HashSet::new(); - for n in self.graph.node_weights() { - for t in &n.operator_chain.operators { - let feature = match &t.operator_name { - OperatorName::AsyncUdf => "async-udf".to_string(), - OperatorName::ExpressionWatermark - | OperatorName::ArrowValue - | OperatorName::ArrowKey - | OperatorName::Projection => continue, - OperatorName::Join => "join-with-expiration".to_string(), - OperatorName::InstantJoin => "windowed-join".to_string(), - OperatorName::WindowFunction => "sql-window-function".to_string(), - OperatorName::LookupJoin => "lookup-join".to_string(), - OperatorName::TumblingWindowAggregate => { - "sql-tumbling-window-aggregate".to_string() - } - OperatorName::SlidingWindowAggregate => { - "sql-sliding-window-aggregate".to_string() - } - OperatorName::SessionWindowAggregate => { - "sql-session-window-aggregate".to_string() - } - OperatorName::UpdatingAggregate => "sql-updating-aggregate".to_string(), - OperatorName::ConnectorSource => "connector-source".to_string(), - OperatorName::ConnectorSink => "connector-sink".to_string(), - }; - s.insert(feature); + self.graph + .node_weights() + .flat_map(|node| &node.operator_chain.operators) + .filter_map(|op| op.extract_feature()) + .collect() + } + + /// Arrow 
schema carried on edges into the connector-sink node, if present. + pub fn egress_arrow_schema(&self) -> Option> { + for idx in self.graph.node_indices() { + let node = self.graph.node_weight(idx)?; + if node + .operator_chain + .operators + .iter() + .any(|op| op.operator_name == OperatorName::ConnectorSink) + { + let e = self + .graph + .edges_directed(idx, Direction::Incoming) + .next()?; + return Some(Arc::clone(&e.weight().schema.schema)); } } - s + None + } + + pub fn encode_for_catalog(&self) -> DFResult> { + Ok(FsProgram::from(self.clone()).encode_to_vec()) + } + + pub fn decode_for_catalog(bytes: &[u8]) -> DFResult { + let proto = FsProgram::decode(bytes).map_err(|e| { + DataFusionError::Execution(format!("FsProgram catalog decode failed: {e}")) + })?; + LogicalProgram::try_from(proto) } } diff --git a/src/sql/logical_node/logical/mod.rs b/src/sql/logical_node/logical/mod.rs index 96dd2ce5..3a94d1f3 100644 --- a/src/sql/logical_node/logical/mod.rs +++ b/src/sql/logical_node/logical/mod.rs @@ -11,6 +11,7 @@ // limitations under the License. 
mod dylib_udf_config; +mod fs_program_convert; mod logical_edge; mod logical_graph; mod logical_node; diff --git a/src/sql/logical_node/logical/operator_chain.rs b/src/sql/logical_node/logical/operator_chain.rs index e3db96b2..e74684ba 100644 --- a/src/sql/logical_node/logical/operator_chain.rs +++ b/src/sql/logical_node/logical/operator_chain.rs @@ -12,19 +12,62 @@ use std::sync::Arc; -use itertools::Itertools; +use itertools::{EitherOrBoth, Itertools}; +use prost::Message; +use protocol::grpc::api::ConnectorOp; +use serde::{Deserialize, Serialize}; use super::operator_name::OperatorName; use crate::sql::common::FsSchema; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct ChainedLogicalOperator { pub operator_id: String, pub operator_name: OperatorName, pub operator_config: Vec, } -#[derive(Clone, Debug)] +impl ChainedLogicalOperator { + pub fn extract_connector_name(&self) -> Option { + if matches!( + self.operator_name, + OperatorName::ConnectorSource | OperatorName::ConnectorSink + ) { + ConnectorOp::decode(self.operator_config.as_slice()) + .ok() + .map(|op| op.connector) + } else { + None + } + } + + pub fn extract_feature(&self) -> Option { + match self.operator_name { + OperatorName::AsyncUdf => Some("async-udf".to_string()), + OperatorName::Join => Some("join-with-expiration".to_string()), + OperatorName::InstantJoin => Some("windowed-join".to_string()), + OperatorName::WindowFunction => Some("sql-window-function".to_string()), + OperatorName::LookupJoin => Some("lookup-join".to_string()), + OperatorName::TumblingWindowAggregate => { + Some("sql-tumbling-window-aggregate".to_string()) + } + OperatorName::SlidingWindowAggregate => { + Some("sql-sliding-window-aggregate".to_string()) + } + OperatorName::SessionWindowAggregate => { + Some("sql-session-window-aggregate".to_string()) + } + OperatorName::UpdatingAggregate => Some("sql-updating-aggregate".to_string()), + OperatorName::ConnectorSource => self + 
.extract_connector_name() + .map(|c| format!("{c}-source")), + OperatorName::ConnectorSink => self.extract_connector_name().map(|c| format!("{c}-sink")), + _ => None, + } + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct OperatorChain { pub(crate) operators: Vec, pub(crate) edges: Vec>, @@ -41,11 +84,11 @@ impl OperatorChain { pub fn iter( &self, ) -> impl Iterator>)> { - self.operators - .iter() - .zip_longest(self.edges.iter()) - .map(|e| e.left_and_right()) - .map(|(l, r)| (l.unwrap(), r)) + self.operators.iter().zip_longest(&self.edges).filter_map(|e| match e { + EitherOrBoth::Both(op, edge) => Some((op, Some(edge))), + EitherOrBoth::Left(op) => Some((op, None)), + EitherOrBoth::Right(_) => None, + }) } pub fn iter_mut( @@ -53,13 +96,18 @@ impl OperatorChain { ) -> impl Iterator>)> { self.operators .iter_mut() - .zip_longest(self.edges.iter()) - .map(|e| e.left_and_right()) - .map(|(l, r)| (l.unwrap(), r)) + .zip_longest(&self.edges) + .filter_map(|e| match e { + EitherOrBoth::Both(op, edge) => Some((op, Some(edge))), + EitherOrBoth::Left(op) => Some((op, None)), + EitherOrBoth::Right(_) => None, + }) } pub fn first(&self) -> &ChainedLogicalOperator { - &self.operators[0] + self.operators + .first() + .expect("OperatorChain must contain at least one operator") } pub fn len(&self) -> usize { diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs index 057d8e82..22f58bbe 100644 --- a/src/sql/logical_node/logical/operator_name.rs +++ b/src/sql/logical_node/logical/operator_name.rs @@ -10,6 +10,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::str::FromStr; + +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use strum::{Display, EnumString}; #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)] @@ -30,3 +33,22 @@ pub enum OperatorName { ConnectorSource, ConnectorSink, } + +impl Serialize for OperatorName { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&self.to_string()) + } +} + +impl<'de> Deserialize<'de> for OperatorName { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + Self::from_str(&s).map_err(serde::de::Error::custom) + } +} diff --git a/src/sql/logical_node/logical/program_config.rs b/src/sql/logical_node/logical/program_config.rs index 38c76e66..931a5424 100644 --- a/src/sql/logical_node/logical/program_config.rs +++ b/src/sql/logical_node/logical/program_config.rs @@ -1,5 +1,6 @@ // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. +// // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 @@ -10,13 +11,23 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; - -use super::dylib_udf_config::DylibUdfConfig; -use super::python_udf_config::PythonUdfConfig; +use protocol::grpc::api::FsProgramConfig; +/// Placeholder program-level config (UDF tables live elsewhere; wire maps stay empty). 
#[derive(Clone, Debug, Default)] -pub struct ProgramConfig { - pub udf_dylibs: HashMap, - pub python_udfs: HashMap, +pub struct ProgramConfig {} + +impl From for FsProgramConfig { + fn from(_: ProgramConfig) -> Self { + Self { + udf_dylibs: Default::default(), + python_udfs: Default::default(), + } + } +} + +impl From for ProgramConfig { + fn from(_: FsProgramConfig) -> Self { + Self::default() + } } diff --git a/src/sql/logical_planner/compiled_sql.rs b/src/sql/logical_planner/compiled_sql.rs deleted file mode 100644 index e0525097..00000000 --- a/src/sql/logical_planner/compiled_sql.rs +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use crate::sql::logical_node::logical::LogicalProgram; - -// ── Compilation pipeline ────────────────────────────────────────────── - -#[derive(Clone, Debug)] -pub struct CompiledSql { - pub program: LogicalProgram, - pub connection_ids: Vec, -} diff --git a/src/sql/logical_planner/mod.rs b/src/sql/logical_planner/mod.rs index 8b7d9e76..85046c0d 100644 --- a/src/sql/logical_planner/mod.rs +++ b/src/sql/logical_planner/mod.rs @@ -56,12 +56,9 @@ use std::fmt::Debug; use tokio::sync::mpsc::UnboundedReceiver; use tokio_stream::wrappers::UnboundedReceiverStream; -pub mod compiled_sql; pub(crate) mod planner; pub mod optimizers; -pub use compiled_sql::CompiledSql; - // ─────────────────── Updating Meta Helpers ─────────────────── pub fn updating_meta_fields() -> Fields { diff --git a/src/sql/mod.rs b/src/sql/mod.rs index fc89787a..04f6c897 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -25,7 +25,6 @@ pub mod types; pub use schema::{StreamPlanningContext, StreamSchemaProvider}; pub use parse::parse_sql; pub use analysis::rewrite_plan; -pub use logical_planner::CompiledSql; #[cfg(test)] mod frontend_sql_coverage_tests; diff --git a/src/sql/schema/schema_provider.rs b/src/sql/schema/schema_provider.rs index 5e34991a..2bb5ef87 100644 --- a/src/sql/schema/schema_provider.rs +++ b/src/sql/schema/schema_provider.rs @@ -19,15 +19,13 @@ use datafusion::datasource::{DefaultTableSource, TableProvider, TableType}; use datafusion::execution::{FunctionRegistry, SessionStateDefaults}; use datafusion::logical_expr::expr_rewriter::FunctionRewrite; use datafusion::logical_expr::planner::ExprPlanner; -use datafusion::logical_expr::{ - AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableSource, WindowUDF, -}; +use datafusion::logical_expr::{AggregateUDF, Expr, ScalarUDF, TableSource, WindowUDF}; use datafusion::optimizer::Analyzer; use datafusion::sql::planner::ContextProvider; use datafusion::sql::TableReference; use unicase::UniCase; -use 
crate::sql::logical_node::logical::DylibUdfConfig; +use crate::sql::logical_node::logical::{DylibUdfConfig, LogicalProgram}; use crate::sql::schema::table::Table as CatalogTable; use crate::sql::schema::utils::window_arrow_struct; use crate::sql::types::{PlaceholderUdf, PlanningOptions}; @@ -49,25 +47,23 @@ pub enum StreamTable { }, Sink { name: String, - schema: Arc, - }, - Memory { - name: String, - logical_plan: Option, + program: LogicalProgram, }, } impl StreamTable { pub fn name(&self) -> &str { match self { - Self::Source { name, .. } | Self::Sink { name, .. } | Self::Memory { name, .. } => name, + Self::Source { name, .. } | Self::Sink { name, .. } => name, } } pub fn schema(&self) -> Arc { match self { - Self::Source { schema, .. } | Self::Sink { schema, .. } => Arc::clone(schema), - Self::Memory { .. } => Arc::new(Schema::empty()), + Self::Source { schema, .. } => Arc::clone(schema), + Self::Sink { program, .. } => program + .egress_arrow_schema() + .unwrap_or_else(|| Arc::new(Schema::empty())), } } } @@ -208,8 +204,8 @@ impl StreamPlanningContext { }); } - pub fn add_sink_table(&mut self, name: String, schema: Arc) { - self.register_stream_table(StreamTable::Sink { name, schema }); + pub fn add_sink_table(&mut self, name: String, program: LogicalProgram) { + self.register_stream_table(StreamTable::Sink { name, program }); } pub fn insert_table(&mut self, table: StreamTable) { diff --git a/src/storage/stream_catalog/codec.rs b/src/storage/stream_catalog/codec.rs index dacaebf8..34c2c4ba 100644 --- a/src/storage/stream_catalog/codec.rs +++ b/src/storage/stream_catalog/codec.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Arrow Schema IPC and DataFusion logical plan serialization. +//! Arrow Schema IPC and [`LogicalProgram`] bincode payloads for stream catalog rows. 
use std::io::Cursor; use std::sync::Arc; @@ -20,8 +20,8 @@ use datafusion::arrow::ipc::reader::StreamReader; use datafusion::arrow::ipc::writer::StreamWriter; use datafusion::arrow::record_batch::RecordBatch; use datafusion::common::{DataFusionError, Result}; -use datafusion::execution::context::SessionContext; -use datafusion::logical_expr::LogicalPlan; + +use crate::sql::logical_node::logical::LogicalProgram; pub struct CatalogCodec; @@ -47,11 +47,11 @@ impl CatalogCodec { Ok(reader.schema()) } - pub fn encode_logical_plan(plan: &LogicalPlan) -> Result> { - datafusion_proto::bytes::logical_plan_to_bytes(plan).map(|b| b.to_vec()) + pub fn encode_logical_program(program: &LogicalProgram) -> Result> { + program.encode_for_catalog() } - pub fn decode_logical_plan(bytes: &[u8], ctx: &SessionContext) -> Result { - datafusion_proto::bytes::logical_plan_from_bytes(bytes, ctx) + pub fn decode_logical_program(bytes: &[u8]) -> Result { + LogicalProgram::decode_for_catalog(bytes) } } diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs index 7e61b20e..7e75f786 100644 --- a/src/storage/stream_catalog/manager.rs +++ b/src/storage/stream_catalog/manager.rs @@ -14,8 +14,8 @@ use std::collections::HashMap; use std::sync::{Arc, OnceLock}; use anyhow::{anyhow, bail, Context}; +use datafusion::arrow::datatypes::Schema; use datafusion::common::{internal_err, plan_err, Result as DFResult}; -use datafusion::execution::context::SessionContext; use parking_lot::RwLock; use prost::Message; use protocol::storage::{self as pb, table_definition}; @@ -37,36 +37,28 @@ pub struct StreamTableCatalogCache { pub struct CatalogManager { store: Arc, cache: RwLock, - session_ctx: Arc, } static GLOBAL_CATALOG: OnceLock> = OnceLock::new(); impl CatalogManager { - pub fn new(store: Arc, session_ctx: Arc) -> Self { + pub fn new(store: Arc) -> Self { Self { store, cache: RwLock::new(StreamTableCatalogCache::default()), - session_ctx, } } pub fn init_global_in_memory() 
-> anyhow::Result<()> { - Self::init_global( - Arc::new(super::InMemoryMetaStore::new()), - Arc::new(SessionContext::new()), - ) + Self::init_global(Arc::new(super::InMemoryMetaStore::new())) } - pub fn init_global( - store: Arc, - session_ctx: Arc, - ) -> anyhow::Result<()> { + pub fn init_global(store: Arc) -> anyhow::Result<()> { if GLOBAL_CATALOG.get().is_some() { bail!("CatalogManager already initialized"); } - let mgr = Arc::new(CatalogManager::new(store, session_ctx)); + let mgr = Arc::new(CatalogManager::new(store)); GLOBAL_CATALOG .set(mgr) .map_err(|_| anyhow!("CatalogManager global install failed"))?; @@ -164,16 +156,15 @@ impl CatalogManager { event_time_field: event_time_field.clone(), watermark_field: watermark_field.clone(), }), - StreamTable::Sink { schema, .. } => table_definition::TableType::Sink(pb::StreamSink { - arrow_schema_ipc: CatalogCodec::encode_schema(schema)?, - }), - StreamTable::Memory { logical_plan, .. } => { - let logical_plan_bytes = logical_plan - .as_ref() - .map(|plan| CatalogCodec::encode_logical_plan(plan)) - .transpose()?; - - table_definition::TableType::Memory(pb::StreamMemory { logical_plan_bytes }) + StreamTable::Sink { program, .. 
} => { + let logical_program_bincode = CatalogCodec::encode_logical_program(program)?; + let schema = program + .egress_arrow_schema() + .unwrap_or_else(|| Arc::new(Schema::empty())); + table_definition::TableType::Sink(pb::StreamSink { + arrow_schema_ipc: CatalogCodec::encode_schema(&schema)?, + logical_program_bincode, + }) } }; @@ -199,19 +190,17 @@ impl CatalogManager { event_time_field: src.event_time_field, watermark_field: src.watermark_field, }), - table_definition::TableType::Sink(sink) => Ok(StreamTable::Sink { - name: proto_def.table_name, - schema: CatalogCodec::decode_schema(&sink.arrow_schema_ipc)?, - }), - table_definition::TableType::Memory(mem) => { - let logical_plan = mem - .logical_plan_bytes - .map(|bytes| CatalogCodec::decode_logical_plan(&bytes, &self.session_ctx)) - .transpose()?; - - Ok(StreamTable::Memory { + table_definition::TableType::Sink(sink) => { + if sink.logical_program_bincode.is_empty() { + return internal_err!( + "Corrupted catalog row: sink '{}' missing logical_program_bincode", + proto_def.table_name + ); + } + let program = CatalogCodec::decode_logical_program(&sink.logical_program_bincode)?; + Ok(StreamTable::Sink { name: proto_def.table_name, - logical_plan, + program, }) } } @@ -242,18 +231,15 @@ mod tests { use std::sync::Arc; use datafusion::arrow::datatypes::{DataType, Field, Schema}; - use datafusion::execution::context::SessionContext; + use crate::sql::logical_node::logical::LogicalProgram; use crate::sql::schema::StreamTable; use crate::storage::stream_catalog::{InMemoryMetaStore, MetaStore}; use super::CatalogManager; fn create_test_manager() -> CatalogManager { - CatalogManager::new( - Arc::new(InMemoryMetaStore::new()), - Arc::new(SessionContext::new()), - ) + CatalogManager::new(Arc::new(InMemoryMetaStore::new())) } #[test] @@ -312,19 +298,17 @@ mod tests { #[test] fn restore_from_store_rebuilds_cache() { let store: Arc = Arc::new(InMemoryMetaStore::new()); - let session = Arc::new(SessionContext::new()); - let 
mgr_a = CatalogManager::new(Arc::clone(&store), Arc::clone(&session)); - let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Utf8, true)])); + let mgr_a = CatalogManager::new(Arc::clone(&store)); mgr_a .add_table(StreamTable::Sink { name: "sink1".into(), - schema, + program: LogicalProgram::default(), }) .unwrap(); - let mgr_b = CatalogManager::new(store, session); + let mgr_b = CatalogManager::new(store); mgr_b.restore_from_store().unwrap(); let ctx = mgr_b.acquire_planning_context(); From b4149bce95356e50cf812874737bc49609718ee5 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Tue, 24 Mar 2026 00:50:44 +0800 Subject: [PATCH 15/44] update --- Cargo.lock | 3490 +++++++++++++++-- Cargo.toml | 5 + src/runtime/mod.rs | 9 +- src/runtime/source/mod.rs | 15 - src/runtime/streaming/api/context.rs | 95 + src/runtime/streaming/api/mod.rs | 9 + src/runtime/streaming/api/operator.rs | 90 + src/runtime/streaming/api/source.rs | 43 + src/runtime/streaming/arrow/mod.rs | 68 + src/runtime/streaming/cluster/graph.rs | 136 + src/runtime/streaming/cluster/manager.rs | 164 + src/runtime/streaming/cluster/master.rs | 274 ++ src/runtime/streaming/cluster/mod.rs | 11 + src/runtime/streaming/cluster/wiring.rs | 46 + src/runtime/streaming/error.rs | 10 + src/runtime/streaming/execution/mod.rs | 8 + src/runtime/streaming/execution/runner.rs | 298 ++ src/runtime/streaming/execution/source.rs | 120 + .../execution/tracker/barrier_aligner.rs | 57 + .../streaming/execution/tracker/mod.rs | 7 + .../execution/tracker/watermark_tracker.rs | 86 + src/runtime/streaming/factory/mod.rs | 3 + src/runtime/streaming/factory/registry.rs | 44 + src/runtime/streaming/format/mod.rs | 0 src/runtime/streaming/lib.rs | 44 + src/runtime/streaming/memory/mod.rs | 5 + src/runtime/streaming/memory/pool.rs | 75 + src/runtime/streaming/memory/ticket.rs | 24 + src/runtime/streaming/mod.rs | 45 + src/runtime/streaming/network/endpoint.rs | 59 + src/runtime/streaming/network/environment.rs | 82 + 
src/runtime/streaming/network/mod.rs | 5 + .../grouping/incremental_aggregate.rs | 847 ++++ .../streaming/operators/grouping/mod.rs | 5 + .../operators/grouping/updating_cache.rs | 498 +++ .../operators/joins/join_instance.rs | 351 ++ .../operators/joins/join_with_expiration.rs | 261 ++ .../streaming/operators/joins/lookup_join.rs | 363 ++ src/runtime/streaming/operators/joins/mod.rs | 7 + src/runtime/streaming/operators/mod.rs | 75 + .../streaming/operators/sink/kafka/mod.rs | 366 ++ src/runtime/streaming/operators/sink/mod.rs | 5 + .../streaming/operators/source/kafka/mod.rs | 325 ++ src/runtime/streaming/operators/source/mod.rs | 5 + .../streaming/operators/watermark/mod.rs | 3 + .../watermark/watermark_generator.rs | 244 ++ .../streaming/operators/windows/mod.rs | 9 + .../windows/session_aggregating_window.rs | 804 ++++ .../windows/sliding_aggregating_window.rs | 578 +++ .../windows/tumbling_aggregating_window.rs | 399 ++ .../operators/windows/window_function.rs | 292 ++ src/runtime/streaming/protocol/control.rs | 74 + src/runtime/streaming/protocol/event.rs | 11 + src/runtime/streaming/protocol/mod.rs | 15 + src/runtime/streaming/protocol/stream_out.rs | 15 + src/runtime/streaming/protocol/tracked.rs | 31 + src/runtime/streaming/protocol/watermark.rs | 80 + src/runtime/streaming/state/mod.rs | 0 src/runtime/streaming/state/table_manager.rs | 0 .../{ => wasm}/input/input_protocol.rs | 0 .../{ => wasm}/input/input_provider.rs | 0 src/runtime/{ => wasm}/input/input_runner.rs | 0 src/runtime/{ => wasm}/input/interface.rs | 0 src/runtime/{ => wasm}/input/mod.rs | 0 .../{ => wasm}/input/protocol/kafka/config.rs | 0 .../input/protocol/kafka/kafka_protocol.rs | 0 .../{ => wasm}/input/protocol/kafka/mod.rs | 0 src/runtime/{ => wasm}/input/protocol/mod.rs | 0 src/runtime/{sink => wasm}/mod.rs | 7 +- src/runtime/{ => wasm}/output/interface.rs | 0 src/runtime/{ => wasm}/output/mod.rs | 0 .../{ => wasm}/output/output_protocol.rs | 0 .../{ => wasm}/output/output_provider.rs 
| 0 .../{ => wasm}/output/output_runner.rs | 0 .../output/protocol/kafka/kafka_protocol.rs | 0 .../{ => wasm}/output/protocol/kafka/mod.rs | 0 .../output/protocol/kafka/producer_config.rs | 0 src/runtime/{ => wasm}/output/protocol/mod.rs | 0 .../{ => wasm}/processor/function_error.rs | 0 src/runtime/{ => wasm}/processor/mod.rs | 0 .../{ => wasm}/processor/python/mod.rs | 0 .../processor/python/python_host.rs | 0 .../processor/python/python_service.rs | 0 .../processor/wasm/input_strategy.rs | 0 src/runtime/{ => wasm}/processor/wasm/mod.rs | 0 .../{ => wasm}/processor/wasm/thread_pool.rs | 0 .../{ => wasm}/processor/wasm/wasm_cache.rs | 0 .../{ => wasm}/processor/wasm/wasm_host.rs | 0 .../processor/wasm/wasm_processor.rs | 0 .../processor/wasm/wasm_processor_trait.rs | 0 .../{ => wasm}/processor/wasm/wasm_task.rs | 0 src/sql/common/errors.rs | 13 + src/sql/common/fs_schema.rs | 9 + src/sql/common/mod.rs | 6 +- 94 files changed, 10697 insertions(+), 428 deletions(-) delete mode 100644 src/runtime/source/mod.rs create mode 100644 src/runtime/streaming/api/context.rs create mode 100644 src/runtime/streaming/api/mod.rs create mode 100644 src/runtime/streaming/api/operator.rs create mode 100644 src/runtime/streaming/api/source.rs create mode 100644 src/runtime/streaming/arrow/mod.rs create mode 100644 src/runtime/streaming/cluster/graph.rs create mode 100644 src/runtime/streaming/cluster/manager.rs create mode 100644 src/runtime/streaming/cluster/master.rs create mode 100644 src/runtime/streaming/cluster/mod.rs create mode 100644 src/runtime/streaming/cluster/wiring.rs create mode 100644 src/runtime/streaming/error.rs create mode 100644 src/runtime/streaming/execution/mod.rs create mode 100644 src/runtime/streaming/execution/runner.rs create mode 100644 src/runtime/streaming/execution/source.rs create mode 100644 src/runtime/streaming/execution/tracker/barrier_aligner.rs create mode 100644 src/runtime/streaming/execution/tracker/mod.rs create mode 100644 
src/runtime/streaming/execution/tracker/watermark_tracker.rs create mode 100644 src/runtime/streaming/factory/mod.rs create mode 100644 src/runtime/streaming/factory/registry.rs create mode 100644 src/runtime/streaming/format/mod.rs create mode 100644 src/runtime/streaming/lib.rs create mode 100644 src/runtime/streaming/memory/mod.rs create mode 100644 src/runtime/streaming/memory/pool.rs create mode 100644 src/runtime/streaming/memory/ticket.rs create mode 100644 src/runtime/streaming/mod.rs create mode 100644 src/runtime/streaming/network/endpoint.rs create mode 100644 src/runtime/streaming/network/environment.rs create mode 100644 src/runtime/streaming/network/mod.rs create mode 100644 src/runtime/streaming/operators/grouping/incremental_aggregate.rs create mode 100644 src/runtime/streaming/operators/grouping/mod.rs create mode 100644 src/runtime/streaming/operators/grouping/updating_cache.rs create mode 100644 src/runtime/streaming/operators/joins/join_instance.rs create mode 100644 src/runtime/streaming/operators/joins/join_with_expiration.rs create mode 100644 src/runtime/streaming/operators/joins/lookup_join.rs create mode 100644 src/runtime/streaming/operators/joins/mod.rs create mode 100644 src/runtime/streaming/operators/mod.rs create mode 100644 src/runtime/streaming/operators/sink/kafka/mod.rs create mode 100644 src/runtime/streaming/operators/sink/mod.rs create mode 100644 src/runtime/streaming/operators/source/kafka/mod.rs create mode 100644 src/runtime/streaming/operators/source/mod.rs create mode 100644 src/runtime/streaming/operators/watermark/mod.rs create mode 100644 src/runtime/streaming/operators/watermark/watermark_generator.rs create mode 100644 src/runtime/streaming/operators/windows/mod.rs create mode 100644 src/runtime/streaming/operators/windows/session_aggregating_window.rs create mode 100644 src/runtime/streaming/operators/windows/sliding_aggregating_window.rs create mode 100644 
src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs create mode 100644 src/runtime/streaming/operators/windows/window_function.rs create mode 100644 src/runtime/streaming/protocol/control.rs create mode 100644 src/runtime/streaming/protocol/event.rs create mode 100644 src/runtime/streaming/protocol/mod.rs create mode 100644 src/runtime/streaming/protocol/stream_out.rs create mode 100644 src/runtime/streaming/protocol/tracked.rs create mode 100644 src/runtime/streaming/protocol/watermark.rs create mode 100644 src/runtime/streaming/state/mod.rs create mode 100644 src/runtime/streaming/state/table_manager.rs rename src/runtime/{ => wasm}/input/input_protocol.rs (100%) rename src/runtime/{ => wasm}/input/input_provider.rs (100%) rename src/runtime/{ => wasm}/input/input_runner.rs (100%) rename src/runtime/{ => wasm}/input/interface.rs (100%) rename src/runtime/{ => wasm}/input/mod.rs (100%) rename src/runtime/{ => wasm}/input/protocol/kafka/config.rs (100%) rename src/runtime/{ => wasm}/input/protocol/kafka/kafka_protocol.rs (100%) rename src/runtime/{ => wasm}/input/protocol/kafka/mod.rs (100%) rename src/runtime/{ => wasm}/input/protocol/mod.rs (100%) rename src/runtime/{sink => wasm}/mod.rs (86%) rename src/runtime/{ => wasm}/output/interface.rs (100%) rename src/runtime/{ => wasm}/output/mod.rs (100%) rename src/runtime/{ => wasm}/output/output_protocol.rs (100%) rename src/runtime/{ => wasm}/output/output_provider.rs (100%) rename src/runtime/{ => wasm}/output/output_runner.rs (100%) rename src/runtime/{ => wasm}/output/protocol/kafka/kafka_protocol.rs (100%) rename src/runtime/{ => wasm}/output/protocol/kafka/mod.rs (100%) rename src/runtime/{ => wasm}/output/protocol/kafka/producer_config.rs (100%) rename src/runtime/{ => wasm}/output/protocol/mod.rs (100%) rename src/runtime/{ => wasm}/processor/function_error.rs (100%) rename src/runtime/{ => wasm}/processor/mod.rs (100%) rename src/runtime/{ => wasm}/processor/python/mod.rs (100%) rename 
src/runtime/{ => wasm}/processor/python/python_host.rs (100%) rename src/runtime/{ => wasm}/processor/python/python_service.rs (100%) rename src/runtime/{ => wasm}/processor/wasm/input_strategy.rs (100%) rename src/runtime/{ => wasm}/processor/wasm/mod.rs (100%) rename src/runtime/{ => wasm}/processor/wasm/thread_pool.rs (100%) rename src/runtime/{ => wasm}/processor/wasm/wasm_cache.rs (100%) rename src/runtime/{ => wasm}/processor/wasm/wasm_host.rs (100%) rename src/runtime/{ => wasm}/processor/wasm/wasm_processor.rs (100%) rename src/runtime/{ => wasm}/processor/wasm/wasm_processor_trait.rs (100%) rename src/runtime/{ => wasm}/processor/wasm/wasm_task.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 7cd510f3..4cc46aef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,16 +19,16 @@ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "ahash" -version = "0.8.12" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" dependencies = [ "cfg-if", "const-random", - "getrandom 0.3.4", + "getrandom 0.2.16", "once_cell", "version_check", - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -132,6 +132,30 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "apache-avro" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a81f4e6304e455a9d52cf8ab667cb2fcf792f2cee2a31c28800901a335ecd5" +dependencies = [ + "bigdecimal", + "bon", + "digest", + "log", + "miniz_oxide", + "num-bigint", + "quad-rand", + "rand 0.9.2", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "strum 0.27.2", + "strum_macros 0.27.2", + "thiserror 2.0.17", + "uuid", +] + [[package]] name = 
"ar_archive_writer" version = "0.5.1" @@ -147,6 +171,15 @@ version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +[[package]] +name = "arc-swap" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6" +dependencies = [ + "rustversion", +] + [[package]] name = "arrayref" version = "0.3.9" @@ -488,6 +521,133 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "arroyo-datastream" +version = "0.16.0-dev" +dependencies = [ + "anyhow", + "arrow-schema 55.2.0", + "arroyo-rpc", + "bincode", + "datafusion-proto 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.14.0", + "petgraph 0.8.3", + "prost", + "rand 0.9.2", + "serde", + "serde_json", + "strum 0.27.2", + "syn 2.0.113", +] + +[[package]] +name = "arroyo-rpc" +version = "0.16.0-dev" +dependencies = [ + "ahash", + "anyhow", + "apache-avro", + "arc-swap", + "arrow", + "arrow-array 55.2.0", + "arrow-ord", + "arrow-schema 55.2.0", + "arroyo-types", + "async-trait", + "base64", + "bincode", + "bytes", + "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "dirs", + "figment", + "futures", + "k8s-openapi", + "local-ip-address", + "log", + "nanoid", + "object_store", + "percent-encoding", + "prost", + "rand 0.9.2", + "regex", + "reqwest", + "rustls", + "rustls-native-certs", + "schemars 1.2.1", + "serde", + "serde_json", + "smallvec", + "strum 0.27.2", + "strum_macros 0.27.2", + "thiserror 2.0.17", + "tokio", + "tonic 0.13.1", + "tonic-build 0.13.1", + "tracing", + "url", + "utoipa", +] + +[[package]] +name = "arroyo-state" +version = "0.16.0-dev" +dependencies = [ + "anyhow", + "arrow", + "arrow-array 55.2.0", + "arrow-ord", + "arrow-schema 55.2.0", + "arroyo-datastream", + "arroyo-rpc", + "arroyo-storage", + "arroyo-types", + "async-trait", + "bincode", 
+ "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures", + "lazy_static", + "object_store", + "once_cell", + "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "prometheus", + "prost", + "serde", + "serde_json", + "tokio", + "tracing", +] + +[[package]] +name = "arroyo-storage" +version = "0.16.0-dev" +dependencies = [ + "arroyo-rpc", + "arroyo-types", + "async-trait", + "aws-config", + "aws-credential-types", + "bytes", + "futures", + "object_store", + "rand 0.9.2", + "regex", + "thiserror 2.0.17", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "arroyo-types" +version = "0.16.0-dev" +dependencies = [ + "arrow", + "arrow-array 55.2.0", + "bincode", + "chrono", + "serde", +] + [[package]] name = "async-compression" version = "0.4.19" @@ -524,7 +684,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -535,7 +695,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -547,6 +707,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atomic" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340" +dependencies = [ + "bytemuck", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -560,162 +729,588 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] -name = "axum" -version = "0.7.9" +name = "aws-config" +version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +checksum = "02a18fd934af6ae7ca52410d4548b98eb895aab0f1ea417d168d85db1434a141" 
dependencies = [ - "async-trait", - "axum-core", + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http 0.62.6", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "itoa", - "matchit", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower 0.5.2", - "tower-layer", - "tower-service", + "fastrand", + "hex", + "http 1.4.0", + "ring", + "time", + "tokio", + "tracing", + "url", + "zeroize", ] [[package]] -name = "axum-core" -version = "0.4.5" +name = "aws-credential-types" +version = "1.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0" dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", ] [[package]] -name = "backtrace" -version = "0.3.76" +name = "aws-lc-rs" +version = "1.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-link", + "aws-lc-sys", + "zeroize", ] [[package]] -name = "backtrace-ext" -version = "0.2.1" +name = "aws-lc-sys" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50" +checksum = "1fa7e52a4c5c547c741610a2c6f123f3881e409b714cd27e6798ef020c514f0a" dependencies = [ - "backtrace", + "cc", + "cmake", + "dunce", + "fs_extra", ] [[package]] -name = "base64" -version = "0.22.1" +name = "aws-runtime" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-http 0.63.5", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "bytes-utils", + "fastrand", + "http 1.4.0", + "http-body 1.0.1", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] [[package]] -name = "bigdecimal" -version = "0.4.10" +name = "aws-sdk-sso" +version = "1.72.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" +checksum = "13118ad30741222f67b1a18e5071385863914da05124652b38e172d6d3d9ce31" dependencies = [ - "autocfg", - "libm", - "num-bigint", - "num-integer", - "num-traits", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http 0.62.6", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", ] [[package]] -name = "bincode" -version = "2.0.1" +name = "aws-sdk-ssooidc" +version = "1.73.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" +checksum = "f879a8572b4683a8f84f781695bebf2f25cf11a81a2693c31fc0e0215c2c1726" dependencies = [ - "bincode_derive", - "serde", - "unty", + "aws-credential-types", + "aws-runtime", + 
"aws-smithy-async", + "aws-smithy-http 0.62.6", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", ] [[package]] -name = "bincode_derive" -version = "2.0.1" +name = "aws-sdk-sts" +version = "1.73.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" +checksum = "f1e9c3c24e36183e2f698235ed38dcfbbdff1d09b9232dc866c4be3011e0b47e" dependencies = [ - "virtue", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http 0.62.6", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", ] [[package]] -name = "bindgen" -version = "0.65.1" +name = "aws-sigv4" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" +checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9" dependencies = [ - "bitflags 1.3.2", - "cexpr", - "clang-sys", - "lazy_static", - "lazycell", - "peeking_take_while", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn", + "aws-credential-types", + "aws-smithy-http 0.63.5", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + "percent-encoding", + "sha2", + "time", + "tracing", ] [[package]] -name = "bindgen" -version = "0.72.1" +name = "aws-smithy-async" +version = "1.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +checksum = "5cc50d0f63e714784b84223abd7abbc8577de8c35d699e0edd19f0a88a08ae13" dependencies = [ 
- "bitflags 2.10.0", - "cexpr", - "clang-sys", - "itertools 0.13.0", - "proc-macro2", - "quote", - "regex", - "rustc-hash 2.1.1", - "shlex", - "syn", + "futures-util", + "pin-project-lite", + "tokio", ] [[package]] -name = "bitflags" -version = "1.3.2" +name = "aws-smithy-http" +version = "0.62.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] [[package]] -name = "bitflags" +name = "aws-smithy-http" +version = "0.63.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d619373d490ad70966994801bc126846afaa0d1ee920697a031f0cf63f2568e7" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00ccbb08c10f6bcf912f398188e42ee2eab5f1767ce215a02a73bc5df1bbdd95" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2", + "http 1.4.0", + "hyper", + "hyper-rustls", + "hyper-util", + "pin-project-lite", + "rustls", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower 0.5.2", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.61.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" 
+dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f76a580e3d8f8961e5d48763214025a2af65c2fa4cd1fb7f270a0e107a71b0" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ccf7f6eba8b2dcf8ce9b74806c6c185659c311665c4bf8d6e71ebd454db6bf" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http 0.63.5", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4af6e5def28be846479bbeac55aa4603d6f7986fc5da4601ba324dd5d377516" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.4.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-types" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca2734c16913a45343b37313605d84e7d8b34a4611598ce1d25b35860a2bed3" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", +] + +[[package]] 
+name = "aws-smithy-xml" +version = "0.60.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b53543b4b86ed43f051644f704a98c7291b3618b67adf057ee77a366fa52fcaa" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version", + "tracing", +] + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core 0.4.5", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "itoa", + "matchit 0.7.3", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper", + "tower 0.5.2", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +dependencies = [ + "axum-core 0.5.6", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "itoa", + "matchit 0.8.4", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "sync_wrapper", + "tower 0.5.2", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + 
"sync_wrapper", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", +] + +[[package]] +name = "backtrace" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-link", +] + +[[package]] +name = "backtrace-ext" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50" +dependencies = [ + "backtrace", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "bigdecimal" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", + "serde", + "serde_json", +] + +[[package]] +name = "bincode" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" +dependencies = [ + 
"bincode_derive", + "serde", + "unty", +] + +[[package]] +name = "bincode_derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" +dependencies = [ + "virtue", +] + +[[package]] +name = "bindgen" +version = "0.65.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" +dependencies = [ + "bitflags 1.3.2", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn 2.0.113", +] + +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags 2.10.0", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "proc-macro2", + "quote", + "regex", + "rustc-hash 2.1.1", + "shlex", + "syn 2.0.113", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" @@ -773,6 +1368,31 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bon" +version = "3.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "3.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" +dependencies = [ + 
"darling 0.23.0", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.113", +] + [[package]] name = "brotli" version = "8.0.2" @@ -803,6 +1423,18 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" + [[package]] name = "byteorder" version = "1.5.0" @@ -811,9 +1443,19 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] [[package]] name = "bzip2" @@ -834,6 +1476,15 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "camino" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629a66d692cb9ff1a1c664e41771b3dcaf961985a9774c0eb0bd1b51cf60a48" +dependencies = [ + "serde_core", +] + [[package]] name = "cap-fs-ext" version = "3.4.5" @@ -912,6 +1563,28 @@ dependencies = [ "winx", ] +[[package]] +name = "cargo-platform" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e35af189006b9c0f00a064685c727031e3ed2d8020f7ba284d78cc2671bd36ea" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo_metadata" 
+version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", +] + [[package]] name = "cc" version = "1.2.51" @@ -945,6 +1618,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.42" @@ -954,6 +1633,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-link", ] @@ -1020,7 +1700,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -1108,6 +1788,26 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -1129,7 +1829,7 @@ dependencies = [ "postgres-types", "prettyplease", "rusqlite", - "syn", + "syn 2.0.113", "thiserror 1.0.69", ] @@ -1417,6 +2117,88 @@ dependencies = [ "memchr", ] +[[package]] +name = "darling" +version = "0.20.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core 0.23.0", + "darling_macro 0.23.0", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.113", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.113", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core 0.20.11", + "quote", + "syn 2.0.113", +] + +[[package]] +name = "darling_macro" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core 0.23.0", + "quote", + "syn 2.0.113", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "dashmap" version = "6.1.0" @@ 
-1431,6 +2213,60 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "datafusion" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a11e19a7ccc5bb979c95c1dceef663eab39c9061b3bbf8d1937faf0f03bf41f" +dependencies = [ + "arrow", + "arrow-ipc 55.2.0", + "arrow-schema 55.2.0", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-catalog-listing 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-datasource-csv 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-datasource-json 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-datasource-parquet 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-nested 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-table 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-window 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 
(registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "flate2", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.9.2", + "regex", + "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)", + "tempfile", + "tokio", + "url", + "uuid", + "xz2", + "zstd", +] + [[package]] name = "datafusion" version = "48.0.1" @@ -1443,29 +2279,29 @@ dependencies = [ "bytes", "bzip2", "chrono", - "datafusion-catalog", - "datafusion-catalog-listing", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-datasource-csv", - "datafusion-datasource-json", - "datafusion-datasource-parquet", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", - "datafusion-functions-table", - "datafusion-functions-window", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", + "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-catalog-listing 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + 
"datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-datasource-csv 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-datasource-json 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-datasource-parquet 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-nested 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-table 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-window 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + 
"datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-sql 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "flate2", "futures", "itertools 0.14.0", @@ -1475,7 +2311,7 @@ dependencies = [ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.9.2", "regex", - "sqlparser", + "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", "tempfile", "tokio", "url", @@ -1484,6 +2320,32 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-catalog" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94985e67cab97b1099db2a7af11f31a45008b282aba921c1e1d35327c212ec18" +dependencies = [ + "arrow", + "async-trait", + "dashmap 6.1.0", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + [[package]] name = "datafusion-catalog" version = "48.0.1" @@ -1491,16 +2353,16 @@ source = 
"git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "async-trait", - "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", + "dashmap 6.1.0", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-sql 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "futures", "itertools 0.14.0", "log", @@ -1509,6 +2371,29 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-catalog-listing" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e002df133bdb7b0b9b429d89a69aa77b35caeadee4498b2ce1c7c23a99516988" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + 
"datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures", + "log", + "object_store", + "tokio", +] + [[package]] name = "datafusion-catalog-listing" version = "48.0.1" @@ -1516,21 +2401,45 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-session 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "futures", "log", "object_store", "tokio", ] +[[package]] +name = "datafusion-common" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13242fc58fd753787b0a538e5ae77d356cb9d0656fa85a591a33c5f106267f6" +dependencies = [ + "ahash", + "arrow", + "arrow-ipc 55.2.0", + "base64", + "half", + "hashbrown 0.14.5", + "indexmap 2.12.1", + "libc", + "log", + "object_store", + "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "paste", + "recursive", + "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio", + "web-time", +] + [[package]] name = "datafusion-common" version = "48.0.1" @@ -1549,11 +2458,22 @@ dependencies = [ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "paste", "recursive", - "sqlparser", + "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", "tokio", "web-time", ] +[[package]] +name = "datafusion-common-runtime" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2239f964e95c3a5d6b4a8cde07e646de8995c1396a7fd62c6e784f5341db499" +dependencies = [ + "futures", + "log", + "tokio", +] + [[package]] name = "datafusion-common-runtime" version = "48.0.1" @@ -1564,6 +2484,42 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-datasource" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cf792579bc8bf07d1b2f68c2d5382f8a63679cce8fbebfd4ba95742b6e08864" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 
(registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.9.2", + "tempfile", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + [[package]] name = "datafusion-datasource" version = "48.0.1" @@ -1575,14 +2531,14 @@ dependencies = [ "bytes", "bzip2", "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-session 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "flate2", "futures", "glob", @@ -1599,6 +2555,31 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-datasource-csv" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfc114f9a1415174f3e8d2719c371fc72092ef2195a7955404cfe6b2ba29a706" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures", + "object_store", + "regex", + "tokio", +] + [[package]] name = "datafusion-datasource-csv" version = "48.0.1" @@ -1607,22 +2588,47 @@ dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "futures", "object_store", "regex", "tokio", ] +[[package]] +name = "datafusion-datasource-json" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d88dd5e215c420a52362b9988ecd4cefd71081b730663d4f7d886f706111fc75" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 
(registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures", + "object_store", + "serde_json", + "tokio", +] + [[package]] name = "datafusion-datasource-json" version = "48.0.1" @@ -1631,22 +2637,53 @@ dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "futures", "object_store", "serde_json", "tokio", ] +[[package]] +name = "datafusion-datasource-parquet" +version = "48.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "33692acdd1fbe75280d14f4676fe43f39e9cb36296df56575aa2cac9a819e4cf" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.9.2", + "tokio", +] + [[package]] name = "datafusion-datasource-parquet" version = "48.0.1" @@ -1655,18 +2692,18 @@ dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-session", + "datafusion-catalog 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "futures", "itertools 0.14.0", "log", @@ -1677,20 +2714,45 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-doc" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0e7b648387b0c1937b83cb328533c06c923799e73a9e3750b762667f32662c0" + [[package]] name = "datafusion-doc" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" +[[package]] +name = "datafusion-execution" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9609d83d52ff8315283c6dad3b97566e877d8f366fab4c3297742f33dcd636c7" +dependencies = [ + "arrow", + "dashmap 6.1.0", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures", + "log", + "object_store", + "parking_lot", + "rand 0.9.2", + "tempfile", + "url", +] + [[package]] name = "datafusion-execution" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ "arrow", - "dashmap", - "datafusion-common", - "datafusion-expr", + "dashmap 6.1.0", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "futures", "log", "object_store", @@ -1700,6 +2762,27 @@ dependencies = [ "url", ] +[[package]] +name = "datafusion-expr" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e75230cd67f650ef0399eb00f54d4a073698f2c0262948298e5299fc7324da63" +dependencies = [ + "arrow", + "chrono", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-window-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap 2.12.1", + "paste", + "recursive", + "serde_json", + "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "datafusion-expr" version = "48.0.1" @@ 
-1707,17 +2790,30 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "indexmap 2.12.1", "paste", "recursive", "serde_json", - "sqlparser", + "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", +] + +[[package]] +name = "datafusion-expr-common" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70fafb3a045ed6c49cfca0cd090f62cf871ca6326cc3355cb0aaf1260fa760b6" +dependencies = [ + "arrow", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap 2.12.1", + "itertools 0.14.0", + "paste", ] [[package]] @@ -1726,12 +2822,41 @@ version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ "arrow", - "datafusion-common", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "indexmap 2.12.1", "itertools 0.14.0", "paste", ] 
+[[package]] +name = "datafusion-functions" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdf9a9cf655265861a20453b1e58357147eab59bdc90ce7f2f68f1f35104d3bb" +dependencies = [ + "arrow", + "arrow-buffer 55.2.0", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "hex", + "itertools 0.14.0", + "log", + "md-5", + "rand 0.9.2", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + [[package]] name = "datafusion-functions" version = "48.0.1" @@ -1743,12 +2868,12 @@ dependencies = [ "blake2", "blake3", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-macros", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "hex", "itertools 0.14.0", "log", @@ -1760,6 +2885,27 @@ dependencies = [ "uuid", ] +[[package]] +name = 
"datafusion-functions-aggregate" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f07e49733d847be0a05235e17b884d326a2fd402c97a89fe8bcf0bfba310005" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "half", + "log", + "paste", +] + [[package]] name = "datafusion-functions-aggregate" version = "48.0.1" @@ -1767,19 +2913,32 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + 
"datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "half", "log", "paste", ] +[[package]] +name = "datafusion-functions-aggregate-common" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4512607e10d72b0b0a1dc08f42cb5bd5284cb8348b7fea49dc83409493e32b1b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "datafusion-functions-aggregate-common" version = "48.0.1" @@ -1787,9 +2946,30 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", +] + +[[package]] +name = "datafusion-functions-nested" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab331806e34f5545e5f03396e4d5068077395b1665795d8f88c14ec4f1e0b7a" +dependencies = [ + "arrow", + "arrow-ord", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-doc 48.0.1 
(registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.14.0", + "log", + "paste", ] [[package]] @@ -1799,19 +2979,35 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "arrow-ord", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-macros", - "datafusion-physical-expr-common", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "itertools 0.14.0", "log", "paste", ] +[[package]] +name = 
"datafusion-functions-table" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ac2c0be983a06950ef077e34e0174aa0cb9e346f3aeae459823158037ade37" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "parking_lot", + "paste", +] + [[package]] name = "datafusion-functions-table" version = "48.0.1" @@ -1819,38 +3015,77 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-plan", + "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "parking_lot", "paste", ] +[[package]] +name = "datafusion-functions-window" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f3d92731de384c90906941d36dcadf6a86d4128409a9c5cd916662baed5f53" +dependencies = [ + "arrow", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-window-common 48.0.1 
(registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log", + "paste", +] + [[package]] name = "datafusion-functions-window" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "log", "paste", ] +[[package]] +name = "datafusion-functions-window-common" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c679f8bf0971704ec8fd4249fcbb2eb49d6a12cc3e7a840ac047b4928d3541b5" +dependencies = [ + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 
(registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "datafusion-functions-window-common" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", +] + +[[package]] +name = "datafusion-macros" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2821de7cb0362d12e75a5196b636a59ea3584ec1e1cc7dc6f5e34b9e8389d251" +dependencies = [ + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "quote", + "syn 2.0.113", ] [[package]] @@ -1858,9 +3093,28 @@ name = "datafusion-macros" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ - "datafusion-expr", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "quote", - "syn", + "syn 2.0.113", +] + +[[package]] +name = "datafusion-optimizer" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1594c7a97219ede334f25347ad8d57056621e7f4f35a0693c8da876e10dd6a53" +dependencies = [ + "arrow", + "chrono", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap 2.12.1", + "itertools 0.14.0", + "log", + "recursive", + "regex", + "regex-syntax", ] [[package]] @@ -1869,16 +3123,38 @@ version = "48.0.1" source = 
"git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ "arrow", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", + "chrono", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "indexmap 2.12.1", + "itertools 0.14.0", + "log", + "recursive", + "regex", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6da0f2412088d23f6b01929dedd687b5aee63b19b674eb73d00c3eb3c883b7" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "half", + "hashbrown 0.14.5", "indexmap 2.12.1", "itertools 0.14.0", "log", - "recursive", - "regex", - "regex-syntax", + "paste", + "petgraph 0.8.3", ] [[package]] @@ -1888,11 +3164,11 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "half", "hashbrown 0.14.5", "indexmap 2.12.1", @@ -1902,6 +3178,20 @@ dependencies = [ "petgraph 0.8.3", ] +[[package]] +name = "datafusion-physical-expr-common" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb0dbd9213078a593c3fe28783beaa625a4e6c6a6c797856ee2ba234311fb96" +dependencies = [ + "ahash", + "arrow", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + [[package]] name = "datafusion-physical-expr-common" version = "48.0.1" @@ -1909,30 +3199,79 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "ahash", "arrow", - "datafusion-common", - "datafusion-expr-common", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "hashbrown 0.14.5", "itertools 0.14.0", ] +[[package]] +name = "datafusion-physical-optimizer" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d140854b2db3ef8ac611caad12bfb2e1e1de827077429322a6188f18fc0026a" +dependencies = [ + "arrow", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + 
"datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.14.0", + "log", + "recursive", +] + [[package]] name = "datafusion-physical-optimizer" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "itertools 0.14.0", "log", "recursive", ] +[[package]] +name = "datafusion-physical-plan" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b46cbdf21a01206be76d467f325273b22c559c744a012ead5018dfe79597de08" +dependencies = [ 
+ "ahash", + "arrow", + "arrow-ord", + "arrow-schema 55.2.0", + "async-trait", + "chrono", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-functions-window-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap 2.12.1", + "itertools 0.14.0", + "log", + "parking_lot", + "pin-project-lite", + "tokio", +] + [[package]] name = "datafusion-physical-plan" version = "48.0.1" @@ -1944,13 +3283,13 @@ dependencies = [ "arrow-schema 55.2.0", "async-trait", "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr-common 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "futures", "half", "hashbrown 0.14.5", @@ -1962,6 +3301,22 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-proto" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3fc7a2744332c2ef8804274c21f9fa664b4ca5889169250a6fd6b649ee5d16c" +dependencies = [ + "arrow", + "chrono", + "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-proto-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "object_store", + "prost", +] + [[package]] name = "datafusion-proto" version = "48.0.1" @@ -1969,24 +3324,59 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "chrono", - "datafusion", - "datafusion-common", - "datafusion-expr", - "datafusion-proto-common", + "datafusion 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-proto-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "object_store", "prost", ] +[[package]] +name = "datafusion-proto-common" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "800add86852f12e3d249867425de2224c1e9fb7adc2930460548868781fbeded" +dependencies = [ + "arrow", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "prost", +] + [[package]] name = "datafusion-proto-common" version = "48.0.1" source = 
"git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ "arrow", - "datafusion-common", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "prost", ] +[[package]] +name = "datafusion-session" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a72733766ddb5b41534910926e8da5836622316f6283307fd9fb7e19811a59c" +dependencies = [ + "arrow", + "async-trait", + "dashmap 6.1.0", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + [[package]] name = "datafusion-session" version = "48.0.1" @@ -1994,14 +3384,14 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "async-trait", - "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-sql", + "dashmap 6.1.0", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-sql 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "futures", "itertools 0.14.0", "log", @@ -2010,6 +3400,23 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-sql" +version = "48.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5162338cdec9cc7ea13a0e6015c361acad5ec1d88d83f7c86301f789473971f" +dependencies = [ + "arrow", + "bigdecimal", + "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "indexmap 2.12.1", + "log", + "recursive", + "regex", + "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "datafusion-sql" version = "48.0.1" @@ -2017,13 +3424,13 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "bigdecimal", - "datafusion-common", - "datafusion-expr", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "indexmap 2.12.1", "log", "recursive", "regex", - "sqlparser", + "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", ] [[package]] @@ -2079,6 +3486,37 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "syn 2.0.113", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.113", +] + [[package]] name = "digest" version = "0.10.7" @@ -2100,6 +3538,27 @@ dependencies = [ "dirs-sys-next", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.5.2", + "windows-sys 0.61.2", +] + [[package]] name = "dirs-sys-next" version = "0.1.2" @@ -2107,7 +3566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ "libc", - "redox_users", + "redox_users 0.4.6", "winapi", ] @@ -2119,7 +3578,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -2143,6 +3602,12 @@ dependencies = [ "shared_child", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "dyn-clone" version = "1.0.20" @@ -2211,6 +3676,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "error-chain" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" +dependencies = [ + "version_check", +] + [[package]] name = "error-code" version = "3.3.2" @@ -2252,6 +3726,22 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "figment" +version = "0.10.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3" +dependencies = [ + "atomic", + "pear", + "serde", + "serde_json", + "serde_yaml", + "toml 0.8.23", + "uncased", + "version_check", +] + [[package]] name = "find-msvc-tools" version = "0.1.6" @@ -2313,6 +3803,21 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2333,6 +3838,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "function-stream" version = "0.6.0" @@ -2343,6 +3854,7 @@ dependencies = [ "arrow-ipc 
55.2.0", "arrow-json 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fjson)", "arrow-schema 55.2.0", + "arroyo-state", "async-trait", "base64", "bincode", @@ -2351,21 +3863,24 @@ dependencies = [ "cornucopia", "cornucopia_async", "crossbeam-channel", - "datafusion", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-window", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-proto", + "datafusion 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-functions-window 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-proto 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", "futures", + "governor", + "hex", "itertools 0.14.0", "jiter", "log", "lru", + "mini-moka", "num_cpus", "parking_lot", "parquet 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fparquet)", @@ -2382,12 +3897,13 @@ dependencies = [ "serde_json", "serde_json_path", "serde_yaml", - "sqlparser", - "strum", + "sha2", + 
"sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", + "strum 0.26.3", "thiserror 2.0.17", "tokio", "tokio-stream", - "tonic", + "tonic 0.12.3", "tracing", "tracing-appender", "tracing-subscriber", @@ -2413,7 +3929,7 @@ dependencies = [ "rustyline", "thiserror 2.0.17", "tokio", - "tonic", + "tonic 0.12.3", ] [[package]] @@ -2478,7 +3994,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -2493,6 +4009,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -2555,9 +4077,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", + "js-sys", "libc", "r-efi", "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getset" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912" +dependencies = [ + "proc-macro-error2", + "proc-macro2", + "quote", + "syn 2.0.113", ] [[package]] @@ -2577,6 +4113,29 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "governor" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be93b4ec2e4710b04d9264c0c7350cdd62a8c20e5e4ac732552ebb8f0debe8eb" +dependencies = [ + "cfg-if", + "dashmap 6.1.0", + "futures-sink", + 
"futures-timer", + "futures-util", + "getrandom 0.3.4", + "no-std-compat", + "nonzero_ext", + "parking_lot", + "portable-atomic", + "quanta", + "rand 0.9.2", + "smallvec", + "spinning_top", + "web-time", +] + [[package]] name = "h2" version = "0.4.12" @@ -2588,7 +4147,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", + "http 1.4.0", "indexmap 2.12.1", "slab", "tokio", @@ -2605,7 +4164,7 @@ dependencies = [ "cfg-if", "crunchy", "num-traits", - "zerocopy", + "zerocopy 0.8.31", ] [[package]] @@ -2702,6 +4261,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.4.0" @@ -2712,6 +4282,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -2719,7 +4300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.4.0", ] [[package]] @@ -2730,8 +4311,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -2764,8 +4345,8 @@ dependencies = [ "futures-channel", "futures-core", "h2", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "httparse", "httpdate", "itoa", @@ -2776,6 +4357,23 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http 1.4.0", + "hyper", + "hyper-util", + "rustls", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", +] + [[package]] name = "hyper-timeout" version = "0.5.2" @@ -2789,25 +4387,46 @@ dependencies = [ "tower-service", ] +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ + "base64", "bytes", "futures-channel", "futures-core", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "hyper", + "ipnet", "libc", + "percent-encoding", "pin-project-lite", "socket2 0.6.1", + "system-configuration", "tokio", "tower-service", "tracing", + "windows-registry", ] [[package]] @@ -2921,6 +4540,12 @@ version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -2978,6 +4603,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "inlinable_string" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" + 
[[package]] name = "integer-encoding" version = "3.0.4" @@ -3015,6 +4646,16 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +[[package]] +name = "iri-string" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "is-terminal" version = "0.4.17" @@ -3115,6 +4756,19 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "k8s-openapi" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c75b990324f09bef15e791606b7b7a296d02fc88a344f6eba9390970a870ad5" +dependencies = [ + "base64", + "chrono", + "serde", + "serde-value", + "serde_json", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -3355,6 +5009,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" +[[package]] +name = "local-ip-address" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79ef8c257c92ade496781a32a581d43e3d512cf8ce714ecf04ea80f93ed0ff4a" +dependencies = [ + "libc", + "neli", + "windows-sys 0.61.2", +] + [[package]] name = "lock_api" version = "0.4.14" @@ -3379,6 +5044,12 @@ dependencies = [ "hashbrown 0.15.5", ] +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + [[package]] name = "lz4-sys" version = "1.11.1+lz4-1.10.0" @@ -3433,6 +5104,12 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +[[package]] +name = "matchit" +version = "0.8.4" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = "maybe-owned" version = "0.3.4" @@ -3493,7 +5170,7 @@ checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -3502,6 +5179,21 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mini-moka" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c325dfab65f261f386debee8b0969da215b3fa0037e74c8a1234db7ba986d803" +dependencies = [ + "crossbeam-channel", + "crossbeam-utils", + "dashmap 5.5.3", + "skeptic", + "smallvec", + "tagptr", + "triomphe", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -3535,6 +5227,61 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" +[[package]] +name = "nanoid" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ffa00dec017b5b1a8b7cf5e2c008bfda1aa7e0697ac1508b491fdf2622fb4d8" +dependencies = [ + "rand 0.8.5", +] + +[[package]] +name = "native-tls" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "neli" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22f9786d56d972959e1408b6a93be6af13b9c1392036c5c1fafa08a1b0c6ee87" +dependencies = [ + "bitflags 2.10.0", + "byteorder", + 
"derive_builder", + "getset", + "libc", + "log", + "neli-proc-macros", + "parking_lot", +] + +[[package]] +name = "neli-proc-macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05d8d08c6e98f20a62417478ebf7be8e1425ec9acecc6f63e22da633f6b71609" +dependencies = [ + "either", + "proc-macro2", + "quote", + "serde", + "syn 2.0.113", +] + [[package]] name = "nibble_vec" version = "0.1.0" @@ -3552,10 +5299,16 @@ checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ "bitflags 2.10.0", "cfg-if", - "cfg_aliases", + "cfg_aliases 0.1.1", "libc", ] +[[package]] +name = "no-std-compat" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c" + [[package]] name = "nom" version = "7.1.3" @@ -3566,6 +5319,12 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nonzero_ext" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -3597,6 +5356,7 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", + "serde", ] [[package]] @@ -3684,7 +5444,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -3706,14 +5466,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" dependencies = [ "async-trait", + "base64", "bytes", "chrono", + "form_urlencoded", "futures", - "http", + "http 1.4.0", + "http-body-util", + "httparse", "humantime", + "hyper", "itertools 0.14.0", + "md-5", "parking_lot", "percent-encoding", + "quick-xml", + "rand 0.9.2", + "reqwest", + "ring", + "rustls-pemfile", + "serde", + 
"serde_json", + "serde_urlencoded", "thiserror 2.0.17", "tokio", "tracing", @@ -3735,6 +5509,38 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "openssl" +version = "0.10.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +dependencies = [ + "bitflags 2.10.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.113", +] + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + [[package]] name = "openssl-sys" version = "0.9.111" @@ -3747,6 +5553,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "ordered-float" version = "2.10.1" @@ -3766,6 +5578,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + [[package]] name = "owo-colors" version = "3.5.0" @@ -3869,6 +5687,29 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pear" +version = "0.2.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467" +dependencies = [ + "inlinable_string", + "pear_codegen", + "yansi", +] + +[[package]] +name = "pear_codegen" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn 2.0.113", +] + [[package]] name = "peeking_take_while" version = "0.1.2" @@ -3911,7 +5752,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -3954,6 +5795,7 @@ dependencies = [ "hashbrown 0.15.5", "indexmap 2.12.1", "serde", + "serde_derive", ] [[package]] @@ -4010,7 +5852,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -4031,6 +5873,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "postcard" version = "1.1.3" @@ -4107,7 +5955,7 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy", + "zerocopy 0.8.31", ] [[package]] @@ -4117,7 +5965,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.113", ] [[package]] @@ -4126,7 +5974,53 @@ version = "3.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "toml_edit", + "toml_edit 0.23.10+spec-1.0.0", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro-error2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" +dependencies = [ + "proc-macro-error-attr2", + "proc-macro2", + "quote", + "syn 2.0.113", ] [[package]] @@ -4138,6 +6032,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.113", + "version_check", + "yansi", +] + [[package]] name = "proctitle" version = "0.1.1" @@ -4149,6 +6056,21 @@ dependencies = [ "winapi", ] +[[package]] +name = "prometheus" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror 2.0.17", +] + [[package]] name = "prost" version = "0.13.5" @@ -4175,7 +6097,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn", + "syn 2.0.113", "tempfile", ] @@ -4189,61 +6111,178 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] name = "prost-types" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +dependencies = [ + "prost", +] + +[[package]] +name = "protobuf" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4" +dependencies = [ + "once_cell", + "protobuf-support", + "thiserror 1.0.69", +] + +[[package]] +name = "protobuf-support" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6" +dependencies = [ + "thiserror 1.0.69", +] + +[[package]] +name = "protocol" +version = "0.1.0" +dependencies = [ + "env_logger", + "log", + "prost", + "serde", + "tonic 0.12.3", + "tonic-build 0.12.3", +] + +[[package]] +name = "psm" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" +dependencies = [ + "ar_archive_writer", + "cc", +] + +[[package]] +name = "pulldown-cmark" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" +dependencies = [ + "bitflags 2.10.0", + "memchr", + "unicase", +] + +[[package]] +name 
= "pulley-interpreter" +version = "41.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01051a5b172e07f9197b85060e6583b942aec679dac08416647bf7e7dc916b65" +dependencies = [ + "cranelift-bitset", + "log", + "pulley-macros", + "wasmtime-internal-math", +] + +[[package]] +name = "pulley-macros" +version = "41.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cf194f5b1a415ef3a44ee35056f4009092cc4038a9f7e3c7c1e392f48ee7dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.113", +] + +[[package]] +name = "quad-rand" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" + +[[package]] +name = "quanta" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7" dependencies = [ - "prost", + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid", + "wasi 0.11.1+wasi-snapshot-preview1", + "web-sys", + "winapi", ] [[package]] -name = "protocol" -version = "0.1.0" +name = "quick-xml" +version = "0.38.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ - "env_logger", - "log", - "prost", + "memchr", "serde", - "tonic", - "tonic-build", ] [[package]] -name = "psm" -version = "0.1.30" +name = "quinn" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" dependencies = [ - "ar_archive_writer", - "cc", + "bytes", + "cfg_aliases 0.2.1", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash 2.1.1", + "rustls", + "socket2 0.6.1", + "thiserror 2.0.17", + "tokio", + "tracing", + 
"web-time", ] [[package]] -name = "pulley-interpreter" -version = "41.0.3" +name = "quinn-proto" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01051a5b172e07f9197b85060e6583b942aec679dac08416647bf7e7dc916b65" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ - "cranelift-bitset", - "log", - "pulley-macros", - "wasmtime-internal-math", + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash 2.1.1", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.17", + "tinyvec", + "tracing", + "web-time", ] [[package]] -name = "pulley-macros" -version = "41.0.3" +name = "quinn-udp" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cf194f5b1a415ef3a44ee35056f4009092cc4038a9f7e3c7c1e392f48ee7dbb" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" dependencies = [ - "proc-macro2", - "quote", - "syn", + "cfg_aliases 0.2.1", + "libc", + "once_cell", + "socket2 0.6.1", + "tracing", + "windows-sys 0.60.2", ] [[package]] @@ -4345,6 +6384,15 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "raw-cpuid" +version = "11.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" +dependencies = [ + "bitflags 2.10.0", +] + [[package]] name = "rayon" version = "1.11.0" @@ -4415,7 +6463,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -4438,6 +6486,37 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "redox_users" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + 
"getrandom 0.2.16", + "libredox", + "thiserror 2.0.17", +] + +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.113", +] + [[package]] name = "regalloc2" version = "0.13.5" @@ -4476,6 +6555,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + [[package]] name = "regex-syntax" version = "0.8.8" @@ -4492,6 +6577,67 @@ dependencies = [ "memchr", ] +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-tls", + "hyper-util", + "js-sys", + "log", + "mime", + "native-tls", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-native-certs", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-native-tls", + "tokio-rustls", + "tokio-util", + "tower 0.5.2", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.16", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rocksdb" version = "0.21.0" @@ -4580,6 +6726,65 @@ dependencies = [ "rustix 1.1.3", ] +[[package]] +name = "rustls" +version = "0.23.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +dependencies = [ + "aws-lc-rs", + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +dependencies = [ + "web-time", + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +dependencies = [ + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -4635,6 +6840,15 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "schemars" version = "0.8.22" @@ -4642,7 +6856,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ "dyn-clone", - "schemars_derive", + "schemars_derive 0.8.22", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" +dependencies = [ + "dyn-clone", + "ref-cast", + "schemars_derive 1.2.1", "serde", "serde_json", ] @@ -4656,7 +6883,19 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn", + "syn 2.0.113", +] + +[[package]] +name = "schemars_derive" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.113", ] [[package]] @@ -4665,6 +6904,29 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "security-framework" +version = "3.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" +dependencies = [ + "bitflags 2.10.0", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "semver" version = 
"1.0.27" @@ -4691,6 +6953,26 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float", + "serde", +] + +[[package]] +name = "serde_bytes" +version = "0.11.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" +dependencies = [ + "serde", + "serde_core", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -4708,7 +6990,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -4719,7 +7001,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -4782,7 +7064,16 @@ checksum = "aafbefbe175fa9bf03ca83ef89beecff7d2a95aaacd5732325b90ac8c3bd7b90" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", +] + +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", ] [[package]] @@ -4803,7 +7094,19 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn", + "syn 2.0.113", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", ] [[package]] @@ -4915,6 +7218,21 @@ dependencies = [ "typenum", ] +[[package]] +name = "skeptic" +version = "0.13.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8" +dependencies = [ + "bytecount", + "cargo_metadata", + "error-chain", + "glob", + "pulldown-cmark", + "tempfile", + "walkdir", +] + [[package]] name = "slab" version = "0.4.11" @@ -4946,20 +7264,40 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" name = "socket2" version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "spinning_top" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96d2d1d716fb500937168cc09353ffdc7a012be8475ac7308e1bdf0e3923300" dependencies = [ - "libc", - "windows-sys 0.52.0", + "lock_api", ] [[package]] -name = "socket2" -version = "0.6.1" +name = "sqlparser" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" dependencies = [ - "libc", - "windows-sys 0.60.2", + "log", + "recursive", + "sqlparser_derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -4969,7 +7307,18 @@ source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunc dependencies = [ "log", "recursive", - "sqlparser_derive", + "sqlparser_derive 0.3.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", +] + +[[package]] +name = "sqlparser_derive" +version 
= "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.113", ] [[package]] @@ -4979,7 +7328,7 @@ source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunc dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -5030,7 +7379,16 @@ version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" dependencies = [ - "strum_macros", + "strum_macros 0.26.4", +] + +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros 0.27.2", ] [[package]] @@ -5043,7 +7401,19 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 2.0.113", +] + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.113", ] [[package]] @@ -5080,6 +7450,16 @@ dependencies = [ "is-terminal", ] +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.113" @@ -5096,6 +7476,9 @@ name = "sync_wrapper" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] [[package]] name = "synstructure" @@ -5105,7 +7488,28 @@ checksum = 
"728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", +] + +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags 2.10.0", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", ] [[package]] @@ -5124,6 +7528,12 @@ dependencies = [ "winx", ] +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + [[package]] name = "tap" version = "1.0.1" @@ -5205,7 +7615,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -5216,7 +7626,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -5318,6 +7728,7 @@ dependencies = [ "signal-hook-registry", "socket2 0.6.1", "tokio-macros", + "tracing", "windows-sys 0.61.2", ] @@ -5329,7 +7740,17 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", ] [[package]] @@ -5358,6 +7779,16 @@ dependencies = [ "whoami", ] 
+[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls", + "tokio", +] + [[package]] name = "tokio-stream" version = "0.1.18" @@ -5382,6 +7813,18 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned 0.6.9", + "toml_datetime 0.6.11", + "toml_edit 0.22.27", +] + [[package]] name = "toml" version = "0.9.11+spec-1.1.0" @@ -5390,13 +7833,22 @@ checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46" dependencies = [ "indexmap 2.12.1", "serde_core", - "serde_spanned", - "toml_datetime", + "serde_spanned 1.0.4", + "toml_datetime 0.7.5+spec-1.1.0", "toml_parser", "toml_writer", "winnow", ] +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + [[package]] name = "toml_datetime" version = "0.7.5+spec-1.1.0" @@ -5406,6 +7858,20 @@ dependencies = [ "serde_core", ] +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap 2.12.1", + "serde", + "serde_spanned 0.6.9", + "toml_datetime 0.6.11", + "toml_write", + "winnow", +] + [[package]] name = "toml_edit" version = "0.23.10+spec-1.0.0" @@ -5413,7 +7879,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" dependencies = [ "indexmap 2.12.1", - "toml_datetime", + "toml_datetime 
0.7.5+spec-1.1.0", "toml_parser", "winnow", ] @@ -5427,6 +7893,12 @@ dependencies = [ "winnow", ] +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + [[package]] name = "toml_writer" version = "1.0.6+spec-1.1.0" @@ -5441,12 +7913,12 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" dependencies = [ "async-stream", "async-trait", - "axum", + "axum 0.7.9", "base64", "bytes", "h2", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "hyper", "hyper-timeout", @@ -5463,6 +7935,39 @@ dependencies = [ "tracing", ] +[[package]] +name = "tonic" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" +dependencies = [ + "async-trait", + "axum 0.8.8", + "base64", + "bytes", + "h2", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "rustls-native-certs", + "socket2 0.5.10", + "tokio", + "tokio-rustls", + "tokio-stream", + "tower 0.5.2", + "tower-layer", + "tower-service", + "tracing", + "webpki-roots 0.26.11", + "zstd", +] + [[package]] name = "tonic-build" version = "0.12.3" @@ -5474,7 +7979,21 @@ dependencies = [ "prost-build", "prost-types", "quote", - "syn", + "syn 2.0.113", +] + +[[package]] +name = "tonic-build" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac6f67be712d12f0b41328db3137e0d0757645d8904b4cb7d51cd9c2279e847" +dependencies = [ + "prettyplease", + "proc-macro2", + "prost-build", + "prost-types", + "quote", + "syn 2.0.113", ] [[package]] @@ -5505,8 +8024,31 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", + 
"indexmap 2.12.1", "pin-project-lite", + "slab", "sync_wrapper", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags 2.10.0", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "iri-string", + "pin-project-lite", + "tower 0.5.2", "tower-layer", "tower-service", ] @@ -5554,7 +8096,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -5610,6 +8152,12 @@ dependencies = [ "tracing-serde", ] +[[package]] +name = "triomphe" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd69c5aa8f924c7519d6372789a74eac5b94fb0f8fcf0d4a97eb0bfc3e785f39" + [[package]] name = "try-lock" version = "0.2.5" @@ -5647,9 +8195,9 @@ dependencies = [ "proc-macro2", "quote", "regress", - "schemars", + "schemars 0.8.22", "serde_json", - "syn", + "syn 2.0.113", "thiserror 1.0.69", "unicode-ident", ] @@ -5661,11 +8209,11 @@ source = "git+https://github.com/ArroyoSystems/typify.git?branch=arroyo#d14b6fc0 dependencies = [ "proc-macro2", "quote", - "schemars", + "schemars 0.8.22", "serde", "serde_json", "serde_tokenstream", - "syn", + "syn 2.0.113", "typify-impl", ] @@ -5675,6 +8223,15 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" +[[package]] +name = "uncased" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697" +dependencies = [ + "version_check", +] + [[package]] name = "unicase" version = "2.9.0" @@ -5750,6 +8307,12 @@ version = "0.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "e9df2af067a7953e9c3831320f35c1cc0600c30d44d9f7a12b01db1cd88d6b47" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "unty" version = "0.0.4" @@ -5768,6 +8331,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -5780,6 +8349,30 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "utoipa" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23" +dependencies = [ + "indexmap 2.12.1", + "serde", + "serde_json", + "utoipa-gen", +] + +[[package]] +name = "utoipa-gen" +version = "4.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20c24e8ab68ff9ee746aad22d39b5535601e6416d1b0feeabf78be986a5c4392" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.113", +] + [[package]] name = "uuid" version = "1.19.0" @@ -5788,6 +8381,7 @@ checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ "getrandom 0.3.4", "js-sys", + "serde_core", "wasm-bindgen", ] @@ -5815,6 +8409,12 @@ version = "0.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "walkdir" version = "2.5.0" @@ -5912,7 +8512,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn", + "syn 2.0.113", "wasm-bindgen-shared", ] @@ -5956,6 +8556,19 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmparser" version = "0.243.0" @@ -6078,7 +8691,7 @@ dependencies = [ "serde", "serde_derive", "sha2", - "toml", + "toml 0.9.11+spec-1.1.0", "wasmtime-environ", "windows-sys 0.61.2", "zstd", @@ -6093,7 +8706,7 @@ dependencies = [ "anyhow", "proc-macro2", "quote", - "syn", + "syn 2.0.113", "wasmtime-internal-component-util", "wasmtime-internal-wit-bindgen", "wit-parser", @@ -6207,7 +8820,7 @@ checksum = "63ba3124cc2cbcd362672f9f077303ccc4cd61daa908f73447b7fdaece75ff9f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -6335,6 +8948,24 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.6", +] + +[[package]] +name = "webpki-roots" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "whoami" version = "2.1.0" @@ -6370,7 +9001,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.113", "witx", ] @@ -6382,7 +9013,7 @@ checksum = "0e976fe0cecd60041f66b15ad45ebc997952af13da9bf9d90261c7b025057edc" 
dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", "wiggle-generate", ] @@ -6458,7 +9089,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -6469,7 +9100,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -6478,6 +9109,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-registry" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + [[package]] name = "windows-result" version = "0.4.1" @@ -6731,6 +9373,12 @@ dependencies = [ "tap", ] +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "xxhash-rust" version = "0.8.15" @@ -6746,6 +9394,12 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "yoke" version = "0.8.1" @@ -6765,17 +9419,37 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive 0.7.35", 
+] + [[package]] name = "zerocopy" version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" dependencies = [ - "zerocopy-derive", + "zerocopy-derive 0.8.31", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.113", ] [[package]] @@ -6786,7 +9460,7 @@ checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] @@ -6806,10 +9480,16 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", "synstructure", ] +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + [[package]] name = "zerotrie" version = "0.2.3" @@ -6840,7 +9520,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.113", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index cee98282..2c62a473 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,6 +84,11 @@ cornucopia_async = { git = "https://github.com/ArroyoSystems/cornucopia", branch cornucopia = { git = "https://github.com/ArroyoSystems/cornucopia", branch = "sqlite" } jiter = {git = "https://github.com/ArroyoSystems/jiter", branch = "disable_python" } +arroyo-state = { path = "../arroyo/crates/arroyo-state" } +governor = "0.8.0" +mini-moka = "0.10" +sha2 = "0.10" +hex = "0.4" [features] default = ["incremental-cache", "python"] diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs index f69ad017..814358ad 100644 --- 
a/src/runtime/mod.rs +++ b/src/runtime/mod.rs @@ -14,10 +14,13 @@ pub mod buffer_and_event; pub mod common; -pub mod input; -pub mod output; -pub mod processor; pub mod sink; pub mod source; +pub mod streaming; pub mod task; pub mod taskexecutor; +pub mod wasm; + +pub use wasm::input; +pub use wasm::output; +pub use wasm::processor; diff --git a/src/runtime/source/mod.rs b/src/runtime/source/mod.rs deleted file mode 100644 index 8a05bf30..00000000 --- a/src/runtime/source/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// Source module - -// TODO: Add source implementation here diff --git a/src/runtime/streaming/api/context.rs b/src/runtime/streaming/api/context.rs new file mode 100644 index 00000000..e81bd03a --- /dev/null +++ b/src/runtime/streaming/api/context.rs @@ -0,0 +1,95 @@ +use crate::runtime::streaming::memory::MemoryPool; +use crate::runtime::streaming::protocol::event::StreamEvent; +use crate::runtime::streaming::protocol::tracked::TrackedEvent; +use crate::runtime::streaming::network::endpoint::PhysicalSender; +use arrow_array::RecordBatch; +use arroyo_state::tables::table_manager::TableManager; +use std::sync::Arc; +use tokio::sync::Mutex; +use tracing::error; + +pub struct TaskContext { + pub job_id: String, + pub vertex_id: u32, + pub subtask_idx: u32, + pub parallelism: u32, + pub outboxes: Vec, + memory_pool: Arc, + table_manager: Option>>, + pub last_present_watermark: Option, +} + +impl TaskContext { + pub fn new( + job_id: String, + vertex_id: u32, + subtask_idx: u32, + parallelism: u32, + outboxes: Vec, + memory_pool: Arc, + table_manager: Option>>, + ) -> Self { + Self { + job_id, + vertex_id, + subtask_idx, + parallelism, + outboxes, + memory_pool, + table_manager, + last_present_watermark: None, + } + } + + pub async fn table_manager(&self) -> tokio::sync::MutexGuard<'_, TableManager> { + self.table_manager + .as_ref() + .expect("State backend not initialized") + .lock() + .await + } + + /// Memory-pool-governed data send: requests a memory ticket sized to the batch's exact byte count, then broadcasts the batch to every downstream outbox. + pub async fn collect(&self, batch: RecordBatch) -> anyhow::Result<()> { + if self.outboxes.is_empty() { + return Ok(()); + } + + let bytes_required = batch.get_array_memory_size(); + let ticket = self.memory_pool.request_memory(bytes_required).await; + let tracked_event = TrackedEvent::new(StreamEvent::Data(batch), Some(ticket)); + + for outbox in &self.outboxes { + outbox.send(tracked_event.clone()).await?; + } + Ok(()) + } + + /// Routes the batch to a single partition chosen by key hash (Shuffle / GroupBy). NOTE(review): the index is computed from `key_hash as usize`, which narrows on 32-bit targets - confirm that is acceptable. + pub async fn collect_keyed( + &self, + key_hash: u64, + batch:
RecordBatch, + ) -> anyhow::Result<()> { + if self.outboxes.is_empty() { + return Ok(()); + } + + let bytes_required = batch.get_array_memory_size(); + let ticket = self.memory_pool.request_memory(bytes_required).await; + let tracked_event = TrackedEvent::new(StreamEvent::Data(batch), Some(ticket)); + + let target_idx = (key_hash as usize) % self.outboxes.len(); + self.outboxes[target_idx].send(tracked_event).await?; + Ok(()) + } + + /// Broadcasts a control signal (no memory ticket is requested, so control events are not held up behind memory-pool congestion). + pub async fn broadcast(&self, event: StreamEvent) -> anyhow::Result<()> { + let tracked_event = TrackedEvent::control(event); + for outbox in &self.outboxes { + outbox.send(tracked_event.clone()).await?; + } + Ok(()) + } +} diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs new file mode 100644 index 00000000..e6bf674d --- /dev/null +++ b/src/runtime/streaming/api/mod.rs @@ -0,0 +1,9 @@ +//! API layer: the traits that operator and source implementations must follow, plus the runtime context. + +pub mod context; +pub mod operator; +pub mod source; + +pub use context::TaskContext; +pub use operator::{ConstructedOperator, MessageOperator}; +pub use source::{SourceEvent, SourceOffset, SourceOperator}; diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs new file mode 100644 index 00000000..3974307b --- /dev/null +++ b/src/runtime/streaming/api/operator.rs @@ -0,0 +1,90 @@ +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::source::SourceOperator; +use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; +use crate::runtime::streaming::protocol::stream_out::StreamOutput; +use arrow_array::RecordBatch; +use async_trait::async_trait; +use std::time::Duration; +use crate::sql::common::{CheckpointBarrier, Watermark}; + +/// Concrete operator instance produced by the factory (via reflection). +pub enum ConstructedOperator { + Source(Box), + Operator(Box), +} + +/// Multi-upstream, passively driven message operator. +#[async_trait] +pub trait MessageOperator: Send + 'static { + fn name(&self) -> &str; + + async fn on_start(&mut self, _ctx:
&mut TaskContext) -> anyhow::Result<()> { + Ok(()) + } + + /// `input_idx`: index of the input edge in a multi-input topology (matches the inbox index in `SubtaskRunner`; always 0 for a single input). + async fn process_data( + &mut self, + input_idx: usize, + batch: RecordBatch, + ctx: &mut TaskContext, + ) -> anyhow::Result>; + + async fn process_watermark( + &mut self, + watermark: Watermark, + ctx: &mut TaskContext, + ) -> anyhow::Result>; + + async fn snapshot_state( + &mut self, + barrier: CheckpointBarrier, + ctx: &mut TaskContext, + ) -> anyhow::Result<()>; + + /// Called by `SubtaskRunner` on [`ControlCommand::Commit`] after the global checkpoint has been confirmed (e.g. Kafka EOS two-phase commit). + async fn commit_checkpoint( + &mut self, + _epoch: u32, + _ctx: &mut TaskContext, + ) -> anyhow::Result<()> { + Ok(()) + } + + /// Periodic clock (e.g. idle detection); `None` means no tick is registered. + fn tick_interval(&self) -> Option { + None + } + + /// Companion to [`Self::tick_interval`]; called by `SubtaskRunner` at a fixed interval. + async fn process_tick( + &mut self, + _tick_index: u64, + _ctx: &mut TaskContext, + ) -> anyhow::Result> { + Ok(vec![]) + } + + /// Returning `true` means the run loop should end immediately (e.g. `StopMode::Immediate`). + async fn handle_control( + &mut self, + command: ControlCommand, + _ctx: &mut TaskContext, + ) -> anyhow::Result { + match command { + ControlCommand::Stop { mode } => { + if mode == StopMode::Immediate { + return Ok(true); + } + Ok(false) + } + ControlCommand::DropState | ControlCommand::Commit { .. } => Ok(false), + ControlCommand::Start | ControlCommand::UpdateConfig { .. } => Ok(false), + ControlCommand::TriggerCheckpoint { .. } => Ok(false), + } + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> anyhow::Result> { + Ok(vec![]) + } +} diff --git a/src/runtime/streaming/api/source.rs b/src/runtime/streaming/api/source.rs new file mode 100644 index 00000000..8ddeb3cf --- /dev/null +++ b/src/runtime/streaming/api/source.rs @@ -0,0 +1,43 @@ +//! 
源算子:由 [`crate::runtime::streaming::execution::SourceRunner`] 驱动 `fetch_next`,不得在内部死循环阻塞控制面。 + +use crate::runtime::streaming::api::context::TaskContext; +use arrow_array::RecordBatch; +use async_trait::async_trait; +use crate::sql::common::{CheckpointBarrier, Watermark}; + +/// Starting consumption strategy for external sources such as Kafka when **no stored offset exists** (aligned with `arroyo-connectors` semantics). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum SourceOffset { + Earliest, + Latest, + #[default] + Group, +} + +#[derive(Debug)] +pub enum SourceEvent { + Data(RecordBatch), + Watermark(Watermark), + Idle, +} + +#[async_trait] +pub trait SourceOperator: Send + 'static { + fn name(&self) -> &str; + + async fn on_start(&mut self, _ctx: &mut TaskContext) -> anyhow::Result<()> { + Ok(()) + } + + async fn fetch_next(&mut self, ctx: &mut TaskContext) -> anyhow::Result; + + async fn snapshot_state( + &mut self, + barrier: CheckpointBarrier, + ctx: &mut TaskContext, + ) -> anyhow::Result<()>; + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> anyhow::Result<()> { + Ok(()) + } +} diff --git a/src/runtime/streaming/arrow/mod.rs b/src/runtime/streaming/arrow/mod.rs new file mode 100644 index 00000000..fdfa87f7 --- /dev/null +++ b/src/runtime/streaming/arrow/mod.rs @@ -0,0 +1,68 @@ +//! Arrow / DataFusion helpers: aggregate expression decoding, etc. +//! +//! 
`UpdatingCache` 位于 [`crate::runtime::streaming::operators::updating_cache`]。 + +use arrow::datatypes::SchemaRef; +use datafusion::common::internal_err; +use datafusion::common::Result as DFResult; +use datafusion::execution::FunctionRegistry; +use datafusion::physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; +use datafusion::physical_expr::{LexOrdering, PhysicalExpr}; +use datafusion_proto::physical_plan::from_proto::{parse_physical_expr, parse_physical_sort_expr}; +use datafusion_proto::physical_plan::{DefaultPhysicalExtensionCodec, PhysicalExtensionCodec}; +use datafusion_proto::protobuf::physical_aggregate_expr_node::AggregateFunction; +use datafusion_proto::protobuf::physical_expr_node::ExprType; +use datafusion_proto::protobuf::{PhysicalExprNode, proto_error}; +use std::sync::Arc; + +/// 从 `PhysicalExprNode` 解码 UDAF 聚合表达式(与 worker `arrow/mod` 一致)。 +pub fn decode_aggregate( + schema: &SchemaRef, + name: &str, + expr: &PhysicalExprNode, + registry: &dyn FunctionRegistry, +) -> DFResult> { + let codec = &DefaultPhysicalExtensionCodec {}; + let expr_type = expr + .expr_type + .as_ref() + .ok_or_else(|| proto_error("Unexpected empty aggregate physical expression"))?; + + match expr_type { + ExprType::AggregateExpr(agg_node) => { + let input_phy_expr: Vec> = agg_node + .expr + .iter() + .map(|e| parse_physical_expr(e, registry, schema, codec)) + .collect::>>()?; + let ordering_req: LexOrdering = agg_node + .ordering_req + .iter() + .map(|e| parse_physical_sort_expr(e, registry, schema, codec)) + .collect::>()?; + agg_node + .aggregate_function + .as_ref() + .map(|func| match func { + AggregateFunction::UserDefinedAggrFunction(udaf_name) => { + let agg_udf = match &agg_node.fun_definition { + Some(buf) => codec.try_decode_udaf(udaf_name, buf)?, + None => registry.udaf(udaf_name)?, + }; + + AggregateExprBuilder::new(agg_udf, input_phy_expr) + .schema(Arc::clone(schema)) + .alias(name) + .with_ignore_nulls(agg_node.ignore_nulls) + 
.with_distinct(agg_node.distinct) + .order_by(ordering_req) + .build() + .map(Arc::new) + } + }) + .transpose()? + .ok_or_else(|| proto_error("Invalid AggregateExpr, missing aggregate_function")) + } + _ => internal_err!("Invalid aggregate expression for AggregateExec"), + } +} diff --git a/src/runtime/streaming/cluster/graph.rs b/src/runtime/streaming/cluster/graph.rs new file mode 100644 index 00000000..1ee8f8f7 --- /dev/null +++ b/src/runtime/streaming/cluster/graph.rs @@ -0,0 +1,136 @@ +use std::fmt; +use std::sync::Arc; + +use crate::sql::common::FsSchema; +// ============ 强类型 ID (Strong-type IDs) ============ + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct JobId(pub String); + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct VertexId(pub u32); + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SubtaskIndex(pub u32); + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct OperatorUid(pub String); + +impl fmt::Display for JobId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl fmt::Display for VertexId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl fmt::Display for SubtaskIndex { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl fmt::Display for OperatorUid { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +// ============ 资源画像 (Resource Profile) ============ + +#[derive(Debug, Clone)] +pub struct ResourceProfile { + pub managed_memory_bytes: u64, + pub cpu_cores: f64, + pub network_memory_bytes: u64, +} + +impl Default for ResourceProfile { + fn default() -> Self { + Self { + managed_memory_bytes: 64 * 1024 * 1024, + cpu_cores: 1.0, + network_memory_bytes: 32 * 1024 * 1024, + } + } +} + +// ============ 分区策略 (Partitioning Strategy) ============ + +#[derive(Debug, Clone)] +pub enum PartitioningStrategy { + 
Forward, + HashByKeys(Vec), + Rebalance, +} + +// ============ 交换模式 (Exchange Mode) ============ + +#[derive(Debug, Clone)] +pub enum ExchangeMode { + LocalThread, + RemoteNetwork { target_addr: String }, +} + +// ============ 部署描述符 (Deployment Descriptors) ============ + +#[derive(Debug, Clone)] +pub struct TaskDeploymentDescriptor { + pub job_id: JobId, + pub vertex_id: VertexId, + pub subtask_idx: SubtaskIndex, + pub parallelism: u32, + pub operator_name: String, + pub operator_uid: OperatorUid, + pub is_source: bool, + pub operator_config_payload: Vec, + pub resources: ResourceProfile, + pub in_schemas: Vec>, + pub out_schema: Option>, + pub input_gates_count: usize, + pub output_gates_count: usize, +} + +#[derive(Debug, Clone)] +pub struct PhysicalEdgeDescriptor { + pub src_vertex: VertexId, + pub src_subtask: SubtaskIndex, + pub dst_vertex: VertexId, + pub dst_subtask: SubtaskIndex, + pub partitioning: PartitioningStrategy, + pub exchange_mode: ExchangeMode, +} + +// ============ 执行图 (Execution Graph) ============ + +#[derive(Debug, Clone)] +pub struct ExecutionGraph { + pub job_id: JobId, + pub tasks: Vec, + pub edges: Vec, +} + +impl ExecutionGraph { + pub fn validate(&self) -> Result<(), String> { + if self.tasks.is_empty() { + return Err("Execution graph has no tasks".into()); + } + if self.edges.is_empty() && self.tasks.len() > 1 { + return Err("Multi-task graph has no edges".into()); + } + let mut seen = std::collections::HashSet::new(); + for tdd in &self.tasks { + if !seen.insert((tdd.vertex_id, tdd.subtask_idx)) { + return Err(format!( + "Duplicate subtask: vertex={}, subtask={}", + tdd.vertex_id, tdd.subtask_idx + )); + } + } + Ok(()) + } +} diff --git a/src/runtime/streaming/cluster/manager.rs b/src/runtime/streaming/cluster/manager.rs new file mode 100644 index 00000000..ce8ec881 --- /dev/null +++ b/src/runtime/streaming/cluster/manager.rs @@ -0,0 +1,164 @@ +use crate::runtime::streaming::api::context::TaskContext; +use 
crate::runtime::streaming::api::operator::ConstructedOperator; +use crate::runtime::streaming::cluster::graph::ExecutionGraph; +use crate::runtime::streaming::execution::runner::SubtaskRunner; +use crate::runtime::streaming::execution::source::SourceRunner; +use crate::runtime::streaming::factory::OperatorFactory; +use crate::runtime::streaming::memory::MemoryPool; +use crate::runtime::streaming::network::NetworkEnvironment; +use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; +use arroyo_state::tables::table_manager::TableManager; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::mpsc::{channel, Sender}; +use tokio::task::JoinSet; +use tracing::{error, info, instrument, warn}; + +pub struct TaskManager { + pub worker_id: String, + memory_pool: Arc, + table_manager: Arc>, + operator_factory: Arc, + task_supervisors: JoinSet<()>, + pub controllers: HashMap<(u32, u32), Sender>, +} + +impl TaskManager { + pub fn new( + worker_id: String, + max_memory_bytes: usize, + table_manager: Arc>, + operator_factory: Arc, + ) -> Self { + Self { + worker_id, + memory_pool: MemoryPool::new(max_memory_bytes), + table_manager, + operator_factory, + task_supervisors: JoinSet::new(), + controllers: HashMap::new(), + } + } + + #[instrument(skip(self, graph), fields(job_id = %graph.job_id))] + pub async fn deploy_and_start(&mut self, graph: ExecutionGraph) -> anyhow::Result<()> { + info!("TaskManager [{}] starting deployment...", self.worker_id); + + graph + .validate() + .map_err(|e| anyhow::anyhow!("Graph validation failed: {}", e))?; + + // 1. 网络连线期 + let local_queue_size = 1024; + let mut network_env = NetworkEnvironment::build_from_graph(&graph, local_queue_size); + + // 2. 控制通道初始化 + let mut control_rxs = HashMap::new(); + for tdd in &graph.tasks { + let key = (tdd.vertex_id.0, tdd.subtask_idx.0); + let (ctrl_tx, ctrl_rx) = channel(32); + self.controllers.insert(key, ctrl_tx); + control_rxs.insert(key, ctrl_rx); + } + + // 3. 
部署与算子实例化 + for tdd in graph.tasks { + let v_id = tdd.vertex_id; + let s_idx = tdd.subtask_idx; + let key = (v_id.0, s_idx.0); + + let ctrl_rx = control_rxs.remove(&key).unwrap(); + let inboxes = network_env.take_inboxes(v_id, s_idx); + let outboxes = network_env.take_outboxes(v_id, s_idx); + + let ctx = TaskContext::new( + tdd.job_id.0.clone(), + v_id.0, + s_idx.0, + tdd.parallelism, + outboxes, + self.memory_pool.clone(), + Some(self.table_manager.clone()), + ); + + let constructed_op = self.operator_factory.create_operator( + &tdd.operator_name, + &tdd.operator_config_payload, + )?; + + // 4. 任务发射入监督树 + let worker_id = self.worker_id.clone(); + match constructed_op { + ConstructedOperator::Source(source_op) => { + let runner = SourceRunner::new(source_op, ctx, ctrl_rx); + self.task_supervisors.spawn(async move { + if let Err(e) = runner.run().await { + error!( + worker = %worker_id, + vertex = key.0, + subtask = key.1, + "SourceTask CRASHED: {:?}", e + ); + panic!("SourceTask failed"); + } + }); + } + ConstructedOperator::Operator(msg_op) => { + let runner = SubtaskRunner::new(msg_op, ctx, inboxes, ctrl_rx); + self.task_supervisors.spawn(async move { + if let Err(e) = runner.run().await { + error!( + worker = %worker_id, + vertex = key.0, + subtask = key.1, + "StreamTask CRASHED: {:?}", e + ); + panic!("StreamTask failed"); + } + }); + } + } + } + + info!( + "TaskManager [{}] deployment complete. All tasks ignited.", + self.worker_id + ); + Ok(()) + } + + /// 监控运行状态:Supervisor 模式防止级联崩溃 + pub async fn wait_and_supervise(mut self) { + while let Some(result) = self.task_supervisors.join_next().await { + match result { + Ok(_) => { + info!("A subtask finished successfully."); + } + Err(join_error) => { + if join_error.is_panic() { + error!( + "FATAL: A subtask panicked! Initiating emergency shutdown \ + of the entire TaskManager to prevent data corruption." 
+ ); + self.task_supervisors.abort_all(); + break; + } else if join_error.is_cancelled() { + warn!("A subtask was cancelled."); + } + } + } + } + info!("TaskManager shutdown process complete."); + } + + pub async fn stop_all(&self, mode: StopMode) { + for (key, tx) in &self.controllers { + if let Err(e) = tx + .send(ControlCommand::Stop { mode: mode.clone() }) + .await + { + warn!("Failed to send stop command to task {:?}: {}", key, e); + } + } + } +} diff --git a/src/runtime/streaming/cluster/master.rs b/src/runtime/streaming/cluster/master.rs new file mode 100644 index 00000000..5817643d --- /dev/null +++ b/src/runtime/streaming/cluster/master.rs @@ -0,0 +1,274 @@ +use std::collections::HashMap; +use anyhow::Result; + +use crate::runtime::streaming::cluster::graph::{ + ExchangeMode, ExecutionGraph, JobId, OperatorUid, PartitioningStrategy, + PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, VertexId, +}; + +use arroyo_datastream::logical::{LogicalEdgeType, LogicalGraph, OperatorChain}; +use petgraph::Direction; +use sha2::{Digest, Sha256}; +use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph}; + +#[derive(thiserror::Error, Debug)] +pub enum CompileError { + #[error("Topology Error: Forward edge between Vertex {src} (p={src_p}) and {dst} (p={dst_p}) requires identical parallelism.")] + ParallelismMismatch { + src: u32, + src_p: usize, + dst: u32, + dst_p: usize, + }, + + #[error("Serialization Error: Failed to serialize operator chain for Vertex {vertex_id}. 
Error: {source}")] + SerializationFailed { + vertex_id: u32, + source: anyhow::Error, + }, + + #[error("Validation Error: {0}")] + ValidationError(String), +} + +pub struct JobCompiler; + +impl JobCompiler { + pub fn compile( + job_id: String, + logical: &LogicalGraph, + ) -> Result { + let mut tasks = Vec::new(); + let mut edges = Vec::new(); + let job_id_typed = JobId(job_id.clone()); + + // ==================================================================== + // 阶段 1:预计算网络门数量 (Pre-compute Network Gates) + // ==================================================================== + let mut in_degrees: HashMap<(u32, u32), usize> = HashMap::new(); + let mut out_degrees: HashMap<(u32, u32), usize> = HashMap::new(); + + for edge_idx in logical.edge_indices() { + let edge = logical.edge_weight(edge_idx).unwrap(); + let (src_idx, dst_idx) = logical.edge_endpoints(edge_idx).unwrap(); + let src_node = logical.node_weight(src_idx).unwrap(); + let dst_node = logical.node_weight(dst_idx).unwrap(); + + match edge.edge_type { + LogicalEdgeType::Forward => { + if src_node.parallelism != dst_node.parallelism { + return Err(CompileError::ParallelismMismatch { + src: src_node.node_id, + src_p: src_node.parallelism, + dst: dst_node.node_id, + dst_p: dst_node.parallelism, + }); + } + for i in 0..src_node.parallelism as u32 { + *out_degrees.entry((src_node.node_id, i)).or_insert(0) += 1; + *in_degrees.entry((dst_node.node_id, i)).or_insert(0) += 1; + } + } + LogicalEdgeType::Shuffle + | LogicalEdgeType::LeftJoin + | LogicalEdgeType::RightJoin => { + for s in 0..src_node.parallelism as u32 { + *out_degrees.entry((src_node.node_id, s)).or_insert(0) += + dst_node.parallelism; + } + for d in 0..dst_node.parallelism as u32 { + *in_degrees.entry((dst_node.node_id, d)).or_insert(0) += + src_node.parallelism; + } + } + } + } + + // ==================================================================== + // 阶段 2:节点展开与算子融合 (Node Expansion & Operator Fusion) + // 
==================================================================== + for idx in logical.node_indices() { + let node = logical.node_weight(idx).unwrap(); + let parallelism = node.parallelism as u32; + + let in_schemas: Vec<_> = logical + .edges_directed(idx, Direction::Incoming) + .map(|e| e.weight().schema.clone()) + .collect(); + let out_schema = logical + .edges_directed(idx, Direction::Outgoing) + .map(|e| e.weight().schema.clone()) + .next(); + + let is_source = node.operator_chain.is_source(); + let (head_op, _) = node + .operator_chain + .iter() + .next() + .expect("operator chain is non-empty"); + + let chain_payload = + Self::serialize_operator_chain(&node.operator_chain).map_err(|e| { + CompileError::SerializationFailed { + vertex_id: node.node_id, + source: e, + } + })?; + + let base_uid = Self::generate_deterministic_uid( + &job_id, + node.node_id, + &node.operator_chain, + ); + + let resource_profile = + Self::calculate_resource_profile(&node.operator_chain, parallelism); + + for subtask_idx in 0..parallelism { + let s_idx = SubtaskIndex(subtask_idx); + let v_id = VertexId(node.node_id); + + let input_gates_count = *in_degrees + .get(&(node.node_id, subtask_idx)) + .unwrap_or(&0); + let output_gates_count = *out_degrees + .get(&(node.node_id, subtask_idx)) + .unwrap_or(&0); + + tasks.push(TaskDeploymentDescriptor { + job_id: job_id_typed.clone(), + vertex_id: v_id, + subtask_idx: s_idx, + parallelism, + operator_name: head_op.operator_name.to_string(), + operator_uid: OperatorUid(format!("{}-{}", base_uid, subtask_idx)), + is_source, + operator_config_payload: chain_payload.clone(), + resources: resource_profile.clone(), + in_schemas: in_schemas.clone(), + out_schema: out_schema.clone(), + input_gates_count, + output_gates_count, + }); + } + } + + // ==================================================================== + // 阶段 3:物理边展开与路由策略推断 (Edge Expansion & Partitioning) + // ==================================================================== + for 
edge_idx in logical.edge_indices() { + let edge = logical.edge_weight(edge_idx).unwrap(); + let (src_graph_idx, dst_graph_idx) = logical.edge_endpoints(edge_idx).unwrap(); + let src_node = logical.node_weight(src_graph_idx).unwrap(); + let dst_node = logical.node_weight(dst_graph_idx).unwrap(); + + let partitioning = match edge.edge_type { + LogicalEdgeType::Forward => PartitioningStrategy::Forward, + LogicalEdgeType::Shuffle + | LogicalEdgeType::LeftJoin + | LogicalEdgeType::RightJoin => { + if let Some(key_indices) = edge.schema.key_indices.as_ref() { + if !key_indices.is_empty() { + PartitioningStrategy::HashByKeys(key_indices.clone()) + } else { + PartitioningStrategy::Rebalance + } + } else { + PartitioningStrategy::Rebalance + } + } + }; + + let default_exchange = ExchangeMode::LocalThread; + + match edge.edge_type { + LogicalEdgeType::Forward => { + for i in 0..src_node.parallelism as u32 { + edges.push(PhysicalEdgeDescriptor { + src_vertex: VertexId(src_node.node_id), + src_subtask: SubtaskIndex(i), + dst_vertex: VertexId(dst_node.node_id), + dst_subtask: SubtaskIndex(i), + partitioning: partitioning.clone(), + exchange_mode: default_exchange.clone(), + }); + } + } + _ => { + for src_idx in 0..src_node.parallelism as u32 { + for dst_idx in 0..dst_node.parallelism as u32 { + edges.push(PhysicalEdgeDescriptor { + src_vertex: VertexId(src_node.node_id), + src_subtask: SubtaskIndex(src_idx), + dst_vertex: VertexId(dst_node.node_id), + dst_subtask: SubtaskIndex(dst_idx), + partitioning: partitioning.clone(), + exchange_mode: default_exchange.clone(), + }); + } + } + } + } + } + + let exec_graph = ExecutionGraph { + job_id: job_id_typed, + tasks, + edges, + }; + + // ==================================================================== + // 阶段 4:执行拓扑图防御性自检 (Validation) + // ==================================================================== + exec_graph + .validate() + .map_err(CompileError::ValidationError)?; + + Ok(exec_graph) + } + + /// 确定性状态 UID 
生成器:哪怕拓扑变化,只要算子内部逻辑不变就能继承状态。 + fn generate_deterministic_uid( + job_id: &str, + node_id: u32, + chain: &OperatorChain, + ) -> String { + let mut hasher = Sha256::new(); + hasher.update(job_id.as_bytes()); + hasher.update(&node_id.to_le_bytes()); + + for (op, _) in chain.iter() { + hasher.update(op.operator_name.to_string().as_bytes()); + hasher.update(&op.operator_config); + } + + let result = hasher.finalize(); + hex::encode(&result[..8]) + } + + /// 序列化整条算子链 (Operator Fusion) + fn serialize_operator_chain(chain: &OperatorChain) -> Result> { + bincode::serde::encode_to_vec(chain, bincode::config::standard()) + .map_err(|e| anyhow::anyhow!("bincode encode failed: {}", e)) + } + + /// 资源画像智能推算 + fn calculate_resource_profile( + chain: &OperatorChain, + parallelism: u32, + ) -> ResourceProfile { + let mut profile = ResourceProfile::default(); + + for (op, _) in chain.iter() { + let name = op.operator_name.to_string(); + if name.contains("Window") || name.contains("Join") || name.contains("Aggregate") { + profile.managed_memory_bytes += 512 * 1024 * 1024 / parallelism as u64; + profile.cpu_cores += 0.5; + } + if name.contains("Source") || name.contains("Sink") { + profile.network_memory_bytes += 128 * 1024 * 1024 / parallelism as u64; + } + } + profile + } +} diff --git a/src/runtime/streaming/cluster/mod.rs b/src/runtime/streaming/cluster/mod.rs new file mode 100644 index 00000000..f337078c --- /dev/null +++ b/src/runtime/streaming/cluster/mod.rs @@ -0,0 +1,11 @@ +pub mod graph; +pub mod manager; +pub mod master; +mod wiring; + +pub use graph::{ + ExchangeMode, ExecutionGraph, JobId, OperatorUid, PartitioningStrategy, + PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, VertexId, +}; +pub use manager::TaskManager; +pub use master::{CompileError, JobCompiler}; diff --git a/src/runtime/streaming/cluster/wiring.rs b/src/runtime/streaming/cluster/wiring.rs new file mode 100644 index 00000000..eb3b4162 --- /dev/null +++ 
b/src/runtime/streaming/cluster/wiring.rs @@ -0,0 +1,46 @@ +//! 物理拓扑构建:channel 与一对一子任务边。 +//! +//! 将 `arroyo_datastream::LogicalGraph` 完整编译为 Task 管道属于上层 worker/planner; +//! 此处提供 **与图无关** 的 channel 工厂与边展开,供适配层调用。 + +use crate::runtime::streaming::protocol::tracked::TrackedEvent; +use std::collections::HashMap; +use tokio::sync::mpsc::{self, Receiver, Sender}; + +pub type SubtaskKey = (String, u32); + +pub type SubtaskOutChannels = HashMap>>; +pub type SubtaskInChannels = HashMap>>; + +pub fn stream_channel(capacity: usize) -> (Sender, Receiver) { + mpsc::channel(capacity) +} + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct NodeSpec { + pub id: String, + pub parallelism: u32, +} + +#[derive(Debug, Clone)] +pub struct PhysicalEdge { + pub from: (String, u32), + pub to: (String, u32), +} + +/// 为每条 `PhysicalEdge` 建一条独立 channel,并挂到对应子任务的 sender/receiver 列表。 +pub fn build_one_to_one_channels( + edges: &[PhysicalEdge], + capacity: usize, +) -> (SubtaskOutChannels, SubtaskInChannels) { + let mut senders: SubtaskOutChannels = HashMap::new(); + let mut receivers: SubtaskInChannels = HashMap::new(); + + for e in edges { + let (tx, rx) = stream_channel(capacity); + senders.entry(e.from.clone()).or_default().push(tx); + receivers.entry(e.to.clone()).or_default().push(rx); + } + + (senders, receivers) +} diff --git a/src/runtime/streaming/error.rs b/src/runtime/streaming/error.rs new file mode 100644 index 00000000..f00bd9c4 --- /dev/null +++ b/src/runtime/streaming/error.rs @@ -0,0 +1,10 @@ +use thiserror::Error; + +/// 子任务 / 源任务运行中的错误。 +#[derive(Debug, Error)] +pub enum RunError { + #[error("operator error: {0:#}")] + Operator(#[from] anyhow::Error), + #[error("downstream send: {0}")] + DownstreamSend(String), +} diff --git a/src/runtime/streaming/execution/mod.rs b/src/runtime/streaming/execution/mod.rs new file mode 100644 index 00000000..34002193 --- /dev/null +++ b/src/runtime/streaming/execution/mod.rs @@ -0,0 +1,8 @@ +//! 
执行层:Tokio Actor 运行容器。 + +pub mod runner; +pub mod source; +pub mod tracker; + +pub use runner::SubtaskRunner; +pub use source::{SourceRunner, SOURCE_IDLE_SLEEP}; diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs new file mode 100644 index 00000000..f1733b29 --- /dev/null +++ b/src/runtime/streaming/execution/runner.rs @@ -0,0 +1,298 @@ +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::error::RunError; +use crate::runtime::streaming::protocol::control::ControlCommand; +use crate::runtime::streaming::protocol::event::StreamEvent; +use crate::runtime::streaming::protocol::stream_out::StreamOutput; +use crate::runtime::streaming::protocol::tracked::TrackedEvent; +use crate::runtime::streaming::protocol::Watermark; +use super::tracker::barrier_aligner::{AlignmentStatus, BarrierAligner}; +use super::tracker::watermark_tracker::WatermarkTracker; +use crate::runtime::streaming::network::endpoint::BoxedEventStream; +use arroyo_types::CheckpointBarrier; +use std::collections::VecDeque; +use std::pin::Pin; +use tokio::sync::mpsc::Receiver; +use tokio_stream::{StreamExt, StreamMap}; +use tracing::{debug, error, info, warn}; +use crate::sql::common::{CheckpointBarrier, Watermark}; + +pub struct SubtaskRunner { + operator: Box, + ctx: TaskContext, + inboxes: Vec, + control_rx: Receiver, +} + +impl SubtaskRunner { + pub fn new( + operator: Box, + ctx: TaskContext, + inboxes: Vec, + control_rx: Receiver, + ) -> Self { + Self { operator, ctx, inboxes, control_rx } + } + + pub async fn run(mut self) -> Result<(), RunError> { + let input_count = self.inboxes.len(); + info!( + job_id = %self.ctx.job_id, + vertex = self.ctx.vertex_id, + subtask = self.ctx.subtask_idx, + inputs = input_count, + operator = %self.operator.name(), + "subtask starting" + ); + + self.operator.on_start(&mut self.ctx).await?; + + if input_count == 0 { + 
return self.run_source_loop().await; + } + + let mut stream_map: StreamMap + Send>>> = StreamMap::new(); + for (i, inbox) in self.inboxes.into_iter().enumerate() { + stream_map.insert(i, inbox); + } + + let mut wm_tracker = WatermarkTracker::new(input_count); + let mut barrier_aligner = BarrierAligner::new(input_count); + let mut eof_count = 0usize; + let mut closed_on_full_eof = false; + + let tick_interval = self.operator.tick_interval(); + let mut tick_sleep: Option>> = + tick_interval.map(|d| Box::pin(tokio::time::sleep(d))); + let mut tick_index: u64 = 0; + + 'run: loop { + tokio::select! { + biased; + + cmd_opt = self.control_rx.recv() => { + match cmd_opt { + None => { + debug!( + vertex = self.ctx.vertex_id, + subtask = self.ctx.subtask_idx, + "control channel closed" + ); + break 'run; + } + Some(cmd) => { + info!( + vertex = self.ctx.vertex_id, + subtask = self.ctx.subtask_idx, + ?cmd, + "control command" + ); + if Self::handle_control_command(&mut self.operator, &mut self.ctx, cmd) + .await? + { + break 'run; + } + } + } + } + + next_item = stream_map.next() => { + let Some((input_idx, event)) = next_item else { + break 'run; + }; + + if barrier_aligner.is_blocked(input_idx) + && !matches!(event.event, StreamEvent::Barrier(_)) + { + barrier_aligner.buffer_event(input_idx, event); + } else { + let mut work = VecDeque::new(); + work.push_back((input_idx, event)); + let mut exit_run = false; + let mut dispatch = EventDispatchState { + operator: &mut self.operator, + ctx: &mut self.ctx, + work: &mut work, + wm_tracker: &mut wm_tracker, + barrier_aligner: &mut barrier_aligner, + eof_count: &mut eof_count, + closed_on_full_eof: &mut closed_on_full_eof, + input_count, + }; + while let Some((idx, ev)) = dispatch.work.pop_front() { + if Self::dispatch_stream_event(&mut dispatch, idx, ev).await? 
{ + exit_run = true; + break; + } + } + if exit_run { + break 'run; + } + } + } + + _ = async { + match tick_sleep.as_mut() { + Some(s) => s.as_mut().await, + None => std::future::pending().await, + } + }, if tick_interval.is_some() => { + let outs = self + .operator + .process_tick(tick_index, &mut self.ctx) + .await?; + tick_index = tick_index.wrapping_add(1); + Self::dispatch_stream_outputs(&mut self.ctx, outs).await?; + if let (Some(d), Some(s)) = (tick_interval, tick_sleep.as_mut()) { + s.as_mut() + .reset(tokio::time::Instant::now() + d); + } + } + } + } + + if !closed_on_full_eof { + let close_outs = self.operator.on_close(&mut self.ctx).await?; + Self::dispatch_stream_outputs(&mut self.ctx, close_outs).await?; + } + + info!( + vertex = self.ctx.vertex_id, + subtask = self.ctx.subtask_idx, + "subtask shutdown" + ); + Ok(()) + } + + async fn run_source_loop(mut self) -> Result<(), RunError> { + while let Some(cmd) = self.control_rx.recv().await { + if Self::handle_control_command(&mut self.operator, &mut self.ctx, cmd).await? 
{ + break; + } + } + let close_outs = self.operator.on_close(&mut self.ctx).await?; + Self::dispatch_stream_outputs(&mut self.ctx, close_outs).await?; + if !self.ctx.outboxes.is_empty() { + self.ctx.broadcast(StreamEvent::EndOfStream).await?; + } + info!( + vertex = self.ctx.vertex_id, + subtask = self.ctx.subtask_idx, + "Source subtask finished" + ); + Ok(()) + } + + async fn handle_control_command( + operator: &mut Box, + ctx: &mut TaskContext, + cmd: ControlCommand, + ) -> Result { + if let ControlCommand::TriggerCheckpoint { barrier } = &cmd { + let barrier: CheckpointBarrier = barrier.clone().into(); + if let Err(e) = operator.snapshot_state(barrier, ctx).await { + error!("Source snapshot failed: {}", e); + } + ctx.broadcast(StreamEvent::Barrier(barrier)).await?; + } + + if let ControlCommand::Commit { epoch } = &cmd { + if let Err(e) = operator.commit_checkpoint(*epoch, ctx).await { + error!("commit_checkpoint failed: {}", e); + } + } + + match operator.handle_control(cmd, ctx).await { + Ok(should_stop) => Ok(should_stop), + Err(e) => { + warn!("handle_control error: {}", e); + Ok(false) + } + } + } + + async fn dispatch_stream_outputs( + ctx: &mut TaskContext, + outputs: Vec, + ) -> Result<(), RunError> { + for out in outputs { + match out { + StreamOutput::Forward(b) => ctx.collect(b).await?, + StreamOutput::Keyed(hash, b) => ctx.collect_keyed(hash, b).await?, + StreamOutput::Broadcast(b) => ctx.collect(b).await?, + StreamOutput::Watermark(wm) => { + ctx.broadcast(StreamEvent::Watermark(wm)).await?; + } + } + } + Ok(()) + } + + async fn dispatch_stream_event( + st: &mut EventDispatchState<'_>, + input_idx: usize, + tracked: TrackedEvent, + ) -> Result { + let event = tracked.event; + match event { + StreamEvent::Data(batch) => { + let outputs = st + .operator + .process_data(input_idx, batch, st.ctx) + .await?; + Self::dispatch_stream_outputs(st.ctx, outputs).await?; + } + StreamEvent::Watermark(wm) => { + if let Some(aligned_wm) = 
st.wm_tracker.update(input_idx, wm) { + if let Watermark::EventTime(t) = aligned_wm { + st.ctx.last_present_watermark = Some(t); + } + let outputs = st + .operator + .process_watermark(aligned_wm.clone(), st.ctx) + .await?; + Self::dispatch_stream_outputs(st.ctx, outputs).await?; + st.ctx + .broadcast(StreamEvent::Watermark(aligned_wm)) + .await?; + } + } + StreamEvent::Barrier(barrier) => { + match st.barrier_aligner.mark(input_idx, &barrier) { + AlignmentStatus::Pending => {} + AlignmentStatus::Complete(buffered) => { + if let Err(e) = st.operator.snapshot_state(barrier, st.ctx).await { + error!("Operator snapshot failed: {}", e); + } + st.ctx.broadcast(StreamEvent::Barrier(barrier)).await?; + for pair in buffered { + st.work.push_back(pair); + } + } + } + } + StreamEvent::EndOfStream => { + *st.eof_count += 1; + if *st.eof_count == st.input_count { + let close_outs = st.operator.on_close(st.ctx).await?; + Self::dispatch_stream_outputs(st.ctx, close_outs).await?; + *st.closed_on_full_eof = true; + st.ctx.broadcast(StreamEvent::EndOfStream).await?; + return Ok(true); + } + } + } + Ok(false) + } +} + +struct EventDispatchState<'a> { + operator: &'a mut Box, + ctx: &'a mut TaskContext, + work: &'a mut VecDeque<(usize, TrackedEvent)>, + wm_tracker: &'a mut WatermarkTracker, + barrier_aligner: &'a mut BarrierAligner, + eof_count: &'a mut usize, + closed_on_full_eof: &'a mut bool, + input_count: usize, +} diff --git a/src/runtime/streaming/execution/source.rs b/src/runtime/streaming/execution/source.rs new file mode 100644 index 00000000..9fe1983e --- /dev/null +++ b/src/runtime/streaming/execution/source.rs @@ -0,0 +1,120 @@ +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::source::{SourceEvent, SourceOperator}; +use crate::runtime::streaming::error::RunError; +use crate::runtime::streaming::protocol::control::ControlCommand; +use crate::runtime::streaming::protocol::event::StreamEvent; +use std::time::Duration; +use 
tokio::sync::mpsc::Receiver; +use tokio::time::sleep; +use tracing::{debug, info, warn}; +use crate::sql::common::CheckpointBarrier; + +pub const SOURCE_IDLE_SLEEP: Duration = Duration::from_millis(50); + +pub struct SourceRunner { + operator: Box, + ctx: TaskContext, + control_rx: Receiver, +} + +impl SourceRunner { + pub fn new( + operator: Box, + ctx: TaskContext, + control_rx: Receiver, + ) -> Self { + Self { + operator, + ctx, + control_rx, + } + } + + pub async fn run(mut self) -> Result<(), RunError> { + info!( + job_id = %self.ctx.job_id, + vertex = self.ctx.vertex_id, + subtask = self.ctx.subtask_idx, + operator = %self.operator.name(), + "source subtask starting" + ); + + self.operator.on_start(&mut self.ctx).await?; + + let mut is_running = true; + let mut idle_pending = false; + + while is_running { + tokio::select! { + biased; + cmd_opt = self.control_rx.recv() => { + match cmd_opt { + None => { + debug!( + vertex = self.ctx.vertex_id, + subtask = self.ctx.subtask_idx, + "source control channel closed" + ); + is_running = false; + } + Some(cmd) => { + match cmd { + ControlCommand::Stop { .. } => { + is_running = false; + } + ControlCommand::TriggerCheckpoint { barrier } => { + let barrier: CheckpointBarrier = barrier.into(); + self.operator + .snapshot_state(barrier, &mut self.ctx) + .await?; + self.ctx + .broadcast(StreamEvent::Barrier(barrier)) + .await?; + } + ControlCommand::Start + | ControlCommand::DropState + | ControlCommand::Commit { .. } + | ControlCommand::UpdateConfig { .. 
} => { + debug!(?cmd, "source: ignored control command"); + } + } + } + } + } + _ = sleep(SOURCE_IDLE_SLEEP), if is_running && idle_pending => { + idle_pending = false; + } + fetch_res = self.operator.fetch_next(&mut self.ctx), if is_running && !idle_pending => { + match fetch_res { + Ok(SourceEvent::Data(batch)) => { + self.ctx.collect(batch).await?; + } + Ok(SourceEvent::Watermark(wm)) => { + self.ctx.broadcast(StreamEvent::Watermark(wm)).await?; + } + Ok(SourceEvent::Idle) => { + idle_pending = true; + } + Err(e) => { + warn!( + vertex = self.ctx.vertex_id, + error = %e, + "fetch_next error" + ); + return Err(RunError::Operator(e)); + } + } + } + } + } + + self.operator.on_close(&mut self.ctx).await?; + + info!( + vertex = self.ctx.vertex_id, + subtask = self.ctx.subtask_idx, + "source subtask shutdown" + ); + Ok(()) + } +} diff --git a/src/runtime/streaming/execution/tracker/barrier_aligner.rs b/src/runtime/streaming/execution/tracker/barrier_aligner.rs new file mode 100644 index 00000000..e284922b --- /dev/null +++ b/src/runtime/streaming/execution/tracker/barrier_aligner.rs @@ -0,0 +1,57 @@ +//! 
Chandy–Lamport 风格屏障对齐。 + +use std::collections::HashSet; +use crate::runtime::streaming::protocol::TrackedEvent; +use crate::sql::common::CheckpointBarrier; + +#[derive(Debug)] +pub enum AlignmentStatus { + Pending, + Complete(Vec<(usize, TrackedEvent)>), +} + +#[derive(Debug)] +pub struct BarrierAligner { + input_count: usize, + current_epoch: Option, + reached_inputs: HashSet, + buffered_events: Vec<(usize, TrackedEvent)>, +} + +impl BarrierAligner { + pub fn new(input_count: usize) -> Self { + Self { + input_count, + current_epoch: None, + reached_inputs: HashSet::new(), + buffered_events: Vec::new(), + } + } + + pub fn is_blocked(&self, input_idx: usize) -> bool { + self.current_epoch.is_some() && self.reached_inputs.contains(&input_idx) + } + + pub fn buffer_event(&mut self, input_idx: usize, event: TrackedEvent) { + self.buffered_events.push((input_idx, event)); + } + + pub fn mark(&mut self, input_idx: usize, barrier: &CheckpointBarrier) -> AlignmentStatus { + if self.current_epoch != Some(barrier.epoch) { + self.current_epoch = Some(barrier.epoch); + self.reached_inputs.clear(); + self.buffered_events.clear(); + } + + self.reached_inputs.insert(input_idx); + + if self.reached_inputs.len() == self.input_count { + let released = std::mem::take(&mut self.buffered_events); + self.current_epoch = None; + self.reached_inputs.clear(); + AlignmentStatus::Complete(released) + } else { + AlignmentStatus::Pending + } + } +} \ No newline at end of file diff --git a/src/runtime/streaming/execution/tracker/mod.rs b/src/runtime/streaming/execution/tracker/mod.rs new file mode 100644 index 00000000..bfa24e8b --- /dev/null +++ b/src/runtime/streaming/execution/tracker/mod.rs @@ -0,0 +1,7 @@ +//! 
协调层:屏障对齐与多路水位线追踪。 + +pub mod barrier_aligner; +pub mod watermark_tracker; + +pub use barrier_aligner::{AlignmentStatus, BarrierAligner}; +pub use watermark_tracker::WatermarkTracker; diff --git a/src/runtime/streaming/execution/tracker/watermark_tracker.rs b/src/runtime/streaming/execution/tracker/watermark_tracker.rs new file mode 100644 index 00000000..be7043b9 --- /dev/null +++ b/src/runtime/streaming/execution/tracker/watermark_tracker.rs @@ -0,0 +1,86 @@ +use crate::runtime::streaming::protocol::watermark::{merge_watermarks, watermark_strictly_advances, Watermark}; +use crate::sql::common::Watermark; + +#[derive(Debug)] +pub struct WatermarkTracker { + watermarks: Vec>, + current_min_watermark: Option, +} + +impl WatermarkTracker { + pub fn new(input_count: usize) -> Self { + Self { + watermarks: vec![None; input_count], + current_min_watermark: None, + } + } + + pub fn update(&mut self, input_idx: usize, wm: Watermark) -> Option { + self.watermarks[input_idx] = Some(wm); + + if self.watermarks.iter().any(|w| w.is_none()) { + return None; + } + + let new_min = merge_watermarks(&self.watermarks)?; + + if !watermark_strictly_advances(new_min, self.current_min_watermark) { + return None; + } + + self.current_min_watermark = Some(new_min); + Some(new_min) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::{Duration, SystemTime}; + + #[test] + fn no_emit_until_all_inputs_seen() { + let mut t = WatermarkTracker::new(2); + let w = Watermark::EventTime(SystemTime::UNIX_EPOCH + Duration::from_secs(3)); + assert!(t.update(0, w).is_none()); + let w2 = Watermark::EventTime(SystemTime::UNIX_EPOCH + Duration::from_secs(1)); + assert_eq!(t.update(1, w2), Some(w2)); + } + + #[test] + fn dedup_same_aligned() { + let mut t = WatermarkTracker::new(1); + let w = Watermark::EventTime(SystemTime::UNIX_EPOCH + Duration::from_secs(1)); + assert_eq!(t.update(0, w), Some(w)); + assert!(t.update(0, w).is_none()); + } + + #[test] + fn 
advances_only_when_min_strictly_increases() { + let mut t = WatermarkTracker::new(2); + let t1 = SystemTime::UNIX_EPOCH + Duration::from_secs(1); + let t5 = SystemTime::UNIX_EPOCH + Duration::from_secs(5); + assert!(t.update(0, Watermark::EventTime(t5)).is_none()); + assert_eq!(t.update(1, Watermark::EventTime(t1)), Some(Watermark::EventTime(t1))); + let t3 = SystemTime::UNIX_EPOCH + Duration::from_secs(3); + assert_eq!( + t.update(1, Watermark::EventTime(t3)), + Some(Watermark::EventTime(t3)) + ); + assert!(t.update(1, Watermark::EventTime(t3)).is_none()); + } + + #[test] + fn backward_aligned_min_is_ignored() { + let mut t = WatermarkTracker::new(2); + let t5 = SystemTime::UNIX_EPOCH + Duration::from_secs(5); + let t10 = SystemTime::UNIX_EPOCH + Duration::from_secs(10); + assert!(t.update(0, Watermark::EventTime(t10)).is_none()); + assert_eq!( + t.update(1, Watermark::EventTime(t5)), + Some(Watermark::EventTime(t5)) + ); + let t2 = SystemTime::UNIX_EPOCH + Duration::from_secs(2); + assert!(t.update(0, Watermark::EventTime(t2)).is_none()); + } +} diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs new file mode 100644 index 00000000..4cd52bf3 --- /dev/null +++ b/src/runtime/streaming/factory/mod.rs @@ -0,0 +1,3 @@ +pub mod registry; + +pub use registry:: OperatorFactory; diff --git a/src/runtime/streaming/factory/registry.rs b/src/runtime/streaming/factory/registry.rs new file mode 100644 index 00000000..5b53b920 --- /dev/null +++ b/src/runtime/streaming/factory/registry.rs @@ -0,0 +1,44 @@ +use anyhow::{anyhow, Result}; +use crate::runtime::streaming::api::operator::ConstructedOperator; +use std::collections::HashMap; + + +/// 工业级算子注册表与工厂 +pub struct OperatorFactory { + constructors: HashMap>, +} + +impl OperatorFactory { + pub fn new() -> Self { + let factory = Self { + constructors: HashMap::new(), + }; + + // TODO: 在此注册具体算子构造器 + factory.register("TumblingWindowAggregate", Box::new(TumblingWindowAggregateConstructor)); + 
factory.register("ExpressionWatermark", Box::new(WatermarkGeneratorConstructor)); + factory.register("KafkaSource", Box::new(KafkaSourceConstructor)); + + factory + } + + pub fn register(&mut self, name: &str, constructor: Box) { + self.constructors.insert(name.to_string(), constructor); + } + + /// 反射与实例化:从 TDD 的字节流中拉起运行时的业务算子 + pub fn create_operator(&self, name: &str, payload: &[u8]) -> Result { + let ctor = self + .constructors + .get(name) + .ok_or_else(|| { + anyhow!( + "FATAL: Operator '{}' not found in Factory Registry. \ + Ensure the worker is compiled with the correct plugins.", + name + ) + })?; + + ctor.with_config(payload) + } +} diff --git a/src/runtime/streaming/format/mod.rs b/src/runtime/streaming/format/mod.rs new file mode 100644 index 00000000..e69de29b diff --git a/src/runtime/streaming/lib.rs b/src/runtime/streaming/lib.rs new file mode 100644 index 00000000..67cd8f70 --- /dev/null +++ b/src/runtime/streaming/lib.rs @@ -0,0 +1,44 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Streaming actor runtime (vendored from Arroyo `arroyo-actor-runtime`). 
+ +pub mod api; +pub mod arrow; +pub mod cluster; +pub mod error; +pub mod execution; +pub mod factory; +pub mod memory; +pub mod network; +pub mod operators; +pub mod protocol; +pub mod state; + +pub use api::{ + ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext, +}; +pub use cluster::{ + CompileError, ExchangeMode, ExecutionGraph, JobCompiler, JobId, PartitioningStrategy, + PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, TaskManager, + VertexId, +}; +pub use error::RunError; +pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner}; +pub use factory::{OperatorConstructor, OperatorFactory}; +pub use memory::{MemoryPool, MemoryTicket}; +pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub}; +pub use protocol::{ + CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput, Watermark, + control_channel, merge_watermarks, watermark_strictly_advances, +}; diff --git a/src/runtime/streaming/memory/mod.rs b/src/runtime/streaming/memory/mod.rs new file mode 100644 index 00000000..93101fa2 --- /dev/null +++ b/src/runtime/streaming/memory/mod.rs @@ -0,0 +1,5 @@ +pub mod pool; +pub mod ticket; + +pub use pool::MemoryPool; +pub use ticket::MemoryTicket; diff --git a/src/runtime/streaming/memory/pool.rs b/src/runtime/streaming/memory/pool.rs new file mode 100644 index 00000000..98ba4cf3 --- /dev/null +++ b/src/runtime/streaming/memory/pool.rs @@ -0,0 +1,75 @@ +use parking_lot::Mutex; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use tokio::sync::Notify; +use tracing::{debug, warn}; + +use super::ticket::MemoryTicket; + +/// 工业级全局内存池 (Global Memory Pool) +#[derive(Debug)] +pub struct MemoryPool { + max_bytes: usize, + used_bytes: AtomicUsize, + available_bytes: Mutex, + notify: Notify, +} + +impl MemoryPool { + pub fn new(max_bytes: usize) -> Arc { + Arc::new(Self { + max_bytes, + used_bytes: AtomicUsize::new(0), + 
available_bytes: Mutex::new(max_bytes), + notify: Notify::new(), + }) + } + + pub fn usage_metrics(&self) -> (usize, usize) { + (self.used_bytes.load(Ordering::Relaxed), self.max_bytes) + } + + pub async fn request_memory(self: &Arc, bytes: usize) -> MemoryTicket { + if bytes == 0 { + return MemoryTicket::new(0, self.clone()); + } + + if bytes > self.max_bytes { + warn!( + "Requested memory ({} B) exceeds total pool size ({} B)! \ + Permitting to avoid pipeline deadlock, but OOM risk is critical.", + bytes, self.max_bytes + ); + self.used_bytes.fetch_add(bytes, Ordering::Relaxed); + return MemoryTicket::new(bytes, self.clone()); + } + + loop { + { + let mut available = self.available_bytes.lock(); + if *available >= bytes { + *available -= bytes; + self.used_bytes.fetch_add(bytes, Ordering::Relaxed); + return MemoryTicket::new(bytes, self.clone()); + } + } + + debug!("Backpressure engaged: waiting for {} bytes to be freed...", bytes); + self.notify.notified().await; + } + } + + pub(crate) fn release(&self, bytes: usize) { + if bytes == 0 { + return; + } + + { + let mut available = self.available_bytes.lock(); + *available += bytes; + } + + self.used_bytes.fetch_sub(bytes, Ordering::Relaxed); + self.notify.notify_waiters(); + } +} diff --git a/src/runtime/streaming/memory/ticket.rs b/src/runtime/streaming/memory/ticket.rs new file mode 100644 index 00000000..ca1759b9 --- /dev/null +++ b/src/runtime/streaming/memory/ticket.rs @@ -0,0 +1,24 @@ +use std::sync::Arc; + +use super::pool::MemoryPool; + +/// 内存船票 (RAII Guard) +/// 不实现 Clone:生命周期严格对应唯一的字节扣减。 +/// 跨多路广播时应包裹在 `Arc` 中。 +#[derive(Debug)] +pub struct MemoryTicket { + bytes: usize, + pool: Arc, +} + +impl MemoryTicket { + pub(crate) fn new(bytes: usize, pool: Arc) -> Self { + Self { bytes, pool } + } +} + +impl Drop for MemoryTicket { + fn drop(&mut self) { + self.pool.release(self.bytes); + } +} diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs new file mode 100644 index 00000000..0edc0d2e 
--- /dev/null +++ b/src/runtime/streaming/mod.rs @@ -0,0 +1,45 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Streaming actor runtime (vendored from Arroyo `arroyo-actor-runtime`). + +pub mod api; +pub mod arrow; +pub mod cluster; +pub mod error; +pub mod execution; +pub mod factory; +pub mod memory; +pub mod network; +pub mod operators; +pub mod protocol; +pub mod state; +mod format; + +pub use api::{ + ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext, +}; +pub use cluster::{ + CompileError, ExchangeMode, ExecutionGraph, JobCompiler, JobId, PartitioningStrategy, + PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, TaskManager, + VertexId, +}; +pub use error::RunError; +pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner}; +pub use factory:: OperatorFactory; +pub use memory::{MemoryPool, MemoryTicket}; +pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub}; +pub use protocol::{ + CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput, + control_channel, merge_watermarks, watermark_strictly_advances, +}; diff --git a/src/runtime/streaming/network/endpoint.rs b/src/runtime/streaming/network/endpoint.rs new file mode 100644 index 00000000..3fc1fc57 --- /dev/null +++ b/src/runtime/streaming/network/endpoint.rs @@ -0,0 +1,59 @@ +use crate::runtime::streaming::protocol::event::StreamEvent; +use 
crate::runtime::streaming::protocol::tracked::TrackedEvent; +use anyhow::{anyhow, Result}; +use std::pin::Pin; +use tokio::sync::mpsc; +use tokio_stream::Stream; +use tracing::debug; + +// ======================================================================== +// 1. 网络桩 (Stub):为后续 gRPC/TCP 扩展预留孔位 +// ======================================================================== + +#[derive(Clone)] +pub struct RemoteSenderStub { + pub target_addr: String, +} + +impl RemoteSenderStub { + pub async fn send_over_network(&self, _event: &StreamEvent) -> Result<()> { + unimplemented!("Remote network transport is not yet implemented") + } +} + +// ======================================================================== +// 2. 物理发送端点 (Physical Sender Endpoint) +// ======================================================================== + +/// 统一的物理发送端点。 +/// 算子无需知道目标是同机还是异机,只管调用 `send`。 +#[derive(Clone)] +pub enum PhysicalSender { + /// 本地线程间传输,携带内存船票,零开销 + Local(mpsc::Sender), + /// 跨机网络传输,需要序列化,并在发送后丢弃本地船票 + Remote(RemoteSenderStub), +} + +impl PhysicalSender { + pub async fn send(&self, tracked_event: TrackedEvent) -> Result<()> { + match self { + PhysicalSender::Local(tx) => { + tx.send(tracked_event) + .await + .map_err(|_| anyhow!("Local channel closed! Downstream task may have crashed."))?; + } + PhysicalSender::Remote(stub) => { + stub.send_over_network(&tracked_event.event).await?; + debug!("Sent event over network, local memory ticket will be released."); + } + } + Ok(()) + } +} + +// ======================================================================== +// 3. 
物理接收端点 (Physical Receiver Endpoint) +// ======================================================================== + +pub type BoxedEventStream = Pin + Send>>; diff --git a/src/runtime/streaming/network/environment.rs b/src/runtime/streaming/network/environment.rs new file mode 100644 index 00000000..789af2a8 --- /dev/null +++ b/src/runtime/streaming/network/environment.rs @@ -0,0 +1,82 @@ +use crate::runtime::streaming::cluster::graph::{ + ExchangeMode, ExecutionGraph, SubtaskIndex, VertexId, +}; +use crate::runtime::streaming::protocol::tracked::TrackedEvent; +use super::endpoint::{BoxedEventStream, PhysicalSender, RemoteSenderStub}; +use std::collections::HashMap; +use tokio::sync::mpsc; +use tokio_stream::wrappers::ReceiverStream; +use tracing::info; + +/// 物理网络路由注册表 +pub struct NetworkEnvironment { + pub outboxes: HashMap<(VertexId, SubtaskIndex), Vec>, + pub inboxes: HashMap<(VertexId, SubtaskIndex), Vec>, +} + +impl NetworkEnvironment { + pub fn new() -> Self { + Self { + outboxes: HashMap::new(), + inboxes: HashMap::new(), + } + } + + pub fn build_from_graph(graph: &ExecutionGraph, local_queue_size: usize) -> Self { + let mut env = Self::new(); + + for edge in &graph.edges { + let src_key = (edge.src_vertex, edge.src_subtask); + let dst_key = (edge.dst_vertex, edge.dst_subtask); + + match &edge.exchange_mode { + ExchangeMode::LocalThread => { + let (tx, rx) = mpsc::channel::(local_queue_size); + + let sender = PhysicalSender::Local(tx); + let receiver_stream = + Box::pin(ReceiverStream::new(rx)) as BoxedEventStream; + + env.outboxes.entry(src_key).or_default().push(sender); + env.inboxes.entry(dst_key).or_default().push(receiver_stream); + } + ExchangeMode::RemoteNetwork { target_addr } => { + let remote_stub = RemoteSenderStub { + target_addr: target_addr.clone(), + }; + env.outboxes + .entry(src_key) + .or_default() + .push(PhysicalSender::Remote(remote_stub)); + } + } + } + + info!( + "Network Environment built. 
Wired {} connections.", + graph.edges.len() + ); + + env + } + + pub fn take_outboxes( + &mut self, + vertex_id: VertexId, + subtask_idx: SubtaskIndex, + ) -> Vec { + self.outboxes + .remove(&(vertex_id, subtask_idx)) + .unwrap_or_default() + } + + pub fn take_inboxes( + &mut self, + vertex_id: VertexId, + subtask_idx: SubtaskIndex, + ) -> Vec { + self.inboxes + .remove(&(vertex_id, subtask_idx)) + .unwrap_or_default() + } +} diff --git a/src/runtime/streaming/network/mod.rs b/src/runtime/streaming/network/mod.rs new file mode 100644 index 00000000..259e0f12 --- /dev/null +++ b/src/runtime/streaming/network/mod.rs @@ -0,0 +1,5 @@ +pub mod endpoint; +pub mod environment; + +pub use endpoint::{BoxedEventStream, PhysicalSender, RemoteSenderStub}; +pub use environment::NetworkEnvironment; diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs new file mode 100644 index 00000000..c76111c5 --- /dev/null +++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs @@ -0,0 +1,847 @@ +use anyhow::{anyhow, bail, Result}; +use arrow::compute::max_array; +use arrow::row::{RowConverter, SortField}; +use arrow_array::builder::{ + BinaryBuilder, TimestampNanosecondBuilder, UInt32Builder, UInt64Builder, +}; +use arrow_array::cast::AsArray; +use arrow_array::types::UInt64Type; +use arrow_array::{ + Array, ArrayRef, BinaryArray, BooleanArray, RecordBatch, StructArray, UInt32Array, UInt64Array, +}; +use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit}; +use datafusion::common::{Result as DFResult, ScalarValue}; +use datafusion::physical_expr::aggregate::AggregateFunctionExpr; +use datafusion::physical_plan::{Accumulator, PhysicalExpr}; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::from_proto::parse_physical_expr; +use datafusion_proto::protobuf::PhysicalExprNode; +use 
datafusion_proto::protobuf::PhysicalPlanNode; +use datafusion_proto::protobuf::physical_plan_node::PhysicalPlanType; +use futures::StreamExt; +use itertools::Itertools; +use prost::Message; +use std::collections::HashSet; +use std::sync::LazyLock; +use std::time::{Duration, Instant, SystemTime}; +use std::{collections::HashMap, mem, sync::Arc}; +use tracing::{debug, warn}; +use tracing_subscriber::Registry; +use protocol::grpc::api::UpdatingAggregateOperator; +// ========================================================================= +// 引入全新的 Actor 框架核心协议 (取代了老旧的 ArrowOperator 和 Collector) +// ========================================================================= +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::arrow::decode_aggregate; +use crate::runtime::streaming::operators::{Key, UpdatingCache}; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{to_nanos, CheckpointBarrier, FsSchema, Watermark, TIMESTAMP_FIELD, UPDATING_META_FIELD}; +use crate::sql::logical_planner::updating_meta_fields; + +#[derive(Debug, Copy, Clone)] +struct BatchData { + count: u64, + generation: u64, +} + +impl BatchData { + fn new(generation: u64) -> Self { + Self { count: 1, generation } + } + + fn inc(&mut self) { + self.count += 1; + self.generation += 1; + } + + fn dec(&mut self) { + self.count = self.count.checked_sub(1).unwrap_or_default(); + self.generation += 1; + } +} + +#[derive(Debug)] +enum IncrementalState { + Sliding { + expr: Arc, + accumulator: Box, + }, + Batch { + expr: Arc, + data: HashMap, + row_converter: Arc, + changed_values: HashSet, + }, +} + +impl IncrementalState { + fn update_batch(&mut self, new_generation: u64, batch: &[ArrayRef]) -> DFResult<()> { + match self { + IncrementalState::Sliding { accumulator, .. } => { + accumulator.update_batch(batch)?; + } + IncrementalState::Batch { data, row_converter, changed_values, .. 
} => { + for r in row_converter.convert_columns(batch)?.iter() { + if data.contains_key(r.as_ref()) { + data.get_mut(r.as_ref()).unwrap().inc(); + changed_values.insert(data.get_key_value(r.as_ref()).unwrap().0.clone()); + } else { + let key = Key(Arc::new(r.as_ref().to_vec())); + data.insert(key.clone(), BatchData::new(new_generation)); + changed_values.insert(key); + } + } + } + } + Ok(()) + } + + fn retract_batch(&mut self, batch: &[ArrayRef]) -> DFResult<()> { + match self { + IncrementalState::Sliding { accumulator, .. } => accumulator.retract_batch(batch), + IncrementalState::Batch { data, row_converter, changed_values, .. } => { + for r in row_converter.convert_columns(batch)?.iter() { + match data.get(r.as_ref()).map(|d| d.count) { + Some(0) => { + debug!("tried to retract value for key with count 0; implies append lost"); + } + Some(_) => { + data.get_mut(r.as_ref()).unwrap().dec(); + changed_values.insert(data.get_key_value(r.as_ref()).unwrap().0.clone()); + } + None => { + debug!("tried to retract value for missing key: implies append lost"); + } + } + } + Ok(()) + } + } + } + + fn evaluate(&mut self) -> DFResult { + match self { + IncrementalState::Sliding { accumulator, .. } => accumulator.evaluate(), + IncrementalState::Batch { expr, data, row_converter, .. 
} => { + let parser = row_converter.parser(); + let input = row_converter.convert_rows( + data.iter() + .filter(|(_, c)| c.count > 0) + .map(|(v, _)| parser.parse(&v.0)), + )?; + let mut acc = expr.create_accumulator()?; + acc.update_batch(&input)?; + acc.evaluate_mut() + } + } + } +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum AccumulatorType { + Sliding, + Batch, +} + +impl AccumulatorType { + fn state_fields(&self, agg: &AggregateFunctionExpr) -> DFResult> { + Ok(match self { + AccumulatorType::Sliding => agg.sliding_state_fields()?, + AccumulatorType::Batch => vec![], + }) + } +} + +#[derive(Debug)] +struct Aggregator { + func: Arc, + accumulator_type: AccumulatorType, + row_converter: Arc, + state_cols: Vec, +} + +// ========================================================================= +// 核心算子结构体 +// ========================================================================= + +pub struct IncrementalAggregatingFunc { + flush_interval: Duration, + metadata_expr: Arc, + aggregates: Vec, + accumulators: UpdatingCache>, + updated_keys: HashMap>>, + + // 【新增】:算子自身持有输入元数据,不再依赖外部动态传入 + input_schema: Arc, + has_routing_keys: bool, + + sliding_state_schema: Arc, + batch_state_schema: Arc, + schema_without_metadata: Arc, + /// 下游 changelog 批次 schema(与 planner `final_schema` 一致)。 + final_output_schema: Arc, + ttl: Duration, + key_converter: RowConverter, + new_generation: u64, +} + +/// 全局聚合使用的空 key(单分区无 routing key)。 +static GLOBAL_KEY: LazyLock>> = LazyLock::new(|| Arc::new(Vec::new())); + +impl IncrementalAggregatingFunc { + fn update_batch(&mut self, key: &[u8], batch: &[Vec], idx: Option) -> DFResult<()> { + self.accumulators + .modify_and_update(key, Instant::now(), |values| { + for (inputs, accs) in batch.iter().zip(values.iter_mut()) { + let values = if let Some(idx) = idx { + &inputs.iter().map(|c| c.slice(idx, 1)).collect() + } else { + inputs + }; + accs.update_batch(self.new_generation, values)?; + } + Ok(()) + }) + .expect("tried to update for 
non-existent key") + } + + fn retract_batch(&mut self, key: &[u8], batch: &[Vec], idx: Option) -> DFResult<()> { + self.accumulators + .modify(key, |values| { + for (inputs, accs) in batch.iter().zip(values.iter_mut()) { + let values = if let Some(idx) = idx { + &inputs.iter().map(|c| c.slice(idx, 1)).collect() + } else { + inputs + }; + accs.retract_batch(values)?; + } + Ok::<(), datafusion::common::DataFusionError>(()) + }) + .expect("tried to retract state for non-existent key")?; + Ok(()) + } + + fn evaluate(&mut self, key: &[u8]) -> DFResult> { + self.accumulators + .get_mut(key) + .expect("tried to evaluate non-existent key") + .iter_mut() + .map(|s| s.evaluate()) + .collect::>() + } + + fn get_retracts(batch: &RecordBatch) -> Option<&BooleanArray> { + if let Some(meta_col) = batch.column_by_name(UPDATING_META_FIELD) { + let meta_struct = meta_col + .as_any() + .downcast_ref::() + .expect("_updating_meta must be StructArray"); + + let is_retract_array = meta_struct + .column_by_name("is_retract") + .expect("meta struct must have is_retract"); + + Some(is_retract_array.as_any().downcast_ref::().expect("is_retract must be BooleanArray")) + } else { + None + } + } + + fn make_accumulators(&self) -> Vec { + self.aggregates + .iter() + .map(|agg| match agg.accumulator_type { + AccumulatorType::Sliding => IncrementalState::Sliding { + expr: agg.func.clone(), + accumulator: agg.func.create_sliding_accumulator().unwrap(), + }, + AccumulatorType::Batch => IncrementalState::Batch { + expr: agg.func.clone(), + data: Default::default(), + row_converter: agg.row_converter.clone(), + changed_values: Default::default(), + }, + }) + .collect() + } + + fn compute_inputs(&self, batch: &RecordBatch) -> Vec> { + self.aggregates + .iter() + .map(|agg| { + agg.func + .expressions() + .iter() + .map(|ex| ex.evaluate(batch).unwrap().into_array(batch.num_rows()).unwrap()) + .collect::>() + }) + .collect::>() + } + + fn global_aggregate(&mut self, batch: &RecordBatch) -> Result<()> { 
+ let retracts = Self::get_retracts(batch); + let aggregate_input_cols = self.compute_inputs(&batch); + + let mut first = false; + if !self + .accumulators + .contains_key(GLOBAL_KEY.as_ref().as_slice()) + { + first = true; + self.accumulators.insert( + GLOBAL_KEY.clone(), + Instant::now(), + self.new_generation, + self.make_accumulators(), + ); + } + + if !self + .updated_keys + .contains_key(GLOBAL_KEY.as_ref().as_slice()) + { + if first { + self.updated_keys.insert(Key(GLOBAL_KEY.clone()), None); + } else { + let v = Some(self.evaluate(GLOBAL_KEY.as_ref().as_slice())?); + self.updated_keys.insert(Key(GLOBAL_KEY.clone()), v); + } + } + + if let Some(retracts) = retracts { + for (i, r) in retracts.iter().enumerate() { + if r.unwrap_or_default() { + self.retract_batch( + GLOBAL_KEY.as_ref().as_slice(), + &aggregate_input_cols, + Some(i), + )?; + } else { + self.update_batch( + GLOBAL_KEY.as_ref().as_slice(), + &aggregate_input_cols, + Some(i), + )?; + } + } + } else { + self.update_batch( + GLOBAL_KEY.as_ref().as_slice(), + &aggregate_input_cols, + None, + ) + .unwrap(); + } + Ok(()) + } + + fn keyed_aggregate(&mut self, batch: &RecordBatch) -> Result<()> { + let retracts = Self::get_retracts(batch); + + let sort_columns = &self.input_schema + .sort_columns(batch, false) + .into_iter() + .map(|e| e.values) + .collect::>(); + + let keys = self.key_converter.convert_columns(sort_columns).unwrap(); + + for k in &keys { + if !self.updated_keys.contains_key(k.as_ref()) { + if let Some((key, accs)) = self.accumulators.get_mut_key_value(k.as_ref()) { + self.updated_keys.insert(key, Some(accs.iter_mut().map(|s| s.evaluate()).collect::>()?)); + } else { + self.updated_keys.insert(Key(Arc::new(k.as_ref().to_vec())), None); + } + } + } + + let aggregate_input_cols = self.compute_inputs(&batch); + + for (i, key) in keys.iter().enumerate() { + if !self.accumulators.contains_key(key.as_ref()) { + self.accumulators.insert(Arc::new(key.as_ref().to_vec()), Instant::now(), 0, 
self.make_accumulators()); + }; + + let retract = retracts.map(|r| r.value(i)).unwrap_or_default(); + if retract { + self.retract_batch(key.as_ref(), &aggregate_input_cols, Some(i))?; + } else { + self.update_batch(key.as_ref(), &aggregate_input_cols, Some(i))?; + } + } + Ok(()) + } + + // ========================================================================= + // 状态读写逻辑 (Checkpointing & Restore) + // ========================================================================= + + fn checkpoint_sliding(&mut self) -> DFResult>> { + if self.updated_keys.is_empty() { return Ok(None); } + + let mut states = vec![vec![]; self.sliding_state_schema.schema.fields.len()]; + let parser = self.key_converter.parser(); + let mut generation_builder = UInt64Builder::with_capacity(self.updated_keys.len()); + + let mut cols = self.key_converter.convert_rows(self.updated_keys.keys().map(|k| { + let (accumulators, generation) = self.accumulators.get_mut_generation(k.0.as_ref()).unwrap(); + generation_builder.append_value(generation); + + for (state, agg) in accumulators.iter_mut().zip(self.aggregates.iter()) { + let IncrementalState::Sliding { expr, accumulator } = state else { continue; }; + let state = accumulator.state().unwrap_or_else(|_| { + let state = accumulator.state().unwrap(); + *accumulator = expr.create_sliding_accumulator().unwrap(); + let states: Vec<_> = state.iter().map(|s| s.to_array()).try_collect().unwrap(); + accumulator.merge_batch(&states).unwrap(); + state + }); + + for (idx, v) in agg.state_cols.iter().zip(state.into_iter()) { + states[*idx].push(v); + } + } + parser.parse(k.0.as_ref()) + }))?; + + cols.extend(states.into_iter().skip(cols.len()).map(|c| ScalarValue::iter_to_array(c).unwrap())); + + let generations = generation_builder.finish(); + self.new_generation = self.new_generation.max(max_array::(&generations).unwrap()); + cols.push(Arc::new(generations)); + + Ok(Some(cols)) + } + + fn checkpoint_batch(&mut self) -> DFResult>> { + if 
self.aggregates.iter().all(|agg| agg.accumulator_type == AccumulatorType::Sliding) { return Ok(None); } + if self.updated_keys.is_empty() { return Ok(None); } + + let size = self.updated_keys.len(); + let mut rows = Vec::with_capacity(size); + let mut accumulator_builder = UInt32Builder::with_capacity(size); + let mut args_row_builder = BinaryBuilder::with_capacity(size, size * 4); + let mut count_builder = UInt64Builder::with_capacity(size); + let mut timestamp_builder = TimestampNanosecondBuilder::with_capacity(size); + let mut generation_builder = UInt64Builder::with_capacity(size); + + let now = to_nanos(SystemTime::now()) as i64; + let parser = self.key_converter.parser(); + + for k in self.updated_keys.keys() { + let row = parser.parse(&k.0); + for (i, state) in self.accumulators.get_mut(k.0.as_ref()).unwrap().iter_mut().enumerate() { + let IncrementalState::Batch { data, changed_values, .. } = state else { continue; }; + + for vk in changed_values.iter() { + if let Some(count) = data.get(vk) { + accumulator_builder.append_value(i as u32); + args_row_builder.append_value(&*vk.0); + count_builder.append_value(count.count); + generation_builder.append_value(count.generation); + timestamp_builder.append_value(now); + rows.push(row.to_owned()) + } + } + data.retain(|_, v| v.count > 0); + } + } + + let mut cols = self.key_converter.convert_rows(rows.into_iter())?; + cols.push(Arc::new(accumulator_builder.finish())); + cols.push(Arc::new(args_row_builder.finish())); + cols.push(Arc::new(count_builder.finish())); + cols.push(Arc::new(timestamp_builder.finish())); + + let generations = generation_builder.finish(); + self.new_generation = self.new_generation.max(max_array::(&generations).unwrap()); + cols.push(Arc::new(generations)); + + Ok(Some(cols)) + } + + fn restore_sliding(&mut self, key: &[u8], now: Instant, i: usize, aggregate_states: &Vec>, generation: u64) -> Result<()> { + let mut accumulators = self.make_accumulators(); + for ((_, state_cols), acc) in 
self.aggregates.iter().zip(aggregate_states.iter()).zip(accumulators.iter_mut()) { + if let IncrementalState::Sliding { accumulator, .. } = acc { + accumulator.merge_batch(&state_cols.iter().map(|c| c.slice(i, 1)).collect_vec())? + } + } + self.accumulators.insert(Arc::new(key.to_vec()), now, generation, accumulators); + Ok(()) + } + + async fn initialize(&mut self, ctx: &mut TaskContext) -> Result<()> { + let mut tm = ctx.table_manager_guard().await?; + + let table = tm + .get_uncached_key_value_view("a") + .await + .map_err(|e| anyhow!("state table a: {e}"))?; + let mut stream = Box::pin(table.get_all()); + let key_converter = RowConverter::new(self.sliding_state_schema.sort_fields(false))?; + + while let Some(batch) = stream.next().await { + let batch = batch?; + if batch.num_rows() == 0 { continue; } + + let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect(); + let aggregate_states = self.aggregates.iter().map(|agg| { + agg.state_cols.iter().map(|idx| batch.column(*idx).clone()).collect_vec() + }).collect_vec(); + let generations = batch.columns().last().unwrap().as_primitive::(); + let now = Instant::now(); + + if key_cols.is_empty() { + self.restore_sliding( + GLOBAL_KEY.as_ref().as_slice(), + now, + 0, + &aggregate_states, + generations.value(0), + )?; + } else { + let key_rows = key_converter.convert_columns(&key_cols)?; + for (i, row) in key_rows.iter().enumerate() { + if generations.is_null(i) { + bail!("generation is null at row {i}"); + } + let generation = generations.value(i); + self.restore_sliding( + row.as_ref(), + now, + i, + &aggregate_states, + generation, + )?; + } + } + } + drop(stream); + + // Initialize the Batch accumulators (restored from state table "b") + if self.aggregates.iter().any(|agg| agg.accumulator_type == AccumulatorType::Batch) { + let table = tm + .get_uncached_key_value_view("b") + .await + .map_err(|e| anyhow!("state table b: {e}"))?; + let mut stream = Box::pin(table.get_all()); + while let Some(batch) =
stream.next().await { + let batch = batch?; + if batch.num_rows() == 0 { continue; } + + let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect(); + let count_column = batch.column(self.batch_state_schema.schema.index_of("count").unwrap()).as_any().downcast_ref::().unwrap(); + let accumulator_column = batch.column(self.batch_state_schema.schema.index_of("accumulator").unwrap()).as_any().downcast_ref::().unwrap(); + let args_row_column = batch.column(self.batch_state_schema.schema.index_of("args_row").unwrap()).as_any().downcast_ref::().unwrap(); + let generations = batch.columns().last().unwrap().as_primitive::(); + + let key_rows = if key_cols.is_empty() { + vec![GLOBAL_KEY.as_ref().clone()] + } else { + self.key_converter + .convert_columns(&key_cols)? + .iter() + .map(|k| k.as_ref().to_vec()) + .collect() + }; + + for (i, row) in key_rows.iter().enumerate() { + let Some(accumulators) = self.accumulators.get_mut(row.as_ref()) else { continue; }; + let count = count_column.value(i); + let accumulator_idx = accumulator_column.value(i) as usize; + let args_row = args_row_column.value(i); + let generation = generations.value(i); + + let IncrementalState::Batch { data, .. } = &mut accumulators[accumulator_idx] else { bail!("expected batch accumulator"); }; + + if let Some(existing) = data.get_mut(args_row) { + if existing.generation < generation { existing.count = count; existing.generation = generation; } + } else { + data.insert(Key(Arc::new(args_row.to_vec())), BatchData { count, generation }); + } + } + } + } + + let mut deleted_keys = vec![]; + for (k, v) in self.accumulators.iter_mut() { + let is_deleted = v.last_mut().unwrap().evaluate()?.is_null(); + if is_deleted { deleted_keys.push(k.clone()); } + else { + for is in v { + if let IncrementalState::Batch { data, .. 
} = is { data.retain(|_, v| v.count > 0); } + } + } + } + for k in deleted_keys { self.accumulators.remove(&k.0); } + Ok(()) + } + + /// Core logic: extract from memory every change accumulated during this period and build the changelog (appends and retractions). + fn generate_changelog(&mut self) -> Result> { + let mut output_keys = Vec::with_capacity(self.updated_keys.len() * 2); + let mut output_values = vec![Vec::with_capacity(self.updated_keys.len() * 2); self.aggregates.len()]; + let mut is_retracts = Vec::with_capacity(self.updated_keys.len() * 2); + + // Extract the changes (this drains `updated_keys`) + let (updated_keys, updated_values): (Vec<_>, Vec<_>) = mem::take(&mut self.updated_keys).into_iter().unzip(); + let mut deleted_keys = vec![]; + + for (k, retract) in updated_keys.iter().zip(updated_values.into_iter()) { + let append = self.evaluate(&k.0)?; + + if let Some(v) = retract { + // Nothing changed for this key: skip it + if v.iter().zip(append.iter()).take(v.len() - 1).all(|(a, b)| a == b) { continue; } + is_retracts.push(true); + output_keys.push(k.clone()); + for (out, val) in output_values.iter_mut().zip(v) { out.push(val); } + } + + if !append.last().unwrap().is_null() { + is_retracts.push(false); + output_keys.push(k.clone()); + for (out, val) in output_values.iter_mut().zip(append) { out.push(val); } + } else { + deleted_keys.push(k); + } + } + + for k in deleted_keys { self.accumulators.remove(&k.0); } + + // Handle keys whose TTL has expired (each one is emitted as a retraction) + let mut ttld_keys = vec![]; + for (k, mut v) in self.accumulators.time_out(Instant::now()) { + is_retracts.push(true); + ttld_keys.push(k); + for (out, val) in output_values.iter_mut().zip(v.iter_mut().map(|s| s.evaluate())) { out.push(val?); } + } + + if output_keys.is_empty() && ttld_keys.is_empty() { return Ok(None); } + + let row_parser = self.key_converter.parser(); + let mut result_cols = self.key_converter.convert_rows( + output_keys.iter().map(|k| row_parser.parse(k.0.as_slice())) + .chain(ttld_keys.iter().map(|k| row_parser.parse(k.as_slice()))) + )?; + + for acc in output_values.into_iter() { result_cols.push(ScalarValue::iter_to_array(acc).unwrap()); } + + let
record_batch = RecordBatch::try_new(self.schema_without_metadata.clone(), result_cols).unwrap(); + + let metadata = self.metadata_expr.evaluate(&record_batch).unwrap().into_array(record_batch.num_rows()).unwrap(); + let metadata = set_retract_metadata(metadata, Arc::new(BooleanArray::from(is_retracts))); + + let mut final_batch = record_batch.columns().to_vec(); + final_batch.push(metadata); + + // Note: this must match the schema that is finally sent downstream + Ok(Some(RecordBatch::try_new( + self.final_output_schema.clone(), + final_batch, + )?)) + } +} + +fn set_retract_metadata(metadata: ArrayRef, is_retract: Arc) -> ArrayRef { + let metadata = metadata.as_struct(); + let arrays: Vec> = vec![is_retract, metadata.column(1).clone()]; + Arc::new(StructArray::new(updating_meta_fields(), arrays, None)) +} + +// ========================================================================= +// Implementation of the new Actor MessageOperator interface +// ========================================================================= + +#[async_trait::async_trait] +impl MessageOperator for IncrementalAggregatingFunc { + fn name(&self) -> &str { + "UpdatingAggregatingFunc" + } + + async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { + self.initialize(ctx).await?; + Ok(()) + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + _ctx: &mut TaskContext, + ) -> Result> { + // Incoming data only updates the in-memory HashMap; nothing is emitted yet + if self.has_routing_keys { + self.keyed_aggregate(&batch)?; + } else { + self.global_aggregate(&batch)?; + } + + Ok(vec![]) + } + + async fn process_watermark( + &mut self, + _watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + // Time-based flush (whether to flush when the watermark advances can be decided per business needs) + if let Some(changelog_batch) = self.generate_changelog()?
{ + // Forward means the batch is sent straight to the downstream operator along the original route + Ok(vec![StreamOutput::Forward(changelog_batch)]) + } else { + Ok(vec![]) + } + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + ctx: &mut TaskContext, + ) -> Result<()> { + let mut tm = ctx.table_manager_guard().await?; + + if let Some(sliding) = self.checkpoint_sliding()? { + let table = tm + .get_uncached_key_value_view("a") + .await + .map_err(|e| anyhow!("state table a: {e}"))?; + table + .insert_batch(sliding) + .await + .map_err(|e| anyhow!("insert_batch a: {e}"))?; + } + + if let Some(batch) = self.checkpoint_batch()? { + let table = tm + .get_uncached_key_value_view("b") + .await + .map_err(|e| anyhow!("state table b: {e}"))?; + table + .insert_batch(batch) + .await + .map_err(|e| anyhow!("insert_batch b: {e}"))?; + } + + // Clear the traces of the already generated changelog. NOTE(review): this also drops keys that generate_changelog has not emitted yet - confirm that is intended. + self.updated_keys.clear(); + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + Ok(vec![]) + } +} + +// ========================================================================= +// The operator constructor keeps the external API compatible +// ========================================================================= + +pub struct IncrementalAggregatingConstructor; + +impl IncrementalAggregatingConstructor { + pub fn with_config( + &self, + config: UpdatingAggregateOperator, + registry: Arc, + ) -> anyhow::Result { + let ttl = Duration::from_micros(if config.ttl_micros == 0 { + warn!("ttl was not set for updating aggregate"); + 24 * 60 * 60 * 1000 * 1000 + } else { + config.ttl_micros + }); + + let input_schema: FsSchema = config.input_schema.unwrap().try_into()?; + let final_schema: FsSchema = config.final_schema.unwrap().try_into()?; + let mut schema_without_metadata = SchemaBuilder::from((*final_schema.schema).clone()); + schema_without_metadata.remove(final_schema.schema.index_of(UPDATING_META_FIELD).unwrap()); + + let metadata_expr = parse_physical_expr( + &PhysicalExprNode::decode(&mut config.metadata_expr.as_slice())?, + registry.as_ref(), +
&input_schema.schema, + &DefaultPhysicalExtensionCodec {}, + )?; + + let aggregate_exec = PhysicalPlanNode::decode(&mut config.aggregate_exec.as_ref())?; + let PhysicalPlanType::Aggregate(aggregate_exec) = aggregate_exec.physical_plan_type.unwrap() else { bail!("invalid proto"); }; + + let mut sliding_state_fields = input_schema.routing_keys() + .map(|v| v.iter().map(|idx| input_schema.schema.field(*idx).clone()).collect_vec()) + .unwrap_or_default(); + + let has_routing_keys = input_schema.routing_keys().is_some(); + let mut batch_state_fields = sliding_state_fields.clone(); + let key_fields = (0..sliding_state_fields.len()).collect_vec(); + + let aggregates: Vec<_> = aggregate_exec.aggr_expr.iter().zip(aggregate_exec.aggr_expr_name.iter()) + .map(|(expr, name)| Ok(decode_aggregate(&input_schema.schema, name, expr, registry.as_ref())?)) + .map_ok(|agg| { + let retract = match agg.create_sliding_accumulator() { Ok(s) => s.supports_retract_batch(), _ => false }; + (agg, if retract { AccumulatorType::Sliding } else { AccumulatorType::Batch }) + }) + .map_ok(|(agg, t)| { + let row_converter = Arc::new(RowConverter::new( + agg.expressions().iter().map(|ex| Ok(SortField::new(ex.data_type(&input_schema.schema)?))).collect::>()? 
+ )?); + let fields = t.state_fields(&agg)?; + let field_names = fields.iter().map(|f| f.name().to_string()).collect_vec(); + sliding_state_fields.extend(fields.into_iter().map(|f| (*f).clone())); + Ok::<_, anyhow::Error>((agg, t, row_converter, field_names)) + }) + .flatten_ok() + .collect::>()?; + + let state_schema = Schema::new(sliding_state_fields); + + let aggregates = aggregates.into_iter().map(|(agg, t, row_converter, field_names)| Aggregator { + func: agg, accumulator_type: t, row_converter, + state_cols: field_names.iter().map(|f| state_schema.index_of(f).unwrap()).collect(), + }).collect(); + + let mut state_fields = state_schema.fields().to_vec(); + let timestamp_field = state_fields.pop().unwrap(); + state_fields.push(Arc::new((*timestamp_field).clone().with_name(TIMESTAMP_FIELD))); + + let sliding_state_schema = Arc::new(FsSchema::from_schema_keys(Arc::new(Schema::new(state_fields)), key_fields.clone())?); + + batch_state_fields.push(Field::new("accumulator", DataType::UInt32, false)); + batch_state_fields.push(Field::new("args_row", DataType::Binary, false)); + batch_state_fields.push(Field::new("count", DataType::UInt64, false)); + batch_state_fields.push(Field::new(TIMESTAMP_FIELD, DataType::Timestamp(TimeUnit::Nanosecond, None), false)); + let timestamp_index = batch_state_fields.len() - 1; + + let mut storage_key_fields = key_fields.clone(); + storage_key_fields.push(storage_key_fields.len()); + storage_key_fields.push(storage_key_fields.len()); + + let batch_state_schema = Arc::new(FsSchema::new( + Arc::new(Schema::new(batch_state_fields)), + timestamp_index, + Some(storage_key_fields), + Some(key_fields), + )); + + Ok(IncrementalAggregatingFunc { + flush_interval: Duration::from_micros(config.flush_interval_micros), + metadata_expr, + ttl, + aggregates, + accumulators: UpdatingCache::with_time_to_idle(ttl), + schema_without_metadata: Arc::new(schema_without_metadata.finish()), + final_output_schema: final_schema.schema.clone(), + updated_keys: 
Default::default(), + input_schema: Arc::new(input_schema.clone()), + has_routing_keys, + key_converter: RowConverter::new(input_schema.sort_fields(false))?, + sliding_state_schema, + batch_state_schema, + new_generation: 0, + }) + } +} \ No newline at end of file diff --git a/src/runtime/streaming/operators/grouping/mod.rs b/src/runtime/streaming/operators/grouping/mod.rs new file mode 100644 index 00000000..fb2ae7b1 --- /dev/null +++ b/src/runtime/streaming/operators/grouping/mod.rs @@ -0,0 +1,5 @@ +pub mod incremental_aggregate; +pub mod updating_cache; + +pub use incremental_aggregate::{IncrementalAggregatingConstructor, IncrementalAggregatingFunc}; +pub use updating_cache::{Key, UpdatingCache}; diff --git a/src/runtime/streaming/operators/grouping/updating_cache.rs b/src/runtime/streaming/operators/grouping/updating_cache.rs new file mode 100644 index 00000000..b6fbcc99 --- /dev/null +++ b/src/runtime/streaming/operators/grouping/updating_cache.rs @@ -0,0 +1,498 @@ +//! Per-key incremental state cache: LRU + TTL (idle), used by [`super::incremental_aggregate`] and others. + +use std::borrow::Borrow; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +#[derive(Hash, Eq, PartialEq, Clone, Debug)] +pub struct Key(pub Arc>); + +impl Borrow<[u8]> for Key { + fn borrow(&self) -> &[u8] { + &self.0 + } +} + +struct Node { + key: Key, + data: Option, + generation: u64, + updated: Instant, + prev: Option, + next: Option, +} + +/// UpdatingCache built on array slots + a doubly linked list (LRU); supports generation-based updates and TTL eviction. +pub struct UpdatingCache { + map: HashMap, + nodes: Vec>, + free_list: Vec, + head: Option, + tail: Option, + ttl: Duration, +} + +struct TTLIter<'a, T: Send + Sync> { + now: Instant, + cache: &'a mut UpdatingCache, +} + +impl Iterator for TTLIter<'_, T> { + type Item = (Arc>, T); + + fn next(&mut self) -> Option { + let head_idx = self.cache.head?; + let node = &self.cache.nodes[head_idx]; + + if self.now.saturating_duration_since(node.updated) < self.cache.ttl { + return None; + } + +
let (k, v) = self.cache.pop_front()?; + Some((k.0, v)) + } +} + +impl UpdatingCache { + pub fn with_time_to_idle(ttl: Duration) -> Self { + Self { + map: HashMap::new(), + nodes: Vec::new(), + free_list: Vec::new(), + head: None, + tail: None, + ttl, + } + } + + pub fn insert(&mut self, key: Arc>, now: Instant, generation: u64, value: T) { + let key_obj = Key(key); + + if let Some(&idx) = self.map.get(&key_obj) { + if self.nodes[idx].generation >= generation { + return; + } + self.nodes[idx].data = Some(value); + self.nodes[idx].generation = generation; + self.nodes[idx].updated = now; + self.move_to_tail(idx); + return; + } + + let idx = self.allocate_node(key_obj.clone(), value, generation, now); + self.map.insert(key_obj, idx); + self.push_back(idx); + } + + pub fn time_out(&mut self, now: Instant) -> impl Iterator>, T)> + '_ { + TTLIter { now, cache: self } + } + + pub fn iter_mut(&mut self) -> impl Iterator { + self.nodes.iter_mut().filter_map(|n| { + if let Some(data) = &mut n.data { + Some((&n.key, data)) + } else { + None + } + }) + } + + pub fn modify_and_update Result<(), E>>( + &mut self, + key: &[u8], + now: Instant, + f: F, + ) -> Option> { + let &idx = self.map.get(key)?; + let node = &mut self.nodes[idx]; + + if let Err(e) = f(node.data.as_mut().unwrap()) { + return Some(Err(e)); + } + + node.generation += 1; + node.updated = now; + self.move_to_tail(idx); + + Some(Ok(())) + } + + pub fn modify Result<(), E>>( + &mut self, + key: &[u8], + f: F, + ) -> Option> { + let &idx = self.map.get(key)?; + let node = &mut self.nodes[idx]; + + node.generation += 1; + + if let Err(e) = f(node.data.as_mut().unwrap()) { + return Some(Err(e)); + } + + Some(Ok(())) + } + + pub fn contains_key(&self, k: &[u8]) -> bool { + self.map.contains_key(k) + } + + pub fn get_mut(&mut self, key: &[u8]) -> Option<&mut T> { + let &idx = self.map.get(key)?; + self.nodes[idx].data.as_mut() + } + + pub fn get_mut_generation(&mut self, key: &[u8]) -> Option<(&mut T, u64)> { + let &idx 
= self.map.get(key)?; + let node = &mut self.nodes[idx]; + Some((node.data.as_mut().unwrap(), node.generation)) + } + + pub fn get_mut_key_value(&mut self, key: &[u8]) -> Option<(Key, &mut T)> { + let &idx = self.map.get(key)?; + let node = &mut self.nodes[idx]; + Some((node.key.clone(), node.data.as_mut().unwrap())) + } + + pub fn remove(&mut self, key: &[u8]) -> Option { + let &idx = self.map.get(key)?; + self.map.remove(key); + self.remove_node(idx); + + let data = self.nodes[idx].data.take().unwrap(); + self.free_list.push(idx); + + Some(data) + } + + fn pop_front(&mut self) -> Option<(Key, T)> { + let head_idx = self.head?; + self.remove_node(head_idx); + + let node = &mut self.nodes[head_idx]; + self.map.remove(&node.key); + + let key = node.key.clone(); + let data = node.data.take().unwrap(); + self.free_list.push(head_idx); + + Some((key, data)) + } + + fn allocate_node(&mut self, key: Key, data: T, generation: u64, updated: Instant) -> usize { + let new_node = Node { + key, + data: Some(data), + generation, + updated, + prev: None, + next: None, + }; + + if let Some(idx) = self.free_list.pop() { + self.nodes[idx] = new_node; + idx + } else { + let idx = self.nodes.len(); + self.nodes.push(new_node); + idx + } + } + + fn push_back(&mut self, index: usize) { + self.nodes[index].prev = self.tail; + self.nodes[index].next = None; + + if let Some(tail_idx) = self.tail { + self.nodes[tail_idx].next = Some(index); + } else { + self.head = Some(index); + } + self.tail = Some(index); + } + + fn remove_node(&mut self, index: usize) { + let prev = self.nodes[index].prev; + let next = self.nodes[index].next; + + if let Some(p) = prev { + self.nodes[p].next = next; + } else { + self.head = next; + } + + if let Some(n) = next { + self.nodes[n].prev = prev; + } else { + self.tail = prev; + } + + self.nodes[index].prev = None; + self.nodes[index].next = None; + } + + fn move_to_tail(&mut self, index: usize) { + if self.tail == Some(index) { + return; + } + 
self.remove_node(index); + self.push_back(index); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_insert_and_modify() { + let mut cache = UpdatingCache::with_time_to_idle(Duration::from_secs(60)); + + let key = Arc::new(vec![1, 2, 3]); + let now = Instant::now(); + cache.insert(key.clone(), now, 1, 42); + + assert!( + cache + .modify(key.as_ref(), |x| { + *x = 43; + Ok::<(), ()>(()) + }) + .unwrap() + .is_ok() + ); + + assert_eq!(*cache.get_mut(key.as_ref()).unwrap(), 43); + } + + #[test] + fn test_timeout() { + let mut cache = UpdatingCache::with_time_to_idle(Duration::from_millis(10)); + + let key1 = Arc::new(vec![1]); + let key2 = Arc::new(vec![2]); + + let start = Instant::now(); + cache.insert(key1.clone(), start, 1, "value1"); + cache.insert(key2.clone(), start + Duration::from_millis(5), 2, "value2"); + + let check_time = start + Duration::from_millis(11); + let timed_out: Vec<_> = cache.time_out(check_time).collect(); + assert_eq!(timed_out.len(), 1); + assert_eq!(&*timed_out[0].0, &*key1); + + assert!(cache.contains_key(key2.as_ref())); + assert!(!cache.contains_key(key1.as_ref())); + } + + #[test] + fn test_update_keeps_alive() { + let mut cache = UpdatingCache::with_time_to_idle(Duration::from_millis(10)); + + let key = Arc::new(vec![1]); + let start = Instant::now(); + cache.insert(key.clone(), start, 1, "value"); + + let update_time = start + Duration::from_millis(5); + cache + .modify_and_update(key.as_ref(), update_time, |_| Ok::<(), ()>(())) + .unwrap() + .unwrap(); + + let check_time = start + Duration::from_millis(11); + let timed_out: Vec<_> = cache.time_out(check_time).collect(); + assert!(timed_out.is_empty()); + assert!(cache.contains_key(key.as_ref())); + } + + #[test] + fn test_lru_eviction_order_matches_insertion() { + let mut cache = UpdatingCache::with_time_to_idle(Duration::from_secs(60)); + let key1 = Arc::new(vec![1]); + let key2 = Arc::new(vec![2]); + let key3 = Arc::new(vec![3]); + let now = Instant::now(); 
+ cache.insert(key1.clone(), now, 1, 1); + cache.insert(key2.clone(), now, 2, 2); + cache.insert(key3.clone(), now, 3, 3); + + let evicted: Vec<_> = cache.time_out(now + Duration::from_secs(61)).collect(); + assert_eq!(evicted.len(), 3); + assert_eq!(evicted[0].0.as_ref(), &*key1); + assert_eq!(evicted[1].0.as_ref(), &*key2); + assert_eq!(evicted[2].0.as_ref(), &*key3); + } + + #[test] + fn test_remove_middle_key() { + let mut cache = UpdatingCache::with_time_to_idle(Duration::from_secs(60)); + let key1 = Arc::new(vec![1]); + let key2 = Arc::new(vec![2]); + let key3 = Arc::new(vec![3]); + let now = Instant::now(); + cache.insert(key1.clone(), now, 1, 1); + cache.insert(key2.clone(), now, 2, 2); + cache.insert(key3.clone(), now, 3, 3); + + assert_eq!(cache.remove(&[2]).unwrap(), 2); + assert!(cache.contains_key(&[1])); + assert!(!cache.contains_key(&[2])); + assert!(cache.contains_key(&[3])); + + let evicted: Vec<_> = cache.time_out(now + Duration::from_secs(61)).collect(); + assert_eq!(evicted.len(), 2); + assert_eq!(evicted[0].0.as_ref(), &*key1); + assert_eq!(evicted[1].0.as_ref(), &*key3); + } + + #[test] + fn reorder_with_update() { + let mut cache = UpdatingCache::::with_time_to_idle(Duration::from_secs(10)); + let key1 = Arc::new(vec![1]); + let key2 = Arc::new(vec![2]); + let now = Instant::now(); + + cache.insert(key1.clone(), now, 1, 100); + cache.insert(key2.clone(), now, 2, 200); + + cache + .modify_and_update(&[1], now + Duration::from_secs(1), |v| { + *v += 1; + Ok::<(), ()>(()) + }) + .unwrap() + .unwrap(); + + let _ = cache.modify_and_update(&[1], now + Duration::from_secs(2), |v| { + *v += 1; + Ok::<(), ()>(()) + }); + } + + #[test] + fn test_ttl_eviction() { + let ttl = Duration::from_millis(100); + let mut cache = UpdatingCache::with_time_to_idle(ttl); + let now = Instant::now(); + let key1 = Arc::new(vec![1]); + let key2 = Arc::new(vec![2]); + cache.insert(key1.clone(), now, 1, 10); + cache.insert(key2.clone(), now, 2, 20); + + cache + 
.modify_and_update(&[2], now + Duration::from_millis(50), |v| { + *v += 1; + Ok::<(), ()>(()) + }) + .unwrap() + .unwrap(); + + let now2 = now + Duration::from_millis(150); + let evicted: Vec<_> = cache.time_out(now2).collect(); + assert_eq!(evicted.len(), 2); + assert_eq!(evicted[0].0.as_ref(), &[1]); + assert_eq!(evicted[1].0.as_ref(), &[2]); + } + + #[test] + fn test_remove_key() { + let ttl = Duration::from_millis(100); + let mut cache = UpdatingCache::with_time_to_idle(ttl); + let now = Instant::now(); + let key = Arc::new(vec![1]); + cache.insert(key.clone(), now, 1, 42); + let value = cache.remove(&[1]).unwrap(); + assert_eq!(value, 42); + assert!(!cache.contains_key(&[1])); + let evicted: Vec<_> = cache.time_out(now + Duration::from_millis(200)).collect(); + assert!(evicted.is_empty()); + } + + #[test] + fn test_update_order() { + let ttl = Duration::from_secs(1); + let mut cache = UpdatingCache::with_time_to_idle(ttl); + let base = Instant::now(); + let key_a = Arc::new(vec![b'A']); + let key_b = Arc::new(vec![b'B']); + let key_c = Arc::new(vec![b'C']); + cache.insert(key_a.clone(), base, 1, 1); + cache.insert(key_b.clone(), base, 2, 2); + cache.insert(key_c.clone(), base, 3, 3); + + let t_update = base + Duration::from_millis(500); + cache + .modify_and_update(b"B", t_update, |v| { + *v += 10; + Ok::<(), ()>(()) + }) + .unwrap() + .unwrap(); + + let t_eviction = base + Duration::from_secs(2); + let evicted: Vec<_> = cache.time_out(t_eviction).collect(); + assert_eq!(evicted.len(), 3); + assert_eq!(evicted[0].0.as_ref(), b"A"); + assert_eq!(evicted[1].0.as_ref(), b"C"); + assert_eq!(evicted[2].0.as_ref(), b"B"); + } + + #[test] + fn test_get_mut_key_value() { + let ttl = Duration::from_secs(1); + let mut cache = UpdatingCache::with_time_to_idle(ttl); + let base = Instant::now(); + let key = Arc::new(vec![1, 2, 3]); + cache.insert(key.clone(), base, 1, 42); + if let Some((k, v)) = cache.get_mut_key_value(&[1, 2, 3]) { + *v += 1; + assert_eq!(*v, 43); + 
assert_eq!(k.0.as_ref(), &[1, 2, 3]); + } else { + panic!("Key not found"); + } + } + + #[test] + fn test_modify_error() { + let ttl = Duration::from_secs(1); + let mut cache = UpdatingCache::with_time_to_idle(ttl); + let base = Instant::now(); + let key = Arc::new(vec![1]); + cache.insert(key.clone(), base, 1, 42); + let res = cache.modify(&[1], |_v| Err("error")); + assert!(res.unwrap().is_err()); + } + + #[test] + fn test_drop_cleanup() { + let ttl = Duration::from_secs(1); + { + let mut cache = UpdatingCache::with_time_to_idle(ttl); + let base = Instant::now(); + for i in 0..10 { + cache.insert(Arc::new(vec![i as u8]), base, i as u64, i); + } + } + } + + #[test] + fn test_generational_replacement() { + let ttl = Duration::from_secs(1); + let mut cache = UpdatingCache::with_time_to_idle(ttl); + let base = Instant::now(); + let key = Arc::new(vec![1]); + + cache.insert(key.clone(), base, 1, "first"); + assert_eq!(cache.get_mut(&[1]), Some(&mut "first")); + + cache.insert(key.clone(), base, 2, "second"); + assert_eq!(cache.get_mut(&[1]), Some(&mut "second")); + + cache.insert(key.clone(), base, 1, "third"); + assert_eq!(cache.get_mut(&[1]), Some(&mut "second")); + } +} diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs new file mode 100644 index 00000000..dbde4d8e --- /dev/null +++ b/src/runtime/streaming/operators/joins/join_instance.rs @@ -0,0 +1,351 @@ +//! 
瞬时 JOIN:双通道喂入 DataFusion 物理计划,水位线推进时闭合实例并抽干结果。 + +use anyhow::{anyhow, Result}; +use arrow::compute::{max, min, partition, sort_to_indices, take}; +use arrow_array::{RecordBatch, TimestampNanosecondArray}; +use datafusion::execution::context::SessionContext; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::execution::SendableRecordBatchStream; +use datafusion::physical_plan::ExecutionPlan; +use datafusion_proto::physical_plan::AsExecutionPlan; +use datafusion_proto::protobuf::PhysicalPlanNode; +use futures::StreamExt; +use prost::Message; +use std::collections::BTreeMap; +use std::sync::{Arc, RwLock}; +use std::time::SystemTime; +use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; +use tracing::warn; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use async_trait::async_trait; +use tracing_subscriber::Registry; +use protocol::grpc::api::JoinOperator; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; +use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum JoinSide { + Left, + Right, +} + +impl JoinSide { + fn name(&self) -> &'static str { + match self { + JoinSide::Left => "left", + JoinSide::Right => "right", + } + } +} + +/// Instant JOIN execution instance: holds the channels; when the window closes, the channels are closed and the `SendableRecordBatchStream` is drained synchronously. +struct JoinInstance { + left_tx: UnboundedSender, + right_tx: UnboundedSender, + result_stream: SendableRecordBatchStream, +} + +impl JoinInstance { + fn feed_data(&self, batch: RecordBatch, side: JoinSide) -> Result<()> { + match side { + JoinSide::Left => self + .left_tx + .send(batch) + .map_err(|e| anyhow!("Left send err: {}", e)), + JoinSide::Right => self + .right_tx + .send(batch) + .map_err(|e| anyhow!("Right send err: {}", e)), + } + } + + /// Closes the input streams so that the execution plan finishes, then pulls all JOIN results. +
async fn close_and_drain(self) -> Result> { + drop(self.left_tx); + drop(self.right_tx); + + let mut outputs = Vec::new(); + let mut stream = self.result_stream; + + while let Some(result_batch) = stream.next().await { + outputs.push(result_batch?); + } + + Ok(outputs) + } +} + +pub struct InstantJoinOperator { + left_input_schema: FsSchemaRef, + right_input_schema: FsSchemaRef, + active_joins: BTreeMap, + left_receiver_hook: Arc>>>, + right_receiver_hook: Arc>>>, + join_exec_plan: Arc, +} + +impl InstantJoinOperator { + fn input_schema(&self, side: JoinSide) -> FsSchemaRef { + match side { + JoinSide::Left => self.left_input_schema.clone(), + JoinSide::Right => self.right_input_schema.clone(), + } + } + + fn get_or_create_join_instance(&mut self, time: SystemTime) -> Result<&mut JoinInstance> { + use std::collections::btree_map::Entry; + + if let Entry::Vacant(e) = self.active_joins.entry(time) { + let (left_tx, left_rx) = unbounded_channel(); + let (right_tx, right_rx) = unbounded_channel(); + + *self.left_receiver_hook.write().unwrap() = Some(left_rx); + *self.right_receiver_hook.write().unwrap() = Some(right_rx); + + self.join_exec_plan.reset().map_err(|e| anyhow!("{e}"))?; + let result_stream = self + .join_exec_plan + .execute(0, SessionContext::new().task_ctx()) + .map_err(|e| anyhow!("{e}"))?; + + e.insert(JoinInstance { + left_tx, + right_tx, + result_stream, + }); + } + + self.active_joins + .get_mut(&time) + .ok_or_else(|| anyhow!("join instance missing after insert")) + } + + async fn process_side_internal( + &mut self, + side: JoinSide, + batch: RecordBatch, + ctx: &mut TaskContext, + ) -> Result<()> { + if batch.num_rows() == 0 { + return Ok(()); + } + + let time_column = batch + .column(self.input_schema(side).timestamp_index) + .as_any() + .downcast_ref::() + .ok_or_else(|| anyhow!("Missing timestamp column"))?; + + let min_timestamp = min(time_column).ok_or_else(|| anyhow!("empty timestamp column"))?; + let max_timestamp = 
max(time_column).ok_or_else(|| anyhow!("empty timestamp column"))?; + + if let Some(watermark) = ctx.last_present_watermark() { + if watermark > from_nanos(min_timestamp as u128) { + warn!("Dropped late batch from {:?} before watermark", side); + return Ok(()); + } + } + + let wm = ctx.last_present_watermark(); + { + let mut tm = ctx.table_manager_guard().await?; + let table = tm + .get_expiring_time_key_table(side.name(), wm) + .await + .map_err(|e| anyhow!("{e:?}"))?; + table.insert(from_nanos(max_timestamp as u128), batch.clone()); + } + + let unkeyed_batch = self.input_schema(side).unkeyed_batch(&batch)?; + + if max_timestamp == min_timestamp { + let time_key = from_nanos(max_timestamp as u128); + let join_instance = self.get_or_create_join_instance(time_key)?; + join_instance.feed_data(unkeyed_batch, side)?; + return Ok(()); + } + + let indices = sort_to_indices(time_column, None, None)?; + let columns: Vec<_> = unkeyed_batch + .columns() + .iter() + .map(|c| take(c, &indices, None).unwrap()) + .collect(); + let sorted_batch = RecordBatch::try_new(unkeyed_batch.schema(), columns)?; + let sorted_timestamps = take(time_column, &indices, None).unwrap(); + let typed_timestamps = sorted_timestamps + .as_any() + .downcast_ref::() + .ok_or_else(|| anyhow!("sorted timestamps downcast failed"))?; + let ranges = partition(std::slice::from_ref(&sorted_timestamps)) + .unwrap() + .ranges(); + + for range in ranges { + let sub_batch = sorted_batch.slice(range.start, range.end - range.start); + let time_key = from_nanos(typed_timestamps.value(range.start) as u128); + let join_instance = self.get_or_create_join_instance(time_key)?; + join_instance.feed_data(sub_batch, side)?; + } + + Ok(()) + } +} + +#[async_trait] +impl MessageOperator for InstantJoinOperator { + fn name(&self) -> &str { + "InstantJoin" + } + + async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { + let watermark = ctx.last_present_watermark(); + + let left_batches: Vec<_> = { + let mut tm = 
ctx.table_manager_guard().await?; + let left_table = tm + .get_expiring_time_key_table("left", watermark) + .await + .map_err(|e| anyhow!("{e:?}"))?; + left_table + .all_batches_for_watermark(watermark) + .flat_map(|(_time, batches)| batches.iter().cloned()) + .collect() + }; + for batch in left_batches { + self.process_side_internal(JoinSide::Left, batch, ctx).await?; + } + + let right_batches: Vec<_> = { + let mut tm = ctx.table_manager_guard().await?; + let right_table = tm + .get_expiring_time_key_table("right", watermark) + .await + .map_err(|e| anyhow!("{e:?}"))?; + right_table + .all_batches_for_watermark(watermark) + .flat_map(|(_time, batches)| batches.iter().cloned()) + .collect() + }; + for batch in right_batches { + self.process_side_internal(JoinSide::Right, batch, ctx).await?; + } + + Ok(()) + } + + async fn process_data( + &mut self, + input_idx: usize, + batch: RecordBatch, + ctx: &mut TaskContext, + ) -> Result> { + let side = if input_idx == 0 { + JoinSide::Left + } else { + JoinSide::Right + }; + self.process_side_internal(side, batch, ctx).await?; + Ok(vec![]) + } + + async fn process_watermark( + &mut self, + watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + let Watermark::EventTime(current_time) = watermark else { + return Ok(vec![]); + }; + let mut emit_outputs = Vec::new(); + + let mut expired_times = Vec::new(); + for key in self.active_joins.keys() { + if *key < current_time { + expired_times.push(*key); + } else { + break; + } + } + + for time_key in expired_times { + if let Some(join_instance) = self.active_joins.remove(&time_key) { + let joined_batches = join_instance.close_and_drain().await?; + for batch in joined_batches { + emit_outputs.push(StreamOutput::Forward(batch)); + } + } + } + + Ok(emit_outputs) + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + ctx: &mut TaskContext, + ) -> Result<()> { + let watermark = ctx.last_present_watermark(); + let mut tm = 
ctx.table_manager_guard().await?; + tm.get_expiring_time_key_table("left", watermark) + .await + .map_err(|e| anyhow!("{e:?}"))? + .flush(watermark) + .await + .map_err(|e| anyhow!("{e:?}"))?; + tm.get_expiring_time_key_table("right", watermark) + .await + .map_err(|e| anyhow!("{e:?}"))? + .flush(watermark) + .await + .map_err(|e| anyhow!("{e:?}"))?; + Ok(()) + } +} + +/// 与 `OperatorConstructor` 类似的配置入口;返回 [`InstantJoinOperator`](实现 [`MessageOperator`]), +/// 而非 `ConstructedOperator`(后者仅包装 `ArrowOperator`)。 +pub struct InstantJoinConstructor; + +impl InstantJoinConstructor { + pub fn with_config( + &self, + config: JoinOperator, + registry: Arc, + ) -> anyhow::Result { + let join_physical_plan_node = PhysicalPlanNode::decode(&mut config.join_plan.as_slice())?; + + let left_input_schema: Arc = + Arc::new(config.left_schema.unwrap().try_into()?); + let right_input_schema: Arc = + Arc::new(config.right_schema.unwrap().try_into()?); + + let left_receiver_hook = Arc::new(RwLock::new(None)); + let right_receiver_hook = Arc::new(RwLock::new(None)); + + let codec = FsPhysicalExtensionCodec { + context: DecodingContext::LockedJoinStream { + left: left_receiver_hook.clone(), + right: right_receiver_hook.clone(), + }, + }; + + let join_exec_plan = join_physical_plan_node.try_into_physical_plan( + registry.as_ref(), + &RuntimeEnvBuilder::new().build()?, + &codec, + )?; + + Ok(InstantJoinOperator { + left_input_schema, + right_input_schema, + active_joins: BTreeMap::new(), + left_receiver_hook, + right_receiver_hook, + join_exec_plan, + }) + } +} diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs new file mode 100644 index 00000000..d115ac10 --- /dev/null +++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs @@ -0,0 +1,261 @@ +//! 
带 TTL 的 Key-Time Join:两侧状态表 + DataFusion 物理计划成对计算。 + +use anyhow::{anyhow, Result}; +use arrow::compute::concat_batches; +use arrow_array::RecordBatch; +use datafusion::execution::context::SessionContext; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::physical_plan::ExecutionPlan; +use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode}; +use futures::StreamExt; +use prost::Message; +use std::sync::{Arc, RwLock}; +use std::time::Duration; +use tracing::warn; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use async_trait::async_trait; +use tracing_subscriber::Registry; +use protocol::grpc::api::JoinOperator; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark}; +use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum JoinSide { + Left, + Right, +} + +impl JoinSide { + fn table_name(&self) -> &'static str { + match self { + JoinSide::Left => "left", + JoinSide::Right => "right", + } + } +} + +pub struct JoinWithExpirationOperator { + /// 保留与配置/表注册语义一致;实际 TTL 由状态表配置决定。 + #[allow(dead_code)] + left_expiration: Duration, + #[allow(dead_code)] + right_expiration: Duration, + left_input_schema: FsSchema, + right_input_schema: FsSchema, + left_schema: FsSchema, + right_schema: FsSchema, + left_passer: Arc>>, + right_passer: Arc>>, + join_exec_plan: Arc, +} + +impl JoinWithExpirationOperator { + /// 执行 DataFusion 物理计划,返回 JOIN 结果批次(不经过 Collector)。 + async fn compute_pair( + &mut self, + left: RecordBatch, + right: RecordBatch, + ) -> Result> { + if left.num_rows() == 0 || right.num_rows() == 0 { + return Ok(vec![]); + } + + { + self.left_passer.write().unwrap().replace(left); + self.right_passer.write().unwrap().replace(right); + } + + self.join_exec_plan + .reset() + .map_err(|e| anyhow!("join plan reset: 
{e}"))?; + let mut result_stream = self + .join_exec_plan + .execute(0, SessionContext::new().task_ctx()) + .map_err(|e| anyhow!("join execute: {e}"))?; + + let mut outputs = Vec::new(); + while let Some(batch) = result_stream.next().await { + outputs.push(batch.map_err(|e| anyhow!("{e}"))?); + } + + Ok(outputs) + } + + async fn process_side( + &mut self, + side: JoinSide, + batch: RecordBatch, + ctx: &mut TaskContext, + ) -> Result> { + let watermark = ctx.last_present_watermark(); + let target_name = side.table_name(); + let opposite_name = match side { + JoinSide::Left => JoinSide::Right.table_name(), + JoinSide::Right => JoinSide::Left.table_name(), + }; + + let mut tm = ctx.table_manager_guard().await?; + + let inserted_rows = { + let target_table = tm + .get_key_time_table(target_name, watermark) + .await + .map_err(|e| anyhow!("{e:?}"))?; + target_table + .insert(batch.clone()) + .await + .map_err(|e| anyhow!("{e:?}"))? + }; + + let opposite_table = tm + .get_key_time_table(opposite_name, watermark) + .await + .map_err(|e| anyhow!("{e:?}"))?; + + let mut opposite_batches = Vec::new(); + for row in inserted_rows { + if let Some(matched_batch) = opposite_table + .get_batch(row.as_ref()) + .map_err(|e| anyhow!("{e:?}"))? 
+ { + opposite_batches.push(matched_batch.clone()); + } + } + + drop(tm); + + if opposite_batches.is_empty() { + return Ok(vec![]); + } + + let opposite_schema = match side { + JoinSide::Left => &self.right_schema.schema, + JoinSide::Right => &self.left_schema.schema, + }; + let combined_opposite_batch = concat_batches(opposite_schema, opposite_batches.iter())?; + + let unkeyed_target_batch = match side { + JoinSide::Left => self.left_input_schema.unkeyed_batch(&batch)?, + JoinSide::Right => self.right_input_schema.unkeyed_batch(&batch)?, + }; + + let (left_input, right_input) = match side { + JoinSide::Left => (unkeyed_target_batch, combined_opposite_batch), + JoinSide::Right => (combined_opposite_batch, unkeyed_target_batch), + }; + + let result_batches = self.compute_pair(left_input, right_input).await?; + + Ok(result_batches + .into_iter() + .map(StreamOutput::Forward) + .collect()) + } +} + +#[async_trait] +impl MessageOperator for JoinWithExpirationOperator { + fn name(&self) -> &str { + "JoinWithExpiration" + } + + async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> { + Ok(()) + } + + async fn process_data( + &mut self, + input_idx: usize, + batch: RecordBatch, + ctx: &mut TaskContext, + ) -> Result> { + let side = if input_idx == 0 { + JoinSide::Left + } else { + JoinSide::Right + }; + self.process_side(side, batch, ctx).await + } + + async fn process_watermark( + &mut self, + _watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + Ok(vec![]) + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + _ctx: &mut TaskContext, + ) -> Result<()> { + // `KeyTimeView` 无 `flush`;写入已通过 `insert` 经 `state_tx` 进入后端刷写管线, + // 与 worker 侧 `JoinWithExpiration` 未单独实现 `handle_checkpoint` 一致。 + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + Ok(vec![]) + } +} + +/// 从配置构造 [`JoinWithExpirationOperator`](实现 [`MessageOperator`])。 +/// 注意:`ConstructedOperator` 仅包装 `ArrowOperator`,此处不返回该类型。 +pub 
struct JoinWithExpirationConstructor; + +impl JoinWithExpirationConstructor { + pub fn with_config( + &self, + config: JoinOperator, + registry: Arc, + ) -> anyhow::Result { + let left_passer = Arc::new(RwLock::new(None)); + let right_passer = Arc::new(RwLock::new(None)); + + let codec = FsPhysicalExtensionCodec { + context: DecodingContext::LockedJoinPair { + left: left_passer.clone(), + right: right_passer.clone(), + }, + }; + + let join_physical_plan_node = PhysicalPlanNode::decode(&mut config.join_plan.as_slice())?; + let join_exec_plan = join_physical_plan_node.try_into_physical_plan( + registry.as_ref(), + &RuntimeEnvBuilder::new().build()?, + &codec, + )?; + + let left_input_schema: FsSchema = config.left_schema.unwrap().try_into()?; + let right_input_schema: FsSchema = config.right_schema.unwrap().try_into()?; + let left_schema = left_input_schema.schema_without_keys()?; + let right_schema = right_input_schema.schema_without_keys()?; + + let mut ttl = Duration::from_micros( + config + .ttl_micros + .expect("ttl must be set for non-instant join"), + ); + + if ttl == Duration::ZERO { + warn!("TTL was not set for join with expiration, defaulting to 24 hours."); + ttl = Duration::from_secs(24 * 60 * 60); + } + + Ok(JoinWithExpirationOperator { + left_expiration: ttl, + right_expiration: ttl, + left_input_schema, + right_input_schema, + left_schema, + right_schema, + left_passer, + right_passer, + join_exec_plan, + }) + } +} diff --git a/src/runtime/streaming/operators/joins/lookup_join.rs b/src/runtime/streaming/operators/joins/lookup_join.rs new file mode 100644 index 00000000..b302d198 --- /dev/null +++ b/src/runtime/streaming/operators/joins/lookup_join.rs @@ -0,0 +1,363 @@ +//! 
维表 Lookup Join(Enrichment):与 worker `arrow/lookup_join` 逻辑对齐,实现 [`MessageOperator`]。 + +use anyhow::{anyhow, Result}; +use arrow::compute::filter_record_batch; +use arrow::row::{OwnedRow, RowConverter, SortField}; +use arrow_array::cast::AsArray; +use arrow_array::types::UInt64Type; +use arrow_array::{Array, BooleanArray, RecordBatch}; +use arrow_schema::{DataType, Field, FieldRef, Schema}; +use async_trait::async_trait; +use datafusion::physical_expr::PhysicalExpr; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::from_proto::parse_physical_expr; +use datafusion_proto::protobuf::PhysicalExprNode; +use mini_moka::sync::Cache; +use prost::Message; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Duration; +use protocol::grpc::api::JoinType; +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::protocol::stream_output::StreamOutput; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{CheckpointBarrier, FsSchema, MetadataField, OperatorConfig, Watermark, LOOKUP_KEY_INDEX_FIELD}; + +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum LookupJoinType { + Left, + Inner, +} + +/// 维表查询连接算子:外部系统打宽 + 可选 LRU 缓存。 +pub struct LookupJoinOperator { + name: String, + connector: Box, + key_exprs: Vec>, + cache: Option>, + key_row_converter: RowConverter, + result_row_converter: RowConverter, + join_type: LookupJoinType, + lookup_schema: Arc, + metadata_fields: Vec, + input_schema: Arc, + /// 与 worker 侧 `ctx.out_schema` 对齐:由 input 去 key + lookup 列 + 时间列拼成。 + output_schema: Arc, +} + +fn build_lookup_output_schema( + input: &FsSchema, + lookup_columns: &[FieldRef], +) -> anyhow::Result> { + let key_indices = input.routing_keys().cloned().unwrap_or_default(); + let ts = input.timestamp_index; + let mut out: Vec = Vec::new(); + for i in 0..input.schema.fields().len() { + if 
key_indices.contains(&i) || i == ts { + continue; + } + out.push(input.schema.fields()[i].clone()); + } + out.extend(lookup_columns.iter().cloned()); + out.push(input.schema.fields()[ts].clone()); + Ok(Arc::new(Schema::new(out))) +} + +impl LookupJoinOperator { + async fn process_lookup_batch(&mut self, batch: RecordBatch) -> Result> { + let num_rows = batch.num_rows(); + if num_rows == 0 { + return Ok(vec![]); + } + + let key_arrays: Vec<_> = self + .key_exprs + .iter() + .map(|expr| { + expr.evaluate(&batch) + .map_err(|e| anyhow!("key expr evaluate: {e}"))? + .into_array(num_rows) + .map_err(|e| anyhow!("key expr into_array: {e}")) + }) + .collect::>()?; + + let rows = self + .key_row_converter + .convert_columns(&key_arrays) + .map_err(|e| anyhow!("key_row_converter: {e}"))?; + + let mut key_map: HashMap> = HashMap::new(); + for (i, row) in rows.iter().enumerate() { + key_map.entry(row.owned()).or_default().push(i); + } + + let uncached_keys: Vec<&OwnedRow> = if let Some(cache) = &mut self.cache { + key_map + .keys() + .filter(|k| !cache.contains_key(*k)) + .collect() + } else { + key_map.keys().collect() + }; + + // 按 key 字节存 OwnedRow,避免借用 `convert_columns` 返回的临时行缓冲。 + let mut results: HashMap, OwnedRow> = HashMap::new(); + + if !uncached_keys.is_empty() { + let cols = self + .key_row_converter + .convert_rows(uncached_keys.iter().map(|r| r.row())) + .map_err(|e| anyhow!("convert_rows for lookup: {e}"))?; + + if let Some(result_batch) = self.connector.lookup(&cols).await { + let mut result_batch = result_batch.map_err(|e| anyhow!("connector lookup: {e}"))?; + + let key_idx_col = result_batch + .schema() + .index_of(LOOKUP_KEY_INDEX_FIELD) + .map_err(|e| anyhow!("{e}"))?; + let keys = result_batch.remove_column(key_idx_col); + let keys = keys.as_primitive::(); + + let result_rows = self + .result_row_converter + .convert_columns(result_batch.columns()) + .map_err(|e| anyhow!("result_row_converter: {e}"))?; + + for (i, v) in result_rows.iter().enumerate() { + if 
keys.is_null(i) { + return Err(anyhow!("lookup key index is null at row {i}")); + } + let req_idx = keys.value(i) as usize; + if req_idx >= uncached_keys.len() { + return Err(anyhow!( + "lookup key index {req_idx} out of range ({} keys)", + uncached_keys.len() + )); + } + let key_bytes = uncached_keys[req_idx].as_ref().to_vec(); + let owned = v.owned(); + results.insert(key_bytes.clone(), owned.clone()); + if let Some(cache) = &mut self.cache { + cache.insert(uncached_keys[req_idx].clone(), owned); + } + } + } + } + + let mut output_rows = self + .result_row_converter + .empty_rows(batch.num_rows(), batch.num_rows().saturating_mul(10)); + + for row in rows.iter() { + let row_owned = self + .cache + .as_mut() + .and_then(|c| c.get(&row.owned())) + .unwrap_or_else(|| { + results + .get(row.as_ref()) + .expect("missing lookup result for key (cache miss without connector row)") + .clone() + }); + output_rows.push(row_owned.row()); + } + + let right_side = self + .result_row_converter + .convert_rows(output_rows.iter()) + .map_err(|e| anyhow!("convert_rows output: {e}"))?; + + let nonnull = (self.join_type == LookupJoinType::Inner).then(|| { + let mut nonnull = vec![false; batch.num_rows()]; + for (_, a) in self + .lookup_schema + .fields() + .iter() + .zip(right_side.iter()) + .filter(|(f, _)| { + !self + .metadata_fields + .iter() + .any(|m| &m.field_name == f.name()) + }) + { + if let Some(nulls) = a.logical_nulls() { + for (valid, b) in nulls.iter().zip(nonnull.iter_mut()) { + *b |= valid; + } + } else { + nonnull.fill(true); + break; + } + } + BooleanArray::from(nonnull) + }); + + let key_indices = self + .input_schema + .routing_keys() + .cloned() + .unwrap_or_default(); + let non_keys: Vec<_> = (0..batch.num_columns()) + .filter(|i| !key_indices.contains(i) && *i != self.input_schema.timestamp_index) + .collect(); + + let mut result_cols = batch + .project(&non_keys) + .map_err(|e| anyhow!("project non_keys: {e}"))? 
+ .columns() + .to_vec(); + result_cols.extend(right_side); + result_cols.push(batch.column(self.input_schema.timestamp_index).clone()); + + let mut out_batch = RecordBatch::try_new(self.output_schema.clone(), result_cols) + .map_err(|e| anyhow!("try_new output batch: {e}"))?; + + if let Some(mask) = nonnull { + out_batch = filter_record_batch(&out_batch, &mask).map_err(|e| anyhow!("{e}"))?; + } + + if out_batch.num_rows() == 0 { + return Ok(vec![]); + } + + Ok(vec![StreamOutput::Forward(out_batch)]) + } +} + +#[async_trait] +impl MessageOperator for LookupJoinOperator { + fn name(&self) -> &str { + &self.name + } + + async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> { + Ok(()) + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + _ctx: &mut TaskContext, + ) -> Result> { + self.process_lookup_batch(batch).await + } + + async fn process_watermark( + &mut self, + _watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + Ok(vec![]) + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + _ctx: &mut TaskContext, + ) -> Result<()> { + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + Ok(vec![]) + } +} + +/// 从配置构造 [`LookupJoinOperator`](非 `ConstructedOperator` / `ArrowOperator`)。 +pub struct LookupJoinConstructor; + +impl LookupJoinConstructor { + pub fn with_config( + &self, + config: LookupJoinOperator, + registry: Arc, + ) -> anyhow::Result { + let join_type = config.join_type(); + let input_schema: FsSchema = config.input_schema.unwrap().try_into()?; + let lookup_schema: FsSchema = config.lookup_schema.unwrap().try_into()?; + + let exprs = config + .key_exprs + .iter() + .map(|e| { + let expr = PhysicalExprNode::decode(&mut e.left_expr.as_slice())?; + Ok(parse_physical_expr( + &expr, + registry.as_ref(), + &input_schema.schema, + &DefaultPhysicalExtensionCodec {}, + )?) 
+ }) + .collect::>>()?; + + let op = config.connector.unwrap(); + let operator_config: OperatorConfig = serde_json::from_str(&op.config)?; + + let result_row_converter = RowConverter::new( + lookup_schema + .schema_without_timestamp() + .fields + .iter() + .map(|f| SortField::new(f.data_type().clone())) + .collect(), + )?; + + let lookup_schema_arc = Arc::new( + lookup_schema + .with_additional_fields( + [Field::new(LOOKUP_KEY_INDEX_FIELD, DataType::UInt64, false)].into_iter(), + )? + .schema_without_timestamp(), + ); + + let output_schema = build_lookup_output_schema(&input_schema, lookup_schema_arc.fields())?; + + let connector = connectors() + .get(op.connector.as_str()) + .unwrap_or_else(|| panic!("No connector with name '{}'", op.connector)) + .make_lookup(operator_config.clone(), lookup_schema_arc.clone())?; + + let name = format!("LookupJoin({})", connector.name()); + + let max_capacity_bytes = config.max_capacity_bytes.unwrap_or(8 * 1024 * 1024); + let cache = (max_capacity_bytes > 0).then(|| { + let mut c = Cache::builder() + .weigher(|k: &OwnedRow, v: &OwnedRow| (k.as_ref().len() + v.as_ref().len()) as u32) + .max_capacity(max_capacity_bytes); + + if let Some(ttl) = config.ttl_micros { + c = c.time_to_live(Duration::from_micros(ttl)); + } + c.build() + }); + + let key_row_converter = RowConverter::new( + exprs + .iter() + .map(|e| Ok(SortField::new(e.data_type(&input_schema.schema)?))) + .collect::>()?, + )?; + + Ok(LookupJoinOperator { + name, + connector, + key_exprs: exprs, + cache, + key_row_converter, + result_row_converter, + join_type: match join_type { + JoinType::Inner => LookupJoinType::Inner, + JoinType::Left => LookupJoinType::Left, + jt => panic!("invalid lookup join type {:?}", jt), + }, + lookup_schema: lookup_schema_arc, + metadata_fields: operator_config.metadata_fields, + input_schema: Arc::new(input_schema), + output_schema, + }) + } +} diff --git a/src/runtime/streaming/operators/joins/mod.rs 
b/src/runtime/streaming/operators/joins/mod.rs new file mode 100644 index 00000000..d53e4b91 --- /dev/null +++ b/src/runtime/streaming/operators/joins/mod.rs @@ -0,0 +1,7 @@ +pub mod join_instance; +pub mod join_with_expiration; +pub mod lookup_join; + +pub use join_instance::{InstantJoinConstructor, InstantJoinOperator}; +pub use join_with_expiration::{JoinWithExpirationConstructor, JoinWithExpirationOperator}; +pub use lookup_join::{LookupJoinConstructor, LookupJoinOperator, LookupJoinType}; diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs new file mode 100644 index 00000000..fe2a7d9e --- /dev/null +++ b/src/runtime/streaming/operators/mod.rs @@ -0,0 +1,75 @@ +//! 内置算子。 + +pub mod grouping; +pub mod joins; +pub mod sink; +pub mod source; +pub mod watermark; +pub mod windows; + +pub use grouping::{ + IncrementalAggregatingConstructor, IncrementalAggregatingFunc, Key, UpdatingCache, +}; +pub use joins::{ + InstantJoinConstructor, InstantJoinOperator, JoinWithExpirationConstructor, + JoinWithExpirationOperator, LookupJoinConstructor, LookupJoinOperator, LookupJoinType, +}; +pub use sink::{ConsistencyMode, KafkaSinkOperator}; +pub use source::{BatchDeserializer, KafkaSourceOperator, KafkaState}; +pub use watermark::{WatermarkGeneratorConstructor, WatermarkGeneratorOperator, WatermarkGeneratorState}; +pub use windows::{ + SessionAggregatingWindowConstructor, SessionWindowOperator, + SlidingAggregatingWindowConstructor, SlidingWindowOperator, + TumblingAggregateWindowConstructor, TumblingWindowOperator, WindowFunctionConstructor, + WindowFunctionOperator, +}; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use arrow_array::RecordBatch; +use async_trait::async_trait; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{CheckpointBarrier, Watermark}; + +/// 透传数据。 +pub struct PassthroughOperator { + name: String, +} + +impl 
PassthroughOperator { + pub fn new(name: impl Into) -> Self { + Self { name: name.into() } + } +} + +#[async_trait] +impl MessageOperator for PassthroughOperator { + fn name(&self) -> &str { + &self.name + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + _ctx: &mut TaskContext, + ) -> anyhow::Result> { + Ok(vec![StreamOutput::Forward(batch)]) + } + + async fn process_watermark( + &mut self, + _watermark: Watermark, + _ctx: &mut TaskContext, + ) -> anyhow::Result> { + Ok(vec![]) + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + _ctx: &mut TaskContext, + ) -> anyhow::Result<()> { + Ok(()) + } +} diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs new file mode 100644 index 00000000..9161ac7b --- /dev/null +++ b/src/runtime/streaming/operators/sink/kafka/mod.rs @@ -0,0 +1,366 @@ +//! Kafka Sink:实现 [`crate::runtime::streaming::api::operator::MessageOperator`],支持 At-Least-Once 与 Exactly-Once(事务 + 二阶段提交)。 + +use anyhow::{anyhow, bail, Result}; +use arrow_array::cast::AsArray; +use arrow_array::Array; +use arrow_array::RecordBatch; +use arrow_schema::{DataType, TimeUnit}; +use async_trait::async_trait; +use rdkafka::error::{KafkaError, RDKafkaErrorCode}; +use rdkafka::producer::{DeliveryFuture, FutureProducer, FutureRecord, Producer}; +use rdkafka::util::Timeout; +use rdkafka::ClientConfig; +use std::collections::HashMap; +use std::time::Duration; +use tokio::time::sleep; +use tracing::{info, warn}; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark}; +// ============================================================================ +// 1. 
领域模型:一致性级别与事务状态机 +// ============================================================================ + +#[derive(Debug, Clone)] +pub enum ConsistencyMode { + AtLeastOnce, + ExactlyOnce, +} + +struct TransactionalState { + next_transaction_index: usize, + active_producer: FutureProducer, + producer_awaiting_commit: Option, +} + +// ============================================================================ +// 2. 核心算子外壳 +// ============================================================================ + +pub struct KafkaSinkOperator { + pub topic: String, + pub bootstrap_servers: String, + pub consistency_mode: ConsistencyMode, + pub client_config: HashMap, + + pub input_schema: FsSchema, + pub timestamp_col_idx: Option, + pub key_col_idx: Option, + + pub serializer: ArrowSerializer, + + at_least_once_producer: Option, + transactional_state: Option, + + write_futures: Vec, +} + +impl KafkaSinkOperator { + pub fn new( + topic: String, + bootstrap_servers: String, + consistency_mode: ConsistencyMode, + client_config: HashMap, + input_schema: FsSchema, + serializer: ArrowSerializer, + ) -> Self { + Self { + topic, + bootstrap_servers, + consistency_mode, + client_config, + input_schema, + timestamp_col_idx: None, + key_col_idx: None, + serializer, + at_least_once_producer: None, + transactional_state: None, + write_futures: Vec::new(), + } + } + + fn resolve_schema_indices(&mut self) { + self.timestamp_col_idx = Some(self.input_schema.timestamp_index); + + if let Some(routing_keys) = self.input_schema.routing_keys() { + if !routing_keys.is_empty() { + self.key_col_idx = Some(routing_keys[0]); + } + } + } + + fn create_producer(&self, ctx: &TaskContext, tx_index: Option) -> Result { + let mut config = ClientConfig::new(); + config.set("bootstrap.servers", &self.bootstrap_servers); + + for (k, v) in &self.client_config { + config.set(k, v); + } + + if let Some(idx) = tx_index { + config.set("enable.idempotence", "true"); + let transactional_id = format!( + 
"arroyo-tx-{}-{}-{}-{}", + ctx.job_id, self.topic, ctx.subtask_idx, idx + ); + config.set("transactional.id", &transactional_id); + + let producer: FutureProducer = config.create()?; + producer + .init_transactions(Timeout::After(Duration::from_secs(30))) + .map_err(|e| anyhow!("Failed to init Kafka transactions: {}", e))?; + producer + .begin_transaction() + .map_err(|e| anyhow!("Failed to begin Kafka transaction: {}", e))?; + + Ok(producer) + } else { + Ok(config.create()?) + } + } + + async fn flush_to_broker(&mut self) -> Result<()> { + let producer = self.current_producer(); + + producer.poll(Timeout::After(Duration::ZERO)); + + for future in self.write_futures.drain(..) { + match future.await { + Ok(Ok(_)) => continue, + Ok(Err((e, _))) => bail!("Kafka producer delivery failed: {}", e), + Err(_) => bail!("Kafka delivery future canceled"), + } + } + Ok(()) + } + + fn current_producer(&self) -> &FutureProducer { + match &self.consistency_mode { + ConsistencyMode::AtLeastOnce => self.at_least_once_producer.as_ref().unwrap(), + ConsistencyMode::ExactlyOnce => &self.transactional_state.as_ref().unwrap().active_producer, + } + } +} + +fn event_timestamp_ms(batch: &RecordBatch, row: usize, col: usize) -> Option { + let arr = batch.column(col); + match arr.data_type() { + DataType::Timestamp(TimeUnit::Second, _) => { + let a = arr.as_primitive::(); + (!a.is_null(row)).then(|| a.value(row) * 1000) + } + DataType::Timestamp(TimeUnit::Millisecond, _) => { + let a = arr.as_primitive::(); + (!a.is_null(row)).then(|| a.value(row)) + } + DataType::Timestamp(TimeUnit::Microsecond, _) => { + let a = arr.as_primitive::(); + (!a.is_null(row)).then(|| a.value(row) / 1000) + } + DataType::Timestamp(TimeUnit::Nanosecond, _) => { + let a = arr.as_primitive::(); + (!a.is_null(row)).then(|| a.value(row) / 1_000_000) + } + _ => None, + } +} + +fn row_key_bytes(batch: &RecordBatch, row: usize, col: usize) -> Option> { + let arr = batch.column(col); + match arr.data_type() { + 
DataType::Utf8 => { + let s = arr.as_string::(); + if s.is_null(row) { + None + } else { + Some(s.value(row).as_bytes().to_vec()) + } + } + DataType::LargeUtf8 => { + let s = arr.as_string::(); + if s.is_null(row) { + None + } else { + Some(s.value(row).as_bytes().to_vec()) + } + } + _ => None, + } +} + +// ============================================================================ +// 3. 实现 MessageOperator 协议 +// ============================================================================ + +#[async_trait] +impl MessageOperator for KafkaSinkOperator { + fn name(&self) -> &str { + "KafkaSink" + } + + async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { + self.resolve_schema_indices(); + + match self.consistency_mode { + ConsistencyMode::AtLeastOnce => { + self.at_least_once_producer = Some(self.create_producer(ctx, None)?); + } + ConsistencyMode::ExactlyOnce => { + let mut next_idx = { + let mut tm = ctx.table_manager_guard().await?; + let index_table = tm + .get_global_keyed_state::("tx_idx") + .await + .map_err(|e| anyhow!(e))?; + index_table.get(&ctx.subtask_idx).copied().unwrap_or(0) + }; + + let active_producer = self.create_producer(ctx, Some(next_idx))?; + next_idx += 1; + + self.transactional_state = Some(TransactionalState { + next_transaction_index: next_idx, + active_producer, + producer_awaiting_commit: None, + }); + } + } + Ok(()) + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + _ctx: &mut TaskContext, + ) -> Result> { + let payload_iter = self.serializer.serialize(&batch); + let producer = self.current_producer().clone(); + + for (i, payload) in payload_iter.enumerate() { + let ts_millis = self + .timestamp_col_idx + .and_then(|idx| event_timestamp_ms(&batch, i, idx)); + let key_bytes = self + .key_col_idx + .and_then(|idx| row_key_bytes(&batch, i, idx)); + + let mut record = FutureRecord::, Vec>::to(&self.topic).payload(&payload); + if let Some(ts) = ts_millis { + record = record.timestamp(ts); 
+ } + if let Some(ref k) = key_bytes { + record = record.key(k); + } + + loop { + match producer.send_result(record) { + Ok(delivery_future) => { + self.write_futures.push(delivery_future); + break; + } + Err((KafkaError::MessageProduction(RDKafkaErrorCode::QueueFull), returned_record)) => { + record = returned_record; + sleep(Duration::from_millis(10)).await; + } + Err((e, _)) => bail!("Fatal Kafka send error: {}", e), + } + } + } + + Ok(vec![]) + } + + async fn process_watermark( + &mut self, + _watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + Ok(vec![]) + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + ctx: &mut TaskContext, + ) -> Result<()> { + self.flush_to_broker().await?; + + if matches!(self.consistency_mode, ConsistencyMode::ExactlyOnce) { + let next_tx = self + .transactional_state + .as_ref() + .map(|s| s.next_transaction_index) + .unwrap(); + let new_producer = self.create_producer(ctx, Some(next_tx))?; + + let state = self.transactional_state.as_mut().unwrap(); + let old_producer = std::mem::replace(&mut state.active_producer, new_producer); + state.producer_awaiting_commit = Some(old_producer); + + { + let mut tm = ctx.table_manager_guard().await?; + let index_table = tm + .get_global_keyed_state::("tx_idx") + .await + .map_err(|e| anyhow!(e))?; + index_table + .insert(ctx.subtask_idx, state.next_transaction_index) + .await; + } + + state.next_transaction_index += 1; + } + + Ok(()) + } + + async fn commit_checkpoint(&mut self, epoch: u32, _ctx: &mut TaskContext) -> Result<()> { + if matches!(self.consistency_mode, ConsistencyMode::AtLeastOnce) { + return Ok(()); + } + + let state = self.transactional_state.as_mut().unwrap(); + let Some(committing_producer) = state.producer_awaiting_commit.take() else { + warn!( + "Received Commit for epoch {}, but no stashed producer exists. 
Possibly a recovery duplicate.", + epoch + ); + return Ok(()); + }; + + let mut retries = 0; + loop { + match committing_producer.commit_transaction(Timeout::After(Duration::from_secs(10))) { + Ok(_) => { + info!("Successfully committed Kafka transaction for epoch {}", epoch); + break; + } + Err(e) => { + retries += 1; + if retries >= 5 { + bail!( + "Failed to commit Kafka transaction after 5 retries. Fatal error: {}", + e + ); + } + warn!( + "Failed to commit Kafka transaction (Attempt {}/5): {}. Retrying...", + retries, e + ); + sleep(Duration::from_secs(2)).await; + } + } + } + + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + self.flush_to_broker().await?; + info!("Kafka sink shut down gracefully."); + Ok(vec![]) + } +} diff --git a/src/runtime/streaming/operators/sink/mod.rs b/src/runtime/streaming/operators/sink/mod.rs new file mode 100644 index 00000000..3b88f563 --- /dev/null +++ b/src/runtime/streaming/operators/sink/mod.rs @@ -0,0 +1,5 @@ +//! 与外部系统对接的 Sink 实现(Kafka 等)。 + +pub mod kafka; + +pub use kafka::{ConsistencyMode, KafkaSinkOperator}; diff --git a/src/runtime/streaming/operators/source/kafka/mod.rs b/src/runtime/streaming/operators/source/kafka/mod.rs new file mode 100644 index 00000000..d0c67972 --- /dev/null +++ b/src/runtime/streaming/operators/source/kafka/mod.rs @@ -0,0 +1,325 @@ +//! 
Kafka 源算子:实现 [`crate::runtime::streaming::api::source::SourceOperator`],由 [`crate::runtime::streaming::execution::SourceRunner`] 轮询 `fetch_next`。 + +use anyhow::{anyhow, Context as _, Result}; +use async_trait::async_trait; +use bincode::{Decode, Encode}; +use governor::{DefaultDirectRateLimiter, Quota, RateLimiter as GovernorRateLimiter}; +use rdkafka::consumer::{CommitMode, Consumer, StreamConsumer}; +use rdkafka::{ClientConfig, Message as KMessage, Offset, TopicPartitionList}; +use std::collections::HashMap; +use std::num::NonZeroU32; +use std::time::Duration; +use tracing::{debug, error, info, warn}; +use arrow_array::RecordBatch; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::source::{SourceEvent, SourceOffset, SourceOperator}; +use crate::sql::common::{CheckpointBarrier, MetadataField}; +// ============================================================================ +// 1. 领域模型:Kafka 状态与配置 +// ============================================================================ + +#[derive(Copy, Clone, Debug, Encode, Decode, PartialEq, PartialOrd)] +pub struct KafkaState { + partition: i32, + offset: i64, +} + +/// 模拟 Arroyo 原版的 Deserializer Buffer +/// (工业实现中,反序列化常带 buffer,满 N 条或超时后吐出一个 [`RecordBatch`])。 +pub trait BatchDeserializer: Send + 'static { + fn deserialize_slice( + &mut self, + payload: &[u8], + timestamp: u64, + metadata: Option>>, + ) -> Result<()>; + + fn should_flush(&self) -> bool; + + fn flush_buffer(&mut self) -> Result>; +} + +impl SourceOffset { + fn rdkafka_offset(self) -> Offset { + match self { + SourceOffset::Earliest => Offset::Beginning, + SourceOffset::Latest => Offset::End, + SourceOffset::Group => Offset::Stored, + } + } +} + +// ============================================================================ +// 2. 
核心算子外壳 +// ============================================================================ + +pub struct KafkaSourceOperator { + pub topic: String, + pub bootstrap_servers: String, + pub group_id: Option, + pub group_id_prefix: Option, + pub offset_mode: SourceOffset, + + pub client_configs: HashMap, + pub messages_per_second: NonZeroU32, + pub metadata_fields: Vec, + + consumer: Option, + rate_limiter: Option, + deserializer: Box, + + current_offsets: HashMap, + is_empty_assignment: bool, +} + +impl KafkaSourceOperator { + pub fn new( + topic: String, + bootstrap_servers: String, + group_id: Option, + group_id_prefix: Option, + offset_mode: SourceOffset, + client_configs: HashMap, + messages_per_second: NonZeroU32, + metadata_fields: Vec, + deserializer: Box, + ) -> Self { + Self { + topic, + bootstrap_servers, + group_id, + group_id_prefix, + offset_mode, + client_configs, + messages_per_second, + metadata_fields, + consumer: None, + rate_limiter: None, + deserializer, + current_offsets: HashMap::new(), + is_empty_assignment: false, + } + } + + async fn init_and_assign_consumer(&mut self, ctx: &mut TaskContext) -> Result<()> { + info!("Creating kafka consumer for {}", self.bootstrap_servers); + let mut client_config = ClientConfig::new(); + + let group_id = match (&self.group_id, &self.group_id_prefix) { + (Some(gid), _) => gid.clone(), + (None, Some(prefix)) => { + format!("{}-arroyo-{}-{}", prefix, ctx.job_id, ctx.subtask_idx) + } + (None, None) => format!("arroyo-{}-{}-consumer", ctx.job_id, ctx.subtask_idx), + }; + + for (key, value) in &self.client_configs { + client_config.set(key, value); + } + + let consumer: StreamConsumer = client_config + .set("bootstrap.servers", &self.bootstrap_servers) + .set("enable.partition.eof", "false") + .set("enable.auto.commit", "false") + .set("group.id", &group_id) + .create()?; + + let (has_state, state_map) = { + let mut tm = ctx.table_manager_guard().await?; + let global_state = tm + .get_global_keyed_state::("k") + .await 
+ .map_err(|e| anyhow!(e))?; + let restored_states: Vec<_> = global_state.get_all().values().copied().collect(); + let has_state = !restored_states.is_empty(); + let state_map: HashMap = + restored_states.into_iter().map(|s| (s.partition, s)).collect(); + (has_state, state_map) + }; + + let metadata = consumer + .fetch_metadata(Some(&self.topic), Duration::from_secs(30)) + .context("Failed to fetch Kafka metadata")?; + + let topic_meta = metadata + .topics() + .iter() + .find(|t| t.name() == self.topic) + .ok_or_else(|| anyhow!("topic {} not in metadata", self.topic))?; + + let partitions = topic_meta.partitions(); + let mut our_partitions = HashMap::new(); + let pmax = ctx.parallelism.max(1) as i32; + + for p in partitions { + if p.id().rem_euclid(pmax) == ctx.subtask_idx as i32 { + let offset = state_map + .get(&p.id()) + .map(|s| Offset::Offset(s.offset)) + .unwrap_or_else(|| { + if has_state { + Offset::Beginning + } else { + self.offset_mode.rdkafka_offset() + } + }); + our_partitions.insert((self.topic.clone(), p.id()), offset); + } + } + + if our_partitions.is_empty() { + warn!( + "[Task {}] Subscribed to no partitions. Entering idle mode.", + ctx.subtask_idx + ); + self.is_empty_assignment = true; + } else { + let topic_partitions = TopicPartitionList::from_topic_map(&our_partitions)?; + consumer.assign(&topic_partitions)?; + } + + self.consumer = Some(consumer); + Ok(()) + } +} + +// ============================================================================ +// 3. 
实现 SourceOperator 协议 +// ============================================================================ + +#[async_trait] +impl SourceOperator for KafkaSourceOperator { + fn name(&self) -> &str { + &self.topic + } + + async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { + self.init_and_assign_consumer(ctx).await?; + self.rate_limiter = Some(GovernorRateLimiter::direct(Quota::per_second( + self.messages_per_second, + ))); + Ok(()) + } + + async fn fetch_next(&mut self, _ctx: &mut TaskContext) -> Result { + if self.is_empty_assignment { + return Ok(SourceEvent::Idle); + } + + let consumer = self + .consumer + .as_ref() + .ok_or_else(|| anyhow!("Kafka consumer not initialized"))?; + let rate_limiter = self + .rate_limiter + .as_ref() + .ok_or_else(|| anyhow!("rate limiter not initialized"))?; + + let recv_result = tokio::time::timeout(Duration::from_millis(50), consumer.recv()).await; + + match recv_result { + Ok(Ok(msg)) => { + if let Some(payload) = msg.payload() { + let timestamp = msg.timestamp().to_millis().unwrap_or(0); + let topic = msg.topic(); + + let connector_metadata = if !self.metadata_fields.is_empty() { + let mut meta = HashMap::new(); + for f in &self.metadata_fields { + meta.insert( + f.field_name.as_str(), + match f.key.as_str() { + "key" => FieldValueType::Bytes(msg.key()), + "offset_id" => FieldValueType::Int64(Some(msg.offset())), + "partition" => FieldValueType::Int32(Some(msg.partition())), + "topic" => FieldValueType::String(Some(topic)), + "timestamp" => FieldValueType::Int64(Some(timestamp)), + _ => continue, + }, + ); + } + Some(meta) + } else { + None + }; + + self.deserializer.deserialize_slice( + payload, + timestamp.max(0) as u64, + connector_metadata, + )?; + + self.current_offsets.insert(msg.partition(), msg.offset()); + + rate_limiter.until_ready().await; + + if self.deserializer.should_flush() { + if let Some(batch) = self.deserializer.flush_buffer()? 
{ + return Ok(SourceEvent::Data(batch)); + } + } + } + Ok(SourceEvent::Idle) + } + Ok(Err(e)) => { + error!("Kafka recv error: {}", e); + Err(anyhow!("Kafka error: {}", e)) + } + Err(_) => { + if self.deserializer.should_flush() { + if let Some(batch) = self.deserializer.flush_buffer()? { + return Ok(SourceEvent::Data(batch)); + } + } + Ok(SourceEvent::Idle) + } + } + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + ctx: &mut TaskContext, + ) -> Result<()> { + debug!("Source [{}] executing checkpoint", ctx.subtask_idx); + + let mut tm = ctx.table_manager_guard().await?; + let global_state = tm + .get_global_keyed_state::("k") + .await + .map_err(|e| anyhow!(e))?; + + let mut topic_partitions = TopicPartitionList::new(); + + for (&partition, &offset) in &self.current_offsets { + global_state + .insert( + partition, + KafkaState { + partition, + offset: offset + 1, + }, + ) + .await; + + topic_partitions + .add_partition_offset(&self.topic, partition, Offset::Offset(offset)) + .map_err(|e| anyhow!("add_partition_offset: {e}"))?; + } + + if let Some(consumer) = &self.consumer { + if let Err(e) = consumer.commit(&topic_partitions, CommitMode::Async) { + warn!("Failed to commit async offset to Kafka Broker: {:?}", e); + } + } + + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<()> { + info!("Kafka source shutting down gracefully"); + self.consumer.take(); + Ok(()) + } +} diff --git a/src/runtime/streaming/operators/source/mod.rs b/src/runtime/streaming/operators/source/mod.rs new file mode 100644 index 00000000..ef4e3cb6 --- /dev/null +++ b/src/runtime/streaming/operators/source/mod.rs @@ -0,0 +1,5 @@ +//! 
与外部系统对接的源实现(Kafka 等)。 + +pub mod kafka; + +pub use kafka::{BatchDeserializer, KafkaSourceOperator, KafkaState}; diff --git a/src/runtime/streaming/operators/watermark/mod.rs b/src/runtime/streaming/operators/watermark/mod.rs new file mode 100644 index 00000000..becc0b8f --- /dev/null +++ b/src/runtime/streaming/operators/watermark/mod.rs @@ -0,0 +1,3 @@ +pub mod watermark_generator; + +pub use watermark_generator::{WatermarkGeneratorConstructor, WatermarkGeneratorOperator, WatermarkGeneratorState}; diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs new file mode 100644 index 00000000..fa97b3d9 --- /dev/null +++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs @@ -0,0 +1,244 @@ +//! 表达式水位生成器:与 worker `arrow/watermark_generator` 对齐,通过 [`StreamOutput::Watermark`] 向下游广播。 + +use anyhow::{anyhow, Result}; +use arrow::compute::kernels::aggregate; +use arrow_array::cast::AsArray; +use arrow_array::types::TimestampNanosecondType; +use arrow_array::{RecordBatch, TimestampNanosecondArray}; +use bincode::{Decode, Encode}; +use datafusion::physical_expr::PhysicalExpr; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::from_proto::parse_physical_expr; +use datafusion_proto::protobuf::PhysicalExprNode; +use prost::Message; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; +use tracing::{debug, info}; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use async_trait::async_trait; +use tracing_subscriber::Registry; +use protocol::grpc::api::ExpressionWatermarkConfig; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{from_nanos, to_millis, CheckpointBarrier, FsSchema, Watermark}; + +/// 需持久化到 Checkpoint 的状态(与 worker `WatermarkGeneratorState` 语义一致)。 +#[derive(Debug, Copy, Clone, Encode, Decode, PartialEq, Eq)] 
+pub struct WatermarkGeneratorState { + pub last_watermark_emitted_at: SystemTime, + pub max_watermark: SystemTime, +} + +impl Default for WatermarkGeneratorState { + fn default() -> Self { + Self { + last_watermark_emitted_at: SystemTime::UNIX_EPOCH, + max_watermark: SystemTime::UNIX_EPOCH, + } + } +} + +pub struct WatermarkGeneratorOperator { + interval: Duration, + idle_time: Option, + expression: Arc, + timestamp_index: usize, + state: WatermarkGeneratorState, + last_event_wall: SystemTime, + is_idle: bool, +} + +impl WatermarkGeneratorOperator { + pub fn new( + interval: Duration, + idle_time: Option, + expression: Arc, + timestamp_index: usize, + ) -> Self { + Self { + interval, + idle_time, + expression, + timestamp_index, + state: WatermarkGeneratorState::default(), + last_event_wall: SystemTime::now(), + is_idle: false, + } + } + + fn extract_max_timestamp(&self, batch: &RecordBatch) -> Option { + let ts_column = batch.column(self.timestamp_index); + let arr = ts_column.as_primitive::(); + let max_ts = aggregate::max(arr)?; + Some(from_nanos(max_ts as u128)) + } + + fn evaluate_watermark(&self, batch: &RecordBatch) -> Result { + let watermark_array = self + .expression + .evaluate(batch)? 
+ .into_array(batch.num_rows())?; + + let typed_array = watermark_array + .as_any() + .downcast_ref::() + .ok_or_else(|| anyhow!("watermark expression must return TimestampNanosecondArray"))?; + + let min_watermark_nanos = aggregate::min(typed_array) + .ok_or_else(|| anyhow!("failed to extract min watermark from batch"))?; + + Ok(from_nanos(min_watermark_nanos as u128)) + } +} + +#[async_trait] +impl MessageOperator for WatermarkGeneratorOperator { + fn name(&self) -> &str { + "ExpressionWatermarkGenerator" + } + + fn tick_interval(&self) -> Option { + Some(Duration::from_secs(1)) + } + + async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { + self.last_event_wall = SystemTime::now(); + + let mut tm = ctx.table_manager_guard().await?; + let gs = tm + .get_global_keyed_state::("s") + .await + .map_err(|e| anyhow!("global keyed state s: {e}"))?; + + if let Some(recovered) = gs.get(&ctx.subtask_idx) { + self.state = *recovered; + } + + Ok(()) + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + ctx: &mut TaskContext, + ) -> Result> { + self.last_event_wall = SystemTime::now(); + + let mut outputs = vec![StreamOutput::Forward(batch.clone())]; + + let Some(max_batch_ts) = self.extract_max_timestamp(&batch) else { + return Ok(outputs); + }; + + let new_watermark = self.evaluate_watermark(&batch)?; + self.state.max_watermark = self.state.max_watermark.max(new_watermark); + + let time_since_last_emit = max_batch_ts + .duration_since(self.state.last_watermark_emitted_at) + .unwrap_or(Duration::ZERO); + + if self.is_idle || time_since_last_emit > self.interval { + debug!( + "[{}] emitting expression watermark {}", + ctx.subtask_idx, + to_millis(self.state.max_watermark) + ); + + outputs.push(StreamOutput::Watermark(Watermark::EventTime( + self.state.max_watermark, + ))); + + self.state.last_watermark_emitted_at = max_batch_ts; + self.is_idle = false; + } + + Ok(outputs) + } + + async fn process_watermark( + &mut self, + 
_watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + Ok(vec![]) + } + + async fn process_tick( + &mut self, + _tick_index: u64, + ctx: &mut TaskContext, + ) -> Result> { + if let Some(idle_timeout) = self.idle_time { + let elapsed = self + .last_event_wall + .elapsed() + .unwrap_or(Duration::ZERO); + if !self.is_idle && elapsed > idle_timeout { + info!( + "task [{}] entering Idle after {:?}", + ctx.subtask_idx, idle_timeout + ); + self.is_idle = true; + return Ok(vec![StreamOutput::Watermark(Watermark::Idle)]); + } + } + Ok(vec![]) + } + + async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { + let mut tm = ctx.table_manager_guard().await?; + tm.get_global_keyed_state::("s") + .await + .map_err(|e| anyhow!("global keyed state s: {e}"))? + .insert(ctx.subtask_idx, self.state) + .await; + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + Ok(vec![StreamOutput::Watermark(Watermark::EventTime(from_nanos( + u64::MAX as u128, + )))]) + } +} + +pub struct WatermarkGeneratorConstructor; + +impl WatermarkGeneratorConstructor { + pub fn with_config( + &self, + config: ExpressionWatermarkConfig, + registry: Arc, + ) -> anyhow::Result { + let input_schema: FsSchema = config + .input_schema + .ok_or_else(|| anyhow!("missing input schema"))? 
+ .try_into() + .map_err(|e| anyhow!("input schema: {e}"))?; + let timestamp_index = input_schema.timestamp_index; + + let expression_node = + PhysicalExprNode::decode(&mut config.expression.as_slice()).map_err(|e| { + anyhow!("decode expression: {e}") + })?; + let expression = parse_physical_expr( + &expression_node, + registry.as_ref(), + &input_schema.schema, + &DefaultPhysicalExtensionCodec {}, + ) + .map_err(|e| anyhow!("parse physical expr: {e}"))?; + + let interval = Duration::from_micros(config.period_micros); + let idle_time = config.idle_time_micros.map(Duration::from_micros); + + Ok(WatermarkGeneratorOperator::new( + interval, + idle_time, + expression, + timestamp_index, + )) + } +} diff --git a/src/runtime/streaming/operators/windows/mod.rs b/src/runtime/streaming/operators/windows/mod.rs new file mode 100644 index 00000000..ba594016 --- /dev/null +++ b/src/runtime/streaming/operators/windows/mod.rs @@ -0,0 +1,9 @@ +pub mod session_aggregating_window; +pub mod sliding_aggregating_window; +pub mod tumbling_aggregating_window; +pub mod window_function; + +pub use session_aggregating_window::{SessionAggregatingWindowConstructor, SessionWindowOperator}; +pub use sliding_aggregating_window::{SlidingAggregatingWindowConstructor, SlidingWindowOperator}; +pub use tumbling_aggregating_window::{TumblingAggregateWindowConstructor, TumblingWindowOperator}; +pub use window_function::{WindowFunctionConstructor, WindowFunctionOperator}; diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs new file mode 100644 index 00000000..ebe75c4c --- /dev/null +++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs @@ -0,0 +1,804 @@ +//! 
会话窗口聚合:与 worker `arrow/session_aggregating_window` 对齐,实现 [`MessageOperator`]。 + +use anyhow::{anyhow, bail, Context, Result}; +use arrow::compute::{ + concat_batches, filter_record_batch, kernels::cmp::gt_eq, lexsort_to_indices, max, partition, take, +}; +use arrow::row::{RowConverter, SortField}; +use arrow_array::types::TimestampNanosecondType; +use arrow_array::{ + Array, BooleanArray, PrimitiveArray, RecordBatch, StructArray, TimestampNanosecondArray, +}; +use arrow_schema::{DataType, Field, FieldRef, Schema}; +use datafusion::execution::context::SessionContext; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::execution::SendableRecordBatchStream; +use datafusion::physical_plan::ExecutionPlan; +use datafusion_proto::physical_plan::AsExecutionPlan; +use datafusion_proto::protobuf::PhysicalPlanNode; +use futures::StreamExt; +use prost::Message; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::sync::{Arc, RwLock}; +use std::time::{Duration, SystemTime}; +use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; +use tracing::warn; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use async_trait::async_trait; +use tracing_subscriber::Registry; +use protocol::grpc::api::SessionWindowAggregateOperator; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; +use crate::sql::common::converter::Converter; +use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; +use crate::sql::schema::utils::window_arrow_struct; +// ============================================================================ +// 领域模型 +// ============================================================================ + +struct SessionWindowConfig { + gap: Duration, + input_schema_ref: FsSchemaRef, + window_field: FieldRef, + window_index: usize, + 
final_physical_exec: Arc, + receiver_hook: Arc>>>, + output_schema: Arc, +} + +struct ActiveSession { + data_start: SystemTime, + data_end: SystemTime, + sender: Option>, + result_stream: SendableRecordBatchStream, +} + +impl ActiveSession { + async fn new( + aggregation_plan: Arc, + initial_timestamp: SystemTime, + sender: UnboundedSender, + ) -> Result { + aggregation_plan.reset()?; + let result_exec = aggregation_plan.execute(0, SessionContext::new().task_ctx())?; + Ok(Self { + data_start: initial_timestamp, + data_end: initial_timestamp, + sender: Some(sender), + result_stream: result_exec, + }) + } + + fn ingest_batch( + &mut self, + batch: RecordBatch, + gap: Duration, + ts_idx: usize, + ) -> Result> { + let ts_col = batch + .column(ts_idx) + .as_any() + .downcast_ref::() + .ok_or_else(|| anyhow!("expected timestamp column"))?; + let start_ts = ts_col.value(0); + let end_ts = ts_col.value(batch.num_rows() - 1); + + let current_end_with_gap = to_nanos(self.data_end + gap) as i64; + + if end_ts < current_end_with_gap { + self.data_end = self.data_end.max(from_nanos(end_ts as u128)); + self.data_start = self.data_start.min(from_nanos(start_ts as u128)); + self.sender + .as_ref() + .ok_or_else(|| anyhow!("session sender already closed"))? + .send(batch) + .map_err(|e| anyhow!("session channel send: {e}"))?; + return Ok(None); + } + + if current_end_with_gap < start_ts { + return Ok(Some((from_nanos(start_ts as u128), batch))); + } + + self.data_start = self.data_start.min(from_nanos(start_ts as u128)); + + let mut split_idx = 1; + while split_idx < batch.num_rows() { + let val = ts_col.value(split_idx); + if val < to_nanos(self.data_end) as i64 { + split_idx += 1; + continue; + } + if val < to_nanos(self.data_end + gap) as i64 { + self.data_end = from_nanos(val as u128); + split_idx += 1; + continue; + } + break; + } + + if split_idx == batch.num_rows() { + self.sender + .as_ref() + .ok_or_else(|| anyhow!("session sender already closed"))? 
+ .send(batch) + .map_err(|e| anyhow!("session channel send: {e}"))?; + return Ok(None); + } + + self.sender + .as_ref() + .ok_or_else(|| anyhow!("session sender already closed"))? + .send(batch.slice(0, split_idx)) + .map_err(|e| anyhow!("session channel send: {e}"))?; + let remaining_batch = batch.slice(split_idx, batch.num_rows() - split_idx); + let new_start_time = from_nanos(ts_col.value(split_idx) as u128); + Ok(Some((new_start_time, remaining_batch))) + } + + async fn close_and_drain(mut self, gap: Duration) -> Result { + self.sender.take(); + + let mut result_batches = Vec::new(); + while let Some(batch) = self.result_stream.next().await { + result_batches.push(batch?); + } + + if result_batches.len() != 1 || result_batches[0].num_rows() != 1 { + bail!("active session must yield exactly one aggregate row"); + } + + Ok(SessionWindowResult { + window_start: self.data_start, + window_end: self.data_end + gap, + batch: result_batches.into_iter().next().unwrap(), + }) + } +} + +struct SessionWindowResult { + window_start: SystemTime, + window_end: SystemTime, + batch: RecordBatch, +} + +struct KeySessionState { + config: Arc, + active_session: Option, + buffered_batches: BTreeMap>, +} + +impl KeySessionState { + fn new(config: Arc) -> Self { + Self { + config, + active_session: None, + buffered_batches: BTreeMap::new(), + } + } + + fn is_empty(&self) -> bool { + self.active_session.is_none() && self.buffered_batches.is_empty() + } + + fn earliest_data_time(&self) -> Option { + self.active_session + .as_ref() + .map(|s| s.data_start) + .or_else(|| self.buffered_batches.keys().next().copied()) + } + + fn next_watermark_action_time(&self) -> Option { + self.active_session + .as_ref() + .map(|s| s.data_end + self.config.gap) + .or_else(|| { + self.buffered_batches + .keys() + .next() + .map(|t| *t - self.config.gap) + }) + } + + async fn advance_by_watermark(&mut self, watermark: SystemTime) -> Result> { + let mut results = vec![]; + + loop { + if let Some(session) 
= &mut self.active_session { + if session.data_end + self.config.gap < watermark { + let closed_session = self + .active_session + .take() + .unwrap() + .close_and_drain(self.config.gap) + .await?; + results.push(closed_session); + } else { + break; + } + } else { + let Some((initial_ts, _)) = self.buffered_batches.first_key_value() else { + break; + }; + if watermark + self.config.gap < *initial_ts { + break; + } + + let (tx, rx) = unbounded_channel(); + *self.config.receiver_hook.write().unwrap() = Some(rx); + + self.active_session = Some( + ActiveSession::new( + self.config.final_physical_exec.clone(), + *initial_ts, + tx, + ) + .await?, + ); + + self.drain_buffer_to_active_session()?; + } + } + Ok(results) + } + + fn drain_buffer_to_active_session(&mut self) -> Result<()> { + let session = self + .active_session + .as_mut() + .ok_or_else(|| anyhow!("drain_buffer_to_active_session without active session"))?; + + while let Some((first_key, _)) = self.buffered_batches.first_key_value() { + if session.data_end + self.config.gap < *first_key { + break; + } + + let (_, batches) = self.buffered_batches.pop_first().unwrap(); + for batch in batches { + if let Some((rem_start, rem_batch)) = session.ingest_batch( + batch, + self.config.gap, + self.config.input_schema_ref.timestamp_index, + )? 
{ + self.buffered_batches + .entry(rem_start) + .or_default() + .push(rem_batch); + } + } + } + Ok(()) + } + + async fn add_data( + &mut self, + start_time: SystemTime, + batch: RecordBatch, + watermark: Option, + ) -> Result<()> { + self.buffered_batches + .entry(start_time) + .or_default() + .push(batch); + + if self.active_session.is_some() { + self.drain_buffer_to_active_session()?; + } + + if let Some(wm) = watermark { + let flushed = self.advance_by_watermark(wm).await?; + if !flushed.is_empty() { + bail!("unexpected flush during data ingestion; session watermark invariant violated"); + } + } + Ok(()) + } +} + +fn start_time_for_sorted_batch(batch: &RecordBatch, schema: &FsSchema) -> SystemTime { + let timestamp_array = batch.column(schema.timestamp_index); + let timestamp_array = timestamp_array + .as_any() + .downcast_ref::>() + .expect("timestamp column"); + from_nanos(timestamp_array.value(0) as u128) +} + +fn build_session_output_schema( + input: &FsSchema, + window_field: FieldRef, + window_index: usize, + agg_schema: &Schema, +) -> Result> { + let key_count = input.routing_keys().map(|k| k.len()).unwrap_or(0); + let mut fields: Vec = (0..key_count) + .map(|i| input.schema.fields()[i].clone()) + .collect(); + fields.insert(window_index, window_field); + fields.extend(agg_schema.fields().iter().cloned()); + fields.push(input.schema.fields()[input.timestamp_index].clone()); + Ok(Arc::new(Schema::new(fields))) +} + +// ============================================================================ +// 算子 +// ============================================================================ + +pub struct SessionWindowOperator { + config: Arc, + row_converter: Converter, + + session_states: HashMap, KeySessionState>, + pq_watermark_actions: BTreeMap>>, + pq_start_times: BTreeMap>>, +} + +impl SessionWindowOperator { + fn filter_batch_by_time(&self, batch: RecordBatch, watermark: Option) -> Result { + let Some(watermark) = watermark else { + return Ok(batch); + }; + + 
let timestamp_column = batch + .column(self.config.input_schema_ref.timestamp_index) + .as_any() + .downcast_ref::() + .ok_or_else(|| anyhow!("expected timestamp column"))?; + + let watermark_scalar = TimestampNanosecondArray::new_scalar(to_nanos(watermark) as i64); + let on_time = gt_eq(timestamp_column, &watermark_scalar)?; + + Ok(filter_record_batch(&batch, &on_time)?) + } + + fn sort_batch(&self, batch: &RecordBatch) -> Result { + let sort_columns = self.config.input_schema_ref.sort_columns(batch, true); + let sort_indices = lexsort_to_indices(&sort_columns, None)?; + + let columns = batch + .columns() + .iter() + .map(|c| take(c, &sort_indices, None).unwrap()) + .collect(); + + Ok(RecordBatch::try_new(batch.schema(), columns)?) + } + + async fn ingest_sorted_batch( + &mut self, + sorted_batch: RecordBatch, + watermark: Option, + ) -> Result<()> { + let partition_ranges = if !self.config.input_schema_ref.has_routing_keys() { + vec![0..sorted_batch.num_rows()] + } else { + let key_len = self + .config + .input_schema_ref + .routing_keys() + .as_ref() + .unwrap() + .len(); + let key_cols = sorted_batch + .columns() + .iter() + .take(key_len) + .cloned() + .collect::>(); + partition(key_cols.as_slice())?.ranges() + }; + + let key_count = self + .config + .input_schema_ref + .routing_keys() + .map(|k| k.len()) + .unwrap_or(0); + + for range in partition_ranges { + let key_batch = sorted_batch.slice(range.start, range.end - range.start); + + let row_key = if key_count == 0 { + Vec::new() + } else { + self.row_converter + .convert_columns(&key_batch.slice(0, 1).columns()[0..key_count]) + .context("row key convert")? 
+ .as_ref() + .to_vec() + }; + + let state = self + .session_states + .entry(row_key.clone()) + .or_insert_with(|| KeySessionState::new(self.config.clone())); + + let initial_action = state.next_watermark_action_time(); + let initial_start = state.earliest_data_time(); + + let batch_start = start_time_for_sorted_batch(&key_batch, &self.config.input_schema_ref); + + state + .add_data(batch_start, key_batch, watermark) + .await?; + + let new_action = state + .next_watermark_action_time() + .ok_or_else(|| anyhow!("missing next watermark action after add_data"))?; + let new_start = state + .earliest_data_time() + .ok_or_else(|| anyhow!("missing earliest data after add_data"))?; + + match initial_action { + Some(ia) => { + if ia != new_action { + self.pq_watermark_actions + .get_mut(&ia) + .expect("pq watermark entry") + .remove(&row_key); + self.pq_watermark_actions + .entry(new_action) + .or_default() + .insert(row_key.clone()); + } + let is = initial_start.expect("initial start"); + if is != new_start { + self.pq_start_times + .get_mut(&is) + .expect("pq start entry") + .remove(&row_key); + self.pq_start_times + .entry(new_start) + .or_default() + .insert(row_key.clone()); + } + } + None => { + self.pq_watermark_actions + .entry(new_action) + .or_default() + .insert(row_key.clone()); + self.pq_start_times + .entry(new_start) + .or_default() + .insert(row_key); + } + } + } + Ok(()) + } + + async fn evaluate_watermark(&mut self, watermark: SystemTime) -> Result> { + let mut emit_results: Vec<(Vec, Vec)> = Vec::new(); + + loop { + let popped_action_time = match self.pq_watermark_actions.first_key_value() { + Some((t, _)) if *t < watermark => *t, + _ => break, + }; + let keys = self + .pq_watermark_actions + .remove(&popped_action_time) + .expect("pop watermark pq"); + + for key in keys { + let state = self + .session_states + .get_mut(&key) + .ok_or_else(|| anyhow!("missing session state for key"))?; + let initial_start = state + .earliest_data_time() + .ok_or_else(|| 
anyhow!("missing earliest data in evaluate_watermark"))?; + + let completed_sessions = state.advance_by_watermark(watermark).await?; + if !completed_sessions.is_empty() { + emit_results.push((key.clone(), completed_sessions)); + } + + self.pq_start_times + .get_mut(&initial_start) + .expect("pq start") + .remove(&key); + + if state.is_empty() { + self.session_states.remove(&key); + } else { + let new_start = state + .earliest_data_time() + .expect("earliest after advance"); + self.pq_start_times + .entry(new_start) + .or_default() + .insert(key.clone()); + + let new_next_action = state + .next_watermark_action_time() + .expect("next action after advance"); + if new_next_action == popped_action_time { + bail!( + "processed watermark at {:?} but next watermark action stayed at {:?}", + watermark, popped_action_time + ); + } + self.pq_watermark_actions + .entry(new_next_action) + .or_default() + .insert(key); + } + } + } + + if emit_results.is_empty() { + return Ok(vec![]); + } + + Ok(vec![self.format_to_arrow(emit_results)?]) + } + + fn format_to_arrow(&self, results: Vec<(Vec, Vec)>) -> Result { + let (rows, session_results): (Vec<_>, Vec<_>) = results + .into_iter() + .flat_map(|(row, s_results)| s_results.into_iter().map(move |res| (row.clone(), res))) + .unzip(); + + let key_columns = if let Some(parser) = self.row_converter.parser() { + self.row_converter.convert_rows( + rows.iter() + .map(|row| parser.parse(row.as_ref())) + .collect(), + )? 
+ } else { + vec![] + }; + + let start_times: Vec = session_results + .iter() + .map(|r| to_nanos(r.window_start) as i64) + .collect(); + let end_times: Vec = session_results + .iter() + .map(|r| to_nanos(r.window_end) as i64) + .collect(); + + let window_start_array = PrimitiveArray::::from(start_times); + let window_end_array = PrimitiveArray::::from(end_times.clone()); + let timestamp_array = PrimitiveArray::::from( + end_times.into_iter().map(|t| t - 1).collect::>(), + ); + + let result_batches: Vec<&RecordBatch> = session_results.iter().map(|res| &res.batch).collect(); + let merged_batch = concat_batches(&session_results[0].batch.schema(), result_batches)?; + + let DataType::Struct(window_fields) = self.config.window_field.data_type() else { + bail!("expected window field to be a struct"); + }; + + let window_struct_array = StructArray::try_new( + window_fields.clone(), + vec![Arc::new(window_start_array), Arc::new(window_end_array)], + None, + )?; + + let mut columns = key_columns; + columns.insert(self.config.window_index, Arc::new(window_struct_array)); + columns.extend_from_slice(merged_batch.columns()); + columns.push(Arc::new(timestamp_array)); + + RecordBatch::try_new(self.config.output_schema.clone(), columns) + .context("failed to create session window output batch") + } + + fn earliest_batch_time(&self) -> Option { + self.pq_start_times + .first_key_value() + .map(|(start_time, _keys)| *start_time) + } +} + +#[async_trait] +impl MessageOperator for SessionWindowOperator { + fn name(&self) -> &str { + "SessionWindow" + } + + async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { + let mut tm = ctx.table_manager_guard().await?; + let start_time_opt = tm + .get_global_keyed_state::>("e") + .await + .map_err(|e| anyhow!("global keyed state e: {e}"))? 
+ .get_all() + .values() + .filter_map(|e| *e) + .min(); + + let Some(start_time) = start_time_opt else { + return Ok(()); + }; + + let state_table = tm + .get_expiring_time_key_table("s", Some(start_time)) + .await + .map_err(|e| anyhow!("expiring time key table s: {e}"))?; + for (_, batches) in state_table.all_batches_for_watermark(Some(start_time)) { + for batch in batches { + let filtered = self.filter_batch_by_time(batch.clone(), Some(start_time))?; + if filtered.num_rows() > 0 { + let sorted = self.sort_batch(&filtered)?; + self.ingest_sorted_batch(sorted, Some(start_time)).await?; + } + } + } + + if let Some(ts) = ctx.last_present_watermark() { + let evicted = self.evaluate_watermark(ts).await?; + if !evicted.is_empty() { + warn!( + "evicted {} session result batch(es) when restoring from state", + evicted.len() + ); + } + } + Ok(()) + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + ctx: &mut TaskContext, + ) -> Result> { + let watermark_time = ctx.last_present_watermark(); + + let filtered_batch = self.filter_batch_by_time(batch, watermark_time)?; + if filtered_batch.num_rows() == 0 { + return Ok(vec![]); + } + + let sorted_batch = self.sort_batch(&filtered_batch)?; + + let max_timestamp = max( + sorted_batch + .column(self.config.input_schema_ref.timestamp_index) + .as_any() + .downcast_ref::() + .ok_or_else(|| anyhow!("expected timestamp column"))?, + ) + .ok_or_else(|| anyhow!("expected max timestamp"))?; + + let mut tm = ctx.table_manager_guard().await?; + let table = tm + .get_expiring_time_key_table("s", ctx.last_present_watermark()) + .await + .map_err(|e| anyhow!("expiring time key table s: {e}"))?; + table.insert(from_nanos(max_timestamp as u128), sorted_batch.clone()); + drop(tm); + + self.ingest_sorted_batch(sorted_batch, watermark_time).await?; + + Ok(vec![]) + } + + async fn process_watermark( + &mut self, + watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + let 
Watermark::EventTime(current_time) = watermark else { + return Ok(vec![]); + }; + + let output_batches = self.evaluate_watermark(current_time).await?; + Ok(output_batches + .into_iter() + .map(StreamOutput::Forward) + .collect()) + } + + async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { + let watermark = ctx.last_present_watermark(); + let mut tm = ctx.table_manager_guard().await?; + + tm.get_expiring_time_key_table("s", watermark) + .await + .map_err(|e| anyhow!("expiring time key table s: {e}"))? + .flush(watermark) + .await?; + + tm.get_global_keyed_state::>("e") + .await + .map_err(|e| anyhow!("global keyed state e: {e}"))? + .insert(ctx.subtask_idx, self.earliest_batch_time()) + .await; + + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + Ok(vec![]) + } +} + +// ============================================================================ +// 构造器(返回 [`SessionWindowOperator`],供 Actor 子任务直接 `Box::new`) +// ============================================================================ + +pub struct SessionAggregatingWindowConstructor; + +impl SessionAggregatingWindowConstructor { + pub fn with_config( + &self, + config: SessionWindowAggregateOperator, + registry: Arc, + ) -> anyhow::Result { + let window_field = Arc::new(Field::new( + config.window_field_name, + window_arrow_struct(), + true, + )); + + let receiver_hook = Arc::new(RwLock::new(None)); + + let codec = FsPhysicalExtensionCodec { + context: DecodingContext::UnboundedBatchStream(receiver_hook.clone()), + }; + + let final_plan = PhysicalPlanNode::decode(&mut config.final_aggregation_plan.as_slice())?; + let final_execution_plan = final_plan.try_into_physical_plan( + registry.as_ref(), + &RuntimeEnvBuilder::new().build()?, + &codec, + )?; + + let input_schema: FsSchema = config + .input_schema + .ok_or_else(|| anyhow!("missing input schema"))? 
+ .try_into()?; + + let row_converter = if input_schema.routing_keys().is_none() { + let array = Arc::new(BooleanArray::from(vec![false])); + Converter::Empty( + RowConverter::new(vec![SortField::new(DataType::Boolean)])?, + array, + ) + } else { + let key_count = input_schema.routing_keys().as_ref().unwrap().len(); + Converter::RowConverter(RowConverter::new( + input_schema + .schema + .fields() + .into_iter() + .take(key_count) + .map(|field| SortField::new(field.data_type().clone())) + .collect(), + )?) + }; + + let output_schema = build_session_output_schema( + &input_schema, + window_field.clone(), + config.window_index as usize, + final_execution_plan.schema().as_ref(), + )?; + + let session_config = Arc::new(SessionWindowConfig { + gap: Duration::from_micros(config.gap_micros), + window_field, + window_index: config.window_index as usize, + input_schema_ref: Arc::new(input_schema), + final_physical_exec: final_execution_plan, + receiver_hook, + output_schema, + }); + + Ok(SessionWindowOperator { + config: session_config, + session_states: HashMap::new(), + pq_start_times: BTreeMap::new(), + pq_watermark_actions: BTreeMap::new(), + row_converter, + }) + } +} diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs new file mode 100644 index 00000000..29bad05a --- /dev/null +++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs @@ -0,0 +1,578 @@ +//! 
滑动窗口聚合:与 worker `arrow/sliding_aggregating_window` 对齐,实现 [`MessageOperator`]。 + +use anyhow::{anyhow, bail, Result}; +use arrow::compute::{partition, sort_to_indices, take}; +use arrow_array::{Array, PrimitiveArray, RecordBatch, types::TimestampNanosecondType}; +use arrow_schema::SchemaRef; +use datafusion::common::ScalarValue; +use datafusion::execution::context::SessionContext; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::execution::SendableRecordBatchStream; +use datafusion::physical_expr::PhysicalExpr; +use datafusion::physical_plan::ExecutionPlan; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::{ + physical_plan::{from_proto::parse_physical_expr, AsExecutionPlan}, + protobuf::{PhysicalExprNode, PhysicalPlanNode}, +}; +use futures::StreamExt; +use prost::Message; +use std::collections::{BTreeMap, VecDeque}; +use std::sync::{Arc, RwLock}; +use std::time::{Duration, SystemTime}; +use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use async_trait::async_trait; +use tracing_subscriber::Registry; +use protocol::grpc::api::SlidingWindowAggregateOperator; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark}; +use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; +// ============================================================================ +// Tiered panes +// ============================================================================ + +#[derive(Default, Debug)] +struct RecordBatchPane { + batches: Vec, +} + +#[derive(Debug)] +struct RecordBatchTier { + width: Duration, + start_time: Option, + panes: VecDeque, +} + +impl RecordBatchTier { + fn new(width: Duration) -> Self { + Self { + width, + start_time: None, + panes: 
VecDeque::new(), + } + } + + fn bin_start(&self, timestamp: SystemTime) -> SystemTime { + if self.width == Duration::ZERO { + return timestamp; + } + let nanos = to_nanos(timestamp) - (to_nanos(timestamp) % self.width.as_nanos()); + from_nanos(nanos) + } + + fn insert(&mut self, batch: RecordBatch, timestamp: SystemTime) -> Result<()> { + let bin_start = self.bin_start(timestamp); + if self.start_time.is_none() { + self.start_time = Some(bin_start); + self.panes.push_back(RecordBatchPane { + batches: vec![batch], + }); + return Ok(()); + } + + let start_time = self.start_time.unwrap(); + let bin_index = + (bin_start.duration_since(start_time)?.as_nanos() / self.width.as_nanos()) as usize; + while self.panes.len() <= bin_index { + self.panes.push_back(RecordBatchPane::default()); + } + self.panes[bin_index].batches.push(batch); + Ok(()) + } + + fn batches_for_timestamp(&self, bin_start: SystemTime) -> Result> { + if self + .start_time + .map(|st| st > bin_start) + .unwrap_or(true) + { + return Ok(vec![]); + } + let bin_index = (bin_start + .duration_since(self.start_time.unwrap())? 
+ .as_nanos() + / self.width.as_nanos()) as usize; + if self.panes.len() <= bin_index { + return Ok(vec![]); + } + Ok(self.panes[bin_index].batches.clone()) + } + + fn delete_before(&mut self, cutoff: SystemTime) -> Result<()> { + let bin_start = self.bin_start(cutoff); + if self + .start_time + .map(|st| st >= bin_start) + .unwrap_or(true) + { + return Ok(()); + } + let bin_index = (bin_start + .duration_since(self.start_time.unwrap()) + .unwrap() + .as_nanos() + / self.width.as_nanos()) as usize; + + if bin_index >= self.panes.len() { + self.panes.clear(); + } else { + self.panes.drain(0..bin_index); + } + self.start_time = Some(bin_start); + Ok(()) + } +} + +#[derive(Debug)] +struct TieredRecordBatchHolder { + tier_widths: Vec, + tiers: Vec, +} + +impl TieredRecordBatchHolder { + fn new(tier_widths: Vec) -> Result { + for i in 0..tier_widths.len().saturating_sub(1) { + if !tier_widths[i + 1].as_nanos().is_multiple_of(tier_widths[i].as_nanos()) { + bail!( + "tier width {} does not evenly divide next {}", + tier_widths[i].as_nanos(), + tier_widths[i + 1].as_nanos() + ); + } + } + let tiers = tier_widths + .iter() + .map(|w| RecordBatchTier::new(*w)) + .collect(); + Ok(Self { tier_widths, tiers }) + } + + fn insert(&mut self, batch: RecordBatch, timestamp: SystemTime) -> Result<()> { + for tier in self.tiers.iter_mut() { + tier.insert(batch.clone(), timestamp)?; + } + Ok(()) + } + + fn batches_for_interval( + &self, + interval_start: SystemTime, + interval_end: SystemTime, + ) -> Result> { + let mut batches = Vec::new(); + let mut current_tier = 0usize; + let mut current_start = interval_start; + + while current_start < interval_end { + let tier_end = current_start + self.tier_widths[current_tier]; + if tier_end > interval_end { + current_tier = current_tier.saturating_sub(1); + continue; + } + if current_tier < self.tier_widths.len() - 1 { + let next_tier = &self.tiers[current_tier + 1]; + if next_tier.bin_start(current_start) == current_start + && current_start + 
next_tier.width <= interval_end + { + current_tier += 1; + continue; + } + } + batches.extend(self.tiers[current_tier].batches_for_timestamp(current_start)?); + current_start += self.tier_widths[current_tier]; + } + if current_start != interval_end { + bail!( + "interval end {:?} does not match current start {:?}", + interval_end, current_start + ); + } + Ok(batches) + } + + fn delete_before(&mut self, cutoff: SystemTime) -> Result<()> { + for tier in self.tiers.iter_mut() { + tier.delete_before(cutoff)?; + } + Ok(()) + } +} + +// ============================================================================ +// Per-bin partial aggregation +// ============================================================================ + +struct ActiveBin { + sender: Option>, + result_stream: Option, + finished_batches: Vec, +} + +impl Default for ActiveBin { + fn default() -> Self { + Self { + sender: None, + result_stream: None, + finished_batches: Vec::new(), + } + } +} + +impl ActiveBin { + fn start_partial( + plan: Arc, + hook: &Arc>>>, + ) -> Result { + let (tx, rx) = unbounded_channel(); + *hook.write().unwrap() = Some(rx); + plan.reset()?; + let result_stream = plan.execute(0, SessionContext::new().task_ctx())?; + Ok(Self { + sender: Some(tx), + result_stream: Some(result_stream), + finished_batches: Vec::new(), + }) + } + + async fn close_and_drain(&mut self) -> Result<()> { + self.sender.take(); + if let Some(mut stream) = self.result_stream.take() { + while let Some(batch) = stream.next().await { + self.finished_batches.push(batch?); + } + } + Ok(()) + } +} + +// ============================================================================ +// Operator +// ============================================================================ + +pub struct SlidingWindowOperator { + slide: Duration, + width: Duration, + binning_function: Arc, + + partial_aggregation_plan: Arc, + partial_schema: FsSchema, + + finish_execution_plan: Arc, + final_projection: Arc, + 
projection_input_schema: SchemaRef, + + receiver_hook: Arc>>>, + final_batches_passer: Arc>>, + + active_bins: BTreeMap, + tiered_record_batches: TieredRecordBatchHolder, +} + +impl SlidingWindowOperator { + fn bin_start(&self, timestamp: SystemTime) -> SystemTime { + if self.slide == Duration::ZERO { + return timestamp; + } + let nanos = to_nanos(timestamp) - (to_nanos(timestamp) % self.slide.as_nanos()); + from_nanos(nanos) + } + + fn add_bin_start_as_timestamp( + batch: &RecordBatch, + bin_start: SystemTime, + schema: SchemaRef, + ) -> Result { + let bin_start_scalar = ScalarValue::TimestampNanosecond(Some(to_nanos(bin_start) as i64), None); + let timestamp_array = bin_start_scalar.to_array_of_size(batch.num_rows())?; + let mut columns = batch.columns().to_vec(); + columns.push(timestamp_array); + Ok(RecordBatch::try_new(schema, columns)?) + } + + fn ensure_bin_running( + slot: &mut ActiveBin, + plan: Arc, + hook: &Arc>>>, + ) -> Result<()> { + if slot.sender.is_some() { + return Ok(()); + } + let preserved = std::mem::take(&mut slot.finished_batches); + let mut started = ActiveBin::start_partial(plan, hook)?; + started.finished_batches = preserved; + *slot = started; + Ok(()) + } +} + +#[async_trait] +impl MessageOperator for SlidingWindowOperator { + fn name(&self) -> &str { + "SlidingWindow" + } + + async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { + let watermark = ctx.last_present_watermark(); + let mut tm = ctx.table_manager_guard().await?; + let table = tm + .get_expiring_time_key_table("t", watermark) + .await + .map_err(|e| anyhow!("expiring time key table t: {e}"))?; + + let watermark_bin = self.bin_start(watermark.unwrap_or(SystemTime::UNIX_EPOCH)); + + for (timestamp, batches) in table.all_batches_for_watermark(watermark) { + let bin = self.bin_start(*timestamp); + if bin < watermark_bin { + for batch in batches { + self.tiered_record_batches.insert(batch.clone(), bin)?; + } + } else { + let slot = 
self.active_bins.entry(bin).or_default(); + for batch in batches { + slot.finished_batches.push(batch.clone()); + } + } + } + Ok(()) + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + ctx: &mut TaskContext, + ) -> Result> { + let bin_array = self + .binning_function + .evaluate(&batch)? + .into_array(batch.num_rows())?; + let indices = sort_to_indices(bin_array.as_ref(), None, None)?; + + let columns = batch + .columns() + .iter() + .map(|c| take(c, &indices, None).unwrap()) + .collect(); + let sorted = RecordBatch::try_new(batch.schema(), columns)?; + let sorted_bins = take(bin_array.as_ref(), &indices, None)?; + + let typed_bin = sorted_bins + .as_any() + .downcast_ref::>() + .ok_or_else(|| anyhow!("binning function must produce TimestampNanosecond"))?; + let partition_ranges = partition(std::slice::from_ref(&sorted_bins))?.ranges(); + + let watermark = ctx.last_present_watermark(); + + for range in partition_ranges { + let bin_start = from_nanos(typed_bin.value(range.start) as u128); + + if let Some(wm) = watermark { + if bin_start < self.bin_start(wm) { + continue; + } + } + + let bin_batch = sorted.slice(range.start, range.end - range.start); + let slot = self.active_bins.entry(bin_start).or_default(); + + Self::ensure_bin_running( + slot, + self.partial_aggregation_plan.clone(), + &self.receiver_hook, + )?; + + let sender = slot + .sender + .as_ref() + .ok_or_else(|| anyhow!("partial bin sender missing after ensure"))?; + sender + .send(bin_batch) + .map_err(|e| anyhow!("partial channel send: {e}"))?; + } + + Ok(vec![]) + } + + async fn process_watermark( + &mut self, + watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + let Watermark::EventTime(current_time) = watermark else { + return Ok(vec![]); + }; + let watermark_bin = self.bin_start(current_time); + + let mut final_outputs = Vec::new(); + + let mut expired_bins = Vec::new(); + for &k in self.active_bins.keys() { + if k + self.slide <= watermark_bin { 
+ expired_bins.push(k); + } else { + break; + } + } + + for bin_start in expired_bins { + let mut bin = self + .active_bins + .remove(&bin_start) + .ok_or_else(|| anyhow!("missing active bin"))?; + let bin_end = bin_start + self.slide; + + bin.close_and_drain().await?; + for b in bin.finished_batches { + self.tiered_record_batches.insert(b, bin_start)?; + } + + let interval_start = bin_end - self.width; + let interval_end = bin_end; + + let partials = self + .tiered_record_batches + .batches_for_interval(interval_start, interval_end)?; + *self.final_batches_passer.write().unwrap() = partials; + + self.finish_execution_plan.reset()?; + let mut final_exec = self + .finish_execution_plan + .execute(0, SessionContext::new().task_ctx())?; + + let mut aggregate_results = Vec::new(); + while let Some(batch) = final_exec.next().await { + aggregate_results.push(Self::add_bin_start_as_timestamp( + &batch?, + interval_start, + self.projection_input_schema.clone(), + )?); + } + + *self.final_batches_passer.write().unwrap() = aggregate_results; + self.final_projection.reset()?; + let mut proj_exec = self + .final_projection + .execute(0, SessionContext::new().task_ctx())?; + + while let Some(batch) = proj_exec.next().await { + final_outputs.push(StreamOutput::Forward(batch?)); + } + + self.tiered_record_batches + .delete_before(bin_end + self.slide - self.width)?; + } + + Ok(final_outputs) + } + + async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { + let watermark = ctx.last_present_watermark(); + let mut tm = ctx.table_manager_guard().await?; + let table = tm + .get_expiring_time_key_table("t", watermark) + .await + .map_err(|e| anyhow!("expiring time key table t: {e}"))?; + + for (bin_start, active_bin) in self.active_bins.iter_mut() { + active_bin.close_and_drain().await?; + + for batch in &active_bin.finished_batches { + let state_batch = Self::add_bin_start_as_timestamp( + batch, + *bin_start, + 
self.partial_schema.schema.clone(), + )?; + table.insert(*bin_start, state_batch); + } + } + + table.flush(watermark).await?; + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + Ok(vec![]) + } +} + +// ============================================================================ + +pub struct SlidingAggregatingWindowConstructor; + +impl SlidingAggregatingWindowConstructor { + pub fn with_config( + &self, + config: SlidingWindowAggregateOperator, + registry: Arc, + ) -> anyhow::Result { + let width = Duration::from_micros(config.width_micros); + let slide = Duration::from_micros(config.slide_micros); + let input_schema: FsSchema = config + .input_schema + .ok_or_else(|| anyhow!("missing input schema"))? + .try_into()?; + + let binning_function = parse_physical_expr( + &PhysicalExprNode::decode(&mut config.binning_function.as_slice())?, + registry.as_ref(), + &input_schema.schema, + &DefaultPhysicalExtensionCodec {}, + )?; + + let receiver_hook = Arc::new(RwLock::new(None)); + let final_batches_passer = Arc::new(RwLock::new(Vec::new())); + + let codec = FsPhysicalExtensionCodec { + context: DecodingContext::UnboundedBatchStream(receiver_hook.clone()), + }; + let final_codec = FsPhysicalExtensionCodec { + context: DecodingContext::LockedBatchVec(final_batches_passer.clone()), + }; + + let partial_plan = PhysicalPlanNode::decode(&mut config.partial_aggregation_plan.as_slice())? + .try_into_physical_plan( + registry.as_ref(), + &RuntimeEnvBuilder::new().build()?, + &codec, + )?; + + let finish_plan = PhysicalPlanNode::decode(&mut config.final_aggregation_plan.as_slice())? + .try_into_physical_plan( + registry.as_ref(), + &RuntimeEnvBuilder::new().build()?, + &final_codec, + )?; + + let final_proj = PhysicalPlanNode::decode(&mut config.final_projection.as_slice())? 
+ .try_into_physical_plan( + registry.as_ref(), + &RuntimeEnvBuilder::new().build()?, + &final_codec, + )?; + + let partial_schema: FsSchema = config + .partial_schema + .ok_or_else(|| anyhow!("missing partial schema"))? + .try_into()?; + + Ok(SlidingWindowOperator { + slide, + width, + binning_function, + partial_aggregation_plan: partial_plan, + partial_schema, + finish_execution_plan: finish_plan, + final_projection: final_proj.clone(), + projection_input_schema: final_proj.children()[0].schema().clone(), + receiver_hook, + final_batches_passer, + active_bins: BTreeMap::new(), + tiered_record_batches: TieredRecordBatchHolder::new(vec![slide])?, + }) + } +} diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs new file mode 100644 index 00000000..c30950cb --- /dev/null +++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs @@ -0,0 +1,399 @@ +//! 滚动(tumbling)窗口聚合:与 worker `arrow/tumbling_aggregating_window` 对齐,实现 [`MessageOperator`]。 + +use anyhow::{anyhow, Result}; +use arrow::compute::{partition, sort_to_indices, take}; +use arrow_array::{Array, PrimitiveArray, RecordBatch, types::TimestampNanosecondType}; +use arrow_schema::SchemaRef; +use datafusion::common::ScalarValue; +use datafusion::execution::context::SessionContext; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::execution::SendableRecordBatchStream; +use datafusion::physical_expr::PhysicalExpr; +use datafusion::physical_plan::ExecutionPlan; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::{ + physical_plan::{from_proto::parse_physical_expr, AsExecutionPlan}, + protobuf::{PhysicalExprNode, PhysicalPlanNode}, +}; +use futures::StreamExt; +use prost::Message; +use std::collections::BTreeMap; +use std::mem; +use std::sync::{Arc, RwLock}; +use std::time::{Duration, SystemTime}; +use 
tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; +use tracing::warn; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use async_trait::async_trait; +use tracing_subscriber::Registry; +use protocol::grpc::api::TumblingWindowAggregateOperator; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark}; +use crate::sql::common::time_utils::print_time; +use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; +use crate::sql::schema::utils::add_timestamp_field_arrow; + +struct ActiveBin { + sender: Option>, + result_stream: Option, + finished_batches: Vec, +} + +impl Default for ActiveBin { + fn default() -> Self { + Self { + sender: None, + result_stream: None, + finished_batches: Vec::new(), + } + } +} + +impl ActiveBin { + fn start_partial( + plan: Arc, + hook: &Arc>>>, + ) -> Result { + let (tx, rx) = unbounded_channel(); + *hook.write().unwrap() = Some(rx); + plan.reset()?; + let result_stream = plan.execute(0, SessionContext::new().task_ctx())?; + Ok(Self { + sender: Some(tx), + result_stream: Some(result_stream), + finished_batches: Vec::new(), + }) + } + + async fn close_and_drain(&mut self) -> Result<()> { + self.sender.take(); + if let Some(mut stream) = self.result_stream.take() { + while let Some(batch) = stream.next().await { + self.finished_batches.push(batch?); + } + } + Ok(()) + } +} + +pub struct TumblingWindowOperator { + width: Duration, + binning_function: Arc, + + partial_aggregation_plan: Arc, + partial_schema: FsSchema, + + finish_execution_plan: Arc, + aggregate_with_timestamp_schema: SchemaRef, + final_projection: Option>, + + receiver_hook: Arc>>>, + final_batches_passer: Arc>>, + + active_bins: BTreeMap, +} + +impl TumblingWindowOperator { + fn bin_start(&self, timestamp: SystemTime) -> SystemTime { + if self.width == Duration::ZERO { + return 
timestamp; + } + let nanos = to_nanos(timestamp) - (to_nanos(timestamp) % self.width.as_nanos()); + from_nanos(nanos) + } + + fn add_bin_start_as_timestamp( + batch: &RecordBatch, + bin_start: SystemTime, + schema: SchemaRef, + ) -> Result { + let bin_start_scalar = ScalarValue::TimestampNanosecond(Some(to_nanos(bin_start) as i64), None); + let timestamp_array = bin_start_scalar.to_array_of_size(batch.num_rows())?; + let mut columns = batch.columns().to_vec(); + columns.push(timestamp_array); + RecordBatch::try_new(schema.clone(), columns) + .map_err(|e| anyhow!("add _timestamp column: {e}")) + } + + fn ensure_bin_running( + slot: &mut ActiveBin, + plan: Arc, + hook: &Arc>>>, + ) -> Result<()> { + if slot.sender.is_some() { + return Ok(()); + } + let preserved = mem::take(&mut slot.finished_batches); + let mut started = ActiveBin::start_partial(plan, hook)?; + started.finished_batches = preserved; + *slot = started; + Ok(()) + } +} + +#[async_trait] +impl MessageOperator for TumblingWindowOperator { + fn name(&self) -> &str { + "TumblingWindow" + } + + async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { + let watermark = ctx.last_present_watermark(); + let mut tm = ctx.table_manager_guard().await?; + let table = tm + .get_expiring_time_key_table("t", watermark) + .await + .map_err(|e| anyhow!("expiring time key table t: {e}"))?; + + for (timestamp, batches) in table.all_batches_for_watermark(watermark) { + let bin_start = self.bin_start(*timestamp); + let slot = self.active_bins.entry(bin_start).or_default(); + for batch in batches { + slot.finished_batches.push(batch.clone()); + } + } + Ok(()) + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + ctx: &mut TaskContext, + ) -> Result> { + let bin_array = self + .binning_function + .evaluate(&batch)? 
+ .into_array(batch.num_rows())?; + let indices = sort_to_indices(bin_array.as_ref(), None, None)?; + + let columns = batch + .columns() + .iter() + .map(|c| take(c, &indices, None).unwrap()) + .collect(); + let sorted = RecordBatch::try_new(batch.schema(), columns)?; + let sorted_bins = take(bin_array.as_ref(), &indices, None)?; + + let typed_bin = sorted_bins + .as_any() + .downcast_ref::>() + .ok_or_else(|| anyhow!("binning function must produce TimestampNanosecond"))?; + let partition_ranges = partition(std::slice::from_ref(&sorted_bins))?.ranges(); + + for range in partition_ranges { + let bin_start = from_nanos(typed_bin.value(range.start) as u128); + + if let Some(watermark) = ctx.last_present_watermark() { + if bin_start < self.bin_start(watermark) { + warn!( + "late data dropped: bin {} < watermark {}", + print_time(bin_start), + print_time(watermark) + ); + continue; + } + } + + let bin_batch = sorted.slice(range.start, range.end - range.start); + let slot = self.active_bins.entry(bin_start).or_default(); + + Self::ensure_bin_running( + slot, + self.partial_aggregation_plan.clone(), + &self.receiver_hook, + )?; + + let sender = slot + .sender + .as_ref() + .ok_or_else(|| anyhow!("tumbling bin sender missing after ensure"))?; + sender + .send(bin_batch) + .map_err(|e| anyhow!("partial channel send: {e}"))?; + } + + Ok(vec![]) + } + + async fn process_watermark( + &mut self, + watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + let Watermark::EventTime(current_time) = watermark else { + return Ok(vec![]); + }; + + let mut final_outputs = Vec::new(); + + let mut expired_bins = Vec::new(); + for &k in self.active_bins.keys() { + if k + self.width <= current_time { + expired_bins.push(k); + } else { + break; + } + } + + for bin_start in expired_bins { + let mut bin = self + .active_bins + .remove(&bin_start) + .ok_or_else(|| anyhow!("missing tumbling bin"))?; + + bin.close_and_drain().await?; + let partial_batches = mem::take(&mut 
bin.finished_batches); + + if partial_batches.is_empty() { + continue; + } + + *self.final_batches_passer.write().unwrap() = partial_batches; + self.finish_execution_plan.reset()?; + let mut final_exec = self + .finish_execution_plan + .execute(0, SessionContext::new().task_ctx())?; + + let mut aggregate_results = Vec::new(); + while let Some(batch) = final_exec.next().await { + let batch = batch?; + let with_timestamp = Self::add_bin_start_as_timestamp( + &batch, + bin_start, + self.aggregate_with_timestamp_schema.clone(), + )?; + + if self.final_projection.is_none() { + final_outputs.push(StreamOutput::Forward(with_timestamp)); + } else { + aggregate_results.push(with_timestamp); + } + } + + if let Some(final_projection) = &self.final_projection { + *self.final_batches_passer.write().unwrap() = aggregate_results; + final_projection.reset()?; + let mut proj_exec = final_projection.execute(0, SessionContext::new().task_ctx())?; + + while let Some(batch) = proj_exec.next().await { + final_outputs.push(StreamOutput::Forward(batch?)); + } + } + } + + Ok(final_outputs) + } + + async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { + let watermark = ctx.last_present_watermark(); + let mut tm = ctx.table_manager_guard().await?; + let table = tm + .get_expiring_time_key_table("t", watermark) + .await + .map_err(|e| anyhow!("expiring time key table t: {e}"))?; + + for (bin_start, active_bin) in self.active_bins.iter_mut() { + active_bin.close_and_drain().await?; + + for batch in &active_bin.finished_batches { + let state_batch = Self::add_bin_start_as_timestamp( + batch, + *bin_start, + self.partial_schema.schema.clone(), + )?; + table.insert(*bin_start, state_batch); + } + } + + table.flush(watermark).await?; + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + Ok(vec![]) + } +} + +pub struct TumblingAggregateWindowConstructor; + +impl TumblingAggregateWindowConstructor { + pub fn with_config( 
+ &self, + config: TumblingWindowAggregateOperator, + registry: Arc, + ) -> anyhow::Result { + let width = Duration::from_micros(config.width_micros); + let input_schema: FsSchema = config + .input_schema + .ok_or_else(|| anyhow!("missing input schema"))? + .try_into()?; + + let binning_function = parse_physical_expr( + &PhysicalExprNode::decode(&mut config.binning_function.as_slice())?, + registry.as_ref(), + &input_schema.schema, + &DefaultPhysicalExtensionCodec {}, + )?; + + let receiver_hook = Arc::new(RwLock::new(None)); + let final_batches_passer = Arc::new(RwLock::new(Vec::new())); + + let codec = FsPhysicalExtensionCodec { + context: DecodingContext::UnboundedBatchStream(receiver_hook.clone()), + }; + let final_codec = FsPhysicalExtensionCodec { + context: DecodingContext::LockedBatchVec(final_batches_passer.clone()), + }; + + let partial_plan = PhysicalPlanNode::decode(&mut config.partial_aggregation_plan.as_slice())? + .try_into_physical_plan( + registry.as_ref(), + &RuntimeEnvBuilder::new().build()?, + &codec, + )?; + + let partial_schema: FsSchema = config + .partial_schema + .ok_or_else(|| anyhow!("missing partial schema"))? + .try_into()?; + + let finish_plan = PhysicalPlanNode::decode(&mut config.final_aggregation_plan.as_slice())?; + let finish_execution_plan = finish_plan.try_into_physical_plan( + registry.as_ref(), + &RuntimeEnvBuilder::new().build()?, + &final_codec, + )?; + + let final_projection_plan = match &config.final_projection { + Some(proto) if !proto.is_empty() => { + let node = PhysicalPlanNode::decode(&mut proto.as_slice()) + .map_err(|e| anyhow!("decode final_projection: {e}"))?; + Some(node.try_into_physical_plan( + registry.as_ref(), + &RuntimeEnvBuilder::new().build()?, + &final_codec, + )?) 
+ } + _ => None, + }; + + let aggregate_with_timestamp_schema = + add_timestamp_field_arrow((*finish_execution_plan.schema()).clone()); + + Ok(TumblingWindowOperator { + width, + binning_function, + partial_aggregation_plan: partial_plan, + partial_schema, + finish_execution_plan, + aggregate_with_timestamp_schema, + final_projection: final_projection_plan, + receiver_hook, + final_batches_passer, + active_bins: BTreeMap::new(), + }) + } +} diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs new file mode 100644 index 00000000..cc51b820 --- /dev/null +++ b/src/runtime/streaming/operators/windows/window_function.rs @@ -0,0 +1,292 @@ +//! 窗口函数(按事件时间分桶的瞬时执行):与 worker `arrow/window_fn` 对齐,实现 [`MessageOperator`]。 + +use anyhow::{anyhow, Result}; +use arrow::compute::{max, min}; +use arrow_array::RecordBatch; +use datafusion::execution::context::SessionContext; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::execution::SendableRecordBatchStream; +use datafusion::physical_plan::ExecutionPlan; +use datafusion_proto::physical_plan::AsExecutionPlan; +use datafusion_proto::protobuf::PhysicalPlanNode; +use futures::StreamExt; +use prost::Message; +use std::collections::BTreeMap; +use std::sync::{Arc, RwLock}; +use std::time::SystemTime; +use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; +use tracing::warn; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use async_trait::async_trait; +use tracing_subscriber::Registry; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; +use crate::sql::common::time_utils::print_time; +use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; + +struct ActiveWindowExec { + sender: Option>, + result_stream: Option, +} + +impl 
ActiveWindowExec { + fn new( + plan: Arc, + hook: &Arc>>>, + ) -> Result { + let (tx, rx) = unbounded_channel(); + *hook.write().unwrap() = Some(rx); + plan.reset()?; + let result_stream = plan.execute(0, SessionContext::new().task_ctx())?; + Ok(Self { + sender: Some(tx), + result_stream: Some(result_stream), + }) + } + + async fn close_and_drain(&mut self) -> Result> { + self.sender.take(); + let mut results = Vec::new(); + if let Some(mut stream) = self.result_stream.take() { + while let Some(batch) = stream.next().await { + results.push(batch?); + } + } + Ok(results) + } +} + +pub struct WindowFunctionOperator { + input_schema: FsSchemaRef, + input_schema_unkeyed: FsSchemaRef, + window_exec_plan: Arc, + receiver_hook: Arc>>>, + active_execs: BTreeMap, +} + +impl WindowFunctionOperator { + fn filter_and_split_batches( + &self, + batch: RecordBatch, + watermark: Option, + ) -> Result> { + if batch.num_rows() == 0 { + return Ok(vec![]); + } + + let timestamp_column = self.input_schema.timestamp_column(&batch); + let min_timestamp = from_nanos(min(timestamp_column).unwrap() as u128); + let max_timestamp = from_nanos(max(timestamp_column).unwrap() as u128); + + if let Some(wm) = watermark { + if max_timestamp < wm { + warn!( + "dropped late batch: max_ts {} < watermark {}", + print_time(max_timestamp), + print_time(wm) + ); + return Ok(vec![]); + } + } + + if min_timestamp == max_timestamp { + return Ok(vec![(batch, max_timestamp)]); + } + + let sorted_batch = self + .input_schema_unkeyed + .sort(batch, true) + .map_err(|e| anyhow!("sort for window fn: {e}"))?; + let filtered_batch = self + .input_schema_unkeyed + .filter_by_time(sorted_batch, watermark) + .map_err(|e| anyhow!("filter_by_time: {e}"))?; + if filtered_batch.num_rows() == 0 { + return Ok(vec![]); + } + + let filtered_timestamps = self.input_schema.timestamp_column(&filtered_batch); + let ranges = self + .input_schema_unkeyed + .partition(&filtered_batch, true) + .map_err(|e| anyhow!("partition by time: 
{e}"))?; + + let mut batches = Vec::with_capacity(ranges.len()); + for range in ranges { + let slice = filtered_batch.slice(range.start, range.end - range.start); + let ts = from_nanos(filtered_timestamps.value(range.start) as u128); + batches.push((slice, ts)); + } + Ok(batches) + } + + fn get_or_create_exec(&mut self, timestamp: SystemTime) -> Result<&mut ActiveWindowExec> { + use std::collections::btree_map::Entry; + match self.active_execs.entry(timestamp) { + Entry::Vacant(v) => { + let new_exec = + ActiveWindowExec::new(self.window_exec_plan.clone(), &self.receiver_hook)?; + Ok(v.insert(new_exec)) + } + Entry::Occupied(o) => Ok(o.into_mut()), + } + } +} + +#[async_trait] +impl MessageOperator for WindowFunctionOperator { + fn name(&self) -> &str { + "WindowFunction" + } + + async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { + let watermark = ctx.last_present_watermark(); + let mut tm = ctx.table_manager_guard().await?; + let table = tm + .get_expiring_time_key_table("input", watermark) + .await + .map_err(|e| anyhow!("expiring time key table input: {e}"))?; + + for (timestamp, batches) in table.all_batches_for_watermark(watermark) { + let exec = self.get_or_create_exec(*timestamp)?; + for batch in batches { + exec + .sender + .as_ref() + .ok_or_else(|| anyhow!("window exec sender missing on restore"))? 
+ .send(batch.clone()) + .map_err(|e| anyhow!("restore send: {e}"))?; + } + } + Ok(()) + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + ctx: &mut TaskContext, + ) -> Result> { + let current_watermark = ctx.last_present_watermark(); + let split_batches = self.filter_and_split_batches(batch, current_watermark)?; + + let mut tm = ctx.table_manager_guard().await?; + let table = tm + .get_expiring_time_key_table("input", current_watermark) + .await + .map_err(|e| anyhow!("expiring time key table input: {e}"))?; + + for (sub_batch, timestamp) in split_batches { + table.insert(timestamp, sub_batch.clone()); + let exec = self.get_or_create_exec(timestamp)?; + exec + .sender + .as_ref() + .ok_or_else(|| anyhow!("window exec sender missing"))? + .send(sub_batch) + .map_err(|e| anyhow!("route batch to plan: {e}"))?; + } + + Ok(vec![]) + } + + async fn process_watermark( + &mut self, + watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + let Watermark::EventTime(current_time) = watermark else { + return Ok(vec![]); + }; + + let mut final_outputs = Vec::new(); + + // 与 worker 一致:仅当桶时间戳 **严格小于** 当前事件时间水位时才结算(`watermark <= ts` 时保留)。 + let mut expired_timestamps = Vec::new(); + for &k in self.active_execs.keys() { + if k < current_time { + expired_timestamps.push(k); + } else { + break; + } + } + + for ts in expired_timestamps { + let mut exec = self + .active_execs + .remove(&ts) + .ok_or_else(|| anyhow!("missing window exec"))?; + let result_batches = exec.close_and_drain().await?; + for batch in result_batches { + final_outputs.push(StreamOutput::Forward(batch)); + } + } + + Ok(final_outputs) + } + + async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { + let watermark = ctx.last_present_watermark(); + let mut tm = ctx.table_manager_guard().await?; + tm.get_expiring_time_key_table("input", watermark) + .await + .map_err(|e| anyhow!("expiring time key table input: {e}"))? 
+ .flush(watermark) + .await?; + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + Ok(vec![]) + } +} + +pub struct WindowFunctionConstructor; + +impl WindowFunctionConstructor { + pub fn with_config( + &self, + config: protocol::grpc::api::WindowFunctionOperator, + registry: Arc, + ) -> anyhow::Result { + let input_schema = Arc::new( + FsSchema::try_from( + config + .input_schema + .ok_or_else(|| anyhow!("missing input schema"))?, + ) + .map_err(|e| anyhow!("input schema: {e}"))?, + ); + + let input_schema_unkeyed = Arc::new( + FsSchema::from_schema_unkeyed(input_schema.schema.clone()) + .map_err(|e| anyhow!("unkeyed schema: {e}"))?, + ); + + let receiver_hook = Arc::new(RwLock::new(None)); + let codec = FsPhysicalExtensionCodec { + context: DecodingContext::UnboundedBatchStream(receiver_hook.clone()), + }; + + let window_exec_node = + PhysicalPlanNode::decode(&mut config.window_function_plan.as_slice()) + .map_err(|e| anyhow!("decode window_function_plan: {e}"))?; + let window_exec_plan = window_exec_node + .try_into_physical_plan( + registry.as_ref(), + &RuntimeEnvBuilder::new().build()?, + &codec, + ) + .map_err(|e| anyhow!("window physical plan: {e}"))?; + + Ok(WindowFunctionOperator { + input_schema, + input_schema_unkeyed, + window_exec_plan, + receiver_hook, + active_execs: BTreeMap::new(), + }) + } +} diff --git a/src/runtime/streaming/protocol/control.rs b/src/runtime/streaming/protocol/control.rs new file mode 100644 index 00000000..a7a9da57 --- /dev/null +++ b/src/runtime/streaming/protocol/control.rs @@ -0,0 +1,74 @@ +//! 
控制平面:与 [`super::event::StreamEvent`] 队列分离的高优先级指令。 + +use serde::{Deserialize, Serialize}; +use std::time::Duration; +use tokio::sync::mpsc::{self, Receiver, Sender}; +use crate::sql::common::CheckpointBarrier; + +/// 可序列化的 barrier 载荷(`CheckpointBarrier` 本身未实现 `serde`,供 RPC / 持久化使用)。 +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct CheckpointBarrierWire { + pub epoch: u32, + pub min_epoch: u32, + pub timestamp_secs: u64, + pub timestamp_subsec_nanos: u32, + pub then_stop: bool, +} + +impl From for CheckpointBarrierWire { + fn from(b: CheckpointBarrier) -> Self { + let d = b + .timestamp + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default(); + Self { + epoch: b.epoch, + min_epoch: b.min_epoch, + timestamp_secs: d.as_secs(), + timestamp_subsec_nanos: d.subsec_nanos(), + then_stop: b.then_stop, + } + } +} + +impl From for CheckpointBarrier { + fn from(w: CheckpointBarrierWire) -> Self { + Self { + epoch: w.epoch, + min_epoch: w.min_epoch, + timestamp: std::time::UNIX_EPOCH + + Duration::new(w.timestamp_secs, w.timestamp_subsec_nanos), + then_stop: w.then_stop, + } + } +} + +/// JobManager / 调度器下发的高优控制指令。 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum ControlCommand { + Start, + Stop { mode: StopMode }, + DropState, + Commit { epoch: u32 }, + UpdateConfig { config_json: String }, + /// 通常由 [`crate::runtime::streaming::SourceRunner`] 接收,源头落盘后向下游注入 `Barrier`。 + TriggerCheckpoint { barrier: CheckpointBarrierWire }, +} + +impl ControlCommand { + pub fn trigger_checkpoint(barrier: CheckpointBarrier) -> Self { + Self::TriggerCheckpoint { + barrier: barrier.into(), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum StopMode { + Graceful, + Immediate, +} + +pub fn control_channel(capacity: usize) -> (Sender, Receiver) { + mpsc::channel(capacity) +} diff --git a/src/runtime/streaming/protocol/event.rs b/src/runtime/streaming/protocol/event.rs new file mode 100644 index 00000000..ee974e7e --- 
/dev/null +++ b/src/runtime/streaming/protocol/event.rs @@ -0,0 +1,11 @@ +use arrow_array::RecordBatch; +use crate::sql::common::{CheckpointBarrier, Watermark}; + +/// 核心数据面事件 +#[derive(Debug, Clone)] +pub enum StreamEvent { + Data(RecordBatch), + Watermark(Watermark), + Barrier(CheckpointBarrier), + EndOfStream, +} diff --git a/src/runtime/streaming/protocol/mod.rs b/src/runtime/streaming/protocol/mod.rs new file mode 100644 index 00000000..852562de --- /dev/null +++ b/src/runtime/streaming/protocol/mod.rs @@ -0,0 +1,15 @@ +//! 协议层:数据事件、控制命令、水位线合并与比较语义。 + +pub mod control; +pub mod event; +pub mod stream_out; +pub mod tracked; +pub mod watermark; + +pub use control::{ + control_channel, CheckpointBarrierWire, ControlCommand, StopMode, +}; +pub use event::StreamEvent; +pub use stream_out::StreamOutput; +pub use tracked::TrackedEvent; +pub use watermark::{merge_watermarks, watermark_strictly_advances}; diff --git a/src/runtime/streaming/protocol/stream_out.rs b/src/runtime/streaming/protocol/stream_out.rs new file mode 100644 index 00000000..49d963df --- /dev/null +++ b/src/runtime/streaming/protocol/stream_out.rs @@ -0,0 +1,15 @@ +use arrow_array::RecordBatch; +use crate::sql::common::Watermark; + +/// 算子产出的数据及下游 **路由意图**(由 `SubtaskRunner` 选择 `collect` / `collect_keyed` / `broadcast` / 水位广播)。 +#[derive(Debug, Clone)] +pub enum StreamOutput { + /// 发往所有下游(与 `TaskContext::collect` 一致:当前实现为每条边各发一份 `Data`)。 + Forward(RecordBatch), + /// 按 `key_hash % outboxes.len()` 发往单一分区(KeyBy / Shuffle)。 + Keyed(u64, RecordBatch), + /// 广播同一份数据到所有下游边(如 broadcast join)。 + Broadcast(RecordBatch), + /// 向所有下游广播水位线(如表达式水位生成器)。 + Watermark(Watermark), +} diff --git a/src/runtime/streaming/protocol/tracked.rs b/src/runtime/streaming/protocol/tracked.rs new file mode 100644 index 00000000..c675b5bd --- /dev/null +++ b/src/runtime/streaming/protocol/tracked.rs @@ -0,0 +1,31 @@ +use std::sync::Arc; + +use crate::runtime::streaming::memory::MemoryTicket; +use 
crate::runtime::streaming::protocol::event::StreamEvent; + +/// 在 Channel 中实际传输的事件,完美解决多路广播 (Broadcast) 的内存管理问题。 +/// +/// `MemoryTicket` 包在 `Arc` 中:如果 Event 被发送给 N 个下游分区(Broadcast 路由), +/// 只需 Clone 此 `TrackedEvent`,底层数据共享一块内存,Arc 引用计数 +N。 +/// 只有当所有下游全部处理完并 Drop 后,Arc 归零,内存才被真正释放给 Pool。 +#[derive(Debug, Clone)] +pub struct TrackedEvent { + pub event: StreamEvent, + pub _ticket: Option>, +} + +impl TrackedEvent { + pub fn new(event: StreamEvent, ticket: Option) -> Self { + Self { + event, + _ticket: ticket.map(Arc::new), + } + } + + pub fn control(event: StreamEvent) -> Self { + Self { + event, + _ticket: None, + } + } +} diff --git a/src/runtime/streaming/protocol/watermark.rs b/src/runtime/streaming/protocol/watermark.rs new file mode 100644 index 00000000..43baeabb --- /dev/null +++ b/src/runtime/streaming/protocol/watermark.rs @@ -0,0 +1,80 @@ +//! 水位线类型来自 `arroyo_types::Watermark`;此处提供 **多路对齐合并** 与 **单调推进** 判断。 + +use crate::sql::common::Watermark; + +/// 多输入对齐:`Idle` 不参与事件时间取最小;若全部为 `Idle` 则输出 `Idle`。 +/// 任一路尚未有水位线时返回 `None`(木桶短板未齐)。 +pub fn merge_watermarks(per_input: &[Option]) -> Option { + if per_input.iter().any(|w| w.is_none()) { + return None; + } + + let mut min_event: Option = None; + let mut all_idle = true; + + for w in per_input.iter().flatten() { + match w { + Watermark::Idle => {} + Watermark::EventTime(t) => { + all_idle = false; + min_event = Some(match min_event { + None => *t, + Some(m) => m.min(*t), + }); + } + } + } + + if all_idle { + Some(Watermark::Idle) + } else { + Some(Watermark::EventTime( + min_event.expect("non-idle alignment must have at least one EventTime"), + )) + } +} + +/// `new` 相对 `previous` 是否为 **严格推进**;`previous == None` 时恒为真。 +pub fn watermark_strictly_advances(new: Watermark, previous: Option) -> bool { + match previous { + None => true, + Some(prev) => match (new, prev) { + (Watermark::EventTime(tn), Watermark::EventTime(tp)) => tn > tp, + (Watermark::Idle, Watermark::Idle) => false, + (Watermark::Idle, 
Watermark::EventTime(_)) => true, + (Watermark::EventTime(_), Watermark::Idle) => true, + }, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::{Duration, SystemTime}; + + #[test] + fn merge_waits_for_all_channels() { + let wms = vec![Some(Watermark::EventTime(SystemTime::UNIX_EPOCH)), None]; + assert!(merge_watermarks(&wms).is_none()); + } + + #[test] + fn merge_min_event_time_ignores_idle() { + let t1 = SystemTime::UNIX_EPOCH + Duration::from_secs(10); + let t2 = SystemTime::UNIX_EPOCH + Duration::from_secs(5); + let wms = vec![Some(Watermark::EventTime(t1)), Some(Watermark::Idle)]; + assert_eq!(merge_watermarks(&wms), Some(Watermark::EventTime(t1))); + + let wms = vec![ + Some(Watermark::EventTime(t1)), + Some(Watermark::EventTime(t2)), + ]; + assert_eq!(merge_watermarks(&wms), Some(Watermark::EventTime(t2))); + } + + #[test] + fn merge_all_idle() { + let wms = vec![Some(Watermark::Idle), Some(Watermark::Idle)]; + assert_eq!(merge_watermarks(&wms), Some(Watermark::Idle)); + } +} diff --git a/src/runtime/streaming/state/mod.rs b/src/runtime/streaming/state/mod.rs new file mode 100644 index 00000000..e69de29b diff --git a/src/runtime/streaming/state/table_manager.rs b/src/runtime/streaming/state/table_manager.rs new file mode 100644 index 00000000..e69de29b diff --git a/src/runtime/input/input_protocol.rs b/src/runtime/wasm/input/input_protocol.rs similarity index 100% rename from src/runtime/input/input_protocol.rs rename to src/runtime/wasm/input/input_protocol.rs diff --git a/src/runtime/input/input_provider.rs b/src/runtime/wasm/input/input_provider.rs similarity index 100% rename from src/runtime/input/input_provider.rs rename to src/runtime/wasm/input/input_provider.rs diff --git a/src/runtime/input/input_runner.rs b/src/runtime/wasm/input/input_runner.rs similarity index 100% rename from src/runtime/input/input_runner.rs rename to src/runtime/wasm/input/input_runner.rs diff --git a/src/runtime/input/interface.rs 
b/src/runtime/wasm/input/interface.rs similarity index 100% rename from src/runtime/input/interface.rs rename to src/runtime/wasm/input/interface.rs diff --git a/src/runtime/input/mod.rs b/src/runtime/wasm/input/mod.rs similarity index 100% rename from src/runtime/input/mod.rs rename to src/runtime/wasm/input/mod.rs diff --git a/src/runtime/input/protocol/kafka/config.rs b/src/runtime/wasm/input/protocol/kafka/config.rs similarity index 100% rename from src/runtime/input/protocol/kafka/config.rs rename to src/runtime/wasm/input/protocol/kafka/config.rs diff --git a/src/runtime/input/protocol/kafka/kafka_protocol.rs b/src/runtime/wasm/input/protocol/kafka/kafka_protocol.rs similarity index 100% rename from src/runtime/input/protocol/kafka/kafka_protocol.rs rename to src/runtime/wasm/input/protocol/kafka/kafka_protocol.rs diff --git a/src/runtime/input/protocol/kafka/mod.rs b/src/runtime/wasm/input/protocol/kafka/mod.rs similarity index 100% rename from src/runtime/input/protocol/kafka/mod.rs rename to src/runtime/wasm/input/protocol/kafka/mod.rs diff --git a/src/runtime/input/protocol/mod.rs b/src/runtime/wasm/input/protocol/mod.rs similarity index 100% rename from src/runtime/input/protocol/mod.rs rename to src/runtime/wasm/input/protocol/mod.rs diff --git a/src/runtime/sink/mod.rs b/src/runtime/wasm/mod.rs similarity index 86% rename from src/runtime/sink/mod.rs rename to src/runtime/wasm/mod.rs index a0a2a6fc..b1c82f4c 100644 --- a/src/runtime/sink/mod.rs +++ b/src/runtime/wasm/mod.rs @@ -1,5 +1,6 @@ // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. +// // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 @@ -10,6 +11,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Sink module +//! WebAssembly runtime integration. 
-// TODO: Add sink implementation here +pub mod input; +pub mod output; +pub mod processor; diff --git a/src/runtime/output/interface.rs b/src/runtime/wasm/output/interface.rs similarity index 100% rename from src/runtime/output/interface.rs rename to src/runtime/wasm/output/interface.rs diff --git a/src/runtime/output/mod.rs b/src/runtime/wasm/output/mod.rs similarity index 100% rename from src/runtime/output/mod.rs rename to src/runtime/wasm/output/mod.rs diff --git a/src/runtime/output/output_protocol.rs b/src/runtime/wasm/output/output_protocol.rs similarity index 100% rename from src/runtime/output/output_protocol.rs rename to src/runtime/wasm/output/output_protocol.rs diff --git a/src/runtime/output/output_provider.rs b/src/runtime/wasm/output/output_provider.rs similarity index 100% rename from src/runtime/output/output_provider.rs rename to src/runtime/wasm/output/output_provider.rs diff --git a/src/runtime/output/output_runner.rs b/src/runtime/wasm/output/output_runner.rs similarity index 100% rename from src/runtime/output/output_runner.rs rename to src/runtime/wasm/output/output_runner.rs diff --git a/src/runtime/output/protocol/kafka/kafka_protocol.rs b/src/runtime/wasm/output/protocol/kafka/kafka_protocol.rs similarity index 100% rename from src/runtime/output/protocol/kafka/kafka_protocol.rs rename to src/runtime/wasm/output/protocol/kafka/kafka_protocol.rs diff --git a/src/runtime/output/protocol/kafka/mod.rs b/src/runtime/wasm/output/protocol/kafka/mod.rs similarity index 100% rename from src/runtime/output/protocol/kafka/mod.rs rename to src/runtime/wasm/output/protocol/kafka/mod.rs diff --git a/src/runtime/output/protocol/kafka/producer_config.rs b/src/runtime/wasm/output/protocol/kafka/producer_config.rs similarity index 100% rename from src/runtime/output/protocol/kafka/producer_config.rs rename to src/runtime/wasm/output/protocol/kafka/producer_config.rs diff --git a/src/runtime/output/protocol/mod.rs b/src/runtime/wasm/output/protocol/mod.rs 
similarity index 100% rename from src/runtime/output/protocol/mod.rs rename to src/runtime/wasm/output/protocol/mod.rs diff --git a/src/runtime/processor/function_error.rs b/src/runtime/wasm/processor/function_error.rs similarity index 100% rename from src/runtime/processor/function_error.rs rename to src/runtime/wasm/processor/function_error.rs diff --git a/src/runtime/processor/mod.rs b/src/runtime/wasm/processor/mod.rs similarity index 100% rename from src/runtime/processor/mod.rs rename to src/runtime/wasm/processor/mod.rs diff --git a/src/runtime/processor/python/mod.rs b/src/runtime/wasm/processor/python/mod.rs similarity index 100% rename from src/runtime/processor/python/mod.rs rename to src/runtime/wasm/processor/python/mod.rs diff --git a/src/runtime/processor/python/python_host.rs b/src/runtime/wasm/processor/python/python_host.rs similarity index 100% rename from src/runtime/processor/python/python_host.rs rename to src/runtime/wasm/processor/python/python_host.rs diff --git a/src/runtime/processor/python/python_service.rs b/src/runtime/wasm/processor/python/python_service.rs similarity index 100% rename from src/runtime/processor/python/python_service.rs rename to src/runtime/wasm/processor/python/python_service.rs diff --git a/src/runtime/processor/wasm/input_strategy.rs b/src/runtime/wasm/processor/wasm/input_strategy.rs similarity index 100% rename from src/runtime/processor/wasm/input_strategy.rs rename to src/runtime/wasm/processor/wasm/input_strategy.rs diff --git a/src/runtime/processor/wasm/mod.rs b/src/runtime/wasm/processor/wasm/mod.rs similarity index 100% rename from src/runtime/processor/wasm/mod.rs rename to src/runtime/wasm/processor/wasm/mod.rs diff --git a/src/runtime/processor/wasm/thread_pool.rs b/src/runtime/wasm/processor/wasm/thread_pool.rs similarity index 100% rename from src/runtime/processor/wasm/thread_pool.rs rename to src/runtime/wasm/processor/wasm/thread_pool.rs diff --git a/src/runtime/processor/wasm/wasm_cache.rs 
b/src/runtime/wasm/processor/wasm/wasm_cache.rs similarity index 100% rename from src/runtime/processor/wasm/wasm_cache.rs rename to src/runtime/wasm/processor/wasm/wasm_cache.rs diff --git a/src/runtime/processor/wasm/wasm_host.rs b/src/runtime/wasm/processor/wasm/wasm_host.rs similarity index 100% rename from src/runtime/processor/wasm/wasm_host.rs rename to src/runtime/wasm/processor/wasm/wasm_host.rs diff --git a/src/runtime/processor/wasm/wasm_processor.rs b/src/runtime/wasm/processor/wasm/wasm_processor.rs similarity index 100% rename from src/runtime/processor/wasm/wasm_processor.rs rename to src/runtime/wasm/processor/wasm/wasm_processor.rs diff --git a/src/runtime/processor/wasm/wasm_processor_trait.rs b/src/runtime/wasm/processor/wasm/wasm_processor_trait.rs similarity index 100% rename from src/runtime/processor/wasm/wasm_processor_trait.rs rename to src/runtime/wasm/processor/wasm/wasm_processor_trait.rs diff --git a/src/runtime/processor/wasm/wasm_task.rs b/src/runtime/wasm/processor/wasm/wasm_task.rs similarity index 100% rename from src/runtime/processor/wasm/wasm_task.rs rename to src/runtime/wasm/processor/wasm/wasm_task.rs diff --git a/src/sql/common/errors.rs b/src/sql/common/errors.rs index bcda8667..507851bd 100644 --- a/src/sql/common/errors.rs +++ b/src/sql/common/errors.rs @@ -1,5 +1,8 @@ use std::fmt; +/// Result type for streaming operators and collectors. +pub type DataflowResult = std::result::Result; + /// Unified error type for streaming dataflow operations. 
#[derive(Debug)] pub enum DataflowError { @@ -26,6 +29,16 @@ impl fmt::Display for DataflowError { impl std::error::Error for DataflowError {} +impl DataflowError { + pub fn with_operator(self, operator_id: impl Into) -> Self { + let id = operator_id.into(); + match self { + DataflowError::Operator(m) => DataflowError::Operator(format!("{id}: {m}")), + other => DataflowError::Operator(format!("{id}: {other}")), + } + } +} + impl From for DataflowError { fn from(e: arrow_schema::ArrowError) -> Self { DataflowError::Arrow(e) diff --git a/src/sql/common/fs_schema.rs b/src/sql/common/fs_schema.rs index 5233bd0c..c99af1e5 100644 --- a/src/sql/common/fs_schema.rs +++ b/src/sql/common/fs_schema.rs @@ -21,6 +21,15 @@ use super::{to_nanos, TIMESTAMP_FIELD}; use std::ops::Range; use crate::sql::common::converter::Converter; +#[derive(Debug, Copy, Clone)] +pub enum FieldValueType<'a> { + Int64(Option), + UInt64(Option), + Int32(Option), + String(Option<&'a str>), + Bytes(Option<&'a [u8]>), +} + pub type FsSchemaRef = Arc; #[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs index 730d6f37..cb833c8e 100644 --- a/src/sql/common/mod.rs +++ b/src/sql/common/mod.rs @@ -30,7 +30,7 @@ pub mod operator_config; pub mod task_info; pub mod time_utils; pub mod worker; -mod converter; +pub mod converter; // ── Re-exports from existing modules ── pub use arrow_ext::{DisplayAsSql, FsExtensionType, GetArrowSchema, GetArrowType}; @@ -45,11 +45,11 @@ pub use worker::{MachineId, WorkerId}; // ── Re-exports from new modules ── pub use control::{ CheckpointCompleted, CheckpointEvent, CompactionResult, ControlMessage, ControlResp, - ErrorDomain, RetryHint, StopMode, TaskCheckpointEventType, TaskError, + ErrorDomain, RetryHint, StopMode, TableConfig, TaskCheckpointEventType, TaskError, }; pub use fs_schema::{FsSchema, FsSchemaRef}; pub use connector_options::{ConnectorOptions, FromOpts}; -pub use errors::DataflowError; +pub 
use errors::{DataflowError, DataflowResult}; pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat}; pub use operator_config::{MetadataField, OperatorConfig, RateLimit}; From 3b86ea0f8378ba371d012e6ac90ffcfe6163e337 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 25 Mar 2026 00:09:06 +0800 Subject: [PATCH 16/44] update --- Cargo.lock | 3840 ++--------------- Cargo.toml | 6 +- src/runtime/mod.rs | 2 - src/runtime/streaming/api/context.rs | 61 +- src/runtime/streaming/api/mod.rs | 2 +- src/runtime/streaming/api/operator.rs | 58 + src/runtime/streaming/execution/runner.rs | 4 +- .../execution/tracker/watermark_tracker.rs | 2 +- src/runtime/streaming/factory/mod.rs | 5 +- src/runtime/streaming/factory/registry.rs | 268 +- src/runtime/streaming/format/mod.rs | 9 + src/runtime/streaming/mod.rs | 9 +- .../grouping/incremental_aggregate.rs | 238 +- .../operators/joins/join_instance.rs | 66 +- .../operators/joins/join_with_expiration.rs | 120 +- .../streaming/operators/joins/lookup_join.rs | 10 +- src/runtime/streaming/operators/mod.rs | 17 +- .../streaming/operators/sink/kafka/mod.rs | 11 +- .../streaming/operators/source/kafka/mod.rs | 60 +- src/runtime/streaming/operators/source/mod.rs | 2 +- .../watermark/watermark_generator.rs | 4 +- .../windows/session_aggregating_window.rs | 97 +- .../windows/sliding_aggregating_window.rs | 63 +- .../windows/tumbling_aggregating_window.rs | 3 +- .../operators/windows/window_function.rs | 59 +- src/runtime/streaming/storage/mod.rs | 32 + src/sql/datastream/logical.rs | 2 + src/sql/logical_node/logical/operator_name.rs | 1 + 28 files changed, 1178 insertions(+), 3873 deletions(-) create mode 100644 src/runtime/streaming/storage/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 4cc46aef..fc3a898a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -132,30 +132,6 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" -[[package]] -name = "apache-avro" -version = "0.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a81f4e6304e455a9d52cf8ab667cb2fcf792f2cee2a31c28800901a335ecd5" -dependencies = [ - "bigdecimal", - "bon", - "digest", - "log", - "miniz_oxide", - "num-bigint", - "quad-rand", - "rand 0.9.2", - "regex-lite", - "serde", - "serde_bytes", - "serde_json", - "strum 0.27.2", - "strum_macros 0.27.2", - "thiserror 2.0.17", - "uuid", -] - [[package]] name = "ar_archive_writer" version = "0.5.1" @@ -171,15 +147,6 @@ version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" -[[package]] -name = "arc-swap" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6" -dependencies = [ - "rustversion", -] - [[package]] name = "arrayref" version = "0.3.9" @@ -521,133 +488,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "arroyo-datastream" -version = "0.16.0-dev" -dependencies = [ - "anyhow", - "arrow-schema 55.2.0", - "arroyo-rpc", - "bincode", - "datafusion-proto 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "itertools 0.14.0", - "petgraph 0.8.3", - "prost", - "rand 0.9.2", - "serde", - "serde_json", - "strum 0.27.2", - "syn 2.0.113", -] - -[[package]] -name = "arroyo-rpc" -version = "0.16.0-dev" -dependencies = [ - "ahash", - "anyhow", - "apache-avro", - "arc-swap", - "arrow", - "arrow-array 55.2.0", - "arrow-ord", - "arrow-schema 55.2.0", - "arroyo-types", - "async-trait", - "base64", - "bincode", - "bytes", - "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "dirs", - "figment", - "futures", - "k8s-openapi", - "local-ip-address", - "log", - "nanoid", - "object_store", - "percent-encoding", - "prost", - "rand 
0.9.2", - "regex", - "reqwest", - "rustls", - "rustls-native-certs", - "schemars 1.2.1", - "serde", - "serde_json", - "smallvec", - "strum 0.27.2", - "strum_macros 0.27.2", - "thiserror 2.0.17", - "tokio", - "tonic 0.13.1", - "tonic-build 0.13.1", - "tracing", - "url", - "utoipa", -] - -[[package]] -name = "arroyo-state" -version = "0.16.0-dev" -dependencies = [ - "anyhow", - "arrow", - "arrow-array 55.2.0", - "arrow-ord", - "arrow-schema 55.2.0", - "arroyo-datastream", - "arroyo-rpc", - "arroyo-storage", - "arroyo-types", - "async-trait", - "bincode", - "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures", - "lazy_static", - "object_store", - "once_cell", - "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "prometheus", - "prost", - "serde", - "serde_json", - "tokio", - "tracing", -] - -[[package]] -name = "arroyo-storage" -version = "0.16.0-dev" -dependencies = [ - "arroyo-rpc", - "arroyo-types", - "async-trait", - "aws-config", - "aws-credential-types", - "bytes", - "futures", - "object_store", - "rand 0.9.2", - "regex", - "thiserror 2.0.17", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "arroyo-types" -version = "0.16.0-dev" -dependencies = [ - "arrow", - "arrow-array 55.2.0", - "bincode", - "chrono", - "serde", -] - [[package]] name = "async-compression" version = "0.4.19" @@ -684,7 +524,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -695,7 +535,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -707,15 +547,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "atomic" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340" 
-dependencies = [ - "bytemuck", -] - [[package]] name = "atomic-waker" version = "1.1.2" @@ -729,627 +560,165 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] -name = "aws-config" -version = "1.6.3" +name = "axum" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02a18fd934af6ae7ca52410d4548b98eb895aab0f1ea417d168d85db1434a141" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-sdk-sso", - "aws-sdk-ssooidc", - "aws-sdk-sts", - "aws-smithy-async", - "aws-smithy-http 0.62.6", - "aws-smithy-json", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", + "async-trait", + "axum-core", "bytes", - "fastrand", - "hex", - "http 1.4.0", - "ring", - "time", - "tokio", - "tracing", - "url", - "zeroize", + "futures-util", + "http", + "http-body", + "http-body-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper", + "tower 0.5.2", + "tower-layer", + "tower-service", ] [[package]] -name = "aws-credential-types" -version = "1.2.13" +name = "axum-core" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" dependencies = [ - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "zeroize", + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", ] [[package]] -name = "aws-lc-rs" -version = "1.16.2" +name = "base64" +version = "0.22.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" -dependencies = [ - "aws-lc-sys", - "zeroize", -] +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] -name = "aws-lc-sys" -version = "0.39.0" +name = "bigdecimal" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa7e52a4c5c547c741610a2c6f123f3881e409b714cd27e6798ef020c514f0a" +checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" dependencies = [ - "cc", - "cmake", - "dunce", - "fs_extra", + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", ] [[package]] -name = "aws-runtime" -version = "1.7.1" +name = "bincode" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75" +checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" dependencies = [ - "aws-credential-types", - "aws-sigv4", - "aws-smithy-async", - "aws-smithy-http 0.63.5", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "bytes-utils", - "fastrand", - "http 1.4.0", - "http-body 1.0.1", - "percent-encoding", - "pin-project-lite", - "tracing", - "uuid", + "bincode_derive", + "serde", + "unty", ] [[package]] -name = "aws-sdk-sso" -version = "1.72.0" +name = "bincode_derive" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13118ad30741222f67b1a18e5071385863914da05124652b38e172d6d3d9ce31" +checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http 0.62.6", - "aws-smithy-json", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - 
"regex-lite", - "tracing", + "virtue", ] [[package]] -name = "aws-sdk-ssooidc" -version = "1.73.0" +name = "bindgen" +version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f879a8572b4683a8f84f781695bebf2f25cf11a81a2693c31fc0e0215c2c1726" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http 0.62.6", - "aws-smithy-json", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", + "bitflags 1.3.2", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn", ] [[package]] -name = "aws-sdk-sts" -version = "1.73.0" +name = "bindgen" +version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1e9c3c24e36183e2f698235ed38dcfbbdff1d09b9232dc866c4be3011e0b47e" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http 0.62.6", - "aws-smithy-json", - "aws-smithy-query", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-smithy-xml", - "aws-types", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", + "bitflags 2.10.0", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "proc-macro2", + "quote", + "regex", + "rustc-hash 2.1.1", + "shlex", + "syn", ] [[package]] -name = "aws-sigv4" -version = "1.4.1" +name = "bitflags" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9" -dependencies = [ - "aws-credential-types", - "aws-smithy-http 0.63.5", - "aws-smithy-runtime-api", - "aws-smithy-types", - 
"bytes", - "form_urlencoded", - "hex", - "hmac", - "http 0.2.12", - "http 1.4.0", - "percent-encoding", - "sha2", - "time", - "tracing", -] +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] -name = "aws-smithy-async" -version = "1.2.13" +name = "bitmaps" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc50d0f63e714784b84223abd7abbc8577de8c35d699e0edd19f0a88a08ae13" +checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2" dependencies = [ - "futures-util", - "pin-project-lite", - "tokio", + "typenum", ] [[package]] -name = "aws-smithy-http" -version = "0.62.6" +name = "blake2" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" dependencies = [ - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "bytes-utils", - "futures-core", - "futures-util", - "http 0.2.12", - "http 1.4.0", - "http-body 0.4.6", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tracing", + "digest", ] [[package]] -name = "aws-smithy-http" -version = "0.63.5" +name = "blake3" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d619373d490ad70966994801bc126846afaa0d1ee920697a031f0cf63f2568e7" -dependencies = [ - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "bytes-utils", - "futures-core", - "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tracing", -] - -[[package]] -name = "aws-smithy-http-client" -version = "1.1.11" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "00ccbb08c10f6bcf912f398188e42ee2eab5f1767ce215a02a73bc5df1bbdd95" -dependencies = [ - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "h2", - "http 1.4.0", - "hyper", - "hyper-rustls", - "hyper-util", - "pin-project-lite", - "rustls", - "rustls-native-certs", - "rustls-pki-types", - "tokio", - "tokio-rustls", - "tower 0.5.2", - "tracing", -] - -[[package]] -name = "aws-smithy-json" -version = "0.61.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" -dependencies = [ - "aws-smithy-types", -] - -[[package]] -name = "aws-smithy-observability" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b" -dependencies = [ - "aws-smithy-runtime-api", -] - -[[package]] -name = "aws-smithy-query" -version = "0.60.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f76a580e3d8f8961e5d48763214025a2af65c2fa4cd1fb7f270a0e107a71b0" -dependencies = [ - "aws-smithy-types", - "urlencoding", -] - -[[package]] -name = "aws-smithy-runtime" -version = "1.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ccf7f6eba8b2dcf8ce9b74806c6c185659c311665c4bf8d6e71ebd454db6bf" -dependencies = [ - "aws-smithy-async", - "aws-smithy-http 0.63.5", - "aws-smithy-http-client", - "aws-smithy-observability", - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "fastrand", - "http 0.2.12", - "http 1.4.0", - "http-body 0.4.6", - "http-body 1.0.1", - "http-body-util", - "pin-project-lite", - "pin-utils", - "tokio", - "tracing", -] - -[[package]] -name = "aws-smithy-runtime-api" -version = "1.11.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4af6e5def28be846479bbeac55aa4603d6f7986fc5da4601ba324dd5d377516" 
-dependencies = [ - "aws-smithy-async", - "aws-smithy-types", - "bytes", - "http 0.2.12", - "http 1.4.0", - "pin-project-lite", - "tokio", - "tracing", - "zeroize", -] - -[[package]] -name = "aws-smithy-types" -version = "1.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ca2734c16913a45343b37313605d84e7d8b34a4611598ce1d25b35860a2bed3" -dependencies = [ - "base64-simd", - "bytes", - "bytes-utils", - "http 0.2.12", - "http 1.4.0", - "http-body 0.4.6", - "http-body 1.0.1", - "http-body-util", - "itoa", - "num-integer", - "pin-project-lite", - "pin-utils", - "ryu", - "serde", - "time", -] - -[[package]] -name = "aws-smithy-xml" -version = "0.60.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b53543b4b86ed43f051644f704a98c7291b3618b67adf057ee77a366fa52fcaa" -dependencies = [ - "xmlparser", -] - -[[package]] -name = "aws-types" -version = "1.3.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96" -dependencies = [ - "aws-credential-types", - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "rustc_version", - "tracing", -] - -[[package]] -name = "axum" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" -dependencies = [ - "async-trait", - "axum-core 0.4.5", - "bytes", - "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "itoa", - "matchit 0.7.3", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower 0.5.2", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum" -version = "0.8.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" -dependencies = [ - "axum-core 0.5.6", - "bytes", 
- "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "itoa", - "matchit 0.8.4", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "serde_core", - "sync_wrapper", - "tower 0.5.2", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum-core" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum-core" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" -dependencies = [ - "bytes", - "futures-core", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "mime", - "pin-project-lite", - "sync_wrapper", - "tower-layer", - "tower-service", -] - -[[package]] -name = "backtrace" -version = "0.3.76" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-link", -] - -[[package]] -name = "backtrace-ext" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50" -dependencies = [ - "backtrace", -] - -[[package]] -name = "base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "base64-simd" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" -dependencies = [ - "outref", - "vsimd", -] - -[[package]] -name = "bigdecimal" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" -dependencies = [ - "autocfg", - "libm", - "num-bigint", - "num-integer", - "num-traits", - "serde", - "serde_json", -] - -[[package]] -name = "bincode" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" -dependencies = [ - "bincode_derive", - "serde", - "unty", -] - -[[package]] -name = "bincode_derive" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" -dependencies = [ - "virtue", -] - -[[package]] -name = "bindgen" -version = "0.65.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" -dependencies = [ - "bitflags 1.3.2", - "cexpr", - "clang-sys", - "lazy_static", - "lazycell", - "peeking_take_while", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn 2.0.113", -] - -[[package]] -name = "bindgen" -version = "0.72.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" -dependencies = [ - "bitflags 2.10.0", - "cexpr", - "clang-sys", - "itertools 0.13.0", - "proc-macro2", - "quote", - "regex", - "rustc-hash 2.1.1", - "shlex", - "syn 2.0.113", -] - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.10.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" - -[[package]] -name = "bitmaps" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2" -dependencies = [ - "typenum", -] - -[[package]] -name = "bitvec" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" -dependencies = [ - "funty", - "radium", - "tap", - "wyz", -] - -[[package]] -name = "blake2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" dependencies = [ "arrayref", "arrayvec", @@ -1368,31 +737,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "bon" -version = "3.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" -dependencies = [ - "bon-macros", - "rustversion", -] - -[[package]] -name = "bon-macros" -version = "3.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" -dependencies = [ - "darling 0.23.0", - "ident_case", - "prettyplease", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.113", -] - [[package]] name = "brotli" version = "8.0.2" @@ -1429,12 +773,6 @@ version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" -[[package]] -name = "bytemuck" -version = "1.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" - [[package]] name = "byteorder" version = "1.5.0" @@ -1447,16 +785,6 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" -[[package]] -name = "bytes-utils" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" -dependencies = [ - "bytes", - "either", -] - [[package]] name = "bzip2" version = "0.5.2" @@ -1618,12 +946,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" -[[package]] -name = "cfg_aliases" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" - [[package]] name = "chrono" version = "0.4.42" @@ -1633,7 +955,6 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", - "serde", "wasm-bindgen", "windows-link", ] @@ -1645,17 +966,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" dependencies = [ "chrono", - "phf 0.12.1", -] - -[[package]] -name = "chumsky" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" -dependencies = [ - "hashbrown 0.14.5", - "stacker", + "phf", ] [[package]] @@ -1700,7 +1011,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -1736,15 +1047,6 @@ dependencies = [ "thiserror 
2.0.17", ] -[[package]] -name = "codegen_template" -version = "0.1.0" -source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209" -dependencies = [ - "unicode-xid", - "unscanny", -] - [[package]] name = "colorchoice" version = "1.0.4" @@ -1788,73 +1090,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "core-foundation" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation-sys" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "cornucopia" -version = "0.9.0" -source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209" -dependencies = [ - "chumsky", - "clap", - "codegen_template", - "heck 0.4.1", - "indexmap 2.12.1", - "miette", - "postgres", - "postgres-types", - "prettyplease", - "rusqlite", - "syn 2.0.113", - "thiserror 1.0.69", -] - -[[package]] -name = "cornucopia_async" -version = "0.6.0" -source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209" -dependencies = [ - "async-trait", - "cornucopia_client_core", - "deadpool-postgres", - "rusqlite", - "tokio-postgres", -] - -[[package]] -name = "cornucopia_client_core" -version = "0.4.0" -source = 
"git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209" -dependencies = [ - "fallible-iterator 0.2.0", - "postgres-protocol", - "postgres-types", -] - [[package]] name = "cpp_demangle" version = "0.4.5" @@ -2117,75 +1358,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "darling" -version = "0.20.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" -dependencies = [ - "darling_core 0.20.11", - "darling_macro 0.20.11", -] - -[[package]] -name = "darling" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" -dependencies = [ - "darling_core 0.23.0", - "darling_macro 0.23.0", -] - -[[package]] -name = "darling_core" -version = "0.20.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 2.0.113", -] - -[[package]] -name = "darling_core" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" -dependencies = [ - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 2.0.113", -] - -[[package]] -name = "darling_macro" -version = "0.20.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" -dependencies = [ - "darling_core 0.20.11", - "quote", - "syn 2.0.113", -] - -[[package]] -name = "darling_macro" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" -dependencies = [ - "darling_core 0.23.0", - "quote", - "syn 
2.0.113", -] - [[package]] name = "dashmap" version = "5.5.3" @@ -2193,78 +1365,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown 0.14.5", - "lock_api", - "once_cell", - "parking_lot_core", -] - -[[package]] -name = "dashmap" -version = "6.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "hashbrown 0.14.5", - "lock_api", - "once_cell", - "parking_lot_core", -] - -[[package]] -name = "datafusion" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a11e19a7ccc5bb979c95c1dceef663eab39c9061b3bbf8d1937faf0f03bf41f" -dependencies = [ - "arrow", - "arrow-ipc 55.2.0", - "arrow-schema 55.2.0", - "async-trait", - "bytes", - "bzip2", - "chrono", - "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-catalog-listing 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-datasource-csv 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-datasource-json 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-datasource-parquet 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - 
"datafusion-functions 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-nested 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-table 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-window 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "flate2", - "futures", - "itertools 0.14.0", - "log", - "object_store", - "parking_lot", - "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.9.2", - "regex", - "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tempfile", - "tokio", - "url", - "uuid", - "xz2", - "zstd", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", ] [[package]] @@ -2279,29 +1397,29 @@ dependencies = [ "bytes", "bzip2", "chrono", - "datafusion-catalog 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-catalog-listing 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-datasource-csv 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-datasource-json 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-datasource-parquet 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-nested 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-table 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-window 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-sql 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-catalog", + "datafusion-catalog-listing", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", "flate2", "futures", "itertools 0.14.0", @@ -2311,7 +1429,7 @@ dependencies = [ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.9.2", "regex", - "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", + "sqlparser", "tempfile", "tokio", "url", @@ -2320,32 +1438,6 @@ dependencies = [ "zstd", ] -[[package]] -name = "datafusion-catalog" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94985e67cab97b1099db2a7af11f31a45008b282aba921c1e1d35327c212ec18" -dependencies = [ - "arrow", - "async-trait", - "dashmap 6.1.0", - 
"datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures", - "itertools 0.14.0", - "log", - "object_store", - "parking_lot", - "tokio", -] - [[package]] name = "datafusion-catalog" version = "48.0.1" @@ -2354,15 +1446,15 @@ dependencies = [ "arrow", "async-trait", "dashmap 6.1.0", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-sql 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -2371,29 +1463,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "datafusion-catalog-listing" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e002df133bdb7b0b9b429d89a69aa77b35caeadee4498b2ce1c7c23a99516988" -dependencies = [ - "arrow", - "async-trait", - "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures", - "log", - "object_store", - "tokio", -] - [[package]] name = "datafusion-catalog-listing" version = "48.0.1" @@ -2401,45 +1470,21 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "async-trait", - "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-datasource 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", "futures", "log", "object_store", "tokio", ] -[[package]] -name = "datafusion-common" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13242fc58fd753787b0a538e5ae77d356cb9d0656fa85a591a33c5f106267f6" -dependencies = [ - "ahash", - "arrow", - "arrow-ipc 55.2.0", - "base64", - "half", - "hashbrown 0.14.5", - "indexmap 2.12.1", - "libc", - "log", - "object_store", - "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "paste", - "recursive", - "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio", - "web-time", -] - [[package]] name = "datafusion-common" version = "48.0.1" @@ -2458,22 +1503,11 @@ dependencies = [ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "paste", "recursive", - "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", + "sqlparser", "tokio", "web-time", ] -[[package]] -name = 
"datafusion-common-runtime" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2239f964e95c3a5d6b4a8cde07e646de8995c1396a7fd62c6e784f5341db499" -dependencies = [ - "futures", - "log", - "tokio", -] - [[package]] name = "datafusion-common-runtime" version = "48.0.1" @@ -2484,42 +1518,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "datafusion-datasource" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cf792579bc8bf07d1b2f68c2d5382f8a63679cce8fbebfd4ba95742b6e08864" -dependencies = [ - "arrow", - "async-compression", - "async-trait", - "bytes", - "bzip2", - "chrono", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "flate2", - "futures", - "glob", - "itertools 0.14.0", - "log", - "object_store", - "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.9.2", - "tempfile", - "tokio", - "tokio-util", - "url", - "xz2", - "zstd", -] - [[package]] name = "datafusion-datasource" version = "48.0.1" @@ -2531,14 +1529,14 @@ dependencies = [ "bytes", "bzip2", "chrono", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common-runtime 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", "flate2", "futures", "glob", @@ -2555,31 +1553,6 @@ dependencies = [ "zstd", ] -[[package]] -name = "datafusion-datasource-csv" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfc114f9a1415174f3e8d2719c371fc72092ef2195a7955404cfe6b2ba29a706" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 
48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures", - "object_store", - "regex", - "tokio", -] - [[package]] name = "datafusion-datasource-csv" version = "48.0.1" @@ -2588,47 +1561,22 @@ dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", "futures", "object_store", "regex", "tokio", ] -[[package]] -name = "datafusion-datasource-json" -version = "48.0.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d88dd5e215c420a52362b9988ecd4cefd71081b730663d4f7d886f706111fc75" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures", - "object_store", - "serde_json", - "tokio", -] - [[package]] name = "datafusion-datasource-json" version = "48.0.1" @@ -2637,53 +1585,22 @@ dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 
48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", "futures", "object_store", "serde_json", "tokio", ] -[[package]] -name = "datafusion-datasource-parquet" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33692acdd1fbe75280d14f4676fe43f39e9cb36296df56575aa2cac9a819e4cf" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-plan 48.0.1 
(registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures", - "itertools 0.14.0", - "log", - "object_store", - "parking_lot", - "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.9.2", - "tokio", -] - [[package]] name = "datafusion-datasource-parquet" version = "48.0.1" @@ -2692,18 +1609,18 @@ dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + 
"datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", "futures", "itertools 0.14.0", "log", @@ -2714,36 +1631,11 @@ dependencies = [ "tokio", ] -[[package]] -name = "datafusion-doc" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0e7b648387b0c1937b83cb328533c06c923799e73a9e3750b762667f32662c0" - [[package]] name = "datafusion-doc" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" -[[package]] -name = "datafusion-execution" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9609d83d52ff8315283c6dad3b97566e877d8f366fab4c3297742f33dcd636c7" -dependencies = [ - "arrow", - "dashmap 6.1.0", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures", - "log", - "object_store", - "parking_lot", - "rand 0.9.2", - "tempfile", - "url", -] - [[package]] name = "datafusion-execution" version = "48.0.1" @@ -2751,8 +1643,8 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "dashmap 6.1.0", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-expr", "futures", "log", "object_store", @@ -2762,27 +1654,6 @@ dependencies = [ "url", ] -[[package]] -name = "datafusion-expr" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e75230cd67f650ef0399eb00f54d4a073698f2c0262948298e5299fc7324da63" -dependencies = [ - 
"arrow", - "chrono", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-window-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "indexmap 2.12.1", - "paste", - "recursive", - "serde_json", - "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "datafusion-expr" version = "48.0.1" @@ -2790,30 +1661,17 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "chrono", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", "indexmap 2.12.1", "paste", "recursive", "serde_json", - "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", -] 
- -[[package]] -name = "datafusion-expr-common" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70fafb3a045ed6c49cfca0cd090f62cf871ca6326cc3355cb0aaf1260fa760b6" -dependencies = [ - "arrow", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "indexmap 2.12.1", - "itertools 0.14.0", - "paste", + "sqlparser", ] [[package]] @@ -2822,41 +1680,12 @@ version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ "arrow", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", "indexmap 2.12.1", "itertools 0.14.0", "paste", ] -[[package]] -name = "datafusion-functions" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf9a9cf655265861a20453b1e58357147eab59bdc90ce7f2f68f1f35104d3bb" -dependencies = [ - "arrow", - "arrow-buffer 55.2.0", - "base64", - "blake2", - "blake3", - "chrono", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "hex", - "itertools 0.14.0", - "log", - "md-5", - "rand 0.9.2", - "regex", - "sha2", - "unicode-segmentation", - "uuid", -] - [[package]] name = "datafusion-functions" version = "48.0.1" @@ -2868,108 +1697,53 @@ dependencies = [ "blake2", "blake3", "chrono", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - 
"datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", "hex", "itertools 0.14.0", "log", "md-5", "rand 0.9.2", - "regex", - "sha2", - "unicode-segmentation", - "uuid", -] - -[[package]] -name = "datafusion-functions-aggregate" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f07e49733d847be0a05235e17b884d326a2fd402c97a89fe8bcf0bfba310005" -dependencies = [ - "ahash", - "arrow", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "half", - "log", - "paste", -] - -[[package]] -name = "datafusion-functions-aggregate" -version = "48.0.1" -source = 
"git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" -dependencies = [ - "ahash", - "arrow", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "half", - "log", - "paste", -] - -[[package]] -name = "datafusion-functions-aggregate-common" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4512607e10d72b0b0a1dc08f42cb5bd5284cb8348b7fea49dc83409493e32b1b" -dependencies = [ - "ahash", - "arrow", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex", + "sha2", + "unicode-segmentation", + "uuid", ] [[package]] -name = "datafusion-functions-aggregate-common" +name = "datafusion-functions-aggregate" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ "ahash", "arrow", - 
"datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "log", + "paste", ] [[package]] -name = "datafusion-functions-nested" +name = "datafusion-functions-aggregate-common" version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab331806e34f5545e5f03396e4d5068077395b1665795d8f88c14ec4f1e0b7a" +source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ + "ahash", "arrow", - "arrow-ord", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "itertools 0.14.0", - "log", - "paste", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", ] [[package]] @@ -2979,35 +1753,19 @@ source = 
"git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "arrow-ord", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-macros", + "datafusion-physical-expr-common", "itertools 0.14.0", "log", "paste", ] -[[package]] -name = "datafusion-functions-table" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4ac2c0be983a06950ef077e34e0174aa0cb9e346f3aeae459823158037ade37" -dependencies = [ - "arrow", - "async-trait", - "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "parking_lot", - "paste", -] - [[package]] name = "datafusion-functions-table" version = "48.0.1" @@ -3015,77 +1773,38 @@ 
source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "async-trait", - "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", "parking_lot", "paste", ] -[[package]] -name = "datafusion-functions-window" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f3d92731de384c90906941d36dcadf6a86d4128409a9c5cd916662baed5f53" -dependencies = [ - "arrow", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-window-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "log", - "paste", -] - [[package]] name = "datafusion-functions-window" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ "arrow", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-doc 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "log", "paste", ] -[[package]] -name = "datafusion-functions-window-common" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c679f8bf0971704ec8fd4249fcbb2eb49d6a12cc3e7a840ac047b4928d3541b5" -dependencies = [ - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "datafusion-functions-window-common" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", -] - -[[package]] -name = "datafusion-macros" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2821de7cb0362d12e75a5196b636a59ea3584ec1e1cc7dc6f5e34b9e8389d251" -dependencies = [ - 
"datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "quote", - "syn 2.0.113", + "datafusion-common", + "datafusion-physical-expr-common", ] [[package]] @@ -3093,28 +1812,9 @@ name = "datafusion-macros" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-expr", "quote", - "syn 2.0.113", -] - -[[package]] -name = "datafusion-optimizer" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1594c7a97219ede334f25347ad8d57056621e7f4f35a0693c8da876e10dd6a53" -dependencies = [ - "arrow", - "chrono", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "indexmap 2.12.1", - "itertools 0.14.0", - "log", - "recursive", - "regex", - "regex-syntax", + "syn", ] [[package]] @@ -3124,9 +1824,9 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "chrono", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", "indexmap 2.12.1", "itertools 0.14.0", "log", @@ -3135,28 +1835,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "datafusion-physical-expr" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dc6da0f2412088d23f6b01929dedd687b5aee63b19b674eb73d00c3eb3c883b7" -dependencies = [ - "ahash", - "arrow", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "half", - "hashbrown 0.14.5", - "indexmap 2.12.1", - "itertools 0.14.0", - "log", - "paste", - "petgraph 0.8.3", -] - [[package]] name = "datafusion-physical-expr" version = "48.0.1" @@ -3164,11 +1842,11 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "ahash", "arrow", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", "indexmap 2.12.1", @@ -3178,20 +1856,6 @@ dependencies = [ "petgraph 0.8.3", ] -[[package]] -name = "datafusion-physical-expr-common" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcb0dbd9213078a593c3fe28783beaa625a4e6c6a6c797856ee2ba234311fb96" -dependencies = [ - "ahash", - "arrow", 
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "hashbrown 0.14.5", - "itertools 0.14.0", -] - [[package]] name = "datafusion-physical-expr-common" version = "48.0.1" @@ -3199,79 +1863,30 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "ahash", "arrow", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-expr-common", "hashbrown 0.14.5", "itertools 0.14.0", ] -[[package]] -name = "datafusion-physical-optimizer" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d140854b2db3ef8ac611caad12bfb2e1e1de827077429322a6188f18fc0026a" -dependencies = [ - "arrow", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "itertools 0.14.0", - "log", - "recursive", -] - [[package]] name = "datafusion-physical-optimizer" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ "arrow", - "datafusion-common 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", "itertools 0.14.0", "log", "recursive", ] -[[package]] -name = "datafusion-physical-plan" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b46cbdf21a01206be76d467f325273b22c559c744a012ead5018dfe79597de08" -dependencies = [ - "ahash", - "arrow", - "arrow-ord", - "arrow-schema 55.2.0", - "async-trait", - "chrono", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-functions-window-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures", - "half", 
- "hashbrown 0.14.5", - "indexmap 2.12.1", - "itertools 0.14.0", - "log", - "parking_lot", - "pin-project-lite", - "tokio", -] - [[package]] name = "datafusion-physical-plan" version = "48.0.1" @@ -3283,13 +1898,13 @@ dependencies = [ "arrow-schema 55.2.0", "async-trait", "chrono", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", "futures", "half", "hashbrown 0.14.5", @@ -3301,22 +1916,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "datafusion-proto" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3fc7a2744332c2ef8804274c21f9fa664b4ca5889169250a6fd6b649ee5d16c" -dependencies = [ - "arrow", - "chrono", - "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-proto-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "object_store", - 
"prost", -] - [[package]] name = "datafusion-proto" version = "48.0.1" @@ -3324,59 +1923,24 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "chrono", - "datafusion 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-proto-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-proto-common", "object_store", "prost", ] -[[package]] -name = "datafusion-proto-common" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "800add86852f12e3d249867425de2224c1e9fb7adc2930460548868781fbeded" -dependencies = [ - "arrow", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "prost", -] - [[package]] name = "datafusion-proto-common" version = "48.0.1" source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c" dependencies = [ "arrow", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", "prost", ] -[[package]] -name = "datafusion-session" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a72733766ddb5b41534910926e8da5836622316f6283307fd9fb7e19811a59c" -dependencies = [ - "arrow", - "async-trait", - "dashmap 6.1.0", - "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-execution 48.0.1 
(registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures", - "itertools 0.14.0", - "log", - "object_store", - "parking_lot", - "tokio", -] - [[package]] name = "datafusion-session" version = "48.0.1" @@ -3385,13 +1949,13 @@ dependencies = [ "arrow", "async-trait", "dashmap 6.1.0", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-sql 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -3400,23 +1964,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "datafusion-sql" -version = "48.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5162338cdec9cc7ea13a0e6015c361acad5ec1d88d83f7c86301f789473971f" -dependencies = [ - "arrow", - "bigdecimal", - "datafusion-common 48.0.1 
(registry+https://github.com/rust-lang/crates.io-index)", - "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "indexmap 2.12.1", - "log", - "recursive", - "regex", - "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "datafusion-sql" version = "48.0.1" @@ -3424,48 +1971,13 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F dependencies = [ "arrow", "bigdecimal", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion-common", + "datafusion-expr", "indexmap 2.12.1", "log", "recursive", "regex", - "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", -] - -[[package]] -name = "deadpool" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b" -dependencies = [ - "deadpool-runtime", - "lazy_static", - "num_cpus", - "tokio", -] - -[[package]] -name = "deadpool-postgres" -version = "0.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d697d376cbfa018c23eb4caab1fd1883dd9c906a8c034e8d9a3cb06a7e0bef9" -dependencies = [ - "async-trait", - "deadpool", - "getrandom 0.2.16", - "tokio", - "tokio-postgres", - "tracing", -] - -[[package]] -name = "deadpool-runtime" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" -dependencies = [ - "tokio", + "sqlparser", ] [[package]] @@ -3486,37 +1998,6 @@ dependencies = [ "powerfmt", ] -[[package]] -name = "derive_builder" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" -dependencies = [ - "derive_builder_macro", -] - -[[package]] -name = "derive_builder_core" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" -dependencies = [ - "darling 0.20.11", - "proc-macro2", - "quote", - "syn 2.0.113", -] - -[[package]] -name = "derive_builder_macro" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" -dependencies = [ - "derive_builder_core", - "syn 2.0.113", -] - [[package]] name = "digest" version = "0.10.7" @@ -3538,27 +2019,6 @@ dependencies = [ "dirs-sys-next", ] -[[package]] -name = "dirs" -version = "6.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" -dependencies = [ - "dirs-sys", -] - -[[package]] -name = "dirs-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" -dependencies = [ - "libc", - "option-ext", - "redox_users 0.5.2", - "windows-sys 0.61.2", -] - [[package]] name = "dirs-sys-next" version = "0.1.2" @@ -3566,7 +2026,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ "libc", - "redox_users 0.4.6", + "redox_users", "winapi", ] @@ -3578,7 +2038,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -3602,12 +2062,6 @@ dependencies = [ "shared_child", ] -[[package]] -name = "dunce" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" - [[package]] name = "dyn-clone" version = "1.0.20" @@ -3691,24 +2145,12 @@ version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" -[[package]] -name = "fallible-iterator" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" - [[package]] name = "fallible-iterator" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" -[[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" - [[package]] name = "fastrand" version = "2.3.0" @@ -3722,24 +2164,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", - "rustix 1.1.3", - "windows-sys 0.59.0", -] - -[[package]] -name = "figment" -version = "0.10.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3" -dependencies = [ - "atomic", - "pear", - "serde", - "serde_json", - "serde_yaml", - "toml 0.8.23", - "uncased", - "version_check", + "rustix 1.1.3", + "windows-sys 0.59.0", ] [[package]] @@ -3803,21 +2229,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies 
= [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - [[package]] name = "form_urlencoded" version = "1.2.2" @@ -3838,46 +2249,37 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "fs_extra" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" - [[package]] name = "function-stream" version = "0.6.0" dependencies = [ + "ahash", "anyhow", "arrow", "arrow-array 55.2.0", "arrow-ipc 55.2.0", "arrow-json 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fjson)", "arrow-schema 55.2.0", - "arroyo-state", "async-trait", "base64", "bincode", "chrono", "clap", - "cornucopia", - "cornucopia_async", "crossbeam-channel", - "datafusion 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-functions-window 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", - "datafusion-proto 48.0.1 
(git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)", + "datafusion", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-window", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-proto", "futures", "governor", "hex", "itertools 0.14.0", - "jiter", "log", "lru", "mini-moka", @@ -3898,12 +2300,12 @@ dependencies = [ "serde_json_path", "serde_yaml", "sha2", - "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", - "strum 0.26.3", + "sqlparser", + "strum", "thiserror 2.0.17", "tokio", "tokio-stream", - "tonic 0.12.3", + "tonic", "tracing", "tracing-appender", "tracing-subscriber", @@ -3929,15 +2331,9 @@ dependencies = [ "rustyline", "thiserror 2.0.17", "tokio", - "tonic 0.12.3", + "tonic", ] -[[package]] -name = "funty" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" - [[package]] name = "futures" version = "0.3.31" @@ -3994,7 +2390,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -4064,10 +2460,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", - "js-sys", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", - "wasm-bindgen", + "wasi", ] [[package]] @@ -4084,25 +2478,13 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "getset" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912" -dependencies = [ - "proc-macro-error2", - "proc-macro2", - "quote", - "syn 2.0.113", -] - [[package]] name = "gimli" 
version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" dependencies = [ - "fallible-iterator 0.3.0", + "fallible-iterator", "indexmap 2.12.1", "stable_deref_trait", ] @@ -4147,7 +2529,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.4.0", + "http", "indexmap 2.12.1", "slab", "tokio", @@ -4210,15 +2592,6 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" -[[package]] -name = "hashlink" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" -dependencies = [ - "hashbrown 0.14.5", -] - [[package]] name = "heck" version = "0.4.1" @@ -4243,15 +2616,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - [[package]] name = "home" version = "0.5.12" @@ -4261,17 +2625,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - [[package]] name = "http" version = "1.4.0" @@ -4282,17 +2635,6 @@ dependencies = [ "itoa", ] -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http 0.2.12", - 
"pin-project-lite", -] - [[package]] name = "http-body" version = "1.0.1" @@ -4300,7 +2642,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.4.0", + "http", ] [[package]] @@ -4311,8 +2653,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "pin-project-lite", ] @@ -4345,8 +2687,8 @@ dependencies = [ "futures-channel", "futures-core", "h2", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "httparse", "httpdate", "itoa", @@ -4357,23 +2699,6 @@ dependencies = [ "want", ] -[[package]] -name = "hyper-rustls" -version = "0.27.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" -dependencies = [ - "http 1.4.0", - "hyper", - "hyper-util", - "rustls", - "rustls-native-certs", - "rustls-pki-types", - "tokio", - "tokio-rustls", - "tower-service", -] - [[package]] name = "hyper-timeout" version = "0.5.2" @@ -4387,46 +2712,25 @@ dependencies = [ "tower-service", ] -[[package]] -name = "hyper-tls" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" -dependencies = [ - "bytes", - "http-body-util", - "hyper", - "hyper-util", - "native-tls", - "tokio", - "tokio-native-tls", - "tower-service", -] - [[package]] name = "hyper-util" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ - "base64", "bytes", "futures-channel", "futures-core", "futures-util", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "hyper", - "ipnet", "libc", - "percent-encoding", 
"pin-project-lite", "socket2 0.6.1", - "system-configuration", "tokio", "tower-service", "tracing", - "windows-registry", ] [[package]] @@ -4540,12 +2844,6 @@ version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005" -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - [[package]] name = "idna" version = "1.1.0" @@ -4603,12 +2901,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "inlinable_string" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" - [[package]] name = "integer-encoding" version = "3.0.4" @@ -4646,16 +2938,6 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" -[[package]] -name = "iri-string" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "is-terminal" version = "0.4.17" @@ -4667,12 +2949,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "is_ci" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" - [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -4723,19 +2999,6 @@ dependencies = [ "cc", ] -[[package]] -name = "jiter" -version = "0.10.0" -source = "git+https://github.com/ArroyoSystems/jiter?branch=disable_python#e5a90990780433a5972031a62eff87555d98884d" -dependencies = [ - "ahash", - "bitvec", - "lexical-parse-float 1.0.6", - "num-bigint", - 
"num-traits", - "smallvec", -] - [[package]] name = "jobserver" version = "0.1.34" @@ -4756,19 +3019,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "k8s-openapi" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c75b990324f09bef15e791606b7b7a296d02fc88a344f6eba9390970a870ad5" -dependencies = [ - "base64", - "chrono", - "serde", - "serde-value", - "serde_json", -] - [[package]] name = "lazy_static" version = "1.5.0" @@ -4962,17 +3212,6 @@ dependencies = [ "zstd-sys", ] -[[package]] -name = "libsqlite3-sys" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" -dependencies = [ - "cc", - "pkg-config", - "vcpkg", -] - [[package]] name = "libz-sys" version = "1.1.23" @@ -5009,17 +3248,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" -[[package]] -name = "local-ip-address" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79ef8c257c92ade496781a32a581d43e3d512cf8ce714ecf04ea80f93ed0ff4a" -dependencies = [ - "libc", - "neli", - "windows-sys 0.61.2", -] - [[package]] name = "lock_api" version = "0.4.14" @@ -5044,12 +3272,6 @@ dependencies = [ "hashbrown 0.15.5", ] -[[package]] -name = "lru-slab" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" - [[package]] name = "lz4-sys" version = "1.11.1+lz4-1.10.0" @@ -5104,12 +3326,6 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" -[[package]] -name = "matchit" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" - [[package]] name = "maybe-owned" version = "0.3.4" @@ -5141,38 +3357,6 @@ dependencies = [ "rustix 1.1.3", ] -[[package]] -name = "miette" -version = "5.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59bb584eaeeab6bd0226ccf3509a69d7936d148cf3d036ad350abe35e8c6856e" -dependencies = [ - "backtrace", - "backtrace-ext", - "is-terminal", - "miette-derive", - "once_cell", - "owo-colors", - "supports-color", - "supports-hyperlinks", - "supports-unicode", - "terminal_size", - "textwrap", - "thiserror 1.0.69", - "unicode-width 0.1.14", -] - -[[package]] -name = "miette-derive" -version = "5.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.113", -] - [[package]] name = "mime" version = "0.3.17" @@ -5217,7 +3401,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "windows-sys 0.61.2", ] @@ -5227,61 +3411,6 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" -[[package]] -name = "nanoid" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ffa00dec017b5b1a8b7cf5e2c008bfda1aa7e0697ac1508b491fdf2622fb4d8" -dependencies = [ - "rand 0.8.5", -] - -[[package]] -name = "native-tls" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - 
"tempfile", -] - -[[package]] -name = "neli" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22f9786d56d972959e1408b6a93be6af13b9c1392036c5c1fafa08a1b0c6ee87" -dependencies = [ - "bitflags 2.10.0", - "byteorder", - "derive_builder", - "getset", - "libc", - "log", - "neli-proc-macros", - "parking_lot", -] - -[[package]] -name = "neli-proc-macros" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d8d08c6e98f20a62417478ebf7be8e1425ec9acecc6f63e22da633f6b71609" -dependencies = [ - "either", - "proc-macro2", - "quote", - "serde", - "syn 2.0.113", -] - [[package]] name = "nibble_vec" version = "0.1.0" @@ -5299,7 +3428,7 @@ checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ "bitflags 2.10.0", "cfg-if", - "cfg_aliases 0.1.1", + "cfg_aliases", "libc", ] @@ -5356,7 +3485,6 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", - "serde", ] [[package]] @@ -5444,7 +3572,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -5466,28 +3594,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" dependencies = [ "async-trait", - "base64", "bytes", "chrono", - "form_urlencoded", "futures", - "http 1.4.0", - "http-body-util", - "httparse", + "http", "humantime", - "hyper", "itertools 0.14.0", - "md-5", "parking_lot", "percent-encoding", - "quick-xml", - "rand 0.9.2", - "reqwest", - "ring", - "rustls-pemfile", - "serde", - "serde_json", - "serde_urlencoded", "thiserror 2.0.17", "tokio", "tracing", @@ -5509,38 +3623,6 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" -[[package]] -name = "openssl" -version = 
"0.10.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" -dependencies = [ - "bitflags 2.10.0", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.113", -] - -[[package]] -name = "openssl-probe" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" - [[package]] name = "openssl-sys" version = "0.9.111" @@ -5553,12 +3635,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "option-ext" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" - [[package]] name = "ordered-float" version = "2.10.1" @@ -5578,18 +3654,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "outref" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" - -[[package]] -name = "owo-colors" -version = "3.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" - [[package]] name = "parking_lot" version = "0.12.5" @@ -5677,38 +3741,15 @@ dependencies = [ "simdutf8", "snap", "thrift", - "twox-hash", - "zstd", -] - -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - -[[package]] -name = "pear" -version = "0.2.9" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467" -dependencies = [ - "inlinable_string", - "pear_codegen", - "yansi", + "twox-hash", + "zstd", ] [[package]] -name = "pear_codegen" -version = "0.2.9" +name = "paste" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147" -dependencies = [ - "proc-macro2", - "proc-macro2-diagnostics", - "quote", - "syn 2.0.113", -] +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "peeking_take_while" @@ -5752,7 +3793,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -5795,7 +3836,6 @@ dependencies = [ "hashbrown 0.15.5", "indexmap 2.12.1", "serde", - "serde_derive", ] [[package]] @@ -5804,17 +3844,7 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" dependencies = [ - "phf_shared 0.12.1", -] - -[[package]] -name = "phf" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" -dependencies = [ - "phf_shared 0.13.1", - "serde", + "phf_shared", ] [[package]] @@ -5826,15 +3856,6 @@ dependencies = [ "siphasher", ] -[[package]] -name = "phf_shared" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" -dependencies = [ - "siphasher", -] - [[package]] name = "pin-project" version = "1.1.10" @@ -5852,7 +3873,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -5891,49 +3912,6 @@ dependencies = [ "serde", ] 
-[[package]] -name = "postgres" -version = "0.19.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c48ece1c6cda0db61b058c1721378da76855140e9214339fa1317decacb176" -dependencies = [ - "bytes", - "fallible-iterator 0.2.0", - "futures-util", - "log", - "tokio", - "tokio-postgres", -] - -[[package]] -name = "postgres-protocol" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ee9dd5fe15055d2b6806f4736aa0c9637217074e224bbec46d4041b91bb9491" -dependencies = [ - "base64", - "byteorder", - "bytes", - "fallible-iterator 0.2.0", - "hmac", - "md-5", - "memchr", - "rand 0.9.2", - "sha2", - "stringprep", -] - -[[package]] -name = "postgres-types" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54b858f82211e84682fecd373f68e1ceae642d8d751a1ebd13f33de6257b3e20" -dependencies = [ - "bytes", - "fallible-iterator 0.2.0", - "postgres-protocol", -] - [[package]] name = "potential_utf" version = "0.1.4" @@ -5965,7 +3943,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.113", + "syn", ] [[package]] @@ -5974,53 +3952,7 @@ version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "toml_edit 0.23.10+spec-1.0.0", -] - -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr2" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" -dependencies = [ - "proc-macro2", - "quote", -] - -[[package]] -name = "proc-macro-error2" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" -dependencies = [ - "proc-macro-error-attr2", - "proc-macro2", - "quote", - "syn 2.0.113", + "toml_edit", ] [[package]] @@ -6032,19 +3964,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "proc-macro2-diagnostics" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.113", - "version_check", - "yansi", -] - [[package]] name = "proctitle" version = "0.1.1" @@ -6056,21 +3975,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "prometheus" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a" -dependencies = [ - "cfg-if", - "fnv", - "lazy_static", - "memchr", - "parking_lot", - "protobuf", - "thiserror 2.0.17", -] - [[package]] name = "prost" version = "0.13.5" @@ -6097,7 +4001,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.113", + "syn", "tempfile", ] @@ -6111,7 +4015,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -6123,26 +4027,6 @@ dependencies = [ "prost", ] -[[package]] -name = "protobuf" -version = "3.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4" -dependencies = [ - "once_cell", - "protobuf-support", - "thiserror 1.0.69", -] - -[[package]] -name = "protobuf-support" -version = "3.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6" -dependencies = [ - "thiserror 1.0.69", -] - [[package]] name = "protocol" version = "0.1.0" @@ -6151,8 +4035,8 @@ dependencies = [ "log", "prost", "serde", - "tonic 0.12.3", - "tonic-build 0.12.3", + "tonic", + "tonic-build", ] [[package]] @@ -6196,15 +4080,9 @@ checksum = "2cf194f5b1a415ef3a44ee35056f4009092cc4038a9f7e3c7c1e392f48ee7dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] -[[package]] -name = "quad-rand" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" - [[package]] name = "quanta" version = "0.12.6" @@ -6215,76 +4093,11 @@ dependencies = [ "libc", "once_cell", "raw-cpuid", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "web-sys", "winapi", ] -[[package]] -name = "quick-xml" -version = "0.38.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" -dependencies = [ - "memchr", - "serde", -] - -[[package]] -name = "quinn" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" -dependencies = [ - "bytes", - "cfg_aliases 0.2.1", - "pin-project-lite", - "quinn-proto", - "quinn-udp", - "rustc-hash 2.1.1", - "rustls", - "socket2 0.6.1", - "thiserror 2.0.17", - "tokio", - "tracing", - "web-time", -] - -[[package]] -name = "quinn-proto" -version = "0.11.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" -dependencies = [ - "bytes", - "getrandom 0.3.4", - "lru-slab", - "rand 0.9.2", - "ring", - "rustc-hash 2.1.1", - "rustls", - "rustls-pki-types", - "slab", - "thiserror 2.0.17", - "tinyvec", - "tracing", - "web-time", -] - -[[package]] -name = "quinn-udp" -version = "0.5.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" -dependencies = [ - "cfg_aliases 0.2.1", - "libc", - "once_cell", - "socket2 0.6.1", - "tracing", - "windows-sys 0.60.2", -] - [[package]] name = "quote" version = "1.0.42" @@ -6300,12 +4113,6 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" -[[package]] -name = "radium" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" - [[package]] name = "radix_trie" version = "0.2.1" @@ -6463,7 +4270,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -6486,37 +4293,6 @@ dependencies = [ "thiserror 1.0.69", ] -[[package]] -name = "redox_users" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" -dependencies = [ - "getrandom 0.2.16", - "libredox", - "thiserror 2.0.17", -] - -[[package]] -name = "ref-cast" -version = "1.0.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" -dependencies = [ - "ref-cast-impl", -] - -[[package]] -name = "ref-cast-impl" -version = "1.0.25" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.113", -] - [[package]] name = "regalloc2" version = "0.13.5" @@ -6539,128 +4315,46 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-lite" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" - -[[package]] -name = "regex-syntax" -version = "0.8.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" - -[[package]] -name = "regress" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82a9ecfa0cb04d0b04dddb99b8ccf4f66bc8dfd23df694b398570bd8ae3a50fb" -dependencies = [ - "hashbrown 0.13.2", - "memchr", -] - -[[package]] -name = "reqwest" -version = "0.12.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" -dependencies = [ - "base64", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "hyper", - "hyper-rustls", - "hyper-tls", - "hyper-util", - "js-sys", - "log", - "mime", - "native-tls", - "percent-encoding", - "pin-project-lite", - "quinn", - "rustls", - "rustls-native-certs", - 
"rustls-pki-types", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper", - "tokio", - "tokio-native-tls", - "tokio-rustls", - "tokio-util", - "tower 0.5.2", - "tower-http", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-streams", - "web-sys", + "memchr", + "regex-automata", + "regex-syntax", ] [[package]] -name = "ring" -version = "0.17.14" +name = "regex-automata" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ - "cc", - "cfg-if", - "getrandom 0.2.16", - "libc", - "untrusted", - "windows-sys 0.52.0", + "aho-corasick", + "memchr", + "regex-syntax", ] [[package]] -name = "rocksdb" -version = "0.21.0" +name = "regex-syntax" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "regress" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82a9ecfa0cb04d0b04dddb99b8ccf4f66bc8dfd23df694b398570bd8ae3a50fb" dependencies = [ - "libc", - "librocksdb-sys", + "hashbrown 0.13.2", + "memchr", ] [[package]] -name = "rusqlite" -version = "0.31.0" +name = "rocksdb" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" +checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe" dependencies = [ - "bitflags 2.10.0", - "fallible-iterator 0.3.0", - "fallible-streaming-iterator", - "hashlink", - "libsqlite3-sys", - "serde_json", - "smallvec", + "libc", + "librocksdb-sys", ] [[package]] @@ -6726,65 +4420,6 @@ dependencies = [ "rustix 1.1.3", ] 
-[[package]] -name = "rustls" -version = "0.23.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" -dependencies = [ - "aws-lc-rs", - "log", - "once_cell", - "ring", - "rustls-pki-types", - "rustls-webpki", - "subtle", - "zeroize", -] - -[[package]] -name = "rustls-native-certs" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" -dependencies = [ - "openssl-probe", - "rustls-pki-types", - "schannel", - "security-framework", -] - -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", -] - -[[package]] -name = "rustls-pki-types" -version = "1.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" -dependencies = [ - "web-time", - "zeroize", -] - -[[package]] -name = "rustls-webpki" -version = "0.103.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" -dependencies = [ - "aws-lc-rs", - "ring", - "rustls-pki-types", - "untrusted", -] - [[package]] name = "rustversion" version = "1.0.22" @@ -6840,15 +4475,6 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "schannel" -version = "0.1.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" -dependencies = [ - "windows-sys 0.61.2", -] - [[package]] name = "schemars" version = "0.8.22" @@ -6856,20 +4482,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ "dyn-clone", - "schemars_derive 0.8.22", - "serde", - "serde_json", -] - -[[package]] -name = "schemars" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" -dependencies = [ - "dyn-clone", - "ref-cast", - "schemars_derive 1.2.1", + "schemars_derive", "serde", "serde_json", ] @@ -6883,19 +4496,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.113", -] - -[[package]] -name = "schemars_derive" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f" -dependencies = [ - "proc-macro2", - "quote", - "serde_derive_internals", - "syn 2.0.113", + "syn", ] [[package]] @@ -6904,29 +4505,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "security-framework" -version = "3.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" -dependencies = [ - "bitflags 2.10.0", - "core-foundation 0.10.1", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework-sys" -version = "2.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "semver" version = "1.0.27" @@ -6953,26 +4531,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde-value" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" 
-dependencies = [ - "ordered-float", - "serde", -] - -[[package]] -name = "serde_bytes" -version = "0.11.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" -dependencies = [ - "serde", - "serde_core", -] - [[package]] name = "serde_core" version = "1.0.228" @@ -6990,7 +4548,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -7001,7 +4559,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -7064,16 +4622,7 @@ checksum = "aafbefbe175fa9bf03ca83ef89beecff7d2a95aaacd5732325b90ac8c3bd7b90" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", -] - -[[package]] -name = "serde_spanned" -version = "0.6.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" -dependencies = [ - "serde", + "syn", ] [[package]] @@ -7094,19 +4643,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.113", -] - -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", + "syn", ] [[package]] @@ -7248,12 +4785,6 @@ dependencies = [ "serde", ] -[[package]] -name = "smawk" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" - [[package]] name = "snap" version = "1.1.1" @@ -7289,17 +4820,6 @@ dependencies = [ "lock_api", ] -[[package]] -name = "sqlparser" -version = "0.55.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" -dependencies = [ - "log", - "recursive", - "sqlparser_derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "sqlparser" version = "0.55.0" @@ -7307,18 +4827,7 @@ source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunc dependencies = [ "log", "recursive", - "sqlparser_derive 0.3.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)", -] - -[[package]] -name = "sqlparser_derive" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.113", + "sqlparser_derive", ] [[package]] @@ -7328,7 +4837,7 @@ source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunc dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -7356,17 +4865,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "stringprep" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" -dependencies = [ - "unicode-bidi", - "unicode-normalization", - "unicode-properties", -] - [[package]] name = "strsim" version = "0.11.1" @@ -7379,16 +4877,7 @@ version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" dependencies = [ - "strum_macros 0.26.4", -] - -[[package]] -name = "strum" -version = "0.27.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" -dependencies = [ - "strum_macros 0.27.2", + 
"strum_macros", ] [[package]] @@ -7401,19 +4890,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.113", -] - -[[package]] -name = "strum_macros" -version = "0.27.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -7422,44 +4899,6 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" -[[package]] -name = "supports-color" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" -dependencies = [ - "is-terminal", - "is_ci", -] - -[[package]] -name = "supports-hyperlinks" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84231692eb0d4d41e4cdd0cabfdd2e6cd9e255e65f80c9aa7c98dd502b4233d" -dependencies = [ - "is-terminal", -] - -[[package]] -name = "supports-unicode" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f850c19edd184a205e883199a261ed44471c81e39bd95b1357f5febbef00e77a" -dependencies = [ - "is-terminal", -] - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.113" @@ -7476,9 +4915,6 @@ name = "sync_wrapper" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" -dependencies = [ - "futures-core", -] [[package]] name = "synstructure" @@ -7488,28 +4924,7 @@ checksum = 
"728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", -] - -[[package]] -name = "system-configuration" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" -dependencies = [ - "bitflags 2.10.0", - "core-foundation 0.9.4", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" -dependencies = [ - "core-foundation-sys", - "libc", + "syn", ] [[package]] @@ -7534,12 +4949,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" -[[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - [[package]] name = "target-lexicon" version = "0.13.4" @@ -7551,42 +4960,21 @@ name = "tempfile" version = "3.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" -dependencies = [ - "fastrand", - "getrandom 0.3.4", - "once_cell", - "rustix 1.1.3", - "windows-sys 0.61.2", -] - -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "terminal_size" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" -dependencies = [ - "libc", - "winapi", +dependencies = [ + "fastrand", + 
"getrandom 0.3.4", + "once_cell", + "rustix 1.1.3", + "windows-sys 0.61.2", ] [[package]] -name = "textwrap" -version = "0.15.2" +name = "termcolor" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7b3e525a49ec206798b40326a44121291b530c963cfb01018f63e135bac543d" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" dependencies = [ - "smawk", - "unicode-linebreak", - "unicode-width 0.1.14", + "winapi-util", ] [[package]] @@ -7615,7 +5003,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -7626,7 +5014,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -7699,21 +5087,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "tinyvec" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "tokio" version = "1.49.0" @@ -7728,7 +5101,6 @@ dependencies = [ "signal-hook-registry", "socket2 0.6.1", "tokio-macros", - "tracing", "windows-sys 0.61.2", ] @@ -7740,53 +5112,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", -] - -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", -] - -[[package]] -name = "tokio-postgres" 
-version = "0.7.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcea47c8f71744367793f16c2db1f11cb859d28f436bdb4ca9193eb1f787ee42" -dependencies = [ - "async-trait", - "byteorder", - "bytes", - "fallible-iterator 0.2.0", - "futures-channel", - "futures-util", - "log", - "parking_lot", - "percent-encoding", - "phf 0.13.1", - "pin-project-lite", - "postgres-protocol", - "postgres-types", - "rand 0.9.2", - "socket2 0.6.1", - "tokio", - "tokio-util", - "whoami", -] - -[[package]] -name = "tokio-rustls" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" -dependencies = [ - "rustls", - "tokio", + "syn", ] [[package]] @@ -7813,18 +5139,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "toml" -version = "0.8.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" -dependencies = [ - "serde", - "serde_spanned 0.6.9", - "toml_datetime 0.6.11", - "toml_edit 0.22.27", -] - [[package]] name = "toml" version = "0.9.11+spec-1.1.0" @@ -7833,22 +5147,13 @@ checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46" dependencies = [ "indexmap 2.12.1", "serde_core", - "serde_spanned 1.0.4", - "toml_datetime 0.7.5+spec-1.1.0", + "serde_spanned", + "toml_datetime", "toml_parser", "toml_writer", "winnow", ] -[[package]] -name = "toml_datetime" -version = "0.6.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" -dependencies = [ - "serde", -] - [[package]] name = "toml_datetime" version = "0.7.5+spec-1.1.0" @@ -7858,20 +5163,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "toml_edit" -version = "0.22.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" -dependencies = [ - "indexmap 2.12.1", - "serde", - "serde_spanned 0.6.9", - "toml_datetime 0.6.11", - "toml_write", - "winnow", -] - [[package]] name = "toml_edit" version = "0.23.10+spec-1.0.0" @@ -7879,7 +5170,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" dependencies = [ "indexmap 2.12.1", - "toml_datetime 0.7.5+spec-1.1.0", + "toml_datetime", "toml_parser", "winnow", ] @@ -7893,12 +5184,6 @@ dependencies = [ "winnow", ] -[[package]] -name = "toml_write" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" - [[package]] name = "toml_writer" version = "1.0.6+spec-1.1.0" @@ -7913,12 +5198,12 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" dependencies = [ "async-stream", "async-trait", - "axum 0.7.9", + "axum", "base64", "bytes", "h2", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "hyper", "hyper-timeout", @@ -7935,39 +5220,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "tonic" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" -dependencies = [ - "async-trait", - "axum 0.8.8", - "base64", - "bytes", - "h2", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "hyper", - "hyper-timeout", - "hyper-util", - "percent-encoding", - "pin-project", - "prost", - "rustls-native-certs", - "socket2 0.5.10", - "tokio", - "tokio-rustls", - "tokio-stream", - "tower 0.5.2", - "tower-layer", - "tower-service", - "tracing", - "webpki-roots 0.26.11", - "zstd", -] - [[package]] name = "tonic-build" version = "0.12.3" @@ -7979,21 +5231,7 @@ dependencies = [ "prost-build", "prost-types", "quote", - "syn 2.0.113", -] 
- -[[package]] -name = "tonic-build" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac6f67be712d12f0b41328db3137e0d0757645d8904b4cb7d51cd9c2279e847" -dependencies = [ - "prettyplease", - "proc-macro2", - "prost-build", - "prost-types", - "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -8024,31 +5262,8 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", - "indexmap 2.12.1", "pin-project-lite", - "slab", "sync_wrapper", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tower-http" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" -dependencies = [ - "bitflags 2.10.0", - "bytes", - "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "iri-string", - "pin-project-lite", - "tower 0.5.2", "tower-layer", "tower-service", ] @@ -8096,7 +5311,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -8195,9 +5410,9 @@ dependencies = [ "proc-macro2", "quote", "regress", - "schemars 0.8.22", + "schemars", "serde_json", - "syn 2.0.113", + "syn", "thiserror 1.0.69", "unicode-ident", ] @@ -8209,11 +5424,11 @@ source = "git+https://github.com/ArroyoSystems/typify.git?branch=arroyo#d14b6fc0 dependencies = [ "proc-macro2", "quote", - "schemars 0.8.22", + "schemars", "serde", "serde_json", "serde_tokenstream", - "syn 2.0.113", + "syn", "typify-impl", ] @@ -8223,54 +5438,18 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" -[[package]] -name = "uncased" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697" -dependencies = [ - "version_check", -] - [[package]] name = "unicase" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" -[[package]] -name = "unicode-bidi" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" - [[package]] name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" -[[package]] -name = "unicode-linebreak" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" - -[[package]] -name = "unicode-normalization" -version = "0.1.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-properties" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" - [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -8301,18 +5480,6 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" -[[package]] -name = "unscanny" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9df2af067a7953e9c3831320f35c1cc0600c30d44d9f7a12b01db1cd88d6b47" - -[[package]] -name = "untrusted" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" - [[package]] name = "unty" version = "0.0.4" @@ -8331,12 +5498,6 @@ dependencies = [ "serde", ] -[[package]] -name = "urlencoding" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -8349,30 +5510,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" -[[package]] -name = "utoipa" -version = "4.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23" -dependencies = [ - "indexmap 2.12.1", - "serde", - "serde_json", - "utoipa-gen", -] - -[[package]] -name = "utoipa-gen" -version = "4.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20c24e8ab68ff9ee746aad22d39b5535601e6416d1b0feeabf78be986a5c4392" -dependencies = [ - "proc-macro-error", - "proc-macro2", - "quote", - "syn 2.0.113", -] - [[package]] name = "uuid" version = "1.19.0" @@ -8381,7 +5518,6 @@ checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ "getrandom 0.3.4", "js-sys", - "serde_core", "wasm-bindgen", ] @@ -8409,12 +5545,6 @@ version = "0.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" -[[package]] -name = "vsimd" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" - [[package]] name = "walkdir" version = "2.5.0" @@ -8440,15 +5570,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.7+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" -dependencies = [ - "wasip2", -] - [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" @@ -8458,15 +5579,6 @@ dependencies = [ "wit-bindgen", ] -[[package]] -name = "wasite" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42" -dependencies = [ - "wasi 0.14.7+wasi-0.2.4", -] - [[package]] name = "wasm-bindgen" version = "0.2.106" @@ -8512,7 +5624,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.113", + "syn", "wasm-bindgen-shared", ] @@ -8556,19 +5668,6 @@ dependencies = [ "wasmparser", ] -[[package]] -name = "wasm-streams" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "wasmparser" version = "0.243.0" @@ -8691,7 +5790,7 @@ dependencies = [ "serde", "serde_derive", "sha2", - "toml 0.9.11+spec-1.1.0", + "toml", "wasmtime-environ", "windows-sys 0.61.2", "zstd", @@ -8706,7 +5805,7 @@ dependencies = [ "anyhow", "proc-macro2", "quote", - "syn 2.0.113", + "syn", "wasmtime-internal-component-util", "wasmtime-internal-wit-bindgen", "wit-parser", @@ -8820,7 +5919,7 @@ checksum = "63ba3124cc2cbcd362672f9f077303ccc4cd61daa908f73447b7fdaece75ff9f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -8948,35 +6047,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki-roots" -version = "0.26.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" -dependencies = [ - "webpki-roots 1.0.6", -] - -[[package]] -name = "webpki-roots" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" -dependencies = [ - "rustls-pki-types", -] - -[[package]] -name = "whoami" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fae98cf96deed1b7572272dfc777713c249ae40aa1cf8862e091e8b745f5361" -dependencies = [ - "libredox", - "wasite", - "web-sys", -] - [[package]] name = "wiggle" version = "41.0.3" @@ -9001,7 +6071,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.113", + "syn", "witx", ] @@ -9013,7 +6083,7 @@ checksum = "0e976fe0cecd60041f66b15ad45ebc997952af13da9bf9d90261c7b025057edc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", "wiggle-generate", ] @@ -9089,7 +6159,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -9100,7 +6170,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -9109,17 +6179,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] -name = "windows-registry" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" -dependencies = [ - "windows-link", - "windows-result", - "windows-strings", -] - [[package]] name = "windows-result" version = "0.4.1" @@ -9364,21 +6423,6 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" -[[package]] -name = "wyz" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" -dependencies = [ - "tap", -] - -[[package]] -name = "xmlparser" -version = "0.13.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" - [[package]] name = "xxhash-rust" version = "0.8.15" @@ -9394,12 +6438,6 @@ dependencies = [ "lzma-sys", ] -[[package]] -name = "yansi" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" - [[package]] name = "yoke" version = "0.8.1" @@ -9419,7 +6457,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", "synstructure", ] @@ -9449,7 +6487,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -9460,7 +6498,7 @@ checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] @@ -9480,16 +6518,10 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", "synstructure", ] -[[package]] -name = "zeroize" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" - [[package]] name = "zerotrie" version = "0.2.3" @@ -9520,7 +6552,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn", ] [[package]] diff 
--git a/Cargo.toml b/Cargo.toml index 2c62a473..8e343baa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,11 +80,7 @@ datafusion-functions-window = {git = 'https://github.com/ArroyoSystems/arrow-dat sqlparser = { git = "https://github.com/FunctionStream/sqlparser-rs", branch = "0.6.0/function-sql-parser" } -cornucopia_async = { git = "https://github.com/ArroyoSystems/cornucopia", branch = "sqlite" } -cornucopia = { git = "https://github.com/ArroyoSystems/cornucopia", branch = "sqlite" } -jiter = {git = "https://github.com/ArroyoSystems/jiter", branch = "disable_python" } - -arroyo-state = { path = "../arroyo/crates/arroyo-state" } +ahash = "0.8" governor = "0.8.0" mini-moka = "0.10" sha2 = "0.10" diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs index 814358ad..0dce921e 100644 --- a/src/runtime/mod.rs +++ b/src/runtime/mod.rs @@ -14,8 +14,6 @@ pub mod buffer_and_event; pub mod common; -pub mod sink; -pub mod source; pub mod streaming; pub mod task; pub mod taskexecutor; diff --git a/src/runtime/streaming/api/context.rs b/src/runtime/streaming/api/context.rs index e81bd03a..e838b06e 100644 --- a/src/runtime/streaming/api/context.rs +++ b/src/runtime/streaming/api/context.rs @@ -2,21 +2,24 @@ use crate::runtime::streaming::memory::MemoryPool; use crate::runtime::streaming::protocol::event::StreamEvent; use crate::runtime::streaming::protocol::tracked::TrackedEvent; use crate::runtime::streaming::network::endpoint::PhysicalSender; +use crate::runtime::streaming::storage::manager::TableManager; + use arrow_array::RecordBatch; -use arroyo_state::tables::table_manager::TableManager; use std::sync::Arc; use tokio::sync::Mutex; -use tracing::error; pub struct TaskContext { pub job_id: String, pub vertex_id: u32, pub subtask_idx: u32, pub parallelism: u32, + pub outboxes: Vec, + memory_pool: Arc, table_manager: Option>>, - pub last_present_watermark: Option, + + current_watermark: Option, } impl TaskContext { @@ -37,10 +40,46 @@ impl TaskContext { outboxes, memory_pool, 
table_manager, - last_present_watermark: None, + current_watermark: None, + } + } + + // ======================================================================== + // 水位线与时间流管理 API + // ======================================================================== + + /// 供业务算子调用:获取当前任务的安全水位线 + pub fn last_present_watermark(&self) -> Option { + self.current_watermark + } + + /// 供底座框架 (SubtaskRunner) 调用:推进本地时间,保证单调递增 + pub fn advance_watermark(&mut self, watermark: std::time::SystemTime) { + if let Some(current) = self.current_watermark { + if watermark > current { + self.current_watermark = Some(watermark); + } + } else { + self.current_watermark = Some(watermark); } } + // ======================================================================== + // 可观测性 API (Observability) + // ======================================================================== + + /// 格式化当前 Task 的唯一标识,用于分布式追踪和日志打印 + pub fn task_identity(&self) -> String { + format!( + "Job[{}], Vertex[{}], Subtask[{}/{}]", + self.job_id, self.vertex_id, self.subtask_idx, self.parallelism + ) + } + + // ======================================================================== + // 状态管理与背压网络发送 API + // ======================================================================== + pub async fn table_manager(&self) -> tokio::sync::MutexGuard<'_, TableManager> { self.table_manager .as_ref() @@ -49,6 +88,16 @@ impl TaskContext { .await } + pub async fn table_manager_guard( + &self, + ) -> anyhow::Result> { + let arc = self + .table_manager + .as_ref() + .ok_or_else(|| anyhow::anyhow!("table_manager is not configured on TaskContext"))?; + Ok(arc.lock().await) + } + /// 受内存池管控的数据发送:申请精准字节的内存船票后广播到所有下游 pub async fn collect(&self, batch: RecordBatch) -> anyhow::Result<()> { if self.outboxes.is_empty() { @@ -65,7 +114,7 @@ impl TaskContext { Ok(()) } - /// 按 Key 哈希路由到单分区(Shuffle / GroupBy) + /// 按 Key 哈希路由到单分区(用于 Shuffle / KeyBy) pub async fn collect_keyed( &self, key_hash: u64, @@ -84,7 +133,7 @@ impl TaskContext { Ok(()) } - 
/// 广播控制信号(不申请内存船票,保证在拥堵时畅通无阻) + /// 广播控制信号(如 Watermark, Barrier:不申请内存船票,保证在拥堵时畅通无阻) pub async fn broadcast(&self, event: StreamEvent) -> anyhow::Result<()> { let tracked_event = TrackedEvent::control(event); for outbox in &self.outboxes { diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs index e6bf674d..8115b0fe 100644 --- a/src/runtime/streaming/api/mod.rs +++ b/src/runtime/streaming/api/mod.rs @@ -5,5 +5,5 @@ pub mod operator; pub mod source; pub use context::TaskContext; -pub use operator::{ConstructedOperator, MessageOperator}; +pub use operator::{ConstructedOperator, MessageOperator, Registry}; pub use source::{SourceEvent, SourceOffset, SourceOperator}; diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs index 3974307b..3cd5a316 100644 --- a/src/runtime/streaming/api/operator.rs +++ b/src/runtime/streaming/api/operator.rs @@ -4,9 +4,67 @@ use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; use crate::runtime::streaming::protocol::stream_out::StreamOutput; use arrow_array::RecordBatch; use async_trait::async_trait; +use datafusion::common::Result as DfResult; +use datafusion::execution::context::SessionContext; +use datafusion::execution::FunctionRegistry; +use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF}; +use std::collections::HashSet; +use std::sync::Arc; use std::time::Duration; use crate::sql::common::{CheckpointBarrier, Watermark}; +// --------------------------------------------------------------------------- +// Registry — 算子 / UDF 注册表(取代 tracing_subscriber::Registry) +// --------------------------------------------------------------------------- + +/// 运行时函数与状态注册表。 +/// +/// 包装 DataFusion [`SessionContext`],为物理计划反序列化提供 UDF / UDAF / UDWF 查询能力。 +/// `Arc` 在工厂中创建后,由各构造器共享。 +pub struct Registry { + ctx: SessionContext, +} + +impl Registry { + pub fn new() -> Self { + Self { + ctx: SessionContext::new(), + } + } + + pub fn 
session_context(&self) -> &SessionContext { + &self.ctx + } +} + +impl Default for Registry { + fn default() -> Self { + Self::new() + } +} + +impl FunctionRegistry for Registry { + fn udfs(&self) -> HashSet { + self.ctx.udfs() + } + + fn udf(&self, name: &str) -> DfResult> { + self.ctx.udf(name) + } + + fn udaf(&self, name: &str) -> DfResult> { + self.ctx.udaf(name) + } + + fn udwf(&self, name: &str) -> DfResult> { + self.ctx.udwf(name) + } +} + +// --------------------------------------------------------------------------- +// ConstructedOperator +// --------------------------------------------------------------------------- + /// 工厂反射产出的具体算子实例 pub enum ConstructedOperator { Source(Box), diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs index f1733b29..d824d025 100644 --- a/src/runtime/streaming/execution/runner.rs +++ b/src/runtime/streaming/execution/runner.rs @@ -5,11 +5,9 @@ use crate::runtime::streaming::protocol::control::ControlCommand; use crate::runtime::streaming::protocol::event::StreamEvent; use crate::runtime::streaming::protocol::stream_out::StreamOutput; use crate::runtime::streaming::protocol::tracked::TrackedEvent; -use crate::runtime::streaming::protocol::Watermark; use super::tracker::barrier_aligner::{AlignmentStatus, BarrierAligner}; use super::tracker::watermark_tracker::WatermarkTracker; use crate::runtime::streaming::network::endpoint::BoxedEventStream; -use arroyo_types::CheckpointBarrier; use std::collections::VecDeque; use std::pin::Pin; use tokio::sync::mpsc::Receiver; @@ -245,7 +243,7 @@ impl SubtaskRunner { StreamEvent::Watermark(wm) => { if let Some(aligned_wm) = st.wm_tracker.update(input_idx, wm) { if let Watermark::EventTime(t) = aligned_wm { - st.ctx.last_present_watermark = Some(t); + st.ctx.advance_watermark(t); } let outputs = st .operator diff --git a/src/runtime/streaming/execution/tracker/watermark_tracker.rs b/src/runtime/streaming/execution/tracker/watermark_tracker.rs 
index be7043b9..29233fc3 100644 --- a/src/runtime/streaming/execution/tracker/watermark_tracker.rs +++ b/src/runtime/streaming/execution/tracker/watermark_tracker.rs @@ -1,4 +1,4 @@ -use crate::runtime::streaming::protocol::watermark::{merge_watermarks, watermark_strictly_advances, Watermark}; +use crate::runtime::streaming::protocol::watermark::{merge_watermarks, watermark_strictly_advances}; use crate::sql::common::Watermark; #[derive(Debug)] diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs index 4cd52bf3..2cc0cfba 100644 --- a/src/runtime/streaming/factory/mod.rs +++ b/src/runtime/streaming/factory/mod.rs @@ -1,3 +1,6 @@ pub mod registry; -pub use registry:: OperatorFactory; +pub use registry::{ + ConnectorSinkDispatcher, ConnectorSourceDispatcher, OperatorConstructor, OperatorFactory, + PassthroughConstructor, +}; diff --git a/src/runtime/streaming/factory/registry.rs b/src/runtime/streaming/factory/registry.rs index 5b53b920..b8b45fff 100644 --- a/src/runtime/streaming/factory/registry.rs +++ b/src/runtime/streaming/factory/registry.rs @@ -1,24 +1,64 @@ use anyhow::{anyhow, Result}; -use crate::runtime::streaming::api::operator::ConstructedOperator; +use prost::Message; use std::collections::HashMap; +use std::sync::Arc; + +use crate::runtime::streaming::api::operator::Registry; + +use crate::runtime::streaming::api::operator::ConstructedOperator; +use crate::runtime::streaming::operators::PassthroughOperator; +use crate::runtime::streaming::operators::grouping::IncrementalAggregatingConstructor; +use crate::runtime::streaming::operators::joins::{ + InstantJoinConstructor, JoinWithExpirationConstructor, LookupJoinConstructor, +}; +use crate::runtime::streaming::operators::key_by::KeyByConstructor; +use crate::runtime::streaming::operators::watermark::WatermarkGeneratorConstructor; +use crate::runtime::streaming::operators::windows::{ + SessionAggregatingWindowConstructor, SlidingAggregatingWindowConstructor, + 
TumblingAggregateWindowConstructor, WindowFunctionConstructor, +}; + +use protocol::grpc::api::{ + ConnectorOp, ExpressionWatermarkConfig, + JoinOperator as JoinOperatorProto, + KeyPlanOperator as KeyByProto, + LookupJoinOperator as LookupJoinProto, + SessionWindowAggregateOperator, SlidingWindowAggregateOperator, + TumblingWindowAggregateOperator, UpdatingAggregateOperator, + WindowFunctionOperator as WindowFunctionProto, +}; +// --------------------------------------------------------------------------- +// 1. Core Trait (工厂契约) +// --------------------------------------------------------------------------- -/// 工业级算子注册表与工厂 +/// 算子构造器 trait:每个实现者负责从 protobuf 字节流反序列化配置并构造 [`ConstructedOperator`]。 +/// +/// 外部插件可实现此 trait 并通过 [`OperatorFactory::register`] 注入。 +pub trait OperatorConstructor: Send + Sync { + fn with_config(&self, config: &[u8], registry: Arc) -> Result; +} + +// --------------------------------------------------------------------------- +// 2. 工业级工厂注册表 +// --------------------------------------------------------------------------- + +/// 持有 `name → OperatorConstructor` 映射与共享 [`Registry`]。 +/// +/// [`TaskManager`] 在部署 TDD 时调用 [`create_operator`],完成从字节流到运行时算子的 +/// 反射式实例化。 pub struct OperatorFactory { constructors: HashMap>, + registry: Arc, } impl OperatorFactory { - pub fn new() -> Self { - let factory = Self { + pub fn new(registry: Arc) -> Self { + let mut factory = Self { constructors: HashMap::new(), + registry, }; - - // TODO: 在此注册具体算子构造器 - factory.register("TumblingWindowAggregate", Box::new(TumblingWindowAggregateConstructor)); - factory.register("ExpressionWatermark", Box::new(WatermarkGeneratorConstructor)); - factory.register("KafkaSource", Box::new(KafkaSourceConstructor)); - + factory.register_builtins(); factory } @@ -39,6 +79,212 @@ impl OperatorFactory { ) })?; - ctor.with_config(payload) + ctor.with_config(payload, self.registry.clone()) + } + + /// 列出已注册的所有算子名称(调试用)。 + pub fn registered_operators(&self) -> Vec<&str> { + 
self.constructors.keys().map(|s| s.as_str()).collect() + } + + fn register_builtins(&mut self) { + // ─── 窗口聚合 ─── + self.register("TumblingWindowAggregate", Box::new(TumblingWindowBridge)); + self.register("SlidingWindowAggregate", Box::new(SlidingWindowBridge)); + self.register("SessionWindowAggregate", Box::new(SessionWindowBridge)); + + // ─── 水位 ─── + self.register("ExpressionWatermark", Box::new(WatermarkBridge)); + + // ─── SQL Window Function ─── + self.register("WindowFunction", Box::new(WindowFunctionBridge)); + + // ─── Join ─── + self.register("Join", Box::new(JoinWithExpirationBridge)); + self.register("InstantJoin", Box::new(InstantJoinBridge)); + self.register("LookupJoin", Box::new(LookupJoinBridge)); + + // ─── 增量聚合 ─── + self.register("UpdatingAggregate", Box::new(IncrementalAggregateBridge)); + + // ─── 物理网络路由 ─── + self.register("KeyBy", Box::new(KeyByBridge)); + + // ─── 连接器 Source / Sink(分发器模式,不硬编码具体连接器) ─── + self.register("ConnectorSource", Box::new(ConnectorSourceDispatcher)); + self.register("ConnectorSink", Box::new(ConnectorSinkDispatcher)); + + // ─── 透传类算子 ─── + self.register("Projection", Box::new(PassthroughConstructor("Projection"))); + self.register("ArrowValue", Box::new(PassthroughConstructor("ArrowValue"))); + self.register("ArrowKey", Box::new(PassthroughConstructor("ArrowKey"))); + } +} + +// --------------------------------------------------------------------------- +// 3. 
构造器适配 — 解码 protobuf 后委托给各算子模块的 Constructor +// --------------------------------------------------------------------------- + +struct TumblingWindowBridge; +impl OperatorConstructor for TumblingWindowBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let proto = TumblingWindowAggregateOperator::decode(config) + .map_err(|e| anyhow!("Decode TumblingWindowAggregateOperator failed: {e}"))?; + let op = TumblingAggregateWindowConstructor.with_config(proto, registry)?; + Ok(ConstructedOperator::Operator(Box::new(op))) + } +} + +struct SlidingWindowBridge; +impl OperatorConstructor for SlidingWindowBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let proto = SlidingWindowAggregateOperator::decode(config) + .map_err(|e| anyhow!("Decode SlidingWindowAggregateOperator failed: {e}"))?; + let op = SlidingAggregatingWindowConstructor.with_config(proto, registry)?; + Ok(ConstructedOperator::Operator(Box::new(op))) + } +} + +struct SessionWindowBridge; +impl OperatorConstructor for SessionWindowBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let proto = SessionWindowAggregateOperator::decode(config) + .map_err(|e| anyhow!("Decode SessionWindowAggregateOperator failed: {e}"))?; + let op = SessionAggregatingWindowConstructor.with_config(proto, registry)?; + Ok(ConstructedOperator::Operator(Box::new(op))) + } +} + +struct WatermarkBridge; +impl OperatorConstructor for WatermarkBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let proto = ExpressionWatermarkConfig::decode(config) + .map_err(|e| anyhow!("Decode ExpressionWatermarkConfig failed: {e}"))?; + let op = WatermarkGeneratorConstructor.with_config(proto, registry)?; + Ok(ConstructedOperator::Operator(Box::new(op))) + } +} + +struct WindowFunctionBridge; +impl OperatorConstructor for WindowFunctionBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let proto = WindowFunctionProto::decode(config) 
+ .map_err(|e| anyhow!("Decode WindowFunctionOperator failed: {e}"))?; + let op = WindowFunctionConstructor.with_config(proto, registry)?; + Ok(ConstructedOperator::Operator(Box::new(op))) + } +} + +struct JoinWithExpirationBridge; +impl OperatorConstructor for JoinWithExpirationBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let proto = JoinOperatorProto::decode(config) + .map_err(|e| anyhow!("Decode JoinOperator (expiration) failed: {e}"))?; + let op = JoinWithExpirationConstructor.with_config(proto, registry)?; + Ok(ConstructedOperator::Operator(Box::new(op))) + } +} + +struct InstantJoinBridge; +impl OperatorConstructor for InstantJoinBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let proto = JoinOperatorProto::decode(config) + .map_err(|e| anyhow!("Decode JoinOperator (instant) failed: {e}"))?; + let op = InstantJoinConstructor.with_config(proto, registry)?; + Ok(ConstructedOperator::Operator(Box::new(op))) + } +} + +struct LookupJoinBridge; +impl OperatorConstructor for LookupJoinBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let proto = LookupJoinProto::decode(config) + .map_err(|e| anyhow!("Decode LookupJoinOperator failed: {e}"))?; + let op = LookupJoinConstructor.with_config(proto, registry)?; + Ok(ConstructedOperator::Operator(Box::new(op))) + } +} + +struct IncrementalAggregateBridge; +impl OperatorConstructor for IncrementalAggregateBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let proto = UpdatingAggregateOperator::decode(config) + .map_err(|e| anyhow!("Decode UpdatingAggregateOperator failed: {e}"))?; + let op = IncrementalAggregatingConstructor.with_config(proto, registry)?; + Ok(ConstructedOperator::Operator(Box::new(op))) + } +} + +struct KeyByBridge; +impl OperatorConstructor for KeyByBridge { + fn with_config(&self, config: &[u8], _registry: Arc) -> Result { + let proto = KeyByProto::decode(config) + .map_err(|e| 
anyhow!("Decode KeyPlanOperator failed: {e}"))?; + let op = KeyByConstructor.with_config(proto)?; + Ok(ConstructedOperator::Operator(Box::new(op))) + } +} + +// --------------------------------------------------------------------------- +// 4. 连接器分发抽象 (Connector Dispatcher) — 不硬编码具体连接器 +// --------------------------------------------------------------------------- + +pub struct ConnectorSourceDispatcher; + +impl OperatorConstructor for ConnectorSourceDispatcher { + fn with_config(&self, config: &[u8], _registry: Arc) -> Result { + let op = ConnectorOp::decode(config) + .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?; + + match op.connector.as_str() { + "kafka" => { + // TODO: 委托给 crate::connectors::kafka::build_kafka_source(&op.config) + Err(anyhow!( + "ConnectorSource '{}' factory wiring not yet implemented", + op.connector + )) + } + "redis" => { + Err(anyhow!( + "ConnectorSource '{}' factory wiring not yet implemented", + op.connector + )) + } + other => Err(anyhow!("Unsupported source connector type: {}", other)), + } + } +} + +pub struct ConnectorSinkDispatcher; + +impl OperatorConstructor for ConnectorSinkDispatcher { + fn with_config(&self, config: &[u8], _registry: Arc) -> Result { + let op = ConnectorOp::decode(config) + .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?; + + match op.connector.as_str() { + "kafka" => { + // TODO: 委托给 crate::connectors::kafka::build_kafka_sink(&op.config) + Err(anyhow!( + "ConnectorSink '{}' factory wiring not yet implemented", + op.connector + )) + } + other => Err(anyhow!("Unsupported sink connector type: {}", other)), + } + } +} + +// --------------------------------------------------------------------------- +// 5. 
透传类算子 +// --------------------------------------------------------------------------- + +pub struct PassthroughConstructor(pub &'static str); + +impl OperatorConstructor for PassthroughConstructor { + fn with_config(&self, _config: &[u8], _registry: Arc) -> Result { + Ok(ConstructedOperator::Operator(Box::new( + PassthroughOperator::new(self.0), + ))) } } diff --git a/src/runtime/streaming/format/mod.rs b/src/runtime/streaming/format/mod.rs index e69de29b..c4dbbeda 100644 --- a/src/runtime/streaming/format/mod.rs +++ b/src/runtime/streaming/format/mod.rs @@ -0,0 +1,9 @@ +pub mod config; +pub mod deserializer; +pub mod json_encoder; +pub mod serializer; + +pub use config::{BadDataPolicy, DecimalEncoding, Format, JsonFormat, TimestampFormat}; +pub use deserializer::DataDeserializer; +pub use json_encoder::CustomEncoderFactory; +pub use serializer::DataSerializer; diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs index 0edc0d2e..5997623e 100644 --- a/src/runtime/streaming/mod.rs +++ b/src/runtime/streaming/mod.rs @@ -16,18 +16,21 @@ pub mod api; pub mod arrow; pub mod cluster; +pub mod connectors; pub mod error; pub mod execution; pub mod factory; +pub mod format; pub mod memory; pub mod network; pub mod operators; pub mod protocol; pub mod state; -mod format; +pub mod storage; pub use api::{ - ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext, + ConstructedOperator, MessageOperator, Registry, SourceEvent, SourceOffset, SourceOperator, + TaskContext, }; pub use cluster::{ CompileError, ExchangeMode, ExecutionGraph, JobCompiler, JobId, PartitioningStrategy, @@ -36,7 +39,7 @@ pub use cluster::{ }; pub use error::RunError; pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner}; -pub use factory:: OperatorFactory; +pub use factory::{OperatorConstructor, OperatorFactory}; pub use memory::{MemoryPool, MemoryTicket}; pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, 
RemoteSenderStub}; pub use protocol::{ diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs index c76111c5..ac2cd585 100644 --- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs +++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs @@ -26,13 +26,12 @@ use std::sync::LazyLock; use std::time::{Duration, Instant, SystemTime}; use std::{collections::HashMap, mem, sync::Arc}; use tracing::{debug, warn}; -use tracing_subscriber::Registry; use protocol::grpc::api::UpdatingAggregateOperator; // ========================================================================= // 引入全新的 Actor 框架核心协议 (取代了老旧的 ArrowOperator 和 Collector) // ========================================================================= use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; use crate::runtime::streaming::arrow::decode_aggregate; use crate::runtime::streaming::operators::{Key, UpdatingCache}; use crate::runtime::streaming::StreamOutput; @@ -476,96 +475,95 @@ impl IncrementalAggregatingFunc { async fn initialize(&mut self, ctx: &mut TaskContext) -> Result<()> { let mut tm = ctx.table_manager_guard().await?; - - let table = tm - .get_uncached_key_value_view("a") - .await - .map_err(|e| anyhow!("state table a: {e}"))?; - let mut stream = Box::pin(table.get_all()); - let key_converter = RowConverter::new(self.sliding_state_schema.sort_fields(false))?; - - while let Some(batch) = stream.next().await { - let batch = batch?; - if batch.num_rows() == 0 { continue; } - - let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect(); - let aggregate_states = self.aggregates.iter().map(|agg| { - agg.state_cols.iter().map(|idx| batch.column(*idx).clone()).collect_vec() - 
}).collect_vec(); - let generations = batch.columns().last().unwrap().as_primitive::(); - let now = Instant::now(); - - if key_cols.is_empty() { - self.restore_sliding( - GLOBAL_KEY.as_ref().as_slice(), - now, - 0, - &aggregate_states, - generations.value(0), - )?; - } else { - let key_rows = key_converter.convert_columns(&key_cols)?; - for (i, row) in key_rows.iter().enumerate() { - if generations.is_null(i) { - bail!("generation is null at row {i}"); - } - let generation = generations.value(i); - self.restore_sliding( - row.as_ref(), - now, - i, - &aggregate_states, - generation, - )?; - } - } - } - drop(stream); - - // 初始化 Batch Accumulator - if self.aggregates.iter().any(|agg| agg.accumulator_type == AccumulatorType::Batch) { - let table = tm - .get_uncached_key_value_view("b") - .await - .map_err(|e| anyhow!("state table b: {e}"))?; - let mut stream = Box::pin(table.get_all()); - while let Some(batch) = stream.next().await { - let batch = batch?; - if batch.num_rows() == 0 { continue; } - - let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect(); - let count_column = batch.column(self.batch_state_schema.schema.index_of("count").unwrap()).as_any().downcast_ref::().unwrap(); - let accumulator_column = batch.column(self.batch_state_schema.schema.index_of("accumulator").unwrap()).as_any().downcast_ref::().unwrap(); - let args_row_column = batch.column(self.batch_state_schema.schema.index_of("args_row").unwrap()).as_any().downcast_ref::().unwrap(); - let generations = batch.columns().last().unwrap().as_primitive::(); - - let key_rows = if key_cols.is_empty() { - vec![GLOBAL_KEY.as_ref().clone()] - } else { - self.key_converter - .convert_columns(&key_cols)? 
- .iter() - .map(|k| k.as_ref().to_vec()) - .collect() - }; - - for (i, row) in key_rows.iter().enumerate() { - let Some(accumulators) = self.accumulators.get_mut(row.as_ref()) else { continue; }; - let count = count_column.value(i); - let accumulator_idx = accumulator_column.value(i) as usize; - let args_row = args_row_column.value(i); - let generation = generations.value(i); - - let IncrementalState::Batch { data, .. } = &mut accumulators[accumulator_idx] else { bail!("expected batch accumulator"); }; - - if let Some(existing) = data.get_mut(args_row) { - if existing.generation < generation { existing.count = count; existing.generation = generation; } - } else { - data.insert(Key(Arc::new(args_row.to_vec())), BatchData { count, generation }); - } - } - } - } + // let table = tm + // .get_uncached_key_value_view("a") + // .await + // .map_err(|e| anyhow!("state table a: {e}"))?; + // let mut stream = Box::pin(table.get_all()); + // let key_converter = RowConverter::new(self.sliding_state_schema.sort_fields(false))?; + // + // while let Some(batch) = stream.next().await { + // let batch = batch?; + // if batch.num_rows() == 0 { continue; } + // + // let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect(); + // let aggregate_states = self.aggregates.iter().map(|agg| { + // agg.state_cols.iter().map(|idx| batch.column(*idx).clone()).collect_vec() + // }).collect_vec(); + // let generations = batch.columns().last().unwrap().as_primitive::(); + // let now = Instant::now(); + // + // if key_cols.is_empty() { + // self.restore_sliding( + // GLOBAL_KEY.as_ref().as_slice(), + // now, + // 0, + // &aggregate_states, + // generations.value(0), + // )?; + // } else { + // let key_rows = key_converter.convert_columns(&key_cols)?; + // for (i, row) in key_rows.iter().enumerate() { + // if generations.is_null(i) { + // bail!("generation is null at row {i}"); + // } + // let generation = generations.value(i); + // 
self.restore_sliding( + // row.as_ref(), + // now, + // i, + // &aggregate_states, + // generation, + // )?; + // } + // } + // } + // drop(stream); + + // + // if self.aggregates.iter().any(|agg| agg.accumulator_type == AccumulatorType::Batch) { + // let table = tm + // .get_uncached_key_value_view("b") + // .await + // .map_err(|e| anyhow!("state table b: {e}"))?; + // let mut stream = Box::pin(table.get_all()); + // while let Some(batch) = stream.next().await { + // let batch = batch?; + // if batch.num_rows() == 0 { continue; } + // + // let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect(); + // let count_column = batch.column(self.batch_state_schema.schema.index_of("count").unwrap()).as_any().downcast_ref::().unwrap(); + // let accumulator_column = batch.column(self.batch_state_schema.schema.index_of("accumulator").unwrap()).as_any().downcast_ref::().unwrap(); + // let args_row_column = batch.column(self.batch_state_schema.schema.index_of("args_row").unwrap()).as_any().downcast_ref::().unwrap(); + // let generations = batch.columns().last().unwrap().as_primitive::(); + // + // let key_rows = if key_cols.is_empty() { + // vec![GLOBAL_KEY.as_ref().clone()] + // } else { + // self.key_converter + // .convert_columns(&key_cols)? + // .iter() + // .map(|k| k.as_ref().to_vec()) + // .collect() + // }; + // + // for (i, row) in key_rows.iter().enumerate() { + // let Some(accumulators) = self.accumulators.get_mut(row.as_ref()) else { continue; }; + // let count = count_column.value(i); + // let accumulator_idx = accumulator_column.value(i) as usize; + // let args_row = args_row_column.value(i); + // let generation = generations.value(i); + // + // let IncrementalState::Batch { data, .. 
} = &mut accumulators[accumulator_idx] else { bail!("expected batch accumulator"); }; + // + // if let Some(existing) = data.get_mut(args_row) { + // if existing.generation < generation { existing.count = count; existing.generation = generation; } + // } else { + // data.insert(Key(Arc::new(args_row.to_vec())), BatchData { count, generation }); + // } + // } + // } + // } let mut deleted_keys = vec![]; for (k, v) in self.accumulators.iter_mut() { @@ -613,7 +611,6 @@ impl IncrementalAggregatingFunc { for k in deleted_keys { self.accumulators.remove(&k.0); } - // 处理 TTL 过期的键 let mut ttld_keys = vec![]; for (k, mut v) in self.accumulators.time_out(Instant::now()) { is_retracts.push(true); @@ -639,7 +636,6 @@ impl IncrementalAggregatingFunc { let mut final_batch = record_batch.columns().to_vec(); final_batch.push(metadata); - // 注意这里需要匹配最终向外发送的 Schema Ok(Some(RecordBatch::try_new( self.final_output_schema.clone(), final_batch, @@ -703,33 +699,33 @@ impl MessageOperator for IncrementalAggregatingFunc { _barrier: CheckpointBarrier, ctx: &mut TaskContext, ) -> Result<()> { - let mut tm = ctx.table_manager_guard().await?; - - if let Some(sliding) = self.checkpoint_sliding()? { - let table = tm - .get_uncached_key_value_view("a") - .await - .map_err(|e| anyhow!("state table a: {e}"))?; - table - .insert_batch(sliding) - .await - .map_err(|e| anyhow!("insert_batch a: {e}"))?; - } - - if let Some(batch) = self.checkpoint_batch()? { - let table = tm - .get_uncached_key_value_view("b") - .await - .map_err(|e| anyhow!("state table b: {e}"))?; - table - .insert_batch(batch) - .await - .map_err(|e| anyhow!("insert_batch b: {e}"))?; - } - - // 清理已生成的 changelog 痕迹 - self.updated_keys.clear(); - Ok(()) + // let mut tm = ctx.table_manager_guard().await?; + // + // if let Some(sliding) = self.checkpoint_sliding()? 
{ + // let table = tm + // .get_uncached_key_value_view("a") + // .await + // .map_err(|e| anyhow!("state table a: {e}"))?; + // table + // .insert_batch(sliding) + // .await + // .map_err(|e| anyhow!("insert_batch a: {e}"))?; + // } + // + // if let Some(batch) = self.checkpoint_batch()? { + // let table = tm + // .get_uncached_key_value_view("b") + // .await + // .map_err(|e| anyhow!("state table b: {e}"))?; + // table + // .insert_batch(batch) + // .await + // .map_err(|e| anyhow!("insert_batch b: {e}"))?; + // } + // + // + // self.updated_keys.clear(); + Ok(()) } async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs index dbde4d8e..639876bf 100644 --- a/src/runtime/streaming/operators/joins/join_instance.rs +++ b/src/runtime/streaming/operators/joins/join_instance.rs @@ -1,4 +1,4 @@ -//! 瞬时 JOIN:双通道喂入 DataFusion 物理计划,水位线推进时闭合实例并抽干结果。 +//! 瞬时 JOIN:双通道喂入 DataFusion 物理计划,水位线推进时闭合实例并抽干结果(纯内存版)。 use anyhow::{anyhow, Result}; use arrow::compute::{max, min, partition, sort_to_indices, take}; @@ -18,9 +18,8 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; use async_trait::async_trait; -use tracing_subscriber::Registry; use protocol::grpc::api::JoinOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; @@ -33,6 +32,7 @@ enum JoinSide { } impl JoinSide { + #[allow(dead_code)] fn name(&self) -> &'static str { match self { JoinSide::Left => "left", @@ -149,16 +149,6 @@ impl InstantJoinOperator { } } - let wm = ctx.last_present_watermark(); - { - let mut tm = ctx.table_manager_guard().await?; - let 
table = tm - .get_expiring_time_key_table(side.name(), wm) - .await - .map_err(|e| anyhow!("{e:?}"))?; - table.insert(from_nanos(max_timestamp as u128), batch.clone()); - } - let unkeyed_batch = self.input_schema(side).unkeyed_batch(&batch)?; if max_timestamp == min_timestamp { @@ -201,39 +191,7 @@ impl MessageOperator for InstantJoinOperator { "InstantJoin" } - async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { - let watermark = ctx.last_present_watermark(); - - let left_batches: Vec<_> = { - let mut tm = ctx.table_manager_guard().await?; - let left_table = tm - .get_expiring_time_key_table("left", watermark) - .await - .map_err(|e| anyhow!("{e:?}"))?; - left_table - .all_batches_for_watermark(watermark) - .flat_map(|(_time, batches)| batches.iter().cloned()) - .collect() - }; - for batch in left_batches { - self.process_side_internal(JoinSide::Left, batch, ctx).await?; - } - - let right_batches: Vec<_> = { - let mut tm = ctx.table_manager_guard().await?; - let right_table = tm - .get_expiring_time_key_table("right", watermark) - .await - .map_err(|e| anyhow!("{e:?}"))?; - right_table - .all_batches_for_watermark(watermark) - .flat_map(|(_time, batches)| batches.iter().cloned()) - .collect() - }; - for batch in right_batches { - self.process_side_internal(JoinSide::Right, batch, ctx).await?; - } - + async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> { Ok(()) } @@ -286,22 +244,8 @@ impl MessageOperator for InstantJoinOperator { async fn snapshot_state( &mut self, _barrier: CheckpointBarrier, - ctx: &mut TaskContext, + _ctx: &mut TaskContext, ) -> Result<()> { - let watermark = ctx.last_present_watermark(); - let mut tm = ctx.table_manager_guard().await?; - tm.get_expiring_time_key_table("left", watermark) - .await - .map_err(|e| anyhow!("{e:?}"))? - .flush(watermark) - .await - .map_err(|e| anyhow!("{e:?}"))?; - tm.get_expiring_time_key_table("right", watermark) - .await - .map_err(|e| anyhow!("{e:?}"))? 
- .flush(watermark) - .await - .map_err(|e| anyhow!("{e:?}"))?; Ok(()) } } diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs index d115ac10..c2bb6259 100644 --- a/src/runtime/streaming/operators/joins/join_with_expiration.rs +++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs @@ -1,4 +1,5 @@ -//! 带 TTL 的 Key-Time Join:两侧状态表 + DataFusion 物理计划成对计算。 +//! 带 TTL 的 Key-Time Join:纯内存状态版 + DataFusion 物理计划成对计算。 +//! 完全移除了底层 TableManager 和持久化状态依赖。 use anyhow::{anyhow, Result}; use arrow::compute::concat_batches; @@ -9,14 +10,14 @@ use datafusion::physical_plan::ExecutionPlan; use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode}; use futures::StreamExt; use prost::Message; +use std::collections::VecDeque; use std::sync::{Arc, RwLock}; -use std::time::Duration; +use std::time::{Duration, SystemTime}; use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; use async_trait::async_trait; -use tracing_subscriber::Registry; use protocol::grpc::api::JoinOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark}; @@ -28,32 +29,64 @@ enum JoinSide { Right, } -impl JoinSide { - fn table_name(&self) -> &'static str { - match self { - JoinSide::Left => "left", - JoinSide::Right => "right", +// ============================================================================ +// 纯内存状态缓冲区 (In-Memory TTL Buffer) +// ============================================================================ + +struct StateBuffer { + batches: VecDeque<(SystemTime, RecordBatch)>, + ttl: Duration, +} + +impl StateBuffer { + fn new(ttl: Duration) -> Self { + Self { + batches: VecDeque::new(), + ttl, + } + } + + fn insert(&mut self, batch: RecordBatch, time: 
SystemTime) { + self.batches.push_back((time, batch)); + } + + fn expire(&mut self, current_time: SystemTime) { + let cutoff = current_time + .checked_sub(self.ttl) + .unwrap_or(SystemTime::UNIX_EPOCH); + while let Some((time, _)) = self.batches.front() { + if *time < cutoff { + self.batches.pop_front(); + } else { + break; + } } } + + fn get_all_batches(&self) -> Vec { + self.batches.iter().map(|(_, b)| b.clone()).collect() + } } +// ============================================================================ +// 算子主体 +// ============================================================================ + pub struct JoinWithExpirationOperator { - /// 保留与配置/表注册语义一致;实际 TTL 由状态表配置决定。 - #[allow(dead_code)] - left_expiration: Duration, - #[allow(dead_code)] - right_expiration: Duration, left_input_schema: FsSchema, right_input_schema: FsSchema, left_schema: FsSchema, right_schema: FsSchema, + left_passer: Arc>>, right_passer: Arc>>, join_exec_plan: Arc, + + left_state: StateBuffer, + right_state: StateBuffer, } impl JoinWithExpirationOperator { - /// 执行 DataFusion 物理计划,返回 JOIN 结果批次(不经过 Collector)。 async fn compute_pair( &mut self, left: RecordBatch, @@ -71,6 +104,7 @@ impl JoinWithExpirationOperator { self.join_exec_plan .reset() .map_err(|e| anyhow!("join plan reset: {e}"))?; + let mut result_stream = self .join_exec_plan .execute(0, SessionContext::new().task_ctx()) @@ -90,42 +124,22 @@ impl JoinWithExpirationOperator { batch: RecordBatch, ctx: &mut TaskContext, ) -> Result> { - let watermark = ctx.last_present_watermark(); - let target_name = side.table_name(); - let opposite_name = match side { - JoinSide::Left => JoinSide::Right.table_name(), - JoinSide::Right => JoinSide::Left.table_name(), - }; + let current_time = ctx + .last_present_watermark() + .unwrap_or_else(SystemTime::now); - let mut tm = ctx.table_manager_guard().await?; - - let inserted_rows = { - let target_table = tm - .get_key_time_table(target_name, watermark) - .await - .map_err(|e| anyhow!("{e:?}"))?; 
- target_table - .insert(batch.clone()) - .await - .map_err(|e| anyhow!("{e:?}"))? - }; + self.left_state.expire(current_time); + self.right_state.expire(current_time); - let opposite_table = tm - .get_key_time_table(opposite_name, watermark) - .await - .map_err(|e| anyhow!("{e:?}"))?; - - let mut opposite_batches = Vec::new(); - for row in inserted_rows { - if let Some(matched_batch) = opposite_table - .get_batch(row.as_ref()) - .map_err(|e| anyhow!("{e:?}"))? - { - opposite_batches.push(matched_batch.clone()); - } + match side { + JoinSide::Left => self.left_state.insert(batch.clone(), current_time), + JoinSide::Right => self.right_state.insert(batch.clone(), current_time), } - drop(tm); + let opposite_batches = match side { + JoinSide::Left => self.right_state.get_all_batches(), + JoinSide::Right => self.left_state.get_all_batches(), + }; if opposite_batches.is_empty() { return Ok(vec![]); @@ -193,8 +207,6 @@ impl MessageOperator for JoinWithExpirationOperator { _barrier: CheckpointBarrier, _ctx: &mut TaskContext, ) -> Result<()> { - // `KeyTimeView` 无 `flush`;写入已通过 `insert` 经 `state_tx` 进入后端刷写管线, - // 与 worker 侧 `JoinWithExpiration` 未单独实现 `handle_checkpoint` 一致。 Ok(()) } @@ -203,8 +215,10 @@ impl MessageOperator for JoinWithExpirationOperator { } } -/// 从配置构造 [`JoinWithExpirationOperator`](实现 [`MessageOperator`])。 -/// 注意:`ConstructedOperator` 仅包装 `ArrowOperator`,此处不返回该类型。 +// ============================================================================ +// 构造器 +// ============================================================================ + pub struct JoinWithExpirationConstructor; impl JoinWithExpirationConstructor { @@ -247,8 +261,6 @@ impl JoinWithExpirationConstructor { } Ok(JoinWithExpirationOperator { - left_expiration: ttl, - right_expiration: ttl, left_input_schema, right_input_schema, left_schema, @@ -256,6 +268,8 @@ impl JoinWithExpirationConstructor { left_passer, right_passer, join_exec_plan, + left_state: StateBuffer::new(ttl), + right_state: 
StateBuffer::new(ttl), }) } } diff --git a/src/runtime/streaming/operators/joins/lookup_join.rs b/src/runtime/streaming/operators/joins/lookup_join.rs index b302d198..c6458174 100644 --- a/src/runtime/streaming/operators/joins/lookup_join.rs +++ b/src/runtime/streaming/operators/joins/lookup_join.rs @@ -14,13 +14,14 @@ use datafusion_proto::physical_plan::from_proto::parse_physical_expr; use datafusion_proto::protobuf::PhysicalExprNode; use mini_moka::sync::Cache; use prost::Message; +use protocol::grpc::api::{JoinType, LookupJoinOperator as LookupJoinProto}; use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; -use protocol::grpc::api::JoinType; + use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; -use crate::runtime::streaming::protocol::stream_output::StreamOutput; +use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; +use crate::runtime::streaming::connectors::{LookupConnector, connectors}; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{CheckpointBarrier, FsSchema, MetadataField, OperatorConfig, Watermark, LOOKUP_KEY_INDEX_FIELD}; @@ -273,7 +274,7 @@ pub struct LookupJoinConstructor; impl LookupJoinConstructor { pub fn with_config( &self, - config: LookupJoinOperator, + config: LookupJoinProto, registry: Arc, ) -> anyhow::Result { let join_type = config.join_type(); @@ -361,3 +362,4 @@ impl LookupJoinConstructor { }) } } + diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs index fe2a7d9e..66d3e892 100644 --- a/src/runtime/streaming/operators/mod.rs +++ b/src/runtime/streaming/operators/mod.rs @@ -2,25 +2,22 @@ pub mod grouping; pub mod joins; +pub mod key_by; pub mod sink; pub mod source; pub mod watermark; pub mod windows; -pub use grouping::{ - IncrementalAggregatingConstructor, IncrementalAggregatingFunc, Key, UpdatingCache, -}; +pub use grouping::{IncrementalAggregatingFunc, Key, 
UpdatingCache}; pub use joins::{ - InstantJoinConstructor, InstantJoinOperator, JoinWithExpirationConstructor, - JoinWithExpirationOperator, LookupJoinConstructor, LookupJoinOperator, LookupJoinType, + InstantJoinOperator, JoinWithExpirationOperator, LookupJoinOperator, LookupJoinType, }; +pub use key_by::KeyByOperator; pub use sink::{ConsistencyMode, KafkaSinkOperator}; -pub use source::{BatchDeserializer, KafkaSourceOperator, KafkaState}; -pub use watermark::{WatermarkGeneratorConstructor, WatermarkGeneratorOperator, WatermarkGeneratorState}; +pub use source::{BatchDeserializer, BufferedDeserializer, KafkaSourceOperator, KafkaState}; +pub use watermark::{WatermarkGeneratorOperator, WatermarkGeneratorState}; pub use windows::{ - SessionAggregatingWindowConstructor, SessionWindowOperator, - SlidingAggregatingWindowConstructor, SlidingWindowOperator, - TumblingAggregateWindowConstructor, TumblingWindowOperator, WindowFunctionConstructor, + SessionWindowOperator, SlidingWindowOperator, TumblingWindowOperator, WindowFunctionOperator, }; diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs index 9161ac7b..0b68b88b 100644 --- a/src/runtime/streaming/operators/sink/kafka/mod.rs +++ b/src/runtime/streaming/operators/sink/kafka/mod.rs @@ -17,6 +17,7 @@ use tracing::{info, warn}; use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::format::DataSerializer; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark}; // ============================================================================ @@ -49,7 +50,7 @@ pub struct KafkaSinkOperator { pub timestamp_col_idx: Option, pub key_col_idx: Option, - pub serializer: ArrowSerializer, + pub serializer: DataSerializer, at_least_once_producer: Option, transactional_state: Option, @@ -64,7 +65,7 @@ impl KafkaSinkOperator { 
consistency_mode: ConsistencyMode, client_config: HashMap, input_schema: FsSchema, - serializer: ArrowSerializer, + serializer: DataSerializer, ) -> Self { Self { topic, @@ -102,7 +103,7 @@ impl KafkaSinkOperator { if let Some(idx) = tx_index { config.set("enable.idempotence", "true"); let transactional_id = format!( - "arroyo-tx-{}-{}-{}-{}", + "fs-tx-{}-{}-{}-{}", ctx.job_id, self.topic, ctx.subtask_idx, idx ); config.set("transactional.id", &transactional_id); @@ -236,10 +237,10 @@ impl MessageOperator for KafkaSinkOperator { batch: RecordBatch, _ctx: &mut TaskContext, ) -> Result> { - let payload_iter = self.serializer.serialize(&batch); + let payloads = self.serializer.serialize(&batch)?; let producer = self.current_producer().clone(); - for (i, payload) in payload_iter.enumerate() { + for (i, payload) in payloads.iter().enumerate() { let ts_millis = self .timestamp_col_idx .and_then(|idx| event_timestamp_ms(&batch, i, idx)); diff --git a/src/runtime/streaming/operators/source/kafka/mod.rs b/src/runtime/streaming/operators/source/kafka/mod.rs index d0c67972..595fbcc3 100644 --- a/src/runtime/streaming/operators/source/kafka/mod.rs +++ b/src/runtime/streaming/operators/source/kafka/mod.rs @@ -1,6 +1,8 @@ //! 
Kafka 源算子:实现 [`crate::runtime::streaming::api::source::SourceOperator`],由 [`crate::runtime::streaming::execution::SourceRunner`] 轮询 `fetch_next`。 use anyhow::{anyhow, Context as _, Result}; +use arrow_array::RecordBatch; +use arrow_schema::SchemaRef; use async_trait::async_trait; use bincode::{Decode, Encode}; use governor::{DefaultDirectRateLimiter, Quota, RateLimiter as GovernorRateLimiter}; @@ -10,11 +12,12 @@ use std::collections::HashMap; use std::num::NonZeroU32; use std::time::Duration; use tracing::{debug, error, info, warn}; -use arrow_array::RecordBatch; use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::source::{SourceEvent, SourceOffset, SourceOperator}; +use crate::runtime::streaming::format::{BadDataPolicy, DataDeserializer, Format}; use crate::sql::common::{CheckpointBarrier, MetadataField}; +use crate::sql::common::fs_schema::FieldValueType; // ============================================================================ // 1. 领域模型:Kafka 状态与配置 // ============================================================================ @@ -25,8 +28,7 @@ pub struct KafkaState { offset: i64, } -/// 模拟 Arroyo 原版的 Deserializer Buffer -/// (工业实现中,反序列化常带 buffer,满 N 条或超时后吐出一个 [`RecordBatch`])。 +/// 增量反序列化缓冲 trait:Source 逐条 `deserialize_slice`,攒满后 `flush_buffer` 输出 [`RecordBatch`]。 pub trait BatchDeserializer: Send + 'static { fn deserialize_slice( &mut self, @@ -40,6 +42,54 @@ pub trait BatchDeserializer: Send + 'static { fn flush_buffer(&mut self) -> Result>; } +// --------------------------------------------------------------------------- +// BufferedDeserializer — 基于 DataDeserializer 的默认 BatchDeserializer 实现 +// --------------------------------------------------------------------------- + +/// 将 [`DataDeserializer`] 包装为 [`BatchDeserializer`]:逐条缓存 payload,达到阈值后批量反序列化。 +pub struct BufferedDeserializer { + inner: DataDeserializer, + buffer: Vec>, + batch_size: usize, +} + +impl BufferedDeserializer { + pub fn new(format: Format, 
schema: SchemaRef, bad_data_policy: BadDataPolicy, batch_size: usize) -> Self { + Self { + inner: DataDeserializer::new(format, schema, bad_data_policy), + buffer: Vec::with_capacity(batch_size), + batch_size, + } + } +} + +impl BatchDeserializer for BufferedDeserializer { + fn deserialize_slice( + &mut self, + payload: &[u8], + _timestamp: u64, + _metadata: Option>>, + ) -> Result<()> { + self.buffer.push(payload.to_vec()); + Ok(()) + } + + fn should_flush(&self) -> bool { + self.buffer.len() >= self.batch_size + } + + fn flush_buffer(&mut self) -> Result> { + if self.buffer.is_empty() { + return Ok(None); + } + + let refs: Vec<&[u8]> = self.buffer.iter().map(|v| v.as_slice()).collect(); + let batch = self.inner.deserialize_batch(&refs)?; + self.buffer.clear(); + Ok(Some(batch)) + } +} + impl SourceOffset { fn rdkafka_offset(self) -> Offset { match self { @@ -109,9 +159,9 @@ impl KafkaSourceOperator { let group_id = match (&self.group_id, &self.group_id_prefix) { (Some(gid), _) => gid.clone(), (None, Some(prefix)) => { - format!("{}-arroyo-{}-{}", prefix, ctx.job_id, ctx.subtask_idx) + format!("{}-fs-{}-{}", prefix, ctx.job_id, ctx.subtask_idx) } - (None, None) => format!("arroyo-{}-{}-consumer", ctx.job_id, ctx.subtask_idx), + (None, None) => format!("fs-{}-{}-consumer", ctx.job_id, ctx.subtask_idx), }; for (key, value) in &self.client_configs { diff --git a/src/runtime/streaming/operators/source/mod.rs b/src/runtime/streaming/operators/source/mod.rs index ef4e3cb6..59b3ff7c 100644 --- a/src/runtime/streaming/operators/source/mod.rs +++ b/src/runtime/streaming/operators/source/mod.rs @@ -2,4 +2,4 @@ pub mod kafka; -pub use kafka::{BatchDeserializer, KafkaSourceOperator, KafkaState}; +pub use kafka::{BatchDeserializer, BufferedDeserializer, KafkaSourceOperator, KafkaState}; diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs index fa97b3d9..f210c95a 100644 --- 
a/src/runtime/streaming/operators/watermark/watermark_generator.rs +++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs @@ -16,9 +16,8 @@ use std::time::{Duration, SystemTime}; use tracing::{debug, info}; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; use async_trait::async_trait; -use tracing_subscriber::Registry; use protocol::grpc::api::ExpressionWatermarkConfig; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_millis, CheckpointBarrier, FsSchema, Watermark}; @@ -242,3 +241,4 @@ impl WatermarkGeneratorConstructor { )) } } + diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs index ebe75c4c..cae0935c 100644 --- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs @@ -1,8 +1,9 @@ -//! 会话窗口聚合:与 worker `arrow/session_aggregating_window` 对齐,实现 [`MessageOperator`]。 +//! 会话窗口聚合:纯内存版,完全脱离持久化状态存储。 +//! 
利用 BTreeMap 充当优先队列,数据天然在内存中进行 Gap 合并与触发。 use anyhow::{anyhow, bail, Context, Result}; use arrow::compute::{ - concat_batches, filter_record_batch, kernels::cmp::gt_eq, lexsort_to_indices, max, partition, take, + concat_batches, filter_record_batch, kernels::cmp::gt_eq, lexsort_to_indices, partition, take, }; use arrow::row::{RowConverter, SortField}; use arrow_array::types::TimestampNanosecondType; @@ -22,12 +23,11 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::sync::{Arc, RwLock}; use std::time::{Duration, SystemTime}; use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; -use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::operator::MessageOperator; use async_trait::async_trait; -use tracing_subscriber::Registry; +use crate::runtime::streaming::api::operator::Registry; use protocol::grpc::api::SessionWindowAggregateOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; @@ -35,7 +35,7 @@ use crate::sql::common::converter::Converter; use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; use crate::sql::schema::utils::window_arrow_struct; // ============================================================================ -// 领域模型 +// 领域模型与纯内存状态 // ============================================================================ struct SessionWindowConfig { @@ -167,7 +167,7 @@ struct SessionWindowResult { struct KeySessionState { config: Arc, active_session: Option, - buffered_batches: BTreeMap>, + buffered_batches: BTreeMap>, // 纯内存缓冲 } impl KeySessionState { @@ -323,7 +323,7 @@ fn build_session_output_schema( } // ============================================================================ -// 算子 +// 算子本体:负责处理输入数据与时间流,路由给具体的 KeySessionState // ============================================================================ pub struct 
SessionWindowOperator { @@ -564,10 +564,7 @@ impl SessionWindowOperator { let window_start_array = PrimitiveArray::::from(start_times); let window_end_array = PrimitiveArray::::from(end_times.clone()); - let timestamp_array = PrimitiveArray::::from( - end_times.into_iter().map(|t| t - 1).collect::>(), - ); - + let result_batches: Vec<&RecordBatch> = session_results.iter().map(|res| &res.batch).collect(); let merged_batch = concat_batches(&session_results[0].batch.schema(), result_batches)?; @@ -584,12 +581,12 @@ impl SessionWindowOperator { let mut columns = key_columns; columns.insert(self.config.window_index, Arc::new(window_struct_array)); columns.extend_from_slice(merged_batch.columns()); - columns.push(Arc::new(timestamp_array)); RecordBatch::try_new(self.config.output_schema.clone(), columns) .context("failed to create session window output batch") } + #[allow(dead_code)] fn earliest_batch_time(&self) -> Option { self.pq_start_times .first_key_value() @@ -603,44 +600,7 @@ impl MessageOperator for SessionWindowOperator { "SessionWindow" } - async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { - let mut tm = ctx.table_manager_guard().await?; - let start_time_opt = tm - .get_global_keyed_state::>("e") - .await - .map_err(|e| anyhow!("global keyed state e: {e}"))? 
- .get_all() - .values() - .filter_map(|e| *e) - .min(); - - let Some(start_time) = start_time_opt else { - return Ok(()); - }; - - let state_table = tm - .get_expiring_time_key_table("s", Some(start_time)) - .await - .map_err(|e| anyhow!("expiring time key table s: {e}"))?; - for (_, batches) in state_table.all_batches_for_watermark(Some(start_time)) { - for batch in batches { - let filtered = self.filter_batch_by_time(batch.clone(), Some(start_time))?; - if filtered.num_rows() > 0 { - let sorted = self.sort_batch(&filtered)?; - self.ingest_sorted_batch(sorted, Some(start_time)).await?; - } - } - } - - if let Some(ts) = ctx.last_present_watermark() { - let evicted = self.evaluate_watermark(ts).await?; - if !evicted.is_empty() { - warn!( - "evicted {} session result batch(es) when restoring from state", - evicted.len() - ); - } - } + async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> { Ok(()) } @@ -659,23 +619,6 @@ impl MessageOperator for SessionWindowOperator { let sorted_batch = self.sort_batch(&filtered_batch)?; - let max_timestamp = max( - sorted_batch - .column(self.config.input_schema_ref.timestamp_index) - .as_any() - .downcast_ref::() - .ok_or_else(|| anyhow!("expected timestamp column"))?, - ) - .ok_or_else(|| anyhow!("expected max timestamp"))?; - - let mut tm = ctx.table_manager_guard().await?; - let table = tm - .get_expiring_time_key_table("s", ctx.last_present_watermark()) - .await - .map_err(|e| anyhow!("expiring time key table s: {e}"))?; - table.insert(from_nanos(max_timestamp as u128), sorted_batch.clone()); - drop(tm); - self.ingest_sorted_batch(sorted_batch, watermark_time).await?; Ok(vec![]) @@ -697,22 +640,7 @@ impl MessageOperator for SessionWindowOperator { .collect()) } - async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { - let watermark = ctx.last_present_watermark(); - let mut tm = ctx.table_manager_guard().await?; - - tm.get_expiring_time_key_table("s", watermark) - 
.await - .map_err(|e| anyhow!("expiring time key table s: {e}"))? - .flush(watermark) - .await?; - - tm.get_global_keyed_state::>("e") - .await - .map_err(|e| anyhow!("global keyed state e: {e}"))? - .insert(ctx.subtask_idx, self.earliest_batch_time()) - .await; - + async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, _ctx: &mut TaskContext) -> Result<()> { Ok(()) } @@ -722,7 +650,7 @@ impl MessageOperator for SessionWindowOperator { } // ============================================================================ -// 构造器(返回 [`SessionWindowOperator`],供 Actor 子任务直接 `Box::new`) +// 构造器 // ============================================================================ pub struct SessionAggregatingWindowConstructor; @@ -802,3 +730,4 @@ impl SessionAggregatingWindowConstructor { }) } } + diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs index 29bad05a..aa2e2474 100644 --- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs @@ -1,4 +1,6 @@ -//! 滑动窗口聚合:与 worker `arrow/sliding_aggregating_window` 对齐,实现 [`MessageOperator`]。 +//! 滑动窗口聚合:纯内存版。 +//! 完全依赖内部的 TieredRecordBatchHolder 和 ActiveBin 在内存中进行计算, +//! 
摆脱 TableManager 依赖,遇到 Barrier 自动透传。 use anyhow::{anyhow, bail, Result}; use arrow::compute::{partition, sort_to_indices, take}; @@ -25,13 +27,14 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::operator::MessageOperator; use async_trait::async_trait; -use tracing_subscriber::Registry; +use crate::runtime::streaming::api::operator::Registry; use protocol::grpc::api::SlidingWindowAggregateOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark}; use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; // ============================================================================ -// Tiered panes +// 纯内存状态:阶梯式时间面板 (Tiered panes) +// 这部分本身就是极佳的内存数据结构,原样保留! // ============================================================================ #[derive(Default, Debug)] @@ -202,7 +205,7 @@ impl TieredRecordBatchHolder { } // ============================================================================ -// Per-bin partial aggregation +// Per-bin partial aggregation (纯内存缓冲区) // ============================================================================ struct ActiveBin { @@ -249,7 +252,7 @@ impl ActiveBin { } // ============================================================================ -// Operator +// 算子主体 // ============================================================================ pub struct SlidingWindowOperator { @@ -314,29 +317,7 @@ impl MessageOperator for SlidingWindowOperator { "SlidingWindow" } - async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { - let watermark = ctx.last_present_watermark(); - let mut tm = ctx.table_manager_guard().await?; - let table = tm - .get_expiring_time_key_table("t", watermark) - .await - .map_err(|e| anyhow!("expiring time key table t: {e}"))?; - - let watermark_bin = 
self.bin_start(watermark.unwrap_or(SystemTime::UNIX_EPOCH)); - - for (timestamp, batches) in table.all_batches_for_watermark(watermark) { - let bin = self.bin_start(*timestamp); - if bin < watermark_bin { - for batch in batches { - self.tiered_record_batches.insert(batch.clone(), bin)?; - } - } else { - let slot = self.active_bins.entry(bin).or_default(); - for batch in batches { - slot.finished_batches.push(batch.clone()); - } - } - } + async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> { Ok(()) } @@ -470,28 +451,7 @@ impl MessageOperator for SlidingWindowOperator { Ok(final_outputs) } - async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { - let watermark = ctx.last_present_watermark(); - let mut tm = ctx.table_manager_guard().await?; - let table = tm - .get_expiring_time_key_table("t", watermark) - .await - .map_err(|e| anyhow!("expiring time key table t: {e}"))?; - - for (bin_start, active_bin) in self.active_bins.iter_mut() { - active_bin.close_and_drain().await?; - - for batch in &active_bin.finished_batches { - let state_batch = Self::add_bin_start_as_timestamp( - batch, - *bin_start, - self.partial_schema.schema.clone(), - )?; - table.insert(*bin_start, state_batch); - } - } - - table.flush(watermark).await?; + async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, _ctx: &mut TaskContext) -> Result<()> { Ok(()) } @@ -500,6 +460,8 @@ impl MessageOperator for SlidingWindowOperator { } } +// ============================================================================ +// 构造器 // ============================================================================ pub struct SlidingAggregatingWindowConstructor; @@ -576,3 +538,4 @@ impl SlidingAggregatingWindowConstructor { }) } } + diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs index c30950cb..c23da40a 100644 --- 
a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs @@ -27,7 +27,7 @@ use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::operator::MessageOperator; use async_trait::async_trait; -use tracing_subscriber::Registry; +use crate::runtime::streaming::api::operator::Registry; use protocol::grpc::api::TumblingWindowAggregateOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark}; @@ -397,3 +397,4 @@ impl TumblingAggregateWindowConstructor { }) } } + diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs index cc51b820..03f02a19 100644 --- a/src/runtime/streaming/operators/windows/window_function.rs +++ b/src/runtime/streaming/operators/windows/window_function.rs @@ -1,4 +1,6 @@ -//! 窗口函数(按事件时间分桶的瞬时执行):与 worker `arrow/window_fn` 对齐,实现 [`MessageOperator`]。 +//! 窗口函数(按事件时间分桶的瞬时执行):纯内存版。 +//! 完全依赖内部的 ActiveWindowExec 通道在内存中缓冲数据, +//! 
摆脱持久化状态存储的依赖,遇到 Barrier 自动透传。 use anyhow::{anyhow, Result}; use arrow::compute::{max, min}; @@ -18,14 +20,17 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; use async_trait::async_trait; -use tracing_subscriber::Registry; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; use crate::sql::common::time_utils::print_time; use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; +// ============================================================================ +// 纯内存执行缓冲区 +// ============================================================================ + struct ActiveWindowExec { sender: Option>, result_stream: Option, @@ -58,6 +63,10 @@ impl ActiveWindowExec { } } +// ============================================================================ +// 算子主体 +// ============================================================================ + pub struct WindowFunctionOperator { input_schema: FsSchemaRef, input_schema_unkeyed: FsSchemaRef, @@ -141,25 +150,7 @@ impl MessageOperator for WindowFunctionOperator { "WindowFunction" } - async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { - let watermark = ctx.last_present_watermark(); - let mut tm = ctx.table_manager_guard().await?; - let table = tm - .get_expiring_time_key_table("input", watermark) - .await - .map_err(|e| anyhow!("expiring time key table input: {e}"))?; - - for (timestamp, batches) in table.all_batches_for_watermark(watermark) { - let exec = self.get_or_create_exec(*timestamp)?; - for batch in batches { - exec - .sender - .as_ref() - .ok_or_else(|| anyhow!("window exec sender missing on restore"))? 
- .send(batch.clone()) - .map_err(|e| anyhow!("restore send: {e}"))?; - } - } + async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> { Ok(()) } @@ -172,17 +163,9 @@ impl MessageOperator for WindowFunctionOperator { let current_watermark = ctx.last_present_watermark(); let split_batches = self.filter_and_split_batches(batch, current_watermark)?; - let mut tm = ctx.table_manager_guard().await?; - let table = tm - .get_expiring_time_key_table("input", current_watermark) - .await - .map_err(|e| anyhow!("expiring time key table input: {e}"))?; - for (sub_batch, timestamp) in split_batches { - table.insert(timestamp, sub_batch.clone()); let exec = self.get_or_create_exec(timestamp)?; - exec - .sender + exec.sender .as_ref() .ok_or_else(|| anyhow!("window exec sender missing"))? .send(sub_batch) @@ -227,14 +210,7 @@ impl MessageOperator for WindowFunctionOperator { Ok(final_outputs) } - async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { - let watermark = ctx.last_present_watermark(); - let mut tm = ctx.table_manager_guard().await?; - tm.get_expiring_time_key_table("input", watermark) - .await - .map_err(|e| anyhow!("expiring time key table input: {e}"))? 
- .flush(watermark) - .await?; + async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, _ctx: &mut TaskContext) -> Result<()> { Ok(()) } @@ -243,6 +219,10 @@ impl MessageOperator for WindowFunctionOperator { } } +// ============================================================================ +// 构造器 +// ============================================================================ + pub struct WindowFunctionConstructor; impl WindowFunctionConstructor { @@ -290,3 +270,4 @@ impl WindowFunctionConstructor { }) } } + diff --git a/src/runtime/streaming/storage/mod.rs b/src/runtime/streaming/storage/mod.rs new file mode 100644 index 00000000..c411b5ee --- /dev/null +++ b/src/runtime/streaming/storage/mod.rs @@ -0,0 +1,32 @@ +use anyhow::Result; +use async_trait::async_trait; +use std::sync::Arc; + +pub mod backend; +pub mod manager; +pub mod table; + +#[async_trait] +pub trait StorageProvider: Send + Sync + 'static { + async fn get(&self, _path: &str) -> Result>; + async fn put(&self, _path: &str, _data: Vec) -> Result<()>; + async fn delete_if_present(&self, _path: &str) -> Result<()>; +} + +pub type StorageProviderRef = Arc; + +/// 空的存储实现,供测试和占位使用 +pub struct DummyStorageProvider; + +#[async_trait] +impl StorageProvider for DummyStorageProvider { + async fn get(&self, _path: &str) -> Result> { + Ok(vec![]) + } + async fn put(&self, _path: &str, _data: Vec) -> Result<()> { + Ok(()) + } + async fn delete_if_present(&self, _path: &str) -> Result<()> { + Ok(()) + } +} diff --git a/src/sql/datastream/logical.rs b/src/sql/datastream/logical.rs index 13560a3e..c0e5465e 100644 --- a/src/sql/datastream/logical.rs +++ b/src/sql/datastream/logical.rs @@ -28,6 +28,7 @@ pub enum OperatorName { SlidingWindowAggregate, SessionWindowAggregate, UpdatingAggregate, + KeyBy, ConnectorSource, ConnectorSink, } @@ -320,6 +321,7 @@ impl LogicalProgram { "sql-session-window-aggregate".to_string() } OperatorName::UpdatingAggregate => "sql-updating-aggregate".to_string(), + 
OperatorName::KeyBy => "key-by-routing".to_string(), OperatorName::ConnectorSource => "connector-source".to_string(), OperatorName::ConnectorSink => "connector-sink".to_string(), }; diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs index 22f58bbe..2fd9ad82 100644 --- a/src/sql/logical_node/logical/operator_name.rs +++ b/src/sql/logical_node/logical/operator_name.rs @@ -30,6 +30,7 @@ pub enum OperatorName { SlidingWindowAggregate, SessionWindowAggregate, UpdatingAggregate, + KeyBy, ConnectorSource, ConnectorSink, } From f54301f6b25658907299620ed0a141d3d4a6c129 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 25 Mar 2026 00:09:22 +0800 Subject: [PATCH 17/44] update --- src/runtime/streaming/connectors/mod.rs | 61 +++++++ src/runtime/streaming/format/config.rs | 38 +++++ src/runtime/streaming/format/deserializer.rs | 85 ++++++++++ src/runtime/streaming/format/json_encoder.rs | 170 +++++++++++++++++++ src/runtime/streaming/format/serializer.rs | 129 ++++++++++++++ src/runtime/streaming/operators/key_by.rs | 159 +++++++++++++++++ src/runtime/streaming/storage/backend.rs | 78 +++++++++ src/runtime/streaming/storage/manager.rs | 156 +++++++++++++++++ src/runtime/streaming/storage/table.rs | 91 ++++++++++ 9 files changed, 967 insertions(+) create mode 100644 src/runtime/streaming/connectors/mod.rs create mode 100644 src/runtime/streaming/format/config.rs create mode 100644 src/runtime/streaming/format/deserializer.rs create mode 100644 src/runtime/streaming/format/json_encoder.rs create mode 100644 src/runtime/streaming/format/serializer.rs create mode 100644 src/runtime/streaming/operators/key_by.rs create mode 100644 src/runtime/streaming/storage/backend.rs create mode 100644 src/runtime/streaming/storage/manager.rs create mode 100644 src/runtime/streaming/storage/table.rs diff --git a/src/runtime/streaming/connectors/mod.rs b/src/runtime/streaming/connectors/mod.rs new file mode 100644 index 00000000..d10a55a9 
--- /dev/null +++ b/src/runtime/streaming/connectors/mod.rs @@ -0,0 +1,61 @@ +use anyhow::Result; +use arrow_array::{ArrayRef, RecordBatch}; +use arrow_schema::Schema; +use async_trait::async_trait; +use std::collections::HashMap; +use std::sync::Arc; + +use crate::sql::common::OperatorConfig; + +/// 维表查询接口:由具体 Connector(如 Redis、MySQL)实现。 +#[async_trait] +pub trait LookupConnector: Send { + fn name(&self) -> &str; + + /// 根据 key 列批量查询外部系统,返回结果 batch(含 `_lookup_key_index` 列)。 + /// 返回 `None` 表示无匹配行。 + async fn lookup(&self, keys: &[ArrayRef]) -> Option>; +} + +/// Connector 工厂 trait:每种外部系统实现此 trait 提供 Source / Sink / Lookup 构建能力。 +pub trait Connector: Send + Sync { + fn name(&self) -> &str; + + fn make_lookup( + &self, + config: OperatorConfig, + schema: Arc, + ) -> Result>; +} + +/// 全局 Connector 注册表。 +pub struct ConnectorRegistry { + connectors: HashMap>, +} + +impl ConnectorRegistry { + pub fn new() -> Self { + Self { + connectors: HashMap::new(), + } + } + + pub fn register(&mut self, connector: Box) { + self.connectors + .insert(connector.name().to_string(), connector); + } + + pub fn get(&self, name: &str) -> Option<&dyn Connector> { + self.connectors.get(name).map(|c| c.as_ref()) + } +} + +/// 返回当前已注册的所有 Connector。 +/// +/// 目前返回空注册表,后续接入 Kafka / Redis 等时在此处注册。 +pub fn connectors() -> ConnectorRegistry { + let registry = ConnectorRegistry::new(); + // TODO: registry.register(Box::new(KafkaConnector)); + // TODO: registry.register(Box::new(RedisConnector)); + registry +} diff --git a/src/runtime/streaming/format/config.rs b/src/runtime/streaming/format/config.rs new file mode 100644 index 00000000..e0ac61bb --- /dev/null +++ b/src/runtime/streaming/format/config.rs @@ -0,0 +1,38 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub enum TimestampFormat { + RFC3339, + UnixMillis, + UnixSeconds, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub enum DecimalEncoding { + String, + Number, + 
Bytes, +} + +/// 数据容错策略 +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub enum BadDataPolicy { + /// 遇到脏数据直接报错,导致算子 Panic 和重启 + Fail, + /// 丢弃脏数据,并记录监控 Metrics + Drop, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct JsonFormat { + pub timestamp_format: TimestampFormat, + pub decimal_encoding: DecimalEncoding, + pub include_schema: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Format { + Json(JsonFormat), + RawString, + RawBytes, +} diff --git a/src/runtime/streaming/format/deserializer.rs b/src/runtime/streaming/format/deserializer.rs new file mode 100644 index 00000000..83360bd8 --- /dev/null +++ b/src/runtime/streaming/format/deserializer.rs @@ -0,0 +1,85 @@ +//! 数据反序列化器:将外界收到的字节流转化为结构化 [`RecordBatch`]。 + +use anyhow::{anyhow, Result}; +use arrow_array::builder::StringBuilder; +use arrow_array::RecordBatch; +use arrow_json::reader::ReaderBuilder; +use arrow_schema::SchemaRef; +use std::sync::Arc; + +use super::config::{BadDataPolicy, Format}; + +pub struct DataDeserializer { + format: Format, + schema: SchemaRef, + bad_data_policy: BadDataPolicy, +} + +impl DataDeserializer { + pub fn new(format: Format, schema: SchemaRef, bad_data_policy: BadDataPolicy) -> Self { + Self { + format, + schema, + bad_data_policy, + } + } + + /// 工业级反序列化:包含完整的脏数据容错兜底 + pub fn deserialize_batch(&self, messages: &[&[u8]]) -> Result { + match &self.format { + Format::Json(_) => self.deserialize_json(messages), + Format::RawString => self.deserialize_raw_string(messages), + Format::RawBytes => self.deserialize_raw_bytes(messages), + } + } + + fn deserialize_json(&self, messages: &[&[u8]]) -> Result { + let mut buffer = Vec::with_capacity(messages.len() * 256); + for msg in messages { + buffer.extend_from_slice(msg); + buffer.push(b'\n'); + } + + let allow_bad_data = self.bad_data_policy == BadDataPolicy::Drop; + let mut decoder = ReaderBuilder::new(self.schema.clone()) + .with_strict_mode(!allow_bad_data) + .build_decoder()?; + + 
decoder.decode(&buffer)?; + + let batch = if allow_bad_data { + let (batch, _mask, _, _errors) = decoder.flush_with_bad_data()?.unwrap(); + batch + } else { + decoder + .flush()? + .ok_or_else(|| anyhow!("JSON decoder returned no batch"))? + }; + + Ok(batch) + } + + fn deserialize_raw_string(&self, messages: &[&[u8]]) -> Result { + let mut builder = StringBuilder::with_capacity(messages.len(), messages.len() * 64); + for msg in messages { + builder.append_value(String::from_utf8_lossy(msg)); + } + + let array = Arc::new(builder.finish()); + RecordBatch::try_new(self.schema.clone(), vec![array]) + .map_err(|e| anyhow!("build RawString batch: {e}")) + } + + fn deserialize_raw_bytes(&self, messages: &[&[u8]]) -> Result { + use arrow_array::builder::BinaryBuilder; + + let mut builder = BinaryBuilder::with_capacity(messages.len(), messages.len() * 64); + for msg in messages { + builder.append_value(msg); + } + + let array = Arc::new(builder.finish()); + RecordBatch::try_new(self.schema.clone(), vec![array]) + .map_err(|e| anyhow!("build RawBytes batch: {e}")) + } +} diff --git a/src/runtime/streaming/format/json_encoder.rs b/src/runtime/streaming/format/json_encoder.rs new file mode 100644 index 00000000..7721d9bc --- /dev/null +++ b/src/runtime/streaming/format/json_encoder.rs @@ -0,0 +1,170 @@ +//! 极致优化的 Arrow JSON 编码器。 +//! +//! 
解决 Arrow 原生 JSON 导出时不兼容 Kafka / 时间戳 / Decimal 的痛点。 + +use arrow_array::{ + Array, Decimal128Array, TimestampMicrosecondArray, + TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, +}; +use arrow_json::writer::NullableEncoder; +use arrow_json::{Encoder, EncoderFactory, EncoderOptions}; +use arrow_schema::{ArrowError, DataType, FieldRef, TimeUnit}; +use base64::prelude::BASE64_STANDARD; +use base64::Engine; + +use super::config::{DecimalEncoding, TimestampFormat}; + +#[derive(Debug)] +pub struct CustomEncoderFactory { + pub timestamp_format: TimestampFormat, + pub decimal_encoding: DecimalEncoding, +} + +impl EncoderFactory for CustomEncoderFactory { + fn make_default_encoder<'a>( + &self, + _field: &'a FieldRef, + array: &'a dyn Array, + _options: &'a EncoderOptions, + ) -> Result>, ArrowError> { + let encoder: Box = match ( + &self.decimal_encoding, + &self.timestamp_format, + array.data_type(), + ) { + // ── Timestamp → Unix 毫秒 ── + (_, TimestampFormat::UnixMillis, DataType::Timestamp(TimeUnit::Nanosecond, _)) => { + let arr = array + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + Box::new(UnixMillisEncoder::Nanos(arr)) + } + (_, TimestampFormat::UnixMillis, DataType::Timestamp(TimeUnit::Microsecond, _)) => { + let arr = array + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + Box::new(UnixMillisEncoder::Micros(arr)) + } + (_, TimestampFormat::UnixMillis, DataType::Timestamp(TimeUnit::Millisecond, _)) => { + let arr = array + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + Box::new(UnixMillisEncoder::Millis(arr)) + } + (_, TimestampFormat::UnixMillis, DataType::Timestamp(TimeUnit::Second, _)) => { + let arr = array + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + Box::new(UnixMillisEncoder::Seconds(arr)) + } + + // ── Decimal128 → String / Bytes ── + (DecimalEncoding::String, _, DataType::Decimal128(_, _)) => { + let arr = array + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + 
Box::new(DecimalEncoder::StringEncoder(arr)) + } + (DecimalEncoding::Bytes, _, DataType::Decimal128(_, _)) => { + let arr = array + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + Box::new(DecimalEncoder::BytesEncoder(arr)) + } + + // ── Binary → Base64 ── + (_, _, DataType::Binary) => { + let arr = array + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + Box::new(BinaryEncoder(arr)) + } + + // 其他类型:降级使用 Arrow 原生 encoder + _ => return Ok(None), + }; + + Ok(Some(NullableEncoder::new(encoder, array.nulls().cloned()))) + } +} + +// --------------------------------------------------------------------------- +// UnixMillisEncoder — 各精度 Timestamp → i64 毫秒 +// --------------------------------------------------------------------------- + +enum UnixMillisEncoder { + Nanos(TimestampNanosecondArray), + Micros(TimestampMicrosecondArray), + Millis(TimestampMillisecondArray), + Seconds(TimestampSecondArray), +} + +impl Encoder for UnixMillisEncoder { + fn encode(&mut self, idx: usize, out: &mut Vec) { + let millis = match self { + Self::Nanos(arr) => arr.value(idx) / 1_000_000, + Self::Micros(arr) => arr.value(idx) / 1_000, + Self::Millis(arr) => arr.value(idx), + Self::Seconds(arr) => arr.value(idx) * 1_000, + }; + out.extend_from_slice(millis.to_string().as_bytes()); + } +} + +// --------------------------------------------------------------------------- +// DecimalEncoder — Decimal128 → JSON 字符串 / Base64 Bytes +// --------------------------------------------------------------------------- + +enum DecimalEncoder { + StringEncoder(Decimal128Array), + BytesEncoder(Decimal128Array), +} + +impl Encoder for DecimalEncoder { + fn encode(&mut self, idx: usize, out: &mut Vec) { + match self { + Self::StringEncoder(arr) => { + out.push(b'"'); + out.extend_from_slice(arr.value_as_string(idx).as_bytes()); + out.push(b'"'); + } + Self::BytesEncoder(arr) => { + out.push(b'"'); + out.extend_from_slice( + BASE64_STANDARD + .encode(arr.value(idx).to_be_bytes()) + .as_bytes(), + 
); + out.push(b'"'); + } + } + } +} + +// --------------------------------------------------------------------------- +// BinaryEncoder — Binary → Base64 字符串 +// --------------------------------------------------------------------------- + +struct BinaryEncoder(arrow_array::BinaryArray); + +impl Encoder for BinaryEncoder { + fn encode(&mut self, idx: usize, out: &mut Vec) { + out.push(b'"'); + out.extend_from_slice(BASE64_STANDARD.encode(self.0.value(idx)).as_bytes()); + out.push(b'"'); + } +} diff --git a/src/runtime/streaming/format/serializer.rs b/src/runtime/streaming/format/serializer.rs new file mode 100644 index 00000000..80969ec6 --- /dev/null +++ b/src/runtime/streaming/format/serializer.rs @@ -0,0 +1,129 @@ +//! 数据序列化器:将内存 [`RecordBatch`] 转换为二进制消息流,供 Sink 连接器发送。 + +use anyhow::{anyhow, Result}; +use arrow_array::{Array, RecordBatch, StructArray}; +use arrow_json::writer::make_encoder; +use arrow_json::EncoderOptions; +use arrow_schema::{DataType, Field, SchemaRef}; +use std::sync::Arc; + +use super::config::{Format, JsonFormat}; +use super::json_encoder::CustomEncoderFactory; + +pub struct DataSerializer { + format: Format, + projection_indices: Vec, +} + +impl DataSerializer { + pub fn new(format: Format, schema: SchemaRef) -> Self { + let projection_indices: Vec = schema + .fields() + .iter() + .enumerate() + .filter(|(_, f)| !f.name().starts_with('_')) + .map(|(i, _)| i) + .collect(); + + Self { + format, + projection_indices, + } + } + + pub fn serialize(&self, batch: &RecordBatch) -> Result>> { + let projected_batch = batch.project(&self.projection_indices)?; + + match &self.format { + Format::Json(config) => self.serialize_json(config, &projected_batch), + Format::RawString => self.serialize_raw_string(&projected_batch), + Format::RawBytes => self.serialize_raw_bytes(&projected_batch), + } + } + + fn serialize_json(&self, config: &JsonFormat, batch: &RecordBatch) -> Result>> { + let array = StructArray::from(batch.clone()); + let field = 
Arc::new(Field::new_struct( + "", + batch.schema().fields().clone(), + false, + )); + + let options = EncoderOptions::default() + .with_explicit_nulls(true) + .with_encoder_factory(Arc::new(CustomEncoderFactory { + timestamp_format: config.timestamp_format.clone(), + decimal_encoding: config.decimal_encoding.clone(), + })); + + let mut encoder = make_encoder(&field, &array, &options)?; + let mut results = Vec::with_capacity(batch.num_rows()); + + for idx in 0..array.len() { + let mut buffer = Vec::with_capacity(128); + encoder.encode(idx, &mut buffer); + if !buffer.is_empty() { + results.push(buffer); + } + } + Ok(results) + } + + fn serialize_raw_string(&self, batch: &RecordBatch) -> Result>> { + let value_idx = batch + .schema() + .index_of("value") + .map_err(|_| anyhow!("RawString format requires a 'value' column"))?; + + if *batch.schema().field(value_idx).data_type() != DataType::Utf8 { + return Err(anyhow!("RawString 'value' column must be Utf8")); + } + + let string_array = batch + .column(value_idx) + .as_any() + .downcast_ref::() + .unwrap(); + + let values: Vec> = (0..string_array.len()) + .map(|i| { + if string_array.is_null(i) { + vec![] + } else { + string_array.value(i).as_bytes().to_vec() + } + }) + .collect(); + + Ok(values) + } + + fn serialize_raw_bytes(&self, batch: &RecordBatch) -> Result>> { + let value_idx = batch + .schema() + .index_of("value") + .map_err(|_| anyhow!("RawBytes format requires a 'value' column"))?; + + if *batch.schema().field(value_idx).data_type() != DataType::Binary { + return Err(anyhow!("RawBytes 'value' column must be Binary")); + } + + let binary_array = batch + .column(value_idx) + .as_any() + .downcast_ref::() + .unwrap(); + + let values: Vec> = (0..binary_array.len()) + .map(|i| { + if binary_array.is_null(i) { + vec![] + } else { + binary_array.value(i).to_vec() + } + }) + .collect(); + + Ok(values) + } +} diff --git a/src/runtime/streaming/operators/key_by.rs b/src/runtime/streaming/operators/key_by.rs new file 
mode 100644 index 00000000..2c183577 --- /dev/null +++ b/src/runtime/streaming/operators/key_by.rs @@ -0,0 +1,159 @@ +//! 物理网络路由算子:利用 DataFusion 物理表达式提取 Key,基于 Hash 排序执行零拷贝切片路由。 + +use anyhow::{anyhow, Result}; +use arrow_array::{Array, RecordBatch, UInt64Array}; +use arrow::compute::{sort_to_indices, take}; +use async_trait::async_trait; +use datafusion::physical_expr::PhysicalExpr; +use datafusion_physical_expr::expressions::Column; +use datafusion_common::hash_utils::create_hashes; +use std::sync::Arc; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{CheckpointBarrier, Watermark}; + +use protocol::grpc::api::KeyPlanOperator; + +pub struct KeyByOperator { + name: String, + key_extractors: Vec>, + random_state: ahash::RandomState, +} + +impl KeyByOperator { + pub fn new(name: String, key_extractors: Vec>) -> Self { + Self { + name, + key_extractors, + random_state: ahash::RandomState::new(), + } + } +} + +#[async_trait] +impl MessageOperator for KeyByOperator { + fn name(&self) -> &str { + &self.name + } + + async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> { + Ok(()) + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + _ctx: &mut TaskContext, + ) -> Result> { + let num_rows = batch.num_rows(); + if num_rows == 0 { + return Ok(vec![]); + } + + // 1. 执行物理表达式,提取所有 Key 列 + let mut key_columns = Vec::with_capacity(self.key_extractors.len()); + for expr in &self.key_extractors { + let column_array = expr + .evaluate(&batch) + .map_err(|e| anyhow!("Failed to evaluate key expr: {}", e))? + .into_array(num_rows) + .map_err(|e| anyhow!("Failed to convert into array: {}", e))?; + key_columns.push(column_array); + } + + // 2. 
向量化计算 Hash 数组 + let mut hash_buffer = vec![0u64; num_rows]; + create_hashes(&key_columns, &self.random_state, &mut hash_buffer) + .map_err(|e| anyhow!("Failed to compute hashes: {}", e))?; + + let hash_array = UInt64Array::from(hash_buffer); + + // 3. 基于 Hash 值排序,获取重排 Indices + let sorted_indices = sort_to_indices(&hash_array, None, None) + .map_err(|e| anyhow!("Failed to sort hashes: {}", e))?; + + // 4. 对齐重排 Hash 数组和原始 Batch + let sorted_hashes_ref = take(&hash_array, &sorted_indices, None)?; + let sorted_hashes = sorted_hashes_ref + .as_any() + .downcast_ref::() + .unwrap(); + + let sorted_columns: std::result::Result, _> = batch + .columns() + .iter() + .map(|col| take(col, &sorted_indices, None)) + .collect(); + let sorted_batch = RecordBatch::try_new(batch.schema(), sorted_columns?)?; + + // 5. 零拷贝微批切片 —— 按 Hash 值连续段切分并标记路由意图 + let mut outputs = Vec::new(); + let mut start_idx = 0; + + while start_idx < num_rows { + let current_hash = sorted_hashes.value(start_idx); + let mut end_idx = start_idx + 1; + while end_idx < num_rows && sorted_hashes.value(end_idx) == current_hash { + end_idx += 1; + } + + let sub_batch = sorted_batch.slice(start_idx, end_idx - start_idx); + outputs.push(StreamOutput::Keyed(current_hash, sub_batch)); + start_idx = end_idx; + } + + Ok(outputs) + } + + async fn process_watermark( + &mut self, + watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + Ok(vec![StreamOutput::Watermark(watermark)]) + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + _ctx: &mut TaskContext, + ) -> Result<()> { + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + Ok(vec![]) + } +} + +// --------------------------------------------------------------------------- +// Constructor +// --------------------------------------------------------------------------- + +pub struct KeyByConstructor; + +impl KeyByConstructor { + pub fn with_config(&self, config: KeyPlanOperator) -> Result { + let mut 
key_extractors: Vec> = + Vec::with_capacity(config.key_fields.len()); + + for field_idx in &config.key_fields { + let idx = *field_idx as usize; + let expr = Arc::new(Column::new(&format!("col_{}", idx), idx)) + as Arc; + key_extractors.push(expr); + } + + let name = if config.name.is_empty() { + "KeyBy".to_string() + } else { + config.name.clone() + }; + + Ok(KeyByOperator::new(name, key_extractors)) + } +} + diff --git a/src/runtime/streaming/storage/backend.rs b/src/runtime/streaming/storage/backend.rs new file mode 100644 index 00000000..265b99ca --- /dev/null +++ b/src/runtime/streaming/storage/backend.rs @@ -0,0 +1,78 @@ +use anyhow::Result; +use async_trait::async_trait; + +#[derive(Default, Debug, Clone)] +pub struct CheckpointMetadata { + pub job_id: String, + pub epoch: u32, + pub min_epoch: u32, + pub operator_ids: Vec, +} + +#[derive(Default, Debug, Clone)] +pub struct OperatorCheckpointMetadata { + pub operator_id: String, + pub epoch: u32, +} + +#[async_trait] +pub trait BackingStore: Send + Sync + 'static { + fn name() -> &'static str; + async fn load_checkpoint_metadata(job_id: &str, epoch: u32) -> Result; + async fn load_operator_metadata( + job_id: &str, + operator_id: &str, + epoch: u32, + ) -> Result>; + async fn write_operator_checkpoint_metadata( + metadata: OperatorCheckpointMetadata, + ) -> Result<()>; + async fn write_checkpoint_metadata(metadata: CheckpointMetadata) -> Result<()>; + async fn cleanup_checkpoint( + metadata: CheckpointMetadata, + old_min_epoch: u32, + new_min_epoch: u32, + ) -> Result<()>; +} + +pub struct ParquetStateBackend; + +#[async_trait] +impl BackingStore for ParquetStateBackend { + fn name() -> &'static str { + "parquet" + } + + async fn load_checkpoint_metadata( + _job_id: &str, + _epoch: u32, + ) -> Result { + Ok(CheckpointMetadata::default()) + } + + async fn load_operator_metadata( + _job_id: &str, + _operator_id: &str, + _epoch: u32, + ) -> Result> { + Ok(None) + } + + async fn 
write_operator_checkpoint_metadata( + _metadata: OperatorCheckpointMetadata, + ) -> Result<()> { + Ok(()) + } + + async fn write_checkpoint_metadata(_metadata: CheckpointMetadata) -> Result<()> { + Ok(()) + } + + async fn cleanup_checkpoint( + _metadata: CheckpointMetadata, + _old_min_epoch: u32, + _new_min_epoch: u32, + ) -> Result<()> { + Ok(()) + } +} diff --git a/src/runtime/streaming/storage/manager.rs b/src/runtime/streaming/storage/manager.rs new file mode 100644 index 00000000..2aa79e6b --- /dev/null +++ b/src/runtime/streaming/storage/manager.rs @@ -0,0 +1,156 @@ +use anyhow::{Result, anyhow}; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::SystemTime; + +use super::table::TaskInfo; +use super::{DummyStorageProvider, StorageProviderRef}; + +#[derive(Default)] +pub struct GlobalKeyedView { + data: HashMap, +} + +impl GlobalKeyedView { + pub async fn insert(&mut self, key: K, value: V) { + self.data.insert(key, value); + } + + pub fn get(&self, key: &K) -> Option<&V> { + self.data.get(key) + } + + pub fn get_all(&self) -> &HashMap { + &self.data + } +} + +#[derive(Default)] +pub struct ExpiringTimeKeyView; + +impl ExpiringTimeKeyView { + pub fn insert(&mut self, _timestamp: SystemTime, _batch: arrow_array::RecordBatch) {} + + pub fn all_batches_for_watermark( + &self, + _watermark: Option, + ) -> std::iter::Empty<(&SystemTime, &Vec)> { + std::iter::empty() + } + + pub async fn flush(&mut self, _watermark: Option) -> Result<()> { + Ok(()) + } +} + +#[derive(Default)] +pub struct KeyTimeView; + +impl KeyTimeView { + pub async fn insert( + &mut self, + _batch: arrow_array::RecordBatch, + ) -> Result> { + Ok(vec![]) + } + + pub fn get_batch(&self, _key: &[u8]) -> Result> { + Ok(None) + } +} + +pub struct BackendWriter {} + +pub struct TableManager { + epoch: u32, + min_epoch: u32, + writer: BackendWriter, + task_info: Arc, + storage: StorageProviderRef, + caches: HashMap>, +} + +impl TableManager { + /// 加载状态后端(返回默认的空 Manager) + pub async fn 
load(task_info: Arc) -> Result<(Self, Option)> { + let manager = Self { + epoch: 1, + min_epoch: 1, + writer: BackendWriter {}, + task_info, + storage: Arc::new(DummyStorageProvider), + caches: HashMap::new(), + }; + Ok((manager, None)) + } + + /// 接收到 CheckpointBarrier 时(空操作) + pub async fn checkpoint( + &mut self, + _epoch: u32, + _watermark: Option, + _then_stop: bool, + ) { + } + + /// 面向算子的 API:获取全局 Key-Value 表 + pub async fn get_global_keyed_state< + K: Eq + std::hash::Hash + Send + 'static, + V: Send + 'static, + >( + &mut self, + table_name: &str, + ) -> Result<&mut GlobalKeyedView> { + if !self.caches.contains_key(table_name) { + let view: Box = + Box::new(GlobalKeyedView:: { data: HashMap::new() }); + self.caches.insert(table_name.to_string(), view); + } + + let cache = self.caches.get_mut(table_name).unwrap(); + + let view = cache + .downcast_mut::>() + .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?; + + Ok(view) + } + + /// 面向算子的 API:获取带 TTL 的时间键值表 + pub async fn get_expiring_time_key_table( + &mut self, + table_name: &str, + _watermark: Option, + ) -> Result<&mut ExpiringTimeKeyView> { + if !self.caches.contains_key(table_name) { + let view: Box = Box::new(ExpiringTimeKeyView::default()); + self.caches.insert(table_name.to_string(), view); + } + + let cache = self.caches.get_mut(table_name).unwrap(); + let view = cache + .downcast_mut::() + .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?; + + Ok(view) + } + + /// 面向算子的 API:获取标准的 Key-Time 双重映射表 + pub async fn get_key_time_table( + &mut self, + table_name: &str, + _watermark: Option, + ) -> Result<&mut KeyTimeView> { + if !self.caches.contains_key(table_name) { + let view: Box = Box::new(KeyTimeView::default()); + self.caches.insert(table_name.to_string(), view); + } + + let cache = self.caches.get_mut(table_name).unwrap(); + let view = cache + .downcast_mut::() + .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?; + + Ok(view) + } +} diff --git 
a/src/runtime/streaming/storage/table.rs b/src/runtime/streaming/storage/table.rs new file mode 100644 index 00000000..4b37ec4a --- /dev/null +++ b/src/runtime/streaming/storage/table.rs @@ -0,0 +1,91 @@ +use anyhow::Result; +use arrow_array::RecordBatch; +use async_trait::async_trait; +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +#[derive(Default)] +pub struct TaskInfo { + pub job_id: String, + pub operator_id: String, + pub task_index: u32, +} + +#[derive(Debug)] +pub enum TableData { + RecordBatch(RecordBatch), + CommitData { data: Vec }, + KeyedData { key: Vec, value: Vec }, +} + +pub struct CheckpointMessage { + pub epoch: u32, + pub time: std::time::SystemTime, + pub watermark: Option, + pub then_stop: bool, +} + +#[async_trait] +pub trait TableEpochCheckpointer: Send + 'static { + type SubTableCheckpointMessage: prost::Message + Default; + + async fn insert_data(&mut self, _data: TableData) -> Result<()> { + Ok(()) + } + + async fn finish( + self: Box, + _checkpoint: &CheckpointMessage, + ) -> Result> { + Ok(None) + } + + fn subtask_index(&self) -> u32; +} + +#[async_trait] +pub trait Table: Send + Sync + 'static + Clone { + type Checkpointer: TableEpochCheckpointer< + SubTableCheckpointMessage = Self::TableSubtaskCheckpointMetadata, + >; + type ConfigMessage: prost::Message + Default; + type TableCheckpointMessage: prost::Message + Default + Clone; + type TableSubtaskCheckpointMetadata: prost::Message + Default + Clone; + + fn from_config( + _config: Self::ConfigMessage, + _task_info: Arc, + _storage_provider: super::StorageProviderRef, + _checkpoint_message: Option, + _state_version: u32, + ) -> Result + where + Self: Sized; + + fn epoch_checkpointer( + &self, + _epoch: u32, + _previous_metadata: Option, + ) -> Result; + + fn merge_checkpoint_metadata( + _config: Self::ConfigMessage, + _subtask_metadata: HashMap, + ) -> Result> { + Ok(None) + } + + fn subtask_metadata_from_table( + &self, + _table_metadata: 
Self::TableCheckpointMessage, + ) -> Result> { + Ok(None) + } + + fn files_to_keep( + _config: Self::ConfigMessage, + _checkpoint: Self::TableCheckpointMessage, + ) -> Result> { + Ok(HashSet::new()) + } +} From ee03dc83b92edf11c701728368786c2b032d5f98 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 25 Mar 2026 00:34:58 +0800 Subject: [PATCH 18/44] update --- src/runtime/streaming/api/operator.rs | 5 +++++ src/runtime/streaming/cluster/manager.rs | 2 +- src/runtime/streaming/cluster/master.rs | 5 ++--- src/sql/logical_node/logical/mod.rs | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs index 3cd5a316..6eb49d2c 100644 --- a/src/runtime/streaming/api/operator.rs +++ b/src/runtime/streaming/api/operator.rs @@ -8,6 +8,7 @@ use datafusion::common::Result as DfResult; use datafusion::execution::context::SessionContext; use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF}; +use datafusion::logical_expr::planner::ExprPlanner; use std::collections::HashSet; use std::sync::Arc; use std::time::Duration; @@ -59,6 +60,10 @@ impl FunctionRegistry for Registry { fn udwf(&self, name: &str) -> DfResult> { self.ctx.udwf(name) } + + fn expr_planners(&self) -> Vec> { + self.ctx.expr_planners() + } } // --------------------------------------------------------------------------- diff --git a/src/runtime/streaming/cluster/manager.rs b/src/runtime/streaming/cluster/manager.rs index ce8ec881..34045dee 100644 --- a/src/runtime/streaming/cluster/manager.rs +++ b/src/runtime/streaming/cluster/manager.rs @@ -7,7 +7,7 @@ use crate::runtime::streaming::factory::OperatorFactory; use crate::runtime::streaming::memory::MemoryPool; use crate::runtime::streaming::network::NetworkEnvironment; use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; -use arroyo_state::tables::table_manager::TableManager; +use 
crate::runtime::streaming::storage::manager::TableManager; use std::collections::HashMap; use std::sync::Arc; use tokio::sync::mpsc::{channel, Sender}; diff --git a/src/runtime/streaming/cluster/master.rs b/src/runtime/streaming/cluster/master.rs index 5817643d..e456d8e3 100644 --- a/src/runtime/streaming/cluster/master.rs +++ b/src/runtime/streaming/cluster/master.rs @@ -6,10 +6,9 @@ use crate::runtime::streaming::cluster::graph::{ PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, VertexId, }; -use arroyo_datastream::logical::{LogicalEdgeType, LogicalGraph, OperatorChain}; use petgraph::Direction; use sha2::{Digest, Sha256}; -use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph}; +use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, OperatorChain}; #[derive(thiserror::Error, Debug)] pub enum CompileError { @@ -167,7 +166,7 @@ impl JobCompiler { LogicalEdgeType::Shuffle | LogicalEdgeType::LeftJoin | LogicalEdgeType::RightJoin => { - if let Some(key_indices) = edge.schema.key_indices.as_ref() { + if let Some(key_indices) = edge.schema.storage_keys() { if !key_indices.is_empty() { PartitioningStrategy::HashByKeys(key_indices.clone()) } else { diff --git a/src/sql/logical_node/logical/mod.rs b/src/sql/logical_node/logical/mod.rs index 3a94d1f3..ab318804 100644 --- a/src/sql/logical_node/logical/mod.rs +++ b/src/sql/logical_node/logical/mod.rs @@ -26,6 +26,7 @@ pub use logical_edge::{LogicalEdge, LogicalEdgeType}; pub use logical_graph::{LogicalGraph, Optimizer}; pub use logical_node::LogicalNode; pub use logical_program::LogicalProgram; +pub use operator_chain::OperatorChain; pub use operator_name::OperatorName; pub use program_config::ProgramConfig; pub use python_udf_config::PythonUdfConfig; From 5dc090c2e09f5b0ac480d645ed1b768fc269f8fa Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Thu, 26 Mar 2026 00:53:26 +0800 Subject: [PATCH 19/44] update --- .../streaming/operators/key_operator.rs | 289 
++++++++++++++++++ src/runtime/streaming/operators/mod.rs | 6 + src/runtime/streaming/operators/projection.rs | 81 +++++ .../operators/stateless_physical_executor.rs | 77 +++++ .../streaming/operators/value_execution.rs | 66 ++++ 5 files changed, 519 insertions(+) create mode 100644 src/runtime/streaming/operators/key_operator.rs create mode 100644 src/runtime/streaming/operators/projection.rs create mode 100644 src/runtime/streaming/operators/stateless_physical_executor.rs create mode 100644 src/runtime/streaming/operators/value_execution.rs diff --git a/src/runtime/streaming/operators/key_operator.rs b/src/runtime/streaming/operators/key_operator.rs new file mode 100644 index 00000000..93a50db5 --- /dev/null +++ b/src/runtime/streaming/operators/key_operator.rs @@ -0,0 +1,289 @@ +//! 物理网络路由算子:利用 DataFusion 物理表达式提取 Key,基于 Hash 排序执行零拷贝切片路由。 +//! +//! 提供两种算子: +//! - [`KeyByOperator`]:纯 Key 提取 + Hash 路由,适用于简单的 GROUP BY / PARTITION BY。 +//! - [`KeyExecutionOperator`]:先执行完整物理计划,再按指定列 Hash 路由,适用于需要先做 +//! 
计算(如聚合结果映射)再分区的场景。 + +use anyhow::{anyhow, Result}; +use arrow_array::{Array, ArrayRef, RecordBatch, UInt64Array}; +use arrow::compute::{sort_to_indices, take}; +use async_trait::async_trait; +use datafusion::physical_expr::PhysicalExpr; +use datafusion_physical_expr::expressions::Column; +use datafusion_common::hash_utils::create_hashes; +use futures::StreamExt; +use std::sync::Arc; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::operators::StatelessPhysicalExecutor; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{CheckpointBarrier, Watermark}; + +use protocol::grpc::api::KeyPlanOperator; + +pub struct KeyByOperator { + name: String, + key_extractors: Vec>, + random_state: ahash::RandomState, +} + +impl KeyByOperator { + pub fn new(name: String, key_extractors: Vec>) -> Self { + Self { + name, + key_extractors, + random_state: ahash::RandomState::new(), + } + } +} + +#[async_trait] +impl MessageOperator for KeyByOperator { + fn name(&self) -> &str { + &self.name + } + + async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> { + Ok(()) + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + _ctx: &mut TaskContext, + ) -> Result> { + let num_rows = batch.num_rows(); + if num_rows == 0 { + return Ok(vec![]); + } + + // 1. 执行物理表达式,提取所有 Key 列 + let mut key_columns = Vec::with_capacity(self.key_extractors.len()); + for expr in &self.key_extractors { + let column_array = expr + .evaluate(&batch) + .map_err(|e| anyhow!("Failed to evaluate key expr: {}", e))? + .into_array(num_rows) + .map_err(|e| anyhow!("Failed to convert into array: {}", e))?; + key_columns.push(column_array); + } + + // 2. 
向量化计算 Hash 数组 + let mut hash_buffer = vec![0u64; num_rows]; + create_hashes(&key_columns, &self.random_state, &mut hash_buffer) + .map_err(|e| anyhow!("Failed to compute hashes: {}", e))?; + + let hash_array = UInt64Array::from(hash_buffer); + + // 3. 基于 Hash 值排序,获取重排 Indices + let sorted_indices = sort_to_indices(&hash_array, None, None) + .map_err(|e| anyhow!("Failed to sort hashes: {}", e))?; + + // 4. 对齐重排 Hash 数组和原始 Batch + let sorted_hashes_ref = take(&hash_array, &sorted_indices, None)?; + let sorted_hashes = sorted_hashes_ref + .as_any() + .downcast_ref::() + .unwrap(); + + let sorted_columns: std::result::Result, _> = batch + .columns() + .iter() + .map(|col| take(col, &sorted_indices, None)) + .collect(); + let sorted_batch = RecordBatch::try_new(batch.schema(), sorted_columns?)?; + + // 5. 零拷贝微批切片 —— 按 Hash 值连续段切分并标记路由意图 + let mut outputs = Vec::new(); + let mut start_idx = 0; + + while start_idx < num_rows { + let current_hash = sorted_hashes.value(start_idx); + let mut end_idx = start_idx + 1; + while end_idx < num_rows && sorted_hashes.value(end_idx) == current_hash { + end_idx += 1; + } + + let sub_batch = sorted_batch.slice(start_idx, end_idx - start_idx); + outputs.push(StreamOutput::Keyed(current_hash, sub_batch)); + start_idx = end_idx; + } + + Ok(outputs) + } + + async fn process_watermark( + &mut self, + watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + Ok(vec![StreamOutput::Watermark(watermark)]) + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + _ctx: &mut TaskContext, + ) -> Result<()> { + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + Ok(vec![]) + } +} + +// --------------------------------------------------------------------------- +// Constructor +// --------------------------------------------------------------------------- + +pub struct KeyByConstructor; + +impl KeyByConstructor { + pub fn with_config(&self, config: KeyPlanOperator) -> Result { + let mut 
key_extractors: Vec> = + Vec::with_capacity(config.key_fields.len()); + + for field_idx in &config.key_fields { + let idx = *field_idx as usize; + let expr = Arc::new(Column::new(&format!("col_{}", idx), idx)) + as Arc; + key_extractors.push(expr); + } + + let name = if config.name.is_empty() { + "KeyBy".to_string() + } else { + config.name.clone() + }; + + Ok(KeyByOperator::new(name, key_extractors)) + } +} + +// =========================================================================== +// KeyExecutionOperator — 先执行物理计划,再按 Key 列 Hash 路由 +// =========================================================================== + +/// 键控路由执行算子:先驱动 DataFusion 物理计划完成计算(如聚合结果映射), +/// 再根据 `key_fields` 指定列计算 Hash 并以 [`StreamOutput::Keyed`] 输出, +/// 实现算子内部分区。 +pub struct KeyExecutionOperator { + name: String, + executor: StatelessPhysicalExecutor, + key_fields: Vec, + random_state: ahash::RandomState, +} + +impl KeyExecutionOperator { + pub fn new( + name: String, + executor: StatelessPhysicalExecutor, + key_fields: Vec, + ) -> Self { + Self { + name, + executor, + key_fields, + random_state: ahash::RandomState::new(), + } + } +} + +#[async_trait] +impl MessageOperator for KeyExecutionOperator { + fn name(&self) -> &str { + &self.name + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + _ctx: &mut TaskContext, + ) -> Result> { + let mut outputs = Vec::new(); + + // 1. 执行物理转换 + let mut stream = self.executor.process_batch(batch).await?; + + while let Some(batch_result) = stream.next().await { + let out_batch = batch_result?; + let num_rows = out_batch.num_rows(); + if num_rows == 0 { + continue; + } + + // 2. 
提取 Key 列并计算 Hash + let key_columns: Vec = self + .key_fields + .iter() + .map(|&idx| out_batch.column(idx).clone()) + .collect(); + + let mut hash_buffer = vec![0u64; num_rows]; + create_hashes(&key_columns, &self.random_state, &mut hash_buffer) + .map_err(|e| anyhow!("hash compute: {e}"))?; + let hash_array = UInt64Array::from(hash_buffer); + + // 3. 基于 Hash 排序,获取重排 Indices + let sorted_indices = sort_to_indices(&hash_array, None, None) + .map_err(|e| anyhow!("sort hashes: {e}"))?; + + let sorted_hashes_ref = take(&hash_array, &sorted_indices, None)?; + let sorted_hashes = sorted_hashes_ref + .as_any() + .downcast_ref::() + .unwrap(); + + let sorted_columns: std::result::Result, _> = out_batch + .columns() + .iter() + .map(|col| take(col, &sorted_indices, None)) + .collect(); + let sorted_batch = + RecordBatch::try_new(out_batch.schema(), sorted_columns?)?; + + // 4. 零拷贝切片 —— 按 Hash 连续段分组,标记 Keyed 路由意图 + let mut start_idx = 0; + while start_idx < num_rows { + let current_hash = sorted_hashes.value(start_idx); + let mut end_idx = start_idx + 1; + while end_idx < num_rows + && sorted_hashes.value(end_idx) == current_hash + { + end_idx += 1; + } + + let sub_batch = sorted_batch.slice(start_idx, end_idx - start_idx); + outputs.push(StreamOutput::Keyed(current_hash, sub_batch)); + start_idx = end_idx; + } + } + Ok(outputs) + } + + async fn process_watermark( + &mut self, + watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + Ok(vec![StreamOutput::Watermark(watermark)]) + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + _ctx: &mut TaskContext, + ) -> Result<()> { + Ok(()) + } + + async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { + Ok(vec![]) + } +} + diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs index 66d3e892..e3c0f566 100644 --- a/src/runtime/streaming/operators/mod.rs +++ b/src/runtime/streaming/operators/mod.rs @@ -7,6 +7,12 @@ pub mod sink; pub mod source; pub 
mod watermark; pub mod windows; +mod key_operator; +mod projection; +mod stateless_physical_executor; +mod value_execution; + +pub use stateless_physical_executor::StatelessPhysicalExecutor; pub use grouping::{IncrementalAggregatingFunc, Key, UpdatingCache}; pub use joins::{ diff --git a/src/runtime/streaming/operators/projection.rs b/src/runtime/streaming/operators/projection.rs new file mode 100644 index 00000000..5e62afc6 --- /dev/null +++ b/src/runtime/streaming/operators/projection.rs @@ -0,0 +1,81 @@ +//! 高性能投影算子:直接操作 Arrow Array 执行列映射与标量运算, +//! 避开 DataFusion 执行树开销,适用于 SELECT 字段筛选和简单标量计算。 + +use anyhow::Result; +use arrow_array::RecordBatch; +use async_trait::async_trait; +use datafusion::physical_expr::PhysicalExpr; +use std::sync::Arc; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{CheckpointBarrier, FsSchemaRef, Watermark}; + +pub struct ProjectionOperator { + name: String, + output_schema: FsSchemaRef, + exprs: Vec>, +} + +impl ProjectionOperator { + pub fn new( + name: String, + output_schema: FsSchemaRef, + exprs: Vec>, + ) -> Self { + Self { + name, + output_schema, + exprs, + } + } +} + +#[async_trait] +impl MessageOperator for ProjectionOperator { + fn name(&self) -> &str { + &self.name + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + _ctx: &mut TaskContext, + ) -> Result> { + if batch.num_rows() == 0 { + return Ok(vec![]); + } + + let projected_columns = self + .exprs + .iter() + .map(|expr| { + expr.evaluate(&batch) + .and_then(|val| val.into_array(batch.num_rows())) + }) + .collect::>>()?; + + let out_batch = + RecordBatch::try_new(self.output_schema.schema.clone(), projected_columns)?; + + Ok(vec![StreamOutput::Forward(out_batch)]) + } + + async fn process_watermark( + &mut self, + watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + 
Ok(vec![StreamOutput::Watermark(watermark)]) + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + _ctx: &mut TaskContext, + ) -> Result<()> { + Ok(()) + } +} diff --git a/src/runtime/streaming/operators/stateless_physical_executor.rs b/src/runtime/streaming/operators/stateless_physical_executor.rs new file mode 100644 index 00000000..9e801188 --- /dev/null +++ b/src/runtime/streaming/operators/stateless_physical_executor.rs @@ -0,0 +1,77 @@ +//! 无状态物理计划执行器:将单批次写入 `SingleLockedBatch` 并让 DataFusion 计划消费。 + +use std::sync::{Arc, RwLock}; + +use anyhow::{anyhow, Result}; +use arrow_array::RecordBatch; +use datafusion::execution::context::SessionContext; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::execution::SendableRecordBatchStream; +use datafusion::execution::TaskContext; +use datafusion::physical_plan::ExecutionPlan; +use datafusion_proto::physical_plan::AsExecutionPlan; +use datafusion_proto::protobuf::PhysicalPlanNode; +use futures::StreamExt; +use prost::Message; + +use crate::runtime::streaming::api::operator::Registry; +use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; + +pub struct StatelessPhysicalExecutor { + batch: Arc>>, + plan: Arc, + task_context: Arc, +} + +impl StatelessPhysicalExecutor { + pub fn new(mut proto: &[u8], registry: &Registry) -> Result { + let batch = Arc::new(RwLock::default()); + + let plan_node = PhysicalPlanNode::decode(&mut proto) + .map_err(|e| anyhow!("decode PhysicalPlanNode: {e}"))?; + let codec = FsPhysicalExtensionCodec { + context: DecodingContext::SingleLockedBatch(batch.clone()), + }; + + let plan = plan_node.try_into_physical_plan( + registry, + &RuntimeEnvBuilder::new().build()?, + &codec, + )?; + + Ok(Self { + batch, + plan, + task_context: SessionContext::new().task_ctx(), + }) + } + + pub async fn process_batch(&mut self, batch: RecordBatch) -> Result { + { + let mut writer = self + .batch + .write() + .map_err(|e| 
anyhow!("SingleLockedBatch lock: {e}"))?; + *writer = Some(batch); + } + self.plan + .reset() + .map_err(|e| anyhow!("reset execution plan: {e}"))?; + self.plan + .execute(0, self.task_context.clone()) + .map_err(|e| anyhow!("failed to compute plan: {e}")) + } + + pub async fn process_single(&mut self, batch: RecordBatch) -> Result { + let mut stream = self.process_batch(batch).await?; + let result = stream + .next() + .await + .ok_or_else(|| anyhow!("empty output stream"))??; + anyhow::ensure!( + stream.next().await.is_none(), + "expected exactly one output batch" + ); + Ok(result) + } +} diff --git a/src/runtime/streaming/operators/value_execution.rs b/src/runtime/streaming/operators/value_execution.rs new file mode 100644 index 00000000..b9fb0cd8 --- /dev/null +++ b/src/runtime/streaming/operators/value_execution.rs @@ -0,0 +1,66 @@ +//! 通用无状态执行算子:驱动 DataFusion 物理计划(Filter, Case When, Scalar UDF 等), +//! 不改变分区状态,适用于 Map / Filter 阶段。 + +use anyhow::Result; +use arrow_array::RecordBatch; +use async_trait::async_trait; +use futures::StreamExt; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::operators::StatelessPhysicalExecutor; +use crate::runtime::streaming::StreamOutput; +use crate::sql::common::{CheckpointBarrier, Watermark}; + +pub struct ValueExecutionOperator { + name: String, + executor: StatelessPhysicalExecutor, +} + +impl ValueExecutionOperator { + pub fn new(name: String, executor: StatelessPhysicalExecutor) -> Self { + Self { name, executor } + } +} + +#[async_trait] +impl MessageOperator for ValueExecutionOperator { + fn name(&self) -> &str { + &self.name + } + + async fn process_data( + &mut self, + _input_idx: usize, + batch: RecordBatch, + _ctx: &mut TaskContext, + ) -> Result> { + let mut outputs = Vec::new(); + + let mut stream = self.executor.process_batch(batch).await?; + + while let Some(batch_result) = stream.next().await { + let 
out_batch = batch_result?; + if out_batch.num_rows() > 0 { + outputs.push(StreamOutput::Forward(out_batch)); + } + } + Ok(outputs) + } + + async fn process_watermark( + &mut self, + watermark: Watermark, + _ctx: &mut TaskContext, + ) -> Result> { + Ok(vec![StreamOutput::Watermark(watermark)]) + } + + async fn snapshot_state( + &mut self, + _barrier: CheckpointBarrier, + _ctx: &mut TaskContext, + ) -> Result<()> { + Ok(()) + } +} From 97b978e0b35afcd90e8e1149035939fd2ad346a0 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Thu, 26 Mar 2026 21:35:43 +0800 Subject: [PATCH 20/44] update --- src/runtime/streaming/job/edge_manager.rs | 41 ++++ src/runtime/streaming/job/job_manager.rs | 198 +++++++++++++++ src/runtime/streaming/job/mod.rs | 7 + src/runtime/streaming/job/models.rs | 35 +++ src/runtime/streaming/job/pipeline_runner.rs | 242 +++++++++++++++++++ src/runtime/streaming/lib.rs | 2 + src/runtime/streaming/mod.rs | 3 +- src/runtime/streaming/state/mod.rs | 0 src/runtime/streaming/state/table_manager.rs | 0 9 files changed, 527 insertions(+), 1 deletion(-) create mode 100644 src/runtime/streaming/job/edge_manager.rs create mode 100644 src/runtime/streaming/job/job_manager.rs create mode 100644 src/runtime/streaming/job/mod.rs create mode 100644 src/runtime/streaming/job/models.rs create mode 100644 src/runtime/streaming/job/pipeline_runner.rs delete mode 100644 src/runtime/streaming/state/mod.rs delete mode 100644 src/runtime/streaming/state/table_manager.rs diff --git a/src/runtime/streaming/job/edge_manager.rs b/src/runtime/streaming/job/edge_manager.rs new file mode 100644 index 00000000..10ca97f1 --- /dev/null +++ b/src/runtime/streaming/job/edge_manager.rs @@ -0,0 +1,41 @@ +use std::collections::HashMap; + +use protocol::grpc::api::{FsEdge, FsNode}; +use tokio::sync::mpsc; + +use crate::runtime::streaming::protocol::tracked::TrackedEvent; + +pub struct EdgeManager { + // PipelineID -> (输入 Receiver, 输出 Sender 列表) + endpoints: HashMap>, Vec>)>, +} + +impl 
EdgeManager { + pub fn build(nodes: &[FsNode], edges: &[FsEdge]) -> Self { + let mut tx_map: HashMap>> = HashMap::new(); + let mut rx_map: HashMap> = HashMap::new(); + + for edge in edges { + let (tx, rx) = mpsc::channel(2048); + tx_map.entry(edge.source as u32).or_default().push(tx); + rx_map.insert(edge.target as u32, rx); + } + + let mut endpoints = HashMap::new(); + for node in nodes { + let id = node.node_index as u32; + endpoints.insert(id, (rx_map.remove(&id), tx_map.remove(&id).unwrap_or_default())); + } + + Self { endpoints } + } + + pub fn take_endpoints( + &mut self, + id: u32, + ) -> (Option>, Vec>) { + self.endpoints + .remove(&id) + .expect("Critical: Execution Graph Inconsistent") + } +} diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs new file mode 100644 index 00000000..82b02b3d --- /dev/null +++ b/src/runtime/streaming/job/job_manager.rs @@ -0,0 +1,198 @@ +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; + +use protocol::grpc::api::{ChainedOperator, FsProgram}; +use tokio::sync::mpsc; +use tracing::error; + +use crate::runtime::streaming::api::operator::ConstructedOperator; +use crate::runtime::streaming::factory::OperatorFactory; +use crate::runtime::streaming::job::edge_manager::EdgeManager; +use crate::runtime::streaming::job::models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus}; +use crate::runtime::streaming::job::pipeline_runner::{FusionOperatorChain, PipelineRunner}; +use crate::runtime::streaming::memory::MemoryPool; +use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; +use crate::runtime::streaming::storage::manager::TableManager; + +pub struct JobManager { + active_jobs: Arc>>, + operator_factory: Arc, + memory_pool: Arc, + table_manager: Option>>, +} + +impl JobManager { + pub fn new( + operator_factory: Arc, + max_memory_bytes: usize, + table_manager: Option>>, + ) -> Self { + Self { + active_jobs: Arc::new(RwLock::new(HashMap::new())), 
+ operator_factory, + memory_pool: MemoryPool::new(max_memory_bytes), + table_manager, + } + } + + /// 从逻辑计划点火物理线程 + pub async fn submit_job(&self, program: FsProgram) -> anyhow::Result { + let job_id = format!("job-{}", chrono::Utc::now().timestamp_millis()); + + let mut edge_manager = EdgeManager::build(&program.nodes, &program.edges); + let mut physical_pipelines = HashMap::new(); + + for node in &program.nodes { + let pipe_id = node.node_index as u32; + let (inbox, outboxes) = edge_manager.take_endpoints(pipe_id); + let chain = self.create_chain(&node.operators)?; + let (ctrl_tx, ctrl_rx) = mpsc::channel(64); + let status = Arc::new(RwLock::new(PipelineStatus::Initializing)); + + let thread_status = status.clone(); + let job_id_for_thread = job_id.clone(); + let exit_job_id = job_id_for_thread.clone(); + let registry_ptr = self.active_jobs.clone(); + let memory_pool = self.memory_pool.clone(); + let table_manager = self.table_manager.clone(); + + let handle = std::thread::Builder::new() + .name(format!("Job-{}-Pipe-{}", job_id, pipe_id)) + .spawn(move || { + { + let mut st = thread_status.write().unwrap(); + *st = PipelineStatus::Running; + } + + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("build current thread runtime"); + + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + rt.block_on(async move { + let mut runner = PipelineRunner::new( + pipe_id, + chain, + inbox, + outboxes, + ctrl_rx, + job_id_for_thread.clone(), + memory_pool, + table_manager, + ); + runner.run().await + }) + })); + + Self::on_pipeline_exit(exit_job_id, pipe_id, result, thread_status, registry_ptr); + })?; + + physical_pipelines.insert( + pipe_id, + PhysicalPipeline { + pipeline_id: pipe_id, + handle: Some(handle), + status, + control_tx: ctrl_tx, + }, + ); + } + + let graph = PhysicalExecutionGraph { + job_id: job_id.clone(), + program, + pipelines: physical_pipelines, + start_time: std::time::Instant::now(), + }; + 
+ self.active_jobs.write().unwrap().insert(job_id.clone(), graph); + Ok(job_id) + } + + pub async fn stop_job(&self, job_id: &str, mode: StopMode) -> anyhow::Result<()> { + let controllers = { + let jobs = self.active_jobs.read().unwrap(); + let graph = jobs + .get(job_id) + .ok_or_else(|| anyhow::anyhow!("job not found: {job_id}"))?; + graph + .pipelines + .values() + .map(|p| p.control_tx.clone()) + .collect::>() + }; + + for tx in controllers { + tx.send(ControlCommand::Stop { mode: mode.clone() }).await?; + } + Ok(()) + } + + pub fn get_pipeline_statuses(&self, job_id: &str) -> Option> { + let jobs = self.active_jobs.read().unwrap(); + let graph = jobs.get(job_id)?; + Some( + graph + .pipelines + .iter() + .map(|(id, pipeline)| (*id, pipeline.status.read().unwrap().clone())) + .collect(), + ) + } + + fn create_chain(&self, operators: &[ChainedOperator]) -> anyhow::Result { + let mut chain = Vec::with_capacity(operators.len()); + for op in operators { + match self + .operator_factory + .create_operator(&op.operator_name, &op.operator_config)? + { + ConstructedOperator::Operator(msg_op) => chain.push(msg_op), + ConstructedOperator::Source(_) => { + return Err(anyhow::anyhow!( + "source operator '{}' cannot be used inside a physical pipeline chain", + op.operator_name + )); + } + } + } + Ok(FusionOperatorChain::new(chain)) + } + + fn on_pipeline_exit( + job_id: String, + pipe_id: u32, + result: std::thread::Result>, + status: Arc>, + _registry: Arc>>, + ) { + let mut needs_abort = false; + match result { + Ok(Err(e)) => { + *status.write().unwrap() = PipelineStatus::Failed { + error: e.to_string(), + is_panic: false, + }; + needs_abort = true; + } + Err(_) => { + *status.write().unwrap() = PipelineStatus::Failed { + error: "panic".into(), + is_panic: true, + }; + needs_abort = true; + } + Ok(Ok(_)) => { + *status.write().unwrap() = PipelineStatus::Finished; + } + } + + if needs_abort { + error!( + "Pipeline {}-{} failed. 
Initiating Job Abort.", + job_id, pipe_id + ); + } + } +} diff --git a/src/runtime/streaming/job/mod.rs b/src/runtime/streaming/job/mod.rs new file mode 100644 index 00000000..9490e84e --- /dev/null +++ b/src/runtime/streaming/job/mod.rs @@ -0,0 +1,7 @@ +pub mod edge_manager; +pub mod job_manager; +pub mod models; +pub mod pipeline_runner; + +pub use job_manager::JobManager; +pub use models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus}; diff --git a/src/runtime/streaming/job/models.rs b/src/runtime/streaming/job/models.rs new file mode 100644 index 00000000..3e843ea4 --- /dev/null +++ b/src/runtime/streaming/job/models.rs @@ -0,0 +1,35 @@ +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; +use std::thread::JoinHandle; +use std::time::Instant; + +use protocol::grpc::api::FsProgram; +use tokio::sync::mpsc; + +use crate::runtime::streaming::protocol::control::ControlCommand; + +/// 物理 Pipeline 的实时状态 +#[derive(Debug, Clone, PartialEq)] +pub enum PipelineStatus { + Initializing, + Running, + Failed { error: String, is_panic: bool }, + Finished, + Stopping, +} + +/// 物理执行图中的一个执行单元 +pub struct PhysicalPipeline { + pub pipeline_id: u32, + pub handle: Option>, + pub status: Arc>, + pub control_tx: mpsc::Sender, +} + +/// 一个 SQL Job 的物理执行图 +pub struct PhysicalExecutionGraph { + pub job_id: String, + pub program: FsProgram, + pub pipelines: HashMap, + pub start_time: Instant, +} diff --git a/src/runtime/streaming/job/pipeline_runner.rs b/src/runtime/streaming/job/pipeline_runner.rs new file mode 100644 index 00000000..57c0fec8 --- /dev/null +++ b/src/runtime/streaming/job/pipeline_runner.rs @@ -0,0 +1,242 @@ +use std::future::pending; +use std::sync::Arc; + +use tokio::sync::mpsc; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::memory::MemoryPool; +use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; +use 
crate::runtime::streaming::protocol::event::StreamEvent; +use crate::runtime::streaming::protocol::tracked::TrackedEvent; +use crate::runtime::streaming::storage::manager::TableManager; +use crate::sql::common::CheckpointBarrier; + +pub struct PipelineRunner { + chain: FusionOperatorChain, + inbox: Option>, + outboxes: Vec>, + control_rx: mpsc::Receiver, + ctx: TaskContext, +} + +impl PipelineRunner { + pub fn new( + pipeline_id: u32, + chain: FusionOperatorChain, + inbox: Option>, + outboxes: Vec>, + control_rx: mpsc::Receiver, + job_id: String, + memory_pool: Arc, + table_manager: Option>>, + ) -> Self { + Self { + chain, + inbox, + outboxes, + control_rx, + ctx: TaskContext::new(job_id, pipeline_id, 0, 1, vec![], memory_pool, table_manager), + } + } + + pub async fn run(&mut self) -> anyhow::Result<()> { + self.chain.on_start(&mut self.ctx).await?; + + 'main: loop { + tokio::select! { + biased; + Some(cmd) = self.control_rx.recv() => { + if self.handle_control(cmd).await? { + break 'main; + } + } + Some(event) = async { + if let Some(ref mut rx) = self.inbox { rx.recv().await } + else { pending().await } + } => { + self.process_event(event).await?; + } + } + } + + self.chain.on_close(&mut self.ctx).await?; + Ok(()) + } + + async fn handle_control(&mut self, cmd: ControlCommand) -> anyhow::Result { + match &cmd { + ControlCommand::TriggerCheckpoint { barrier } => { + let barrier: CheckpointBarrier = barrier.clone().into(); + self.chain.snapshot_state(barrier.clone(), &mut self.ctx).await?; + self.broadcast(StreamEvent::Barrier(barrier)).await?; + } + ControlCommand::Commit { epoch } => { + self.chain.commit_checkpoint(*epoch, &mut self.ctx).await?; + } + ControlCommand::Stop { mode } if *mode == StopMode::Immediate => { + return Ok(true); + } + _ => {} + } + + self.chain.handle_control(cmd, &mut self.ctx).await + } + + async fn process_event(&mut self, tracked: TrackedEvent) -> anyhow::Result<()> { + match tracked.event { + StreamEvent::Data(batch) => { + let 
outputs = self.chain.process_data(0, batch, &mut self.ctx).await?; + self.emit_outputs(outputs).await?; + } + StreamEvent::Watermark(wm) => { + let outputs = self.chain.process_watermark(wm.clone(), &mut self.ctx).await?; + self.emit_outputs(outputs).await?; + self.broadcast(StreamEvent::Watermark(wm)).await?; + } + StreamEvent::Barrier(barrier) => { + self.chain.snapshot_state(barrier.clone(), &mut self.ctx).await?; + self.broadcast(StreamEvent::Barrier(barrier)).await?; + } + StreamEvent::EndOfStream => { + self.broadcast(StreamEvent::EndOfStream).await?; + } + } + Ok(()) + } + + async fn emit_outputs( + &mut self, + outputs: Vec, + ) -> anyhow::Result<()> { + for out in outputs { + match out { + crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward(batch) + | crate::runtime::streaming::protocol::stream_out::StreamOutput::Broadcast(batch) + | crate::runtime::streaming::protocol::stream_out::StreamOutput::Keyed(_, batch) => { + self.broadcast(StreamEvent::Data(batch)).await?; + } + crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(wm) => { + self.broadcast(StreamEvent::Watermark(wm)).await?; + } + } + } + Ok(()) + } + + async fn broadcast(&self, event: StreamEvent) -> anyhow::Result<()> { + let tracked = TrackedEvent::control(event); + for tx in &self.outboxes { + tx.send(tracked.clone()).await?; + } + Ok(()) + } +} + +pub struct FusionOperatorChain { + operators: Vec>, +} + +impl FusionOperatorChain { + pub fn new(operators: Vec>) -> Self { + Self { operators } + } + + pub async fn on_start(&mut self, ctx: &mut TaskContext) -> anyhow::Result<()> { + for op in &mut self.operators { + op.on_start(ctx).await?; + } + Ok(()) + } + + pub async fn process_data( + &mut self, + input_idx: usize, + batch: arrow_array::RecordBatch, + ctx: &mut TaskContext, + ) -> anyhow::Result> { + let mut data_batches = vec![batch]; + for (idx, op) in self.operators.iter_mut().enumerate() { + let mut next_batches = Vec::new(); + for b in 
data_batches { + let outputs = op + .process_data(if idx == 0 { input_idx } else { 0 }, b, ctx) + .await?; + for out in outputs { + match out { + crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward(b) + | crate::runtime::streaming::protocol::stream_out::StreamOutput::Broadcast(b) + | crate::runtime::streaming::protocol::stream_out::StreamOutput::Keyed(_, b) => { + next_batches.push(b); + } + crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(_) => {} + } + } + } + data_batches = next_batches; + } + Ok(data_batches + .into_iter() + .map(crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward) + .collect()) + } + + pub async fn process_watermark( + &mut self, + watermark: crate::sql::common::Watermark, + ctx: &mut TaskContext, + ) -> anyhow::Result> { + let mut outs = vec![crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(watermark)]; + for op in &mut self.operators { + let mut next = Vec::new(); + for out in outs { + match out { + crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(wm) => { + let mut produced = op.process_watermark(wm, ctx).await?; + next.append(&mut produced); + } + other => next.push(other), + } + } + outs = next; + } + Ok(outs) + } + + pub async fn snapshot_state( + &mut self, + barrier: CheckpointBarrier, + ctx: &mut TaskContext, + ) -> anyhow::Result<()> { + for op in &mut self.operators { + op.snapshot_state(barrier.clone(), ctx).await?; + } + Ok(()) + } + + pub async fn commit_checkpoint(&mut self, epoch: u32, ctx: &mut TaskContext) -> anyhow::Result<()> { + for op in &mut self.operators { + op.commit_checkpoint(epoch, ctx).await?; + } + Ok(()) + } + + pub async fn handle_control( + &mut self, + cmd: ControlCommand, + ctx: &mut TaskContext, + ) -> anyhow::Result { + let mut should_stop = false; + for op in &mut self.operators { + should_stop = should_stop || op.handle_control(cmd.clone(), ctx).await?; + } + Ok(should_stop) + } + + pub async fn 
on_close(&mut self, ctx: &mut TaskContext) -> anyhow::Result<()> { + for op in &mut self.operators { + let _ = op.on_close(ctx).await?; + } + Ok(()) + } +} diff --git a/src/runtime/streaming/lib.rs b/src/runtime/streaming/lib.rs index 67cd8f70..6b145cd7 100644 --- a/src/runtime/streaming/lib.rs +++ b/src/runtime/streaming/lib.rs @@ -19,6 +19,7 @@ pub mod cluster; pub mod error; pub mod execution; pub mod factory; +pub mod job; pub mod memory; pub mod network; pub mod operators; @@ -36,6 +37,7 @@ pub use cluster::{ pub use error::RunError; pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner}; pub use factory::{OperatorConstructor, OperatorFactory}; +pub use job::{JobManager, PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus}; pub use memory::{MemoryPool, MemoryTicket}; pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub}; pub use protocol::{ diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs index 5997623e..237f3c06 100644 --- a/src/runtime/streaming/mod.rs +++ b/src/runtime/streaming/mod.rs @@ -21,11 +21,11 @@ pub mod error; pub mod execution; pub mod factory; pub mod format; +pub mod job; pub mod memory; pub mod network; pub mod operators; pub mod protocol; -pub mod state; pub mod storage; pub use api::{ @@ -40,6 +40,7 @@ pub use cluster::{ pub use error::RunError; pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner}; pub use factory::{OperatorConstructor, OperatorFactory}; +pub use job::{JobManager, PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus}; pub use memory::{MemoryPool, MemoryTicket}; pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub}; pub use protocol::{ diff --git a/src/runtime/streaming/state/mod.rs b/src/runtime/streaming/state/mod.rs deleted file mode 100644 index e69de29b..00000000 diff --git a/src/runtime/streaming/state/table_manager.rs b/src/runtime/streaming/state/table_manager.rs deleted file mode 100644 
index e69de29b..00000000 From 784299538423e8614d32c99383a30f10630b1ee1 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sat, 28 Mar 2026 21:45:02 +0800 Subject: [PATCH 21/44] update --- src/config/global_config.rs | 9 + src/coordinator/coordinator.rs | 1 + src/coordinator/execution/executor.rs | 26 +- src/coordinator/runtime_context.rs | 12 +- src/runtime/streaming/api/context.rs | 25 +- src/runtime/streaming/api/operator.rs | 20 - src/runtime/streaming/api/source.rs | 8 + src/runtime/streaming/cluster/graph.rs | 136 ----- src/runtime/streaming/cluster/manager.rs | 164 ------ src/runtime/streaming/cluster/master.rs | 273 --------- src/runtime/streaming/cluster/mod.rs | 11 - src/runtime/streaming/cluster/wiring.rs | 46 -- src/runtime/streaming/connectors/mod.rs | 61 -- src/runtime/streaming/driver.rs | 254 ++++++++ src/runtime/streaming/error.rs | 39 +- src/runtime/streaming/execution/mod.rs | 4 +- src/runtime/streaming/execution/runner.rs | 546 ++++++++++-------- src/runtime/streaming/execution/source.rs | 192 +++--- .../execution/tracker/barrier_aligner.rs | 24 +- .../execution/tracker/watermark_tracker.rs | 11 + .../factory/registry/kafka_factory.rs | 328 +++++++++++ .../factory/{registry.rs => registry/mod.rs} | 48 +- src/runtime/streaming/job/job_manager.rs | 280 +++++---- src/runtime/streaming/job/mod.rs | 1 - src/runtime/streaming/job/pipeline_runner.rs | 242 -------- src/runtime/streaming/lib.rs | 13 +- src/runtime/streaming/mod.rs | 12 +- src/runtime/streaming/network/environment.rs | 50 +- .../grouping/incremental_aggregate.rs | 3 +- .../streaming/operators/joins/lookup_join.rs | 365 ------------ src/runtime/streaming/operators/joins/mod.rs | 2 - src/runtime/streaming/operators/mod.rs | 4 +- .../streaming/operators/sink/kafka/mod.rs | 20 +- .../streaming/operators/source/kafka/mod.rs | 85 +-- .../watermark/watermark_generator.rs | 33 +- .../windows/tumbling_aggregating_window.rs | 39 +- src/runtime/streaming/storage/backend.rs | 78 --- 
src/runtime/streaming/storage/manager.rs | 156 ----- src/runtime/streaming/storage/mod.rs | 32 - src/runtime/streaming/storage/table.rs | 91 --- src/server/initializer.rs | 22 +- src/sql/common/fs_schema.rs | 8 + src/sql/common/kafka_catalog.rs | 122 ++++ src/sql/common/mod.rs | 5 + src/sql/common/operator_config.rs | 4 + 45 files changed, 1531 insertions(+), 2374 deletions(-) delete mode 100644 src/runtime/streaming/cluster/graph.rs delete mode 100644 src/runtime/streaming/cluster/manager.rs delete mode 100644 src/runtime/streaming/cluster/master.rs delete mode 100644 src/runtime/streaming/cluster/mod.rs delete mode 100644 src/runtime/streaming/cluster/wiring.rs delete mode 100644 src/runtime/streaming/connectors/mod.rs create mode 100644 src/runtime/streaming/driver.rs create mode 100644 src/runtime/streaming/factory/registry/kafka_factory.rs rename src/runtime/streaming/factory/{registry.rs => registry/mod.rs} (88%) delete mode 100644 src/runtime/streaming/job/pipeline_runner.rs delete mode 100644 src/runtime/streaming/operators/joins/lookup_join.rs delete mode 100644 src/runtime/streaming/storage/backend.rs delete mode 100644 src/runtime/streaming/storage/manager.rs delete mode 100644 src/runtime/streaming/storage/mod.rs delete mode 100644 src/runtime/streaming/storage/table.rs create mode 100644 src/sql/common/kafka_catalog.rs diff --git a/src/config/global_config.rs b/src/config/global_config.rs index b4f92edd..33676125 100644 --- a/src/config/global_config.rs +++ b/src/config/global_config.rs @@ -19,6 +19,13 @@ use crate::config::python_config::PythonConfig; use crate::config::service_config::ServiceConfig; use crate::config::wasm_config::WasmConfig; +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct StreamingConfig { + /// Maximum heap memory (in bytes) available to the streaming runtime's memory pool. + /// Defaults to 256 MiB when absent. 
+ pub max_memory_bytes: Option, +} + #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct GlobalConfig { pub service: ServiceConfig, @@ -31,6 +38,8 @@ pub struct GlobalConfig { pub state_storage: crate::config::storage::StateStorageConfig, #[serde(default)] pub task_storage: crate::config::storage::TaskStorageConfig, + #[serde(default)] + pub streaming: StreamingConfig, } impl GlobalConfig { diff --git a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs index ec81132a..f21b12ca 100644 --- a/src/coordinator/coordinator.rs +++ b/src/coordinator/coordinator.rs @@ -111,6 +111,7 @@ impl Coordinator { let res = Executor::new( Arc::clone(&runtime.task_manager), runtime.catalog_manager.clone(), + Arc::clone(&runtime.job_manager), ) .execute(plan.as_ref()) .map_err(|e| anyhow::anyhow!(e)) diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 3639ee7a..28082abe 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -12,6 +12,7 @@ use std::sync::Arc; +use protocol::grpc::api::FsProgram; use thiserror::Error; use tracing::{debug, info}; @@ -23,6 +24,7 @@ use crate::coordinator::plan::{ StreamingTableConnectorPlan, }; use crate::coordinator::statement::{ConfigSource, FunctionSource}; +use crate::runtime::streaming::job::JobManager; use crate::runtime::taskexecutor::TaskManager; use crate::sql::schema::StreamTable; use crate::storage::stream_catalog::CatalogManager; @@ -42,13 +44,19 @@ pub enum ExecuteError { pub struct Executor { task_manager: Arc, catalog_manager: Arc, + job_manager: Arc, } impl Executor { - pub fn new(task_manager: Arc, catalog_manager: Arc) -> Self { + pub fn new( + task_manager: Arc, + catalog_manager: Arc, + job_manager: Arc, + ) -> Self { Self { task_manager, catalog_manager, + job_manager, } } @@ -273,8 +281,22 @@ impl PlanVisitor for Executor { .add_table(sink) .map_err(|e| ExecuteError::Internal(e.to_string()))?; + let fs_program: 
FsProgram = plan.program.clone().into(); + let job_manager: Arc = Arc::clone(&self.job_manager); + + let job_id = tokio::task::block_in_place(|| { + tokio::runtime::Handle::current().block_on(job_manager.submit_job(fs_program)) + }) + .map_err(|e| ExecuteError::Internal(format!("Failed to submit streaming job: {e}")))?; + + info!( + job_id = %job_id, + table = %plan.name, + "Streaming table registered and job submitted" + ); + Ok(ExecuteResult::ok_with_data( - format!("Registered streaming table '{}'", plan.name), + format!("Streaming table '{}' created, job_id = {}", plan.name, job_id), empty_record_batch(), )) }; diff --git a/src/coordinator/runtime_context.rs b/src/coordinator/runtime_context.rs index 7b1d82dc..d0f80786 100644 --- a/src/coordinator/runtime_context.rs +++ b/src/coordinator/runtime_context.rs @@ -10,34 +10,36 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Runtime resources for a single coordinator run: [`TaskManager`] and [`CatalogManager`]. +//! Runtime resources for a single coordinator run: [`TaskManager`], [`CatalogManager`], and [`JobManager`]. use std::sync::Arc; use anyhow::Result; +use crate::runtime::streaming::job::JobManager; use crate::runtime::taskexecutor::TaskManager; use crate::sql::schema::StreamSchemaProvider; use crate::storage::stream_catalog::CatalogManager; /// Dependencies shared by analyze / plan / execute, analogous to installing globals in -/// [`TaskManager`] and [`CatalogManager`]. +/// [`TaskManager`], [`CatalogManager`], and [`JobManager`]. #[derive(Clone)] pub struct CoordinatorRuntimeContext { pub task_manager: Arc, pub catalog_manager: Arc, - /// When set (e.g. unit tests), used for SQL planning instead of a catalog snapshot. + pub job_manager: Arc, planning_schema_override: Option, } impl CoordinatorRuntimeContext { - /// Resolve [`TaskManager`] and global stream catalog (same pattern as server startup). 
pub fn try_from_globals() -> Result { Ok(Self { task_manager: TaskManager::get() .map_err(|e| anyhow::anyhow!("Failed to get TaskManager: {}", e))?, catalog_manager: CatalogManager::global() .map_err(|e| anyhow::anyhow!("Failed to get CatalogManager: {}", e))?, + job_manager: JobManager::global() + .map_err(|e| anyhow::anyhow!("Failed to get JobManager: {}", e))?, planning_schema_override: None, }) } @@ -45,11 +47,13 @@ impl CoordinatorRuntimeContext { pub fn new( task_manager: Arc, catalog_manager: Arc, + job_manager: Arc, planning_schema_override: Option, ) -> Self { Self { task_manager, catalog_manager, + job_manager, planning_schema_override, } } diff --git a/src/runtime/streaming/api/context.rs b/src/runtime/streaming/api/context.rs index e838b06e..77038bf3 100644 --- a/src/runtime/streaming/api/context.rs +++ b/src/runtime/streaming/api/context.rs @@ -2,11 +2,9 @@ use crate::runtime::streaming::memory::MemoryPool; use crate::runtime::streaming::protocol::event::StreamEvent; use crate::runtime::streaming::protocol::tracked::TrackedEvent; use crate::runtime::streaming::network::endpoint::PhysicalSender; -use crate::runtime::streaming::storage::manager::TableManager; use arrow_array::RecordBatch; use std::sync::Arc; -use tokio::sync::Mutex; pub struct TaskContext { pub job_id: String, @@ -17,7 +15,6 @@ pub struct TaskContext { pub outboxes: Vec, memory_pool: Arc, - table_manager: Option>>, current_watermark: Option, } @@ -30,7 +27,6 @@ impl TaskContext { parallelism: u32, outboxes: Vec, memory_pool: Arc, - table_manager: Option>>, ) -> Self { Self { job_id, @@ -39,7 +35,6 @@ impl TaskContext { parallelism, outboxes, memory_pool, - table_manager, current_watermark: None, } } @@ -77,27 +72,9 @@ impl TaskContext { } // ======================================================================== - // 状态管理与背压网络发送 API + // 背压网络发送 API // ======================================================================== - pub async fn table_manager(&self) -> 
tokio::sync::MutexGuard<'_, TableManager> { - self.table_manager - .as_ref() - .expect("State backend not initialized") - .lock() - .await - } - - pub async fn table_manager_guard( - &self, - ) -> anyhow::Result> { - let arc = self - .table_manager - .as_ref() - .ok_or_else(|| anyhow::anyhow!("table_manager is not configured on TaskContext"))?; - Ok(arc.lock().await) - } - /// 受内存池管控的数据发送:申请精准字节的内存船票后广播到所有下游 pub async fn collect(&self, batch: RecordBatch) -> anyhow::Result<()> { if self.outboxes.is_empty() { diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs index 6eb49d2c..3c088e3c 100644 --- a/src/runtime/streaming/api/operator.rs +++ b/src/runtime/streaming/api/operator.rs @@ -1,6 +1,5 @@ use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::source::SourceOperator; -use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; use crate::runtime::streaming::protocol::stream_out::StreamOutput; use arrow_array::RecordBatch; use async_trait::async_trait; @@ -128,25 +127,6 @@ pub trait MessageOperator: Send + 'static { Ok(vec![]) } - /// 返回 `true` 时应立即结束运行循环(如 `StopMode::Immediate`)。 - async fn handle_control( - &mut self, - command: ControlCommand, - _ctx: &mut TaskContext, - ) -> anyhow::Result { - match command { - ControlCommand::Stop { mode } => { - if mode == StopMode::Immediate { - return Ok(true); - } - Ok(false) - } - ControlCommand::DropState | ControlCommand::Commit { .. } => Ok(false), - ControlCommand::Start | ControlCommand::UpdateConfig { .. } => Ok(false), - ControlCommand::TriggerCheckpoint { .. 
} => Ok(false), - } - } - async fn on_close(&mut self, _ctx: &mut TaskContext) -> anyhow::Result> { Ok(vec![]) } diff --git a/src/runtime/streaming/api/source.rs b/src/runtime/streaming/api/source.rs index 8ddeb3cf..a4ff46c4 100644 --- a/src/runtime/streaming/api/source.rs +++ b/src/runtime/streaming/api/source.rs @@ -18,7 +18,9 @@ pub enum SourceOffset { pub enum SourceEvent { Data(RecordBatch), Watermark(Watermark), + /// 无数据可读:必须由 Runner 调度退避,禁止在 `fetch_next` 内长时间阻塞。 Idle, + EndOfStream, } #[async_trait] @@ -29,8 +31,14 @@ pub trait SourceOperator: Send + 'static { Ok(()) } + /// 核心拉取:无数据时必须返回 [`SourceEvent::Idle`],严禁内部阻塞控制面。 async fn fetch_next(&mut self, ctx: &mut TaskContext) -> anyhow::Result; + /// 独立于 `fetch_next` 的水位线脉搏(例如解决 Idle 时仍要推进水印)。 + fn poll_watermark(&mut self) -> Option { + None + } + async fn snapshot_state( &mut self, barrier: CheckpointBarrier, diff --git a/src/runtime/streaming/cluster/graph.rs b/src/runtime/streaming/cluster/graph.rs deleted file mode 100644 index 1ee8f8f7..00000000 --- a/src/runtime/streaming/cluster/graph.rs +++ /dev/null @@ -1,136 +0,0 @@ -use std::fmt; -use std::sync::Arc; - -use crate::sql::common::FsSchema; -// ============ 强类型 ID (Strong-type IDs) ============ - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct JobId(pub String); - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct VertexId(pub u32); - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct SubtaskIndex(pub u32); - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct OperatorUid(pub String); - -impl fmt::Display for JobId { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl fmt::Display for VertexId { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl fmt::Display for SubtaskIndex { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl fmt::Display for OperatorUid { - 
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -// ============ 资源画像 (Resource Profile) ============ - -#[derive(Debug, Clone)] -pub struct ResourceProfile { - pub managed_memory_bytes: u64, - pub cpu_cores: f64, - pub network_memory_bytes: u64, -} - -impl Default for ResourceProfile { - fn default() -> Self { - Self { - managed_memory_bytes: 64 * 1024 * 1024, - cpu_cores: 1.0, - network_memory_bytes: 32 * 1024 * 1024, - } - } -} - -// ============ 分区策略 (Partitioning Strategy) ============ - -#[derive(Debug, Clone)] -pub enum PartitioningStrategy { - Forward, - HashByKeys(Vec), - Rebalance, -} - -// ============ 交换模式 (Exchange Mode) ============ - -#[derive(Debug, Clone)] -pub enum ExchangeMode { - LocalThread, - RemoteNetwork { target_addr: String }, -} - -// ============ 部署描述符 (Deployment Descriptors) ============ - -#[derive(Debug, Clone)] -pub struct TaskDeploymentDescriptor { - pub job_id: JobId, - pub vertex_id: VertexId, - pub subtask_idx: SubtaskIndex, - pub parallelism: u32, - pub operator_name: String, - pub operator_uid: OperatorUid, - pub is_source: bool, - pub operator_config_payload: Vec, - pub resources: ResourceProfile, - pub in_schemas: Vec>, - pub out_schema: Option>, - pub input_gates_count: usize, - pub output_gates_count: usize, -} - -#[derive(Debug, Clone)] -pub struct PhysicalEdgeDescriptor { - pub src_vertex: VertexId, - pub src_subtask: SubtaskIndex, - pub dst_vertex: VertexId, - pub dst_subtask: SubtaskIndex, - pub partitioning: PartitioningStrategy, - pub exchange_mode: ExchangeMode, -} - -// ============ 执行图 (Execution Graph) ============ - -#[derive(Debug, Clone)] -pub struct ExecutionGraph { - pub job_id: JobId, - pub tasks: Vec, - pub edges: Vec, -} - -impl ExecutionGraph { - pub fn validate(&self) -> Result<(), String> { - if self.tasks.is_empty() { - return Err("Execution graph has no tasks".into()); - } - if self.edges.is_empty() && self.tasks.len() > 1 { - return Err("Multi-task graph 
has no edges".into()); - } - let mut seen = std::collections::HashSet::new(); - for tdd in &self.tasks { - if !seen.insert((tdd.vertex_id, tdd.subtask_idx)) { - return Err(format!( - "Duplicate subtask: vertex={}, subtask={}", - tdd.vertex_id, tdd.subtask_idx - )); - } - } - Ok(()) - } -} diff --git a/src/runtime/streaming/cluster/manager.rs b/src/runtime/streaming/cluster/manager.rs deleted file mode 100644 index 34045dee..00000000 --- a/src/runtime/streaming/cluster/manager.rs +++ /dev/null @@ -1,164 +0,0 @@ -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::ConstructedOperator; -use crate::runtime::streaming::cluster::graph::ExecutionGraph; -use crate::runtime::streaming::execution::runner::SubtaskRunner; -use crate::runtime::streaming::execution::source::SourceRunner; -use crate::runtime::streaming::factory::OperatorFactory; -use crate::runtime::streaming::memory::MemoryPool; -use crate::runtime::streaming::network::NetworkEnvironment; -use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; -use crate::runtime::streaming::storage::manager::TableManager; -use std::collections::HashMap; -use std::sync::Arc; -use tokio::sync::mpsc::{channel, Sender}; -use tokio::task::JoinSet; -use tracing::{error, info, instrument, warn}; - -pub struct TaskManager { - pub worker_id: String, - memory_pool: Arc, - table_manager: Arc>, - operator_factory: Arc, - task_supervisors: JoinSet<()>, - pub controllers: HashMap<(u32, u32), Sender>, -} - -impl TaskManager { - pub fn new( - worker_id: String, - max_memory_bytes: usize, - table_manager: Arc>, - operator_factory: Arc, - ) -> Self { - Self { - worker_id, - memory_pool: MemoryPool::new(max_memory_bytes), - table_manager, - operator_factory, - task_supervisors: JoinSet::new(), - controllers: HashMap::new(), - } - } - - #[instrument(skip(self, graph), fields(job_id = %graph.job_id))] - pub async fn deploy_and_start(&mut self, graph: ExecutionGraph) -> 
anyhow::Result<()> { - info!("TaskManager [{}] starting deployment...", self.worker_id); - - graph - .validate() - .map_err(|e| anyhow::anyhow!("Graph validation failed: {}", e))?; - - // 1. 网络连线期 - let local_queue_size = 1024; - let mut network_env = NetworkEnvironment::build_from_graph(&graph, local_queue_size); - - // 2. 控制通道初始化 - let mut control_rxs = HashMap::new(); - for tdd in &graph.tasks { - let key = (tdd.vertex_id.0, tdd.subtask_idx.0); - let (ctrl_tx, ctrl_rx) = channel(32); - self.controllers.insert(key, ctrl_tx); - control_rxs.insert(key, ctrl_rx); - } - - // 3. 部署与算子实例化 - for tdd in graph.tasks { - let v_id = tdd.vertex_id; - let s_idx = tdd.subtask_idx; - let key = (v_id.0, s_idx.0); - - let ctrl_rx = control_rxs.remove(&key).unwrap(); - let inboxes = network_env.take_inboxes(v_id, s_idx); - let outboxes = network_env.take_outboxes(v_id, s_idx); - - let ctx = TaskContext::new( - tdd.job_id.0.clone(), - v_id.0, - s_idx.0, - tdd.parallelism, - outboxes, - self.memory_pool.clone(), - Some(self.table_manager.clone()), - ); - - let constructed_op = self.operator_factory.create_operator( - &tdd.operator_name, - &tdd.operator_config_payload, - )?; - - // 4. 
任务发射入监督树 - let worker_id = self.worker_id.clone(); - match constructed_op { - ConstructedOperator::Source(source_op) => { - let runner = SourceRunner::new(source_op, ctx, ctrl_rx); - self.task_supervisors.spawn(async move { - if let Err(e) = runner.run().await { - error!( - worker = %worker_id, - vertex = key.0, - subtask = key.1, - "SourceTask CRASHED: {:?}", e - ); - panic!("SourceTask failed"); - } - }); - } - ConstructedOperator::Operator(msg_op) => { - let runner = SubtaskRunner::new(msg_op, ctx, inboxes, ctrl_rx); - self.task_supervisors.spawn(async move { - if let Err(e) = runner.run().await { - error!( - worker = %worker_id, - vertex = key.0, - subtask = key.1, - "StreamTask CRASHED: {:?}", e - ); - panic!("StreamTask failed"); - } - }); - } - } - } - - info!( - "TaskManager [{}] deployment complete. All tasks ignited.", - self.worker_id - ); - Ok(()) - } - - /// 监控运行状态:Supervisor 模式防止级联崩溃 - pub async fn wait_and_supervise(mut self) { - while let Some(result) = self.task_supervisors.join_next().await { - match result { - Ok(_) => { - info!("A subtask finished successfully."); - } - Err(join_error) => { - if join_error.is_panic() { - error!( - "FATAL: A subtask panicked! Initiating emergency shutdown \ - of the entire TaskManager to prevent data corruption." 
- ); - self.task_supervisors.abort_all(); - break; - } else if join_error.is_cancelled() { - warn!("A subtask was cancelled."); - } - } - } - } - info!("TaskManager shutdown process complete."); - } - - pub async fn stop_all(&self, mode: StopMode) { - for (key, tx) in &self.controllers { - if let Err(e) = tx - .send(ControlCommand::Stop { mode: mode.clone() }) - .await - { - warn!("Failed to send stop command to task {:?}: {}", key, e); - } - } - } -} diff --git a/src/runtime/streaming/cluster/master.rs b/src/runtime/streaming/cluster/master.rs deleted file mode 100644 index e456d8e3..00000000 --- a/src/runtime/streaming/cluster/master.rs +++ /dev/null @@ -1,273 +0,0 @@ -use std::collections::HashMap; -use anyhow::Result; - -use crate::runtime::streaming::cluster::graph::{ - ExchangeMode, ExecutionGraph, JobId, OperatorUid, PartitioningStrategy, - PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, VertexId, -}; - -use petgraph::Direction; -use sha2::{Digest, Sha256}; -use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, OperatorChain}; - -#[derive(thiserror::Error, Debug)] -pub enum CompileError { - #[error("Topology Error: Forward edge between Vertex {src} (p={src_p}) and {dst} (p={dst_p}) requires identical parallelism.")] - ParallelismMismatch { - src: u32, - src_p: usize, - dst: u32, - dst_p: usize, - }, - - #[error("Serialization Error: Failed to serialize operator chain for Vertex {vertex_id}. 
Error: {source}")] - SerializationFailed { - vertex_id: u32, - source: anyhow::Error, - }, - - #[error("Validation Error: {0}")] - ValidationError(String), -} - -pub struct JobCompiler; - -impl JobCompiler { - pub fn compile( - job_id: String, - logical: &LogicalGraph, - ) -> Result { - let mut tasks = Vec::new(); - let mut edges = Vec::new(); - let job_id_typed = JobId(job_id.clone()); - - // ==================================================================== - // 阶段 1:预计算网络门数量 (Pre-compute Network Gates) - // ==================================================================== - let mut in_degrees: HashMap<(u32, u32), usize> = HashMap::new(); - let mut out_degrees: HashMap<(u32, u32), usize> = HashMap::new(); - - for edge_idx in logical.edge_indices() { - let edge = logical.edge_weight(edge_idx).unwrap(); - let (src_idx, dst_idx) = logical.edge_endpoints(edge_idx).unwrap(); - let src_node = logical.node_weight(src_idx).unwrap(); - let dst_node = logical.node_weight(dst_idx).unwrap(); - - match edge.edge_type { - LogicalEdgeType::Forward => { - if src_node.parallelism != dst_node.parallelism { - return Err(CompileError::ParallelismMismatch { - src: src_node.node_id, - src_p: src_node.parallelism, - dst: dst_node.node_id, - dst_p: dst_node.parallelism, - }); - } - for i in 0..src_node.parallelism as u32 { - *out_degrees.entry((src_node.node_id, i)).or_insert(0) += 1; - *in_degrees.entry((dst_node.node_id, i)).or_insert(0) += 1; - } - } - LogicalEdgeType::Shuffle - | LogicalEdgeType::LeftJoin - | LogicalEdgeType::RightJoin => { - for s in 0..src_node.parallelism as u32 { - *out_degrees.entry((src_node.node_id, s)).or_insert(0) += - dst_node.parallelism; - } - for d in 0..dst_node.parallelism as u32 { - *in_degrees.entry((dst_node.node_id, d)).or_insert(0) += - src_node.parallelism; - } - } - } - } - - // ==================================================================== - // 阶段 2:节点展开与算子融合 (Node Expansion & Operator Fusion) - // 
==================================================================== - for idx in logical.node_indices() { - let node = logical.node_weight(idx).unwrap(); - let parallelism = node.parallelism as u32; - - let in_schemas: Vec<_> = logical - .edges_directed(idx, Direction::Incoming) - .map(|e| e.weight().schema.clone()) - .collect(); - let out_schema = logical - .edges_directed(idx, Direction::Outgoing) - .map(|e| e.weight().schema.clone()) - .next(); - - let is_source = node.operator_chain.is_source(); - let (head_op, _) = node - .operator_chain - .iter() - .next() - .expect("operator chain is non-empty"); - - let chain_payload = - Self::serialize_operator_chain(&node.operator_chain).map_err(|e| { - CompileError::SerializationFailed { - vertex_id: node.node_id, - source: e, - } - })?; - - let base_uid = Self::generate_deterministic_uid( - &job_id, - node.node_id, - &node.operator_chain, - ); - - let resource_profile = - Self::calculate_resource_profile(&node.operator_chain, parallelism); - - for subtask_idx in 0..parallelism { - let s_idx = SubtaskIndex(subtask_idx); - let v_id = VertexId(node.node_id); - - let input_gates_count = *in_degrees - .get(&(node.node_id, subtask_idx)) - .unwrap_or(&0); - let output_gates_count = *out_degrees - .get(&(node.node_id, subtask_idx)) - .unwrap_or(&0); - - tasks.push(TaskDeploymentDescriptor { - job_id: job_id_typed.clone(), - vertex_id: v_id, - subtask_idx: s_idx, - parallelism, - operator_name: head_op.operator_name.to_string(), - operator_uid: OperatorUid(format!("{}-{}", base_uid, subtask_idx)), - is_source, - operator_config_payload: chain_payload.clone(), - resources: resource_profile.clone(), - in_schemas: in_schemas.clone(), - out_schema: out_schema.clone(), - input_gates_count, - output_gates_count, - }); - } - } - - // ==================================================================== - // 阶段 3:物理边展开与路由策略推断 (Edge Expansion & Partitioning) - // ==================================================================== - for 
edge_idx in logical.edge_indices() { - let edge = logical.edge_weight(edge_idx).unwrap(); - let (src_graph_idx, dst_graph_idx) = logical.edge_endpoints(edge_idx).unwrap(); - let src_node = logical.node_weight(src_graph_idx).unwrap(); - let dst_node = logical.node_weight(dst_graph_idx).unwrap(); - - let partitioning = match edge.edge_type { - LogicalEdgeType::Forward => PartitioningStrategy::Forward, - LogicalEdgeType::Shuffle - | LogicalEdgeType::LeftJoin - | LogicalEdgeType::RightJoin => { - if let Some(key_indices) = edge.schema.storage_keys() { - if !key_indices.is_empty() { - PartitioningStrategy::HashByKeys(key_indices.clone()) - } else { - PartitioningStrategy::Rebalance - } - } else { - PartitioningStrategy::Rebalance - } - } - }; - - let default_exchange = ExchangeMode::LocalThread; - - match edge.edge_type { - LogicalEdgeType::Forward => { - for i in 0..src_node.parallelism as u32 { - edges.push(PhysicalEdgeDescriptor { - src_vertex: VertexId(src_node.node_id), - src_subtask: SubtaskIndex(i), - dst_vertex: VertexId(dst_node.node_id), - dst_subtask: SubtaskIndex(i), - partitioning: partitioning.clone(), - exchange_mode: default_exchange.clone(), - }); - } - } - _ => { - for src_idx in 0..src_node.parallelism as u32 { - for dst_idx in 0..dst_node.parallelism as u32 { - edges.push(PhysicalEdgeDescriptor { - src_vertex: VertexId(src_node.node_id), - src_subtask: SubtaskIndex(src_idx), - dst_vertex: VertexId(dst_node.node_id), - dst_subtask: SubtaskIndex(dst_idx), - partitioning: partitioning.clone(), - exchange_mode: default_exchange.clone(), - }); - } - } - } - } - } - - let exec_graph = ExecutionGraph { - job_id: job_id_typed, - tasks, - edges, - }; - - // ==================================================================== - // 阶段 4:执行拓扑图防御性自检 (Validation) - // ==================================================================== - exec_graph - .validate() - .map_err(CompileError::ValidationError)?; - - Ok(exec_graph) - } - - /// 确定性状态 UID 
生成器:哪怕拓扑变化,只要算子内部逻辑不变就能继承状态。 - fn generate_deterministic_uid( - job_id: &str, - node_id: u32, - chain: &OperatorChain, - ) -> String { - let mut hasher = Sha256::new(); - hasher.update(job_id.as_bytes()); - hasher.update(&node_id.to_le_bytes()); - - for (op, _) in chain.iter() { - hasher.update(op.operator_name.to_string().as_bytes()); - hasher.update(&op.operator_config); - } - - let result = hasher.finalize(); - hex::encode(&result[..8]) - } - - /// 序列化整条算子链 (Operator Fusion) - fn serialize_operator_chain(chain: &OperatorChain) -> Result> { - bincode::serde::encode_to_vec(chain, bincode::config::standard()) - .map_err(|e| anyhow::anyhow!("bincode encode failed: {}", e)) - } - - /// 资源画像智能推算 - fn calculate_resource_profile( - chain: &OperatorChain, - parallelism: u32, - ) -> ResourceProfile { - let mut profile = ResourceProfile::default(); - - for (op, _) in chain.iter() { - let name = op.operator_name.to_string(); - if name.contains("Window") || name.contains("Join") || name.contains("Aggregate") { - profile.managed_memory_bytes += 512 * 1024 * 1024 / parallelism as u64; - profile.cpu_cores += 0.5; - } - if name.contains("Source") || name.contains("Sink") { - profile.network_memory_bytes += 128 * 1024 * 1024 / parallelism as u64; - } - } - profile - } -} diff --git a/src/runtime/streaming/cluster/mod.rs b/src/runtime/streaming/cluster/mod.rs deleted file mode 100644 index f337078c..00000000 --- a/src/runtime/streaming/cluster/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -pub mod graph; -pub mod manager; -pub mod master; -mod wiring; - -pub use graph::{ - ExchangeMode, ExecutionGraph, JobId, OperatorUid, PartitioningStrategy, - PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, VertexId, -}; -pub use manager::TaskManager; -pub use master::{CompileError, JobCompiler}; diff --git a/src/runtime/streaming/cluster/wiring.rs b/src/runtime/streaming/cluster/wiring.rs deleted file mode 100644 index eb3b4162..00000000 --- 
a/src/runtime/streaming/cluster/wiring.rs +++ /dev/null @@ -1,46 +0,0 @@ -//! 物理拓扑构建:channel 与一对一子任务边。 -//! -//! 将 `arroyo_datastream::LogicalGraph` 完整编译为 Task 管道属于上层 worker/planner; -//! 此处提供 **与图无关** 的 channel 工厂与边展开,供适配层调用。 - -use crate::runtime::streaming::protocol::tracked::TrackedEvent; -use std::collections::HashMap; -use tokio::sync::mpsc::{self, Receiver, Sender}; - -pub type SubtaskKey = (String, u32); - -pub type SubtaskOutChannels = HashMap>>; -pub type SubtaskInChannels = HashMap>>; - -pub fn stream_channel(capacity: usize) -> (Sender, Receiver) { - mpsc::channel(capacity) -} - -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct NodeSpec { - pub id: String, - pub parallelism: u32, -} - -#[derive(Debug, Clone)] -pub struct PhysicalEdge { - pub from: (String, u32), - pub to: (String, u32), -} - -/// 为每条 `PhysicalEdge` 建一条独立 channel,并挂到对应子任务的 sender/receiver 列表。 -pub fn build_one_to_one_channels( - edges: &[PhysicalEdge], - capacity: usize, -) -> (SubtaskOutChannels, SubtaskInChannels) { - let mut senders: SubtaskOutChannels = HashMap::new(); - let mut receivers: SubtaskInChannels = HashMap::new(); - - for e in edges { - let (tx, rx) = stream_channel(capacity); - senders.entry(e.from.clone()).or_default().push(tx); - receivers.entry(e.to.clone()).or_default().push(rx); - } - - (senders, receivers) -} diff --git a/src/runtime/streaming/connectors/mod.rs b/src/runtime/streaming/connectors/mod.rs deleted file mode 100644 index d10a55a9..00000000 --- a/src/runtime/streaming/connectors/mod.rs +++ /dev/null @@ -1,61 +0,0 @@ -use anyhow::Result; -use arrow_array::{ArrayRef, RecordBatch}; -use arrow_schema::Schema; -use async_trait::async_trait; -use std::collections::HashMap; -use std::sync::Arc; - -use crate::sql::common::OperatorConfig; - -/// 维表查询接口:由具体 Connector(如 Redis、MySQL)实现。 -#[async_trait] -pub trait LookupConnector: Send { - fn name(&self) -> &str; - - /// 根据 key 列批量查询外部系统,返回结果 batch(含 `_lookup_key_index` 列)。 - /// 返回 `None` 表示无匹配行。 - async fn 
lookup(&self, keys: &[ArrayRef]) -> Option>; -} - -/// Connector 工厂 trait:每种外部系统实现此 trait 提供 Source / Sink / Lookup 构建能力。 -pub trait Connector: Send + Sync { - fn name(&self) -> &str; - - fn make_lookup( - &self, - config: OperatorConfig, - schema: Arc, - ) -> Result>; -} - -/// 全局 Connector 注册表。 -pub struct ConnectorRegistry { - connectors: HashMap>, -} - -impl ConnectorRegistry { - pub fn new() -> Self { - Self { - connectors: HashMap::new(), - } - } - - pub fn register(&mut self, connector: Box) { - self.connectors - .insert(connector.name().to_string(), connector); - } - - pub fn get(&self, name: &str) -> Option<&dyn Connector> { - self.connectors.get(name).map(|c| c.as_ref()) - } -} - -/// 返回当前已注册的所有 Connector。 -/// -/// 目前返回空注册表,后续接入 Kafka / Redis 等时在此处注册。 -pub fn connectors() -> ConnectorRegistry { - let registry = ConnectorRegistry::new(); - // TODO: registry.register(Box::new(KafkaConnector)); - // TODO: registry.register(Box::new(RedisConnector)); - registry -} diff --git a/src/runtime/streaming/driver.rs b/src/runtime/streaming/driver.rs new file mode 100644 index 00000000..f2abec87 --- /dev/null +++ b/src/runtime/streaming/driver.rs @@ -0,0 +1,254 @@ +use std::future::pending; +use std::sync::Arc; + +use arrow_array::RecordBatch; +use tokio::sync::mpsc; + +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::{MessageOperator, OperatorContext, StreamOperator}; +use crate::runtime::streaming::context::{ChainedOperatorContext, TerminalOutputContext}; +use crate::runtime::streaming::environment::TaskEnvironment; +use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; +use crate::runtime::streaming::protocol::event::StreamEvent; +use crate::runtime::streaming::protocol::stream_out::StreamOutput; +use crate::runtime::streaming::protocol::tracked::TrackedEvent; +use crate::sql::common::CheckpointBarrier; + +pub struct StreamTaskDriver { + head_op: Box, + head_ctx: Box, + inbox: Option>, 
+ control_rx: mpsc::Receiver, +} + +impl StreamTaskDriver { + pub fn new( + task_id: u32, + mut operators: Vec>, + inbox: Option>, + outboxes: Vec>, + control_rx: mpsc::Receiver, + job_id: String, + ) -> Self { + let env = TaskEnvironment::new(job_id, task_id, 0, 1); + let mut current_op = operators.pop().expect("Operators pipeline cannot be empty"); + let mut current_ctx: Box = + Box::new(TerminalOutputContext::new(outboxes, env)); + + while let Some(prev_op) = operators.pop() { + let chained = ChainedOperatorContext::new(current_op, current_ctx); + current_op = prev_op; + current_ctx = Box::new(chained); + } + + Self { + head_op: current_op, + head_ctx: current_ctx, + inbox, + control_rx, + } + } + + pub async fn run(&mut self) -> anyhow::Result<()> { + self.head_op.open(self.head_ctx.env()).await?; + + 'main_loop: loop { + tokio::select! { + biased; + Some(cmd) = self.control_rx.recv() => { + if self.process_control_command(cmd).await? { + break 'main_loop; + } + } + Some(tracked) = async { + if let Some(ref mut rx) = self.inbox { rx.recv().await } + else { pending().await } + } => { + self.pump_event(tracked.event).await?; + } + } + } + + self.head_op.close(self.head_ctx.env()).await?; + Ok(()) + } + + async fn process_control_command(&mut self, cmd: ControlCommand) -> anyhow::Result { + match cmd { + ControlCommand::TriggerCheckpoint { barrier } => { + let barrier: CheckpointBarrier = barrier.into(); + self.pump_event(StreamEvent::Barrier(barrier)).await?; + Ok(false) + } + ControlCommand::Commit { epoch } => { + self.head_op.commit_checkpoint(epoch, self.head_ctx.env()).await?; + self.head_ctx.commit_checkpoint(epoch).await?; + Ok(false) + } + ControlCommand::Stop { mode } if mode == StopMode::Immediate => Ok(true), + other_cmd => { + let stop_head = self + .head_op + .handle_control(other_cmd.clone(), self.head_ctx.env()) + .await?; + let stop_rest = self.head_ctx.handle_control(other_cmd).await?; + Ok(stop_head || stop_rest) + } + } + } + + async fn 
pump_event(&mut self, event: StreamEvent) -> anyhow::Result<()> { + match event { + StreamEvent::Data(batch) => self.head_op.process_data(batch, self.head_ctx.as_mut()).await, + StreamEvent::Watermark(wm) => { + self.head_op.process_watermark(wm, self.head_ctx.as_mut()).await + } + StreamEvent::Barrier(br) => { + self.head_op + .snapshot_state(br.clone(), self.head_ctx.as_mut()) + .await?; + self.head_ctx.broadcast(StreamEvent::Barrier(br)).await + } + StreamEvent::EndOfStream => { + self.head_op.close(self.head_ctx.env()).await?; + self.head_ctx.broadcast(StreamEvent::EndOfStream).await + } + } + } +} + +pub struct MessageOperatorAdapter { + inner: Box, +} + +impl MessageOperatorAdapter { + pub fn new(inner: Box) -> Self { + Self { inner } + } + + async fn emit_outputs( + ctx: &mut dyn OperatorContext, + outputs: Vec, + ) -> anyhow::Result<()> { + for out in outputs { + match out { + StreamOutput::Forward(b) | StreamOutput::Broadcast(b) | StreamOutput::Keyed(_, b) => { + ctx.collect(b).await?; + } + StreamOutput::Watermark(wm) => { + ctx.broadcast(StreamEvent::Watermark(wm)).await?; + } + } + } + Ok(()) + } +} + +#[async_trait::async_trait(?Send)] +impl StreamOperator for MessageOperatorAdapter { + async fn open(&mut self, env: &mut TaskEnvironment) -> anyhow::Result<()> { + let mut ctx = TaskContext::new( + env.job_id.clone(), + env.task_id, + env.subtask_index, + env.parallelism, + vec![], + env.memory_pool.clone(), + ); + self.inner.on_start(&mut ctx).await + } + + async fn close(&mut self, env: &mut TaskEnvironment) -> anyhow::Result<()> { + let mut ctx = TaskContext::new( + env.job_id.clone(), + env.task_id, + env.subtask_index, + env.parallelism, + vec![], + env.memory_pool.clone(), + ); + let _ = self.inner.on_close(&mut ctx).await?; + Ok(()) + } + + async fn process_data( + &mut self, + batch: RecordBatch, + ctx: &mut dyn OperatorContext, + ) -> anyhow::Result<()> { + let mut op_ctx = TaskContext::new( + ctx.env().job_id.clone(), + ctx.env().task_id, + 
ctx.env().subtask_index, + ctx.env().parallelism, + vec![], + ctx.env().memory_pool.clone(), + ); + let outs = self.inner.process_data(0, batch, &mut op_ctx).await?; + Self::emit_outputs(ctx, outs).await + } + + async fn process_watermark( + &mut self, + wm: crate::sql::common::Watermark, + ctx: &mut dyn OperatorContext, + ) -> anyhow::Result<()> { + let mut op_ctx = TaskContext::new( + ctx.env().job_id.clone(), + ctx.env().task_id, + ctx.env().subtask_index, + ctx.env().parallelism, + vec![], + ctx.env().memory_pool.clone(), + ); + let outs = self.inner.process_watermark(wm, &mut op_ctx).await?; + Self::emit_outputs(ctx, outs).await + } + + async fn snapshot_state( + &mut self, + barrier: CheckpointBarrier, + ctx: &mut dyn OperatorContext, + ) -> anyhow::Result<()> { + let mut op_ctx = TaskContext::new( + ctx.env().job_id.clone(), + ctx.env().task_id, + ctx.env().subtask_index, + ctx.env().parallelism, + vec![], + ctx.env().memory_pool.clone(), + ); + self.inner.snapshot_state(barrier, &mut op_ctx).await + } + + async fn commit_checkpoint( + &mut self, + epoch: u32, + env: &mut TaskEnvironment, + ) -> anyhow::Result<()> { + let mut ctx = TaskContext::new( + env.job_id.clone(), + env.task_id, + env.subtask_index, + env.parallelism, + vec![], + env.memory_pool.clone(), + ); + self.inner.commit_checkpoint(epoch, &mut ctx).await + } + + async fn handle_control( + &mut self, + cmd: ControlCommand, + _env: &mut TaskEnvironment, + ) -> anyhow::Result { + match cmd { + ControlCommand::Stop { mode } => Ok(mode == StopMode::Immediate), + ControlCommand::DropState + | ControlCommand::Start + | ControlCommand::UpdateConfig { .. } + | ControlCommand::TriggerCheckpoint { .. } + | ControlCommand::Commit { .. 
} => Ok(false), + } + } +} diff --git a/src/runtime/streaming/error.rs b/src/runtime/streaming/error.rs index f00bd9c4..3d8fba19 100644 --- a/src/runtime/streaming/error.rs +++ b/src/runtime/streaming/error.rs @@ -1,10 +1,43 @@ +use std::fmt::Display; use thiserror::Error; -/// 子任务 / 源任务运行中的错误。 +/// 流水线 / 子任务运行期间的错误定义。 #[derive(Debug, Error)] pub enum RunError { - #[error("operator error: {0:#}")] + /// 算子内部业务逻辑抛出的错误 + #[error("Operator execution failed: {0:#}")] Operator(#[from] anyhow::Error), - #[error("downstream send: {0}")] + + /// 向下游 Task 发送数据/信号时通道阻塞或断开 + #[error("Downstream send failed: {0}")] DownstreamSend(String), + + /// 引擎内部状态机错误或拓扑规划错误(如:DAG 为空、在链条中间发生 Shuffle) + #[error("Internal engine error: {0}")] + Internal(String), + + /// Checkpoint 状态持久化或恢复时发生的错误 + #[error("State backend error: {0}")] + State(String), + + /// 底层网络或文件 I/O 错误 + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), } + +impl RunError { + /// 快捷构造器:引擎内部错误(常用于防御性编程和边界校验) + pub fn internal(msg: T) -> Self { + Self::Internal(msg.to_string()) + } + + /// 快捷构造器:下游发送异常 + pub fn downstream(msg: T) -> Self { + Self::DownstreamSend(msg.to_string()) + } + + /// 快捷构造器:状态后端异常 + pub fn state(msg: T) -> Self { + Self::State(msg.to_string()) + } +} \ No newline at end of file diff --git a/src/runtime/streaming/execution/mod.rs b/src/runtime/streaming/execution/mod.rs index 34002193..4d55e361 100644 --- a/src/runtime/streaming/execution/mod.rs +++ b/src/runtime/streaming/execution/mod.rs @@ -4,5 +4,5 @@ pub mod runner; pub mod source; pub mod tracker; -pub use runner::SubtaskRunner; -pub use source::{SourceRunner, SOURCE_IDLE_SLEEP}; +pub use runner::{OperatorDrive, SubtaskRunner}; +pub use source::{SourceRunner, SOURCE_IDLE_SLEEP, WATERMARK_EMIT_INTERVAL}; diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs index d824d025..fa907088 100644 --- a/src/runtime/streaming/execution/runner.rs +++ b/src/runtime/streaming/execution/runner.rs @@ -1,296 
+1,368 @@ +use async_trait::async_trait; +use tokio::sync::mpsc::Receiver; +use tokio_stream::{StreamExt, StreamMap}; +use tracing::{info, info_span, Instrument}; + use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::operator::MessageOperator; use crate::runtime::streaming::error::RunError; -use crate::runtime::streaming::protocol::control::ControlCommand; -use crate::runtime::streaming::protocol::event::StreamEvent; -use crate::runtime::streaming::protocol::stream_out::StreamOutput; -use crate::runtime::streaming::protocol::tracked::TrackedEvent; -use super::tracker::barrier_aligner::{AlignmentStatus, BarrierAligner}; -use super::tracker::watermark_tracker::WatermarkTracker; use crate::runtime::streaming::network::endpoint::BoxedEventStream; -use std::collections::VecDeque; -use std::pin::Pin; -use tokio::sync::mpsc::Receiver; -use tokio_stream::{StreamExt, StreamMap}; -use tracing::{debug, error, info, warn}; +use crate::runtime::streaming::protocol::{ + control::{ControlCommand, StopMode}, + event::StreamEvent, + stream_out::StreamOutput, + tracked::TrackedEvent, +}; +use crate::runtime::streaming::execution::tracker::{ + barrier_aligner::{AlignmentStatus, BarrierAligner}, + watermark_tracker::WatermarkTracker, +}; use crate::sql::common::{CheckpointBarrier, Watermark}; -pub struct SubtaskRunner { +// ========================================== +// 第一部分:逻辑处理层 - 算子融合链 (Logical Driver) +// ========================================== + +#[async_trait] +pub trait OperatorDrive: Send { + async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<(), RunError>; + async fn process_event( + &mut self, + input_idx: usize, + event: TrackedEvent, + ctx: &mut TaskContext, + ) -> Result; + async fn handle_control( + &mut self, + cmd: ControlCommand, + ctx: &mut TaskContext, + ) -> Result; + async fn on_close(&mut self, ctx: &mut TaskContext) -> Result<(), RunError>; +} + +pub struct ChainedDriver { operator: Box, - ctx: TaskContext, 
- inboxes: Vec, - control_rx: Receiver, + next: Option>, } -impl SubtaskRunner { - pub fn new( - operator: Box, - ctx: TaskContext, - inboxes: Vec, - control_rx: Receiver, - ) -> Self { - Self { operator, ctx, inboxes, control_rx } +impl ChainedDriver { + pub fn new(operator: Box, next: Option>) -> Self { + Self { operator, next } } - pub async fn run(mut self) -> Result<(), RunError> { - let input_count = self.inboxes.len(); - info!( - job_id = %self.ctx.job_id, - vertex = self.ctx.vertex_id, - subtask = self.ctx.subtask_idx, - inputs = input_count, - operator = %self.operator.name(), - "subtask starting" - ); - - self.operator.on_start(&mut self.ctx).await?; - - if input_count == 0 { - return self.run_source_loop().await; + /// 从后往前组装算子,构建责任链 + pub fn build_chain(mut operators: Vec>) -> Option> { + if operators.is_empty() { + return None; } - - let mut stream_map: StreamMap + Send>>> = StreamMap::new(); - for (i, inbox) in self.inboxes.into_iter().enumerate() { - stream_map.insert(i, inbox); + let mut next_driver: Option> = None; + while let Some(op) = operators.pop() { + let current = ChainedDriver::new(op, next_driver); + next_driver = Some(Box::new(current)); } + next_driver + } - let mut wm_tracker = WatermarkTracker::new(input_count); - let mut barrier_aligner = BarrierAligner::new(input_count); - let mut eof_count = 0usize; - let mut closed_on_full_eof = false; - - let tick_interval = self.operator.tick_interval(); - let mut tick_sleep: Option>> = - tick_interval.map(|d| Box::pin(tokio::time::sleep(d))); - let mut tick_index: u64 = 0; - - 'run: loop { - tokio::select! 
{ - biased; - - cmd_opt = self.control_rx.recv() => { - match cmd_opt { - None => { - debug!( - vertex = self.ctx.vertex_id, - subtask = self.ctx.subtask_idx, - "control channel closed" - ); - break 'run; - } - Some(cmd) => { - info!( - vertex = self.ctx.vertex_id, - subtask = self.ctx.subtask_idx, - ?cmd, - "control command" - ); - if Self::handle_control_command(&mut self.operator, &mut self.ctx, cmd) - .await? - { - break 'run; - } - } + async fn dispatch_outputs( + &mut self, + outputs: Vec, + ctx: &mut TaskContext, + ) -> Result<(), RunError> { + for out in outputs { + match out { + StreamOutput::Forward(b) => { + if let Some(next) = &mut self.next { + next.process_event(0, TrackedEvent::control(StreamEvent::Data(b)), ctx) + .await?; + } else { + ctx.collect(b).await?; } } - - next_item = stream_map.next() => { - let Some((input_idx, event)) = next_item else { - break 'run; - }; - - if barrier_aligner.is_blocked(input_idx) - && !matches!(event.event, StreamEvent::Barrier(_)) - { - barrier_aligner.buffer_event(input_idx, event); - } else { - let mut work = VecDeque::new(); - work.push_back((input_idx, event)); - let mut exit_run = false; - let mut dispatch = EventDispatchState { - operator: &mut self.operator, - ctx: &mut self.ctx, - work: &mut work, - wm_tracker: &mut wm_tracker, - barrier_aligner: &mut barrier_aligner, - eof_count: &mut eof_count, - closed_on_full_eof: &mut closed_on_full_eof, - input_count, - }; - while let Some((idx, ev)) = dispatch.work.pop_front() { - if Self::dispatch_stream_event(&mut dispatch, idx, ev).await? 
{ - exit_run = true; - break; - } - } - if exit_run { - break 'run; - } + StreamOutput::Keyed(hash, b) => { + if self.next.is_some() { + return Err(RunError::internal(format!( + "Topology Error: Keyed output emitted in the middle of chain by '{}'", + self.operator.name() + ))); } + ctx.collect_keyed(hash, b).await?; } - - _ = async { - match tick_sleep.as_mut() { - Some(s) => s.as_mut().await, - None => std::future::pending().await, + StreamOutput::Broadcast(b) => { + if self.next.is_some() { + return Err(RunError::internal(format!( + "Topology Error: Broadcast output emitted in the middle of chain by '{}'", + self.operator.name() + ))); } - }, if tick_interval.is_some() => { - let outs = self - .operator - .process_tick(tick_index, &mut self.ctx) + ctx.collect(b).await?; + } + StreamOutput::Watermark(wm) => { + if let Some(next) = &mut self.next { + next.process_event( + 0, + TrackedEvent::control(StreamEvent::Watermark(wm)), + ctx, + ) .await?; - tick_index = tick_index.wrapping_add(1); - Self::dispatch_stream_outputs(&mut self.ctx, outs).await?; - if let (Some(d), Some(s)) = (tick_interval, tick_sleep.as_mut()) { - s.as_mut() - .reset(tokio::time::Instant::now() + d); + } else { + ctx.broadcast(StreamEvent::Watermark(wm)).await?; } } } } - - if !closed_on_full_eof { - let close_outs = self.operator.on_close(&mut self.ctx).await?; - Self::dispatch_stream_outputs(&mut self.ctx, close_outs).await?; - } - - info!( - vertex = self.ctx.vertex_id, - subtask = self.ctx.subtask_idx, - "subtask shutdown" - ); Ok(()) } - async fn run_source_loop(mut self) -> Result<(), RunError> { - while let Some(cmd) = self.control_rx.recv().await { - if Self::handle_control_command(&mut self.operator, &mut self.ctx, cmd).await? 
{ - break; + async fn forward_signal( + &mut self, + event: StreamEvent, + ctx: &mut TaskContext, + ) -> Result<(), RunError> { + if let Some(next) = &mut self.next { + next.process_event(0, TrackedEvent::control(event), ctx).await?; + } else { + match event { + StreamEvent::Watermark(wm) => ctx.broadcast(StreamEvent::Watermark(wm)).await?, + StreamEvent::Barrier(b) => ctx.broadcast(StreamEvent::Barrier(b)).await?, + StreamEvent::EndOfStream => ctx.broadcast(StreamEvent::EndOfStream).await?, + StreamEvent::Data(_) => unreachable!(), } } - let close_outs = self.operator.on_close(&mut self.ctx).await?; - Self::dispatch_stream_outputs(&mut self.ctx, close_outs).await?; - if !self.ctx.outboxes.is_empty() { - self.ctx.broadcast(StreamEvent::EndOfStream).await?; + Ok(()) + } +} + +#[async_trait] +impl OperatorDrive for ChainedDriver { + async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<(), RunError> { + self.operator.on_start(ctx).await?; + if let Some(next) = &mut self.next { + next.on_start(ctx).await?; } - info!( - vertex = self.ctx.vertex_id, - subtask = self.ctx.subtask_idx, - "Source subtask finished" - ); Ok(()) } - async fn handle_control_command( - operator: &mut Box, + async fn process_event( + &mut self, + input_idx: usize, + tracked: TrackedEvent, ctx: &mut TaskContext, - cmd: ControlCommand, ) -> Result { - if let ControlCommand::TriggerCheckpoint { barrier } = &cmd { - let barrier: CheckpointBarrier = barrier.clone().into(); - if let Err(e) = operator.snapshot_state(barrier, ctx).await { - error!("Source snapshot failed: {}", e); + let mut should_stop = false; + match tracked.event { + StreamEvent::Data(batch) => { + let outputs = self.operator.process_data(input_idx, batch, ctx).await?; + self.dispatch_outputs(outputs, ctx).await?; + } + StreamEvent::Watermark(wm) => { + let outputs = self.operator.process_watermark(wm.clone(), ctx).await?; + self.dispatch_outputs(outputs, ctx).await?; + self.forward_signal(StreamEvent::Watermark(wm), 
ctx).await?; + } + StreamEvent::Barrier(barrier) => { + self.operator.snapshot_state(barrier.clone(), ctx).await?; + self.forward_signal(StreamEvent::Barrier(barrier), ctx).await?; + } + StreamEvent::EndOfStream => { + should_stop = true; + self.forward_signal(StreamEvent::EndOfStream, ctx).await?; } - ctx.broadcast(StreamEvent::Barrier(barrier)).await?; } + Ok(should_stop) + } - if let ControlCommand::Commit { epoch } = &cmd { - if let Err(e) = operator.commit_checkpoint(*epoch, ctx).await { - error!("commit_checkpoint failed: {}", e); + async fn handle_control( + &mut self, + cmd: ControlCommand, + ctx: &mut TaskContext, + ) -> Result { + let mut stop = false; + match &cmd { + ControlCommand::TriggerCheckpoint { barrier } => { + let b: CheckpointBarrier = barrier.clone().into(); + self.operator.snapshot_state(b, ctx).await?; + } + ControlCommand::Commit { epoch } => { + self.operator.commit_checkpoint(*epoch, ctx).await?; + } + ControlCommand::Stop { mode } => { + if *mode == StopMode::Immediate { + stop = true; + } } + ControlCommand::DropState | ControlCommand::Start | ControlCommand::UpdateConfig { .. } => {} } - match operator.handle_control(cmd, ctx).await { - Ok(should_stop) => Ok(should_stop), - Err(e) => { - warn!("handle_control error: {}", e); - Ok(false) + if let Some(next) = &mut self.next { + if next.handle_control(cmd, ctx).await? 
{ + stop = true; } + } else if let ControlCommand::TriggerCheckpoint { barrier } = cmd { + ctx.broadcast(StreamEvent::Barrier(barrier.into())).await?; } + + Ok(stop) } - async fn dispatch_stream_outputs( - ctx: &mut TaskContext, - outputs: Vec, - ) -> Result<(), RunError> { - for out in outputs { - match out { - StreamOutput::Forward(b) => ctx.collect(b).await?, - StreamOutput::Keyed(hash, b) => ctx.collect_keyed(hash, b).await?, - StreamOutput::Broadcast(b) => ctx.collect(b).await?, - StreamOutput::Watermark(wm) => { - ctx.broadcast(StreamEvent::Watermark(wm)).await?; - } - } + async fn on_close(&mut self, ctx: &mut TaskContext) -> Result<(), RunError> { + let close_outs = self.operator.on_close(ctx).await?; + self.dispatch_outputs(close_outs, ctx).await?; + if let Some(next) = &mut self.next { + next.on_close(ctx).await?; } Ok(()) } +} - async fn dispatch_stream_event( - st: &mut EventDispatchState<'_>, - input_idx: usize, - tracked: TrackedEvent, - ) -> Result { - let event = tracked.event; - match event { - StreamEvent::Data(batch) => { - let outputs = st - .operator - .process_data(input_idx, batch, st.ctx) - .await?; - Self::dispatch_stream_outputs(st.ctx, outputs).await?; - } - StreamEvent::Watermark(wm) => { - if let Some(aligned_wm) = st.wm_tracker.update(input_idx, wm) { - if let Watermark::EventTime(t) = aligned_wm { - st.ctx.advance_watermark(t); - } - let outputs = st - .operator - .process_watermark(aligned_wm.clone(), st.ctx) - .await?; - Self::dispatch_stream_outputs(st.ctx, outputs).await?; - st.ctx - .broadcast(StreamEvent::Watermark(aligned_wm)) - .await?; - } +// ========================================== +// 第二部分:物理执行层 - 流水线 (Physical Driver) +// ========================================== + +pub struct Pipeline { + chain_head: Box, + ctx: TaskContext, + inboxes: Vec, + control_rx: Receiver, + + wm_tracker: WatermarkTracker, + barrier_aligner: BarrierAligner, + /// Barrier 未对齐时从轮询池移除的输入流(背压) + paused_streams: Vec>, +} + +impl Pipeline { + pub fn 
new( + operators: Vec>, + ctx: TaskContext, + inboxes: Vec, + control_rx: Receiver, + ) -> Result { + let input_count = inboxes.len(); + let chain_head = ChainedDriver::build_chain(operators) + .ok_or_else(|| RunError::internal("Cannot build pipeline with empty operators"))?; + + let paused_streams = (0..input_count).map(|_| None).collect(); + + Ok(Self { + chain_head, + ctx, + inboxes, + control_rx, + wm_tracker: WatermarkTracker::new(input_count), + barrier_aligner: BarrierAligner::new(input_count), + paused_streams, + }) + } + + pub async fn run(mut self) -> Result<(), RunError> { + let span = info_span!( + "pipeline_run", + job_id = %self.ctx.job_id, + vertex = self.ctx.vertex_id + ); + + async move { + info!("Pipeline initializing..."); + self.chain_head.on_start(&mut self.ctx).await?; + + let mut active_streams = StreamMap::new(); + for (i, stream) in std::mem::take(&mut self.inboxes).into_iter().enumerate() { + active_streams.insert(i, stream); } - StreamEvent::Barrier(barrier) => { - match st.barrier_aligner.mark(input_idx, &barrier) { - AlignmentStatus::Pending => {} - AlignmentStatus::Complete(buffered) => { - if let Err(e) = st.operator.snapshot_state(barrier, st.ctx).await { - error!("Operator snapshot failed: {}", e); + + loop { + tokio::select! { + biased; + + Some(cmd) = self.control_rx.recv() => { + if self.chain_head.handle_control(cmd, &mut self.ctx).await? 
{ + break; } - st.ctx.broadcast(StreamEvent::Barrier(barrier)).await?; - for pair in buffered { - st.work.push_back(pair); + } + + Some((idx, tracked_event)) = active_streams.next() => { + match tracked_event.event { + StreamEvent::Data(batch) => { + self.chain_head + .process_event( + idx, + TrackedEvent::control(StreamEvent::Data(batch)), + &mut self.ctx, + ) + .await?; + } + + StreamEvent::Barrier(barrier) => { + match self.barrier_aligner.mark(idx, &barrier) { + AlignmentStatus::Pending => { + if let Some(stream) = active_streams.remove(&idx) { + self.paused_streams[idx] = Some(stream); + } + } + AlignmentStatus::Complete => { + self.chain_head + .process_event( + idx, + TrackedEvent::control(StreamEvent::Barrier(barrier)), + &mut self.ctx, + ) + .await?; + + for i in 0..self.paused_streams.len() { + if let Some(stream) = self.paused_streams[i].take() { + active_streams.insert(i, stream); + } + } + } + } + } + + StreamEvent::Watermark(wm) => { + if let Some(aligned_wm) = self.wm_tracker.update(idx, wm) { + if let Watermark::EventTime(t) = aligned_wm { + self.ctx.advance_watermark(t); + } + self.chain_head + .process_event( + idx, + TrackedEvent::control(StreamEvent::Watermark(aligned_wm)), + &mut self.ctx, + ) + .await?; + } + } + + StreamEvent::EndOfStream => { + if self.wm_tracker.increment_eof() == self.wm_tracker.input_count() { + self.chain_head + .process_event( + idx, + TrackedEvent::control(StreamEvent::EndOfStream), + &mut self.ctx, + ) + .await?; + break; + } + } } } + + else => break, } } - StreamEvent::EndOfStream => { - *st.eof_count += 1; - if *st.eof_count == st.input_count { - let close_outs = st.operator.on_close(st.ctx).await?; - Self::dispatch_stream_outputs(st.ctx, close_outs).await?; - *st.closed_on_full_eof = true; - st.ctx.broadcast(StreamEvent::EndOfStream).await?; - return Ok(true); - } - } + + self.teardown().await } - Ok(false) + .instrument(span) + .await } -} -struct EventDispatchState<'a> { - operator: &'a mut Box, - ctx: &'a mut 
TaskContext, - work: &'a mut VecDeque<(usize, TrackedEvent)>, - wm_tracker: &'a mut WatermarkTracker, - barrier_aligner: &'a mut BarrierAligner, - eof_count: &'a mut usize, - closed_on_full_eof: &'a mut bool, - input_count: usize, + async fn teardown(mut self) -> Result<(), RunError> { + info!("Pipeline tearing down..."); + self.chain_head.on_close(&mut self.ctx).await?; + Ok(()) + } } + +/// 与执行引擎语义对齐的别名 +pub type SubtaskRunner = Pipeline; diff --git a/src/runtime/streaming/execution/source.rs b/src/runtime/streaming/execution/source.rs index 9fe1983e..d51132ac 100644 --- a/src/runtime/streaming/execution/source.rs +++ b/src/runtime/streaming/execution/source.rs @@ -1,18 +1,25 @@ +//! 源任务物理驱动:控制面优先、`fetch_next` 非阻塞契约、可选融合算子链下推。 + use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::source::{SourceEvent, SourceOperator}; use crate::runtime::streaming::error::RunError; +use crate::runtime::streaming::execution::runner::OperatorDrive; use crate::runtime::streaming::protocol::control::ControlCommand; use crate::runtime::streaming::protocol::event::StreamEvent; +use crate::runtime::streaming::protocol::tracked::TrackedEvent; +use crate::sql::common::CheckpointBarrier; use std::time::Duration; use tokio::sync::mpsc::Receiver; -use tokio::time::sleep; -use tracing::{debug, info, warn}; -use crate::sql::common::CheckpointBarrier; +use tokio::time::{interval, MissedTickBehavior}; +use tracing::{info, info_span, warn, Instrument}; pub const SOURCE_IDLE_SLEEP: Duration = Duration::from_millis(50); +pub const WATERMARK_EMIT_INTERVAL: Duration = Duration::from_millis(200); pub struct SourceRunner { operator: Box, + /// 有链时数据与信号经链尾再 `collect` / `broadcast`;无链则直接走 `TaskContext`。 + chain_head: Option>, ctx: TaskContext, control_rx: Receiver, } @@ -20,101 +27,144 @@ pub struct SourceRunner { impl SourceRunner { pub fn new( operator: Box, + chain_head: Option>, ctx: TaskContext, control_rx: Receiver, ) -> Self { Self { operator, + 
chain_head, ctx, control_rx, } } pub async fn run(mut self) -> Result<(), RunError> { - info!( - job_id = %self.ctx.job_id, + let span = info_span!( + "source_run", vertex = self.ctx.vertex_id, - subtask = self.ctx.subtask_idx, - operator = %self.operator.name(), - "source subtask starting" + op = self.operator.name() ); - self.operator.on_start(&mut self.ctx).await?; - - let mut is_running = true; - let mut idle_pending = false; - - while is_running { - tokio::select! { - biased; - cmd_opt = self.control_rx.recv() => { - match cmd_opt { - None => { - debug!( - vertex = self.ctx.vertex_id, - subtask = self.ctx.subtask_idx, - "source control channel closed" - ); - is_running = false; - } - Some(cmd) => { - match cmd { - ControlCommand::Stop { .. } => { + async move { + info!("Source subtask starting"); + self.operator.on_start(&mut self.ctx).await?; + if let Some(chain) = &mut self.chain_head { + chain.on_start(&mut self.ctx).await?; + } + + let mut idle_timer = interval(SOURCE_IDLE_SLEEP); + idle_timer.set_missed_tick_behavior(MissedTickBehavior::Skip); + + let mut wm_timer = interval(WATERMARK_EMIT_INTERVAL); + wm_timer.set_missed_tick_behavior(MissedTickBehavior::Skip); + + let mut is_idle = false; + let mut is_running = true; + + while is_running { + tokio::select! { + biased; + + cmd_opt = self.control_rx.recv() => { + match cmd_opt { + None => is_running = false, + Some(cmd) => { + if self.handle_control(cmd).await? { is_running = false; } - ControlCommand::TriggerCheckpoint { barrier } => { - let barrier: CheckpointBarrier = barrier.into(); - self.operator - .snapshot_state(barrier, &mut self.ctx) - .await?; - self.ctx - .broadcast(StreamEvent::Barrier(barrier)) - .await?; - } - ControlCommand::Start - | ControlCommand::DropState - | ControlCommand::Commit { .. } - | ControlCommand::UpdateConfig { .. 
} => { - debug!(?cmd, "source: ignored control command"); - } } } } - } - _ = sleep(SOURCE_IDLE_SLEEP), if is_running && idle_pending => { - idle_pending = false; - } - fetch_res = self.operator.fetch_next(&mut self.ctx), if is_running && !idle_pending => { - match fetch_res { - Ok(SourceEvent::Data(batch)) => { - self.ctx.collect(batch).await?; - } - Ok(SourceEvent::Watermark(wm)) => { - self.ctx.broadcast(StreamEvent::Watermark(wm)).await?; - } - Ok(SourceEvent::Idle) => { - idle_pending = true; + + _ = wm_timer.tick() => { + if let Some(wm) = self.operator.poll_watermark() { + self.dispatch_event(StreamEvent::Watermark(wm)).await?; } - Err(e) => { - warn!( - vertex = self.ctx.vertex_id, - error = %e, - "fetch_next error" - ); - return Err(RunError::Operator(e)); + } + + _ = idle_timer.tick(), if is_idle => { + is_idle = false; + } + + fetch_res = self.operator.fetch_next(&mut self.ctx), if !is_idle => { + match fetch_res { + Ok(SourceEvent::Data(batch)) => { + self.dispatch_event(StreamEvent::Data(batch)).await?; + } + Ok(SourceEvent::Watermark(wm)) => { + self.dispatch_event(StreamEvent::Watermark(wm)).await?; + } + Ok(SourceEvent::Idle) => { + is_idle = true; + idle_timer.reset(); + } + Ok(SourceEvent::EndOfStream) => { + self.dispatch_event(StreamEvent::EndOfStream).await?; + is_running = false; + } + Err(e) => { + warn!("fetch_next error: {}", e); + return Err(RunError::Operator(e)); + } } } } } + + self.teardown().await } + .instrument(span) + .await + } - self.operator.on_close(&mut self.ctx).await?; + async fn dispatch_event(&mut self, event: StreamEvent) -> Result<(), RunError> { + if let Some(chain) = &mut self.chain_head { + let _stop = chain + .process_event(0, TrackedEvent::control(event), &mut self.ctx) + .await?; + } else { + match event { + StreamEvent::Data(b) => self.ctx.collect(b).await?, + StreamEvent::Watermark(w) => { + self.ctx.broadcast(StreamEvent::Watermark(w)).await?; + } + StreamEvent::Barrier(b) => { + 
self.ctx.broadcast(StreamEvent::Barrier(b)).await?; + } + StreamEvent::EndOfStream => { + self.ctx.broadcast(StreamEvent::EndOfStream).await?; + } + } + } + Ok(()) + } - info!( - vertex = self.ctx.vertex_id, - subtask = self.ctx.subtask_idx, - "source subtask shutdown" - ); + async fn handle_control(&mut self, cmd: ControlCommand) -> Result { + match cmd { + ControlCommand::TriggerCheckpoint { barrier } => { + let b: CheckpointBarrier = barrier.into(); + self.operator.snapshot_state(b.clone(), &mut self.ctx).await?; + self.dispatch_event(StreamEvent::Barrier(b)).await?; + } + ControlCommand::Stop { .. } => return Ok(true), + other => { + if let Some(chain) = &mut self.chain_head { + if chain.handle_control(other, &mut self.ctx).await? { + return Ok(true); + } + } + } + } + Ok(false) + } + + async fn teardown(mut self) -> Result<(), RunError> { + self.operator.on_close(&mut self.ctx).await?; + if let Some(chain) = &mut self.chain_head { + chain.on_close(&mut self.ctx).await?; + } + info!("Source subtask shutdown"); Ok(()) } } diff --git a/src/runtime/streaming/execution/tracker/barrier_aligner.rs b/src/runtime/streaming/execution/tracker/barrier_aligner.rs index e284922b..05f2cc90 100644 --- a/src/runtime/streaming/execution/tracker/barrier_aligner.rs +++ b/src/runtime/streaming/execution/tracker/barrier_aligner.rs @@ -1,13 +1,15 @@ -//! Chandy–Lamport 风格屏障对齐。 +//! 
Chandy–Lamport 风格屏障对齐(零内存缓冲:未对齐时从轮询池移除输入流,依赖底层背压)。 use std::collections::HashSet; -use crate::runtime::streaming::protocol::TrackedEvent; + use crate::sql::common::CheckpointBarrier; #[derive(Debug)] pub enum AlignmentStatus { + /// 未对齐:外层应将当前通道从 `StreamMap` 挂起(Pause)。 Pending, - Complete(Vec<(usize, TrackedEvent)>), + /// 已对齐:外层触发快照并唤醒所有挂起通道(Resume)。 + Complete, } #[derive(Debug)] @@ -15,7 +17,6 @@ pub struct BarrierAligner { input_count: usize, current_epoch: Option, reached_inputs: HashSet, - buffered_events: Vec<(usize, TrackedEvent)>, } impl BarrierAligner { @@ -24,34 +25,23 @@ impl BarrierAligner { input_count, current_epoch: None, reached_inputs: HashSet::new(), - buffered_events: Vec::new(), } } - pub fn is_blocked(&self, input_idx: usize) -> bool { - self.current_epoch.is_some() && self.reached_inputs.contains(&input_idx) - } - - pub fn buffer_event(&mut self, input_idx: usize, event: TrackedEvent) { - self.buffered_events.push((input_idx, event)); - } - pub fn mark(&mut self, input_idx: usize, barrier: &CheckpointBarrier) -> AlignmentStatus { if self.current_epoch != Some(barrier.epoch) { self.current_epoch = Some(barrier.epoch); self.reached_inputs.clear(); - self.buffered_events.clear(); } self.reached_inputs.insert(input_idx); if self.reached_inputs.len() == self.input_count { - let released = std::mem::take(&mut self.buffered_events); self.current_epoch = None; self.reached_inputs.clear(); - AlignmentStatus::Complete(released) + AlignmentStatus::Complete } else { AlignmentStatus::Pending } } -} \ No newline at end of file +} diff --git a/src/runtime/streaming/execution/tracker/watermark_tracker.rs b/src/runtime/streaming/execution/tracker/watermark_tracker.rs index 29233fc3..ca2f082f 100644 --- a/src/runtime/streaming/execution/tracker/watermark_tracker.rs +++ b/src/runtime/streaming/execution/tracker/watermark_tracker.rs @@ -5,6 +5,7 @@ use crate::sql::common::Watermark; pub struct WatermarkTracker { watermarks: Vec>, current_min_watermark: Option, + 
eof_count: usize, } impl WatermarkTracker { @@ -12,6 +13,7 @@ impl WatermarkTracker { Self { watermarks: vec![None; input_count], current_min_watermark: None, + eof_count: 0, } } @@ -31,6 +33,15 @@ impl WatermarkTracker { self.current_min_watermark = Some(new_min); Some(new_min) } + + pub fn increment_eof(&mut self) -> usize { + self.eof_count += 1; + self.eof_count + } + + pub fn input_count(&self) -> usize { + self.watermarks.len() + } } #[cfg(test)] diff --git a/src/runtime/streaming/factory/registry/kafka_factory.rs b/src/runtime/streaming/factory/registry/kafka_factory.rs new file mode 100644 index 00000000..6a451166 --- /dev/null +++ b/src/runtime/streaming/factory/registry/kafka_factory.rs @@ -0,0 +1,328 @@ +//! Kafka Source/Sink:从 [`ConnectorOp`] + [`OperatorConfig`] 构造物理算子(鉴权与 client 配置合并)。 + +use anyhow::{anyhow, bail, Context, Result}; +use prost::Message; +use std::collections::HashMap; +use std::num::NonZeroU32; +use std::sync::Arc; + +use protocol::grpc::api::ConnectorOp; +use tracing::{info, warn}; + +use super::OperatorConstructor; +use crate::runtime::streaming::api::operator::{ConstructedOperator, Registry}; +use crate::runtime::streaming::api::source::SourceOffset; +use crate::runtime::streaming::format::{ + BadDataPolicy, DataSerializer, DecimalEncoding as RtDecimalEncoding, Format as RuntimeFormat, + JsonFormat as RuntimeJsonFormat, TimestampFormat as RtTimestampFormat, +}; +use crate::runtime::streaming::operators::sink::kafka::{ConsistencyMode, KafkaSinkOperator}; +use crate::runtime::streaming::operators::source::kafka::{BufferedDeserializer, KafkaSourceOperator}; +use crate::sql::common::formats::{ + BadData, DecimalEncoding as SqlDecimalEncoding, Format as SqlFormat, JsonFormat as SqlJsonFormat, + TimestampFormat as SqlTimestampFormat, +}; +use crate::sql::common::kafka_catalog::{ + KafkaConfig, KafkaConfigAuthentication, KafkaTable, ReadMode, SinkCommitMode, TableType, +}; +use crate::sql::common::{FsSchema, OperatorConfig}; + +const 
DEFAULT_SOURCE_BATCH_SIZE: usize = 1024; + +/// 合并连接级鉴权、全局 `connection_properties` 与表级 `client_configs`(表级覆盖同名键)。 +pub fn build_client_configs(config: &KafkaConfig, table: &KafkaTable) -> Result> { + let mut client_configs = HashMap::new(); + + match &config.authentication { + KafkaConfigAuthentication::None => {} + KafkaConfigAuthentication::Sasl { + protocol, + mechanism, + username, + password, + } => { + client_configs.insert("security.protocol".to_string(), protocol.clone()); + client_configs.insert("sasl.mechanism".to_string(), mechanism.clone()); + client_configs.insert("sasl.username".to_string(), username.clone()); + client_configs.insert("sasl.password".to_string(), password.clone()); + } + KafkaConfigAuthentication::AwsMskIam { region } => { + client_configs.insert("security.protocol".to_string(), "SASL_SSL".to_string()); + client_configs.insert("sasl.mechanism".to_string(), "OAUTHBEARER".to_string()); + client_configs.insert( + "sasl.oauthbearer.extensions".to_string(), + format!("logicalCluster=aws_msk;aws_region={region}"), + ); + } + } + + for (k, v) in &config.connection_properties { + client_configs.insert(k.clone(), v.clone()); + } + + for (k, v) in &table.client_configs { + if client_configs.contains_key(k) { + warn!( + "Kafka config key '{}' is defined in both connection and table; using table value", + k + ); + } + client_configs.insert(k.clone(), v.clone()); + } + + Ok(client_configs) +} + +fn bad_data_policy(b: Option) -> BadDataPolicy { + match b.unwrap_or_default() { + BadData::Fail {} => BadDataPolicy::Fail, + BadData::Drop {} => BadDataPolicy::Drop, + } +} + +fn sql_timestamp_format(t: SqlTimestampFormat) -> RtTimestampFormat { + match t { + SqlTimestampFormat::RFC3339 => RtTimestampFormat::RFC3339, + SqlTimestampFormat::UnixMillis => RtTimestampFormat::UnixMillis, + } +} + +fn sql_decimal_encoding(d: SqlDecimalEncoding) -> RtDecimalEncoding { + match d { + SqlDecimalEncoding::Number => RtDecimalEncoding::Number, + 
SqlDecimalEncoding::String => RtDecimalEncoding::String, + SqlDecimalEncoding::Bytes => RtDecimalEncoding::Bytes, + } +} + +fn sql_json_format_to_runtime(j: &SqlJsonFormat) -> RuntimeJsonFormat { + RuntimeJsonFormat { + timestamp_format: sql_timestamp_format(j.timestamp_format), + decimal_encoding: sql_decimal_encoding(j.decimal_encoding), + include_schema: j.include_schema, + } +} + +fn sql_format_to_runtime(f: SqlFormat) -> Result { + match f { + SqlFormat::Json(j) => Ok(RuntimeFormat::Json(sql_json_format_to_runtime(&j))), + SqlFormat::RawString(_) => Ok(RuntimeFormat::RawString), + SqlFormat::RawBytes(_) => Ok(RuntimeFormat::RawBytes), + other => bail!( + "Kafka connector: format '{}' is not supported for runtime deserializer/serializer yet", + other.name() + ), + } +} + +fn kafka_table_offset_to_runtime(o: crate::sql::common::KafkaTableSourceOffset) -> SourceOffset { + use crate::sql::common::KafkaTableSourceOffset as KOff; + match o { + KOff::Latest => SourceOffset::Latest, + KOff::Earliest => SourceOffset::Earliest, + KOff::Group => SourceOffset::Group, + } +} + +fn non_zero_rate_per_second(op: &OperatorConfig) -> NonZeroU32 { + op.rate_limit + .as_ref() + .and_then(|r| NonZeroU32::new(r.messages_per_second.max(1))) + .unwrap_or_else(|| NonZeroU32::new(1_000_000).expect("nonzero")) +} + +fn sink_fs_schema_adjusted( + fs: FsSchema, + key_field: &Option, + timestamp_field: &Option, +) -> Result { + if key_field.is_none() && timestamp_field.is_none() { + return Ok(fs); + } + let schema = fs.schema.clone(); + let ts = if let Some(name) = timestamp_field { + schema + .column_with_name(name) + .ok_or_else(|| anyhow!("timestamp column '{name}' not found in schema"))? + .0 + } else { + fs.timestamp_index + }; + let keys = fs.clone_storage_key_indices(); + let routing = if let Some(name) = key_field { + let k = schema + .column_with_name(name) + .ok_or_else(|| anyhow!("key column '{name}' not found in schema"))? 
+ .0; + Some(vec![k]) + } else { + fs.clone_routing_key_indices() + }; + Ok(FsSchema::new(schema, ts, keys, routing)) +} + +fn decode_operator_config(op: &ConnectorOp) -> Result { + serde_json::from_str(&op.config).with_context(|| { + format!( + "Invalid OperatorConfig JSON for connector '{}'", + op.connector + ) + }) +} + +/// 由 [`ConnectorOp`] 构造 Kafka Source(`connector` 须为 `kafka`)。 +pub struct KafkaSourceDispatcher; + +impl OperatorConstructor for KafkaSourceDispatcher { + fn with_config(&self, payload: &[u8], _registry: Arc) -> Result { + let op = ConnectorOp::decode(payload) + .context("Failed to decode ConnectorOp protobuf for Kafka Source")?; + + if op.connector != "kafka" { + bail!( + "KafkaSourceDispatcher: expected connector 'kafka', got '{}'", + op.connector + ); + } + + let op_config = decode_operator_config(&op)?; + + let kafka_config: KafkaConfig = serde_json::from_value(op_config.connection.clone()) + .context("Failed to parse Kafka connection configuration")?; + + let kafka_table: KafkaTable = serde_json::from_value(op_config.table.clone()) + .context("Failed to parse Kafka table configuration")?; + + let TableType::Source { + offset, + read_mode, + group_id, + group_id_prefix, + } = &kafka_table.kind + else { + bail!( + "Expected Kafka Source, got Sink configuration for topic '{}'", + kafka_table.topic + ); + }; + + info!("Constructing Kafka Source for topic: {}", kafka_table.topic); + + let mut client_configs = build_client_configs(&kafka_config, &kafka_table)?; + if let Some(ReadMode::ReadCommitted) = read_mode { + client_configs.insert("isolation.level".to_string(), "read_committed".to_string()); + } + + let sql_format = op_config + .format + .clone() + .context("Format must be specified for Kafka Source")?; + let runtime_format = sql_format_to_runtime(sql_format)?; + let fs = op_config + .input_schema + .clone() + .context("input_schema is required for Kafka Source")?; + let bad = bad_data_policy(op_config.bad_data.clone()); + + let 
deserializer: std::boxed::Box< + dyn crate::runtime::streaming::operators::source::kafka::BatchDeserializer, + > = Box::new(BufferedDeserializer::new( + runtime_format, + fs.schema.clone(), + bad, + DEFAULT_SOURCE_BATCH_SIZE, + )); + + let source_op = KafkaSourceOperator::new( + kafka_table.topic.clone(), + kafka_config.bootstrap_servers.clone(), + group_id.clone(), + group_id_prefix.clone(), + kafka_table_offset_to_runtime(*offset), + client_configs, + non_zero_rate_per_second(&op_config), + op_config.metadata_fields, + deserializer, + ); + + Ok(ConstructedOperator::Source(Box::new(source_op))) + } +} + +/// 由 [`ConnectorOp`] 构造 Kafka Sink(`connector` 须为 `kafka`)。 +pub struct KafkaSinkDispatcher; + +impl OperatorConstructor for KafkaSinkDispatcher { + fn with_config(&self, payload: &[u8], _registry: Arc) -> Result { + let op = ConnectorOp::decode(payload) + .context("Failed to decode ConnectorOp protobuf for Kafka Sink")?; + + if op.connector != "kafka" { + bail!( + "KafkaSinkDispatcher: expected connector 'kafka', got '{}'", + op.connector + ); + } + + let op_config = decode_operator_config(&op)?; + + let kafka_config: KafkaConfig = serde_json::from_value(op_config.connection.clone()) + .context("Failed to parse Kafka connection configuration")?; + + let kafka_table: KafkaTable = serde_json::from_value(op_config.table.clone()) + .context("Failed to parse Kafka table configuration")?; + + let TableType::Sink { + commit_mode, + key_field, + timestamp_field, + } = &kafka_table.kind + else { + bail!( + "Expected Kafka Sink, got Source configuration for topic '{}'", + kafka_table.topic + ); + }; + + info!("Constructing Kafka Sink for topic: {}", kafka_table.topic); + + let client_configs = build_client_configs(&kafka_config, &kafka_table)?; + + let consistency = match commit_mode { + SinkCommitMode::ExactlyOnce => ConsistencyMode::ExactlyOnce, + SinkCommitMode::AtLeastOnce => ConsistencyMode::AtLeastOnce, + }; + + let sql_format = op_config + .format + .clone() + 
.context("Format must be specified for Kafka Sink")?; + let runtime_format = sql_format_to_runtime(sql_format)?; + + let fs_in = op_config + .input_schema + .clone() + .context("input_schema is required for Kafka Sink")?; + let fs = sink_fs_schema_adjusted(fs_in, key_field, timestamp_field)?; + + let serializer = DataSerializer::new(runtime_format, fs.schema.clone()); + + let sink_op = KafkaSinkOperator::new( + kafka_table.topic.clone(), + kafka_config.bootstrap_servers.clone(), + consistency, + client_configs, + fs, + serializer, + ); + + Ok(ConstructedOperator::Operator(Box::new(sink_op))) + } +} + +/// 注册 `KafkaSource` / `KafkaSink` 构造器(由 [`super::OperatorFactory::register_builtins`] 调用)。 +pub fn register_kafka_plugins(factory: &mut super::OperatorFactory) { + factory.register("KafkaSource", Box::new(KafkaSourceDispatcher)); + factory.register("KafkaSink", Box::new(KafkaSinkDispatcher)); + info!("Registered Kafka connector plugins (KafkaSource, KafkaSink)"); +} diff --git a/src/runtime/streaming/factory/registry.rs b/src/runtime/streaming/factory/registry/mod.rs similarity index 88% rename from src/runtime/streaming/factory/registry.rs rename to src/runtime/streaming/factory/registry/mod.rs index b8b45fff..9bb1148d 100644 --- a/src/runtime/streaming/factory/registry.rs +++ b/src/runtime/streaming/factory/registry/mod.rs @@ -9,7 +9,7 @@ use crate::runtime::streaming::api::operator::ConstructedOperator; use crate::runtime::streaming::operators::PassthroughOperator; use crate::runtime::streaming::operators::grouping::IncrementalAggregatingConstructor; use crate::runtime::streaming::operators::joins::{ - InstantJoinConstructor, JoinWithExpirationConstructor, LookupJoinConstructor, + InstantJoinConstructor, JoinWithExpirationConstructor, }; use crate::runtime::streaming::operators::key_by::KeyByConstructor; use crate::runtime::streaming::operators::watermark::WatermarkGeneratorConstructor; @@ -18,11 +18,14 @@ use crate::runtime::streaming::operators::windows::{ 
TumblingAggregateWindowConstructor, WindowFunctionConstructor, }; +pub mod kafka_factory; + +use kafka_factory::{register_kafka_plugins, KafkaSinkDispatcher, KafkaSourceDispatcher}; + use protocol::grpc::api::{ ConnectorOp, ExpressionWatermarkConfig, JoinOperator as JoinOperatorProto, KeyPlanOperator as KeyByProto, - LookupJoinOperator as LookupJoinProto, SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator, UpdatingAggregateOperator, WindowFunctionOperator as WindowFunctionProto, @@ -45,7 +48,7 @@ pub trait OperatorConstructor: Send + Sync { /// 持有 `name → OperatorConstructor` 映射与共享 [`Registry`]。 /// -/// [`TaskManager`] 在部署 TDD 时调用 [`create_operator`],完成从字节流到运行时算子的 +/// `JobManager` 在部署任务时调用 [`create_operator`],完成从字节流到运行时算子的 /// 反射式实例化。 pub struct OperatorFactory { constructors: HashMap>, @@ -118,6 +121,8 @@ impl OperatorFactory { self.register("Projection", Box::new(PassthroughConstructor("Projection"))); self.register("ArrowValue", Box::new(PassthroughConstructor("ArrowValue"))); self.register("ArrowKey", Box::new(PassthroughConstructor("ArrowKey"))); + + register_kafka_plugins(self); } } @@ -197,11 +202,8 @@ impl OperatorConstructor for InstantJoinBridge { struct LookupJoinBridge; impl OperatorConstructor for LookupJoinBridge { - fn with_config(&self, config: &[u8], registry: Arc) -> Result { - let proto = LookupJoinProto::decode(config) - .map_err(|e| anyhow!("Decode LookupJoinOperator failed: {e}"))?; - let op = LookupJoinConstructor.with_config(proto, registry)?; - Ok(ConstructedOperator::Operator(Box::new(op))) + fn with_config(&self, _config: &[u8], _registry: Arc) -> Result { + Err(anyhow!("LookupJoin is not supported in the current runtime")) } } @@ -232,24 +234,16 @@ impl OperatorConstructor for KeyByBridge { pub struct ConnectorSourceDispatcher; impl OperatorConstructor for ConnectorSourceDispatcher { - fn with_config(&self, config: &[u8], _registry: Arc) -> Result { + fn with_config(&self, config: &[u8], 
registry: Arc) -> Result { let op = ConnectorOp::decode(config) .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?; match op.connector.as_str() { - "kafka" => { - // TODO: 委托给 crate::connectors::kafka::build_kafka_source(&op.config) - Err(anyhow!( - "ConnectorSource '{}' factory wiring not yet implemented", - op.connector - )) - } - "redis" => { - Err(anyhow!( - "ConnectorSource '{}' factory wiring not yet implemented", - op.connector - )) - } + "kafka" => KafkaSourceDispatcher.with_config(config, registry), + "redis" => Err(anyhow!( + "ConnectorSource '{}' factory wiring not yet implemented", + op.connector + )), other => Err(anyhow!("Unsupported source connector type: {}", other)), } } @@ -258,18 +252,12 @@ impl OperatorConstructor for ConnectorSourceDispatcher { pub struct ConnectorSinkDispatcher; impl OperatorConstructor for ConnectorSinkDispatcher { - fn with_config(&self, config: &[u8], _registry: Arc) -> Result { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { let op = ConnectorOp::decode(config) .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?; match op.connector.as_str() { - "kafka" => { - // TODO: 委托给 crate::connectors::kafka::build_kafka_sink(&op.config) - Err(anyhow!( - "ConnectorSink '{}' factory wiring not yet implemented", - op.connector - )) - } + "kafka" => KafkaSinkDispatcher.with_config(config, registry), other => Err(anyhow!("Unsupported sink connector type: {}", other)), } } diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs index 82b02b3d..6413eba6 100644 --- a/src/runtime/streaming/job/job_manager.rs +++ b/src/runtime/streaming/job/job_manager.rs @@ -1,100 +1,92 @@ use std::collections::HashMap; -use std::sync::{Arc, RwLock}; +use std::sync::{Arc, OnceLock, RwLock}; -use protocol::grpc::api::{ChainedOperator, FsProgram}; +use anyhow::anyhow; use tokio::sync::mpsc; -use tracing::error; +use tokio_stream::wrappers::ReceiverStream; +use tracing::{error, info, warn}; 
+ +use protocol::grpc::api::{ChainedOperator, FsProgram}; -use crate::runtime::streaming::api::operator::ConstructedOperator; +use crate::runtime::streaming::api::context::TaskContext; +use crate::runtime::streaming::api::operator::{ConstructedOperator, MessageOperator}; +use crate::runtime::streaming::execution::runner::Pipeline; use crate::runtime::streaming::factory::OperatorFactory; use crate::runtime::streaming::job::edge_manager::EdgeManager; use crate::runtime::streaming::job::models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus}; -use crate::runtime::streaming::job::pipeline_runner::{FusionOperatorChain, PipelineRunner}; use crate::runtime::streaming::memory::MemoryPool; +use crate::runtime::streaming::network::endpoint::{BoxedEventStream, PhysicalSender}; use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; -use crate::runtime::streaming::storage::manager::TableManager; + +static GLOBAL_JOB_MANAGER: OnceLock> = OnceLock::new(); pub struct JobManager { active_jobs: Arc>>, operator_factory: Arc, memory_pool: Arc, - table_manager: Option>>, } impl JobManager { - pub fn new( - operator_factory: Arc, - max_memory_bytes: usize, - table_manager: Option>>, - ) -> Self { + pub fn new(operator_factory: Arc, max_memory_bytes: usize) -> Self { Self { active_jobs: Arc::new(RwLock::new(HashMap::new())), operator_factory, memory_pool: MemoryPool::new(max_memory_bytes), - table_manager, } } - /// 从逻辑计划点火物理线程 + pub fn init(operator_factory: Arc, max_memory_bytes: usize) -> anyhow::Result<()> { + let manager = Arc::new(Self::new(operator_factory, max_memory_bytes)); + GLOBAL_JOB_MANAGER + .set(manager) + .map_err(|_| anyhow!("JobManager singleton already initialized")) + } + + pub fn global() -> anyhow::Result> { + GLOBAL_JOB_MANAGER + .get() + .cloned() + .ok_or_else(|| anyhow!("JobManager not initialized. 
Call init() first.")) + } + + /// 核心主干:从逻辑计划点火物理流水线 pub async fn submit_job(&self, program: FsProgram) -> anyhow::Result { let job_id = format!("job-{}", chrono::Utc::now().timestamp_millis()); - let mut edge_manager = EdgeManager::build(&program.nodes, &program.edges); - let mut physical_pipelines = HashMap::new(); + let mut pipelines = HashMap::new(); for node in &program.nodes { - let pipe_id = node.node_index as u32; - let (inbox, outboxes) = edge_manager.take_endpoints(pipe_id); - let chain = self.create_chain(&node.operators)?; - let (ctrl_tx, ctrl_rx) = mpsc::channel(64); + let pipeline_id = node.node_index as u32; + + let (raw_inboxes, raw_outboxes) = edge_manager.take_endpoints(pipeline_id); + let physical_outboxes = raw_outboxes.into_iter().map(PhysicalSender::Local).collect(); + let physical_inboxes: Vec = raw_inboxes + .into_iter() + .map(|rx| Box::pin(ReceiverStream::new(rx)) as _) + .collect(); + + let operators = self.build_operator_chain(&node.operators)?; + + let (control_tx, control_rx) = mpsc::channel(64); let status = Arc::new(RwLock::new(PipelineStatus::Initializing)); - let thread_status = status.clone(); - let job_id_for_thread = job_id.clone(); - let exit_job_id = job_id_for_thread.clone(); - let registry_ptr = self.active_jobs.clone(); - let memory_pool = self.memory_pool.clone(); - let table_manager = self.table_manager.clone(); - - let handle = std::thread::Builder::new() - .name(format!("Job-{}-Pipe-{}", job_id, pipe_id)) - .spawn(move || { - { - let mut st = thread_status.write().unwrap(); - *st = PipelineStatus::Running; - } - - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .expect("build current thread runtime"); - - let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { - rt.block_on(async move { - let mut runner = PipelineRunner::new( - pipe_id, - chain, - inbox, - outboxes, - ctrl_rx, - job_id_for_thread.clone(), - memory_pool, - table_manager, - ); - runner.run().await - }) - 
})); - - Self::on_pipeline_exit(exit_job_id, pipe_id, result, thread_status, registry_ptr); - })?; - - physical_pipelines.insert( - pipe_id, + let handle = self.spawn_pipeline_thread( + job_id.clone(), + pipeline_id, + operators, + physical_inboxes, + physical_outboxes, + control_rx, + Arc::clone(&status), + )?; + + pipelines.insert( + pipeline_id, PhysicalPipeline { - pipeline_id: pipe_id, + pipeline_id, handle: Some(handle), status, - control_tx: ctrl_tx, + control_tx, }, ); } @@ -102,97 +94,157 @@ impl JobManager { let graph = PhysicalExecutionGraph { job_id: job_id.clone(), program, - pipelines: physical_pipelines, + pipelines, start_time: std::time::Instant::now(), }; self.active_jobs.write().unwrap().insert(job_id.clone(), graph); + info!(job_id = %job_id, "Job submitted successfully."); + Ok(job_id) } pub async fn stop_job(&self, job_id: &str, mode: StopMode) -> anyhow::Result<()> { - let controllers = { - let jobs = self.active_jobs.read().unwrap(); - let graph = jobs + let control_senders: Vec<_> = { + let jobs_guard = self.active_jobs.read().unwrap(); + let graph = jobs_guard .get(job_id) - .ok_or_else(|| anyhow::anyhow!("job not found: {job_id}"))?; - graph - .pipelines - .values() - .map(|p| p.control_tx.clone()) - .collect::>() + .ok_or_else(|| anyhow::anyhow!("Job not found: {job_id}"))?; + + graph.pipelines.values().map(|p| p.control_tx.clone()).collect() }; - for tx in controllers { - tx.send(ControlCommand::Stop { mode: mode.clone() }).await?; + for tx in control_senders { + let _ = tx.send(ControlCommand::Stop { mode: mode.clone() }).await; } + + info!(job_id = %job_id, mode = ?mode, "Job stop signal dispatched."); Ok(()) } pub fn get_pipeline_statuses(&self, job_id: &str) -> Option> { - let jobs = self.active_jobs.read().unwrap(); - let graph = jobs.get(job_id)?; + let jobs_guard = self.active_jobs.read().unwrap(); + let graph = jobs_guard.get(job_id)?; + Some( - graph - .pipelines + graph.pipelines .iter() - .map(|(id, pipeline)| (*id, 
pipeline.status.read().unwrap().clone())) + .map(|(id, pipeline)| { + (*id, pipeline.status.read().unwrap().clone()) + }) .collect(), ) } - fn create_chain(&self, operators: &[ChainedOperator]) -> anyhow::Result { - let mut chain = Vec::with_capacity(operators.len()); - for op in operators { - match self - .operator_factory - .create_operator(&op.operator_name, &op.operator_config)? - { + // ======================================================================== + // 内部私有方法 + // ======================================================================== + + fn build_operator_chain( + &self, + operator_configs: &[ChainedOperator], + ) -> anyhow::Result>> { + let mut chain = Vec::with_capacity(operator_configs.len()); + + for op_config in operator_configs { + let constructed = self.operator_factory + .create_operator(&op_config.operator_name, &op_config.operator_config)?; + + match constructed { ConstructedOperator::Operator(msg_op) => chain.push(msg_op), ConstructedOperator::Source(_) => { - return Err(anyhow::anyhow!( - "source operator '{}' cannot be used inside a physical pipeline chain", - op.operator_name - )); + anyhow::bail!( + "Topology Error: Source operator '{}' cannot be scheduled inside a MessageOperator physical chain.", + op_config.operator_name + ); } } } - Ok(FusionOperatorChain::new(chain)) + Ok(chain) } - fn on_pipeline_exit( + fn spawn_pipeline_thread( + &self, job_id: String, - pipe_id: u32, - result: std::thread::Result>, + pipeline_id: u32, + operators: Vec>, + inboxes: Vec, + outboxes: Vec, + control_rx: mpsc::Receiver, status: Arc>, - _registry: Arc>>, + ) -> anyhow::Result> { + let memory_pool = Arc::clone(&self.memory_pool); + let thread_name = format!("Task-{job_id}-{pipeline_id}"); + + let handle = std::thread::Builder::new() + .name(thread_name) + .spawn(move || { + *status.write().unwrap() = PipelineStatus::Running; + + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("Failed to build 
current-thread Tokio runtime for pipeline"); + + let job_id_inner = job_id.clone(); + let execution_result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + rt.block_on(async move { + let ctx = TaskContext::new( + job_id_inner, + pipeline_id, + 0, + 1, + outboxes, + memory_pool, + ); + + let pipeline = Pipeline::new(operators, ctx, inboxes, control_rx) + .map_err(|e| anyhow::anyhow!("Pipeline init failed: {e}"))?; + + pipeline.run().await.map_err(|e| anyhow::anyhow!("Pipeline execution failed: {e}")) + }) + })); + + Self::handle_pipeline_exit(&job_id, pipeline_id, execution_result, &status); + })?; + + Ok(handle) + } + + fn handle_pipeline_exit( + job_id: &str, + pipeline_id: u32, + thread_result: std::thread::Result>, + status: &RwLock, ) { - let mut needs_abort = false; - match result { + let mut is_fatal = false; + let final_status = match thread_result { + Ok(Ok(_)) => { + info!(job_id = %job_id, pipeline_id = pipeline_id, "Pipeline finished gracefully."); + PipelineStatus::Finished + } Ok(Err(e)) => { - *status.write().unwrap() = PipelineStatus::Failed { + error!(job_id = %job_id, pipeline_id = pipeline_id, error = %e, "Pipeline failed."); + is_fatal = true; + PipelineStatus::Failed { error: e.to_string(), is_panic: false, - }; - needs_abort = true; + } } Err(_) => { - *status.write().unwrap() = PipelineStatus::Failed { - error: "panic".into(), + error!(job_id = %job_id, pipeline_id = pipeline_id, "Pipeline thread panicked!"); + is_fatal = true; + PipelineStatus::Failed { + error: "Task thread encountered an unexpected panic".into(), is_panic: true, - }; - needs_abort = true; - } - Ok(Ok(_)) => { - *status.write().unwrap() = PipelineStatus::Finished; + } } - } + }; - if needs_abort { - error!( - "Pipeline {}-{} failed. 
Initiating Job Abort.", - job_id, pipe_id - ); + *status.write().unwrap() = final_status; + + if is_fatal { + warn!(job_id = %job_id, pipeline_id = pipeline_id, "Pipeline failure detected, Job should be aborted or recovered."); } } } diff --git a/src/runtime/streaming/job/mod.rs b/src/runtime/streaming/job/mod.rs index 9490e84e..448c26cd 100644 --- a/src/runtime/streaming/job/mod.rs +++ b/src/runtime/streaming/job/mod.rs @@ -1,7 +1,6 @@ pub mod edge_manager; pub mod job_manager; pub mod models; -pub mod pipeline_runner; pub use job_manager::JobManager; pub use models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus}; diff --git a/src/runtime/streaming/job/pipeline_runner.rs b/src/runtime/streaming/job/pipeline_runner.rs deleted file mode 100644 index 57c0fec8..00000000 --- a/src/runtime/streaming/job/pipeline_runner.rs +++ /dev/null @@ -1,242 +0,0 @@ -use std::future::pending; -use std::sync::Arc; - -use tokio::sync::mpsc; - -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; -use crate::runtime::streaming::memory::MemoryPool; -use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; -use crate::runtime::streaming::protocol::event::StreamEvent; -use crate::runtime::streaming::protocol::tracked::TrackedEvent; -use crate::runtime::streaming::storage::manager::TableManager; -use crate::sql::common::CheckpointBarrier; - -pub struct PipelineRunner { - chain: FusionOperatorChain, - inbox: Option>, - outboxes: Vec>, - control_rx: mpsc::Receiver, - ctx: TaskContext, -} - -impl PipelineRunner { - pub fn new( - pipeline_id: u32, - chain: FusionOperatorChain, - inbox: Option>, - outboxes: Vec>, - control_rx: mpsc::Receiver, - job_id: String, - memory_pool: Arc, - table_manager: Option>>, - ) -> Self { - Self { - chain, - inbox, - outboxes, - control_rx, - ctx: TaskContext::new(job_id, pipeline_id, 0, 1, vec![], memory_pool, table_manager), - } - } - - pub async fn run(&mut 
self) -> anyhow::Result<()> { - self.chain.on_start(&mut self.ctx).await?; - - 'main: loop { - tokio::select! { - biased; - Some(cmd) = self.control_rx.recv() => { - if self.handle_control(cmd).await? { - break 'main; - } - } - Some(event) = async { - if let Some(ref mut rx) = self.inbox { rx.recv().await } - else { pending().await } - } => { - self.process_event(event).await?; - } - } - } - - self.chain.on_close(&mut self.ctx).await?; - Ok(()) - } - - async fn handle_control(&mut self, cmd: ControlCommand) -> anyhow::Result { - match &cmd { - ControlCommand::TriggerCheckpoint { barrier } => { - let barrier: CheckpointBarrier = barrier.clone().into(); - self.chain.snapshot_state(barrier.clone(), &mut self.ctx).await?; - self.broadcast(StreamEvent::Barrier(barrier)).await?; - } - ControlCommand::Commit { epoch } => { - self.chain.commit_checkpoint(*epoch, &mut self.ctx).await?; - } - ControlCommand::Stop { mode } if *mode == StopMode::Immediate => { - return Ok(true); - } - _ => {} - } - - self.chain.handle_control(cmd, &mut self.ctx).await - } - - async fn process_event(&mut self, tracked: TrackedEvent) -> anyhow::Result<()> { - match tracked.event { - StreamEvent::Data(batch) => { - let outputs = self.chain.process_data(0, batch, &mut self.ctx).await?; - self.emit_outputs(outputs).await?; - } - StreamEvent::Watermark(wm) => { - let outputs = self.chain.process_watermark(wm.clone(), &mut self.ctx).await?; - self.emit_outputs(outputs).await?; - self.broadcast(StreamEvent::Watermark(wm)).await?; - } - StreamEvent::Barrier(barrier) => { - self.chain.snapshot_state(barrier.clone(), &mut self.ctx).await?; - self.broadcast(StreamEvent::Barrier(barrier)).await?; - } - StreamEvent::EndOfStream => { - self.broadcast(StreamEvent::EndOfStream).await?; - } - } - Ok(()) - } - - async fn emit_outputs( - &mut self, - outputs: Vec, - ) -> anyhow::Result<()> { - for out in outputs { - match out { - crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward(batch) - | 
crate::runtime::streaming::protocol::stream_out::StreamOutput::Broadcast(batch) - | crate::runtime::streaming::protocol::stream_out::StreamOutput::Keyed(_, batch) => { - self.broadcast(StreamEvent::Data(batch)).await?; - } - crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(wm) => { - self.broadcast(StreamEvent::Watermark(wm)).await?; - } - } - } - Ok(()) - } - - async fn broadcast(&self, event: StreamEvent) -> anyhow::Result<()> { - let tracked = TrackedEvent::control(event); - for tx in &self.outboxes { - tx.send(tracked.clone()).await?; - } - Ok(()) - } -} - -pub struct FusionOperatorChain { - operators: Vec>, -} - -impl FusionOperatorChain { - pub fn new(operators: Vec>) -> Self { - Self { operators } - } - - pub async fn on_start(&mut self, ctx: &mut TaskContext) -> anyhow::Result<()> { - for op in &mut self.operators { - op.on_start(ctx).await?; - } - Ok(()) - } - - pub async fn process_data( - &mut self, - input_idx: usize, - batch: arrow_array::RecordBatch, - ctx: &mut TaskContext, - ) -> anyhow::Result> { - let mut data_batches = vec![batch]; - for (idx, op) in self.operators.iter_mut().enumerate() { - let mut next_batches = Vec::new(); - for b in data_batches { - let outputs = op - .process_data(if idx == 0 { input_idx } else { 0 }, b, ctx) - .await?; - for out in outputs { - match out { - crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward(b) - | crate::runtime::streaming::protocol::stream_out::StreamOutput::Broadcast(b) - | crate::runtime::streaming::protocol::stream_out::StreamOutput::Keyed(_, b) => { - next_batches.push(b); - } - crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(_) => {} - } - } - } - data_batches = next_batches; - } - Ok(data_batches - .into_iter() - .map(crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward) - .collect()) - } - - pub async fn process_watermark( - &mut self, - watermark: crate::sql::common::Watermark, - ctx: &mut TaskContext, - ) -> 
anyhow::Result> { - let mut outs = vec![crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(watermark)]; - for op in &mut self.operators { - let mut next = Vec::new(); - for out in outs { - match out { - crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(wm) => { - let mut produced = op.process_watermark(wm, ctx).await?; - next.append(&mut produced); - } - other => next.push(other), - } - } - outs = next; - } - Ok(outs) - } - - pub async fn snapshot_state( - &mut self, - barrier: CheckpointBarrier, - ctx: &mut TaskContext, - ) -> anyhow::Result<()> { - for op in &mut self.operators { - op.snapshot_state(barrier.clone(), ctx).await?; - } - Ok(()) - } - - pub async fn commit_checkpoint(&mut self, epoch: u32, ctx: &mut TaskContext) -> anyhow::Result<()> { - for op in &mut self.operators { - op.commit_checkpoint(epoch, ctx).await?; - } - Ok(()) - } - - pub async fn handle_control( - &mut self, - cmd: ControlCommand, - ctx: &mut TaskContext, - ) -> anyhow::Result { - let mut should_stop = false; - for op in &mut self.operators { - should_stop = should_stop || op.handle_control(cmd.clone(), ctx).await?; - } - Ok(should_stop) - } - - pub async fn on_close(&mut self, ctx: &mut TaskContext) -> anyhow::Result<()> { - for op in &mut self.operators { - let _ = op.on_close(ctx).await?; - } - Ok(()) - } -} diff --git a/src/runtime/streaming/lib.rs b/src/runtime/streaming/lib.rs index 6b145cd7..06cab2ee 100644 --- a/src/runtime/streaming/lib.rs +++ b/src/runtime/streaming/lib.rs @@ -15,7 +15,6 @@ pub mod api; pub mod arrow; -pub mod cluster; pub mod error; pub mod execution; pub mod factory; @@ -24,23 +23,19 @@ pub mod memory; pub mod network; pub mod operators; pub mod protocol; -pub mod state; pub use api::{ ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext, }; -pub use cluster::{ - CompileError, ExchangeMode, ExecutionGraph, JobCompiler, JobId, PartitioningStrategy, - PhysicalEdgeDescriptor, 
ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, TaskManager, - VertexId, -}; pub use error::RunError; -pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner}; +pub use execution::{ + OperatorDrive, SourceRunner, SubtaskRunner, SOURCE_IDLE_SLEEP, WATERMARK_EMIT_INTERVAL, +}; pub use factory::{OperatorConstructor, OperatorFactory}; pub use job::{JobManager, PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus}; pub use memory::{MemoryPool, MemoryTicket}; pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub}; pub use protocol::{ - CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput, Watermark, + CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput, control_channel, merge_watermarks, watermark_strictly_advances, }; diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs index 237f3c06..4a761460 100644 --- a/src/runtime/streaming/mod.rs +++ b/src/runtime/streaming/mod.rs @@ -15,8 +15,6 @@ pub mod api; pub mod arrow; -pub mod cluster; -pub mod connectors; pub mod error; pub mod execution; pub mod factory; @@ -26,21 +24,13 @@ pub mod memory; pub mod network; pub mod operators; pub mod protocol; -pub mod storage; pub use api::{ - ConstructedOperator, MessageOperator, Registry, SourceEvent, SourceOffset, SourceOperator, - TaskContext, -}; -pub use cluster::{ - CompileError, ExchangeMode, ExecutionGraph, JobCompiler, JobId, PartitioningStrategy, - PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, TaskManager, - VertexId, + ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext, }; pub use error::RunError; pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner}; pub use factory::{OperatorConstructor, OperatorFactory}; -pub use job::{JobManager, PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus}; pub use memory::{MemoryPool, MemoryTicket}; pub use 
network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub}; pub use protocol::{ diff --git a/src/runtime/streaming/network/environment.rs b/src/runtime/streaming/network/environment.rs index 789af2a8..19aedec7 100644 --- a/src/runtime/streaming/network/environment.rs +++ b/src/runtime/streaming/network/environment.rs @@ -1,12 +1,8 @@ -use crate::runtime::streaming::cluster::graph::{ - ExchangeMode, ExecutionGraph, SubtaskIndex, VertexId, -}; -use crate::runtime::streaming::protocol::tracked::TrackedEvent; -use super::endpoint::{BoxedEventStream, PhysicalSender, RemoteSenderStub}; +use super::endpoint::{BoxedEventStream, PhysicalSender}; use std::collections::HashMap; -use tokio::sync::mpsc; -use tokio_stream::wrappers::ReceiverStream; -use tracing::info; + +pub type VertexId = u32; +pub type SubtaskIndex = u32; /// 物理网络路由注册表 pub struct NetworkEnvironment { @@ -22,44 +18,6 @@ impl NetworkEnvironment { } } - pub fn build_from_graph(graph: &ExecutionGraph, local_queue_size: usize) -> Self { - let mut env = Self::new(); - - for edge in &graph.edges { - let src_key = (edge.src_vertex, edge.src_subtask); - let dst_key = (edge.dst_vertex, edge.dst_subtask); - - match &edge.exchange_mode { - ExchangeMode::LocalThread => { - let (tx, rx) = mpsc::channel::(local_queue_size); - - let sender = PhysicalSender::Local(tx); - let receiver_stream = - Box::pin(ReceiverStream::new(rx)) as BoxedEventStream; - - env.outboxes.entry(src_key).or_default().push(sender); - env.inboxes.entry(dst_key).or_default().push(receiver_stream); - } - ExchangeMode::RemoteNetwork { target_addr } => { - let remote_stub = RemoteSenderStub { - target_addr: target_addr.clone(), - }; - env.outboxes - .entry(src_key) - .or_default() - .push(PhysicalSender::Remote(remote_stub)); - } - } - } - - info!( - "Network Environment built. 
Wired {} connections.", - graph.edges.len() - ); - - env - } - pub fn take_outboxes( &mut self, vertex_id: VertexId, diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs index ac2cd585..42eda177 100644 --- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs +++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs @@ -473,8 +473,7 @@ impl IncrementalAggregatingFunc { Ok(()) } - async fn initialize(&mut self, ctx: &mut TaskContext) -> Result<()> { - let mut tm = ctx.table_manager_guard().await?; + async fn initialize(&mut self, _ctx: &mut TaskContext) -> Result<()> { // let table = tm // .get_uncached_key_value_view("a") // .await diff --git a/src/runtime/streaming/operators/joins/lookup_join.rs b/src/runtime/streaming/operators/joins/lookup_join.rs deleted file mode 100644 index c6458174..00000000 --- a/src/runtime/streaming/operators/joins/lookup_join.rs +++ /dev/null @@ -1,365 +0,0 @@ -//! 
维表 Lookup Join(Enrichment):与 worker `arrow/lookup_join` 逻辑对齐,实现 [`MessageOperator`]。 - -use anyhow::{anyhow, Result}; -use arrow::compute::filter_record_batch; -use arrow::row::{OwnedRow, RowConverter, SortField}; -use arrow_array::cast::AsArray; -use arrow_array::types::UInt64Type; -use arrow_array::{Array, BooleanArray, RecordBatch}; -use arrow_schema::{DataType, Field, FieldRef, Schema}; -use async_trait::async_trait; -use datafusion::physical_expr::PhysicalExpr; -use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; -use datafusion_proto::physical_plan::from_proto::parse_physical_expr; -use datafusion_proto::protobuf::PhysicalExprNode; -use mini_moka::sync::Cache; -use prost::Message; -use protocol::grpc::api::{JoinType, LookupJoinOperator as LookupJoinProto}; -use std::collections::HashMap; -use std::sync::Arc; -use std::time::Duration; - -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; -use crate::runtime::streaming::connectors::{LookupConnector, connectors}; -use crate::runtime::streaming::StreamOutput; -use crate::sql::common::{CheckpointBarrier, FsSchema, MetadataField, OperatorConfig, Watermark, LOOKUP_KEY_INDEX_FIELD}; - -#[derive(Copy, Clone, PartialEq, Eq)] -pub enum LookupJoinType { - Left, - Inner, -} - -/// 维表查询连接算子:外部系统打宽 + 可选 LRU 缓存。 -pub struct LookupJoinOperator { - name: String, - connector: Box, - key_exprs: Vec>, - cache: Option>, - key_row_converter: RowConverter, - result_row_converter: RowConverter, - join_type: LookupJoinType, - lookup_schema: Arc, - metadata_fields: Vec, - input_schema: Arc, - /// 与 worker 侧 `ctx.out_schema` 对齐:由 input 去 key + lookup 列 + 时间列拼成。 - output_schema: Arc, -} - -fn build_lookup_output_schema( - input: &FsSchema, - lookup_columns: &[FieldRef], -) -> anyhow::Result> { - let key_indices = input.routing_keys().cloned().unwrap_or_default(); - let ts = input.timestamp_index; - let mut out: Vec = Vec::new(); - for i in 
0..input.schema.fields().len() { - if key_indices.contains(&i) || i == ts { - continue; - } - out.push(input.schema.fields()[i].clone()); - } - out.extend(lookup_columns.iter().cloned()); - out.push(input.schema.fields()[ts].clone()); - Ok(Arc::new(Schema::new(out))) -} - -impl LookupJoinOperator { - async fn process_lookup_batch(&mut self, batch: RecordBatch) -> Result> { - let num_rows = batch.num_rows(); - if num_rows == 0 { - return Ok(vec![]); - } - - let key_arrays: Vec<_> = self - .key_exprs - .iter() - .map(|expr| { - expr.evaluate(&batch) - .map_err(|e| anyhow!("key expr evaluate: {e}"))? - .into_array(num_rows) - .map_err(|e| anyhow!("key expr into_array: {e}")) - }) - .collect::>()?; - - let rows = self - .key_row_converter - .convert_columns(&key_arrays) - .map_err(|e| anyhow!("key_row_converter: {e}"))?; - - let mut key_map: HashMap> = HashMap::new(); - for (i, row) in rows.iter().enumerate() { - key_map.entry(row.owned()).or_default().push(i); - } - - let uncached_keys: Vec<&OwnedRow> = if let Some(cache) = &mut self.cache { - key_map - .keys() - .filter(|k| !cache.contains_key(*k)) - .collect() - } else { - key_map.keys().collect() - }; - - // 按 key 字节存 OwnedRow,避免借用 `convert_columns` 返回的临时行缓冲。 - let mut results: HashMap, OwnedRow> = HashMap::new(); - - if !uncached_keys.is_empty() { - let cols = self - .key_row_converter - .convert_rows(uncached_keys.iter().map(|r| r.row())) - .map_err(|e| anyhow!("convert_rows for lookup: {e}"))?; - - if let Some(result_batch) = self.connector.lookup(&cols).await { - let mut result_batch = result_batch.map_err(|e| anyhow!("connector lookup: {e}"))?; - - let key_idx_col = result_batch - .schema() - .index_of(LOOKUP_KEY_INDEX_FIELD) - .map_err(|e| anyhow!("{e}"))?; - let keys = result_batch.remove_column(key_idx_col); - let keys = keys.as_primitive::(); - - let result_rows = self - .result_row_converter - .convert_columns(result_batch.columns()) - .map_err(|e| anyhow!("result_row_converter: {e}"))?; - - for (i, v) in 
result_rows.iter().enumerate() { - if keys.is_null(i) { - return Err(anyhow!("lookup key index is null at row {i}")); - } - let req_idx = keys.value(i) as usize; - if req_idx >= uncached_keys.len() { - return Err(anyhow!( - "lookup key index {req_idx} out of range ({} keys)", - uncached_keys.len() - )); - } - let key_bytes = uncached_keys[req_idx].as_ref().to_vec(); - let owned = v.owned(); - results.insert(key_bytes.clone(), owned.clone()); - if let Some(cache) = &mut self.cache { - cache.insert(uncached_keys[req_idx].clone(), owned); - } - } - } - } - - let mut output_rows = self - .result_row_converter - .empty_rows(batch.num_rows(), batch.num_rows().saturating_mul(10)); - - for row in rows.iter() { - let row_owned = self - .cache - .as_mut() - .and_then(|c| c.get(&row.owned())) - .unwrap_or_else(|| { - results - .get(row.as_ref()) - .expect("missing lookup result for key (cache miss without connector row)") - .clone() - }); - output_rows.push(row_owned.row()); - } - - let right_side = self - .result_row_converter - .convert_rows(output_rows.iter()) - .map_err(|e| anyhow!("convert_rows output: {e}"))?; - - let nonnull = (self.join_type == LookupJoinType::Inner).then(|| { - let mut nonnull = vec![false; batch.num_rows()]; - for (_, a) in self - .lookup_schema - .fields() - .iter() - .zip(right_side.iter()) - .filter(|(f, _)| { - !self - .metadata_fields - .iter() - .any(|m| &m.field_name == f.name()) - }) - { - if let Some(nulls) = a.logical_nulls() { - for (valid, b) in nulls.iter().zip(nonnull.iter_mut()) { - *b |= valid; - } - } else { - nonnull.fill(true); - break; - } - } - BooleanArray::from(nonnull) - }); - - let key_indices = self - .input_schema - .routing_keys() - .cloned() - .unwrap_or_default(); - let non_keys: Vec<_> = (0..batch.num_columns()) - .filter(|i| !key_indices.contains(i) && *i != self.input_schema.timestamp_index) - .collect(); - - let mut result_cols = batch - .project(&non_keys) - .map_err(|e| anyhow!("project non_keys: {e}"))? 
- .columns() - .to_vec(); - result_cols.extend(right_side); - result_cols.push(batch.column(self.input_schema.timestamp_index).clone()); - - let mut out_batch = RecordBatch::try_new(self.output_schema.clone(), result_cols) - .map_err(|e| anyhow!("try_new output batch: {e}"))?; - - if let Some(mask) = nonnull { - out_batch = filter_record_batch(&out_batch, &mask).map_err(|e| anyhow!("{e}"))?; - } - - if out_batch.num_rows() == 0 { - return Ok(vec![]); - } - - Ok(vec![StreamOutput::Forward(out_batch)]) - } -} - -#[async_trait] -impl MessageOperator for LookupJoinOperator { - fn name(&self) -> &str { - &self.name - } - - async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> { - Ok(()) - } - - async fn process_data( - &mut self, - _input_idx: usize, - batch: RecordBatch, - _ctx: &mut TaskContext, - ) -> Result> { - self.process_lookup_batch(batch).await - } - - async fn process_watermark( - &mut self, - _watermark: Watermark, - _ctx: &mut TaskContext, - ) -> Result> { - Ok(vec![]) - } - - async fn snapshot_state( - &mut self, - _barrier: CheckpointBarrier, - _ctx: &mut TaskContext, - ) -> Result<()> { - Ok(()) - } - - async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result> { - Ok(vec![]) - } -} - -/// 从配置构造 [`LookupJoinOperator`](非 `ConstructedOperator` / `ArrowOperator`)。 -pub struct LookupJoinConstructor; - -impl LookupJoinConstructor { - pub fn with_config( - &self, - config: LookupJoinProto, - registry: Arc, - ) -> anyhow::Result { - let join_type = config.join_type(); - let input_schema: FsSchema = config.input_schema.unwrap().try_into()?; - let lookup_schema: FsSchema = config.lookup_schema.unwrap().try_into()?; - - let exprs = config - .key_exprs - .iter() - .map(|e| { - let expr = PhysicalExprNode::decode(&mut e.left_expr.as_slice())?; - Ok(parse_physical_expr( - &expr, - registry.as_ref(), - &input_schema.schema, - &DefaultPhysicalExtensionCodec {}, - )?) 
- }) - .collect::>>()?; - - let op = config.connector.unwrap(); - let operator_config: OperatorConfig = serde_json::from_str(&op.config)?; - - let result_row_converter = RowConverter::new( - lookup_schema - .schema_without_timestamp() - .fields - .iter() - .map(|f| SortField::new(f.data_type().clone())) - .collect(), - )?; - - let lookup_schema_arc = Arc::new( - lookup_schema - .with_additional_fields( - [Field::new(LOOKUP_KEY_INDEX_FIELD, DataType::UInt64, false)].into_iter(), - )? - .schema_without_timestamp(), - ); - - let output_schema = build_lookup_output_schema(&input_schema, lookup_schema_arc.fields())?; - - let connector = connectors() - .get(op.connector.as_str()) - .unwrap_or_else(|| panic!("No connector with name '{}'", op.connector)) - .make_lookup(operator_config.clone(), lookup_schema_arc.clone())?; - - let name = format!("LookupJoin({})", connector.name()); - - let max_capacity_bytes = config.max_capacity_bytes.unwrap_or(8 * 1024 * 1024); - let cache = (max_capacity_bytes > 0).then(|| { - let mut c = Cache::builder() - .weigher(|k: &OwnedRow, v: &OwnedRow| (k.as_ref().len() + v.as_ref().len()) as u32) - .max_capacity(max_capacity_bytes); - - if let Some(ttl) = config.ttl_micros { - c = c.time_to_live(Duration::from_micros(ttl)); - } - c.build() - }); - - let key_row_converter = RowConverter::new( - exprs - .iter() - .map(|e| Ok(SortField::new(e.data_type(&input_schema.schema)?))) - .collect::>()?, - )?; - - Ok(LookupJoinOperator { - name, - connector, - key_exprs: exprs, - cache, - key_row_converter, - result_row_converter, - join_type: match join_type { - JoinType::Inner => LookupJoinType::Inner, - JoinType::Left => LookupJoinType::Left, - jt => panic!("invalid lookup join type {:?}", jt), - }, - lookup_schema: lookup_schema_arc, - metadata_fields: operator_config.metadata_fields, - input_schema: Arc::new(input_schema), - output_schema, - }) - } -} - diff --git a/src/runtime/streaming/operators/joins/mod.rs 
b/src/runtime/streaming/operators/joins/mod.rs index d53e4b91..ccfff792 100644 --- a/src/runtime/streaming/operators/joins/mod.rs +++ b/src/runtime/streaming/operators/joins/mod.rs @@ -1,7 +1,5 @@ pub mod join_instance; pub mod join_with_expiration; -pub mod lookup_join; pub use join_instance::{InstantJoinConstructor, InstantJoinOperator}; pub use join_with_expiration::{JoinWithExpirationConstructor, JoinWithExpirationOperator}; -pub use lookup_join::{LookupJoinConstructor, LookupJoinOperator, LookupJoinType}; diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs index e3c0f566..958b5320 100644 --- a/src/runtime/streaming/operators/mod.rs +++ b/src/runtime/streaming/operators/mod.rs @@ -15,9 +15,7 @@ mod value_execution; pub use stateless_physical_executor::StatelessPhysicalExecutor; pub use grouping::{IncrementalAggregatingFunc, Key, UpdatingCache}; -pub use joins::{ - InstantJoinOperator, JoinWithExpirationOperator, LookupJoinOperator, LookupJoinType, -}; +pub use joins::{InstantJoinOperator, JoinWithExpirationOperator}; pub use key_by::KeyByOperator; pub use sink::{ConsistencyMode, KafkaSinkOperator}; pub use source::{BatchDeserializer, BufferedDeserializer, KafkaSourceOperator, KafkaState}; diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs index 0b68b88b..1ce01673 100644 --- a/src/runtime/streaming/operators/sink/kafka/mod.rs +++ b/src/runtime/streaming/operators/sink/kafka/mod.rs @@ -209,14 +209,7 @@ impl MessageOperator for KafkaSinkOperator { self.at_least_once_producer = Some(self.create_producer(ctx, None)?); } ConsistencyMode::ExactlyOnce => { - let mut next_idx = { - let mut tm = ctx.table_manager_guard().await?; - let index_table = tm - .get_global_keyed_state::("tx_idx") - .await - .map_err(|e| anyhow!(e))?; - index_table.get(&ctx.subtask_idx).copied().unwrap_or(0) - }; + let mut next_idx = 0usize; let active_producer = self.create_producer(ctx, 
Some(next_idx))?; next_idx += 1; @@ -301,17 +294,6 @@ impl MessageOperator for KafkaSinkOperator { let old_producer = std::mem::replace(&mut state.active_producer, new_producer); state.producer_awaiting_commit = Some(old_producer); - { - let mut tm = ctx.table_manager_guard().await?; - let index_table = tm - .get_global_keyed_state::("tx_idx") - .await - .map_err(|e| anyhow!(e))?; - index_table - .insert(ctx.subtask_idx, state.next_transaction_index) - .await; - } - state.next_transaction_index += 1; } diff --git a/src/runtime/streaming/operators/source/kafka/mod.rs b/src/runtime/streaming/operators/source/kafka/mod.rs index 595fbcc3..b17a504b 100644 --- a/src/runtime/streaming/operators/source/kafka/mod.rs +++ b/src/runtime/streaming/operators/source/kafka/mod.rs @@ -10,7 +10,7 @@ use rdkafka::consumer::{CommitMode, Consumer, StreamConsumer}; use rdkafka::{ClientConfig, Message as KMessage, Offset, TopicPartitionList}; use std::collections::HashMap; use std::num::NonZeroU32; -use std::time::Duration; +use std::time::{Duration, Instant}; use tracing::{debug, error, info, warn}; use crate::runtime::streaming::api::context::TaskContext; @@ -28,7 +28,7 @@ pub struct KafkaState { offset: i64, } -/// 增量反序列化缓冲 trait:Source 逐条 `deserialize_slice`,攒满后 `flush_buffer` 输出 [`RecordBatch`]。 +/// 增量反序列化缓冲 trait:Source 逐条 `deserialize_slice`,攒满或超时后 `flush_buffer` 输出 [`RecordBatch`]。 pub trait BatchDeserializer: Send + 'static { fn deserialize_slice( &mut self, @@ -40,6 +40,9 @@ pub trait BatchDeserializer: Send + 'static { fn should_flush(&self) -> bool; fn flush_buffer(&mut self) -> Result>; + + /// 缓冲区是否无任何待反序列化数据。 + fn is_empty(&self) -> bool; } // --------------------------------------------------------------------------- @@ -88,6 +91,10 @@ impl BatchDeserializer for BufferedDeserializer { self.buffer.clear(); Ok(Some(batch)) } + + fn is_empty(&self) -> bool { + self.buffer.is_empty() + } } impl SourceOffset { @@ -104,6 +111,9 @@ impl SourceOffset { // 2. 
核心算子外壳 // ============================================================================ +const KAFKA_POLL_TIMEOUT: Duration = Duration::from_millis(100); +const MAX_BATCH_LINGER_TIME: Duration = Duration::from_millis(500); + pub struct KafkaSourceOperator { pub topic: String, pub bootstrap_servers: String, @@ -121,6 +131,9 @@ pub struct KafkaSourceOperator { current_offsets: HashMap, is_empty_assignment: bool, + + /// 上次成功 flush 出 batch 的时间,用于低流量时按逗留时间强制发车。 + last_flush_time: Instant, } impl KafkaSourceOperator { @@ -149,6 +162,7 @@ impl KafkaSourceOperator { deserializer, current_offsets: HashMap::new(), is_empty_assignment: false, + last_flush_time: Instant::now(), } } @@ -175,18 +189,8 @@ impl KafkaSourceOperator { .set("group.id", &group_id) .create()?; - let (has_state, state_map) = { - let mut tm = ctx.table_manager_guard().await?; - let global_state = tm - .get_global_keyed_state::("k") - .await - .map_err(|e| anyhow!(e))?; - let restored_states: Vec<_> = global_state.get_all().values().copied().collect(); - let has_state = !restored_states.is_empty(); - let state_map: HashMap = - restored_states.into_iter().map(|s| (s.partition, s)).collect(); - (has_state, state_map) - }; + let has_state = false; + let state_map: HashMap = HashMap::new(); let metadata = consumer .fetch_metadata(Some(&self.topic), Duration::from_secs(30)) @@ -266,12 +270,16 @@ impl SourceOperator for KafkaSourceOperator { .as_ref() .ok_or_else(|| anyhow!("rate limiter not initialized"))?; - let recv_result = tokio::time::timeout(Duration::from_millis(50), consumer.recv()).await; - - match recv_result { + match tokio::time::timeout(KAFKA_POLL_TIMEOUT, consumer.recv()).await { Ok(Ok(msg)) => { + let partition = msg.partition(); + let offset = msg.offset(); + let timestamp = msg.timestamp().to_millis().unwrap_or(0); + + // 无论是否有 payload(含 Tombstone),都必须推进位点,否则会永久卡在墓碑消息上。 + self.current_offsets.insert(partition, offset); + if let Some(payload) = msg.payload() { - let timestamp = 
msg.timestamp().to_millis().unwrap_or(0); let topic = msg.topic(); let connector_metadata = if !self.metadata_fields.is_empty() { @@ -299,17 +307,25 @@ impl SourceOperator for KafkaSourceOperator { timestamp.max(0) as u64, connector_metadata, )?; + } else { + debug!( + "Received tombstone message at partition {} offset {}", + partition, offset + ); + } - self.current_offsets.insert(msg.partition(), msg.offset()); + rate_limiter.until_ready().await; - rate_limiter.until_ready().await; + let should_flush_by_size = self.deserializer.should_flush(); + let should_flush_by_time = self.last_flush_time.elapsed() > MAX_BATCH_LINGER_TIME; - if self.deserializer.should_flush() { - if let Some(batch) = self.deserializer.flush_buffer()? { - return Ok(SourceEvent::Data(batch)); - } + if !self.deserializer.is_empty() && (should_flush_by_size || should_flush_by_time) { + if let Some(batch) = self.deserializer.flush_buffer()? { + self.last_flush_time = Instant::now(); + return Ok(SourceEvent::Data(batch)); } } + Ok(SourceEvent::Idle) } Ok(Err(e)) => { @@ -317,8 +333,10 @@ impl SourceOperator for KafkaSourceOperator { Err(anyhow!("Kafka error: {}", e)) } Err(_) => { - if self.deserializer.should_flush() { + // 超时内无新消息:若缓冲区仍有积压,强制 flush,避免低流量下数据长期滞留。 + if !self.deserializer.is_empty() { if let Some(batch) = self.deserializer.flush_buffer()? 
{ + self.last_flush_time = Instant::now(); return Ok(SourceEvent::Data(batch)); } } @@ -334,25 +352,8 @@ impl SourceOperator for KafkaSourceOperator { ) -> Result<()> { debug!("Source [{}] executing checkpoint", ctx.subtask_idx); - let mut tm = ctx.table_manager_guard().await?; - let global_state = tm - .get_global_keyed_state::("k") - .await - .map_err(|e| anyhow!(e))?; - let mut topic_partitions = TopicPartitionList::new(); - for (&partition, &offset) in &self.current_offsets { - global_state - .insert( - partition, - KafkaState { - partition, - offset: offset + 1, - }, - ) - .await; - topic_partitions .add_partition_offset(&self.topic, partition, Offset::Offset(offset)) .map_err(|e| anyhow!("add_partition_offset: {e}"))?; diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs index f210c95a..3af64bf7 100644 --- a/src/runtime/streaming/operators/watermark/watermark_generator.rs +++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs @@ -73,6 +73,8 @@ impl WatermarkGeneratorOperator { Some(from_nanos(max_ts as u128)) } + /// 水位线计算必须取评估后数组的 **Max**,不能取 Min:同一 Batch 内多行时, + /// Min 会低估“已见事件时间”的安全基线(例如 ts-5s 在两行上 min 会偏早)。 fn evaluate_watermark(&self, batch: &RecordBatch) -> Result { let watermark_array = self .expression @@ -84,10 +86,10 @@ impl WatermarkGeneratorOperator { .downcast_ref::() .ok_or_else(|| anyhow!("watermark expression must return TimestampNanosecondArray"))?; - let min_watermark_nanos = aggregate::min(typed_array) - .ok_or_else(|| anyhow!("failed to extract min watermark from batch"))?; + let max_watermark_nanos = aggregate::max(typed_array) + .ok_or_else(|| anyhow!("failed to extract max watermark from batch"))?; - Ok(from_nanos(min_watermark_nanos as u128)) + Ok(from_nanos(max_watermark_nanos as u128)) } } @@ -101,19 +103,8 @@ impl MessageOperator for WatermarkGeneratorOperator { Some(Duration::from_secs(1)) } - async fn on_start(&mut self, 
ctx: &mut TaskContext) -> Result<()> { + async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> { self.last_event_wall = SystemTime::now(); - - let mut tm = ctx.table_manager_guard().await?; - let gs = tm - .get_global_keyed_state::("s") - .await - .map_err(|e| anyhow!("global keyed state s: {e}"))?; - - if let Some(recovered) = gs.get(&ctx.subtask_idx) { - self.state = *recovered; - } - Ok(()) } @@ -132,12 +123,15 @@ impl MessageOperator for WatermarkGeneratorOperator { }; let new_watermark = self.evaluate_watermark(&batch)?; + + // 死守单调递增底线,绝不倒流 self.state.max_watermark = self.state.max_watermark.max(new_watermark); let time_since_last_emit = max_batch_ts .duration_since(self.state.last_watermark_emitted_at) .unwrap_or(Duration::ZERO); + // 空闲唤醒或达到发射间隔则发射水印 if self.is_idle || time_since_last_emit > self.interval { debug!( "[{}] emitting expression watermark {}", @@ -174,6 +168,7 @@ impl MessageOperator for WatermarkGeneratorOperator { .last_event_wall .elapsed() .unwrap_or(Duration::ZERO); + // 系统时钟超时,发射 Idle 水印,避免下游一直等不到推进 if !self.is_idle && elapsed > idle_timeout { info!( "task [{}] entering Idle after {:?}", @@ -186,13 +181,7 @@ impl MessageOperator for WatermarkGeneratorOperator { Ok(vec![]) } - async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { - let mut tm = ctx.table_manager_guard().await?; - tm.get_global_keyed_state::("s") - .await - .map_err(|e| anyhow!("global keyed state s: {e}"))? 
- .insert(ctx.subtask_idx, self.state) - .await; + async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, _ctx: &mut TaskContext) -> Result<()> { Ok(()) } diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs index c23da40a..f835bac2 100644 --- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs @@ -139,21 +139,7 @@ impl MessageOperator for TumblingWindowOperator { "TumblingWindow" } - async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { - let watermark = ctx.last_present_watermark(); - let mut tm = ctx.table_manager_guard().await?; - let table = tm - .get_expiring_time_key_table("t", watermark) - .await - .map_err(|e| anyhow!("expiring time key table t: {e}"))?; - - for (timestamp, batches) in table.all_batches_for_watermark(watermark) { - let bin_start = self.bin_start(*timestamp); - let slot = self.active_bins.entry(bin_start).or_default(); - for batch in batches { - slot.finished_batches.push(batch.clone()); - } - } + async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> { Ok(()) } @@ -287,28 +273,7 @@ impl MessageOperator for TumblingWindowOperator { Ok(final_outputs) } - async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { - let watermark = ctx.last_present_watermark(); - let mut tm = ctx.table_manager_guard().await?; - let table = tm - .get_expiring_time_key_table("t", watermark) - .await - .map_err(|e| anyhow!("expiring time key table t: {e}"))?; - - for (bin_start, active_bin) in self.active_bins.iter_mut() { - active_bin.close_and_drain().await?; - - for batch in &active_bin.finished_batches { - let state_batch = Self::add_bin_start_as_timestamp( - batch, - *bin_start, - self.partial_schema.schema.clone(), - )?; - table.insert(*bin_start, state_batch); - } - } - - 
table.flush(watermark).await?; + async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, _ctx: &mut TaskContext) -> Result<()> { Ok(()) } diff --git a/src/runtime/streaming/storage/backend.rs b/src/runtime/streaming/storage/backend.rs deleted file mode 100644 index 265b99ca..00000000 --- a/src/runtime/streaming/storage/backend.rs +++ /dev/null @@ -1,78 +0,0 @@ -use anyhow::Result; -use async_trait::async_trait; - -#[derive(Default, Debug, Clone)] -pub struct CheckpointMetadata { - pub job_id: String, - pub epoch: u32, - pub min_epoch: u32, - pub operator_ids: Vec, -} - -#[derive(Default, Debug, Clone)] -pub struct OperatorCheckpointMetadata { - pub operator_id: String, - pub epoch: u32, -} - -#[async_trait] -pub trait BackingStore: Send + Sync + 'static { - fn name() -> &'static str; - async fn load_checkpoint_metadata(job_id: &str, epoch: u32) -> Result; - async fn load_operator_metadata( - job_id: &str, - operator_id: &str, - epoch: u32, - ) -> Result>; - async fn write_operator_checkpoint_metadata( - metadata: OperatorCheckpointMetadata, - ) -> Result<()>; - async fn write_checkpoint_metadata(metadata: CheckpointMetadata) -> Result<()>; - async fn cleanup_checkpoint( - metadata: CheckpointMetadata, - old_min_epoch: u32, - new_min_epoch: u32, - ) -> Result<()>; -} - -pub struct ParquetStateBackend; - -#[async_trait] -impl BackingStore for ParquetStateBackend { - fn name() -> &'static str { - "parquet" - } - - async fn load_checkpoint_metadata( - _job_id: &str, - _epoch: u32, - ) -> Result { - Ok(CheckpointMetadata::default()) - } - - async fn load_operator_metadata( - _job_id: &str, - _operator_id: &str, - _epoch: u32, - ) -> Result> { - Ok(None) - } - - async fn write_operator_checkpoint_metadata( - _metadata: OperatorCheckpointMetadata, - ) -> Result<()> { - Ok(()) - } - - async fn write_checkpoint_metadata(_metadata: CheckpointMetadata) -> Result<()> { - Ok(()) - } - - async fn cleanup_checkpoint( - _metadata: CheckpointMetadata, - _old_min_epoch: u32, 
- _new_min_epoch: u32, - ) -> Result<()> { - Ok(()) - } -} diff --git a/src/runtime/streaming/storage/manager.rs b/src/runtime/streaming/storage/manager.rs deleted file mode 100644 index 2aa79e6b..00000000 --- a/src/runtime/streaming/storage/manager.rs +++ /dev/null @@ -1,156 +0,0 @@ -use anyhow::{Result, anyhow}; -use std::collections::HashMap; -use std::sync::Arc; -use std::time::SystemTime; - -use super::table::TaskInfo; -use super::{DummyStorageProvider, StorageProviderRef}; - -#[derive(Default)] -pub struct GlobalKeyedView { - data: HashMap, -} - -impl GlobalKeyedView { - pub async fn insert(&mut self, key: K, value: V) { - self.data.insert(key, value); - } - - pub fn get(&self, key: &K) -> Option<&V> { - self.data.get(key) - } - - pub fn get_all(&self) -> &HashMap { - &self.data - } -} - -#[derive(Default)] -pub struct ExpiringTimeKeyView; - -impl ExpiringTimeKeyView { - pub fn insert(&mut self, _timestamp: SystemTime, _batch: arrow_array::RecordBatch) {} - - pub fn all_batches_for_watermark( - &self, - _watermark: Option, - ) -> std::iter::Empty<(&SystemTime, &Vec)> { - std::iter::empty() - } - - pub async fn flush(&mut self, _watermark: Option) -> Result<()> { - Ok(()) - } -} - -#[derive(Default)] -pub struct KeyTimeView; - -impl KeyTimeView { - pub async fn insert( - &mut self, - _batch: arrow_array::RecordBatch, - ) -> Result> { - Ok(vec![]) - } - - pub fn get_batch(&self, _key: &[u8]) -> Result> { - Ok(None) - } -} - -pub struct BackendWriter {} - -pub struct TableManager { - epoch: u32, - min_epoch: u32, - writer: BackendWriter, - task_info: Arc, - storage: StorageProviderRef, - caches: HashMap>, -} - -impl TableManager { - /// 加载状态后端(返回默认的空 Manager) - pub async fn load(task_info: Arc) -> Result<(Self, Option)> { - let manager = Self { - epoch: 1, - min_epoch: 1, - writer: BackendWriter {}, - task_info, - storage: Arc::new(DummyStorageProvider), - caches: HashMap::new(), - }; - Ok((manager, None)) - } - - /// 接收到 CheckpointBarrier 时(空操作) - pub async fn 
checkpoint( - &mut self, - _epoch: u32, - _watermark: Option, - _then_stop: bool, - ) { - } - - /// 面向算子的 API:获取全局 Key-Value 表 - pub async fn get_global_keyed_state< - K: Eq + std::hash::Hash + Send + 'static, - V: Send + 'static, - >( - &mut self, - table_name: &str, - ) -> Result<&mut GlobalKeyedView> { - if !self.caches.contains_key(table_name) { - let view: Box = - Box::new(GlobalKeyedView:: { data: HashMap::new() }); - self.caches.insert(table_name.to_string(), view); - } - - let cache = self.caches.get_mut(table_name).unwrap(); - - let view = cache - .downcast_mut::>() - .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?; - - Ok(view) - } - - /// 面向算子的 API:获取带 TTL 的时间键值表 - pub async fn get_expiring_time_key_table( - &mut self, - table_name: &str, - _watermark: Option, - ) -> Result<&mut ExpiringTimeKeyView> { - if !self.caches.contains_key(table_name) { - let view: Box = Box::new(ExpiringTimeKeyView::default()); - self.caches.insert(table_name.to_string(), view); - } - - let cache = self.caches.get_mut(table_name).unwrap(); - let view = cache - .downcast_mut::() - .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?; - - Ok(view) - } - - /// 面向算子的 API:获取标准的 Key-Time 双重映射表 - pub async fn get_key_time_table( - &mut self, - table_name: &str, - _watermark: Option, - ) -> Result<&mut KeyTimeView> { - if !self.caches.contains_key(table_name) { - let view: Box = Box::new(KeyTimeView::default()); - self.caches.insert(table_name.to_string(), view); - } - - let cache = self.caches.get_mut(table_name).unwrap(); - let view = cache - .downcast_mut::() - .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?; - - Ok(view) - } -} diff --git a/src/runtime/streaming/storage/mod.rs b/src/runtime/streaming/storage/mod.rs deleted file mode 100644 index c411b5ee..00000000 --- a/src/runtime/streaming/storage/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -use anyhow::Result; -use async_trait::async_trait; -use std::sync::Arc; - -pub mod backend; 
-pub mod manager; -pub mod table; - -#[async_trait] -pub trait StorageProvider: Send + Sync + 'static { - async fn get(&self, _path: &str) -> Result>; - async fn put(&self, _path: &str, _data: Vec) -> Result<()>; - async fn delete_if_present(&self, _path: &str) -> Result<()>; -} - -pub type StorageProviderRef = Arc; - -/// 空的存储实现,供测试和占位使用 -pub struct DummyStorageProvider; - -#[async_trait] -impl StorageProvider for DummyStorageProvider { - async fn get(&self, _path: &str) -> Result> { - Ok(vec![]) - } - async fn put(&self, _path: &str, _data: Vec) -> Result<()> { - Ok(()) - } - async fn delete_if_present(&self, _path: &str) -> Result<()> { - Ok(()) - } -} diff --git a/src/runtime/streaming/storage/table.rs b/src/runtime/streaming/storage/table.rs deleted file mode 100644 index 4b37ec4a..00000000 --- a/src/runtime/streaming/storage/table.rs +++ /dev/null @@ -1,91 +0,0 @@ -use anyhow::Result; -use arrow_array::RecordBatch; -use async_trait::async_trait; -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; - -#[derive(Default)] -pub struct TaskInfo { - pub job_id: String, - pub operator_id: String, - pub task_index: u32, -} - -#[derive(Debug)] -pub enum TableData { - RecordBatch(RecordBatch), - CommitData { data: Vec }, - KeyedData { key: Vec, value: Vec }, -} - -pub struct CheckpointMessage { - pub epoch: u32, - pub time: std::time::SystemTime, - pub watermark: Option, - pub then_stop: bool, -} - -#[async_trait] -pub trait TableEpochCheckpointer: Send + 'static { - type SubTableCheckpointMessage: prost::Message + Default; - - async fn insert_data(&mut self, _data: TableData) -> Result<()> { - Ok(()) - } - - async fn finish( - self: Box, - _checkpoint: &CheckpointMessage, - ) -> Result> { - Ok(None) - } - - fn subtask_index(&self) -> u32; -} - -#[async_trait] -pub trait Table: Send + Sync + 'static + Clone { - type Checkpointer: TableEpochCheckpointer< - SubTableCheckpointMessage = Self::TableSubtaskCheckpointMetadata, - >; - type ConfigMessage: 
prost::Message + Default; - type TableCheckpointMessage: prost::Message + Default + Clone; - type TableSubtaskCheckpointMetadata: prost::Message + Default + Clone; - - fn from_config( - _config: Self::ConfigMessage, - _task_info: Arc, - _storage_provider: super::StorageProviderRef, - _checkpoint_message: Option, - _state_version: u32, - ) -> Result - where - Self: Sized; - - fn epoch_checkpointer( - &self, - _epoch: u32, - _previous_metadata: Option, - ) -> Result; - - fn merge_checkpoint_metadata( - _config: Self::ConfigMessage, - _subtask_metadata: HashMap, - ) -> Result> { - Ok(None) - } - - fn subtask_metadata_from_table( - &self, - _table_metadata: Self::TableCheckpointMessage, - ) -> Result> { - Ok(None) - } - - fn files_to_keep( - _config: Self::ConfigMessage, - _checkpoint: Self::TableCheckpointMessage, - ) -> Result> { - Ok(HashSet::new()) - } -} diff --git a/src/server/initializer.rs b/src/server/initializer.rs index 46eca375..7786169a 100644 --- a/src/server/initializer.rs +++ b/src/server/initializer.rs @@ -92,7 +92,8 @@ pub fn build_core_registry() -> ComponentRegistry { let builder = { let b = ComponentRegistryBuilder::new() .register("WasmCache", initialize_wasm_cache) - .register("TaskManager", initialize_task_manager); + .register("TaskManager", initialize_task_manager) + .register("JobManager", initialize_job_manager); #[cfg(feature = "python")] let b = b.register("PythonService", initialize_python_service); b @@ -150,6 +151,22 @@ fn initialize_python_service(config: &GlobalConfig) -> Result<()> { Ok(()) } +fn initialize_job_manager(config: &GlobalConfig) -> Result<()> { + use crate::runtime::streaming::api::operator::Registry; + use crate::runtime::streaming::factory::OperatorFactory; + use crate::runtime::streaming::job::JobManager; + use std::sync::Arc; + + let registry = Arc::new(Registry::new()); + let factory = Arc::new(OperatorFactory::new(registry)); + let max_memory_bytes = config.streaming.max_memory_bytes.unwrap_or(256 * 1024 * 1024); + 
+ JobManager::init(factory, max_memory_bytes) + .context("JobManager service failed to start")?; + + Ok(()) +} + fn initialize_coordinator(_config: &GlobalConfig) -> Result<()> { crate::runtime::taskexecutor::TaskManager::get() .context("Dependency violation: Coordinator requires TaskManager")?; @@ -157,5 +174,8 @@ fn initialize_coordinator(_config: &GlobalConfig) -> Result<()> { crate::storage::stream_catalog::CatalogManager::global() .context("Dependency violation: Coordinator requires StreamCatalog")?; + crate::runtime::streaming::job::JobManager::global() + .context("Dependency violation: Coordinator requires JobManager")?; + Ok(()) } diff --git a/src/sql/common/fs_schema.rs b/src/sql/common/fs_schema.rs index c99af1e5..f7fd5328 100644 --- a/src/sql/common/fs_schema.rs +++ b/src/sql/common/fs_schema.rs @@ -220,6 +220,14 @@ impl FsSchema { self.key_indices.as_ref() } + pub fn clone_storage_key_indices(&self) -> Option> { + self.key_indices.clone() + } + + pub fn clone_routing_key_indices(&self) -> Option> { + self.routing_key_indices.clone() + } + pub fn filter_by_time( &self, batch: RecordBatch, diff --git a/src/sql/common/kafka_catalog.rs b/src/sql/common/kafka_catalog.rs new file mode 100644 index 00000000..99c8983e --- /dev/null +++ b/src/sql/common/kafka_catalog.rs @@ -0,0 +1,122 @@ +//! Kafka 表级与连接级配置(与 JSON Schema / Catalog 对齐)。 +//! +//! 放在 [`crate::sql::common`] 而非 `runtime::streaming`,以便 **SQL 规划、Coordinator、连接配置存储** +//! 与 **运行时工厂**(如 `ConnectorSourceDispatcher`)共用同一套类型,避免循环依赖。 +//! +//! 
与 [`crate::runtime::streaming::api::source::SourceOffset`] 语义相同但独立定义,运行时可用 `From`/`match` 做映射。 + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +// ── KafkaTable:单表 Source/Sink ───────────────────────────────────────── + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct KafkaTable { + pub topic: String, + /// Source / Sink 判别及各自字段;与顶层 JSON 扁平字段共用 `type` 标签。 + #[serde(flatten)] + pub kind: TableType, + #[serde(default)] + pub client_configs: HashMap, + pub value_subject: Option, +} + +impl KafkaTable { + /// Schema Registry subject;未配置时与常见约定一致:`{topic}-value`。 + pub fn subject(&self) -> String { + self.value_subject + .clone() + .unwrap_or_else(|| format!("{}-value", self.topic)) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum TableType { + Source { + offset: KafkaTableSourceOffset, + read_mode: Option, + group_id: Option, + group_id_prefix: Option, + }, + Sink { + commit_mode: SinkCommitMode, + key_field: Option, + timestamp_field: Option, + }, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum KafkaTableSourceOffset { + Latest, + Earliest, + #[default] + Group, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum ReadMode { + ReadUncommitted, + ReadCommitted, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum SinkCommitMode { + #[default] + AtLeastOnce, + ExactlyOnce, +} + +// ── KafkaConfig:集群 / 鉴权 / Schema Registry ─────────────────────────── + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct KafkaConfig { + pub bootstrap_servers: String, + #[serde(default)] + pub authentication: KafkaConfigAuthentication, + #[serde(default)] + pub schema_registry_enum: 
Option, + #[serde(default)] + pub connection_properties: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "type")] +pub enum KafkaConfigAuthentication { + #[serde(rename = "None")] + None, + #[serde(rename = "AWS_MSK_IAM")] + AwsMskIam { region: String }, + #[serde(rename = "SASL")] + Sasl { + protocol: String, + mechanism: String, + username: String, + password: String, + }, +} + +impl Default for KafkaConfigAuthentication { + fn default() -> Self { + Self::None + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "type")] +pub enum SchemaRegistryConfig { + #[serde(rename = "None")] + None, + #[serde(rename = "Confluent Schema Registry")] + ConfluentSchemaRegistry { + endpoint: String, + #[serde(rename = "apiKey")] + api_key: Option, + #[serde(rename = "apiSecret")] + api_secret: Option, + }, +} diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs index cb833c8e..722b2e58 100644 --- a/src/sql/common/mod.rs +++ b/src/sql/common/mod.rs @@ -25,6 +25,7 @@ pub mod errors; pub mod format_from_opts; pub mod formats; pub mod hash; +pub mod kafka_catalog; pub mod message; pub mod operator_config; pub mod task_info; @@ -49,6 +50,10 @@ pub use control::{ }; pub use fs_schema::{FsSchema, FsSchemaRef}; pub use connector_options::{ConnectorOptions, FromOpts}; +pub use kafka_catalog::{ + KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode, + SchemaRegistryConfig, SinkCommitMode, TableType, +}; pub use errors::{DataflowError, DataflowResult}; pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat}; pub use operator_config::{MetadataField, OperatorConfig, RateLimit}; diff --git a/src/sql/common/operator_config.rs b/src/sql/common/operator_config.rs index 744dbd85..a1f703f5 100644 --- a/src/sql/common/operator_config.rs +++ b/src/sql/common/operator_config.rs @@ -2,6 +2,7 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use 
super::formats::{BadData, Format, Framing}; +use super::fs_schema::FsSchema; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RateLimit { @@ -27,4 +28,7 @@ pub struct OperatorConfig { pub rate_limit: Option, #[serde(default)] pub metadata_fields: Vec, + /// Arrow 行 schema(Kafka Source/Sink 反序列化、序列化必需)。 + #[serde(default)] + pub input_schema: Option, } From 157e13d28db99a8a1ca123590a807dc6d8f87fb4 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 29 Mar 2026 00:41:17 +0800 Subject: [PATCH 22/44] update --- Cargo.lock | 1 - cli/cli/Cargo.toml | 1 - src/sql/api/connections.rs | 6 +- src/sql/schema/source_table.rs | 190 ++++++++++++++++++++++++++++++--- 4 files changed, 179 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fc3a898a..e9ce4109 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2326,7 +2326,6 @@ dependencies = [ "arrow-schema 52.2.0", "clap", "comfy-table", - "function-stream", "protocol", "rustyline", "thiserror 2.0.17", diff --git a/cli/cli/Cargo.toml b/cli/cli/Cargo.toml index 72352995..e3c1c591 100644 --- a/cli/cli/Cargo.toml +++ b/cli/cli/Cargo.toml @@ -12,7 +12,6 @@ arrow-array = "52" arrow-ipc = "52" arrow-schema = "52" comfy-table = "7" -function-stream = { path = "../../" } protocol = { path = "../../protocol" } clap = { version = "4.5", features = ["derive"] } thiserror = "2" diff --git a/src/sql/api/connections.rs b/src/sql/api/connections.rs index d88dee75..7873ceb2 100644 --- a/src/sql/api/connections.rs +++ b/src/sql/api/connections.rs @@ -366,7 +366,7 @@ impl TryFrom for SourceField { precision: *p, scale: *s, }), - (DataType::Binary, None) | (DataType::LargeBinary, None) => FieldType::Bytes, + (DataType::Binary | DataType::LargeBinary | DataType::BinaryView, None) => FieldType::Bytes, (DataType::Timestamp(TimeUnit::Second, _), None) => { FieldType::Timestamp(TimestampField { unit: TimestampUnit::Second, @@ -387,8 +387,8 @@ impl TryFrom for SourceField { unit: TimestampUnit::Nanosecond, }) } - (DataType::Utf8, 
None) => FieldType::String, - (DataType::Utf8, Some(FsExtensionType::JSON)) => FieldType::Json, + (DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View, None) => FieldType::String, + (DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View, Some(FsExtensionType::JSON)) => FieldType::Json, (DataType::Struct(fields), None) => { let fields: Result<_, String> = fields .into_iter() diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs index dd962e34..85041f4b 100644 --- a/src/sql/schema/source_table.rs +++ b/src/sql/schema/source_table.rs @@ -38,7 +38,13 @@ use super::StreamSchemaProvider; use crate::multifield_partial_ord; use crate::sql::api::{ConnectionProfile, ConnectionSchema, SourceField}; use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::{BadData, Format, Framing, JsonCompression, JsonFormat}; +use crate::sql::common::kafka_catalog::{ + KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode, + SinkCommitMode, TableType as KafkaTableType, +}; +use crate::sql::common::{ + BadData, Format, Framing, FsSchema, JsonCompression, JsonFormat, OperatorConfig, RateLimit, +}; use crate::sql::schema::ConnectionType; use crate::sql::schema::table::SqlSource; use crate::sql::types::ProcessingMode; @@ -303,8 +309,8 @@ impl SourceTable { let connection_schema = ConnectionSchema::try_new( format.clone(), - Some(bad_data), - framing, + Some(bad_data.clone()), + framing.clone(), schema_fields, None, Some(inferred_empty), @@ -421,19 +427,34 @@ impl SourceTable { table.lookup_cache_ttl = options.pull_opt_duration("lookup.cache.ttl")?; - let extra_opts = options.drain_remaining_string_values()?; - let mut config_root = serde_json::json!({ - "connector": connector_name, - "connection_schema": connection_schema, - }); - if let serde_json::Value::Object(ref mut map) = config_root { - for (k, v) in extra_opts { - map.insert(k, serde_json::Value::String(v)); + if 
connector_name.eq_ignore_ascii_case("kafka") { + let physical = table.produce_physical_schema(); + let op_cfg = wire_kafka_operator_config( + options, + role, + &physical, + &format, + bad_data, + framing, + )?; + table.opaque_config = serde_json::to_string(&op_cfg).map_err(|e| { + DataFusionError::Plan(format!("failed to serialize Kafka OperatorConfig: {e}")) + })?; + } else { + let extra_opts = options.drain_remaining_string_values()?; + let mut config_root = serde_json::json!({ + "connector": connector_name, + "connection_schema": connection_schema, + }); + if let serde_json::Value::Object(ref mut map) = config_root { + for (k, v) in extra_opts { + map.insert(k, serde_json::Value::String(v)); + } } + table.opaque_config = serde_json::to_string(&config_root).map_err(|e| { + DataFusionError::Plan(format!("failed to serialize connector config: {e}")) + })?; } - table.opaque_config = serde_json::to_string(&config_root).map_err(|e| { - DataFusionError::Plan(format!("failed to serialize connector config: {e}")) - })?; if role == TableRole::Ingestion && encoding.supports_delta_updates() && primary_keys.is_empty() { @@ -544,6 +565,147 @@ impl SourceTable { } } +/// Kafka: runtime [`KafkaSourceDispatcher`] / [`KafkaSinkDispatcher`] expect [`OperatorConfig`] JSON, +/// not the legacy `{ connector, connection_schema, ... }` blob used by other adapters. +fn wire_kafka_operator_config( + options: &mut ConnectorOptions, + role: TableRole, + physical_schema: &Schema, + format: &Option, + bad_data: BadData, + framing: Option, +) -> Result { + let bootstrap_servers = match options.pull_opt_str("bootstrap.servers")? { + Some(s) => s, + None => options + .pull_opt_str("bootstrap_servers")? + .ok_or_else(|| { + plan_datafusion_err!( + "Kafka connector requires 'bootstrap.servers' in the WITH clause" + ) + })?, + }; + + let topic = options + .pull_opt_str("topic")? 
+ .ok_or_else(|| plan_datafusion_err!("Kafka connector requires 'topic' in the WITH clause"))?; + + let sql_format = format.clone().ok_or_else(|| { + plan_datafusion_err!( + "Kafka connector requires 'format' in the WITH clause (e.g. format = 'json')" + ) + })?; + + let rate_limit = options + .pull_opt_u64("rate_limit.messages_per_second")? + .map(|v| RateLimit { + messages_per_second: v.clamp(1, u32::MAX as u64) as u32, + }); + + let value_subject = options.pull_opt_str("value.subject")?; + + let kind = match role { + TableRole::Ingestion => { + let offset = match options.pull_opt_str("scan.startup.mode")?.as_deref() { + Some("latest") => KafkaTableSourceOffset::Latest, + Some("earliest") => KafkaTableSourceOffset::Earliest, + None | Some("group-offsets") | Some("group") => KafkaTableSourceOffset::Group, + Some(other) => { + return plan_err!( + "invalid scan.startup.mode '{other}'; expected latest, earliest, or group-offsets" + ); + } + }; + let read_mode = match options.pull_opt_str("isolation.level")?.as_deref() { + Some("read_committed") => Some(ReadMode::ReadCommitted), + Some("read_uncommitted") => Some(ReadMode::ReadUncommitted), + None => None, + Some(other) => { + return plan_err!("invalid isolation.level '{other}'"); + } + }; + let group_id = match options.pull_opt_str("group.id")? { + Some(s) => Some(s), + None => options.pull_opt_str("group_id")?, + }; + let group_id_prefix = options.pull_opt_str("group.id.prefix")?; + KafkaTableType::Source { + offset, + read_mode, + group_id, + group_id_prefix, + } + } + TableRole::Egress => { + let commit_mode = match options.pull_opt_str("sink.commit.mode")?.as_deref() { + Some("exactly-once") | Some("exactly_once") => SinkCommitMode::ExactlyOnce, + None | Some("at-least-once") | Some("at_least_once") => SinkCommitMode::AtLeastOnce, + Some(other) => { + return plan_err!("invalid sink.commit.mode '{other}'"); + } + }; + let key_field = match options.pull_opt_str("sink.key.field")? 
{ + Some(s) => Some(s), + None => options.pull_opt_str("key.field")?, + }; + let timestamp_field = match options.pull_opt_str("sink.timestamp.field")? { + Some(s) => Some(s), + None => options.pull_opt_str("timestamp.field")?, + }; + KafkaTableType::Sink { + commit_mode, + key_field, + timestamp_field, + } + } + TableRole::Reference => { + return plan_err!("Kafka connector cannot be used as a lookup table in this path"); + } + }; + + // Role already decided; keep these out of librdkafka `connection_properties`. + let _ = options.pull_opt_str("type")?; + let _ = options.pull_opt_str("connector")?; + + let connection_properties = options.drain_remaining_string_values()?; + + let kafka_connection = KafkaConfig { + bootstrap_servers, + authentication: KafkaConfigAuthentication::None, + schema_registry_enum: None, + connection_properties, + }; + + let kafka_table = KafkaTable { + topic, + kind, + client_configs: HashMap::new(), + value_subject, + }; + + let fields: Vec = physical_schema + .fields() + .iter() + .map(|f| f.as_ref().clone()) + .collect(); + let input_schema = FsSchema::from_fields(fields); + + Ok(OperatorConfig { + connection: serde_json::to_value(&kafka_connection).map_err(|e| { + DataFusionError::Plan(format!("Kafka connection serialization failed: {e}")) + })?, + table: serde_json::to_value(&kafka_table).map_err(|e| { + DataFusionError::Plan(format!("Kafka table serialization failed: {e}")) + })?, + format: Some(sql_format), + bad_data: Some(bad_data), + framing, + rate_limit, + metadata_fields: vec![], + input_schema: Some(input_schema), + }) +} + /// Plan a SQL scalar expression against a table-qualified schema (e.g. watermark `AS` clause). 
fn plan_generating_expr( ast: &ast::Expr, From de79169e0c4ffa016ab19c7ce872649d248fafb7 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 29 Mar 2026 11:27:08 +0800 Subject: [PATCH 23/44] update --- src/common/fs_schema.rs | 12 +++ src/coordinator/plan/logical_plan_visitor.rs | 16 ++- src/coordinator/tool/mod.rs | 12 +++ src/runtime/streaming/api/context.rs | 12 +++ src/runtime/streaming/api/mod.rs | 12 +++ src/runtime/streaming/api/operator.rs | 12 +++ src/runtime/streaming/api/source.rs | 12 +++ src/runtime/streaming/arrow/mod.rs | 12 +++ src/runtime/streaming/driver.rs | 12 +++ src/runtime/streaming/error.rs | 12 +++ src/runtime/streaming/execution/mod.rs | 12 +++ src/runtime/streaming/execution/runner.rs | 12 +++ src/runtime/streaming/execution/source.rs | 12 +++ .../execution/tracker/barrier_aligner.rs | 12 +++ .../streaming/execution/tracker/mod.rs | 12 +++ .../execution/tracker/watermark_tracker.rs | 12 +++ src/runtime/streaming/factory/mod.rs | 12 +++ .../factory/registry/kafka_factory.rs | 17 +++- src/runtime/streaming/factory/registry/mod.rs | 19 +++- src/runtime/streaming/format/config.rs | 12 +++ src/runtime/streaming/format/deserializer.rs | 12 +++ src/runtime/streaming/format/json_encoder.rs | 12 +++ src/runtime/streaming/format/mod.rs | 12 +++ src/runtime/streaming/format/serializer.rs | 12 +++ src/runtime/streaming/job/edge_manager.rs | 12 +++ src/runtime/streaming/job/job_manager.rs | 12 +++ src/runtime/streaming/job/mod.rs | 12 +++ src/runtime/streaming/job/models.rs | 12 +++ src/runtime/streaming/memory/mod.rs | 12 +++ src/runtime/streaming/memory/pool.rs | 12 +++ src/runtime/streaming/memory/ticket.rs | 12 +++ src/runtime/streaming/network/endpoint.rs | 12 +++ src/runtime/streaming/network/environment.rs | 12 +++ src/runtime/streaming/network/mod.rs | 12 +++ .../grouping/incremental_aggregate.rs | 15 ++- .../streaming/operators/grouping/mod.rs | 12 +++ .../operators/grouping/updating_cache.rs | 12 +++ .../operators/joins/join_instance.rs | 12 
+++ .../operators/joins/join_with_expiration.rs | 12 +++ src/runtime/streaming/operators/joins/mod.rs | 12 +++ src/runtime/streaming/operators/key_by.rs | 12 +++ .../streaming/operators/key_operator.rs | 12 +++ src/runtime/streaming/operators/mod.rs | 12 +++ src/runtime/streaming/operators/projection.rs | 12 +++ .../streaming/operators/sink/kafka/mod.rs | 12 +++ src/runtime/streaming/operators/sink/mod.rs | 12 +++ .../streaming/operators/source/kafka/mod.rs | 12 +++ src/runtime/streaming/operators/source/mod.rs | 12 +++ .../operators/stateless_physical_executor.rs | 12 +++ .../streaming/operators/value_execution.rs | 12 +++ .../streaming/operators/watermark/mod.rs | 12 +++ .../watermark/watermark_generator.rs | 12 +++ .../streaming/operators/windows/mod.rs | 12 +++ .../windows/session_aggregating_window.rs | 12 +++ .../windows/sliding_aggregating_window.rs | 12 +++ .../windows/tumbling_aggregating_window.rs | 12 +++ .../operators/windows/window_function.rs | 12 +++ src/runtime/streaming/protocol/control.rs | 12 +++ src/runtime/streaming/protocol/event.rs | 12 +++ src/runtime/streaming/protocol/mod.rs | 12 +++ src/runtime/streaming/protocol/stream_out.rs | 12 +++ src/runtime/streaming/protocol/tracked.rs | 12 +++ src/runtime/streaming/protocol/watermark.rs | 12 +++ src/runtime/wasm/processor/function_error.rs | 12 +++ src/sql/analysis/aggregate_rewriter.rs | 12 +++ src/sql/analysis/async_udf_rewriter.rs | 12 +++ src/sql/analysis/join_rewriter.rs | 12 +++ src/sql/analysis/mod.rs | 12 +++ src/sql/analysis/row_time_rewriter.rs | 12 +++ src/sql/analysis/sink_input_rewriter.rs | 12 +++ src/sql/analysis/source_metadata_visitor.rs | 12 +++ src/sql/analysis/stream_rewriter.rs | 12 +++ src/sql/analysis/streaming_window_analzer.rs | 12 +++ src/sql/analysis/udafs.rs | 12 +++ src/sql/analysis/window_function_rewriter.rs | 12 +++ src/sql/api/checkpoints.rs | 12 +++ src/sql/api/connections.rs | 12 +++ src/sql/api/metrics.rs | 12 +++ src/sql/api/mod.rs | 12 +++ 
src/sql/api/pipelines.rs | 12 +++ src/sql/api/public_ids.rs | 12 +++ src/sql/api/schema_resolver.rs | 12 +++ src/sql/api/udfs.rs | 12 +++ src/sql/api/var_str.rs | 12 +++ src/sql/common/arrow_ext.rs | 12 +++ src/sql/common/connector_options.rs | 12 +++ src/sql/common/control.rs | 12 +++ src/sql/common/converter.rs | 12 +++ src/sql/common/date.rs | 12 +++ src/sql/common/debezium.rs | 12 +++ src/sql/common/errors.rs | 12 +++ src/sql/common/format_from_opts.rs | 57 ++++++----- src/sql/common/formats.rs | 12 +++ src/sql/common/fs_schema.rs | 12 +++ src/sql/common/hash.rs | 12 +++ src/sql/common/kafka_catalog.rs | 12 +++ src/sql/common/message.rs | 12 +++ src/sql/common/mod.rs | 2 + src/sql/common/operator_config.rs | 12 +++ src/sql/common/task_info.rs | 12 +++ src/sql/common/time_utils.rs | 12 +++ src/sql/common/with_option_keys.rs | 97 +++++++++++++++++++ src/sql/common/worker.rs | 12 +++ src/sql/datastream/logical.rs | 39 +++----- src/sql/datastream/mod.rs | 12 +++ src/sql/extensions/aggregate.rs | 14 +-- src/sql/extensions/async_udf.rs | 6 +- src/sql/extensions/constants.rs | 3 +- src/sql/extensions/debezium.rs | 23 ++--- src/sql/extensions/join.rs | 5 +- src/sql/extensions/key_calculation.rs | 11 ++- src/sql/extensions/lookup.rs | 5 +- src/sql/extensions/projection.rs | 5 +- src/sql/extensions/remote_table.rs | 3 +- src/sql/extensions/sink.rs | 3 +- src/sql/extensions/table_source.rs | 3 +- src/sql/extensions/timestamp_append.rs | 3 +- src/sql/extensions/updating_aggregate.rs | 11 ++- src/sql/extensions/watermark_node.rs | 16 +-- src/sql/extensions/windows_function.rs | 7 +- src/sql/functions/mod.rs | 26 +++-- src/sql/logical_node/logical/operator_name.rs | 23 +++++ src/sql/logical_node/mod.rs | 12 +++ src/sql/logical_planner/mod.rs | 25 +++-- src/sql/logical_planner/planner.rs | 12 +++ src/sql/physical/physical_planner.rs | 12 +++ src/sql/schema/data_encoding_format.rs | 10 +- src/sql/schema/source_table.rs | 73 +++++++------- src/sql/schema/table_role.rs | 5 +- 
src/sql/types/data_type.rs | 12 +++ src/sql/types/df_field.rs | 12 +++ src/sql/types/mod.rs | 12 +++ src/sql/types/placeholder_udf.rs | 12 +++ src/sql/types/stream_schema.rs | 12 +++ src/sql/types/window.rs | 20 +++- 135 files changed, 1653 insertions(+), 169 deletions(-) create mode 100644 src/sql/common/with_option_keys.rs diff --git a/src/common/fs_schema.rs b/src/common/fs_schema.rs index e9ce6586..4229b957 100644 --- a/src/common/fs_schema.rs +++ b/src/common/fs_schema.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! FunctionStream table/stream schema: Arrow [`Schema`] plus timestamp index and optional key columns. //! //! 
[`Schema`]: datafusion::arrow::datatypes::Schema diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index 14ed01b8..9e95c5bd 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -38,6 +38,7 @@ use crate::coordinator::tool::ConnectorOptions; use crate::sql::analysis::{ maybe_add_key_extension_to_sink, rewrite_sinks, StreamSchemaProvider, }; +use crate::sql::common::with_option_keys as opt; use crate::sql::extensions::sink::StreamEgressNode; use crate::sql::functions::{is_json_union, serialize_outgoing_json}; use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig}; @@ -47,9 +48,6 @@ use crate::sql::rewrite_plan; use crate::sql::schema::source_table::SourceTable; use crate::sql::schema::{ColumnDescriptor, ConnectionType, Table}; -const OPT_CONNECTOR: &str = "connector"; -const OPT_PARTITION_BY: &str = "partition_by"; - #[derive(Clone)] pub struct LogicalPlanVisitor { schema_provider: StreamSchemaProvider, @@ -95,11 +93,11 @@ impl LogicalPlanVisitor { debug!("Initiating streaming sink compilation for identifier: {}", sink_table_name); let mut sink_properties = ConnectorOptions::new(with_options, &None)?; - let connector_type = sink_properties.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| { + let connector_type = sink_properties.pull_opt_str(opt::CONNECTOR)?.ok_or_else(|| { plan_datafusion_err!( "Validation Error: Streaming table '{}' requires the '{}' property", sink_table_name, - OPT_CONNECTOR + opt::CONNECTOR ) })?; @@ -192,7 +190,7 @@ impl LogicalPlanVisitor { options: &mut ConnectorOptions, ) -> Result>> { options - .pull_opt_str(OPT_PARTITION_BY)? + .pull_opt_str(opt::PARTITION_BY)? 
.map(|raw_cols| raw_cols.split(',').map(|c| col(c.trim())).collect()) .map(Ok) .transpose() @@ -200,7 +198,7 @@ impl LogicalPlanVisitor { fn contains_connector_property(options: &[SqlOption]) -> bool { options.iter().any(|opt| match opt { - SqlOption::KeyValue { key, .. } => key.value.eq_ignore_ascii_case(OPT_CONNECTOR), + SqlOption::KeyValue { key, .. } => key.value.eq_ignore_ascii_case(opt::CONNECTOR), _ => false, }) } @@ -279,10 +277,10 @@ impl LogicalPlanVisitor { .collect::>(); let mut connector_options = ConnectorOptions::new(&stmt.with_options, &None)?; - let adapter_type = connector_options.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| { + let adapter_type = connector_options.pull_opt_str(opt::CONNECTOR)?.ok_or_else(|| { plan_datafusion_err!( "Configuration Error: Missing required property '{}' in WITH clause", - OPT_CONNECTOR + opt::CONNECTOR ) })?; diff --git a/src/coordinator/tool/mod.rs b/src/coordinator/tool/mod.rs index 8ef77230..6b48aa0e 100644 --- a/src/coordinator/tool/mod.rs +++ b/src/coordinator/tool/mod.rs @@ -1 +1,13 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ pub use crate::sql::common::ConnectorOptions; diff --git a/src/runtime/streaming/api/context.rs b/src/runtime/streaming/api/context.rs index 77038bf3..b70d40df 100644 --- a/src/runtime/streaming/api/context.rs +++ b/src/runtime/streaming/api/context.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::runtime::streaming::memory::MemoryPool; use crate::runtime::streaming::protocol::event::StreamEvent; use crate::runtime::streaming::protocol::tracked::TrackedEvent; diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs index 8115b0fe..49e45328 100644 --- a/src/runtime/streaming/api/mod.rs +++ b/src/runtime/streaming/api/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
接口层:算子与源实现需遵循的 trait 与运行时上下文。 pub mod context; diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs index 3c088e3c..eabeff85 100644 --- a/src/runtime/streaming/api/operator.rs +++ b/src/runtime/streaming/api/operator.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::source::SourceOperator; use crate::runtime::streaming::protocol::stream_out::StreamOutput; diff --git a/src/runtime/streaming/api/source.rs b/src/runtime/streaming/api/source.rs index a4ff46c4..1f79de38 100644 --- a/src/runtime/streaming/api/source.rs +++ b/src/runtime/streaming/api/source.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
源算子:由 [`crate::runtime::streaming::execution::SourceRunner`] 驱动 `fetch_next`,不得在内部死循环阻塞控制面。 use crate::runtime::streaming::api::context::TaskContext; diff --git a/src/runtime/streaming/arrow/mod.rs b/src/runtime/streaming/arrow/mod.rs index fdfa87f7..d706199f 100644 --- a/src/runtime/streaming/arrow/mod.rs +++ b/src/runtime/streaming/arrow/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! Arrow / DataFusion 辅助:聚合表达式解码等。 //! //! `UpdatingCache` 位于 [`crate::runtime::streaming::operators::updating_cache`]。 diff --git a/src/runtime/streaming/driver.rs b/src/runtime/streaming/driver.rs index f2abec87..011e49ab 100644 --- a/src/runtime/streaming/driver.rs +++ b/src/runtime/streaming/driver.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use std::future::pending; use std::sync::Arc; diff --git a/src/runtime/streaming/error.rs b/src/runtime/streaming/error.rs index 3d8fba19..c8d1944a 100644 --- a/src/runtime/streaming/error.rs +++ b/src/runtime/streaming/error.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::fmt::Display; use thiserror::Error; diff --git a/src/runtime/streaming/execution/mod.rs b/src/runtime/streaming/execution/mod.rs index 4d55e361..a4fb6d95 100644 --- a/src/runtime/streaming/execution/mod.rs +++ b/src/runtime/streaming/execution/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
执行层:Tokio Actor 运行容器。 pub mod runner; diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs index fa907088..994d8c04 100644 --- a/src/runtime/streaming/execution/runner.rs +++ b/src/runtime/streaming/execution/runner.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use async_trait::async_trait; use tokio::sync::mpsc::Receiver; use tokio_stream::{StreamExt, StreamMap}; diff --git a/src/runtime/streaming/execution/source.rs b/src/runtime/streaming/execution/source.rs index d51132ac..a9fbd561 100644 --- a/src/runtime/streaming/execution/source.rs +++ b/src/runtime/streaming/execution/source.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
源任务物理驱动:控制面优先、`fetch_next` 非阻塞契约、可选融合算子链下推。 use crate::runtime::streaming::api::context::TaskContext; diff --git a/src/runtime/streaming/execution/tracker/barrier_aligner.rs b/src/runtime/streaming/execution/tracker/barrier_aligner.rs index 05f2cc90..34b5380a 100644 --- a/src/runtime/streaming/execution/tracker/barrier_aligner.rs +++ b/src/runtime/streaming/execution/tracker/barrier_aligner.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! Chandy–Lamport 风格屏障对齐(零内存缓冲:未对齐时从轮询池移除输入流,依赖底层背压)。 use std::collections::HashSet; diff --git a/src/runtime/streaming/execution/tracker/mod.rs b/src/runtime/streaming/execution/tracker/mod.rs index bfa24e8b..81329c27 100644 --- a/src/runtime/streaming/execution/tracker/mod.rs +++ b/src/runtime/streaming/execution/tracker/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
协调层:屏障对齐与多路水位线追踪。 pub mod barrier_aligner; diff --git a/src/runtime/streaming/execution/tracker/watermark_tracker.rs b/src/runtime/streaming/execution/tracker/watermark_tracker.rs index ca2f082f..6304b4c3 100644 --- a/src/runtime/streaming/execution/tracker/watermark_tracker.rs +++ b/src/runtime/streaming/execution/tracker/watermark_tracker.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::runtime::streaming::protocol::watermark::{merge_watermarks, watermark_strictly_advances}; use crate::sql::common::Watermark; diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs index 2cc0cfba..8c03c298 100644 --- a/src/runtime/streaming/factory/mod.rs +++ b/src/runtime/streaming/factory/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ pub mod registry; pub use registry::{ diff --git a/src/runtime/streaming/factory/registry/kafka_factory.rs b/src/runtime/streaming/factory/registry/kafka_factory.rs index 6a451166..8f42acd9 100644 --- a/src/runtime/streaming/factory/registry/kafka_factory.rs +++ b/src/runtime/streaming/factory/registry/kafka_factory.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! Kafka Source/Sink:从 [`ConnectorOp`] + [`OperatorConfig`] 构造物理算子(鉴权与 client 配置合并)。 use anyhow::{anyhow, bail, Context, Result}; @@ -18,6 +30,7 @@ use crate::runtime::streaming::format::{ }; use crate::runtime::streaming::operators::sink::kafka::{ConsistencyMode, KafkaSinkOperator}; use crate::runtime::streaming::operators::source::kafka::{BufferedDeserializer, KafkaSourceOperator}; +use crate::sql::common::constants::connector_type; use crate::sql::common::formats::{ BadData, DecimalEncoding as SqlDecimalEncoding, Format as SqlFormat, JsonFormat as SqlJsonFormat, TimestampFormat as SqlTimestampFormat, @@ -178,7 +191,7 @@ impl OperatorConstructor for KafkaSourceDispatcher { let op = ConnectorOp::decode(payload) .context("Failed to decode ConnectorOp protobuf for Kafka Source")?; - if op.connector != "kafka" { + if op.connector != connector_type::KAFKA { bail!( "KafkaSourceDispatcher: expected connector 'kafka', got '{}'", op.connector @@ -257,7 +270,7 @@ impl OperatorConstructor for KafkaSinkDispatcher { let op = ConnectorOp::decode(payload) .context("Failed to decode 
ConnectorOp protobuf for Kafka Sink")?; - if op.connector != "kafka" { + if op.connector != connector_type::KAFKA { bail!( "KafkaSinkDispatcher: expected connector 'kafka', got '{}'", op.connector diff --git a/src/runtime/streaming/factory/registry/mod.rs b/src/runtime/streaming/factory/registry/mod.rs index 9bb1148d..d129f644 100644 --- a/src/runtime/streaming/factory/registry/mod.rs +++ b/src/runtime/streaming/factory/registry/mod.rs @@ -1,8 +1,21 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use anyhow::{anyhow, Result}; use prost::Message; use std::collections::HashMap; use std::sync::Arc; +use crate::sql::common::constants::connector_type; use crate::runtime::streaming::api::operator::Registry; use crate::runtime::streaming::api::operator::ConstructedOperator; @@ -239,8 +252,8 @@ impl OperatorConstructor for ConnectorSourceDispatcher { .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?; match op.connector.as_str() { - "kafka" => KafkaSourceDispatcher.with_config(config, registry), - "redis" => Err(anyhow!( + ct if ct == connector_type::KAFKA => KafkaSourceDispatcher.with_config(config, registry), + ct if ct == connector_type::REDIS => Err(anyhow!( "ConnectorSource '{}' factory wiring not yet implemented", op.connector )), @@ -257,7 +270,7 @@ impl OperatorConstructor for ConnectorSinkDispatcher { .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?; match op.connector.as_str() { - "kafka" => KafkaSinkDispatcher.with_config(config, registry), + ct if ct == connector_type::KAFKA => KafkaSinkDispatcher.with_config(config, registry), other => Err(anyhow!("Unsupported sink connector type: {}", other)), } } diff --git a/src/runtime/streaming/format/config.rs b/src/runtime/streaming/format/config.rs index e0ac61bb..235e1d82 100644 --- a/src/runtime/streaming/format/config.rs +++ b/src/runtime/streaming/format/config.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] diff --git a/src/runtime/streaming/format/deserializer.rs b/src/runtime/streaming/format/deserializer.rs index 83360bd8..1c32d48a 100644 --- a/src/runtime/streaming/format/deserializer.rs +++ b/src/runtime/streaming/format/deserializer.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 数据反序列化器:将外界收到的字节流转化为结构化 [`RecordBatch`]。 use anyhow::{anyhow, Result}; diff --git a/src/runtime/streaming/format/json_encoder.rs b/src/runtime/streaming/format/json_encoder.rs index 7721d9bc..8d34e9ef 100644 --- a/src/runtime/streaming/format/json_encoder.rs +++ b/src/runtime/streaming/format/json_encoder.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 极致优化的 Arrow JSON 编码器。 //! //! 
解决 Arrow 原生 JSON 导出时不兼容 Kafka / 时间戳 / Decimal 的痛点。 diff --git a/src/runtime/streaming/format/mod.rs b/src/runtime/streaming/format/mod.rs index c4dbbeda..b27935ba 100644 --- a/src/runtime/streaming/format/mod.rs +++ b/src/runtime/streaming/format/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + pub mod config; pub mod deserializer; pub mod json_encoder; diff --git a/src/runtime/streaming/format/serializer.rs b/src/runtime/streaming/format/serializer.rs index 80969ec6..7b61d055 100644 --- a/src/runtime/streaming/format/serializer.rs +++ b/src/runtime/streaming/format/serializer.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
数据序列化器:将内存 [`RecordBatch`] 转换为二进制消息流,供 Sink 连接器发送。 use anyhow::{anyhow, Result}; diff --git a/src/runtime/streaming/job/edge_manager.rs b/src/runtime/streaming/job/edge_manager.rs index 10ca97f1..53f82cb9 100644 --- a/src/runtime/streaming/job/edge_manager.rs +++ b/src/runtime/streaming/job/edge_manager.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::collections::HashMap; use protocol::grpc::api::{FsEdge, FsNode}; diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs index 6413eba6..844131a0 100644 --- a/src/runtime/streaming/job/job_manager.rs +++ b/src/runtime/streaming/job/job_manager.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use std::collections::HashMap; use std::sync::{Arc, OnceLock, RwLock}; diff --git a/src/runtime/streaming/job/mod.rs b/src/runtime/streaming/job/mod.rs index 448c26cd..72f98d69 100644 --- a/src/runtime/streaming/job/mod.rs +++ b/src/runtime/streaming/job/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + pub mod edge_manager; pub mod job_manager; pub mod models; diff --git a/src/runtime/streaming/job/models.rs b/src/runtime/streaming/job/models.rs index 3e843ea4..35b48da7 100644 --- a/src/runtime/streaming/job/models.rs +++ b/src/runtime/streaming/job/models.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use std::collections::HashMap; use std::sync::{Arc, RwLock}; use std::thread::JoinHandle; diff --git a/src/runtime/streaming/memory/mod.rs b/src/runtime/streaming/memory/mod.rs index 93101fa2..45fc3194 100644 --- a/src/runtime/streaming/memory/mod.rs +++ b/src/runtime/streaming/memory/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + pub mod pool; pub mod ticket; diff --git a/src/runtime/streaming/memory/pool.rs b/src/runtime/streaming/memory/pool.rs index 98ba4cf3..54276088 100644 --- a/src/runtime/streaming/memory/pool.rs +++ b/src/runtime/streaming/memory/pool.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use parking_lot::Mutex; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; diff --git a/src/runtime/streaming/memory/ticket.rs b/src/runtime/streaming/memory/ticket.rs index ca1759b9..1c9d2798 100644 --- a/src/runtime/streaming/memory/ticket.rs +++ b/src/runtime/streaming/memory/ticket.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::sync::Arc; use super::pool::MemoryPool; diff --git a/src/runtime/streaming/network/endpoint.rs b/src/runtime/streaming/network/endpoint.rs index 3fc1fc57..a8525e1e 100644 --- a/src/runtime/streaming/network/endpoint.rs +++ b/src/runtime/streaming/network/endpoint.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use crate::runtime::streaming::protocol::event::StreamEvent; use crate::runtime::streaming::protocol::tracked::TrackedEvent; use anyhow::{anyhow, Result}; diff --git a/src/runtime/streaming/network/environment.rs b/src/runtime/streaming/network/environment.rs index 19aedec7..07ea0cab 100644 --- a/src/runtime/streaming/network/environment.rs +++ b/src/runtime/streaming/network/environment.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use super::endpoint::{BoxedEventStream, PhysicalSender}; use std::collections::HashMap; diff --git a/src/runtime/streaming/network/mod.rs b/src/runtime/streaming/network/mod.rs index 259e0f12..4b120781 100644 --- a/src/runtime/streaming/network/mod.rs +++ b/src/runtime/streaming/network/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ pub mod endpoint; pub mod environment; diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs index 42eda177..104d24a1 100644 --- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs +++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use anyhow::{anyhow, bail, Result}; use arrow::compute::max_array; use arrow::row::{RowConverter, SortField}; @@ -13,6 +25,7 @@ use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit}; use datafusion::common::{Result as DFResult, ScalarValue}; use datafusion::physical_expr::aggregate::AggregateFunctionExpr; use datafusion::physical_plan::{Accumulator, PhysicalExpr}; +use crate::sql::common::constants::updating_state_field; use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; use datafusion_proto::physical_plan::from_proto::parse_physical_expr; use datafusion_proto::protobuf::PhysicalExprNode; @@ -239,7 +252,7 @@ impl IncrementalAggregatingFunc { .expect("_updating_meta must be StructArray"); let is_retract_array = meta_struct - .column_by_name("is_retract") + .column_by_name(updating_state_field::IS_RETRACT) .expect("meta struct must have is_retract"); Some(is_retract_array.as_any().downcast_ref::().expect("is_retract must be BooleanArray")) diff --git a/src/runtime/streaming/operators/grouping/mod.rs 
b/src/runtime/streaming/operators/grouping/mod.rs index fb2ae7b1..ef672351 100644 --- a/src/runtime/streaming/operators/grouping/mod.rs +++ b/src/runtime/streaming/operators/grouping/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + pub mod incremental_aggregate; pub mod updating_cache; diff --git a/src/runtime/streaming/operators/grouping/updating_cache.rs b/src/runtime/streaming/operators/grouping/updating_cache.rs index b6fbcc99..bdba9fa7 100644 --- a/src/runtime/streaming/operators/grouping/updating_cache.rs +++ b/src/runtime/streaming/operators/grouping/updating_cache.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
按 key 的增量状态缓存:LRU + TTL(idle),供 [`super::incremental_aggregate`] 等使用。 use std::borrow::Borrow; diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs index 639876bf..278bc8fe 100644 --- a/src/runtime/streaming/operators/joins/join_instance.rs +++ b/src/runtime/streaming/operators/joins/join_instance.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 瞬时 JOIN:双通道喂入 DataFusion 物理计划,水位线推进时闭合实例并抽干结果(纯内存版)。 use anyhow::{anyhow, Result}; diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs index c2bb6259..1a31b253 100644 --- a/src/runtime/streaming/operators/joins/join_with_expiration.rs +++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 带 TTL 的 Key-Time Join:纯内存状态版 + DataFusion 物理计划成对计算。 //! 
完全移除了底层 TableManager 和持久化状态依赖。 diff --git a/src/runtime/streaming/operators/joins/mod.rs b/src/runtime/streaming/operators/joins/mod.rs index ccfff792..bc81f328 100644 --- a/src/runtime/streaming/operators/joins/mod.rs +++ b/src/runtime/streaming/operators/joins/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + pub mod join_instance; pub mod join_with_expiration; diff --git a/src/runtime/streaming/operators/key_by.rs b/src/runtime/streaming/operators/key_by.rs index 2c183577..a432011d 100644 --- a/src/runtime/streaming/operators/key_by.rs +++ b/src/runtime/streaming/operators/key_by.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
物理网络路由算子:利用 DataFusion 物理表达式提取 Key,基于 Hash 排序执行零拷贝切片路由。 use anyhow::{anyhow, Result}; diff --git a/src/runtime/streaming/operators/key_operator.rs b/src/runtime/streaming/operators/key_operator.rs index 93a50db5..5dfd66f6 100644 --- a/src/runtime/streaming/operators/key_operator.rs +++ b/src/runtime/streaming/operators/key_operator.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 物理网络路由算子:利用 DataFusion 物理表达式提取 Key,基于 Hash 排序执行零拷贝切片路由。 //! //! 提供两种算子: diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs index 958b5320..dc8b39b7 100644 --- a/src/runtime/streaming/operators/mod.rs +++ b/src/runtime/streaming/operators/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
内置算子。 pub mod grouping; diff --git a/src/runtime/streaming/operators/projection.rs b/src/runtime/streaming/operators/projection.rs index 5e62afc6..33fe0d51 100644 --- a/src/runtime/streaming/operators/projection.rs +++ b/src/runtime/streaming/operators/projection.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 高性能投影算子:直接操作 Arrow Array 执行列映射与标量运算, //! 避开 DataFusion 执行树开销,适用于 SELECT 字段筛选和简单标量计算。 diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs index 1ce01673..dccc561d 100644 --- a/src/runtime/streaming/operators/sink/kafka/mod.rs +++ b/src/runtime/streaming/operators/sink/kafka/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
Kafka Sink:实现 [`crate::runtime::streaming::api::operator::MessageOperator`],支持 At-Least-Once 与 Exactly-Once(事务 + 二阶段提交)。 use anyhow::{anyhow, bail, Result}; diff --git a/src/runtime/streaming/operators/sink/mod.rs b/src/runtime/streaming/operators/sink/mod.rs index 3b88f563..93b3b0ee 100644 --- a/src/runtime/streaming/operators/sink/mod.rs +++ b/src/runtime/streaming/operators/sink/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 与外部系统对接的 Sink 实现(Kafka 等)。 pub mod kafka; diff --git a/src/runtime/streaming/operators/source/kafka/mod.rs b/src/runtime/streaming/operators/source/kafka/mod.rs index b17a504b..59507c2e 100644 --- a/src/runtime/streaming/operators/source/kafka/mod.rs +++ b/src/runtime/streaming/operators/source/kafka/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
Kafka 源算子:实现 [`crate::runtime::streaming::api::source::SourceOperator`],由 [`crate::runtime::streaming::execution::SourceRunner`] 轮询 `fetch_next`。 use anyhow::{anyhow, Context as _, Result}; diff --git a/src/runtime/streaming/operators/source/mod.rs b/src/runtime/streaming/operators/source/mod.rs index 59b3ff7c..687e2289 100644 --- a/src/runtime/streaming/operators/source/mod.rs +++ b/src/runtime/streaming/operators/source/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 与外部系统对接的源实现(Kafka 等)。 pub mod kafka; diff --git a/src/runtime/streaming/operators/stateless_physical_executor.rs b/src/runtime/streaming/operators/stateless_physical_executor.rs index 9e801188..45619dc6 100644 --- a/src/runtime/streaming/operators/stateless_physical_executor.rs +++ b/src/runtime/streaming/operators/stateless_physical_executor.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 
无状态物理计划执行器:将单批次写入 `SingleLockedBatch` 并让 DataFusion 计划消费。 use std::sync::{Arc, RwLock}; diff --git a/src/runtime/streaming/operators/value_execution.rs b/src/runtime/streaming/operators/value_execution.rs index b9fb0cd8..c3b3d525 100644 --- a/src/runtime/streaming/operators/value_execution.rs +++ b/src/runtime/streaming/operators/value_execution.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 通用无状态执行算子:驱动 DataFusion 物理计划(Filter, Case When, Scalar UDF 等), //! 不改变分区状态,适用于 Map / Filter 阶段。 diff --git a/src/runtime/streaming/operators/watermark/mod.rs b/src/runtime/streaming/operators/watermark/mod.rs index becc0b8f..4486a0fd 100644 --- a/src/runtime/streaming/operators/watermark/mod.rs +++ b/src/runtime/streaming/operators/watermark/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ pub mod watermark_generator; pub use watermark_generator::{WatermarkGeneratorConstructor, WatermarkGeneratorOperator, WatermarkGeneratorState}; diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs index 3af64bf7..2b255f9b 100644 --- a/src/runtime/streaming/operators/watermark/watermark_generator.rs +++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 表达式水位生成器:与 worker `arrow/watermark_generator` 对齐,通过 [`StreamOutput::Watermark`] 向下游广播。 use anyhow::{anyhow, Result}; diff --git a/src/runtime/streaming/operators/windows/mod.rs b/src/runtime/streaming/operators/windows/mod.rs index ba594016..02c9eccb 100644 --- a/src/runtime/streaming/operators/windows/mod.rs +++ b/src/runtime/streaming/operators/windows/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ pub mod session_aggregating_window; pub mod sliding_aggregating_window; pub mod tumbling_aggregating_window; diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs index cae0935c..8fa3f2f7 100644 --- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 会话窗口聚合:纯内存版,完全脱离持久化状态存储。 //! 利用 BTreeMap 充当优先队列,数据天然在内存中进行 Gap 合并与触发。 diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs index aa2e2474..e5af57f3 100644 --- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + //! 滑动窗口聚合:纯内存版。 //! 完全依赖内部的 TieredRecordBatchHolder 和 ActiveBin 在内存中进行计算, //! 摆脱 TableManager 依赖,遇到 Barrier 自动透传。 diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs index f835bac2..40c757dc 100644 --- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 滚动(tumbling)窗口聚合:与 worker `arrow/tumbling_aggregating_window` 对齐,实现 [`MessageOperator`]。 use anyhow::{anyhow, Result}; diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs index 03f02a19..4e9c83ce 100644 --- a/src/runtime/streaming/operators/windows/window_function.rs +++ b/src/runtime/streaming/operators/windows/window_function.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 窗口函数(按事件时间分桶的瞬时执行):纯内存版。 //! 完全依赖内部的 ActiveWindowExec 通道在内存中缓冲数据, //! 摆脱持久化状态存储的依赖,遇到 Barrier 自动透传。 diff --git a/src/runtime/streaming/protocol/control.rs b/src/runtime/streaming/protocol/control.rs index a7a9da57..d225e2e8 100644 --- a/src/runtime/streaming/protocol/control.rs +++ b/src/runtime/streaming/protocol/control.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 控制平面:与 [`super::event::StreamEvent`] 队列分离的高优先级指令。 use serde::{Deserialize, Serialize}; diff --git a/src/runtime/streaming/protocol/event.rs b/src/runtime/streaming/protocol/event.rs index ee974e7e..efd43952 100644 --- a/src/runtime/streaming/protocol/event.rs +++ b/src/runtime/streaming/protocol/event.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use arrow_array::RecordBatch; use crate::sql::common::{CheckpointBarrier, Watermark}; diff --git a/src/runtime/streaming/protocol/mod.rs b/src/runtime/streaming/protocol/mod.rs index 852562de..f859df28 100644 --- a/src/runtime/streaming/protocol/mod.rs +++ b/src/runtime/streaming/protocol/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 协议层:数据事件、控制命令、水位线合并与比较语义。 pub mod control; diff --git a/src/runtime/streaming/protocol/stream_out.rs b/src/runtime/streaming/protocol/stream_out.rs index 49d963df..0f6619f9 100644 --- a/src/runtime/streaming/protocol/stream_out.rs +++ b/src/runtime/streaming/protocol/stream_out.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use arrow_array::RecordBatch; use crate::sql::common::Watermark; diff --git a/src/runtime/streaming/protocol/tracked.rs b/src/runtime/streaming/protocol/tracked.rs index c675b5bd..5034abd2 100644 --- a/src/runtime/streaming/protocol/tracked.rs +++ b/src/runtime/streaming/protocol/tracked.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::sync::Arc; use crate::runtime::streaming::memory::MemoryTicket; diff --git a/src/runtime/streaming/protocol/watermark.rs b/src/runtime/streaming/protocol/watermark.rs index 43baeabb..9c039aa5 100644 --- a/src/runtime/streaming/protocol/watermark.rs +++ b/src/runtime/streaming/protocol/watermark.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! 水位线类型来自 `arroyo_types::Watermark`;此处提供 **多路对齐合并** 与 **单调推进** 判断。 use crate::sql::common::Watermark; diff --git a/src/runtime/wasm/processor/function_error.rs b/src/runtime/wasm/processor/function_error.rs index b38f8dd9..f9b8fe8e 100644 --- a/src/runtime/wasm/processor/function_error.rs +++ b/src/runtime/wasm/processor/function_error.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #[derive(Debug, Clone)] pub enum FunctionErrorStage { Input, diff --git a/src/sql/analysis/aggregate_rewriter.rs b/src/sql/analysis/aggregate_rewriter.rs index f11b53d0..36024ab0 100644 --- a/src/sql/analysis/aggregate_rewriter.rs +++ b/src/sql/analysis/aggregate_rewriter.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err}; use datafusion::functions_aggregate::expr_fn::max; diff --git a/src/sql/analysis/async_udf_rewriter.rs b/src/sql/analysis/async_udf_rewriter.rs index 0ad4dfc2..31a92057 100644 --- a/src/sql/analysis/async_udf_rewriter.rs +++ b/src/sql/analysis/async_udf_rewriter.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::sql::extensions::remote_table::RemoteTableBoundaryNode; use crate::sql::extensions::{ASYNC_RESULT_FIELD, AsyncFunctionExecutionNode}; use crate::sql::schema::StreamSchemaProvider; diff --git a/src/sql/analysis/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs index e9efe96b..77131595 100644 --- a/src/sql/analysis/join_rewriter.rs +++ b/src/sql/analysis/join_rewriter.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::sql::schema::StreamSchemaProvider; use crate::sql::extensions::join::StreamingJoinNode; use crate::sql::extensions::key_calculation::KeyExtractionNode; diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs index 697d8c97..d417ebd1 100644 --- a/src/sql/analysis/mod.rs +++ b/src/sql/analysis/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #![allow(clippy::new_without_default)] pub(crate) mod aggregate_rewriter; diff --git a/src/sql/analysis/row_time_rewriter.rs b/src/sql/analysis/row_time_rewriter.rs index 0a31d9f8..f0c4e435 100644 --- a/src/sql/analysis/row_time_rewriter.rs +++ b/src/sql/analysis/row_time_rewriter.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{Column, Result as DFResult}; use datafusion::logical_expr::Expr; diff --git a/src/sql/analysis/sink_input_rewriter.rs b/src/sql/analysis/sink_input_rewriter.rs index e491a75a..6b8b2de1 100644 --- a/src/sql/analysis/sink_input_rewriter.rs +++ b/src/sql/analysis/sink_input_rewriter.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::sql::extensions::sink::StreamEgressNode; use crate::sql::extensions::StreamingOperatorBlueprint; use datafusion::common::Result as DFResult; diff --git a/src/sql/analysis/source_metadata_visitor.rs b/src/sql/analysis/source_metadata_visitor.rs index 0d2e1455..81b9b179 100644 --- a/src/sql/analysis/source_metadata_visitor.rs +++ b/src/sql/analysis/source_metadata_visitor.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::sql::extensions::sink::{StreamEgressNode, STREAM_EGRESS_NODE_NAME}; use crate::sql::extensions::table_source::{StreamIngestionNode, STREAM_INGESTION_NODE_NAME}; use crate::sql::schema::StreamSchemaProvider; diff --git a/src/sql/analysis/stream_rewriter.rs b/src/sql/analysis/stream_rewriter.rs index 22ed3c83..a62a7bd1 100644 --- a/src/sql/analysis/stream_rewriter.rs +++ b/src/sql/analysis/stream_rewriter.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::sync::Arc; use super::StreamSchemaProvider; diff --git a/src/sql/analysis/streaming_window_analzer.rs b/src/sql/analysis/streaming_window_analzer.rs index 5eed3d2b..609bd2ee 100644 --- a/src/sql/analysis/streaming_window_analzer.rs +++ b/src/sql/analysis/streaming_window_analzer.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::collections::HashSet; use std::sync::Arc; diff --git a/src/sql/analysis/udafs.rs b/src/sql/analysis/udafs.rs index 9685c2d4..73fc062c 100644 --- a/src/sql/analysis/udafs.rs +++ b/src/sql/analysis/udafs.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use datafusion::arrow::array::ArrayRef; use datafusion::error::Result; use datafusion::physical_plan::Accumulator; diff --git a/src/sql/analysis/window_function_rewriter.rs b/src/sql/analysis/window_function_rewriter.rs index 8f195325..7b94b841 100644 --- a/src/sql/analysis/window_function_rewriter.rs +++ b/src/sql/analysis/window_function_rewriter.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use datafusion::common::tree_node::Transformed; use datafusion::common::{Column, Result as DFResult, plan_err, tree_node::TreeNodeRewriter}; use datafusion::logical_expr::{ diff --git a/src/sql/api/checkpoints.rs b/src/sql/api/checkpoints.rs index 243cae40..d9bdc139 100644 --- a/src/sql/api/checkpoints.rs +++ b/src/sql/api/checkpoints.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::sql::common::to_micros; use serde::{Deserialize, Serialize}; use std::time::SystemTime; diff --git a/src/sql/api/connections.rs b/src/sql/api/connections.rs index 7873ceb2..148df69d 100644 --- a/src/sql/api/connections.rs +++ b/src/sql/api/connections.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::sql::common::formats::{BadData, Format, Framing}; use crate::sql::common::{FsExtensionType, FsSchema}; use datafusion::arrow::datatypes::{DataType, Field, Fields, TimeUnit}; diff --git a/src/sql/api/metrics.rs b/src/sql/api/metrics.rs index 25d129e5..671b52f6 100644 --- a/src/sql/api/metrics.rs +++ b/src/sql/api/metrics.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Copy, Clone, Debug, Hash, PartialEq, Eq)] diff --git a/src/sql/api/mod.rs b/src/sql/api/mod.rs index 85cbcaaa..3969296a 100644 --- a/src/sql/api/mod.rs +++ b/src/sql/api/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! REST/RPC API types for the FunctionStream system. //! //! Adapted from Arroyo's `arroyo-rpc/src/api_types` and utility modules. diff --git a/src/sql/api/pipelines.rs b/src/sql/api/pipelines.rs index 8b42036c..d6cc5253 100644 --- a/src/sql/api/pipelines.rs +++ b/src/sql/api/pipelines.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use super::udfs::Udf; use crate::sql::common::control::ErrorDomain; use serde::{Deserialize, Serialize}; diff --git a/src/sql/api/public_ids.rs b/src/sql/api/public_ids.rs index 15a9f72e..33aa6427 100644 --- a/src/sql/api/public_ids.rs +++ b/src/sql/api/public_ids.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::time::{SystemTime, UNIX_EPOCH}; const ID_LENGTH: usize = 10; diff --git a/src/sql/api/schema_resolver.rs b/src/sql/api/schema_resolver.rs index a9124900..57d3d702 100644 --- a/src/sql/api/schema_resolver.rs +++ b/src/sql/api/schema_resolver.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use async_trait::async_trait; /// Trait for resolving schemas by ID (e.g., from a schema registry). diff --git a/src/sql/api/udfs.rs b/src/sql/api/udfs.rs index 41085168..781d5b07 100644 --- a/src/sql/api/udfs.rs +++ b/src/sql/api/udfs.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Clone, Debug)] diff --git a/src/sql/api/var_str.rs b/src/sql/api/var_str.rs index c4256e38..2638cd06 100644 --- a/src/sql/api/var_str.rs +++ b/src/sql/api/var_str.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use serde::{Deserialize, Serialize}; use std::env; diff --git a/src/sql/common/arrow_ext.rs b/src/sql/common/arrow_ext.rs index 701bf8e4..782f4358 100644 --- a/src/sql/common/arrow_ext.rs +++ b/src/sql/common/arrow_ext.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use std::collections::HashMap; use std::fmt::{Display, Formatter}; use std::time::SystemTime; diff --git a/src/sql/common/connector_options.rs b/src/sql/common/connector_options.rs index 308d5197..6bd6dfa6 100644 --- a/src/sql/common/connector_options.rs +++ b/src/sql/common/connector_options.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::collections::HashMap; use std::num::{NonZero, NonZeroU64}; use std::str::FromStr; diff --git a/src/sql/common/control.rs b/src/sql/common/control.rs index efdc754e..4ea9a12f 100644 --- a/src/sql/common/control.rs +++ b/src/sql/common/control.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use std::collections::HashMap; use std::time::SystemTime; diff --git a/src/sql/common/converter.rs b/src/sql/common/converter.rs index 8f6a2ba8..ec4687f8 100644 --- a/src/sql/common/converter.rs +++ b/src/sql/common/converter.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::sync::Arc; use arrow::row::{OwnedRow, RowConverter, RowParser, Rows, SortField}; use arrow_array::{Array, ArrayRef, BooleanArray}; diff --git a/src/sql/common/date.rs b/src/sql/common/date.rs index c18e31a7..718d5f56 100644 --- a/src/sql/common/date.rs +++ b/src/sql/common/date.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use serde::Serialize; use std::convert::TryFrom; diff --git a/src/sql/common/debezium.rs b/src/sql/common/debezium.rs index 3c9f4747..9dbc401f 100644 --- a/src/sql/common/debezium.rs +++ b/src/sql/common/debezium.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use bincode::{Decode, Encode}; use serde::{Deserialize, Serialize}; use std::convert::TryFrom; diff --git a/src/sql/common/errors.rs b/src/sql/common/errors.rs index 507851bd..fa4a722e 100644 --- a/src/sql/common/errors.rs +++ b/src/sql/common/errors.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::fmt; /// Result type for streaming operators and collectors. 
diff --git a/src/sql/common/format_from_opts.rs b/src/sql/common/format_from_opts.rs index dc9a43da..2469fb08 100644 --- a/src/sql/common/format_from_opts.rs +++ b/src/sql/common/format_from_opts.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! Parse `WITH` clause format / framing / bad-data options (Arroyo-compatible keys). use std::str::FromStr; @@ -5,6 +17,7 @@ use std::str::FromStr; use datafusion::common::{Result as DFResult, plan_datafusion_err, plan_err}; use super::connector_options::ConnectorOptions; +use super::with_option_keys as opt; use super::formats::{ AvroFormat, BadData, DecimalEncoding, Format, Framing, JsonCompression, JsonFormat, NewlineDelimitedFraming, ParquetCompression, ParquetFormat, ProtobufFormat, RawBytesFormat, @@ -14,32 +27,32 @@ use super::formats::{ impl JsonFormat { pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult { let mut j = JsonFormat::default(); - if let Some(v) = opts.pull_opt_bool("json.confluent_schema_registry")? { + if let Some(v) = opts.pull_opt_bool(opt::JSON_CONFLUENT_SCHEMA_REGISTRY)? { j.confluent_schema_registry = v; } - if let Some(v) = opts.pull_opt_u64("json.confluent_schema_version")? { + if let Some(v) = opts.pull_opt_u64(opt::JSON_CONFLUENT_SCHEMA_VERSION)? { j.schema_id = Some(v as u32); } - if let Some(v) = opts.pull_opt_bool("json.include_schema")? { + if let Some(v) = opts.pull_opt_bool(opt::JSON_INCLUDE_SCHEMA)? 
{ j.include_schema = v; } - if let Some(v) = opts.pull_opt_bool("json.debezium")? { + if let Some(v) = opts.pull_opt_bool(opt::JSON_DEBEZIUM)? { j.debezium = v; } - if let Some(v) = opts.pull_opt_bool("json.unstructured")? { + if let Some(v) = opts.pull_opt_bool(opt::JSON_UNSTRUCTURED)? { j.unstructured = v; } - if let Some(s) = opts.pull_opt_str("json.timestamp_format")? { + if let Some(s) = opts.pull_opt_str(opt::JSON_TIMESTAMP_FORMAT)? { j.timestamp_format = TimestampFormat::try_from(s.as_str()).map_err(|_| { plan_datafusion_err!("invalid json.timestamp_format '{}'", s) })?; } - if let Some(s) = opts.pull_opt_str("json.decimal_encoding")? { + if let Some(s) = opts.pull_opt_str(opt::JSON_DECIMAL_ENCODING)? { j.decimal_encoding = DecimalEncoding::try_from(s.as_str()).map_err(|_| { plan_datafusion_err!("invalid json.decimal_encoding '{s}'") })?; } - if let Some(s) = opts.pull_opt_str("json.compression")? { + if let Some(s) = opts.pull_opt_str(opt::JSON_COMPRESSION)? { j.compression = JsonCompression::from_str(&s) .map_err(|e| plan_datafusion_err!("invalid json.compression: {e}"))?; } @@ -49,7 +62,7 @@ impl JsonFormat { impl Format { pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult> { - let Some(name) = opts.pull_opt_str("format")? else { + let Some(name) = opts.pull_opt_str(opt::FORMAT)? else { return Ok(None); }; match name.to_lowercase().as_str() { @@ -77,16 +90,16 @@ impl AvroFormat { into_unstructured_json: false, schema_id: None, }; - if let Some(v) = opts.pull_opt_bool("avro.confluent_schema_registry")? { + if let Some(v) = opts.pull_opt_bool(opt::AVRO_CONFLUENT_SCHEMA_REGISTRY)? { a.confluent_schema_registry = v; } - if let Some(v) = opts.pull_opt_bool("avro.raw_datums")? { + if let Some(v) = opts.pull_opt_bool(opt::AVRO_RAW_DATUMS)? { a.raw_datums = v; } - if let Some(v) = opts.pull_opt_bool("avro.into_unstructured_json")? { + if let Some(v) = opts.pull_opt_bool(opt::AVRO_INTO_UNSTRUCTURED_JSON)? 
{ a.into_unstructured_json = v; } - if let Some(v) = opts.pull_opt_u64("avro.schema_id")? { + if let Some(v) = opts.pull_opt_u64(opt::AVRO_SCHEMA_ID)? { a.schema_id = Some(v as u32); } Ok(a) @@ -96,11 +109,11 @@ impl AvroFormat { impl ParquetFormat { fn from_opts(opts: &mut ConnectorOptions) -> DFResult { let mut p = ParquetFormat::default(); - if let Some(s) = opts.pull_opt_str("parquet.compression")? { + if let Some(s) = opts.pull_opt_str(opt::PARQUET_COMPRESSION)? { p.compression = ParquetCompression::from_str(&s) .map_err(|e| plan_datafusion_err!("invalid parquet.compression: {e}"))?; } - if let Some(v) = opts.pull_opt_u64("parquet.row_group_bytes")? { + if let Some(v) = opts.pull_opt_u64(opt::PARQUET_ROW_GROUP_BYTES)? { p.row_group_bytes = Some(v); } Ok(p) @@ -116,16 +129,16 @@ impl ProtobufFormat { confluent_schema_registry: false, length_delimited: false, }; - if let Some(v) = opts.pull_opt_bool("protobuf.into_unstructured_json")? { + if let Some(v) = opts.pull_opt_bool(opt::PROTOBUF_INTO_UNSTRUCTURED_JSON)? { p.into_unstructured_json = v; } - if let Some(s) = opts.pull_opt_str("protobuf.message_name")? { + if let Some(s) = opts.pull_opt_str(opt::PROTOBUF_MESSAGE_NAME)? { p.message_name = Some(s); } - if let Some(v) = opts.pull_opt_bool("protobuf.confluent_schema_registry")? { + if let Some(v) = opts.pull_opt_bool(opt::PROTOBUF_CONFLUENT_SCHEMA_REGISTRY)? { p.confluent_schema_registry = v; } - if let Some(v) = opts.pull_opt_bool("protobuf.length_delimited")? { + if let Some(v) = opts.pull_opt_bool(opt::PROTOBUF_LENGTH_DELIMITED)? 
{ p.length_delimited = v; } Ok(p) @@ -134,11 +147,11 @@ impl ProtobufFormat { impl Framing { pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult> { - let method = opts.pull_opt_str("framing.method")?; + let method = opts.pull_opt_str(opt::FRAMING_METHOD)?; match method.as_deref() { None => Ok(None), Some("newline") | Some("newline_delimited") => { - let max = opts.pull_opt_u64("framing.max_line_length")?; + let max = opts.pull_opt_u64(opt::FRAMING_MAX_LINE_LENGTH)?; Ok(Some(Framing::Newline(NewlineDelimitedFraming { max_line_length: max, }))) @@ -150,7 +163,7 @@ impl Framing { impl BadData { pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult { - let Some(s) = opts.pull_opt_str("bad_data")? else { + let Some(s) = opts.pull_opt_str(opt::BAD_DATA)? else { return Ok(BadData::Fail {}); }; match s.to_lowercase().as_str() { diff --git a/src/sql/common/formats.rs b/src/sql/common/formats.rs index 25d09a74..e37be020 100644 --- a/src/sql/common/formats.rs +++ b/src/sql/common/formats.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use serde::{Deserialize, Serialize}; use std::fmt::{Display, Formatter}; use std::str::FromStr; diff --git a/src/sql/common/fs_schema.rs b/src/sql/common/fs_schema.rs index f7fd5328..eb92d4ac 100644 --- a/src/sql/common/fs_schema.rs +++ b/src/sql/common/fs_schema.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! FunctionStream table/stream schema: Arrow [`Schema`] plus timestamp index and optional key columns. //! //! [`Schema`]: datafusion::arrow::datatypes::Schema diff --git a/src/sql/common/hash.rs b/src/sql/common/hash.rs index 8f47a8fa..6dce5b9a 100644 --- a/src/sql/common/hash.rs +++ b/src/sql/common/hash.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::ops::RangeInclusive; /// Randomly generated seeds for consistent hashing. Changing these breaks existing state. 
diff --git a/src/sql/common/kafka_catalog.rs b/src/sql/common/kafka_catalog.rs index 99c8983e..e54e6901 100644 --- a/src/sql/common/kafka_catalog.rs +++ b/src/sql/common/kafka_catalog.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + //! Kafka 表级与连接级配置(与 JSON Schema / Catalog 对齐)。 //! //! 放在 [`crate::sql::common`] 而非 `runtime::streaming`,以便 **SQL 规划、Coordinator、连接配置存储** diff --git a/src/sql/common/message.rs b/src/sql/common/message.rs index 29b7f3a5..4dcde95b 100644 --- a/src/sql/common/message.rs +++ b/src/sql/common/message.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use bincode::{Decode, Encode}; use datafusion::arrow::array::RecordBatch; use serde::{Deserialize, Serialize}; diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs index 722b2e58..7a4b4ee4 100644 --- a/src/sql/common/mod.rs +++ b/src/sql/common/mod.rs @@ -17,6 +17,8 @@ pub mod arrow_ext; pub mod connector_options; +pub mod with_option_keys; +pub mod constants; pub mod control; pub mod date; pub mod debezium; diff --git a/src/sql/common/operator_config.rs b/src/sql/common/operator_config.rs index a1f703f5..b9e40391 100644 --- a/src/sql/common/operator_config.rs +++ b/src/sql/common/operator_config.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use serde::{Deserialize, Serialize}; use serde_json::Value; diff --git a/src/sql/common/task_info.rs b/src/sql/common/task_info.rs index 5a31511b..479ab082 100644 --- a/src/sql/common/task_info.rs +++ b/src/sql/common/task_info.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + use bincode::{Decode, Encode}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; diff --git a/src/sql/common/time_utils.rs b/src/sql/common/time_utils.rs index 2ee5a126..323445cd 100644 --- a/src/sql/common/time_utils.rs +++ b/src/sql/common/time_utils.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::collections::HashMap; use std::hash::Hash; use std::time::{Duration, SystemTime, UNIX_EPOCH}; diff --git a/src/sql/common/with_option_keys.rs b/src/sql/common/with_option_keys.rs new file mode 100644 index 00000000..e48d9b7a --- /dev/null +++ b/src/sql/common/with_option_keys.rs @@ -0,0 +1,97 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
SQL `WITH` 子句中的选项名,以及部分连接器序列化 JSON 的字段名(单一来源)。 + +// ── General / table-level ────────────────────────────────────────────────── + +pub const CONNECTOR: &str = "connector"; +pub const TYPE: &str = "type"; +pub const FORMAT: &str = "format"; +/// Default format name used when the `format` option is not specified (a value, not a key). +pub const DEFAULT_FORMAT_VALUE: &str = "json"; +pub const BAD_DATA: &str = "bad_data"; +pub const PARTITION_BY: &str = "partition_by"; + +pub const EVENT_TIME_FIELD: &str = "event_time_field"; +pub const WATERMARK_FIELD: &str = "watermark_field"; + +pub const IDLE_MICROS: &str = "idle_micros"; +pub const IDLE_TIME: &str = "idle_time"; + +pub const LOOKUP_CACHE_MAX_BYTES: &str = "lookup.cache.max_bytes"; +pub const LOOKUP_CACHE_TTL: &str = "lookup.cache.ttl"; + +// ── Opaque JSON for non-Kafka connectors (`CONNECTOR` has the same name as the WITH option) ── + +pub const CONNECTION_SCHEMA: &str = "connection_schema"; + +// ── Backend parameter serialization (e.g. lookup) ─────────────────────────── + +pub const ADAPTER: &str = "adapter"; + +// ── Kafka ───────────────────────────────────────────────────────────────── + +pub const KAFKA_BOOTSTRAP_SERVERS: &str = "bootstrap.servers"; +pub const KAFKA_BOOTSTRAP_SERVERS_LEGACY: &str = "bootstrap_servers"; +pub const KAFKA_TOPIC: &str = "topic"; +pub const KAFKA_RATE_LIMIT_MESSAGES_PER_SECOND: &str = "rate_limit.messages_per_second"; +pub const KAFKA_VALUE_SUBJECT: &str = "value.subject"; +pub const KAFKA_SCAN_STARTUP_MODE: &str = "scan.startup.mode"; +pub const KAFKA_ISOLATION_LEVEL: &str = "isolation.level"; +pub const KAFKA_GROUP_ID: &str = "group.id"; +pub const KAFKA_GROUP_ID_LEGACY: &str = "group_id"; +pub const KAFKA_GROUP_ID_PREFIX: &str = "group.id.prefix"; +pub const KAFKA_SINK_COMMIT_MODE: &str = "sink.commit.mode"; +pub const KAFKA_SINK_KEY_FIELD: &str = "sink.key.field"; +pub const KAFKA_KEY_FIELD_LEGACY: &str = "key.field"; +pub const KAFKA_SINK_TIMESTAMP_FIELD: &str = "sink.timestamp.field"; +pub const KAFKA_TIMESTAMP_FIELD_LEGACY: &str = "timestamp.field"; + +// ── JSON 
format ─────────────────────────────────────────────────────────── + +pub const JSON_CONFLUENT_SCHEMA_REGISTRY: &str = "json.confluent_schema_registry"; +pub const JSON_CONFLUENT_SCHEMA_VERSION: &str = "json.confluent_schema_version"; +pub const JSON_INCLUDE_SCHEMA: &str = "json.include_schema"; +pub const JSON_DEBEZIUM: &str = "json.debezium"; +pub const JSON_UNSTRUCTURED: &str = "json.unstructured"; +pub const JSON_TIMESTAMP_FORMAT: &str = "json.timestamp_format"; +pub const JSON_DECIMAL_ENCODING: &str = "json.decimal_encoding"; +pub const JSON_COMPRESSION: &str = "json.compression"; + +// ── Avro ────────────────────────────────────────────────────────────────── + +pub const AVRO_CONFLUENT_SCHEMA_REGISTRY: &str = "avro.confluent_schema_registry"; +pub const AVRO_RAW_DATUMS: &str = "avro.raw_datums"; +pub const AVRO_INTO_UNSTRUCTURED_JSON: &str = "avro.into_unstructured_json"; +pub const AVRO_SCHEMA_ID: &str = "avro.schema_id"; + +// ── Parquet ─────────────────────────────────────────────────────────────── + +pub const PARQUET_COMPRESSION: &str = "parquet.compression"; +pub const PARQUET_ROW_GROUP_BYTES: &str = "parquet.row_group_bytes"; + +// ── Protobuf ──────────────────────────────────────────────────────────────── + +pub const PROTOBUF_INTO_UNSTRUCTURED_JSON: &str = "protobuf.into_unstructured_json"; +pub const PROTOBUF_MESSAGE_NAME: &str = "protobuf.message_name"; +pub const PROTOBUF_CONFLUENT_SCHEMA_REGISTRY: &str = "protobuf.confluent_schema_registry"; +pub const PROTOBUF_LENGTH_DELIMITED: &str = "protobuf.length_delimited"; + +// ── Framing ───────────────────────────────────────────────────────────────── + +pub const FRAMING_METHOD: &str = "framing.method"; +pub const FRAMING_MAX_LINE_LENGTH: &str = "framing.max_line_length"; + +// ── Infer encoding from a string map (catalog, etc.) ──────────────────────── + +pub const FORMAT_DEBEZIUM_FLAG: &str = "format.debezium"; diff --git a/src/sql/common/worker.rs b/src/sql/common/worker.rs index c12163ba..48c218fb 100644 
--- a/src/sql/common/worker.rs +++ b/src/sql/common/worker.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::fmt::{Display, Formatter}; use std::sync::Arc; diff --git a/src/sql/datastream/logical.rs b/src/sql/datastream/logical.rs index c0e5465e..e26be9f3 100644 --- a/src/sql/datastream/logical.rs +++ b/src/sql/datastream/logical.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use itertools::Itertools; use datafusion::arrow::datatypes::DataType; @@ -301,31 +313,10 @@ impl LogicalProgram { let mut s = HashSet::new(); for n in self.graph.node_weights() { for t in &n.operator_chain.operators { - let feature = match &t.operator_name { - OperatorName::AsyncUdf => "async-udf".to_string(), - OperatorName::ExpressionWatermark - | OperatorName::ArrowValue - | OperatorName::ArrowKey - | OperatorName::Projection => continue, - OperatorName::Join => "join-with-expiration".to_string(), - OperatorName::InstantJoin => "windowed-join".to_string(), - OperatorName::WindowFunction => "sql-window-function".to_string(), - OperatorName::LookupJoin => "lookup-join".to_string(), - OperatorName::TumblingWindowAggregate => { - "sql-tumbling-window-aggregate".to_string() - } - OperatorName::SlidingWindowAggregate => { - "sql-sliding-window-aggregate".to_string() - } - OperatorName::SessionWindowAggregate => { - "sql-session-window-aggregate".to_string() - } - OperatorName::UpdatingAggregate => "sql-updating-aggregate".to_string(), - OperatorName::KeyBy => "key-by-routing".to_string(), - OperatorName::ConnectorSource => "connector-source".to_string(), - OperatorName::ConnectorSink => "connector-sink".to_string(), + let Some(tag) = t.operator_name.feature_tag() else { + continue; }; - s.insert(feature); + s.insert(tag.to_string()); } } s diff --git a/src/sql/datastream/mod.rs b/src/sql/datastream/mod.rs index 82d25f24..922801f6 100644 --- a/src/sql/datastream/mod.rs +++ b/src/sql/datastream/mod.rs @@ -1 +1,13 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + pub mod logical; diff --git a/src/sql/extensions/aggregate.rs b/src/sql/extensions/aggregate.rs index 7ba16f7a..e05129c8 100644 --- a/src/sql/extensions/aggregate.rs +++ b/src/sql/extensions/aggregate.rs @@ -31,6 +31,7 @@ use protocol::grpc::api::{ }; use crate::multifield_partial_ord; +use crate::sql::common::constants::{extension_node, proto_operator_name}; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::{ CompiledTopologyNode, StreamingOperatorBlueprint, SystemTimestampInjectorNode, @@ -43,8 +44,7 @@ use crate::sql::types::{ schema_from_df_fields, schema_from_df_fields_with_metadata, }; -pub(crate) const STREAM_AGG_EXTENSION_NAME: &str = "StreamWindowAggregateNode"; -const INTERNAL_TIMESTAMP_COL: &str = "_timestamp"; +pub(crate) const STREAM_AGG_EXTENSION_NAME: &str = extension_node::STREAM_WINDOW_AGGREGATE; /// Represents a streaming windowed aggregation node in the logical plan. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -104,7 +104,7 @@ impl StreamWindowAggregateNode { )?; let operator_config = TumblingWindowAggregateOperator { - name: "TumblingWindow".to_string(), + name: proto_operator_name::TUMBLING_WINDOW.to_string(), width_micros: duration.as_micros() as u64, binning_function: binning_expr.encode_to_vec(), input_schema: Some( @@ -175,7 +175,7 @@ impl StreamWindowAggregateNode { format!("sliding_window_{node_id}"), OperatorName::SlidingWindowAggregate, operator_config.encode_to_vec(), - "sliding window".to_string(), + proto_operator_name::SLIDING_WINDOW_LABEL.to_string(), 1, )) } @@ -255,7 +255,7 @@ impl StreamWindowAggregateNode { apply_final_projection: bool, ) -> Result { let ts_column_expr = - Expr::Column(Column::new_unqualified(INTERNAL_TIMESTAMP_COL.to_string())); + Expr::Column(Column::new_unqualified(TIMESTAMP_FIELD.to_string())); let binning_expr = planner.create_physical_expr(&ts_column_expr, &input_schema)?; let binning_proto = serialize_physical_expr(&binning_expr, &DefaultPhysicalExtensionCodec {})?; @@ -277,7 +277,7 @@ impl StreamWindowAggregateNode { } = planner.split_physical_plan(self.partition_keys.clone(), &self.base_agg_plan, true)?; let operator_config = TumblingWindowAggregateOperator { - name: "InstantWindow".to_string(), + name: proto_operator_name::INSTANT_WINDOW.to_string(), width_micros: 0, binning_function: binning_proto.encode_to_vec(), input_schema: Some( @@ -298,7 +298,7 @@ impl StreamWindowAggregateNode { format!("instant_window_{node_id}"), OperatorName::TumblingWindowAggregate, operator_config.encode_to_vec(), - "instant window".to_string(), + proto_operator_name::INSTANT_WINDOW_LABEL.to_string(), 1, )) } diff --git a/src/sql/extensions/async_udf.rs b/src/sql/extensions/async_udf.rs index 147e0f90..8add0625 100644 --- a/src/sql/extensions/async_udf.rs +++ b/src/sql/extensions/async_udf.rs @@ -25,6 +25,7 @@ use prost::Message; use protocol::grpc::api::{AsyncUdfOperator, AsyncUdfOrdering}; use 
crate::multifield_partial_ord; +use crate::sql::common::constants::extension_node; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::streaming_operator_blueprint::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::logical_node::logical::{ @@ -33,8 +34,9 @@ use crate::sql::logical_node::logical::{ use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::types::{DFField, fields_with_qualifiers, schema_from_df_fields}; -pub(crate) const NODE_TYPE_NAME: &str = "AsyncFunctionExecutionNode"; -pub const ASYNC_RESULT_FIELD: &str = "__async_result"; +use super::ASYNC_RESULT_FIELD; + +pub(crate) const NODE_TYPE_NAME: &str = extension_node::ASYNC_FUNCTION_EXECUTION; /// Represents a logical node that executes an external asynchronous function (UDF) /// and projects the final results into the streaming pipeline. diff --git a/src/sql/extensions/constants.rs b/src/sql/extensions/constants.rs index 489af179..245dacec 100644 --- a/src/sql/extensions/constants.rs +++ b/src/sql/extensions/constants.rs @@ -10,5 +10,4 @@ // See the License for the specific language governing permissions and // limitations under the License. -/// Column name substituted for an async UDF call after rewrite. 
-pub const ASYNC_RESULT_FIELD: &str = "__async_result"; +pub use crate::sql::common::constants::sql_field::ASYNC_RESULT as ASYNC_RESULT_FIELD; diff --git a/src/sql/extensions/debezium.rs b/src/sql/extensions/debezium.rs index 612c0d79..a1042194 100644 --- a/src/sql/extensions/debezium.rs +++ b/src/sql/extensions/debezium.rs @@ -20,6 +20,7 @@ use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use datafusion::physical_plan::DisplayAs; use crate::multifield_partial_ord; +use crate::sql::common::constants::{cdc, extension_node}; use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::logical_planner::updating_meta_field; @@ -31,12 +32,8 @@ use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const UNROLL_NODE_NAME: &str = "UnrollDebeziumPayloadNode"; -pub(crate) const PACK_NODE_NAME: &str = "PackDebeziumEnvelopeNode"; - -const CDC_FIELD_BEFORE: &str = "before"; -const CDC_FIELD_AFTER: &str = "after"; -const CDC_FIELD_OP: &str = "op"; +pub(crate) const UNROLL_NODE_NAME: &str = extension_node::UNROLL_DEBEZIUM_PAYLOAD; +pub(crate) const PACK_NODE_NAME: &str = extension_node::PACK_DEBEZIUM_ENVELOPE; // ----------------------------------------------------------------------------- // Core Schema Codec @@ -68,12 +65,12 @@ impl DebeziumSchemaCodec { let mut envelope_fields = vec![ Arc::new(Field::new( - CDC_FIELD_BEFORE, + cdc::BEFORE, payload_struct_type.clone(), true, )), - Arc::new(Field::new(CDC_FIELD_AFTER, payload_struct_type, true)), - Arc::new(Field::new(CDC_FIELD_OP, DataType::Utf8, true)), + Arc::new(Field::new(cdc::AFTER, payload_struct_type, true)), + Arc::new(Field::new(cdc::OP, DataType::Utf8, true)), ]; if let Some(ts) = ts_field { @@ -134,15 +131,15 @@ impl UnrollDebeziumPayloadNode { } fn 
validate_envelope_structure(schema: &DFSchemaRef) -> Result<(usize, usize)> { - let before_idx = schema.index_of_column_by_name(None, CDC_FIELD_BEFORE).ok_or_else( + let before_idx = schema.index_of_column_by_name(None, cdc::BEFORE).ok_or_else( || DataFusionError::Plan("Missing 'before' state column in CDC stream".into()), )?; - let after_idx = schema.index_of_column_by_name(None, CDC_FIELD_AFTER).ok_or_else( + let after_idx = schema.index_of_column_by_name(None, cdc::AFTER).ok_or_else( || DataFusionError::Plan("Missing 'after' state column in CDC stream".into()), )?; - let op_idx = schema.index_of_column_by_name(None, CDC_FIELD_OP).ok_or_else(|| { + let op_idx = schema.index_of_column_by_name(None, cdc::OP).ok_or_else(|| { DataFusionError::Plan("Missing 'op' operation column in CDC stream".into()) })?; @@ -158,7 +155,7 @@ impl UnrollDebeziumPayloadNode { if *schema.field(op_idx).data_type() != DataType::Utf8 { return plan_err!( "The '{}' column must be of type Utf8", - CDC_FIELD_OP + cdc::OP ); } diff --git a/src/sql/extensions/join.rs b/src/sql/extensions/join.rs index 70fbf3a3..9789a216 100644 --- a/src/sql/extensions/join.rs +++ b/src/sql/extensions/join.rs @@ -22,6 +22,7 @@ use datafusion_proto::protobuf::PhysicalPlanNode; use prost::Message; use protocol::grpc::api::JoinOperator; +use crate::sql::common::constants::{extension_node, runtime_operator_kind}; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::logical_node::logical::{ @@ -34,7 +35,7 @@ use crate::sql::logical_planner::FsPhysicalExtensionCodec; // Constants // ----------------------------------------------------------------------------- -pub(crate) const STREAM_JOIN_NODE_TYPE: &str = "StreamingJoinNode"; +pub(crate) const STREAM_JOIN_NODE_TYPE: &str = extension_node::STREAMING_JOIN; // ----------------------------------------------------------------------------- // Logical Node Definition @@ -187,7 +188,7 
@@ impl StreamingOperatorBlueprint for StreamingJoinNode { node_identifier.clone(), self.determine_operator_type(), operator_config.encode_to_vec(), - "streaming_join".to_string(), + runtime_operator_kind::STREAMING_JOIN.to_string(), 1, ); diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs index 484d464c..6a9e924b 100644 --- a/src/sql/extensions/key_calculation.rs +++ b/src/sql/extensions/key_calculation.rs @@ -27,6 +27,7 @@ use prost::Message; use protocol::grpc::api::{KeyPlanOperator, ProjectionOperator}; use crate::multifield_partial_ord; +use crate::sql::common::constants::{extension_node, sql_field}; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; @@ -34,7 +35,7 @@ use crate::sql::logical_planner::FsPhysicalExtensionCodec; use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::types::{fields_with_qualifiers, schema_from_df_fields_with_metadata}; -pub(crate) const EXTENSION_NODE_IDENTIFIER: &str = "KeyExtractionNode"; +pub(crate) const EXTENSION_NODE_IDENTIFIER: &str = extension_node::KEY_EXTRACTION; /// Routing strategy for shuffling data across the stream topology. 
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] @@ -101,7 +102,7 @@ impl KeyExtractionNode { indices: &[usize], ) -> (Vec, OperatorName) { let operator_config = KeyPlanOperator { - name: "key".into(), + name: sql_field::DEFAULT_KEY_LABEL.into(), physical_plan: physical_plan_proto.encode_to_vec(), key_fields: indices.iter().map(|&idx| idx as u64).collect(), }; @@ -153,7 +154,11 @@ impl KeyExtractionNode { } let operator_config = ProjectionOperator { - name: self.operator_label.as_deref().unwrap_or("key").to_string(), + name: self + .operator_label + .as_deref() + .unwrap_or(sql_field::DEFAULT_KEY_LABEL) + .to_string(), input_schema: Some(input_schema_ref.as_ref().clone().into()), output_schema: Some(output_fs_schema.into()), exprs: physical_expr_payloads, diff --git a/src/sql/extensions/lookup.rs b/src/sql/extensions/lookup.rs index c34c5b10..684a8f97 100644 --- a/src/sql/extensions/lookup.rs +++ b/src/sql/extensions/lookup.rs @@ -24,6 +24,7 @@ use protocol::grpc::api; use protocol::grpc::api::{ConnectorOp, LookupJoinCondition, LookupJoinOperator}; use crate::multifield_partial_ord; +use crate::sql::common::constants::extension_node; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; @@ -35,8 +36,8 @@ use crate::sql::schema::utils::add_timestamp_field_arrow; // Constants & Identifiers // ----------------------------------------------------------------------------- -pub const DICTIONARY_SOURCE_NODE_NAME: &str = "ReferenceTableSource"; -pub const STREAM_DICTIONARY_JOIN_NODE_NAME: &str = "StreamReferenceJoin"; +pub const DICTIONARY_SOURCE_NODE_NAME: &str = extension_node::REFERENCE_TABLE_SOURCE; +pub const STREAM_DICTIONARY_JOIN_NODE_NAME: &str = extension_node::STREAM_REFERENCE_JOIN; // ----------------------------------------------------------------------------- // Logical Node: 
Reference Table Source diff --git a/src/sql/extensions/projection.rs b/src/sql/extensions/projection.rs index 2175bddf..d1b9e755 100644 --- a/src/sql/extensions/projection.rs +++ b/src/sql/extensions/projection.rs @@ -22,6 +22,7 @@ use prost::Message; use protocol::grpc::api::ProjectionOperator; use crate::multifield_partial_ord; +use crate::sql::common::constants::{extension_node, sql_field}; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; @@ -32,8 +33,8 @@ use crate::sql::types::{DFField, schema_from_df_fields}; // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const STREAM_PROJECTION_NODE_NAME: &str = "StreamProjectionNode"; -const DEFAULT_PROJECTION_LABEL: &str = "projection"; +pub(crate) const STREAM_PROJECTION_NODE_NAME: &str = extension_node::STREAM_PROJECTION; +const DEFAULT_PROJECTION_LABEL: &str = sql_field::DEFAULT_PROJECTION_LABEL; // ----------------------------------------------------------------------------- // Logical Node Definition diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs index 5011bb4c..a9a65c51 100644 --- a/src/sql/extensions/remote_table.rs +++ b/src/sql/extensions/remote_table.rs @@ -22,6 +22,7 @@ use prost::Message; use protocol::grpc::api::ValuePlanOperator; use crate::multifield_partial_ord; +use crate::sql::common::constants::extension_node; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; @@ -32,7 +33,7 @@ use crate::sql::logical_planner::planner::{NamedNode, Planner}; // Constants & Identifiers // ----------------------------------------------------------------------------- 
-pub(crate) const REMOTE_TABLE_NODE_NAME: &str = "RemoteTableBoundaryNode"; +pub(crate) const REMOTE_TABLE_NODE_NAME: &str = extension_node::REMOTE_TABLE_BOUNDARY; // ----------------------------------------------------------------------------- // Logical Node Definition diff --git a/src/sql/extensions/sink.rs b/src/sql/extensions/sink.rs index 8fc31aac..d2916486 100644 --- a/src/sql/extensions/sink.rs +++ b/src/sql/extensions/sink.rs @@ -18,6 +18,7 @@ use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalN use prost::Message; use crate::multifield_partial_ord; +use crate::sql::common::constants::extension_node; use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; @@ -31,7 +32,7 @@ use super::remote_table::RemoteTableBoundaryNode; // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const STREAM_EGRESS_NODE_NAME: &str = "StreamEgressNode"; +pub(crate) const STREAM_EGRESS_NODE_NAME: &str = extension_node::STREAM_EGRESS; // ----------------------------------------------------------------------------- // Logical Node Definition diff --git a/src/sql/extensions/table_source.rs b/src/sql/extensions/table_source.rs index 292284ba..3f998c5a 100644 --- a/src/sql/extensions/table_source.rs +++ b/src/sql/extensions/table_source.rs @@ -18,6 +18,7 @@ use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use prost::Message; use crate::multifield_partial_ord; +use crate::sql::common::constants::extension_node; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::debezium::DebeziumSchemaCodec; use crate::sql::logical_node::logical::{LogicalNode, OperatorName}; @@ -32,7 +33,7 @@ use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; // 
Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const STREAM_INGESTION_NODE_NAME: &str = "StreamIngestionNode"; +pub(crate) const STREAM_INGESTION_NODE_NAME: &str = extension_node::STREAM_INGESTION; // ----------------------------------------------------------------------------- // Logical Node Definition diff --git a/src/sql/extensions/timestamp_append.rs b/src/sql/extensions/timestamp_append.rs index 7a3a07e9..2d8b985b 100644 --- a/src/sql/extensions/timestamp_append.rs +++ b/src/sql/extensions/timestamp_append.rs @@ -16,13 +16,14 @@ use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err}; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use crate::multifield_partial_ord; +use crate::sql::common::constants::extension_node; use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field}; // ----------------------------------------------------------------------------- // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const TIMESTAMP_INJECTOR_NODE_NAME: &str = "SystemTimestampInjectorNode"; +pub(crate) const TIMESTAMP_INJECTOR_NODE_NAME: &str = extension_node::SYSTEM_TIMESTAMP_INJECTOR; // ----------------------------------------------------------------------------- // Logical Node Definition diff --git a/src/sql/extensions/updating_aggregate.rs b/src/sql/extensions/updating_aggregate.rs index 1671fb13..9d12806f 100644 --- a/src/sql/extensions/updating_aggregate.rs +++ b/src/sql/extensions/updating_aggregate.rs @@ -25,6 +25,7 @@ use datafusion_proto::protobuf::PhysicalPlanNode; use prost::Message; use protocol::grpc::api::UpdatingAggregateOperator; +use crate::sql::common::constants::{extension_node, proto_operator_name, updating_state_field}; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::{CompiledTopologyNode, IsRetractExtension, 
StreamingOperatorBlueprint}; use crate::sql::functions::multi_hash; @@ -36,7 +37,7 @@ use crate::sql::logical_planner::planner::{NamedNode, Planner}; // Constants & Configuration // ----------------------------------------------------------------------------- -pub(crate) const CONTINUOUS_AGGREGATE_NODE_NAME: &str = "ContinuousAggregateNode"; +pub(crate) const CONTINUOUS_AGGREGATE_NODE_NAME: &str = extension_node::CONTINUOUS_AGGREGATE; const DEFAULT_FLUSH_INTERVAL_MICROS: u64 = 10_000_000; @@ -102,9 +103,9 @@ impl ContinuousAggregateNode { }; named_struct(vec![ - lit("is_retract"), + lit(updating_state_field::IS_RETRACT), lit(false), - lit("id"), + lit(updating_state_field::ID), state_id_hash, ]) } @@ -128,7 +129,7 @@ impl ContinuousAggregateNode { planner.serialize_as_physical_expr(&meta_expr, &upstream_df_schema)?; Ok(UpdatingAggregateOperator { - name: "UpdatingAggregate".to_string(), + name: proto_operator_name::UPDATING_AGGREGATE.to_string(), input_schema: Some((**upstream_schema).clone().into()), final_schema: Some(self.yielded_schema().into()), aggregate_exec: compiled_agg_payload, @@ -220,7 +221,7 @@ impl StreamingOperatorBlueprint for ContinuousAggregateNode { format!("updating_aggregate_{node_index}"), OperatorName::UpdatingAggregate, operator_config.encode_to_vec(), - "UpdatingAggregate".to_string(), + proto_operator_name::UPDATING_AGGREGATE.to_string(), 1, ); diff --git a/src/sql/extensions/watermark_node.rs b/src/sql/extensions/watermark_node.rs index 7cdb9b67..231e1951 100644 --- a/src/sql/extensions/watermark_node.rs +++ b/src/sql/extensions/watermark_node.rs @@ -22,19 +22,19 @@ use prost::Message; use protocol::grpc::api::ExpressionWatermarkConfig; use crate::multifield_partial_ord; +use crate::sql::common::constants::{extension_node, runtime_operator_kind}; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::logical_node::logical::{LogicalEdge, 
LogicalEdgeType, LogicalNode, OperatorName}; use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::schema::utils::add_timestamp_field; +use crate::sql::types::TIMESTAMP_FIELD; // ----------------------------------------------------------------------------- // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const EVENT_TIME_WATERMARK_NODE_NAME: &str = "EventTimeWatermarkNode"; - -const INTERNAL_TIMESTAMP_COLUMN: &str = "_timestamp"; +pub(crate) const EVENT_TIME_WATERMARK_NODE_NAME: &str = extension_node::EVENT_TIME_WATERMARK; const DEFAULT_WATERMARK_EMISSION_PERIOD_MICROS: u64 = 1_000_000; @@ -72,11 +72,11 @@ impl EventTimeWatermarkNode { )?; let internal_timestamp_offset = resolved_schema - .index_of_column_by_name(None, INTERNAL_TIMESTAMP_COLUMN) + .index_of_column_by_name(None, TIMESTAMP_FIELD) .ok_or_else(|| { DataFusionError::Plan(format!( "Fatal: Failed to resolve mandatory temporal column '{}'", - INTERNAL_TIMESTAMP_COLUMN + TIMESTAMP_FIELD )) })?; @@ -163,11 +163,11 @@ impl UserDefinedLogicalNodeCore for EventTimeWatermarkNode { let internal_timestamp_offset = self .resolved_schema - .index_of_column_by_name(Some(&self.namespace_qualifier), INTERNAL_TIMESTAMP_COLUMN) + .index_of_column_by_name(Some(&self.namespace_qualifier), TIMESTAMP_FIELD) .ok_or_else(|| { DataFusionError::Plan(format!( "Optimizer Error: Lost tracking of temporal column '{}'", - INTERNAL_TIMESTAMP_COLUMN + TIMESTAMP_FIELD )) })?; @@ -210,7 +210,7 @@ impl StreamingOperatorBlueprint for EventTimeWatermarkNode { format!("watermark_{node_index}"), OperatorName::ExpressionWatermark, operator_config.encode_to_vec(), - "watermark_generator".to_string(), + runtime_operator_kind::WATERMARK_GENERATOR.to_string(), 1, ); diff --git a/src/sql/extensions/windows_function.rs b/src/sql/extensions/windows_function.rs index e53e2ee9..09945378 100644 --- a/src/sql/extensions/windows_function.rs +++ 
b/src/sql/extensions/windows_function.rs @@ -21,6 +21,7 @@ use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNod use prost::Message; use protocol::grpc::api::WindowFunctionOperator; +use crate::sql::common::constants::{extension_node, proto_operator_name, runtime_operator_kind}; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; use crate::sql::logical_planner::FsPhysicalExtensionCodec; @@ -33,7 +34,7 @@ use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const STREAMING_WINDOW_NODE_NAME: &str = "StreamingWindowFunctionNode"; +pub(crate) const STREAMING_WINDOW_NODE_NAME: &str = extension_node::STREAMING_WINDOW_FUNCTION; // ----------------------------------------------------------------------------- // Logical Node Definition @@ -163,7 +164,7 @@ impl StreamingOperatorBlueprint for StreamingWindowFunctionNode { let evaluation_plan_payload = self.compile_physical_evaluation_plan(planner)?; let operator_config = WindowFunctionOperator { - name: "WindowFunction".to_string(), + name: proto_operator_name::WINDOW_FUNCTION.to_string(), input_schema: Some(input_schema.as_ref().clone().into()), binning_function: binning_payload, window_function_plan: evaluation_plan_payload, @@ -174,7 +175,7 @@ impl StreamingOperatorBlueprint for StreamingWindowFunctionNode { format!("window_function_{node_index}"), OperatorName::WindowFunction, operator_config.encode_to_vec(), - "streaming_window_evaluator".to_string(), + runtime_operator_kind::STREAMING_WINDOW_EVALUATOR.to_string(), 1, ); diff --git a/src/sql/functions/mod.rs b/src/sql/functions/mod.rs index bfd59654..b78f5d2a 100644 --- a/src/sql/functions/mod.rs +++ b/src/sql/functions/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may 
not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::sql::schema::StreamSchemaProvider; use datafusion::arrow::array::{ Array, ArrayRef, StringArray, UnionArray, @@ -22,7 +34,7 @@ use std::collections::HashMap; use std::fmt::{Debug, Write}; use std::sync::{Arc, OnceLock}; -const SERIALIZE_JSON_UNION: &str = "serialize_json_union"; +use crate::sql::common::constants::scalar_fn; /// Borrowed from DataFusion /// @@ -57,7 +69,7 @@ make_udf_function!(MultiHashFunction, MULTI_HASH, multi_hash); pub fn register_all(registry: &mut dyn FunctionRegistry) { registry .register_udf(Arc::new(create_udf( - "get_first_json_object", + scalar_fn::GET_FIRST_JSON_OBJECT, vec![DataType::Utf8, DataType::Utf8], DataType::Utf8, Volatility::Immutable, @@ -67,7 +79,7 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) { registry .register_udf(Arc::new(create_udf( - "extract_json", + scalar_fn::EXTRACT_JSON, vec![DataType::Utf8, DataType::Utf8], DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), Volatility::Immutable, @@ -77,7 +89,7 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) { registry .register_udf(Arc::new(create_udf( - "extract_json_string", + scalar_fn::EXTRACT_JSON_STRING, vec![DataType::Utf8, DataType::Utf8], DataType::Utf8, Volatility::Immutable, @@ -87,7 +99,7 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) { registry .register_udf(Arc::new(create_udf( - SERIALIZE_JSON_UNION, + scalar_fn::SERIALIZE_JSON_UNION, vec![DataType::Union(union_fields(), UnionMode::Sparse)], DataType::Utf8, 
Volatility::Immutable, @@ -190,7 +202,7 @@ impl ScalarUDFImpl for MultiHashFunction { } fn name(&self) -> &str { - "multi_hash" + scalar_fn::MULTI_HASH } fn signature(&self) -> &Signature { @@ -456,7 +468,7 @@ pub(crate) fn serialize_outgoing_json( if is_json_union(f.data_type()) { Expr::Alias(Alias::new( Expr::ScalarFunction(ScalarFunction::new_udf( - registry.udf(SERIALIZE_JSON_UNION).unwrap(), + registry.udf(scalar_fn::SERIALIZE_JSON_UNION).unwrap(), vec![col(f.name())], )), Option::::None, diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs index 2fd9ad82..6cb00914 100644 --- a/src/sql/logical_node/logical/operator_name.rs +++ b/src/sql/logical_node/logical/operator_name.rs @@ -15,6 +15,8 @@ use std::str::FromStr; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use strum::{Display, EnumString}; +use crate::sql::common::constants::operator_feature; + #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)] pub enum OperatorName { ExpressionWatermark, @@ -35,6 +37,27 @@ pub enum OperatorName { ConnectorSink, } +impl OperatorName { + /// Kebab-case tag used for feature / metrics aggregation (consistent with [`crate::sql::common::constants::operator_feature`]). + pub fn feature_tag(self) -> Option<&'static str> { + match self { + Self::ExpressionWatermark | Self::ArrowValue | Self::ArrowKey | Self::Projection => None, + Self::AsyncUdf => Some(operator_feature::ASYNC_UDF), + Self::Join => Some(operator_feature::JOIN_WITH_EXPIRATION), + Self::InstantJoin => Some(operator_feature::WINDOWED_JOIN), + Self::WindowFunction => Some(operator_feature::SQL_WINDOW_FUNCTION), + Self::LookupJoin => Some(operator_feature::LOOKUP_JOIN), + Self::TumblingWindowAggregate => Some(operator_feature::SQL_TUMBLING_WINDOW_AGGREGATE), + Self::SlidingWindowAggregate => Some(operator_feature::SQL_SLIDING_WINDOW_AGGREGATE), + Self::SessionWindowAggregate => Some(operator_feature::SQL_SESSION_WINDOW_AGGREGATE), + Self::UpdatingAggregate => 
Some(operator_feature::SQL_UPDATING_AGGREGATE), + Self::KeyBy => Some(operator_feature::KEY_BY_ROUTING), + Self::ConnectorSource => Some(operator_feature::CONNECTOR_SOURCE), + Self::ConnectorSink => Some(operator_feature::CONNECTOR_SINK), + } + } +} + impl Serialize for OperatorName { fn serialize(&self, serializer: S) -> Result where diff --git a/src/sql/logical_node/mod.rs b/src/sql/logical_node/mod.rs index 82d25f24..922801f6 100644 --- a/src/sql/logical_node/mod.rs +++ b/src/sql/logical_node/mod.rs @@ -1 +1,13 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + pub mod logical; diff --git a/src/sql/logical_planner/mod.rs b/src/sql/logical_planner/mod.rs index 85046c0d..54634651 100644 --- a/src/sql/logical_planner/mod.rs +++ b/src/sql/logical_planner/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use datafusion::arrow::{ array::{ Array, AsArray, BooleanBuilder, PrimitiveArray, RecordBatch, StringArray, StructArray, @@ -31,6 +43,7 @@ use crate::make_udf_function; use crate::sql::functions::MultiHashFunction; use crate::sql::analysis::UNNESTED_COL; use crate::sql::schema::utils::window_arrow_struct; +use crate::sql::common::constants::cdc; use crate::sql::common::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; use datafusion::arrow::datatypes::{TimestampNanosecondType, UInt64Type}; use datafusion::catalog::memory::MemorySourceConfig; @@ -763,9 +776,9 @@ pub struct DebeziumUnrollingExec { impl DebeziumUnrollingExec { pub fn try_new(input: Arc, primary_keys: Vec) -> Result { let input_schema = input.schema(); - let before_index = input_schema.index_of("before")?; - let after_index = input_schema.index_of("after")?; - let op_index = input_schema.index_of("op")?; + let before_index = input_schema.index_of(cdc::BEFORE)?; + let after_index = input_schema.index_of(cdc::AFTER)?; + let op_index = input_schema.index_of(cdc::OP)?; let _timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; let before_type = input_schema.field(before_index).data_type(); let after_type = input_schema.field(after_index).data_type(); @@ -888,9 +901,9 @@ impl DebeziumUnrollingStream { return plan_err!("there must be at least one primary key for a Debezium source"); } let input_schema = input.schema(); - let before_index = input_schema.index_of("before")?; - let after_index = input_schema.index_of("after")?; - let op_index = input_schema.index_of("op")?; + let before_index = input_schema.index_of(cdc::BEFORE)?; + let after_index = input_schema.index_of(cdc::AFTER)?; + let op_index = input_schema.index_of(cdc::OP)?; let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; Ok(Self { diff --git a/src/sql/logical_planner/planner.rs b/src/sql/logical_planner/planner.rs index bd25423c..be388ad4 100644 --- a/src/sql/logical_planner/planner.rs +++ b/src/sql/logical_planner/planner.rs @@ -1,3 
+1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::collections::HashMap; use std::sync::Arc; use std::thread; diff --git a/src/sql/physical/physical_planner.rs b/src/sql/physical/physical_planner.rs index 963fa76f..fc66b3b0 100644 --- a/src/sql/physical/physical_planner.rs +++ b/src/sql/physical/physical_planner.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use std::collections::HashMap; use std::sync::Arc; use std::thread; diff --git a/src/sql/schema/data_encoding_format.rs b/src/sql/schema/data_encoding_format.rs index 5b93c90a..67e6d7e3 100644 --- a/src/sql/schema/data_encoding_format.rs +++ b/src/sql/schema/data_encoding_format.rs @@ -16,6 +16,7 @@ use datafusion::arrow::datatypes::{DataType, Field}; use datafusion::common::{Result, plan_err}; use super::column_descriptor::ColumnDescriptor; +use crate::sql::common::with_option_keys as opt; use crate::sql::common::Format; /// High-level payload encoding (orthogonal to `Format` wire details in `ConnectionSchema`). @@ -30,10 +31,13 @@ pub enum DataEncodingFormat { impl DataEncodingFormat { pub fn extract_from_map(opts: &HashMap) -> Result { - let format_str = opts.get("format").map(|s| s.as_str()).unwrap_or("json"); + let format_str = opts + .get(opt::FORMAT) + .map(|s| s.as_str()) + .unwrap_or(opt::DEFAULT_FORMAT_VALUE); let is_debezium = opts - .get("format.debezium") - .or_else(|| opts.get("json.debezium")) + .get(opt::FORMAT_DEBEZIUM_FLAG) + .or_else(|| opts.get(opt::JSON_DEBEZIUM)) .map(|s| s == "true") .unwrap_or(false); diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs index 85041f4b..9e0caddf 100644 --- a/src/sql/schema/source_table.rs +++ b/src/sql/schema/source_table.rs @@ -42,6 +42,7 @@ use crate::sql::common::kafka_catalog::{ KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode, SinkCommitMode, TableType as KafkaTableType, }; +use crate::sql::common::with_option_keys as opt; use crate::sql::common::{ BadData, Format, Framing, FsSchema, JsonCompression, JsonFormat, OperatorConfig, RateLimit, }; @@ -246,7 +247,7 @@ impl SourceTable { ) -> Result { let _ = connection_profile; - if let Some(c) = options.pull_opt_str("connector")? { + if let Some(c) = options.pull_opt_str(opt::CONNECTOR)? 
{ if c != connector_name { return plan_err!( "WITH option `connector` is '{c}' but table uses connector '{connector_name}'" @@ -274,7 +275,7 @@ impl SourceTable { .map_err(|e| DataFusionError::Plan(format!("invalid framing: '{e}'")))?; if temporary - && let Some(t) = options.insert_str("type", "lookup")? + && let Some(t) = options.insert_str(opt::TYPE, "lookup")? && t != "lookup" { return plan_err!( @@ -321,7 +322,7 @@ impl SourceTable { let role = if let Some(t) = connection_type_override { t.into() } else { - match options.pull_opt_str("type")?.as_deref() { + match options.pull_opt_str(opt::TYPE)?.as_deref() { None | Some("source") => TableRole::Ingestion, Some("sink") => TableRole::Egress, Some("lookup") => TableRole::Reference, @@ -349,12 +350,12 @@ impl SourceTable { inferred_fields: None, }; - if let Some(event_time_field) = options.pull_opt_field("event_time_field")? { + if let Some(event_time_field) = options.pull_opt_field(opt::EVENT_TIME_FIELD)? { warn!("`event_time_field` WITH option is deprecated; use WATERMARK FOR syntax"); table.temporal_config.event_column = Some(event_time_field); } - if let Some(watermark_field) = options.pull_opt_field("watermark_field")? { + if let Some(watermark_field) = options.pull_opt_field(opt::WATERMARK_FIELD)? { warn!("`watermark_field` WITH option is deprecated; use WATERMARK FOR syntax"); table.temporal_config.watermark_strategy_column = Some(watermark_field); } @@ -417,15 +418,15 @@ impl SourceTable { } let idle_from_micros = options - .pull_opt_i64("idle_micros")? + .pull_opt_i64(opt::IDLE_MICROS)? 
.filter(|t| *t > 0) .map(|t| Duration::from_micros(t as u64)); - let idle_from_duration = options.pull_opt_duration("idle_time")?; + let idle_from_duration = options.pull_opt_duration(opt::IDLE_TIME)?; table.temporal_config.liveness_timeout = idle_from_micros.or(idle_from_duration); - table.lookup_cache_max_bytes = options.pull_opt_u64("lookup.cache.max_bytes")?; + table.lookup_cache_max_bytes = options.pull_opt_u64(opt::LOOKUP_CACHE_MAX_BYTES)?; - table.lookup_cache_ttl = options.pull_opt_duration("lookup.cache.ttl")?; + table.lookup_cache_ttl = options.pull_opt_duration(opt::LOOKUP_CACHE_TTL)?; if connector_name.eq_ignore_ascii_case("kafka") { let physical = table.produce_physical_schema(); @@ -442,15 +443,19 @@ impl SourceTable { })?; } else { let extra_opts = options.drain_remaining_string_values()?; - let mut config_root = serde_json::json!({ - "connector": connector_name, - "connection_schema": connection_schema, - }); - if let serde_json::Value::Object(ref mut map) = config_root { - for (k, v) in extra_opts { - map.insert(k, serde_json::Value::String(v)); - } + let mut map = serde_json::Map::new(); + map.insert( + opt::CONNECTOR.to_string(), + serde_json::Value::String(connector_name.to_string()), + ); + let schema_val = serde_json::to_value(&connection_schema).map_err(|e| { + DataFusionError::Plan(format!("failed to serialize connection schema: {e}")) + })?; + map.insert(opt::CONNECTION_SCHEMA.to_string(), schema_val); + for (k, v) in extra_opts { + map.insert(k, serde_json::Value::String(v)); } + let config_root = serde_json::Value::Object(map); table.opaque_config = serde_json::to_string(&config_root).map_err(|e| { DataFusionError::Plan(format!("failed to serialize connector config: {e}")) })?; @@ -575,10 +580,10 @@ fn wire_kafka_operator_config( bad_data: BadData, framing: Option, ) -> Result { - let bootstrap_servers = match options.pull_opt_str("bootstrap.servers")? { + let bootstrap_servers = match options.pull_opt_str(opt::KAFKA_BOOTSTRAP_SERVERS)? 
{ Some(s) => s, None => options - .pull_opt_str("bootstrap_servers")? + .pull_opt_str(opt::KAFKA_BOOTSTRAP_SERVERS_LEGACY)? .ok_or_else(|| { plan_datafusion_err!( "Kafka connector requires 'bootstrap.servers' in the WITH clause" @@ -587,7 +592,7 @@ fn wire_kafka_operator_config( }; let topic = options - .pull_opt_str("topic")? + .pull_opt_str(opt::KAFKA_TOPIC)? .ok_or_else(|| plan_datafusion_err!("Kafka connector requires 'topic' in the WITH clause"))?; let sql_format = format.clone().ok_or_else(|| { @@ -597,16 +602,16 @@ fn wire_kafka_operator_config( })?; let rate_limit = options - .pull_opt_u64("rate_limit.messages_per_second")? + .pull_opt_u64(opt::KAFKA_RATE_LIMIT_MESSAGES_PER_SECOND)? .map(|v| RateLimit { messages_per_second: v.clamp(1, u32::MAX as u64) as u32, }); - let value_subject = options.pull_opt_str("value.subject")?; + let value_subject = options.pull_opt_str(opt::KAFKA_VALUE_SUBJECT)?; let kind = match role { TableRole::Ingestion => { - let offset = match options.pull_opt_str("scan.startup.mode")?.as_deref() { + let offset = match options.pull_opt_str(opt::KAFKA_SCAN_STARTUP_MODE)?.as_deref() { Some("latest") => KafkaTableSourceOffset::Latest, Some("earliest") => KafkaTableSourceOffset::Earliest, None | Some("group-offsets") | Some("group") => KafkaTableSourceOffset::Group, @@ -616,7 +621,7 @@ fn wire_kafka_operator_config( ); } }; - let read_mode = match options.pull_opt_str("isolation.level")?.as_deref() { + let read_mode = match options.pull_opt_str(opt::KAFKA_ISOLATION_LEVEL)?.as_deref() { Some("read_committed") => Some(ReadMode::ReadCommitted), Some("read_uncommitted") => Some(ReadMode::ReadUncommitted), None => None, @@ -624,11 +629,11 @@ fn wire_kafka_operator_config( return plan_err!("invalid isolation.level '{other}'"); } }; - let group_id = match options.pull_opt_str("group.id")? { + let group_id = match options.pull_opt_str(opt::KAFKA_GROUP_ID)? 
{ Some(s) => Some(s), - None => options.pull_opt_str("group_id")?, + None => options.pull_opt_str(opt::KAFKA_GROUP_ID_LEGACY)?, }; - let group_id_prefix = options.pull_opt_str("group.id.prefix")?; + let group_id_prefix = options.pull_opt_str(opt::KAFKA_GROUP_ID_PREFIX)?; KafkaTableType::Source { offset, read_mode, @@ -637,20 +642,20 @@ fn wire_kafka_operator_config( } } TableRole::Egress => { - let commit_mode = match options.pull_opt_str("sink.commit.mode")?.as_deref() { + let commit_mode = match options.pull_opt_str(opt::KAFKA_SINK_COMMIT_MODE)?.as_deref() { Some("exactly-once") | Some("exactly_once") => SinkCommitMode::ExactlyOnce, None | Some("at-least-once") | Some("at_least_once") => SinkCommitMode::AtLeastOnce, Some(other) => { return plan_err!("invalid sink.commit.mode '{other}'"); } }; - let key_field = match options.pull_opt_str("sink.key.field")? { + let key_field = match options.pull_opt_str(opt::KAFKA_SINK_KEY_FIELD)? { Some(s) => Some(s), - None => options.pull_opt_str("key.field")?, + None => options.pull_opt_str(opt::KAFKA_KEY_FIELD_LEGACY)?, }; - let timestamp_field = match options.pull_opt_str("sink.timestamp.field")? { + let timestamp_field = match options.pull_opt_str(opt::KAFKA_SINK_TIMESTAMP_FIELD)? { Some(s) => Some(s), - None => options.pull_opt_str("timestamp.field")?, + None => options.pull_opt_str(opt::KAFKA_TIMESTAMP_FIELD_LEGACY)?, }; KafkaTableType::Sink { commit_mode, @@ -664,8 +669,8 @@ fn wire_kafka_operator_config( }; // Role already decided; keep these out of librdkafka `connection_properties`. 
- let _ = options.pull_opt_str("type")?; - let _ = options.pull_opt_str("connector")?; + let _ = options.pull_opt_str(opt::TYPE)?; + let _ = options.pull_opt_str(opt::CONNECTOR)?; let connection_properties = options.drain_remaining_string_values()?; diff --git a/src/sql/schema/table_role.rs b/src/sql/schema/table_role.rs index 31629ad8..12bd8068 100644 --- a/src/sql/schema/table_role.rs +++ b/src/sql/schema/table_role.rs @@ -18,6 +18,7 @@ use datafusion::error::DataFusionError; use super::column_descriptor::ColumnDescriptor; use super::connection_type::ConnectionType; +use crate::sql::common::with_option_keys as opt; /// Role of a connector-backed table in the pipeline (ingest / egress / lookup). #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -87,7 +88,7 @@ pub fn apply_adapter_specific_rules(adapter: &str, mut cols: Vec) -> Result { - match options.get("type").map(|s| s.as_str()) { + match options.get(opt::TYPE).map(|s| s.as_str()) { None | Some("source") => Ok(TableRole::Ingestion), Some("sink") => Ok(TableRole::Egress), Some("lookup") => Ok(TableRole::Reference), @@ -98,7 +99,7 @@ pub fn deduce_role(options: &HashMap) -> Result { pub fn serialize_backend_params(adapter: &str, options: &HashMap) -> Result { let mut payload = serde_json::Map::new(); payload.insert( - "adapter".to_string(), + opt::ADAPTER.to_string(), serde_json::Value::String(adapter.to_string()), ); diff --git a/src/sql/types/data_type.rs b/src/sql/types/data_type.rs index 66076da3..1fc55ecc 100644 --- a/src/sql/types/data_type.rs +++ b/src/sql/types/data_type.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::sync::Arc; use datafusion::arrow::datatypes::{ diff --git a/src/sql/types/df_field.rs b/src/sql/types/df_field.rs index 3797adb2..435ae30a 100644 --- a/src/sql/types/df_field.rs +++ b/src/sql/types/df_field.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::collections::HashMap; use std::sync::Arc; diff --git a/src/sql/types/mod.rs b/src/sql/types/mod.rs index 25c67574..16d7033b 100644 --- a/src/sql/types/mod.rs +++ b/src/sql/types/mod.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ mod data_type; mod df_field; pub(crate) mod placeholder_udf; diff --git a/src/sql/types/placeholder_udf.rs b/src/sql/types/placeholder_udf.rs index 5cf96d28..0bdf17e6 100644 --- a/src/sql/types/placeholder_udf.rs +++ b/src/sql/types/placeholder_udf.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::any::Any; use std::fmt::{Debug, Formatter}; use std::sync::Arc; diff --git a/src/sql/types/stream_schema.rs b/src/sql/types/stream_schema.rs index e981111b..4b63182d 100644 --- a/src/sql/types/stream_schema.rs +++ b/src/sql/types/stream_schema.rs @@ -1,3 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ use std::sync::Arc; use datafusion::arrow::datatypes::{Field, Schema, SchemaRef}; diff --git a/src/sql/types/window.rs b/src/sql/types/window.rs index 9687974a..7934bc1d 100644 --- a/src/sql/types/window.rs +++ b/src/sql/types/window.rs @@ -1,8 +1,22 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::time::Duration; use datafusion::common::{Result, plan_err}; use datafusion::logical_expr::Expr; +use crate::sql::common::constants::window_fn; + use super::DFField; #[derive(Clone, Debug, PartialEq, Eq, Hash)] @@ -54,7 +68,7 @@ pub fn find_window(expression: &Expr) -> Result> { match expression { Expr::ScalarFunction(ScalarFunction { func: fun, args }) => match fun.name() { - "hop" => { + name if name == window_fn::HOP => { if args.len() != 2 { unreachable!(); } @@ -73,14 +87,14 @@ pub fn find_window(expression: &Expr) -> Result> { Ok(Some(WindowType::Sliding { width, slide })) } } - "tumble" => { + name if name == window_fn::TUMBLE => { if args.len() != 1 { unreachable!("wrong number of arguments for tumble(), expect one"); } let width = get_duration(&args[0])?; Ok(Some(WindowType::Tumbling { width })) } - "session" => { + name if name == window_fn::SESSION => { if args.len() != 1 { unreachable!("wrong number of arguments for session(), expected one"); } From 58a9e5c10e59960f9053a4ac19b697674fcc655b Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 29 Mar 2026 11:27:15 +0800 Subject: [PATCH 24/44] update --- src/sql/common/constants.rs | 121 
++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 src/sql/common/constants.rs diff --git a/src/sql/common/constants.rs b/src/sql/common/constants.rs new file mode 100644 index 00000000..8f791222 --- /dev/null +++ b/src/sql/common/constants.rs @@ -0,0 +1,121 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! SQL / 流算子相关的**名称与标识符常量**(标量函数名、窗口 TVF、逻辑扩展节点名、CDC 字段、 +//! 运行时 blueprint 字符串、`OperatorName` 特性标签等);与 [`super::with_option_keys`](WITH 选项键)分工。 + +// ── 内置标量 UDF(`register_all` / `ScalarUDFImpl::name`)────────────────────── + +pub mod scalar_fn { + pub const GET_FIRST_JSON_OBJECT: &str = "get_first_json_object"; + pub const EXTRACT_JSON: &str = "extract_json"; + pub const EXTRACT_JSON_STRING: &str = "extract_json_string"; + pub const SERIALIZE_JSON_UNION: &str = "serialize_json_union"; + pub const MULTI_HASH: &str = "multi_hash"; +} + +// ── 窗口 TVF(`hop` / `tumble` / `session` 等,与 DataFusion 解析一致)────────── + +pub mod window_fn { + pub const HOP: &str = "hop"; + pub const TUMBLE: &str = "tumble"; + pub const SESSION: &str = "session"; +} + +// ── `OperatorName` 在指标 / 特性集合中使用的 kebab-case 标签 ───────────────── + +pub mod operator_feature { + pub const ASYNC_UDF: &str = "async-udf"; + pub const JOIN_WITH_EXPIRATION: &str = "join-with-expiration"; + pub const WINDOWED_JOIN: &str = "windowed-join"; + pub const SQL_WINDOW_FUNCTION: &str = "sql-window-function"; + pub const LOOKUP_JOIN: &str = "lookup-join"; + pub const 
SQL_TUMBLING_WINDOW_AGGREGATE: &str = "sql-tumbling-window-aggregate"; + pub const SQL_SLIDING_WINDOW_AGGREGATE: &str = "sql-sliding-window-aggregate"; + pub const SQL_SESSION_WINDOW_AGGREGATE: &str = "sql-session-window-aggregate"; + pub const SQL_UPDATING_AGGREGATE: &str = "sql-updating-aggregate"; + pub const KEY_BY_ROUTING: &str = "key-by-routing"; + pub const CONNECTOR_SOURCE: &str = "connector-source"; + pub const CONNECTOR_SINK: &str = "connector-sink"; +} + +// ── 逻辑计划扩展节点的 `UserDefinedLogicalNodeCore::name` / 类型字符串 ──────── + +pub mod extension_node { + pub const STREAM_WINDOW_AGGREGATE: &str = "StreamWindowAggregateNode"; + pub const STREAMING_WINDOW_FUNCTION: &str = "StreamingWindowFunctionNode"; + pub const EVENT_TIME_WATERMARK: &str = "EventTimeWatermarkNode"; + pub const CONTINUOUS_AGGREGATE: &str = "ContinuousAggregateNode"; + pub const SYSTEM_TIMESTAMP_INJECTOR: &str = "SystemTimestampInjectorNode"; + pub const STREAM_INGESTION: &str = "StreamIngestionNode"; + pub const STREAM_EGRESS: &str = "StreamEgressNode"; + pub const STREAM_PROJECTION: &str = "StreamProjectionNode"; + pub const REMOTE_TABLE_BOUNDARY: &str = "RemoteTableBoundaryNode"; + pub const REFERENCE_TABLE_SOURCE: &str = "ReferenceTableSource"; + pub const STREAM_REFERENCE_JOIN: &str = "StreamReferenceJoin"; + pub const KEY_EXTRACTION: &str = "KeyExtractionNode"; + pub const STREAMING_JOIN: &str = "StreamingJoinNode"; + pub const ASYNC_FUNCTION_EXECUTION: &str = "AsyncFunctionExecutionNode"; + pub const UNROLL_DEBEZIUM_PAYLOAD: &str = "UnrollDebeziumPayloadNode"; + pub const PACK_DEBEZIUM_ENVELOPE: &str = "PackDebeziumEnvelopeNode"; +} + +// ── gRPC / proto 算子配置里的 `name` 字段(与 `OperatorName` 展示相关)────────── + +pub mod proto_operator_name { + pub const TUMBLING_WINDOW: &str = "TumblingWindow"; + pub const UPDATING_AGGREGATE: &str = "UpdatingAggregate"; + pub const WINDOW_FUNCTION: &str = "WindowFunction"; + /// 滑动窗口 human-readable 描述片段(非固定 id) + pub const SLIDING_WINDOW_LABEL: &str = 
"sliding window"; + pub const INSTANT_WINDOW: &str = "InstantWindow"; + pub const INSTANT_WINDOW_LABEL: &str = "instant window"; +} + +// ── 下发到运行时的 blueprint / 算子种类字符串 ────────────────────────────────── + +pub mod runtime_operator_kind { + pub const STREAMING_JOIN: &str = "streaming_join"; + pub const WATERMARK_GENERATOR: &str = "watermark_generator"; + pub const STREAMING_WINDOW_EVALUATOR: &str = "streaming_window_evaluator"; +} + +// ── Debezium CDC 信封字段 ─────────────────────────────────────────────────── + +pub mod cdc { + pub const BEFORE: &str = "before"; + pub const AFTER: &str = "after"; + pub const OP: &str = "op"; +} + +// ── updating aggregate 状态元数据 struct 字段 ──────────────────────────────── + +pub mod updating_state_field { + pub const IS_RETRACT: &str = "is_retract"; + pub const ID: &str = "id"; +} + +// ── 计划里常用的列名 / 别名 ─────────────────────────────────────────────────── + +pub mod sql_field { + /// 异步 UDF 重写后的结果列(与历史 `extensions::constants` 对齐)。 + pub const ASYNC_RESULT: &str = "__async_result"; + pub const DEFAULT_KEY_LABEL: &str = "key"; + pub const DEFAULT_PROJECTION_LABEL: &str = "projection"; +} + +// ── 连接器类型短名(工厂注册等)────────────────────────────────────────────── + +pub mod connector_type { + pub const KAFKA: &str = "kafka"; + pub const REDIS: &str = "redis"; +} From 0321c502aa270b8bc46053eeef4d07c557993764 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 29 Mar 2026 13:53:39 +0800 Subject: [PATCH 25/44] update --- .../grouping/incremental_aggregate.rs | 2 +- .../operators/joins/join_instance.rs | 7 +- .../operators/joins/join_with_expiration.rs | 2 +- .../operators/stateless_physical_executor.rs | 2 +- .../windows/session_aggregating_window.rs | 2 +- .../windows/sliding_aggregating_window.rs | 2 +- .../windows/tumbling_aggregating_window.rs | 2 +- .../operators/windows/window_function.rs | 2 +- src/sql/analysis/async_udf_rewriter.rs | 5 +- src/sql/analysis/join_rewriter.rs | 5 +- src/sql/analysis/row_time_rewriter.rs | 5 +- 
src/sql/analysis/unnest_rewriter.rs | 3 +- src/sql/common/connector_options.rs | 26 +- src/sql/common/constants.rs | 200 ++- src/sql/common/date.rs | 48 +- src/sql/common/format_from_opts.rs | 33 +- src/sql/common/formats.rs | 48 +- src/sql/extensions/aggregate.rs | 2 +- src/sql/extensions/async_udf.rs | 5 +- src/sql/extensions/debezium.rs | 2 +- src/sql/extensions/is_retract.rs | 2 +- src/sql/extensions/join.rs | 2 +- src/sql/extensions/key_calculation.rs | 2 +- src/sql/extensions/mod.rs | 3 - src/sql/extensions/remote_table.rs | 2 +- src/sql/extensions/updating_aggregate.rs | 2 +- src/sql/extensions/windows_function.rs | 2 +- src/sql/logical_planner/mod.rs | 1267 ----------------- src/sql/logical_planner/planner.rs | 2 +- src/sql/mod.rs | 1 + src/sql/physical/cdc/encode.rs | 331 +++++ .../constants.rs => physical/cdc/mod.rs} | 8 +- src/sql/physical/cdc/unroll.rs | 300 ++++ src/sql/physical/codec.rs | 263 ++++ src/sql/physical/meta.rs | 53 + src/sql/physical/mod.rs | 25 + src/sql/physical/physical_planner.rs | 418 ------ src/sql/physical/readers.rs | 372 +++++ src/sql/physical/udfs.rs | 132 ++ src/sql/schema/data_encoding_format.rs | 18 +- src/sql/schema/schema_provider.rs | 19 +- src/sql/schema/source_table.rs | 57 +- src/sql/schema/table_role.rs | 31 +- src/sql/schema/temporal_pipeline_config.rs | 3 +- src/sql/schema/utils.rs | 5 +- src/sql/types/data_type.rs | 3 +- 46 files changed, 1889 insertions(+), 1837 deletions(-) create mode 100644 src/sql/physical/cdc/encode.rs rename src/sql/{extensions/constants.rs => physical/cdc/mod.rs} (78%) create mode 100644 src/sql/physical/cdc/unroll.rs create mode 100644 src/sql/physical/codec.rs create mode 100644 src/sql/physical/meta.rs create mode 100644 src/sql/physical/mod.rs delete mode 100644 src/sql/physical/physical_planner.rs create mode 100644 src/sql/physical/readers.rs create mode 100644 src/sql/physical/udfs.rs diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs 
b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs index 104d24a1..2d2abf18 100644 --- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs +++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs @@ -49,7 +49,7 @@ use crate::runtime::streaming::arrow::decode_aggregate; use crate::runtime::streaming::operators::{Key, UpdatingCache}; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{to_nanos, CheckpointBarrier, FsSchema, Watermark, TIMESTAMP_FIELD, UPDATING_META_FIELD}; -use crate::sql::logical_planner::updating_meta_fields; +use crate::sql::physical::updating_meta_fields; #[derive(Debug, Copy, Clone)] struct BatchData { diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs index 278bc8fe..cd5b3764 100644 --- a/src/runtime/streaming/operators/joins/join_instance.rs +++ b/src/runtime/streaming/operators/joins/join_instance.rs @@ -34,8 +34,9 @@ use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; use async_trait::async_trait; use protocol::grpc::api::JoinOperator; use crate::runtime::streaming::StreamOutput; +use crate::sql::common::constants::mem_exec_join_side; use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; -use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; +use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec}; #[derive(Debug, Copy, Clone, Eq, PartialEq)] enum JoinSide { @@ -47,8 +48,8 @@ impl JoinSide { #[allow(dead_code)] fn name(&self) -> &'static str { match self { - JoinSide::Left => "left", - JoinSide::Right => "right", + JoinSide::Left => mem_exec_join_side::LEFT, + JoinSide::Right => mem_exec_join_side::RIGHT, } } } diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs index 1a31b253..34d15932 100644 --- 
a/src/runtime/streaming/operators/joins/join_with_expiration.rs +++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs @@ -33,7 +33,7 @@ use async_trait::async_trait; use protocol::grpc::api::JoinOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark}; -use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; +use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec}; #[derive(Debug, Copy, Clone, Eq, PartialEq)] enum JoinSide { diff --git a/src/runtime/streaming/operators/stateless_physical_executor.rs b/src/runtime/streaming/operators/stateless_physical_executor.rs index 45619dc6..dee92eb3 100644 --- a/src/runtime/streaming/operators/stateless_physical_executor.rs +++ b/src/runtime/streaming/operators/stateless_physical_executor.rs @@ -27,7 +27,7 @@ use futures::StreamExt; use prost::Message; use crate::runtime::streaming::api::operator::Registry; -use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; +use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec}; pub struct StatelessPhysicalExecutor { batch: Arc>>, diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs index 8fa3f2f7..73642992 100644 --- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs @@ -44,7 +44,7 @@ use protocol::grpc::api::SessionWindowAggregateOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; use crate::sql::common::converter::Converter; -use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; +use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec}; use crate::sql::schema::utils::window_arrow_struct; // 
============================================================================ // 领域模型与纯内存状态 diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs index e5af57f3..7d801fd6 100644 --- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs @@ -43,7 +43,7 @@ use crate::runtime::streaming::api::operator::Registry; use protocol::grpc::api::SlidingWindowAggregateOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark}; -use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; +use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec}; // ============================================================================ // 纯内存状态:阶梯式时间面板 (Tiered panes) // 这部分本身就是极佳的内存数据结构,原样保留! diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs index 40c757dc..004cc205 100644 --- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs @@ -44,7 +44,7 @@ use protocol::grpc::api::TumblingWindowAggregateOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark}; use crate::sql::common::time_utils::print_time; -use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; +use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec}; use crate::sql::schema::utils::add_timestamp_field_arrow; struct ActiveBin { diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs index 4e9c83ce..641b0ef6 100644 --- 
a/src/runtime/streaming/operators/windows/window_function.rs +++ b/src/runtime/streaming/operators/windows/window_function.rs @@ -37,7 +37,7 @@ use async_trait::async_trait; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; use crate::sql::common::time_utils::print_time; -use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec}; +use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec}; // ============================================================================ // 纯内存执行缓冲区 diff --git a/src/sql/analysis/async_udf_rewriter.rs b/src/sql/analysis/async_udf_rewriter.rs index 31a92057..073a1f42 100644 --- a/src/sql/analysis/async_udf_rewriter.rs +++ b/src/sql/analysis/async_udf_rewriter.rs @@ -11,7 +11,8 @@ // limitations under the License. use crate::sql::extensions::remote_table::RemoteTableBoundaryNode; -use crate::sql::extensions::{ASYNC_RESULT_FIELD, AsyncFunctionExecutionNode}; +use crate::sql::common::constants::sql_field; +use crate::sql::extensions::AsyncFunctionExecutionNode; use crate::sql::schema::StreamSchemaProvider; use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion::common::{Column, Result as DFResult, TableReference, plan_err}; @@ -55,7 +56,7 @@ impl<'a> AsyncUdfRewriter<'a> { ); } return Ok(Transformed::yes(Expr::Column(Column::new_unqualified( - ASYNC_RESULT_FIELD, + sql_field::ASYNC_RESULT, )))); } } diff --git a/src/sql/analysis/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs index 77131595..4421aa99 100644 --- a/src/sql/analysis/join_rewriter.rs +++ b/src/sql/analysis/join_rewriter.rs @@ -15,6 +15,7 @@ use crate::sql::extensions::join::StreamingJoinNode; use crate::sql::extensions::key_calculation::KeyExtractionNode; use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer; use crate::sql::types::{WindowType, fields_with_qualifiers, 
schema_from_df_fields_with_metadata}; +use crate::sql::common::constants::mem_exec_join_side; use crate::sql::common::TIMESTAMP_FIELD; use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{ @@ -198,8 +199,8 @@ impl TreeNodeRewriter for JoinRewriter<'_> { // 2. Prepare Keyed Inputs for Shuffle let (left_on, right_on): (Vec<_>, Vec<_>) = join.on.clone().into_iter().unzip(); - let keyed_left = self.build_keyed_side(join.left, left_on, "left")?; - let keyed_right = self.build_keyed_side(join.right, right_on, "right")?; + let keyed_left = self.build_keyed_side(join.left, left_on, mem_exec_join_side::LEFT)?; + let keyed_right = self.build_keyed_side(join.right, right_on, mem_exec_join_side::RIGHT)?; // 3. Assemble Rewritten Join Node let join_schema = Arc::new(build_join_schema( diff --git a/src/sql/analysis/row_time_rewriter.rs b/src/sql/analysis/row_time_rewriter.rs index f0c4e435..13e2a048 100644 --- a/src/sql/analysis/row_time_rewriter.rs +++ b/src/sql/analysis/row_time_rewriter.rs @@ -14,6 +14,7 @@ use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{Column, Result as DFResult}; use datafusion::logical_expr::Expr; +use crate::sql::common::constants::planning_placeholder_udf; use crate::sql::types::TIMESTAMP_FIELD; /// Replaces the virtual `row_time()` scalar function with a physical reference to `_timestamp`. @@ -26,9 +27,9 @@ impl TreeNodeRewriter for RowTimeRewriter { type Node = Expr; fn f_down(&mut self, node: Self::Node) -> DFResult> { - // Use pattern matching to identify the 'row_time' scalar function. + // Use pattern matching to identify the `row_time` scalar function. if let Expr::ScalarFunction(func) = &node - && func.name() == "row_time" + && func.name() == planning_placeholder_udf::ROW_TIME { // Map the virtual function to the physical internal timestamp column. 
// We use .alias() to preserve the original name "row_time()" in the output schema, diff --git a/src/sql/analysis/unnest_rewriter.rs b/src/sql/analysis/unnest_rewriter.rs index 2a9eabda..535590c8 100644 --- a/src/sql/analysis/unnest_rewriter.rs +++ b/src/sql/analysis/unnest_rewriter.rs @@ -18,6 +18,7 @@ use datafusion::common::{Column, Result as DFResult, plan_err}; use datafusion::logical_expr::expr::ScalarFunction; use datafusion::logical_expr::{ColumnUnnestList, Expr, LogicalPlan, Projection, Unnest}; +use crate::sql::common::constants::planning_placeholder_udf; use crate::sql::types::{DFField, fields_with_qualifiers, schema_from_df_fields}; pub const UNNESTED_COL: &str = "__unnested"; @@ -31,7 +32,7 @@ impl UnnestRewriter { let expr = expr.transform_up(|e| { if let Expr::ScalarFunction(ScalarFunction { func: udf, args }) = &e - && udf.name() == "unnest" + && udf.name() == planning_placeholder_udf::UNNEST { match args.len() { 1 => { diff --git a/src/sql/common/connector_options.rs b/src/sql/common/connector_options.rs index 6bd6dfa6..bffa766a 100644 --- a/src/sql/common/connector_options.rs +++ b/src/sql/common/connector_options.rs @@ -20,6 +20,8 @@ use datafusion::error::DataFusionError; use datafusion::sql::sqlparser::ast::{Expr, Ident, SqlOption, Value as SqlValue, ValueWithSpan}; use tracing::warn; +use super::constants::{interval_duration_unit, with_opt_bool_str}; + pub trait FromOpts: Sized { fn from_opts(opts: &mut ConnectorOptions) -> DFResult; } @@ -88,8 +90,8 @@ impl ConnectorOptions { value: SqlValue::SingleQuotedString(s), span: _, })) => match s.as_str() { - "true" | "yes" => Ok(Some(true)), - "false" | "no" => Ok(Some(false)), + with_opt_bool_str::TRUE | with_opt_bool_str::YES => Ok(Some(true)), + with_opt_bool_str::FALSE | with_opt_bool_str::NO => Ok(Some(false)), _ => Err(plan_datafusion_err!( "expected with option '{}' to be a boolean, but it was `'{}'`", name, @@ -367,11 +369,21 @@ fn parse_interval_to_duration(s: &str) -> Result { let value: 
u64 = parts[0] .parse() .map_err(|_| DataFusionError::Plan(format!("invalid interval number: {}", parts[0])))?; - let duration = match parts[1].to_lowercase().as_str() { - "second" | "seconds" | "s" => Duration::from_secs(value), - "minute" | "minutes" | "min" => Duration::from_secs(value * 60), - "hour" | "hours" | "h" => Duration::from_secs(value * 3600), - "day" | "days" | "d" => Duration::from_secs(value * 86400), + let unit_lc = parts[1].to_lowercase(); + let unit = unit_lc.as_str(); + let duration = match unit { + interval_duration_unit::SECOND + | interval_duration_unit::SECONDS + | interval_duration_unit::S => Duration::from_secs(value), + interval_duration_unit::MINUTE + | interval_duration_unit::MINUTES + | interval_duration_unit::MIN => Duration::from_secs(value * 60), + interval_duration_unit::HOUR + | interval_duration_unit::HOURS + | interval_duration_unit::H => Duration::from_secs(value * 3600), + interval_duration_unit::DAY + | interval_duration_unit::DAYS + | interval_duration_unit::D => Duration::from_secs(value * 86400), unit => { return Err(DataFusionError::Plan(format!( "unsupported interval unit '{unit}'" diff --git a/src/sql/common/constants.rs b/src/sql/common/constants.rs index 8f791222..f5dd56ef 100644 --- a/src/sql/common/constants.rs +++ b/src/sql/common/constants.rs @@ -31,6 +31,15 @@ pub mod window_fn { pub const SESSION: &str = "session"; } +// ── 流规划期占位标量 UDF(`StreamPlanningContextBuilder::with_streaming_extensions`)── + +pub mod planning_placeholder_udf { + pub const UNNEST: &str = "unnest"; + pub const ROW_TIME: &str = "row_time"; + /// `List` 内元素字段名,仅用于占位签名的 Arrow 形态 + pub const LIST_ELEMENT_FIELD: &str = "field"; +} + // ── `OperatorName` 在指标 / 特性集合中使用的 kebab-case 标签 ───────────────── pub mod operator_feature { @@ -107,15 +116,204 @@ pub mod updating_state_field { // ── 计划里常用的列名 / 别名 ─────────────────────────────────────────────────── pub mod sql_field { - /// 异步 UDF 重写后的结果列(与历史 `extensions::constants` 对齐)。 + /// 异步 UDF 重写后的结果列名。 
pub const ASYNC_RESULT: &str = "__async_result"; pub const DEFAULT_KEY_LABEL: &str = "key"; pub const DEFAULT_PROJECTION_LABEL: &str = "projection"; + /// `WATERMARK FOR … AS expr` 生成的计算列名(与 `TemporalPipelineConfig` 一致)。 + pub const COMPUTED_WATERMARK: &str = "__watermark"; +} + +// ── `ConnectorOptions` / WITH 解析用到的字面量 ──────────────────────────────── + +/// 单引号字符串形式的布尔取值(见 [`super::connector_options::ConnectorOptions::pull_opt_bool`])。 +pub mod with_opt_bool_str { + pub const TRUE: &str = "true"; + pub const YES: &str = "yes"; + pub const FALSE: &str = "false"; + pub const NO: &str = "no"; +} + +/// `INTERVAL '…'` / 间隔字符串解析中的单位 token(小写;解析前会对单位做 `to_lowercase`)。 +pub mod interval_duration_unit { + pub const SECOND: &str = "second"; + pub const SECONDS: &str = "seconds"; + pub const S: &str = "s"; + pub const MINUTE: &str = "minute"; + pub const MINUTES: &str = "minutes"; + pub const MIN: &str = "min"; + pub const HOUR: &str = "hour"; + pub const HOURS: &str = "hours"; + pub const H: &str = "h"; + pub const DAY: &str = "day"; + pub const DAYS: &str = "days"; + pub const D: &str = "d"; +} + +// ── `format` / `framing.method` / `bad_data` 的 WITH 取值(见 `format_from_opts`)────── + +/// `format = '…'` 的名称(小写;`Format::from_opts` 会对值做 `to_lowercase`)。 +pub mod connection_format_value { + pub const JSON: &str = "json"; + pub const DEBEZIUM_JSON: &str = "debezium_json"; + pub const AVRO: &str = "avro"; + pub const PARQUET: &str = "parquet"; + pub const PROTOBUF: &str = "protobuf"; + pub const RAW_STRING: &str = "raw_string"; + pub const RAW_BYTES: &str = "raw_bytes"; +} + +/// `framing.method` 合法取值(与 `Framing::from_opts` 一致;当前不做大小写折叠)。 +pub mod framing_method_value { + pub const NEWLINE: &str = "newline"; + pub const NEWLINE_DELIMITED: &str = "newline_delimited"; +} + +/// `bad_data = '…'`(小写;解析前 `to_lowercase`)。 +pub mod bad_data_value { + pub const FAIL: &str = "fail"; + pub const DROP: &str = "drop"; +} + +// ── `formats.rs` 里枚举的 wire 名(与 serde `snake_case` / `TryFrom` / 
`FromStr` 一致)──── + +pub mod timestamp_format_value { + pub const RFC3339_SNAKE: &str = "rfc3339"; + pub const RFC3339_UPPER: &str = "RFC3339"; + pub const UNIX_MILLIS_SNAKE: &str = "unix_millis"; + pub const UNIX_MILLIS_PASCAL: &str = "UnixMillis"; +} + +pub mod decimal_encoding_value { + pub const NUMBER: &str = "number"; + pub const STRING: &str = "string"; + pub const BYTES: &str = "bytes"; +} + +pub mod json_compression_value { + pub const UNCOMPRESSED: &str = "uncompressed"; + pub const GZIP: &str = "gzip"; +} + +pub mod parquet_compression_value { + pub const UNCOMPRESSED: &str = "uncompressed"; + pub const SNAPPY: &str = "snappy"; + pub const GZIP: &str = "gzip"; + pub const ZSTD: &str = "zstd"; + pub const LZ4: &str = "lz4"; + pub const LZ4_RAW: &str = "lz4_raw"; +} + +// ── `date_part` / `date_trunc` SQL 关键字(小写;解析前对输入做 `to_lowercase`)──────── + +pub mod date_part_keyword { + pub const YEAR: &str = "year"; + pub const MONTH: &str = "month"; + pub const WEEK: &str = "week"; + pub const DAY: &str = "day"; + pub const HOUR: &str = "hour"; + pub const MINUTE: &str = "minute"; + pub const SECOND: &str = "second"; + pub const MILLISECOND: &str = "millisecond"; + pub const MICROSECOND: &str = "microsecond"; + pub const NANOSECOND: &str = "nanosecond"; + pub const DOW: &str = "dow"; + pub const DOY: &str = "doy"; +} + +pub mod date_trunc_keyword { + pub const YEAR: &str = "year"; + pub const QUARTER: &str = "quarter"; + pub const MONTH: &str = "month"; + pub const WEEK: &str = "week"; + pub const DAY: &str = "day"; + pub const HOUR: &str = "hour"; + pub const MINUTE: &str = "minute"; + pub const SECOND: &str = "second"; +} + +// ── `logical_planner/mod.rs` 物理计划与 Debezium 流水线 ─────────────────────── + +/// `FsMemExec` / codec 里表示 join 左右输入的 `table_name`。 +pub mod mem_exec_join_side { + pub const LEFT: &str = "left"; + pub const RIGHT: &str = "right"; +} + +/// 自定义 `ExecutionPlan::name()`(与 DataFusion explain / 调试一致)。 +pub mod physical_plan_node_name { + pub const 
RW_LOCK_READER: &str = "rw_lock_reader"; + pub const UNBOUNDED_READER: &str = "unbounded_reader"; + pub const VEC_READER: &str = "vec_reader"; + pub const MEM_EXEC: &str = "mem_exec"; + pub const DEBEZIUM_UNROLLING_EXEC: &str = "debezium_unrolling_exec"; + pub const TO_DEBEZIUM_EXEC: &str = "to_debezium_exec"; +} + +/// 流式 `window(start, end)` 标量 UDF 的注册名。 +pub mod window_function_udf { + pub const NAME: &str = "window"; +} + +/// `window()` UDF 返回 struct 的字段名(与 `window_arrow_struct` 一致)。 +pub mod window_interval_field { + pub const START: &str = "start"; + pub const END: &str = "end"; +} + +/// Debezium `op` 列中的单字母取值(unroll / pack 路径)。 +pub mod debezium_op_short { + pub const CREATE: &str = "c"; + pub const READ: &str = "r"; + pub const UPDATE: &str = "u"; + pub const DELETE: &str = "d"; } // ── 连接器类型短名(工厂注册等)────────────────────────────────────────────── pub mod connector_type { pub const KAFKA: &str = "kafka"; + pub const KINESIS: &str = "kinesis"; + pub const FILESYSTEM: &str = "filesystem"; + pub const DELTA: &str = "delta"; + pub const ICEBERG: &str = "iceberg"; + pub const PULSAR: &str = "pulsar"; + pub const NATS: &str = "nats"; pub const REDIS: &str = "redis"; + pub const MQTT: &str = "mqtt"; + pub const WEBSOCKET: &str = "websocket"; + pub const SSE: &str = "sse"; + pub const NEXMARK: &str = "nexmark"; + pub const BLACKHOLE: &str = "blackhole"; + pub const MEMORY: &str = "memory"; + pub const POSTGRES: &str = "postgres"; +} + +// ── 连接表 `WITH type = 'source'|'sink'|'lookup'`(`SourceTable::from_options` / `deduce_role`)── + +pub mod connection_table_role { + pub const SOURCE: &str = "source"; + pub const SINK: &str = "sink"; + /// Same string as the virtual `lookup` connector short name. NOTE(review): `lookup` is not an entry of [`SUPPORTED_CONNECTOR_ADAPTERS`] as declared in this file (only `connector_type::KAFKA` is listed) — confirm whether it should be. + pub const LOOKUP: &str = "lookup"; +} + +/// [`crate::sql::schema::table_role::validate_adapter_availability`] 白名单(与 SQL `connector = '…'` 短名一致)。 +pub const SUPPORTED_CONNECTOR_ADAPTERS: &[&str] = &[ + connector_type::KAFKA, +]; + +// ── Kafka 连接器 WITH 
选项取值(`wire_kafka_operator_config`)──────────────── + +pub mod kafka_with_value { + pub const SCAN_LATEST: &str = "latest"; + pub const SCAN_EARLIEST: &str = "earliest"; + pub const SCAN_GROUP_OFFSETS: &str = "group-offsets"; + pub const SCAN_GROUP: &str = "group"; + pub const ISOLATION_READ_COMMITTED: &str = "read_committed"; + pub const ISOLATION_READ_UNCOMMITTED: &str = "read_uncommitted"; + pub const SINK_COMMIT_EXACTLY_ONCE_HYPHEN: &str = "exactly-once"; + pub const SINK_COMMIT_EXACTLY_ONCE_UNDERSCORE: &str = "exactly_once"; + pub const SINK_COMMIT_AT_LEAST_ONCE_HYPHEN: &str = "at-least-once"; + pub const SINK_COMMIT_AT_LEAST_ONCE_UNDERSCORE: &str = "at_least_once"; } diff --git a/src/sql/common/date.rs b/src/sql/common/date.rs index 718d5f56..ec310326 100644 --- a/src/sql/common/date.rs +++ b/src/sql/common/date.rs @@ -13,6 +13,8 @@ use serde::Serialize; use std::convert::TryFrom; +use super::constants::{date_part_keyword, date_trunc_keyword}; + #[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Hash, Serialize)] pub enum DatePart { Year, @@ -33,19 +35,20 @@ impl TryFrom<&str> for DatePart { type Error = String; fn try_from(value: &str) -> Result { - match value.to_lowercase().as_str() { - "year" => Ok(DatePart::Year), - "month" => Ok(DatePart::Month), - "week" => Ok(DatePart::Week), - "day" => Ok(DatePart::Day), - "hour" => Ok(DatePart::Hour), - "minute" => Ok(DatePart::Minute), - "second" => Ok(DatePart::Second), - "millisecond" => Ok(DatePart::Millisecond), - "microsecond" => Ok(DatePart::Microsecond), - "nanosecond" => Ok(DatePart::Nanosecond), - "dow" => Ok(DatePart::DayOfWeek), - "doy" => Ok(DatePart::DayOfYear), + let v = value.to_lowercase(); + match v.as_str() { + date_part_keyword::YEAR => Ok(DatePart::Year), + date_part_keyword::MONTH => Ok(DatePart::Month), + date_part_keyword::WEEK => Ok(DatePart::Week), + date_part_keyword::DAY => Ok(DatePart::Day), + date_part_keyword::HOUR => Ok(DatePart::Hour), + date_part_keyword::MINUTE => 
Ok(DatePart::Minute), + date_part_keyword::SECOND => Ok(DatePart::Second), + date_part_keyword::MILLISECOND => Ok(DatePart::Millisecond), + date_part_keyword::MICROSECOND => Ok(DatePart::Microsecond), + date_part_keyword::NANOSECOND => Ok(DatePart::Nanosecond), + date_part_keyword::DOW => Ok(DatePart::DayOfWeek), + date_part_keyword::DOY => Ok(DatePart::DayOfYear), _ => Err(format!("'{value}' is not a valid DatePart")), } } @@ -67,15 +70,16 @@ impl TryFrom<&str> for DateTruncPrecision { type Error = String; fn try_from(value: &str) -> Result { - match value.to_lowercase().as_str() { - "year" => Ok(DateTruncPrecision::Year), - "quarter" => Ok(DateTruncPrecision::Quarter), - "month" => Ok(DateTruncPrecision::Month), - "week" => Ok(DateTruncPrecision::Week), - "day" => Ok(DateTruncPrecision::Day), - "hour" => Ok(DateTruncPrecision::Hour), - "minute" => Ok(DateTruncPrecision::Minute), - "second" => Ok(DateTruncPrecision::Second), + let v = value.to_lowercase(); + match v.as_str() { + date_trunc_keyword::YEAR => Ok(DateTruncPrecision::Year), + date_trunc_keyword::QUARTER => Ok(DateTruncPrecision::Quarter), + date_trunc_keyword::MONTH => Ok(DateTruncPrecision::Month), + date_trunc_keyword::WEEK => Ok(DateTruncPrecision::Week), + date_trunc_keyword::DAY => Ok(DateTruncPrecision::Day), + date_trunc_keyword::HOUR => Ok(DateTruncPrecision::Hour), + date_trunc_keyword::MINUTE => Ok(DateTruncPrecision::Minute), + date_trunc_keyword::SECOND => Ok(DateTruncPrecision::Second), _ => Err(format!("'{value}' is not a valid DateTruncPrecision")), } } diff --git a/src/sql/common/format_from_opts.rs b/src/sql/common/format_from_opts.rs index 2469fb08..34b6a586 100644 --- a/src/sql/common/format_from_opts.rs +++ b/src/sql/common/format_from_opts.rs @@ -17,6 +17,7 @@ use std::str::FromStr; use datafusion::common::{Result as DFResult, plan_datafusion_err, plan_err}; use super::connector_options::ConnectorOptions; +use super::constants::{bad_data_value, connection_format_value, 
framing_method_value}; use super::with_option_keys as opt; use super::formats::{ AvroFormat, BadData, DecimalEncoding, Format, Framing, JsonCompression, JsonFormat, @@ -65,18 +66,25 @@ impl Format { let Some(name) = opts.pull_opt_str(opt::FORMAT)? else { return Ok(None); }; - match name.to_lowercase().as_str() { - "json" => Ok(Some(Format::Json(JsonFormat::from_opts(opts)?))), - "debezium_json" => { + let n = name.to_lowercase(); + match n.as_str() { + connection_format_value::JSON => Ok(Some(Format::Json(JsonFormat::from_opts(opts)?))), + connection_format_value::DEBEZIUM_JSON => { let mut j = JsonFormat::from_opts(opts)?; j.debezium = true; Ok(Some(Format::Json(j))) } - "avro" => Ok(Some(Format::Avro(AvroFormat::from_opts(opts)?))), - "parquet" => Ok(Some(Format::Parquet(ParquetFormat::from_opts(opts)?))), - "protobuf" => Ok(Some(Format::Protobuf(ProtobufFormat::from_opts(opts)?))), - "raw_string" => Ok(Some(Format::RawString(RawStringFormat {}))), - "raw_bytes" => Ok(Some(Format::RawBytes(RawBytesFormat {}))), + connection_format_value::AVRO => Ok(Some(Format::Avro(AvroFormat::from_opts(opts)?))), + connection_format_value::PARQUET => { + Ok(Some(Format::Parquet(ParquetFormat::from_opts(opts)?))) + } + connection_format_value::PROTOBUF => { + Ok(Some(Format::Protobuf(ProtobufFormat::from_opts(opts)?))) + } + connection_format_value::RAW_STRING => { + Ok(Some(Format::RawString(RawStringFormat {}))) + } + connection_format_value::RAW_BYTES => Ok(Some(Format::RawBytes(RawBytesFormat {}))), _ => plan_err!("unknown format '{name}'"), } } @@ -150,7 +158,7 @@ impl Framing { let method = opts.pull_opt_str(opt::FRAMING_METHOD)?; match method.as_deref() { None => Ok(None), - Some("newline") | Some("newline_delimited") => { + Some(framing_method_value::NEWLINE) | Some(framing_method_value::NEWLINE_DELIMITED) => { let max = opts.pull_opt_u64(opt::FRAMING_MAX_LINE_LENGTH)?; Ok(Some(Framing::Newline(NewlineDelimitedFraming { max_line_length: max, @@ -166,9 +174,10 @@ impl 
BadData { let Some(s) = opts.pull_opt_str(opt::BAD_DATA)? else { return Ok(BadData::Fail {}); }; - match s.to_lowercase().as_str() { - "fail" => Ok(BadData::Fail {}), - "drop" => Ok(BadData::Drop {}), + let v = s.to_lowercase(); + match v.as_str() { + bad_data_value::FAIL => Ok(BadData::Fail {}), + bad_data_value::DROP => Ok(BadData::Drop {}), _ => plan_err!("invalid bad_data '{s}'"), } } diff --git a/src/sql/common/formats.rs b/src/sql/common/formats.rs index e37be020..b2885797 100644 --- a/src/sql/common/formats.rs +++ b/src/sql/common/formats.rs @@ -11,9 +11,15 @@ // limitations under the License. use serde::{Deserialize, Serialize}; +use std::convert::TryFrom; use std::fmt::{Display, Formatter}; use std::str::FromStr; +use super::constants::{ + connection_format_value, decimal_encoding_value, json_compression_value, + parquet_compression_value, timestamp_format_value, +}; + #[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default, Hash, PartialOrd)] #[serde(rename_all = "snake_case")] pub enum TimestampFormat { @@ -28,8 +34,12 @@ impl TryFrom<&str> for TimestampFormat { fn try_from(value: &str) -> Result { match value { - "RFC3339" | "rfc3339" => Ok(TimestampFormat::RFC3339), - "UnixMillis" | "unix_millis" => Ok(TimestampFormat::UnixMillis), + timestamp_format_value::RFC3339_UPPER | timestamp_format_value::RFC3339_SNAKE => { + Ok(TimestampFormat::RFC3339) + } + timestamp_format_value::UNIX_MILLIS_PASCAL | timestamp_format_value::UNIX_MILLIS_SNAKE => { + Ok(TimestampFormat::UnixMillis) + } _ => Err(()), } } @@ -49,9 +59,9 @@ impl TryFrom<&str> for DecimalEncoding { fn try_from(s: &str) -> Result { match s { - "number" => Ok(Self::Number), - "string" => Ok(Self::String), - "bytes" => Ok(Self::Bytes), + decimal_encoding_value::NUMBER => Ok(Self::Number), + decimal_encoding_value::STRING => Ok(Self::String), + decimal_encoding_value::BYTES => Ok(Self::Bytes), _ => Err(()), } } @@ -70,8 +80,8 @@ impl FromStr for JsonCompression { fn from_str(s: 
&str) -> Result { match s { - "uncompressed" => Ok(JsonCompression::Uncompressed), - "gzip" => Ok(JsonCompression::Gzip), + json_compression_value::UNCOMPRESSED => Ok(JsonCompression::Uncompressed), + json_compression_value::GZIP => Ok(JsonCompression::Gzip), _ => Err(format!("invalid json compression '{s}'")), } } @@ -151,12 +161,12 @@ impl FromStr for ParquetCompression { fn from_str(s: &str) -> Result { match s { - "uncompressed" => Ok(ParquetCompression::Uncompressed), - "snappy" => Ok(ParquetCompression::Snappy), - "gzip" => Ok(ParquetCompression::Gzip), - "zstd" => Ok(ParquetCompression::Zstd), - "lz4" => Ok(ParquetCompression::Lz4), - "lz4_raw" => Ok(ParquetCompression::Lz4Raw), + parquet_compression_value::UNCOMPRESSED => Ok(ParquetCompression::Uncompressed), + parquet_compression_value::SNAPPY => Ok(ParquetCompression::Snappy), + parquet_compression_value::GZIP => Ok(ParquetCompression::Gzip), + parquet_compression_value::ZSTD => Ok(ParquetCompression::Zstd), + parquet_compression_value::LZ4 => Ok(ParquetCompression::Lz4), + parquet_compression_value::LZ4_RAW => Ok(ParquetCompression::Lz4Raw), _ => Err(format!("invalid parquet compression '{s}'")), } } @@ -206,12 +216,12 @@ impl Display for Format { impl Format { pub fn name(&self) -> &'static str { match self { - Format::Json(_) => "json", - Format::Avro(_) => "avro", - Format::Protobuf(_) => "protobuf", - Format::Parquet(_) => "parquet", - Format::RawString(_) => "raw_string", - Format::RawBytes(_) => "raw_bytes", + Format::Json(_) => connection_format_value::JSON, + Format::Avro(_) => connection_format_value::AVRO, + Format::Protobuf(_) => connection_format_value::PROTOBUF, + Format::Parquet(_) => connection_format_value::PARQUET, + Format::RawString(_) => connection_format_value::RAW_STRING, + Format::RawBytes(_) => connection_format_value::RAW_BYTES, } } diff --git a/src/sql/extensions/aggregate.rs b/src/sql/extensions/aggregate.rs index e05129c8..645315af 100644 --- a/src/sql/extensions/aggregate.rs 
+++ b/src/sql/extensions/aggregate.rs @@ -38,7 +38,7 @@ use crate::sql::extensions::{ }; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; use crate::sql::logical_planner::planner::{NamedNode, Planner, SplitPlanOutput}; -use crate::sql::logical_planner::{window, FsPhysicalExtensionCodec}; +use crate::sql::physical::{window, FsPhysicalExtensionCodec}; use crate::sql::types::{ DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, schema_from_df_fields, schema_from_df_fields_with_metadata, diff --git a/src/sql/extensions/async_udf.rs b/src/sql/extensions/async_udf.rs index 8add0625..ee2ce60a 100644 --- a/src/sql/extensions/async_udf.rs +++ b/src/sql/extensions/async_udf.rs @@ -31,11 +31,10 @@ use crate::sql::extensions::streaming_operator_blueprint::{CompiledTopologyNode, use crate::sql::logical_node::logical::{ DylibUdfConfig, LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName, }; +use crate::sql::common::constants::sql_field; use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::types::{DFField, fields_with_qualifiers, schema_from_df_fields}; -use super::ASYNC_RESULT_FIELD; - pub(crate) const NODE_TYPE_NAME: &str = extension_node::ASYNC_FUNCTION_EXECUTION; /// Represents a logical node that executes an external asynchronous function (UDF) @@ -91,7 +90,7 @@ impl AsyncFunctionExecutionNode { let raw_result_field = DFField::new( None, - ASYNC_RESULT_FIELD, + sql_field::ASYNC_RESULT, self.function_config.return_type.clone(), true, ); diff --git a/src/sql/extensions/debezium.rs b/src/sql/extensions/debezium.rs index a1042194..2afda2b4 100644 --- a/src/sql/extensions/debezium.rs +++ b/src/sql/extensions/debezium.rs @@ -23,7 +23,7 @@ use crate::multifield_partial_ord; use crate::sql::common::constants::{cdc, extension_node}; use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use 
crate::sql::logical_planner::updating_meta_field; +use crate::sql::physical::updating_meta_field; use crate::sql::types::TIMESTAMP_FIELD; use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; diff --git a/src/sql/extensions/is_retract.rs b/src/sql/extensions/is_retract.rs index 4375b716..96493781 100644 --- a/src/sql/extensions/is_retract.rs +++ b/src/sql/extensions/is_retract.rs @@ -17,7 +17,7 @@ use datafusion::common::{DFSchemaRef, Result, TableReference}; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use crate::multifield_partial_ord; -use crate::sql::logical_planner::updating_meta_field; +use crate::sql::physical::updating_meta_field; use crate::sql::types::{DFField, TIMESTAMP_FIELD, fields_with_qualifiers, schema_from_df_fields}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/src/sql/extensions/join.rs b/src/sql/extensions/join.rs index 9789a216..829247ae 100644 --- a/src/sql/extensions/join.rs +++ b/src/sql/extensions/join.rs @@ -29,7 +29,7 @@ use crate::sql::logical_node::logical::{ LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName, }; use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::logical_planner::FsPhysicalExtensionCodec; +use crate::sql::physical::FsPhysicalExtensionCodec; // ----------------------------------------------------------------------------- // Constants diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs index 6a9e924b..1d271698 100644 --- a/src/sql/extensions/key_calculation.rs +++ b/src/sql/extensions/key_calculation.rs @@ -31,7 +31,7 @@ use crate::sql::common::constants::{extension_node, sql_field}; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_planner::FsPhysicalExtensionCodec; +use 
crate::sql::physical::FsPhysicalExtensionCodec; use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::types::{fields_with_qualifiers, schema_from_df_fields_with_metadata}; diff --git a/src/sql/extensions/mod.rs b/src/sql/extensions/mod.rs index eab2443b..6c0ca08a 100644 --- a/src/sql/extensions/mod.rs +++ b/src/sql/extensions/mod.rs @@ -12,9 +12,6 @@ mod macros; -pub(crate) mod constants; -pub(crate) use constants::ASYNC_RESULT_FIELD; - pub(crate) mod streaming_operator_blueprint; pub(crate) use streaming_operator_blueprint::{CompiledTopologyNode, StreamingOperatorBlueprint}; diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs index a9a65c51..7025e254 100644 --- a/src/sql/extensions/remote_table.rs +++ b/src/sql/extensions/remote_table.rs @@ -26,7 +26,7 @@ use crate::sql::common::constants::extension_node; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint}; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_planner::FsPhysicalExtensionCodec; +use crate::sql::physical::FsPhysicalExtensionCodec; use crate::sql::logical_planner::planner::{NamedNode, Planner}; // ----------------------------------------------------------------------------- diff --git a/src/sql/extensions/updating_aggregate.rs b/src/sql/extensions/updating_aggregate.rs index 9d12806f..a76d15d4 100644 --- a/src/sql/extensions/updating_aggregate.rs +++ b/src/sql/extensions/updating_aggregate.rs @@ -30,7 +30,7 @@ use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::extensions::{CompiledTopologyNode, IsRetractExtension, StreamingOperatorBlueprint}; use crate::sql::functions::multi_hash; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_planner::FsPhysicalExtensionCodec; +use 
crate::sql::physical::FsPhysicalExtensionCodec; use crate::sql::logical_planner::planner::{NamedNode, Planner}; // ----------------------------------------------------------------------------- diff --git a/src/sql/extensions/windows_function.rs b/src/sql/extensions/windows_function.rs index 09945378..ccb0ff89 100644 --- a/src/sql/extensions/windows_function.rs +++ b/src/sql/extensions/windows_function.rs @@ -24,7 +24,7 @@ use protocol::grpc::api::WindowFunctionOperator; use crate::sql::common::constants::{extension_node, proto_operator_name, runtime_operator_kind}; use crate::sql::common::{FsSchema, FsSchemaRef}; use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_planner::FsPhysicalExtensionCodec; +use crate::sql::physical::FsPhysicalExtensionCodec; use crate::sql::logical_planner::planner::{NamedNode, Planner}; use crate::sql::types::TIMESTAMP_FIELD; diff --git a/src/sql/logical_planner/mod.rs b/src/sql/logical_planner/mod.rs index 54634651..f29cba18 100644 --- a/src/sql/logical_planner/mod.rs +++ b/src/sql/logical_planner/mod.rs @@ -10,1272 +10,5 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use datafusion::arrow::{ - array::{ - Array, AsArray, BooleanBuilder, PrimitiveArray, RecordBatch, StringArray, StructArray, - TimestampNanosecondArray, TimestampNanosecondBuilder, UInt32Builder, - }, - buffer::NullBuffer, - compute::{concat, take}, - datatypes::{DataType, Field, Fields, Schema, SchemaRef, TimeUnit}, -}; -use datafusion::common::{ - DataFusionError, Result, ScalarValue, Statistics, UnnestOptions, not_impl_err, plan_err, -}; -use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream}; -use datafusion::{ - execution::TaskContext, - physical_plan::{ - DisplayAs, ExecutionPlan, Partitioning, memory::MemoryStream, - stream::RecordBatchStreamAdapter, - }, -}; -use std::collections::HashMap; -use std::{ - any::Any, - mem, - pin::Pin, - sync::{Arc, OnceLock, RwLock}, - task::{Context, Poll}, -}; - -use crate::make_udf_function; -use crate::sql::functions::MultiHashFunction; -use crate::sql::analysis::UNNESTED_COL; -use crate::sql::schema::utils::window_arrow_struct; -use crate::sql::common::constants::cdc; -use crate::sql::common::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; -use datafusion::arrow::datatypes::{TimestampNanosecondType, UInt64Type}; -use datafusion::catalog::memory::MemorySourceConfig; -use datafusion::datasource::memory::DataSourceExec; -use datafusion::logical_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, -}; -use datafusion::physical_expr::EquivalenceProperties; -use datafusion::physical_plan::PlanProperties; -use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; -use datafusion::physical_plan::unnest::{ListUnnest, UnnestExec}; -use datafusion_proto::physical_plan::PhysicalExtensionCodec; -use futures::{ - ready, - stream::{Stream, StreamExt}, -}; -use prost::Message; -use protocol::grpc::api::{ - DebeziumDecodeNode, DebeziumEncodeNode, FsExecNode, MemExecNode, UnnestExecNode, - fs_exec_node::Node, -}; -use std::fmt::Debug; -use 
tokio::sync::mpsc::UnboundedReceiver; -use tokio_stream::wrappers::UnboundedReceiverStream; - pub(crate) mod planner; pub mod optimizers; - -// ─────────────────── Updating Meta Helpers ─────────────────── - -pub fn updating_meta_fields() -> Fields { - static FIELDS: OnceLock = OnceLock::new(); - FIELDS - .get_or_init(|| { - Fields::from(vec![ - Field::new("is_retract", DataType::Boolean, true), - Field::new("id", DataType::FixedSizeBinary(16), true), - ]) - }) - .clone() -} - -pub fn updating_meta_field() -> Arc { - static FIELD: OnceLock> = OnceLock::new(); - FIELD - .get_or_init(|| { - Arc::new(Field::new( - UPDATING_META_FIELD, - DataType::Struct(updating_meta_fields()), - false, - )) - }) - .clone() -} - -// ─────────────────── WindowFunctionUdf ─────────────────── - -#[derive(Debug)] -pub struct WindowFunctionUdf { - signature: Signature, -} - -impl Default for WindowFunctionUdf { - fn default() -> Self { - Self { - signature: Signature::new( - TypeSignature::Exact(vec![ - DataType::Timestamp(TimeUnit::Nanosecond, None), - DataType::Timestamp(TimeUnit::Nanosecond, None), - ]), - Volatility::Immutable, - ), - } - } -} - -impl ScalarUDFImpl for WindowFunctionUdf { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "window" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, _: &[DataType]) -> Result { - Ok(window_arrow_struct()) - } - - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - let columns = args.args; - if columns.len() != 2 { - return plan_err!( - "window function expected 2 arguments, got {}", - columns.len() - ); - } - if columns[0].data_type() != DataType::Timestamp(TimeUnit::Nanosecond, None) { - return plan_err!( - "window function expected first argument to be a timestamp, got {:?}", - columns[0].data_type() - ); - } - if columns[1].data_type() != DataType::Timestamp(TimeUnit::Nanosecond, None) { - return plan_err!( - "window function expected second argument to be 
a timestamp, got {:?}", - columns[1].data_type() - ); - } - let fields = vec![ - Arc::new(Field::new( - "start", - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - )), - Arc::new(Field::new( - "end", - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - )), - ] - .into(); - - match (&columns[0], &columns[1]) { - (ColumnarValue::Array(start), ColumnarValue::Array(end)) => { - Ok(ColumnarValue::Array(Arc::new(StructArray::new( - fields, - vec![start.clone(), end.clone()], - None, - )))) - } - (ColumnarValue::Array(start), ColumnarValue::Scalar(end)) => { - let end = end.to_array_of_size(start.len())?; - Ok(ColumnarValue::Array(Arc::new(StructArray::new( - fields, - vec![start.clone(), end], - None, - )))) - } - (ColumnarValue::Scalar(start), ColumnarValue::Array(end)) => { - let start = start.to_array_of_size(end.len())?; - Ok(ColumnarValue::Array(Arc::new(StructArray::new( - fields, - vec![start, end.clone()], - None, - )))) - } - (ColumnarValue::Scalar(start), ColumnarValue::Scalar(end)) => { - Ok(ColumnarValue::Scalar(ScalarValue::Struct( - StructArray::new(fields, vec![start.to_array()?, end.to_array()?], None).into(), - ))) - } - } - } -} - -make_udf_function!(WindowFunctionUdf, WINDOW_FUNCTION, window); - -// ─────────────────── Physical Extension Codec ─────────────────── - -#[derive(Debug)] -pub struct FsPhysicalExtensionCodec { - pub context: DecodingContext, -} - -impl Default for FsPhysicalExtensionCodec { - fn default() -> Self { - Self { - context: DecodingContext::None, - } - } -} - -#[derive(Debug)] -pub enum DecodingContext { - None, - Planning, - SingleLockedBatch(Arc>>), - UnboundedBatchStream(Arc>>>), - LockedBatchVec(Arc>>), - LockedJoinPair { - left: Arc>>, - right: Arc>>, - }, - LockedJoinStream { - left: Arc>>>, - right: Arc>>>, - }, -} - -fn make_properties(schema: SchemaRef) -> PlanProperties { - PlanProperties::new( - EquivalenceProperties::new(schema), - Partitioning::UnknownPartitioning(1), - EmissionType::Incremental, - 
Boundedness::Unbounded { - requires_infinite_memory: false, - }, - ) -} - -impl PhysicalExtensionCodec for FsPhysicalExtensionCodec { - fn try_decode( - &self, - buf: &[u8], - inputs: &[Arc], - _registry: &dyn datafusion::execution::FunctionRegistry, - ) -> Result> { - let exec: FsExecNode = Message::decode(buf) - .map_err(|err| DataFusionError::Internal(format!("couldn't deserialize: {err}")))?; - - match exec - .node - .ok_or_else(|| DataFusionError::Internal("exec node is empty".to_string()))? - { - Node::MemExec(mem_exec) => { - let schema: Schema = serde_json::from_str(&mem_exec.schema).map_err(|e| { - DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")) - })?; - let schema = Arc::new(schema); - match &self.context { - DecodingContext::SingleLockedBatch(single_batch) => Ok(Arc::new( - RwLockRecordBatchReader::new(schema, single_batch.clone()), - )), - DecodingContext::UnboundedBatchStream(unbounded_stream) => Ok(Arc::new( - UnboundedRecordBatchReader::new(schema, unbounded_stream.clone()), - )), - DecodingContext::LockedBatchVec(locked_batches) => Ok(Arc::new( - RecordBatchVecReader::new(schema, locked_batches.clone()), - )), - DecodingContext::Planning => { - Ok(Arc::new(FsMemExec::new(mem_exec.table_name, schema))) - } - DecodingContext::None => Err(DataFusionError::Internal( - "Need an internal context to decode".into(), - )), - DecodingContext::LockedJoinPair { left, right } => { - match mem_exec.table_name.as_str() { - "left" => { - Ok(Arc::new(RwLockRecordBatchReader::new(schema, left.clone()))) - } - "right" => Ok(Arc::new(RwLockRecordBatchReader::new( - schema, - right.clone(), - ))), - _ => Err(DataFusionError::Internal(format!( - "unknown table name {}", - mem_exec.table_name - ))), - } - } - DecodingContext::LockedJoinStream { left, right } => { - match mem_exec.table_name.as_str() { - "left" => Ok(Arc::new(UnboundedRecordBatchReader::new( - schema, - left.clone(), - ))), - "right" => Ok(Arc::new(UnboundedRecordBatchReader::new( 
- schema, - right.clone(), - ))), - _ => Err(DataFusionError::Internal(format!( - "unknown table name {}", - mem_exec.table_name - ))), - } - } - } - } - Node::UnnestExec(unnest) => { - let schema: Schema = serde_json::from_str(&unnest.schema).map_err(|e| { - DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")) - })?; - - let column = schema.index_of(UNNESTED_COL).map_err(|_| { - DataFusionError::Internal(format!( - "unnest node schema does not contain {UNNESTED_COL} col" - )) - })?; - - Ok(Arc::new(UnnestExec::new( - inputs - .first() - .ok_or_else(|| { - DataFusionError::Internal("no input for unnest node".to_string()) - })? - .clone(), - vec![ListUnnest { - index_in_input_schema: column, - depth: 1, - }], - vec![], - Arc::new(schema), - UnnestOptions::default(), - ))) - } - Node::DebeziumDecode(debezium) => { - let schema = Arc::new(serde_json::from_str::(&debezium.schema).map_err( - |e| DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")), - )?); - Ok(Arc::new(DebeziumUnrollingExec { - input: inputs - .first() - .ok_or_else(|| { - DataFusionError::Internal("no input for debezium node".to_string()) - })? - .clone(), - schema: schema.clone(), - properties: make_properties(schema), - primary_keys: debezium - .primary_keys - .into_iter() - .map(|c| c as usize) - .collect(), - })) - } - Node::DebeziumEncode(debezium) => { - let schema = Arc::new(serde_json::from_str::(&debezium.schema).map_err( - |e| DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")), - )?); - Ok(Arc::new(ToDebeziumExec { - input: inputs - .first() - .ok_or_else(|| { - DataFusionError::Internal("no input for debezium node".to_string()) - })? 
- .clone(), - schema: schema.clone(), - properties: make_properties(schema), - })) - } - } - } - - fn try_encode(&self, node: Arc, buf: &mut Vec) -> Result<()> { - let mut proto = None; - - let mem_table: Option<&FsMemExec> = node.as_any().downcast_ref(); - if let Some(table) = mem_table { - proto = Some(FsExecNode { - node: Some(Node::MemExec(MemExecNode { - table_name: table.table_name.clone(), - schema: serde_json::to_string(&table.schema).unwrap(), - })), - }); - } - - let unnest: Option<&UnnestExec> = node.as_any().downcast_ref(); - if let Some(unnest) = unnest { - proto = Some(FsExecNode { - node: Some(Node::UnnestExec(UnnestExecNode { - schema: serde_json::to_string(&unnest.schema()).unwrap(), - })), - }); - } - - let debezium_decode: Option<&DebeziumUnrollingExec> = node.as_any().downcast_ref(); - if let Some(decode) = debezium_decode { - proto = Some(FsExecNode { - node: Some(Node::DebeziumDecode(DebeziumDecodeNode { - schema: serde_json::to_string(&decode.schema).unwrap(), - primary_keys: (*decode.primary_keys).iter().map(|c| *c as u64).collect(), - })), - }); - } - - let debezium_encode: Option<&ToDebeziumExec> = node.as_any().downcast_ref(); - if let Some(encode) = debezium_encode { - proto = Some(FsExecNode { - node: Some(Node::DebeziumEncode(DebeziumEncodeNode { - schema: serde_json::to_string(&encode.schema).unwrap(), - })), - }); - } - - if let Some(node) = proto { - node.encode(buf).map_err(|err| { - DataFusionError::Internal(format!("couldn't serialize exec node {err}")) - })?; - Ok(()) - } else { - Err(DataFusionError::Internal(format!( - "cannot serialize {node:?}" - ))) - } - } -} - -// ─────────────────── RwLockRecordBatchReader ─────────────────── - -#[derive(Debug)] -struct RwLockRecordBatchReader { - schema: SchemaRef, - locked_batch: Arc>>, - properties: PlanProperties, -} - -impl RwLockRecordBatchReader { - fn new(schema: SchemaRef, locked_batch: Arc>>) -> Self { - Self { - schema: schema.clone(), - locked_batch, - properties: 
make_properties(schema), - } - } -} - -impl DisplayAs for RwLockRecordBatchReader { - fn fmt_as( - &self, - _t: datafusion::physical_plan::DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - write!(f, "RW Lock RecordBatchReader") - } -} - -impl ExecutionPlan for RwLockRecordBatchReader { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - self.schema.clone() - } - - fn children(&self) -> Vec<&Arc> { - vec![] - } - - fn with_new_children( - self: Arc, - _children: Vec>, - ) -> Result> { - Err(DataFusionError::Internal("not supported".into())) - } - - fn execute( - &self, - _partition: usize, - _context: Arc, - ) -> Result { - let result = self - .locked_batch - .write() - .unwrap() - .take() - .expect("should have set a record batch before calling execute()"); - Ok(Box::pin(MemoryStream::try_new( - vec![result], - self.schema.clone(), - None, - )?)) - } - - fn statistics(&self) -> Result { - Ok(Statistics::new_unknown(&self.schema)) - } - - fn reset(&self) -> Result<()> { - Ok(()) - } - - fn properties(&self) -> &PlanProperties { - &self.properties - } - - fn name(&self) -> &str { - "rw_lock_reader" - } -} - -// ─────────────────── UnboundedRecordBatchReader ─────────────────── - -#[derive(Debug)] -struct UnboundedRecordBatchReader { - schema: SchemaRef, - receiver: Arc>>>, - properties: PlanProperties, -} - -impl UnboundedRecordBatchReader { - fn new( - schema: SchemaRef, - receiver: Arc>>>, - ) -> Self { - Self { - schema: schema.clone(), - receiver, - properties: make_properties(schema), - } - } -} - -impl DisplayAs for UnboundedRecordBatchReader { - fn fmt_as( - &self, - _t: datafusion::physical_plan::DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - write!(f, "unbounded record batch reader") - } -} - -impl ExecutionPlan for UnboundedRecordBatchReader { - fn name(&self) -> &str { - "unbounded_reader" - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> 
SchemaRef { - self.schema.clone() - } - - fn properties(&self) -> &PlanProperties { - &self.properties - } - - fn children(&self) -> Vec<&Arc> { - vec![] - } - - fn with_new_children( - self: Arc, - _children: Vec>, - ) -> Result> { - Err(DataFusionError::Internal("not supported".into())) - } - - fn execute( - &self, - _partition: usize, - _context: Arc, - ) -> Result { - Ok(Box::pin(RecordBatchStreamAdapter::new( - self.schema.clone(), - UnboundedReceiverStream::new( - self.receiver - .write() - .unwrap() - .take() - .expect("unbounded receiver should be present before calling exec"), - ) - .map(Ok), - ))) - } - - fn statistics(&self) -> Result { - Ok(Statistics::new_unknown(&self.schema)) - } - - fn reset(&self) -> Result<()> { - Ok(()) - } -} - -// ─────────────────── RecordBatchVecReader ─────────────────── - -#[derive(Debug)] -struct RecordBatchVecReader { - schema: SchemaRef, - receiver: Arc>>, - properties: PlanProperties, -} - -impl RecordBatchVecReader { - fn new(schema: SchemaRef, receiver: Arc>>) -> Self { - Self { - schema: schema.clone(), - receiver, - properties: make_properties(schema), - } - } -} - -impl DisplayAs for RecordBatchVecReader { - fn fmt_as( - &self, - _t: datafusion::physical_plan::DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - write!(f, "record batch vec reader") - } -} - -impl ExecutionPlan for RecordBatchVecReader { - fn name(&self) -> &str { - "vec_reader" - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - self.schema.clone() - } - - fn properties(&self) -> &PlanProperties { - &self.properties - } - - fn children(&self) -> Vec<&Arc> { - vec![] - } - - fn with_new_children( - self: Arc, - _children: Vec>, - ) -> Result> { - Err(DataFusionError::Internal("not supported".into())) - } - - fn execute( - &self, - partition: usize, - context: Arc, - ) -> Result { - let memory = MemorySourceConfig::try_new( - &[mem::take(self.receiver.write().unwrap().as_mut())], - 
self.schema.clone(), - None, - )?; - - DataSourceExec::new(Arc::new(memory)).execute(partition, context) - } - - fn statistics(&self) -> Result { - Ok(Statistics::new_unknown(&self.schema)) - } - - fn reset(&self) -> Result<()> { - Ok(()) - } -} - -// ─────────────────── FsMemExec ─────────────────── - -#[derive(Debug, Clone)] -pub struct FsMemExec { - pub table_name: String, - pub schema: SchemaRef, - properties: PlanProperties, -} - -impl DisplayAs for FsMemExec { - fn fmt_as( - &self, - _t: datafusion::physical_plan::DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - write!(f, "EmptyPartitionStream: schema={}", self.schema) - } -} - -impl FsMemExec { - pub fn new(table_name: String, schema: SchemaRef) -> Self { - Self { - schema: schema.clone(), - table_name, - properties: make_properties(schema), - } - } -} - -impl ExecutionPlan for FsMemExec { - fn name(&self) -> &str { - "mem_exec" - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - self.schema.clone() - } - - fn properties(&self) -> &PlanProperties { - &self.properties - } - - fn children(&self) -> Vec<&Arc> { - vec![] - } - - fn with_new_children( - self: Arc, - _children: Vec>, - ) -> Result> { - not_impl_err!("with_new_children is not implemented for mem_exec; should not be called") - } - - fn execute( - &self, - _partition: usize, - _context: Arc, - ) -> Result { - plan_err!( - "EmptyPartitionStream cannot be executed, this is only used for physical planning before serialization" - ) - } - - fn statistics(&self) -> Result { - Ok(Statistics::new_unknown(&self.schema)) - } - - fn reset(&self) -> Result<()> { - Ok(()) - } -} - -// ─────────────────── DebeziumUnrollingExec ─────────────────── - -#[derive(Debug)] -pub struct DebeziumUnrollingExec { - input: Arc, - schema: SchemaRef, - properties: PlanProperties, - primary_keys: Vec, -} - -impl DebeziumUnrollingExec { - pub fn try_new(input: Arc, primary_keys: Vec) -> Result { - let input_schema = 
input.schema(); - let before_index = input_schema.index_of(cdc::BEFORE)?; - let after_index = input_schema.index_of(cdc::AFTER)?; - let op_index = input_schema.index_of(cdc::OP)?; - let _timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; - let before_type = input_schema.field(before_index).data_type(); - let after_type = input_schema.field(after_index).data_type(); - if before_type != after_type { - return Err(DataFusionError::Internal( - "before and after columns must have the same type".to_string(), - )); - } - let op_type = input_schema.field(op_index).data_type(); - if *op_type != DataType::Utf8 { - return Err(DataFusionError::Internal( - "op column must be a string".to_string(), - )); - } - let DataType::Struct(fields) = before_type else { - return Err(DataFusionError::Internal( - "before and after columns must be structs".to_string(), - )); - }; - let mut fields = fields.to_vec(); - fields.push(updating_meta_field()); - fields.push(Arc::new(Field::new( - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ))); - - let schema = Arc::new(Schema::new(fields)); - Ok(Self { - input, - schema: schema.clone(), - properties: make_properties(schema), - primary_keys, - }) - } -} - -impl DisplayAs for DebeziumUnrollingExec { - fn fmt_as( - &self, - _t: datafusion::physical_plan::DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - write!(f, "DebeziumUnrollingExec") - } -} - -impl ExecutionPlan for DebeziumUnrollingExec { - fn name(&self) -> &str { - "debezium_unrolling_exec" - } - - fn as_any(&self) -> &dyn Any { - self as &dyn Any - } - - fn schema(&self) -> SchemaRef { - self.schema.clone() - } - - fn properties(&self) -> &PlanProperties { - &self.properties - } - - fn children(&self) -> Vec<&Arc> { - vec![&self.input] - } - - fn with_new_children( - self: Arc, - children: Vec>, - ) -> Result> { - if children.len() != 1 { - return Err(DataFusionError::Internal( - "DebeziumUnrollingExec wrong number of 
children".to_string(), - )); - } - Ok(Arc::new(DebeziumUnrollingExec { - input: children[0].clone(), - schema: self.schema.clone(), - properties: self.properties.clone(), - primary_keys: self.primary_keys.clone(), - })) - } - - fn execute( - &self, - partition: usize, - context: Arc, - ) -> Result { - Ok(Box::pin(DebeziumUnrollingStream::try_new( - self.input.execute(partition, context)?, - self.schema.clone(), - self.primary_keys.clone(), - )?)) - } - - fn reset(&self) -> Result<()> { - self.input.reset() - } -} - -struct DebeziumUnrollingStream { - input: SendableRecordBatchStream, - schema: SchemaRef, - before_index: usize, - after_index: usize, - op_index: usize, - timestamp_index: usize, - primary_keys: Vec, -} - -impl DebeziumUnrollingStream { - fn try_new( - input: SendableRecordBatchStream, - schema: SchemaRef, - primary_keys: Vec, - ) -> Result { - if primary_keys.is_empty() { - return plan_err!("there must be at least one primary key for a Debezium source"); - } - let input_schema = input.schema(); - let before_index = input_schema.index_of(cdc::BEFORE)?; - let after_index = input_schema.index_of(cdc::AFTER)?; - let op_index = input_schema.index_of(cdc::OP)?; - let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; - - Ok(Self { - input, - schema, - before_index, - after_index, - op_index, - timestamp_index, - primary_keys, - }) - } - - fn unroll_batch(&self, batch: &RecordBatch) -> Result { - let before = batch.column(self.before_index).as_ref(); - let after = batch.column(self.after_index).as_ref(); - let op = batch - .column(self.op_index) - .as_any() - .downcast_ref::() - .ok_or_else(|| DataFusionError::Internal("op column is not a string".to_string()))?; - - let timestamp = batch - .column(self.timestamp_index) - .as_any() - .downcast_ref::() - .ok_or_else(|| { - DataFusionError::Internal("timestamp column is not a timestamp".to_string()) - })?; - - let num_rows = batch.num_rows(); - let combined_array = concat(&[before, after])?; - let mut 
take_indices = UInt32Builder::with_capacity(num_rows); - let mut is_retract_builder = BooleanBuilder::with_capacity(num_rows); - - let mut timestamp_builder = TimestampNanosecondBuilder::with_capacity(2 * num_rows); - for i in 0..num_rows { - let op = op.value(i); - match op { - "c" | "r" => { - take_indices.append_value((i + num_rows) as u32); - is_retract_builder.append_value(false); - timestamp_builder.append_value(timestamp.value(i)); - } - "u" => { - take_indices.append_value(i as u32); - is_retract_builder.append_value(true); - timestamp_builder.append_value(timestamp.value(i)); - take_indices.append_value((i + num_rows) as u32); - is_retract_builder.append_value(false); - timestamp_builder.append_value(timestamp.value(i)); - } - "d" => { - take_indices.append_value(i as u32); - is_retract_builder.append_value(true); - timestamp_builder.append_value(timestamp.value(i)); - } - _ => { - return Err(DataFusionError::Internal(format!( - "unexpected op value: {op}" - ))); - } - } - } - let take_indices = take_indices.finish(); - let unrolled_array = take(&combined_array, &take_indices, None)?; - - let mut columns = unrolled_array.as_struct().columns().to_vec(); - - let hash = MultiHashFunction::default().invoke( - &self - .primary_keys - .iter() - .map(|i| ColumnarValue::Array(columns[*i].clone())) - .collect::>(), - )?; - - let ids = hash.into_array(num_rows)?; - - let meta = StructArray::try_new( - updating_meta_fields(), - vec![Arc::new(is_retract_builder.finish()), ids], - None, - )?; - columns.push(Arc::new(meta)); - columns.push(Arc::new(timestamp_builder.finish())); - Ok(RecordBatch::try_new(self.schema.clone(), columns)?) 
- } -} - -impl Stream for DebeziumUnrollingStream { - type Item = Result; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - let result = - ready!(self.input.poll_next_unpin(cx)).map(|result| self.unroll_batch(&result?)); - Poll::Ready(result) - } -} - -impl RecordBatchStream for DebeziumUnrollingStream { - fn schema(&self) -> SchemaRef { - self.schema.clone() - } -} - -// ─────────────────── ToDebeziumExec ─────────────────── - -#[derive(Debug)] -pub struct ToDebeziumExec { - input: Arc, - schema: SchemaRef, - properties: PlanProperties, -} - -impl ToDebeziumExec { - pub fn try_new(input: Arc) -> Result { - let input_schema = input.schema(); - let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; - let struct_fields: Vec<_> = input_schema - .fields() - .into_iter() - .enumerate() - .filter_map(|(index, field)| { - if field.name() == UPDATING_META_FIELD || index == timestamp_index { - None - } else { - Some(field.clone()) - } - }) - .collect(); - let struct_data_type = DataType::Struct(struct_fields.into()); - let before_field = Arc::new(Field::new("before", struct_data_type.clone(), true)); - let after_field = Arc::new(Field::new("after", struct_data_type, true)); - let op_field = Arc::new(Field::new("op", DataType::Utf8, false)); - let timestamp_field = Arc::new(input_schema.field(timestamp_index).clone()); - - let output_schema = Arc::new(Schema::new(vec![ - before_field, - after_field, - op_field, - timestamp_field, - ])); - - Ok(Self { - input, - schema: output_schema.clone(), - properties: make_properties(output_schema), - }) - } -} - -impl DisplayAs for ToDebeziumExec { - fn fmt_as( - &self, - _t: datafusion::physical_plan::DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - write!(f, "ToDebeziumExec") - } -} - -impl ExecutionPlan for ToDebeziumExec { - fn name(&self) -> &str { - "to_debezium_exec" - } - - fn as_any(&self) -> &dyn Any { - self as &dyn Any - } - - fn schema(&self) -> SchemaRef { - 
self.schema.clone() - } - - fn properties(&self) -> &PlanProperties { - &self.properties - } - - fn children(&self) -> Vec<&Arc> { - vec![&self.input] - } - - fn with_new_children( - self: Arc, - children: Vec>, - ) -> Result> { - if children.len() != 1 { - return Err(DataFusionError::Internal( - "ToDebeziumExec wrong number of children".to_string(), - )); - } - Ok(Arc::new(ToDebeziumExec::try_new(children[0].clone())?)) - } - - fn execute( - &self, - partition: usize, - context: Arc, - ) -> Result { - let updating_meta_index = self.input.schema().index_of(UPDATING_META_FIELD).ok(); - let timestamp_index = self.input.schema().index_of(TIMESTAMP_FIELD)?; - let struct_projection = (0..self.input.schema().fields().len()) - .filter(|index| { - updating_meta_index - .map(|is_retract_index| *index != is_retract_index) - .unwrap_or(true) - && *index != timestamp_index - }) - .collect(); - - Ok(Box::pin(ToDebeziumStream { - input: self.input.execute(partition, context)?, - schema: self.schema.clone(), - updating_meta_index, - timestamp_index, - struct_projection, - })) - } - - fn reset(&self) -> Result<()> { - self.input.reset() - } -} - -struct ToDebeziumStream { - input: SendableRecordBatchStream, - schema: SchemaRef, - updating_meta_index: Option, - timestamp_index: usize, - struct_projection: Vec, -} - -impl ToDebeziumStream { - fn as_debezium_batch(&mut self, batch: &RecordBatch) -> Result { - let value_struct = batch.project(&self.struct_projection)?; - let timestamps = batch - .column(self.timestamp_index) - .as_primitive::(); - - let columns: Vec> = if let Some(metadata_index) = self.updating_meta_index { - let metadata = batch - .column(metadata_index) - .as_any() - .downcast_ref::() - .ok_or_else(|| { - DataFusionError::Internal("Invalid type for updating_meta column".to_string()) - })?; - - let is_retract = metadata.column(0).as_boolean(); - let id = metadata.column(1).as_fixed_size_binary(); - - let mut id_map: HashMap<&[u8], (usize, usize, bool, bool, i64)> = 
HashMap::new(); - let mut order = vec![]; - for i in 0..batch.num_rows() { - let row_id = id.value(i); - let is_create = !is_retract.value(i); - let timestamp = timestamps.value(i); - - id_map - .entry(row_id) - .and_modify(|e| { - e.1 = i; - e.3 = is_create; - e.4 = e.4.max(timestamp); - }) - .or_insert_with(|| { - order.push(row_id); - (i, i, is_create, is_create, timestamp) - }); - } - - let mut before = Vec::with_capacity(id_map.len()); - let mut after = Vec::with_capacity(id_map.len()); - let mut op = Vec::with_capacity(id_map.len()); - let mut ts = TimestampNanosecondBuilder::with_capacity(id_map.len()); - - for row_id in order { - let (first_idx, last_idx, first_is_create, last_is_create, timestamp) = - id_map.get(row_id).unwrap(); - - if *first_is_create && *last_is_create { - before.push(None); - after.push(Some(*last_idx)); - op.push("c"); - } else if !(*first_is_create) && !(*last_is_create) { - before.push(Some(*first_idx)); - after.push(None); - op.push("d"); - } else if !(*first_is_create) && *last_is_create { - before.push(Some(*first_idx)); - after.push(Some(*last_idx)); - op.push("u"); - } else { - continue; - } - - ts.append_value(*timestamp); - } - - let before_array = Self::create_output_array(&value_struct, &before)?; - let after_array = Self::create_output_array(&value_struct, &after)?; - let op_array = StringArray::from(op); - - vec![ - Arc::new(before_array), - Arc::new(after_array), - Arc::new(op_array), - Arc::new(ts.finish()), - ] - } else { - let after_array = StructArray::try_new( - value_struct.schema().fields().clone(), - value_struct.columns().to_vec(), - None, - )?; - - let before_array = StructArray::new_null( - value_struct.schema().fields().clone(), - value_struct.num_rows(), - ); - - vec![ - Arc::new(before_array), - Arc::new(after_array), - Arc::new(StringArray::from(vec!["c"; value_struct.num_rows()])), - batch.column(self.timestamp_index).clone(), - ] - }; - - Ok(RecordBatch::try_new(self.schema.clone(), columns)?) 
- } - - fn create_output_array( - value_struct: &RecordBatch, - indices: &[Option], - ) -> Result { - let mut arrays: Vec> = Vec::with_capacity(value_struct.num_columns()); - for col in value_struct.columns() { - let new_array = take( - col.as_ref(), - &indices - .iter() - .map(|&idx| idx.map(|i| i as u64)) - .collect::>(), - None, - )?; - arrays.push(new_array); - } - - Ok(StructArray::try_new( - value_struct.schema().fields().clone(), - arrays, - Some(NullBuffer::from( - indices.iter().map(|&idx| idx.is_some()).collect::>(), - )), - )?) - } -} - -impl Stream for ToDebeziumStream { - type Item = Result; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - let result = - ready!(self.input.poll_next_unpin(cx)).map(|result| self.as_debezium_batch(&result?)); - Poll::Ready(result) - } -} - -impl RecordBatchStream for ToDebeziumStream { - fn schema(&self) -> SchemaRef { - self.schema.clone() - } -} diff --git a/src/sql/logical_planner/planner.rs b/src/sql/logical_planner/planner.rs index be388ad4..b0a712c7 100644 --- a/src/sql/logical_planner/planner.rs +++ b/src/sql/logical_planner/planner.rs @@ -43,7 +43,7 @@ use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; use crate::sql::logical_node::logical::{LogicalEdge, LogicalGraph, LogicalNode}; -use crate::sql::logical_planner::{ +use crate::sql::physical::{ DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec, }; use crate::sql::extensions::debezium::{PACK_NODE_NAME, UNROLL_NODE_NAME, UnrollDebeziumPayloadNode}; diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 04f6c897..5cb53705 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -18,6 +18,7 @@ pub mod functions; pub mod parse; pub mod logical_node; pub mod logical_planner; +pub mod physical; pub mod analysis; pub(crate) mod extensions; pub mod types; diff --git a/src/sql/physical/cdc/encode.rs 
b/src/sql/physical/cdc/encode.rs new file mode 100644 index 00000000..b1a1cc2e --- /dev/null +++ b/src/sql/physical/cdc/encode.rs @@ -0,0 +1,331 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 内部回撤流压回 Debezium `before` / `after` / `op` 信封。 + +use std::any::Any; +use std::collections::HashMap; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use datafusion::arrow::array::AsArray; +use datafusion::arrow::array::{ + Array, BooleanArray, FixedSizeBinaryArray, PrimitiveArray, RecordBatch, StringArray, + StructArray, TimestampNanosecondBuilder, +}; +use datafusion::arrow::buffer::NullBuffer; +use datafusion::arrow::compute::take; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, UInt64Type}; +use datafusion::arrow::datatypes::TimestampNanosecondType; +use datafusion::common::{DataFusionError, Result}; +use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; +use datafusion::physical_plan::{DisplayAs, ExecutionPlan, PlanProperties}; +use futures::{ready, stream::Stream, StreamExt}; + +use crate::sql::common::constants::{cdc, debezium_op_short, physical_plan_node_name}; +use crate::sql::common::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; +use crate::sql::physical::readers::make_stream_properties; + +#[derive(Debug)] +pub struct ToDebeziumExec { + input: Arc, + schema: SchemaRef, + properties: PlanProperties, +} + +impl ToDebeziumExec { + pub fn try_new(input: Arc) -> Result { + let 
input_schema = input.schema(); + let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; + let struct_fields: Vec<_> = input_schema + .fields() + .into_iter() + .enumerate() + .filter_map(|(index, field)| { + if field.name() == UPDATING_META_FIELD || index == timestamp_index { + None + } else { + Some(field.clone()) + } + }) + .collect(); + let struct_data_type = DataType::Struct(struct_fields.into()); + let before_field = Arc::new(Field::new(cdc::BEFORE, struct_data_type.clone(), true)); + let after_field = Arc::new(Field::new(cdc::AFTER, struct_data_type, true)); + let op_field = Arc::new(Field::new(cdc::OP, DataType::Utf8, false)); + let timestamp_field = Arc::new(input_schema.field(timestamp_index).clone()); + + let output_schema = Arc::new(Schema::new(vec![ + before_field, + after_field, + op_field, + timestamp_field, + ])); + + Ok(Self { + input, + schema: output_schema.clone(), + properties: make_stream_properties(output_schema), + }) + } + + pub(crate) fn from_decoded_parts(input: Arc, schema: SchemaRef) -> Self { + Self { + properties: make_stream_properties(schema.clone()), + input, + schema, + } + } +} + +impl DisplayAs for ToDebeziumExec { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "ToDebeziumExec") + } +} + +impl ExecutionPlan for ToDebeziumExec { + fn name(&self) -> &str { + physical_plan_node_name::TO_DEBEZIUM_EXEC + } + + fn as_any(&self) -> &dyn Any { + self as &dyn Any + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + if children.len() != 1 { + return Err(DataFusionError::Internal( + "ToDebeziumExec wrong number of children".to_string(), + )); + } + Ok(Arc::new(ToDebeziumExec::try_new(children[0].clone())?)) + } + + fn 
execute( + &self, + partition: usize, + context: Arc, + ) -> Result { + let updating_meta_index = self.input.schema().index_of(UPDATING_META_FIELD).ok(); + let timestamp_index = self.input.schema().index_of(TIMESTAMP_FIELD)?; + let struct_projection = (0..self.input.schema().fields().len()) + .filter(|index| { + updating_meta_index + .map(|is_retract_index| *index != is_retract_index) + .unwrap_or(true) + && *index != timestamp_index + }) + .collect(); + + Ok(Box::pin(ToDebeziumStream { + input: self.input.execute(partition, context)?, + schema: self.schema.clone(), + updating_meta_index, + timestamp_index, + struct_projection, + })) + } + + fn reset(&self) -> Result<()> { + self.input.reset() + } +} + +struct ToDebeziumStream { + input: SendableRecordBatchStream, + schema: SchemaRef, + updating_meta_index: Option, + timestamp_index: usize, + struct_projection: Vec, +} + +/// 按主键 id 归并一行内的 changelog,输出 before/after 行索引与 op 字母。 +fn compact_changelog_by_id<'a>( + num_rows: usize, + is_retract: &'a BooleanArray, + id: &'a FixedSizeBinaryArray, + timestamps: &'a PrimitiveArray, +) -> ( + Vec<&'a [u8]>, + HashMap<&'a [u8], (usize, usize, bool, bool, i64)>, +) { + let mut id_map: HashMap<&[u8], (usize, usize, bool, bool, i64)> = HashMap::new(); + let mut order = vec![]; + for i in 0..num_rows { + let row_id = id.value(i); + let is_create = !is_retract.value(i); + let timestamp = timestamps.value(i); + + id_map + .entry(row_id) + .and_modify(|e| { + e.1 = i; + e.3 = is_create; + e.4 = e.4.max(timestamp); + }) + .or_insert_with(|| { + order.push(row_id); + (i, i, is_create, is_create, timestamp) + }); + } + (order, id_map) +} + +impl ToDebeziumStream { + fn as_debezium_batch(&mut self, batch: &RecordBatch) -> Result { + let value_struct = batch.project(&self.struct_projection)?; + let timestamps = batch + .column(self.timestamp_index) + .as_primitive::(); + + let columns: Vec> = if let Some(metadata_index) = self.updating_meta_index { + let metadata = batch + 
.column(metadata_index) + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Internal("Invalid type for updating_meta column".to_string()) + })?; + + let is_retract = metadata.column(0).as_boolean(); + let id = metadata.column(1).as_fixed_size_binary(); + + let (order, id_map) = + compact_changelog_by_id(batch.num_rows(), is_retract, id, timestamps); + + let mut before = Vec::with_capacity(id_map.len()); + let mut after = Vec::with_capacity(id_map.len()); + let mut op = Vec::with_capacity(id_map.len()); + let mut ts = TimestampNanosecondBuilder::with_capacity(id_map.len()); + + for row_id in order { + let (first_idx, last_idx, first_is_create, last_is_create, timestamp) = + id_map.get(row_id).unwrap(); + + if *first_is_create && *last_is_create { + before.push(None); + after.push(Some(*last_idx)); + op.push(debezium_op_short::CREATE); + } else if !(*first_is_create) && !(*last_is_create) { + before.push(Some(*first_idx)); + after.push(None); + op.push(debezium_op_short::DELETE); + } else if !(*first_is_create) && *last_is_create { + before.push(Some(*first_idx)); + after.push(Some(*last_idx)); + op.push(debezium_op_short::UPDATE); + } else { + continue; + } + + ts.append_value(*timestamp); + } + + let before_array = Self::create_output_array(&value_struct, &before)?; + let after_array = Self::create_output_array(&value_struct, &after)?; + let op_array = StringArray::from(op); + + vec![ + Arc::new(before_array), + Arc::new(after_array), + Arc::new(op_array), + Arc::new(ts.finish()), + ] + } else { + let after_array = StructArray::try_new( + value_struct.schema().fields().clone(), + value_struct.columns().to_vec(), + None, + )?; + + let before_array = StructArray::new_null( + value_struct.schema().fields().clone(), + value_struct.num_rows(), + ); + + vec![ + Arc::new(before_array), + Arc::new(after_array), + Arc::new(StringArray::from(vec![ + debezium_op_short::CREATE; + value_struct.num_rows() + ])), + batch.column(self.timestamp_index).clone(), + ] 
+ }; + + Ok(RecordBatch::try_new(self.schema.clone(), columns)?) + } + + fn create_output_array( + value_struct: &RecordBatch, + indices: &[Option], + ) -> Result { + let mut arrays: Vec> = Vec::with_capacity(value_struct.num_columns()); + for col in value_struct.columns() { + let new_array = take( + col.as_ref(), + &indices + .iter() + .map(|&idx| idx.map(|i| i as u64)) + .collect::>(), + None, + )?; + arrays.push(new_array); + } + + Ok(StructArray::try_new( + value_struct.schema().fields().clone(), + arrays, + Some(NullBuffer::from( + indices.iter().map(|&idx| idx.is_some()).collect::>(), + )), + )?) + } +} + +impl Stream for ToDebeziumStream { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + let result = + ready!(self.input.poll_next_unpin(cx)).map(|result| self.as_debezium_batch(&result?)); + Poll::Ready(result) + } +} + +impl RecordBatchStream for ToDebeziumStream { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} diff --git a/src/sql/extensions/constants.rs b/src/sql/physical/cdc/mod.rs similarity index 78% rename from src/sql/extensions/constants.rs rename to src/sql/physical/cdc/mod.rs index 245dacec..743ca966 100644 --- a/src/sql/extensions/constants.rs +++ b/src/sql/physical/cdc/mod.rs @@ -10,4 +10,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -pub use crate::sql::common::constants::sql_field::ASYNC_RESULT as ASYNC_RESULT_FIELD; +//! Debezium 与内部 changelog 行格式的互转。 + +mod encode; +mod unroll; + +pub use encode::ToDebeziumExec; +pub use unroll::DebeziumUnrollingExec; diff --git a/src/sql/physical/cdc/unroll.rs b/src/sql/physical/cdc/unroll.rs new file mode 100644 index 00000000..345d2642 --- /dev/null +++ b/src/sql/physical/cdc/unroll.rs @@ -0,0 +1,300 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Debezium 信封展开为内部带 retract 语义的行流。 + +use std::any::Any; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use datafusion::arrow::array::AsArray; +use datafusion::arrow::array::{ + Array, BooleanBuilder, RecordBatch, StringArray, StructArray, TimestampNanosecondArray, + TimestampNanosecondBuilder, UInt32Builder, +}; +use datafusion::arrow::compute::{concat, take}; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; +use datafusion::common::{DataFusionError, Result, plan_err}; +use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; +use datafusion::logical_expr::ColumnarValue; +use datafusion::physical_plan::{DisplayAs, ExecutionPlan, PlanProperties}; +use futures::{ready, stream::Stream, StreamExt}; + +use crate::sql::common::constants::{cdc, debezium_op_short, physical_plan_node_name}; +use crate::sql::common::TIMESTAMP_FIELD; +use crate::sql::functions::MultiHashFunction; +use crate::sql::physical::meta::{updating_meta_field, updating_meta_fields}; +use crate::sql::physical::readers::make_stream_properties; + +#[derive(Debug)] +pub struct DebeziumUnrollingExec { + input: Arc, + schema: SchemaRef, + properties: PlanProperties, + primary_keys: Vec, +} + +impl DebeziumUnrollingExec { + pub fn try_new(input: Arc, primary_keys: Vec) -> Result { + let input_schema = input.schema(); + let before_index = input_schema.index_of(cdc::BEFORE)?; + let after_index = input_schema.index_of(cdc::AFTER)?; + let op_index = input_schema.index_of(cdc::OP)?; + let 
_timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; + let before_type = input_schema.field(before_index).data_type(); + let after_type = input_schema.field(after_index).data_type(); + if before_type != after_type { + return Err(DataFusionError::Internal( + "before and after columns must have the same type".to_string(), + )); + } + let op_type = input_schema.field(op_index).data_type(); + if *op_type != DataType::Utf8 { + return Err(DataFusionError::Internal( + "op column must be a string".to_string(), + )); + } + let DataType::Struct(fields) = before_type else { + return Err(DataFusionError::Internal( + "before and after columns must be structs".to_string(), + )); + }; + let mut fields = fields.to_vec(); + fields.push(updating_meta_field()); + fields.push(Arc::new(Field::new( + TIMESTAMP_FIELD, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + ))); + + let schema = Arc::new(Schema::new(fields)); + Ok(Self { + input, + schema: schema.clone(), + properties: make_stream_properties(schema), + primary_keys, + }) + } + + /// 分布式反序列化路径:跳过 `try_new` 的 schema 校验(proto 已约定形态)。 + pub(crate) fn from_decoded_parts( + input: Arc, + schema: SchemaRef, + primary_keys: Vec, + ) -> Self { + Self { + properties: make_stream_properties(schema.clone()), + input, + schema, + primary_keys, + } + } + + pub fn primary_key_indices(&self) -> &[usize] { + &self.primary_keys + } +} + +impl DisplayAs for DebeziumUnrollingExec { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "DebeziumUnrollingExec") + } +} + +impl ExecutionPlan for DebeziumUnrollingExec { + fn name(&self) -> &str { + physical_plan_node_name::DEBEZIUM_UNROLLING_EXEC + } + + fn as_any(&self) -> &dyn Any { + self as &dyn Any + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } 
+ + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + if children.len() != 1 { + return Err(DataFusionError::Internal( + "DebeziumUnrollingExec wrong number of children".to_string(), + )); + } + Ok(Arc::new(DebeziumUnrollingExec { + input: children[0].clone(), + schema: self.schema.clone(), + properties: self.properties.clone(), + primary_keys: self.primary_keys.clone(), + })) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> Result { + Ok(Box::pin(DebeziumUnrollingStream::try_new( + self.input.execute(partition, context)?, + self.schema.clone(), + self.primary_keys.clone(), + )?)) + } + + fn reset(&self) -> Result<()> { + self.input.reset() + } +} + +struct DebeziumUnrollingStream { + input: SendableRecordBatchStream, + schema: SchemaRef, + before_index: usize, + after_index: usize, + op_index: usize, + timestamp_index: usize, + primary_keys: Vec, +} + +impl DebeziumUnrollingStream { + fn try_new( + input: SendableRecordBatchStream, + schema: SchemaRef, + primary_keys: Vec, + ) -> Result { + if primary_keys.is_empty() { + return plan_err!("there must be at least one primary key for a Debezium source"); + } + let input_schema = input.schema(); + let before_index = input_schema.index_of(cdc::BEFORE)?; + let after_index = input_schema.index_of(cdc::AFTER)?; + let op_index = input_schema.index_of(cdc::OP)?; + let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; + + Ok(Self { + input, + schema, + before_index, + after_index, + op_index, + timestamp_index, + primary_keys, + }) + } + + fn unroll_batch(&self, batch: &RecordBatch) -> Result { + let before = batch.column(self.before_index).as_ref(); + let after = batch.column(self.after_index).as_ref(); + let op = batch + .column(self.op_index) + .as_any() + .downcast_ref::() + .ok_or_else(|| DataFusionError::Internal("op column is not a string".to_string()))?; + + let timestamp = batch + .column(self.timestamp_index) + .as_any() + .downcast_ref::() + .ok_or_else(|| { + 
DataFusionError::Internal("timestamp column is not a timestamp".to_string()) + })?; + + let num_rows = batch.num_rows(); + let combined_array = concat(&[before, after])?; + let mut take_indices = UInt32Builder::with_capacity(num_rows); + let mut is_retract_builder = BooleanBuilder::with_capacity(num_rows); + + let mut timestamp_builder = TimestampNanosecondBuilder::with_capacity(2 * num_rows); + for i in 0..num_rows { + let op = op.value(i); + match op { + debezium_op_short::CREATE | debezium_op_short::READ => { + take_indices.append_value((i + num_rows) as u32); + is_retract_builder.append_value(false); + timestamp_builder.append_value(timestamp.value(i)); + } + debezium_op_short::UPDATE => { + take_indices.append_value(i as u32); + is_retract_builder.append_value(true); + timestamp_builder.append_value(timestamp.value(i)); + take_indices.append_value((i + num_rows) as u32); + is_retract_builder.append_value(false); + timestamp_builder.append_value(timestamp.value(i)); + } + debezium_op_short::DELETE => { + take_indices.append_value(i as u32); + is_retract_builder.append_value(true); + timestamp_builder.append_value(timestamp.value(i)); + } + _ => { + return Err(DataFusionError::Internal(format!( + "unexpected op value: {op}" + ))); + } + } + } + let take_indices = take_indices.finish(); + let unrolled_array = take(&combined_array, &take_indices, None)?; + + let mut columns = unrolled_array.as_struct().columns().to_vec(); + + let hash = MultiHashFunction::default().invoke( + &self + .primary_keys + .iter() + .map(|i| ColumnarValue::Array(columns[*i].clone())) + .collect::>(), + )?; + + let ids = hash.into_array(num_rows)?; + + let meta = StructArray::try_new( + updating_meta_fields(), + vec![Arc::new(is_retract_builder.finish()), ids], + None, + )?; + columns.push(Arc::new(meta)); + columns.push(Arc::new(timestamp_builder.finish())); + Ok(RecordBatch::try_new(self.schema.clone(), columns)?) 
+ } +} + +impl Stream for DebeziumUnrollingStream { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + let result = + ready!(self.input.poll_next_unpin(cx)).map(|result| self.unroll_batch(&result?)); + Poll::Ready(result) + } +} + +impl RecordBatchStream for DebeziumUnrollingStream { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} diff --git a/src/sql/physical/codec.rs b/src/sql/physical/codec.rs new file mode 100644 index 00000000..e90e4b3a --- /dev/null +++ b/src/sql/physical/codec.rs @@ -0,0 +1,263 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
分布式物理计划 proto 编解码(`FsExecNode`)。 + +use std::sync::Arc; + +use datafusion::arrow::array::RecordBatch; +use datafusion::arrow::datatypes::Schema; +use datafusion::common::{DataFusionError, Result, UnnestOptions}; +use datafusion::execution::FunctionRegistry; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_plan::unnest::{ListUnnest, UnnestExec}; +use datafusion_proto::physical_plan::PhysicalExtensionCodec; +use prost::Message; +use protocol::grpc::api::{ + DebeziumDecodeNode, DebeziumEncodeNode, FsExecNode, MemExecNode, UnnestExecNode, + fs_exec_node::Node, +}; +use tokio::sync::mpsc::UnboundedReceiver; + +use crate::sql::analysis::UNNESTED_COL; +use crate::sql::common::constants::mem_exec_join_side; +use crate::sql::physical::cdc::{DebeziumUnrollingExec, ToDebeziumExec}; +use crate::sql::physical::readers::{ + FsMemExec, RecordBatchVecReader, RwLockRecordBatchReader, UnboundedRecordBatchReader, +}; + +#[derive(Debug)] +pub struct FsPhysicalExtensionCodec { + pub context: DecodingContext, +} + +impl Default for FsPhysicalExtensionCodec { + fn default() -> Self { + Self { + context: DecodingContext::None, + } + } +} + +#[derive(Debug)] +pub enum DecodingContext { + None, + Planning, + SingleLockedBatch(Arc>>), + UnboundedBatchStream(Arc>>>), + LockedBatchVec(Arc>>), + LockedJoinPair { + left: Arc>>, + right: Arc>>, + }, + LockedJoinStream { + left: Arc>>>, + right: Arc>>>, + }, +} + +impl PhysicalExtensionCodec for FsPhysicalExtensionCodec { + fn try_decode( + &self, + buf: &[u8], + inputs: &[Arc], + _registry: &dyn FunctionRegistry, + ) -> Result> { + let exec: FsExecNode = Message::decode(buf) + .map_err(|err| DataFusionError::Internal(format!("couldn't deserialize: {err}")))?; + + let node = exec + .node + .ok_or_else(|| DataFusionError::Internal("exec node is empty".to_string()))?; + + match node { + Node::MemExec(mem) => self.decode_mem_exec(mem), + Node::UnnestExec(unnest) => decode_unnest_exec(unnest, inputs), + 
Node::DebeziumDecode(debezium) => decode_debezium_decode(debezium, inputs), + Node::DebeziumEncode(debezium) => decode_debezium_encode(debezium, inputs), + } + } + + fn try_encode(&self, node: Arc, buf: &mut Vec) -> Result<()> { + let mut proto = None; + + if let Some(table) = node.as_any().downcast_ref::() { + proto = Some(FsExecNode { + node: Some(Node::MemExec(MemExecNode { + table_name: table.table_name.clone(), + schema: serde_json::to_string(&table.schema).unwrap(), + })), + }); + } + + if let Some(unnest) = node.as_any().downcast_ref::() { + proto = Some(FsExecNode { + node: Some(Node::UnnestExec(UnnestExecNode { + schema: serde_json::to_string(&unnest.schema()).unwrap(), + })), + }); + } + + if let Some(decode) = node.as_any().downcast_ref::() { + proto = Some(FsExecNode { + node: Some(Node::DebeziumDecode(DebeziumDecodeNode { + schema: serde_json::to_string(decode.schema().as_ref()).unwrap(), + primary_keys: decode + .primary_key_indices() + .iter() + .map(|c| *c as u64) + .collect(), + })), + }); + } + + if let Some(encode) = node.as_any().downcast_ref::() { + proto = Some(FsExecNode { + node: Some(Node::DebeziumEncode(DebeziumEncodeNode { + schema: serde_json::to_string(encode.schema().as_ref()).unwrap(), + })), + }); + } + + if let Some(node) = proto { + node.encode(buf).map_err(|err| { + DataFusionError::Internal(format!("couldn't serialize exec node {err}")) + })?; + Ok(()) + } else { + Err(DataFusionError::Internal(format!( + "cannot serialize {node:?}" + ))) + } + } +} + +impl FsPhysicalExtensionCodec { + fn decode_mem_exec(&self, mem_exec: MemExecNode) -> Result> { + let schema: Schema = serde_json::from_str(&mem_exec.schema).map_err(|e| { + DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")) + })?; + let schema = Arc::new(schema); + match &self.context { + DecodingContext::SingleLockedBatch(single_batch) => Ok(Arc::new( + RwLockRecordBatchReader::new(schema, single_batch.clone()), + )), + 
DecodingContext::UnboundedBatchStream(unbounded_stream) => Ok(Arc::new( + UnboundedRecordBatchReader::new(schema, unbounded_stream.clone()), + )), + DecodingContext::LockedBatchVec(locked_batches) => Ok(Arc::new( + RecordBatchVecReader::new(schema, locked_batches.clone()), + )), + DecodingContext::Planning => Ok(Arc::new(FsMemExec::new(mem_exec.table_name, schema))), + DecodingContext::None => Err(DataFusionError::Internal( + "Need an internal context to decode".into(), + )), + DecodingContext::LockedJoinPair { left, right } => { + match mem_exec.table_name.as_str() { + mem_exec_join_side::LEFT => { + Ok(Arc::new(RwLockRecordBatchReader::new(schema, left.clone()))) + } + mem_exec_join_side::RIGHT => Ok(Arc::new(RwLockRecordBatchReader::new( + schema, + right.clone(), + ))), + _ => Err(DataFusionError::Internal(format!( + "unknown table name {}", + mem_exec.table_name + ))), + } + } + DecodingContext::LockedJoinStream { left, right } => { + match mem_exec.table_name.as_str() { + mem_exec_join_side::LEFT => Ok(Arc::new(UnboundedRecordBatchReader::new( + schema, + left.clone(), + ))), + mem_exec_join_side::RIGHT => Ok(Arc::new(UnboundedRecordBatchReader::new( + schema, + right.clone(), + ))), + _ => Err(DataFusionError::Internal(format!( + "unknown table name {}", + mem_exec.table_name + ))), + } + } + } + } +} + +fn decode_unnest_exec( + unnest: UnnestExecNode, + inputs: &[Arc], +) -> Result> { + let schema: Schema = serde_json::from_str(&unnest.schema).map_err(|e| { + DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")) + })?; + + let column = schema.index_of(UNNESTED_COL).map_err(|_| { + DataFusionError::Internal(format!( + "unnest node schema does not contain {UNNESTED_COL} col" + )) + })?; + + Ok(Arc::new(UnnestExec::new( + inputs + .first() + .ok_or_else(|| DataFusionError::Internal("no input for unnest node".to_string()))? 
+ .clone(), + vec![ListUnnest { + index_in_input_schema: column, + depth: 1, + }], + vec![], + Arc::new(schema), + UnnestOptions::default(), + ))) +} + +fn decode_debezium_decode( + debezium: DebeziumDecodeNode, + inputs: &[Arc], +) -> Result> { + let schema = Arc::new(serde_json::from_str::(&debezium.schema).map_err(|e| { + DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")) + })?); + let input = inputs + .first() + .ok_or_else(|| DataFusionError::Internal("no input for debezium node".to_string()))? + .clone(); + let primary_keys = debezium + .primary_keys + .into_iter() + .map(|c| c as usize) + .collect(); + Ok(Arc::new(DebeziumUnrollingExec::from_decoded_parts( + input, + schema.clone(), + primary_keys, + ))) +} + +fn decode_debezium_encode( + debezium: DebeziumEncodeNode, + inputs: &[Arc], +) -> Result> { + let schema = Arc::new(serde_json::from_str::(&debezium.schema).map_err(|e| { + DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")) + })?); + let input = inputs + .first() + .ok_or_else(|| DataFusionError::Internal("no input for debezium node".to_string()))? + .clone(); + Ok(Arc::new(ToDebeziumExec::from_decoded_parts(input, schema))) +} diff --git a/src/sql/physical/meta.rs b/src/sql/physical/meta.rs new file mode 100644 index 00000000..5828593c --- /dev/null +++ b/src/sql/physical/meta.rs @@ -0,0 +1,53 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
Changelog 元数据列:`is_retract`、行 `id`(FixedSizeBinary)。 + +use std::sync::{Arc, OnceLock}; + +use datafusion::arrow::datatypes::{DataType, Field, Fields}; + +use crate::sql::common::constants::updating_state_field; +use crate::sql::common::UPDATING_META_FIELD; + +pub fn updating_meta_fields() -> Fields { + static FIELDS: OnceLock = OnceLock::new(); + FIELDS + .get_or_init(|| { + Fields::from(vec![ + Field::new( + updating_state_field::IS_RETRACT, + DataType::Boolean, + true, + ), + Field::new( + updating_state_field::ID, + DataType::FixedSizeBinary(16), + true, + ), + ]) + }) + .clone() +} + +pub fn updating_meta_field() -> Arc { + static FIELD: OnceLock> = OnceLock::new(); + FIELD + .get_or_init(|| { + Arc::new(Field::new( + UPDATING_META_FIELD, + DataType::Struct(updating_meta_fields()), + false, + )) + }) + .clone() +} diff --git a/src/sql/physical/mod.rs b/src/sql/physical/mod.rs new file mode 100644 index 00000000..1ba09eb0 --- /dev/null +++ b/src/sql/physical/mod.rs @@ -0,0 +1,25 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
流式物理执行扩展:元数据列、UDF、内存/无界 Reader、CDC 与 proto 编解码。 + +mod cdc; +mod codec; +mod meta; +mod readers; +mod udfs; + +pub use cdc::{DebeziumUnrollingExec, ToDebeziumExec}; +pub use codec::{DecodingContext, FsPhysicalExtensionCodec}; +pub use meta::{updating_meta_field, updating_meta_fields}; +pub use readers::FsMemExec; +pub use udfs::{WindowFunctionUdf, window}; diff --git a/src/sql/physical/physical_planner.rs b/src/sql/physical/physical_planner.rs deleted file mode 100644 index fc66b3b0..00000000 --- a/src/sql/physical/physical_planner.rs +++ /dev/null @@ -1,418 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::collections::HashMap; -use std::sync::Arc; -use std::thread; -use std::time::Duration; - -use datafusion::arrow::datatypes::IntervalMonthDayNanoType; -use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; -use datafusion::common::{ - DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, Spans, plan_err, -}; -use datafusion::execution::context::SessionState; -use datafusion::execution::runtime_env::RuntimeEnvBuilder; -use datafusion::functions::datetime::date_bin; -use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNode}; -use datafusion::physical_expr::PhysicalExpr; -use datafusion::physical_plan::ExecutionPlan; -use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}; -use datafusion_proto::protobuf::{PhysicalExprNode, PhysicalPlanNode}; -use datafusion_proto::{ - physical_plan::AsExecutionPlan, - protobuf::{AggregateMode, physical_plan_node::PhysicalPlanType}, -}; -use petgraph::graph::{DiGraph, NodeIndex}; -use prost::Message; -use tokio::runtime::Builder; -use tokio::sync::oneshot; - -use async_trait::async_trait; -use datafusion_common::TableReference; -use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; -use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; - -use crate::sql::datastream::logical::{LogicalEdge, LogicalGraph, LogicalNode}; -use crate::sql::physical::{ - DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec, -}; -use crate::sql::logical_node::debezium::{ - DEBEZIUM_UNROLLING_EXTENSION_NAME, DebeziumUnrollingExtension, TO_DEBEZIUM_EXTENSION_NAME, -}; -use crate::sql::logical_node::key_calculation::KeyCalculationExtension; -use crate::sql::logical_node::{NodeWithIncomingEdges, StreamExtension}; -use crate::sql::schema::utils::add_timestamp_field_arrow; -use crate::sql::schema::StreamSchemaProvider; -use crate::types::{FsSchema, FsSchemaRef}; - -#[derive(Eq, Hash, 
PartialEq)] -#[derive(Debug)] -pub(crate) enum NamedNode { - Source(TableReference), - Watermark(TableReference), - RemoteTable(TableReference), - Sink(TableReference), -} - -pub(crate) struct PlanToGraphVisitor<'a> { - graph: DiGraph, - output_schemas: HashMap, - named_nodes: HashMap, - traversal: Vec>, - planner: Planner<'a>, -} - -impl<'a> PlanToGraphVisitor<'a> { - pub fn new(schema_provider: &'a StreamSchemaProvider, session_state: &'a SessionState) -> Self { - Self { - graph: Default::default(), - output_schemas: Default::default(), - named_nodes: Default::default(), - traversal: vec![], - planner: Planner::new(schema_provider, session_state), - } - } -} - -pub(crate) struct Planner<'a> { - schema_provider: &'a StreamSchemaProvider, - planner: DefaultPhysicalPlanner, - session_state: &'a SessionState, -} - -impl<'a> Planner<'a> { - pub(crate) fn new( - schema_provider: &'a StreamSchemaProvider, - session_state: &'a SessionState, - ) -> Self { - let planner = - DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(FsExtensionPlanner {})]); - Self { - schema_provider, - planner, - session_state, - } - } - - pub(crate) fn sync_plan(&self, plan: &LogicalPlan) -> Result> { - let fut = self.planner.create_physical_plan(plan, self.session_state); - let (tx, mut rx) = oneshot::channel(); - thread::scope(|s| { - let _handle = tokio::runtime::Handle::current(); - let builder = thread::Builder::new(); - let builder = if cfg!(debug_assertions) { - builder.stack_size(10_000_000) - } else { - builder - }; - builder - .spawn_scoped(s, move || { - let rt = Builder::new_current_thread().enable_all().build().unwrap(); - rt.block_on(async { - let plan = fut.await; - tx.send(plan).unwrap(); - }); - }) - .unwrap(); - }); - - rx.try_recv().unwrap() - } - - pub(crate) fn create_physical_expr( - &self, - expr: &Expr, - input_dfschema: &DFSchema, - ) -> Result> { - self.planner - .create_physical_expr(expr, input_dfschema, self.session_state) - } - - pub(crate) fn 
serialize_as_physical_expr( - &self, - expr: &Expr, - schema: &DFSchema, - ) -> Result> { - let physical = self.create_physical_expr(expr, schema)?; - let proto = serialize_physical_expr(&physical, &DefaultPhysicalExtensionCodec {})?; - Ok(proto.encode_to_vec()) - } - - pub(crate) fn split_physical_plan( - &self, - key_indices: Vec, - aggregate: &LogicalPlan, - add_timestamp_field: bool, - ) -> Result { - let physical_plan = self.sync_plan(aggregate)?; - let codec = FsPhysicalExtensionCodec { - context: DecodingContext::Planning, - }; - let mut physical_plan_node = - PhysicalPlanNode::try_from_physical_plan(physical_plan.clone(), &codec)?; - let PhysicalPlanType::Aggregate(mut final_aggregate_proto) = physical_plan_node - .physical_plan_type - .take() - .ok_or_else(|| DataFusionError::Plan("missing physical plan type".to_string()))? - else { - return plan_err!("unexpected physical plan type"); - }; - let AggregateMode::Final = final_aggregate_proto.mode() else { - return plan_err!("unexpected physical plan type"); - }; - - let partial_aggregation_plan = *final_aggregate_proto - .input - .take() - .ok_or_else(|| DataFusionError::Plan("missing input".to_string()))?; - - let partial_aggregation_exec_plan = partial_aggregation_plan.try_into_physical_plan( - self.schema_provider, - &RuntimeEnvBuilder::new().build().unwrap(), - &codec, - )?; - - let partial_schema = partial_aggregation_exec_plan.schema(); - let final_input_table_provider = FsMemExec::new("partial".into(), partial_schema.clone()); - - final_aggregate_proto.input = Some(Box::new(PhysicalPlanNode::try_from_physical_plan( - Arc::new(final_input_table_provider), - &codec, - )?)); - - let finish_plan = PhysicalPlanNode { - physical_plan_type: Some(PhysicalPlanType::Aggregate(final_aggregate_proto)), - }; - - let (partial_schema, timestamp_index) = if add_timestamp_field { - ( - add_timestamp_field_arrow((*partial_schema).clone()), - partial_schema.fields().len(), - ) - } else { - (partial_schema.clone(), 
partial_schema.fields().len() - 1) - }; - - let partial_schema = FsSchema::new_keyed(partial_schema, timestamp_index, key_indices); - - Ok(SplitPlanOutput { - partial_aggregation_plan, - partial_schema, - finish_plan, - }) - } - - pub fn binning_function_proto( - &self, - width: Duration, - input_schema: DFSchemaRef, - ) -> Result { - let date_bin = date_bin().call(vec![ - Expr::Literal( - ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( - 0, - 0, - width.as_nanos() as i64, - ))), - None, - ), - Expr::Column(datafusion::common::Column { - relation: None, - name: "_timestamp".into(), - spans: Spans::new(), - }), - ]); - - let binning_function = self.create_physical_expr(&date_bin, &input_schema)?; - serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {}) - } -} - -struct FsExtensionPlanner {} - -#[async_trait] -impl ExtensionPlanner for FsExtensionPlanner { - async fn plan_extension( - &self, - _planner: &dyn PhysicalPlanner, - node: &dyn UserDefinedLogicalNode, - _logical_inputs: &[&LogicalPlan], - physical_inputs: &[Arc], - _session_state: &SessionState, - ) -> Result>> { - let schema = node.schema().as_ref().into(); - if let Ok::<&dyn StreamExtension, _>(stream_extension) = node.try_into() { - if stream_extension.transparent() { - match node.name() { - DEBEZIUM_UNROLLING_EXTENSION_NAME => { - let node = node - .as_any() - .downcast_ref::() - .unwrap(); - let input = physical_inputs[0].clone(); - return Ok(Some(Arc::new(DebeziumUnrollingExec::try_new( - input, - node.primary_keys.clone(), - )?))); - } - TO_DEBEZIUM_EXTENSION_NAME => { - let input = physical_inputs[0].clone(); - return Ok(Some(Arc::new(ToDebeziumExec::try_new(input)?))); - } - _ => return Ok(None), - } - } - }; - let name = - if let Some(key_extension) = node.as_any().downcast_ref::() { - key_extension.name.clone() - } else { - None - }; - Ok(Some(Arc::new(FsMemExec::new( - name.unwrap_or("memory".to_string()), - Arc::new(schema), - )))) - } -} - 
-impl PlanToGraphVisitor<'_> { - fn add_index_to_traversal(&mut self, index: NodeIndex) { - if let Some(last) = self.traversal.last_mut() { - last.push(index); - } - } - - pub(crate) fn add_plan(&mut self, plan: LogicalPlan) -> Result<()> { - self.traversal.clear(); - plan.visit(self)?; - Ok(()) - } - - pub fn into_graph(self) -> LogicalGraph { - self.graph - } - - pub fn build_extension( - &mut self, - input_nodes: Vec, - extension: &dyn StreamExtension, - ) -> Result<()> { - if let Some(node_name) = extension.node_name() { - if self.named_nodes.contains_key(&node_name) { - return plan_err!( - "extension {:?} has already been planned, shouldn't try again.", - node_name - ); - } - } - - let input_schemas = input_nodes - .iter() - .map(|index| { - Ok(self - .output_schemas - .get(index) - .ok_or_else(|| DataFusionError::Plan("missing input node".to_string()))? - .clone()) - }) - .collect::>>()?; - - let NodeWithIncomingEdges { node, edges } = extension - .plan_node(&self.planner, self.graph.node_count(), input_schemas) - .map_err(|e| e.context(format!("planning operator {extension:?}")))?; - - let node_index = self.graph.add_node(node); - self.add_index_to_traversal(node_index); - - for (source, edge) in input_nodes.into_iter().zip(edges.into_iter()) { - self.graph.add_edge(source, node_index, edge); - } - - self.output_schemas - .insert(node_index, extension.output_schema().into()); - - if let Some(node_name) = extension.node_name() { - self.named_nodes.insert(node_name, node_index); - } - Ok(()) - } -} - -impl TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> { - type Node = LogicalPlan; - - fn f_down(&mut self, node: &Self::Node) -> Result { - let LogicalPlan::Extension(Extension { node }) = node else { - return Ok(TreeNodeRecursion::Continue); - }; - - let stream_extension: &dyn StreamExtension = node - .try_into() - .map_err(|e: DataFusionError| e.context("converting extension"))?; - if stream_extension.transparent() { - return Ok(TreeNodeRecursion::Continue); - } 
- - if let Some(name) = stream_extension.node_name() { - if let Some(node_index) = self.named_nodes.get(&name) { - self.add_index_to_traversal(*node_index); - return Ok(TreeNodeRecursion::Jump); - } - } - - if !node.inputs().is_empty() { - self.traversal.push(vec![]); - } - - Ok(TreeNodeRecursion::Continue) - } - - fn f_up(&mut self, node: &Self::Node) -> Result { - let LogicalPlan::Extension(Extension { node }) = node else { - return Ok(TreeNodeRecursion::Continue); - }; - - let stream_extension: &dyn StreamExtension = node - .try_into() - .map_err(|e: DataFusionError| e.context("planning extension"))?; - - if stream_extension.transparent() { - return Ok(TreeNodeRecursion::Continue); - } - - if let Some(name) = stream_extension.node_name() { - if self.named_nodes.contains_key(&name) { - return Ok(TreeNodeRecursion::Continue); - } - } - - let input_nodes = if !node.inputs().is_empty() { - self.traversal.pop().unwrap_or_default() - } else { - vec![] - }; - let stream_extension: &dyn StreamExtension = node - .try_into() - .map_err(|e: DataFusionError| e.context("converting extension"))?; - self.build_extension(input_nodes, stream_extension)?; - - Ok(TreeNodeRecursion::Continue) - } -} - -pub(crate) struct SplitPlanOutput { - pub(crate) partial_aggregation_plan: PhysicalPlanNode, - pub(crate) partial_schema: FsSchema, - pub(crate) finish_plan: PhysicalPlanNode, -} diff --git a/src/sql/physical/readers.rs b/src/sql/physical/readers.rs new file mode 100644 index 00000000..67a250fd --- /dev/null +++ b/src/sql/physical/readers.rs @@ -0,0 +1,372 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 无界/锁控 `RecordBatch` 数据源与规划期占位 `FsMemExec`。 + +use std::any::Any; +use std::mem; +use std::sync::Arc; + +use datafusion::arrow::array::RecordBatch; +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::catalog::memory::MemorySourceConfig; +use datafusion::common::{DataFusionError, Result, Statistics, not_impl_err, plan_err}; +use datafusion::datasource::memory::DataSourceExec; +use datafusion::execution::{SendableRecordBatchStream, TaskContext}; +use datafusion::physical_expr::EquivalenceProperties; +use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; +use datafusion::physical_plan::memory::MemoryStream; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::physical_plan::{ + DisplayAs, ExecutionPlan, Partitioning, PlanProperties, +}; +use futures::StreamExt; +use tokio::sync::mpsc::UnboundedReceiver; +use tokio_stream::wrappers::UnboundedReceiverStream; + +use crate::sql::common::constants::physical_plan_node_name; + +pub(crate) fn make_stream_properties(schema: SchemaRef) -> PlanProperties { + PlanProperties::new( + EquivalenceProperties::new(schema), + Partitioning::UnknownPartitioning(1), + EmissionType::Incremental, + Boundedness::Unbounded { + requires_infinite_memory: false, + }, + ) +} + +#[derive(Debug)] +pub(crate) struct RwLockRecordBatchReader { + schema: SchemaRef, + locked_batch: Arc>>, + properties: PlanProperties, +} + +impl RwLockRecordBatchReader { + pub(crate) fn new( + schema: SchemaRef, + locked_batch: Arc>>, + ) -> Self { + Self { + schema: schema.clone(), + 
locked_batch, + properties: make_stream_properties(schema), + } + } +} + +impl DisplayAs for RwLockRecordBatchReader { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "RW Lock RecordBatchReader") + } +} + +impl ExecutionPlan for RwLockRecordBatchReader { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Err(DataFusionError::Internal("not supported".into())) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + let result = self + .locked_batch + .write() + .unwrap() + .take() + .expect("should have set a record batch before calling execute()"); + Ok(Box::pin(MemoryStream::try_new( + vec![result], + self.schema.clone(), + None, + )?)) + } + + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema)) + } + + fn reset(&self) -> Result<()> { + Ok(()) + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn name(&self) -> &str { + physical_plan_node_name::RW_LOCK_READER + } +} + +#[derive(Debug)] +pub(crate) struct UnboundedRecordBatchReader { + schema: SchemaRef, + receiver: Arc>>>, + properties: PlanProperties, +} + +impl UnboundedRecordBatchReader { + pub(crate) fn new( + schema: SchemaRef, + receiver: Arc>>>, + ) -> Self { + Self { + schema: schema.clone(), + receiver, + properties: make_stream_properties(schema), + } + } +} + +impl DisplayAs for UnboundedRecordBatchReader { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "unbounded record batch reader") + } +} + +impl ExecutionPlan for UnboundedRecordBatchReader { + fn name(&self) -> &str { + physical_plan_node_name::UNBOUNDED_READER + } + + fn as_any(&self) -> &dyn Any { + 
self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Err(DataFusionError::Internal("not supported".into())) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + Ok(Box::pin(RecordBatchStreamAdapter::new( + self.schema.clone(), + UnboundedReceiverStream::new( + self.receiver + .write() + .unwrap() + .take() + .expect("unbounded receiver should be present before calling exec"), + ) + .map(Ok), + ))) + } + + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema)) + } + + fn reset(&self) -> Result<()> { + Ok(()) + } +} + +#[derive(Debug)] +pub(crate) struct RecordBatchVecReader { + schema: SchemaRef, + receiver: Arc>>, + properties: PlanProperties, +} + +impl RecordBatchVecReader { + pub(crate) fn new( + schema: SchemaRef, + receiver: Arc>>, + ) -> Self { + Self { + schema: schema.clone(), + receiver, + properties: make_stream_properties(schema), + } + } +} + +impl DisplayAs for RecordBatchVecReader { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "record batch vec reader") + } +} + +impl ExecutionPlan for RecordBatchVecReader { + fn name(&self) -> &str { + physical_plan_node_name::VEC_READER + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Err(DataFusionError::Internal("not supported".into())) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> Result { + let memory = MemorySourceConfig::try_new( + 
&[mem::take(self.receiver.write().unwrap().as_mut())], + self.schema.clone(), + None, + )?; + + DataSourceExec::new(Arc::new(memory)).execute(partition, context) + } + + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema)) + } + + fn reset(&self) -> Result<()> { + Ok(()) + } +} + +#[derive(Debug, Clone)] +pub struct FsMemExec { + pub table_name: String, + pub schema: SchemaRef, + properties: PlanProperties, +} + +impl DisplayAs for FsMemExec { + fn fmt_as( + &self, + _t: datafusion::physical_plan::DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!(f, "EmptyPartitionStream: schema={}", self.schema) + } +} + +impl FsMemExec { + pub fn new(table_name: String, schema: SchemaRef) -> Self { + Self { + schema: schema.clone(), + table_name, + properties: make_stream_properties(schema), + } + } +} + +impl ExecutionPlan for FsMemExec { + fn name(&self) -> &str { + physical_plan_node_name::MEM_EXEC + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + not_impl_err!("with_new_children is not implemented for mem_exec; should not be called") + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + plan_err!( + "EmptyPartitionStream cannot be executed, this is only used for physical planning before serialization" + ) + } + + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema)) + } + + fn reset(&self) -> Result<()> { + Ok(()) + } +} diff --git a/src/sql/physical/udfs.rs b/src/sql/physical/udfs.rs new file mode 100644 index 00000000..267cb6e3 --- /dev/null +++ b/src/sql/physical/udfs.rs @@ -0,0 +1,132 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance 
with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 流式 `window(start, end)` 标量 UDF。 + +use std::any::Any; +use std::sync::Arc; + +use datafusion::arrow::array::StructArray; +use datafusion::arrow::datatypes::{DataType, Field, TimeUnit}; +use datafusion::common::{Result, ScalarValue, plan_err}; +use datafusion::logical_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, +}; + +use crate::make_udf_function; +use crate::sql::common::constants::{window_function_udf, window_interval_field}; +use crate::sql::schema::utils::window_arrow_struct; + +#[derive(Debug)] +pub struct WindowFunctionUdf { + signature: Signature, +} + +impl Default for WindowFunctionUdf { + fn default() -> Self { + Self { + signature: Signature::new( + TypeSignature::Exact(vec![ + DataType::Timestamp(TimeUnit::Nanosecond, None), + DataType::Timestamp(TimeUnit::Nanosecond, None), + ]), + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for WindowFunctionUdf { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + window_function_udf::NAME + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _: &[DataType]) -> Result { + Ok(window_arrow_struct()) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let columns = args.args; + if columns.len() != 2 { + return plan_err!( + "window function expected 2 arguments, got {}", + columns.len() + ); + } + if columns[0].data_type() != DataType::Timestamp(TimeUnit::Nanosecond, None) { + return plan_err!( + "window function expected first 
argument to be a timestamp, got {:?}", + columns[0].data_type() + ); + } + if columns[1].data_type() != DataType::Timestamp(TimeUnit::Nanosecond, None) { + return plan_err!( + "window function expected second argument to be a timestamp, got {:?}", + columns[1].data_type() + ); + } + let fields = vec![ + Arc::new(Field::new( + window_interval_field::START, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )), + Arc::new(Field::new( + window_interval_field::END, + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )), + ] + .into(); + + match (&columns[0], &columns[1]) { + (ColumnarValue::Array(start), ColumnarValue::Array(end)) => { + Ok(ColumnarValue::Array(Arc::new(StructArray::new( + fields, + vec![start.clone(), end.clone()], + None, + )))) + } + (ColumnarValue::Array(start), ColumnarValue::Scalar(end)) => { + let end = end.to_array_of_size(start.len())?; + Ok(ColumnarValue::Array(Arc::new(StructArray::new( + fields, + vec![start.clone(), end], + None, + )))) + } + (ColumnarValue::Scalar(start), ColumnarValue::Array(end)) => { + let start = start.to_array_of_size(end.len())?; + Ok(ColumnarValue::Array(Arc::new(StructArray::new( + fields, + vec![start, end.clone()], + None, + )))) + } + (ColumnarValue::Scalar(start), ColumnarValue::Scalar(end)) => { + Ok(ColumnarValue::Scalar(ScalarValue::Struct( + StructArray::new(fields, vec![start.to_array()?, end.to_array()?], None).into(), + ))) + } + } + } +} + +make_udf_function!(WindowFunctionUdf, WINDOW_FUNCTION, window); diff --git a/src/sql/schema/data_encoding_format.rs b/src/sql/schema/data_encoding_format.rs index 67e6d7e3..29828c86 100644 --- a/src/sql/schema/data_encoding_format.rs +++ b/src/sql/schema/data_encoding_format.rs @@ -16,6 +16,7 @@ use datafusion::arrow::datatypes::{DataType, Field}; use datafusion::common::{Result, plan_err}; use super::column_descriptor::ColumnDescriptor; +use crate::sql::common::constants::{cdc, connection_format_value, with_opt_bool_str}; use 
crate::sql::common::with_option_keys as opt; use crate::sql::common::Format; @@ -38,14 +39,15 @@ impl DataEncodingFormat { let is_debezium = opts .get(opt::FORMAT_DEBEZIUM_FLAG) .or_else(|| opts.get(opt::JSON_DEBEZIUM)) - .map(|s| s == "true") + .map(|s| s == with_opt_bool_str::TRUE) .unwrap_or(false); match (format_str, is_debezium) { - ("json", true) | ("debezium_json", _) => Ok(Self::DebeziumJson), - ("json", false) => Ok(Self::StandardJson), - ("avro", _) => Ok(Self::Avro), - ("parquet", _) => Ok(Self::Parquet), + (f, true) if f == connection_format_value::JSON => Ok(Self::DebeziumJson), + (f, _) if f == connection_format_value::DEBEZIUM_JSON => Ok(Self::DebeziumJson), + (f, false) if f == connection_format_value::JSON => Ok(Self::StandardJson), + (f, _) if f == connection_format_value::AVRO => Ok(Self::Avro), + (f, _) if f == connection_format_value::PARQUET => Ok(Self::Parquet), _ => Ok(Self::Raw), } } @@ -78,9 +80,9 @@ impl DataEncodingFormat { let struct_type = DataType::Struct(fields.into()); Ok(vec![ - ColumnDescriptor::new_physical(Field::new("before", struct_type.clone(), true)), - ColumnDescriptor::new_physical(Field::new("after", struct_type.clone(), true)), - ColumnDescriptor::new_physical(Field::new("op", DataType::Utf8, true)), + ColumnDescriptor::new_physical(Field::new(cdc::BEFORE, struct_type.clone(), true)), + ColumnDescriptor::new_physical(Field::new(cdc::AFTER, struct_type.clone(), true)), + ColumnDescriptor::new_physical(Field::new(cdc::OP, DataType::Utf8, true)), ]) } } diff --git a/src/sql/schema/schema_provider.rs b/src/sql/schema/schema_provider.rs index 2bb5ef87..5ecde68e 100644 --- a/src/sql/schema/schema_provider.rs +++ b/src/sql/schema/schema_provider.rs @@ -26,6 +26,7 @@ use datafusion::sql::TableReference; use unicase::UniCase; use crate::sql::logical_node::logical::{DylibUdfConfig, LogicalProgram}; +use crate::sql::common::constants::{planning_placeholder_udf, window_fn}; use crate::sql::schema::table::Table as CatalogTable; use 
crate::sql::schema::utils::window_arrow_struct; use crate::sql::types::{PlaceholderUdf, PlanningOptions}; @@ -95,7 +96,7 @@ impl TableProvider for LogicalBatchInput { _filters: &[Expr], _limit: Option, ) -> Result> { - Ok(Arc::new(crate::sql::logical_planner::FsMemExec::new( + Ok(Arc::new(crate::sql::physical::FsMemExec::new( self.table_name.clone(), Arc::clone(&self.schema), ))) @@ -378,7 +379,7 @@ impl StreamPlanningContextBuilder { pub fn with_streaming_extensions(mut self) -> Result { let extensions = vec![ PlaceholderUdf::with_return( - "hop", + window_fn::HOP, vec![ DataType::Interval(datatypes::IntervalUnit::MonthDayNano), DataType::Interval(datatypes::IntervalUnit::MonthDayNano), @@ -386,22 +387,26 @@ impl StreamPlanningContextBuilder { window_arrow_struct(), ), PlaceholderUdf::with_return( - "tumble", + window_fn::TUMBLE, vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], window_arrow_struct(), ), PlaceholderUdf::with_return( - "session", + window_fn::SESSION, vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)], window_arrow_struct(), ), PlaceholderUdf::with_return( - "unnest", - vec![DataType::List(Arc::new(Field::new("field", DataType::Utf8, true)))], + planning_placeholder_udf::UNNEST, + vec![DataType::List(Arc::new(Field::new( + planning_placeholder_udf::LIST_ELEMENT_FIELD, + DataType::Utf8, + true, + )))], DataType::Utf8, ), PlaceholderUdf::with_return( - "row_time", + planning_placeholder_udf::ROW_TIME, vec![], DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None), ), diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs index 9e0caddf..63baca2a 100644 --- a/src/sql/schema/source_table.rs +++ b/src/sql/schema/source_table.rs @@ -37,6 +37,9 @@ use super::temporal_pipeline_config::{resolve_temporal_logic, TemporalPipelineCo use super::StreamSchemaProvider; use crate::multifield_partial_ord; use crate::sql::api::{ConnectionProfile, ConnectionSchema, SourceField}; +use crate::sql::common::constants::{ 
+ connection_table_role, connector_type, kafka_with_value, sql_field, +}; use crate::sql::common::connector_options::ConnectorOptions; use crate::sql::common::kafka_catalog::{ KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode, @@ -266,7 +269,7 @@ impl SourceTable { if let Some(Format::Json(JsonFormat { compression, .. })) = &format && !matches!(compression, JsonCompression::Uncompressed) - && connector_name != "filesystem" + && connector_name != connector_type::FILESYSTEM { return plan_err!("'json.compression' is only supported for the filesystem connector"); } @@ -275,8 +278,8 @@ impl SourceTable { .map_err(|e| DataFusionError::Plan(format!("invalid framing: '{e}'")))?; if temporary - && let Some(t) = options.insert_str(opt::TYPE, "lookup")? - && t != "lookup" + && let Some(t) = options.insert_str(opt::TYPE, connection_table_role::LOOKUP)? + && t != connection_table_role::LOOKUP { return plan_err!( "Cannot have a temporary table with type '{t}'; temporary tables must be type 'lookup'" @@ -323,9 +326,9 @@ impl SourceTable { t.into() } else { match options.pull_opt_str(opt::TYPE)?.as_deref() { - None | Some("source") => TableRole::Ingestion, - Some("sink") => TableRole::Egress, - Some("lookup") => TableRole::Reference, + None | Some(connection_table_role::SOURCE) => TableRole::Ingestion, + Some(connection_table_role::SINK) => TableRole::Egress, + Some(connection_table_role::LOOKUP) => TableRole::Reference, Some(other) => { return plan_err!("invalid connection type '{other}' in WITH options"); } @@ -405,13 +408,14 @@ impl SourceTable { table.schema_specs.push(ColumnDescriptor::new_computed( Field::new( - "__watermark", + sql_field::COMPUTED_WATERMARK, logical_expr.get_type(&df_schema)?, false, ), logical_expr, )); - table.temporal_config.watermark_strategy_column = Some("__watermark".to_string()); + table.temporal_config.watermark_strategy_column = + Some(sql_field::COMPUTED_WATERMARK.to_string()); } else { 
table.temporal_config.watermark_strategy_column = Some(time_field); } @@ -428,7 +432,7 @@ impl SourceTable { table.lookup_cache_ttl = options.pull_opt_duration(opt::LOOKUP_CACHE_TTL)?; - if connector_name.eq_ignore_ascii_case("kafka") { + if connector_name.eq_ignore_ascii_case(connector_type::KAFKA) { let physical = table.produce_physical_schema(); let op_cfg = wire_kafka_operator_config( options, @@ -612,9 +616,15 @@ fn wire_kafka_operator_config( let kind = match role { TableRole::Ingestion => { let offset = match options.pull_opt_str(opt::KAFKA_SCAN_STARTUP_MODE)?.as_deref() { - Some("latest") => KafkaTableSourceOffset::Latest, - Some("earliest") => KafkaTableSourceOffset::Earliest, - None | Some("group-offsets") | Some("group") => KafkaTableSourceOffset::Group, + Some(s) if s == kafka_with_value::SCAN_LATEST => KafkaTableSourceOffset::Latest, + Some(s) if s == kafka_with_value::SCAN_EARLIEST => KafkaTableSourceOffset::Earliest, + Some(s) + if s == kafka_with_value::SCAN_GROUP_OFFSETS + || s == kafka_with_value::SCAN_GROUP => + { + KafkaTableSourceOffset::Group + } + None => KafkaTableSourceOffset::Group, Some(other) => { return plan_err!( "invalid scan.startup.mode '{other}'; expected latest, earliest, or group-offsets" @@ -622,8 +632,12 @@ fn wire_kafka_operator_config( } }; let read_mode = match options.pull_opt_str(opt::KAFKA_ISOLATION_LEVEL)?.as_deref() { - Some("read_committed") => Some(ReadMode::ReadCommitted), - Some("read_uncommitted") => Some(ReadMode::ReadUncommitted), + Some(s) if s == kafka_with_value::ISOLATION_READ_COMMITTED => { + Some(ReadMode::ReadCommitted) + } + Some(s) if s == kafka_with_value::ISOLATION_READ_UNCOMMITTED => { + Some(ReadMode::ReadUncommitted) + } None => None, Some(other) => { return plan_err!("invalid isolation.level '{other}'"); @@ -643,8 +657,19 @@ fn wire_kafka_operator_config( } TableRole::Egress => { let commit_mode = match options.pull_opt_str(opt::KAFKA_SINK_COMMIT_MODE)?.as_deref() { - Some("exactly-once") | 
Some("exactly_once") => SinkCommitMode::ExactlyOnce, - None | Some("at-least-once") | Some("at_least_once") => SinkCommitMode::AtLeastOnce, + Some(s) + if s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_HYPHEN + || s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_UNDERSCORE => + { + SinkCommitMode::ExactlyOnce + } + None => SinkCommitMode::AtLeastOnce, + Some(s) + if s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_HYPHEN + || s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_UNDERSCORE => + { + SinkCommitMode::AtLeastOnce + } Some(other) => { return plan_err!("invalid sink.commit.mode '{other}'"); } diff --git a/src/sql/schema/table_role.rs b/src/sql/schema/table_role.rs index 12bd8068..bf3fed74 100644 --- a/src/sql/schema/table_role.rs +++ b/src/sql/schema/table_role.rs @@ -18,6 +18,9 @@ use datafusion::error::DataFusionError; use super::column_descriptor::ColumnDescriptor; use super::connection_type::ConnectionType; +use crate::sql::common::constants::{ + connection_table_role, connector_type, SUPPORTED_CONNECTOR_ADAPTERS, +}; use crate::sql::common::with_option_keys as opt; /// Role of a connector-backed table in the pipeline (ingest / egress / lookup). 
@@ -49,25 +52,7 @@ impl From for TableRole { } pub fn validate_adapter_availability(adapter: &str) -> Result<()> { - let supported = [ - "kafka", - "kinesis", - "filesystem", - "delta", - "iceberg", - "pulsar", - "nats", - "redis", - "mqtt", - "websocket", - "sse", - "nexmark", - "blackhole", - "lookup", - "memory", - "postgres", - ]; - if !supported.contains(&adapter) { + if !SUPPORTED_CONNECTOR_ADAPTERS.contains(&adapter) { return Err(DataFusionError::Plan(format!("Unknown adapter '{adapter}'"))); } Ok(()) @@ -75,7 +60,7 @@ pub fn validate_adapter_availability(adapter: &str) -> Result<()> { pub fn apply_adapter_specific_rules(adapter: &str, mut cols: Vec) -> Vec { match adapter { - "delta" | "iceberg" => { + a if a == connector_type::DELTA || a == connector_type::ICEBERG => { for c in &mut cols { if matches!(c.data_type(), DataType::Timestamp(_, _)) { c.force_precision(TimeUnit::Microsecond); @@ -89,9 +74,9 @@ pub fn apply_adapter_specific_rules(adapter: &str, mut cols: Vec) -> Result { match options.get(opt::TYPE).map(|s| s.as_str()) { - None | Some("source") => Ok(TableRole::Ingestion), - Some("sink") => Ok(TableRole::Egress), - Some("lookup") => Ok(TableRole::Reference), + None | Some(connection_table_role::SOURCE) => Ok(TableRole::Ingestion), + Some(connection_table_role::SINK) => Ok(TableRole::Egress), + Some(connection_table_role::LOOKUP) => Ok(TableRole::Reference), Some(other) => plan_err!("Invalid role '{other}'"), } } diff --git a/src/sql/schema/temporal_pipeline_config.rs b/src/sql/schema/temporal_pipeline_config.rs index eb29f71c..f672e552 100644 --- a/src/sql/schema/temporal_pipeline_config.rs +++ b/src/sql/schema/temporal_pipeline_config.rs @@ -16,6 +16,7 @@ use datafusion::common::{Result, plan_err}; use datafusion::logical_expr::Expr; use super::column_descriptor::ColumnDescriptor; +use crate::sql::common::constants::sql_field; /// Event-time and watermark configuration for streaming tables. 
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] @@ -47,7 +48,7 @@ pub fn resolve_temporal_logic( config.event_column = Some(meta.time_field.clone()); if meta.watermark_expr.is_some() { - config.watermark_strategy_column = Some("__watermark".to_string()); + config.watermark_strategy_column = Some(sql_field::COMPUTED_WATERMARK.to_string()); } else { config.watermark_strategy_column = Some(meta.time_field); } diff --git a/src/sql/schema/utils.rs b/src/sql/schema/utils.rs index c0b8a7d0..ba408f22 100644 --- a/src/sql/schema/utils.rs +++ b/src/sql/schema/utils.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; use datafusion::common::{DFSchema, DFSchemaRef, Result as DFResult, TableReference}; +use crate::sql::common::constants::window_interval_field; use crate::sql::types::{DFField, TIMESTAMP_FIELD}; /// Returns the Arrow struct type for a window (start, end) pair. @@ -23,12 +24,12 @@ pub fn window_arrow_struct() -> DataType { DataType::Struct( vec![ Arc::new(Field::new( - "start", + window_interval_field::START, DataType::Timestamp(TimeUnit::Nanosecond, None), false, )), Arc::new(Field::new( - "end", + window_interval_field::END, DataType::Timestamp(TimeUnit::Nanosecond, None), false, )), diff --git a/src/sql/types/data_type.rs b/src/sql/types/data_type.rs index 1fc55ecc..4736f812 100644 --- a/src/sql/types/data_type.rs +++ b/src/sql/types/data_type.rs @@ -17,6 +17,7 @@ use datafusion::arrow::datatypes::{ }; use datafusion::common::{Result, plan_datafusion_err, plan_err}; +use crate::sql::common::constants::planning_placeholder_udf; use crate::sql::common::FsExtensionType; pub fn convert_data_type( @@ -33,7 +34,7 @@ pub fn convert_data_type( Ok(( DataType::List(Arc::new(FsExtensionType::add_metadata( extension, - Field::new("field", data_type, true), + Field::new(planning_placeholder_udf::LIST_ELEMENT_FIELD, data_type, true), ))), None, )) From 9b41175685bae4f26ba4b9d66b1f58bba871d7b8 Mon 
Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 29 Mar 2026 15:56:22 +0800 Subject: [PATCH 26/44] update --- src/sql/common/constants.rs | 12 ++++++++++++ src/sql/common/mod.rs | 3 +-- src/sql/types/mod.rs | 8 +++++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/sql/common/constants.rs b/src/sql/common/constants.rs index f5dd56ef..5f249227 100644 --- a/src/sql/common/constants.rs +++ b/src/sql/common/constants.rs @@ -122,6 +122,18 @@ pub mod sql_field { pub const DEFAULT_PROJECTION_LABEL: &str = "projection"; /// `WATERMARK FOR … AS expr` 生成的计算列名(与 `TemporalPipelineConfig` 一致)。 pub const COMPUTED_WATERMARK: &str = "__watermark"; + /// 流表事件时间物理列名(与 DataFusion 计划注入列一致)。 + pub const TIMESTAMP_FIELD: &str = "_timestamp"; + /// Changelog / updating 模式下的元数据列名。 + pub const UPDATING_META_FIELD: &str = "_updating_meta"; +} + +// ── `SqlConfig` / `PlanningOptions` 默认值 ──────────────────────────────────── + +pub mod sql_planning_default { + pub const DEFAULT_PARALLELISM: usize = 4; + /// [`PlanningOptions::default`] 的 TTL(秒):24h。 + pub const PLANNING_TTL_SECS: u64 = 24 * 60 * 60; } // ── `ConnectorOptions` / WITH 解析用到的字面量 ──────────────────────────────── diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs index 7a4b4ee4..3302eb10 100644 --- a/src/sql/common/mod.rs +++ b/src/sql/common/mod.rs @@ -61,8 +61,7 @@ pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat}; pub use operator_config::{MetadataField, OperatorConfig, RateLimit}; // ── Well-known column names ── -pub const TIMESTAMP_FIELD: &str = "_timestamp"; -pub const UPDATING_META_FIELD: &str = "_updating_meta"; +pub use constants::sql_field::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; // ── Environment variables ── pub const JOB_ID_ENV: &str = "JOB_ID"; diff --git a/src/sql/types/mod.rs b/src/sql/types/mod.rs index 16d7033b..41753e38 100644 --- a/src/sql/types/mod.rs +++ b/src/sql/types/mod.rs @@ -18,6 +18,8 @@ mod window; use std::time::Duration; +use 
crate::sql::common::constants::sql_planning_default; + pub use data_type::convert_data_type; pub use df_field::{ DFField, fields_with_qualifiers, schema_from_df_fields, schema_from_df_fields_with_metadata, @@ -27,7 +29,7 @@ pub use stream_schema::StreamSchema; pub(crate) use window::WindowBehavior; pub use window::{WindowType, find_window, get_duration}; -pub const TIMESTAMP_FIELD: &str = "_timestamp"; +pub use crate::sql::common::constants::sql_field::TIMESTAMP_FIELD; #[derive(Clone, Debug, Eq, PartialEq)] pub enum ProcessingMode { @@ -43,7 +45,7 @@ pub struct SqlConfig { impl Default for SqlConfig { fn default() -> Self { Self { - default_parallelism: 4, + default_parallelism: sql_planning_default::DEFAULT_PARALLELISM, } } } @@ -56,7 +58,7 @@ pub struct PlanningOptions { impl Default for PlanningOptions { fn default() -> Self { Self { - ttl: Duration::from_secs(24 * 60 * 60), + ttl: Duration::from_secs(sql_planning_default::PLANNING_TTL_SECS), } } } From 5b596f2c59432652d6e533b95466df6d4eab2eb2 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 29 Mar 2026 17:15:58 +0800 Subject: [PATCH 27/44] update --- Cargo.lock | 53 ------------ Cargo.toml | 4 - src/coordinator/execution/executor.rs | 3 +- src/coordinator/plan/logical_plan_visitor.rs | 1 - src/coordinator/plan/streaming_table_plan.rs | 2 - src/runtime/mod.rs | 1 + src/runtime/streaming/api/mod.rs | 2 +- src/runtime/streaming/api/operator.rs | 59 ------------- .../factory/connector/dispatchers.rs | 58 +++++++++++++ .../kafka_factory.rs => connector/kafka.rs} | 12 +-- .../streaming/factory/connector/mod.rs | 19 +++++ src/runtime/streaming/factory/global/mod.rs | 17 ++++ .../factory/global/session_registry.rs | 64 ++++++++++++++ src/runtime/streaming/factory/mod.rs | 53 +++++++++++- .../streaming/factory/operator_constructor.rs | 26 ++++++ .../{registry/mod.rs => operator_factory.rs} | 83 ++----------------- src/runtime/streaming/job/job_manager.rs | 7 +- src/runtime/streaming/lib.rs | 1 - 
src/runtime/streaming/mod.rs | 1 - .../grouping/incremental_aggregate.rs | 5 +- .../operators/joins/join_instance.rs | 3 +- .../operators/joins/join_with_expiration.rs | 3 +- .../streaming/operators/sink/kafka/mod.rs | 3 +- .../operators/stateless_physical_executor.rs | 2 +- .../watermark/watermark_generator.rs | 3 +- .../windows/session_aggregating_window.rs | 2 +- .../windows/sliding_aggregating_window.rs | 2 +- .../windows/tumbling_aggregating_window.rs | 2 +- .../operators/windows/window_function.rs | 3 +- src/runtime/util/mod.rs | 17 ++++ .../mod.rs => util/physical_aggregate.rs} | 9 +- src/server/initializer.rs | 2 +- src/sql/common/constants.rs | 9 ++ 33 files changed, 300 insertions(+), 231 deletions(-) create mode 100644 src/runtime/streaming/factory/connector/dispatchers.rs rename src/runtime/streaming/factory/{registry/kafka_factory.rs => connector/kafka.rs} (95%) create mode 100644 src/runtime/streaming/factory/connector/mod.rs create mode 100644 src/runtime/streaming/factory/global/mod.rs create mode 100644 src/runtime/streaming/factory/global/session_registry.rs create mode 100644 src/runtime/streaming/factory/operator_constructor.rs rename src/runtime/streaming/factory/{registry/mod.rs => operator_factory.rs} (72%) create mode 100644 src/runtime/util/mod.rs rename src/runtime/{streaming/arrow/mod.rs => util/physical_aggregate.rs} (95%) diff --git a/Cargo.lock b/Cargo.lock index e9ce4109..c6994ec0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2264,14 +2264,12 @@ dependencies = [ "base64", "bincode", "chrono", - "clap", "crossbeam-channel", "datafusion", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-functions", - "datafusion-functions-aggregate", "datafusion-functions-window", "datafusion-physical-expr", "datafusion-physical-plan", @@ -2286,8 +2284,6 @@ dependencies = [ "num_cpus", "parking_lot", "parquet 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fparquet)", - "pest", - "pest_derive", "petgraph 
0.7.1", "proctitle", "prost", @@ -3762,49 +3758,6 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" -[[package]] -name = "pest" -version = "2.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9eb05c21a464ea704b53158d358a31e6425db2f63a1a7312268b05fe2b75f7" -dependencies = [ - "memchr", - "ucd-trie", -] - -[[package]] -name = "pest_derive" -version = "2.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f9dbced329c441fa79d80472764b1a2c7e57123553b8519b36663a2fb234ed" -dependencies = [ - "pest", - "pest_generator", -] - -[[package]] -name = "pest_generator" -version = "2.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bb96d5051a78f44f43c8f712d8e810adb0ebf923fc9ed2655a7f66f63ba8ee5" -dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "pest_meta" -version = "2.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602113b5b5e8621770cfd490cfd90b9f84ab29bd2b0e49ad83eb6d186cef2365" -dependencies = [ - "pest", - "sha2", -] - [[package]] name = "petgraph" version = "0.6.5" @@ -5431,12 +5384,6 @@ dependencies = [ "typify-impl", ] -[[package]] -name = "ucd-trie" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" - [[package]] name = "unicase" version = "2.9.0" diff --git a/Cargo.toml b/Cargo.toml index 8e343baa..7c49d04c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,9 +38,6 @@ protocol = { path = "./protocol" } prost = "0.13" rdkafka = { version = "0.38", features = ["cmake-build", "ssl", "gssapi"] } crossbeam-channel = "0.5" -pest = "2.7" -pest_derive = "2.7" -clap = { version = "4.5", features = ["derive"] } wasmtime = { version = "41.0.3", features = 
["component-model", "async"] } base64 = "0.22" wasmtime-wasi = "41.0.3" @@ -63,7 +60,6 @@ petgraph = "0.7" rand = { version = "0.8", features = ["small_rng"] } itertools = "0.14" strum = { version = "0.26", features = ["derive"] } -datafusion-functions-aggregate = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'} typify = { git = 'https://github.com/ArroyoSystems/typify.git', branch = 'arroyo' } parquet = {git = 'https://github.com/ArroyoSystems/arrow-rs', branch = '55.2.0/parquet'} diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 28082abe..f9f26cd0 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -284,8 +284,9 @@ impl PlanVisitor for Executor { let fs_program: FsProgram = plan.program.clone().into(); let job_manager: Arc = Arc::clone(&self.job_manager); + let job_id = plan.name.clone(); let job_id = tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on(job_manager.submit_job(fs_program)) + tokio::runtime::Handle::current().block_on(job_manager.submit_job(job_id, fs_program)) }) .map_err(|e| ExecuteError::Internal(format!("Failed to submit streaming job: {e}")))?; diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index 9e95c5bd..e2e457eb 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -155,7 +155,6 @@ impl LogicalPlanVisitor { Ok(StreamingTable { name: sink_table_name, comment: comment.clone(), - source_table: sink_definition, program: validated_program, }) } diff --git a/src/coordinator/plan/streaming_table_plan.rs b/src/coordinator/plan/streaming_table_plan.rs index c7b09c26..512ec266 100644 --- a/src/coordinator/plan/streaming_table_plan.rs +++ b/src/coordinator/plan/streaming_table_plan.rs @@ -12,14 +12,12 @@ use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; use 
crate::sql::logical_node::logical::LogicalProgram; -use crate::sql::schema::source_table::SourceTable; /// Plan node representing a fully resolved streaming table (DDL). #[derive(Debug)] pub struct StreamingTable { pub name: String, pub comment: Option, - pub source_table: SourceTable, pub program: LogicalProgram, } diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs index 0dce921e..61b67e1f 100644 --- a/src/runtime/mod.rs +++ b/src/runtime/mod.rs @@ -15,6 +15,7 @@ pub mod buffer_and_event; pub mod common; pub mod streaming; +pub mod util; pub mod task; pub mod taskexecutor; pub mod wasm; diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs index 49e45328..aec4b8fb 100644 --- a/src/runtime/streaming/api/mod.rs +++ b/src/runtime/streaming/api/mod.rs @@ -17,5 +17,5 @@ pub mod operator; pub mod source; pub use context::TaskContext; -pub use operator::{ConstructedOperator, MessageOperator, Registry}; +pub use operator::{ConstructedOperator, MessageOperator}; pub use source::{SourceEvent, SourceOffset, SourceOperator}; diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs index eabeff85..29d24f82 100644 --- a/src/runtime/streaming/api/operator.rs +++ b/src/runtime/streaming/api/operator.rs @@ -15,68 +15,9 @@ use crate::runtime::streaming::api::source::SourceOperator; use crate::runtime::streaming::protocol::stream_out::StreamOutput; use arrow_array::RecordBatch; use async_trait::async_trait; -use datafusion::common::Result as DfResult; -use datafusion::execution::context::SessionContext; -use datafusion::execution::FunctionRegistry; -use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF}; -use datafusion::logical_expr::planner::ExprPlanner; -use std::collections::HashSet; -use std::sync::Arc; use std::time::Duration; use crate::sql::common::{CheckpointBarrier, Watermark}; -// --------------------------------------------------------------------------- -// Registry — 算子 / UDF 注册表(取代 
tracing_subscriber::Registry) -// --------------------------------------------------------------------------- - -/// 运行时函数与状态注册表。 -/// -/// 包装 DataFusion [`SessionContext`],为物理计划反序列化提供 UDF / UDAF / UDWF 查询能力。 -/// `Arc` 在工厂中创建后,由各构造器共享。 -pub struct Registry { - ctx: SessionContext, -} - -impl Registry { - pub fn new() -> Self { - Self { - ctx: SessionContext::new(), - } - } - - pub fn session_context(&self) -> &SessionContext { - &self.ctx - } -} - -impl Default for Registry { - fn default() -> Self { - Self::new() - } -} - -impl FunctionRegistry for Registry { - fn udfs(&self) -> HashSet { - self.ctx.udfs() - } - - fn udf(&self, name: &str) -> DfResult> { - self.ctx.udf(name) - } - - fn udaf(&self, name: &str) -> DfResult> { - self.ctx.udaf(name) - } - - fn udwf(&self, name: &str) -> DfResult> { - self.ctx.udwf(name) - } - - fn expr_planners(&self) -> Vec> { - self.ctx.expr_planners() - } -} - // --------------------------------------------------------------------------- // ConstructedOperator // --------------------------------------------------------------------------- diff --git a/src/runtime/streaming/factory/connector/dispatchers.rs b/src/runtime/streaming/factory/connector/dispatchers.rs new file mode 100644 index 00000000..dcdd1e32 --- /dev/null +++ b/src/runtime/streaming/factory/connector/dispatchers.rs @@ -0,0 +1,58 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
Source / Sink 连接器协议:按 [`ConnectorOp::connector`] 分发到具体实现。 + +use anyhow::{anyhow, Result}; +use prost::Message; +use std::sync::Arc; + +use protocol::grpc::api::ConnectorOp; + +use crate::runtime::streaming::api::operator::ConstructedOperator; +use crate::runtime::streaming::factory::global::Registry; +use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor; +use crate::sql::common::constants::connector_type; + +use super::kafka::{KafkaSinkDispatcher, KafkaSourceDispatcher}; + +pub struct ConnectorSourceDispatcher; + +impl OperatorConstructor for ConnectorSourceDispatcher { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let op = ConnectorOp::decode(config) + .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?; + + match op.connector.as_str() { + ct if ct == connector_type::KAFKA => KafkaSourceDispatcher.with_config(config, registry), + ct if ct == connector_type::REDIS => Err(anyhow!( + "ConnectorSource '{}' factory wiring not yet implemented", + op.connector + )), + other => Err(anyhow!("Unsupported source connector type: {}", other)), + } + } +} + +pub struct ConnectorSinkDispatcher; + +impl OperatorConstructor for ConnectorSinkDispatcher { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let op = ConnectorOp::decode(config) + .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?; + + match op.connector.as_str() { + ct if ct == connector_type::KAFKA => KafkaSinkDispatcher.with_config(config, registry), + other => Err(anyhow!("Unsupported sink connector type: {}", other)), + } + } +} diff --git a/src/runtime/streaming/factory/registry/kafka_factory.rs b/src/runtime/streaming/factory/connector/kafka.rs similarity index 95% rename from src/runtime/streaming/factory/registry/kafka_factory.rs rename to src/runtime/streaming/factory/connector/kafka.rs index 8f42acd9..ab72ea9e 100644 --- a/src/runtime/streaming/factory/registry/kafka_factory.rs +++ 
b/src/runtime/streaming/factory/connector/kafka.rs @@ -21,9 +21,10 @@ use std::sync::Arc; use protocol::grpc::api::ConnectorOp; use tracing::{info, warn}; -use super::OperatorConstructor; -use crate::runtime::streaming::api::operator::{ConstructedOperator, Registry}; +use crate::runtime::streaming::api::operator::ConstructedOperator; use crate::runtime::streaming::api::source::SourceOffset; +use crate::runtime::streaming::factory::global::Registry; +use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor; use crate::runtime::streaming::format::{ BadDataPolicy, DataSerializer, DecimalEncoding as RtDecimalEncoding, Format as RuntimeFormat, JsonFormat as RuntimeJsonFormat, TimestampFormat as RtTimestampFormat, @@ -332,10 +333,3 @@ impl OperatorConstructor for KafkaSinkDispatcher { Ok(ConstructedOperator::Operator(Box::new(sink_op))) } } - -/// 注册 `KafkaSource` / `KafkaSink` 构造器(由 [`super::OperatorFactory::register_builtins`] 调用)。 -pub fn register_kafka_plugins(factory: &mut super::OperatorFactory) { - factory.register("KafkaSource", Box::new(KafkaSourceDispatcher)); - factory.register("KafkaSink", Box::new(KafkaSinkDispatcher)); - info!("Registered Kafka connector plugins (KafkaSource, KafkaSink)"); -} diff --git a/src/runtime/streaming/factory/connector/mod.rs b/src/runtime/streaming/factory/connector/mod.rs new file mode 100644 index 00000000..3b8af292 --- /dev/null +++ b/src/runtime/streaming/factory/connector/mod.rs @@ -0,0 +1,19 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +//! Source / Sink 连接器:`ConnectorOp` 分发与各连接器实现(如 Kafka)。 + +mod dispatchers; +pub mod kafka; + +pub use dispatchers::{ConnectorSinkDispatcher, ConnectorSourceDispatcher}; +pub use kafka::{KafkaSinkDispatcher, KafkaSourceDispatcher}; diff --git a/src/runtime/streaming/factory/global/mod.rs b/src/runtime/streaming/factory/global/mod.rs new file mode 100644 index 00000000..9434c157 --- /dev/null +++ b/src/runtime/streaming/factory/global/mod.rs @@ -0,0 +1,17 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 全局运行时资源:共享 [`Registry`](Session + UDF 表),与连接器协议无关。 + +mod session_registry; + +pub use session_registry::Registry; diff --git a/src/runtime/streaming/factory/global/session_registry.rs b/src/runtime/streaming/factory/global/session_registry.rs new file mode 100644 index 00000000..ef32c30e --- /dev/null +++ b/src/runtime/streaming/factory/global/session_registry.rs @@ -0,0 +1,64 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +//! 运行时 UDF / UDAF / UDWF 查询表(基于 DataFusion [`SessionContext`])。 + +use std::collections::HashSet; +use std::sync::Arc; + +use datafusion::common::Result as DfResult; +use datafusion::execution::context::SessionContext; +use datafusion::execution::FunctionRegistry; +use datafusion::logical_expr::planner::ExprPlanner; +use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF}; + +/// 为物理计划反序列化等路径提供 [`FunctionRegistry`] 实现。 +/// +/// 由 [`crate::runtime::streaming::factory::OperatorFactory`] 持有 `Arc`, +/// 与各 [`crate::runtime::streaming::factory::OperatorConstructor`] 共享;须显式 [`Self::new`] 构造,无默认实例。 +pub struct Registry { + ctx: SessionContext, +} + +impl Registry { + pub fn new() -> Self { + Self { + ctx: SessionContext::new(), + } + } + + pub fn session_context(&self) -> &SessionContext { + &self.ctx + } +} + +impl FunctionRegistry for Registry { + fn udfs(&self) -> HashSet { + self.ctx.udfs() + } + + fn udf(&self, name: &str) -> DfResult> { + self.ctx.udf(name) + } + + fn udaf(&self, name: &str) -> DfResult> { + self.ctx.udaf(name) + } + + fn udwf(&self, name: &str) -> DfResult> { + self.ctx.udwf(name) + } + + fn expr_planners(&self) -> Vec> { + self.ctx.expr_planners() + } +} diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs index 8c03c298..8a7e686a 100644 --- a/src/runtime/streaming/factory/mod.rs +++ b/src/runtime/streaming/factory/mod.rs @@ -10,9 +10,54 @@ // See the License for the specific language governing permissions and // limitations under the License. -pub mod registry; +//! 流算子工厂:[`global`] 为共享注册表;[`connector`] 为 Source/Sink 协议与实现; +//! 
[`OperatorFactory`]、[`OperatorConstructor`] 在根模块,避免与 `connector` 循环依赖。 -pub use registry::{ - ConnectorSinkDispatcher, ConnectorSourceDispatcher, OperatorConstructor, OperatorFactory, - PassthroughConstructor, +pub mod connector; +pub mod global; + +mod operator_constructor; +mod operator_factory; + +use tracing::info; + +use crate::sql::common::constants::factory_operator_name; + +#[allow(unused_imports)] +pub use connector::{ + ConnectorSinkDispatcher, ConnectorSourceDispatcher, KafkaSinkDispatcher, KafkaSourceDispatcher, }; +pub use global::Registry; +pub use operator_constructor::OperatorConstructor; +pub use operator_factory::OperatorFactory; +#[allow(unused_imports)] +pub use operator_factory::PassthroughConstructor; + +/// 注册 `ConnectorSource` / `ConnectorSink` 分发器(打破 `operator_factory` ↔ `connector` 依赖环)。 +fn register_builtin_connectors(factory: &mut OperatorFactory) { + factory.register( + factory_operator_name::CONNECTOR_SOURCE, + Box::new(connector::ConnectorSourceDispatcher), + ); + factory.register( + factory_operator_name::CONNECTOR_SINK, + Box::new(connector::ConnectorSinkDispatcher), + ); +} + +/// 注册直连 Kafka 算子(名称见 [`crate::sql::common::constants::factory_operator_name`])。 +fn register_kafka_connector_plugins(factory: &mut OperatorFactory) { + factory.register( + factory_operator_name::KAFKA_SOURCE, + Box::new(connector::KafkaSourceDispatcher), + ); + factory.register( + factory_operator_name::KAFKA_SINK, + Box::new(connector::KafkaSinkDispatcher), + ); + info!( + "Registered Kafka connector plugins ({}, {})", + factory_operator_name::KAFKA_SOURCE, + factory_operator_name::KAFKA_SINK + ); +} diff --git a/src/runtime/streaming/factory/operator_constructor.rs b/src/runtime/streaming/factory/operator_constructor.rs new file mode 100644 index 00000000..b6b6203f --- /dev/null +++ b/src/runtime/streaming/factory/operator_constructor.rs @@ -0,0 +1,26 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file 
except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 算子构造协议:与具体连接器实现解耦,供 [`super::OperatorFactory`] 与 `connector` 共用。 + +use anyhow::Result; +use std::sync::Arc; + +use crate::runtime::streaming::api::operator::ConstructedOperator; +use crate::runtime::streaming::factory::global::Registry; + +/// 算子构造器 trait:每个实现者负责从 protobuf 字节流反序列化配置并构造 [`ConstructedOperator`]。 +/// +/// 外部插件可实现此 trait 并通过 [`crate::runtime::streaming::factory::OperatorFactory::register`] 注入。 +pub trait OperatorConstructor: Send + Sync { + fn with_config(&self, config: &[u8], registry: Arc) -> Result; +} diff --git a/src/runtime/streaming/factory/registry/mod.rs b/src/runtime/streaming/factory/operator_factory.rs similarity index 72% rename from src/runtime/streaming/factory/registry/mod.rs rename to src/runtime/streaming/factory/operator_factory.rs index d129f644..dfc6bb87 100644 --- a/src/runtime/streaming/factory/registry/mod.rs +++ b/src/runtime/streaming/factory/operator_factory.rs @@ -10,15 +10,16 @@ // See the License for the specific language governing permissions and // limitations under the License. +//! 
全局算子工厂:内置窗口 / Join / KeyBy 等 Bridge。 + use anyhow::{anyhow, Result}; use prost::Message; use std::collections::HashMap; use std::sync::Arc; -use crate::sql::common::constants::connector_type; -use crate::runtime::streaming::api::operator::Registry; - +use super::operator_constructor::OperatorConstructor; use crate::runtime::streaming::api::operator::ConstructedOperator; +use crate::runtime::streaming::factory::global::Registry; use crate::runtime::streaming::operators::PassthroughOperator; use crate::runtime::streaming::operators::grouping::IncrementalAggregatingConstructor; use crate::runtime::streaming::operators::joins::{ @@ -31,34 +32,13 @@ use crate::runtime::streaming::operators::windows::{ TumblingAggregateWindowConstructor, WindowFunctionConstructor, }; -pub mod kafka_factory; - -use kafka_factory::{register_kafka_plugins, KafkaSinkDispatcher, KafkaSourceDispatcher}; - use protocol::grpc::api::{ - ConnectorOp, ExpressionWatermarkConfig, - JoinOperator as JoinOperatorProto, - KeyPlanOperator as KeyByProto, - SessionWindowAggregateOperator, SlidingWindowAggregateOperator, + ExpressionWatermarkConfig, JoinOperator as JoinOperatorProto, + KeyPlanOperator as KeyByProto, SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator, UpdatingAggregateOperator, WindowFunctionOperator as WindowFunctionProto, }; -// --------------------------------------------------------------------------- -// 1. Core Trait (工厂契约) -// --------------------------------------------------------------------------- - -/// 算子构造器 trait:每个实现者负责从 protobuf 字节流反序列化配置并构造 [`ConstructedOperator`]。 -/// -/// 外部插件可实现此 trait 并通过 [`OperatorFactory::register`] 注入。 -pub trait OperatorConstructor: Send + Sync { - fn with_config(&self, config: &[u8], registry: Arc) -> Result; -} - -// --------------------------------------------------------------------------- -// 2. 
工业级工厂注册表 -// --------------------------------------------------------------------------- - /// 持有 `name → OperatorConstructor` 映射与共享 [`Registry`]。 /// /// `JobManager` 在部署任务时调用 [`create_operator`],完成从字节流到运行时算子的 @@ -126,23 +106,16 @@ impl OperatorFactory { // ─── 物理网络路由 ─── self.register("KeyBy", Box::new(KeyByBridge)); - // ─── 连接器 Source / Sink(分发器模式,不硬编码具体连接器) ─── - self.register("ConnectorSource", Box::new(ConnectorSourceDispatcher)); - self.register("ConnectorSink", Box::new(ConnectorSinkDispatcher)); - // ─── 透传类算子 ─── self.register("Projection", Box::new(PassthroughConstructor("Projection"))); self.register("ArrowValue", Box::new(PassthroughConstructor("ArrowValue"))); self.register("ArrowKey", Box::new(PassthroughConstructor("ArrowKey"))); - register_kafka_plugins(self); + crate::runtime::streaming::factory::register_builtin_connectors(self); + crate::runtime::streaming::factory::register_kafka_connector_plugins(self); } } -// --------------------------------------------------------------------------- -// 3. 构造器适配 — 解码 protobuf 后委托给各算子模块的 Constructor -// --------------------------------------------------------------------------- - struct TumblingWindowBridge; impl OperatorConstructor for TumblingWindowBridge { fn with_config(&self, config: &[u8], registry: Arc) -> Result { @@ -240,46 +213,6 @@ impl OperatorConstructor for KeyByBridge { } } -// --------------------------------------------------------------------------- -// 4. 
连接器分发抽象 (Connector Dispatcher) — 不硬编码具体连接器 -// --------------------------------------------------------------------------- - -pub struct ConnectorSourceDispatcher; - -impl OperatorConstructor for ConnectorSourceDispatcher { - fn with_config(&self, config: &[u8], registry: Arc) -> Result { - let op = ConnectorOp::decode(config) - .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?; - - match op.connector.as_str() { - ct if ct == connector_type::KAFKA => KafkaSourceDispatcher.with_config(config, registry), - ct if ct == connector_type::REDIS => Err(anyhow!( - "ConnectorSource '{}' factory wiring not yet implemented", - op.connector - )), - other => Err(anyhow!("Unsupported source connector type: {}", other)), - } - } -} - -pub struct ConnectorSinkDispatcher; - -impl OperatorConstructor for ConnectorSinkDispatcher { - fn with_config(&self, config: &[u8], registry: Arc) -> Result { - let op = ConnectorOp::decode(config) - .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?; - - match op.connector.as_str() { - ct if ct == connector_type::KAFKA => KafkaSinkDispatcher.with_config(config, registry), - other => Err(anyhow!("Unsupported sink connector type: {}", other)), - } - } -} - -// --------------------------------------------------------------------------- -// 5. 透传类算子 -// --------------------------------------------------------------------------- - pub struct PassthroughConstructor(pub &'static str); impl OperatorConstructor for PassthroughConstructor { diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs index 844131a0..e13279e1 100644 --- a/src/runtime/streaming/job/job_manager.rs +++ b/src/runtime/streaming/job/job_manager.rs @@ -61,9 +61,10 @@ impl JobManager { .ok_or_else(|| anyhow!("JobManager not initialized. 
Call init() first.")) } - /// 核心主干:从逻辑计划点火物理流水线 - pub async fn submit_job(&self, program: FsProgram) -> anyhow::Result { - let job_id = format!("job-{}", chrono::Utc::now().timestamp_millis()); + /// 核心主干:从逻辑计划点火物理流水线。 + /// + /// `job_id` 由调用方指定(须全局唯一),用于线程命名、`PhysicalExecutionGraph` 与 [`Self::stop_job`] 等。 + pub async fn submit_job(&self, job_id: String, program: FsProgram) -> anyhow::Result { let mut edge_manager = EdgeManager::build(&program.nodes, &program.edges); let mut pipelines = HashMap::new(); diff --git a/src/runtime/streaming/lib.rs b/src/runtime/streaming/lib.rs index 06cab2ee..4dd6316b 100644 --- a/src/runtime/streaming/lib.rs +++ b/src/runtime/streaming/lib.rs @@ -14,7 +14,6 @@ //! Streaming actor runtime (vendored from Arroyo `arroyo-actor-runtime`). pub mod api; -pub mod arrow; pub mod error; pub mod execution; pub mod factory; diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs index 4a761460..1da5f952 100644 --- a/src/runtime/streaming/mod.rs +++ b/src/runtime/streaming/mod.rs @@ -14,7 +14,6 @@ //! Streaming actor runtime (vendored from Arroyo `arroyo-actor-runtime`). 
pub mod api; -pub mod arrow; pub mod error; pub mod execution; pub mod factory; diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs index 2d2abf18..5d174323 100644 --- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs +++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs @@ -44,8 +44,9 @@ use protocol::grpc::api::UpdatingAggregateOperator; // 引入全新的 Actor 框架核心协议 (取代了老旧的 ArrowOperator 和 Collector) // ========================================================================= use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; -use crate::runtime::streaming::arrow::decode_aggregate; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::factory::Registry; +use crate::runtime::util::decode_aggregate; use crate::runtime::streaming::operators::{Key, UpdatingCache}; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{to_nanos, CheckpointBarrier, FsSchema, Watermark, TIMESTAMP_FIELD, UPDATING_META_FIELD}; diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs index cd5b3764..7fe32d6e 100644 --- a/src/runtime/streaming/operators/joins/join_instance.rs +++ b/src/runtime/streaming/operators/joins/join_instance.rs @@ -30,7 +30,8 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::factory::Registry; use async_trait::async_trait; use protocol::grpc::api::JoinOperator; use crate::runtime::streaming::StreamOutput; diff --git 
a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs index 34d15932..9e6de6c9 100644 --- a/src/runtime/streaming/operators/joins/join_with_expiration.rs +++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs @@ -28,7 +28,8 @@ use std::time::{Duration, SystemTime}; use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::factory::Registry; use async_trait::async_trait; use protocol::grpc::api::JoinOperator; use crate::runtime::streaming::StreamOutput; diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs index dccc561d..aecef032 100644 --- a/src/runtime/streaming/operators/sink/kafka/mod.rs +++ b/src/runtime/streaming/operators/sink/kafka/mod.rs @@ -31,6 +31,7 @@ use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::operator::MessageOperator; use crate::runtime::streaming::format::DataSerializer; use crate::runtime::streaming::StreamOutput; +use crate::sql::common::constants::factory_operator_name; use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark}; // ============================================================================ // 1. 
领域模型:一致性级别与事务状态机 @@ -210,7 +211,7 @@ fn row_key_bytes(batch: &RecordBatch, row: usize, col: usize) -> Option> #[async_trait] impl MessageOperator for KafkaSinkOperator { fn name(&self) -> &str { - "KafkaSink" + factory_operator_name::KAFKA_SINK } async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { diff --git a/src/runtime/streaming/operators/stateless_physical_executor.rs b/src/runtime/streaming/operators/stateless_physical_executor.rs index dee92eb3..188015e2 100644 --- a/src/runtime/streaming/operators/stateless_physical_executor.rs +++ b/src/runtime/streaming/operators/stateless_physical_executor.rs @@ -26,7 +26,7 @@ use datafusion_proto::protobuf::PhysicalPlanNode; use futures::StreamExt; use prost::Message; -use crate::runtime::streaming::api::operator::Registry; +use crate::runtime::streaming::factory::Registry; use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec}; pub struct StatelessPhysicalExecutor { diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs index 2b255f9b..63f5acec 100644 --- a/src/runtime/streaming/operators/watermark/watermark_generator.rs +++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs @@ -28,7 +28,8 @@ use std::time::{Duration, SystemTime}; use tracing::{debug, info}; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::factory::Registry; use async_trait::async_trait; use protocol::grpc::api::ExpressionWatermarkConfig; use crate::runtime::streaming::StreamOutput; diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs index 73642992..aaf65cf1 100644 --- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs 
+++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs @@ -39,7 +39,7 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::operator::MessageOperator; use async_trait::async_trait; -use crate::runtime::streaming::api::operator::Registry; +use crate::runtime::streaming::factory::Registry; use protocol::grpc::api::SessionWindowAggregateOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs index 7d801fd6..6f0aa7f9 100644 --- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs @@ -39,7 +39,7 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::operator::MessageOperator; use async_trait::async_trait; -use crate::runtime::streaming::api::operator::Registry; +use crate::runtime::streaming::factory::Registry; use protocol::grpc::api::SlidingWindowAggregateOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark}; diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs index 004cc205..30724f59 100644 --- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs @@ -39,7 +39,7 @@ use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; use 
crate::runtime::streaming::api::operator::MessageOperator; use async_trait::async_trait; -use crate::runtime::streaming::api::operator::Registry; +use crate::runtime::streaming::factory::Registry; use protocol::grpc::api::TumblingWindowAggregateOperator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark}; diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs index 641b0ef6..f6ae2a1a 100644 --- a/src/runtime/streaming/operators/windows/window_function.rs +++ b/src/runtime/streaming/operators/windows/window_function.rs @@ -32,7 +32,8 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{MessageOperator, Registry}; +use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::factory::Registry; use async_trait::async_trait; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; diff --git a/src/runtime/util/mod.rs b/src/runtime/util/mod.rs new file mode 100644 index 00000000..3b4c7e60 --- /dev/null +++ b/src/runtime/util/mod.rs @@ -0,0 +1,17 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
运行时跨子系统复用的工具函数(物理计划 proto 解码等)。 + +mod physical_aggregate; + +pub use physical_aggregate::decode_aggregate; diff --git a/src/runtime/streaming/arrow/mod.rs b/src/runtime/util/physical_aggregate.rs similarity index 95% rename from src/runtime/streaming/arrow/mod.rs rename to src/runtime/util/physical_aggregate.rs index d706199f..83a6e3bd 100644 --- a/src/runtime/streaming/arrow/mod.rs +++ b/src/runtime/util/physical_aggregate.rs @@ -10,9 +10,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Arrow / DataFusion 辅助:聚合表达式解码等。 -//! -//! `UpdatingCache` 位于 [`crate::runtime::streaming::operators::updating_cache`]。 +//! 从 DataFusion proto 物理表达式节点解码聚合(UDAF)表达式。 + +use std::sync::Arc; use arrow::datatypes::SchemaRef; use datafusion::common::internal_err; @@ -25,9 +25,8 @@ use datafusion_proto::physical_plan::{DefaultPhysicalExtensionCodec, PhysicalExt use datafusion_proto::protobuf::physical_aggregate_expr_node::AggregateFunction; use datafusion_proto::protobuf::physical_expr_node::ExprType; use datafusion_proto::protobuf::{PhysicalExprNode, proto_error}; -use std::sync::Arc; -/// 从 `PhysicalExprNode` 解码 UDAF 聚合表达式(与 worker `arrow/mod` 一致)。 +/// 从 `PhysicalExprNode` 解码 UDAF 聚合表达式(与 worker `arrow/mod` 历史路径语义一致)。 pub fn decode_aggregate( schema: &SchemaRef, name: &str, diff --git a/src/server/initializer.rs b/src/server/initializer.rs index 7786169a..a73ec14a 100644 --- a/src/server/initializer.rs +++ b/src/server/initializer.rs @@ -152,7 +152,7 @@ fn initialize_python_service(config: &GlobalConfig) -> Result<()> { } fn initialize_job_manager(config: &GlobalConfig) -> Result<()> { - use crate::runtime::streaming::api::operator::Registry; + use crate::runtime::streaming::factory::Registry; use crate::runtime::streaming::factory::OperatorFactory; use crate::runtime::streaming::job::JobManager; use std::sync::Arc; diff --git a/src/sql/common/constants.rs b/src/sql/common/constants.rs index 5f249227..cf2a39cc 100644 
--- a/src/sql/common/constants.rs +++ b/src/sql/common/constants.rs @@ -98,6 +98,15 @@ pub mod runtime_operator_kind { pub const STREAMING_WINDOW_EVALUATOR: &str = "streaming_window_evaluator"; } +// ── Worker [`OperatorFactory`] 注册键(须与任务包 `operator_name`、`OperatorName::Display` 一致)── + +pub mod factory_operator_name { + pub const CONNECTOR_SOURCE: &str = "ConnectorSource"; + pub const CONNECTOR_SINK: &str = "ConnectorSink"; + pub const KAFKA_SOURCE: &str = "KafkaSource"; + pub const KAFKA_SINK: &str = "KafkaSink"; +} + // ── Debezium CDC 信封字段 ─────────────────────────────────────────────────── pub mod cdc { From 18a19f1d8a33f6d005f3e39b7c85daad8abce10a Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 29 Mar 2026 17:20:20 +0800 Subject: [PATCH 28/44] update --- src/runtime/streaming/api/context.rs | 9 ---- src/runtime/streaming/api/mod.rs | 1 - src/runtime/streaming/api/operator.rs | 6 --- src/runtime/streaming/api/source.rs | 5 --- src/runtime/streaming/error.rs | 9 ---- src/runtime/streaming/execution/mod.rs | 1 - src/runtime/streaming/execution/runner.rs | 5 --- src/runtime/streaming/execution/source.rs | 2 - .../execution/tracker/barrier_aligner.rs | 3 -- .../streaming/execution/tracker/mod.rs | 1 - .../factory/connector/dispatchers.rs | 1 - .../streaming/factory/connector/kafka.rs | 4 -- .../streaming/factory/connector/mod.rs | 1 - src/runtime/streaming/factory/global/mod.rs | 1 - .../factory/global/session_registry.rs | 4 -- src/runtime/streaming/factory/mod.rs | 4 -- .../streaming/factory/operator_constructor.rs | 3 -- .../streaming/factory/operator_factory.rs | 11 ----- src/runtime/streaming/format/config.rs | 3 -- src/runtime/streaming/format/deserializer.rs | 2 - src/runtime/streaming/format/json_encoder.rs | 7 ---- src/runtime/streaming/format/serializer.rs | 1 - src/runtime/streaming/job/edge_manager.rs | 1 - src/runtime/streaming/job/job_manager.rs | 3 -- src/runtime/streaming/job/models.rs | 3 -- src/runtime/streaming/memory/pool.rs | 1 - 
src/runtime/streaming/memory/ticket.rs | 3 -- src/runtime/streaming/network/endpoint.rs | 7 ---- src/runtime/streaming/network/environment.rs | 1 - .../grouping/incremental_aggregate.rs | 14 ------- .../operators/grouping/updating_cache.rs | 2 - .../operators/joins/join_instance.rs | 5 --- .../operators/joins/join_with_expiration.rs | 5 --- src/runtime/streaming/operators/key_by.rs | 6 --- .../streaming/operators/key_operator.rs | 18 -------- src/runtime/streaming/operators/mod.rs | 2 - src/runtime/streaming/operators/projection.rs | 2 - .../streaming/operators/sink/kafka/mod.rs | 4 -- src/runtime/streaming/operators/sink/mod.rs | 1 - .../streaming/operators/source/kafka/mod.rs | 11 ----- src/runtime/streaming/operators/source/mod.rs | 1 - .../operators/stateless_physical_executor.rs | 1 - .../streaming/operators/value_execution.rs | 2 - .../watermark/watermark_generator.rs | 7 ---- .../windows/session_aggregating_window.rs | 7 +--- .../windows/sliding_aggregating_window.rs | 8 ---- .../windows/tumbling_aggregating_window.rs | 1 - .../operators/windows/window_function.rs | 7 ---- src/runtime/streaming/protocol/control.rs | 4 -- src/runtime/streaming/protocol/event.rs | 1 - src/runtime/streaming/protocol/mod.rs | 1 - src/runtime/streaming/protocol/stream_out.rs | 5 --- src/runtime/streaming/protocol/tracked.rs | 4 -- src/runtime/streaming/protocol/watermark.rs | 4 -- src/runtime/util/mod.rs | 1 - src/runtime/util/physical_aggregate.rs | 2 - src/sql/common/constants.rs | 41 ------------------- src/sql/common/kafka_catalog.rs | 8 ---- src/sql/common/operator_config.rs | 1 - src/sql/common/with_option_keys.rs | 6 --- src/sql/logical_node/logical/operator_name.rs | 1 - src/sql/physical/cdc/encode.rs | 2 - src/sql/physical/cdc/mod.rs | 1 - src/sql/physical/cdc/unroll.rs | 2 - src/sql/physical/codec.rs | 1 - src/sql/physical/meta.rs | 1 - src/sql/physical/mod.rs | 1 - src/sql/physical/readers.rs | 1 - src/sql/physical/udfs.rs | 1 - 69 files changed, 1 insertion(+), 295 
deletions(-) diff --git a/src/runtime/streaming/api/context.rs b/src/runtime/streaming/api/context.rs index b70d40df..f0c3dfcb 100644 --- a/src/runtime/streaming/api/context.rs +++ b/src/runtime/streaming/api/context.rs @@ -52,15 +52,12 @@ impl TaskContext { } // ======================================================================== - // 水位线与时间流管理 API // ======================================================================== - /// 供业务算子调用:获取当前任务的安全水位线 pub fn last_present_watermark(&self) -> Option { self.current_watermark } - /// 供底座框架 (SubtaskRunner) 调用:推进本地时间,保证单调递增 pub fn advance_watermark(&mut self, watermark: std::time::SystemTime) { if let Some(current) = self.current_watermark { if watermark > current { @@ -72,10 +69,8 @@ impl TaskContext { } // ======================================================================== - // 可观测性 API (Observability) // ======================================================================== - /// 格式化当前 Task 的唯一标识,用于分布式追踪和日志打印 pub fn task_identity(&self) -> String { format!( "Job[{}], Vertex[{}], Subtask[{}/{}]", @@ -84,10 +79,8 @@ impl TaskContext { } // ======================================================================== - // 背压网络发送 API // ======================================================================== - /// 受内存池管控的数据发送:申请精准字节的内存船票后广播到所有下游 pub async fn collect(&self, batch: RecordBatch) -> anyhow::Result<()> { if self.outboxes.is_empty() { return Ok(()); @@ -103,7 +96,6 @@ impl TaskContext { Ok(()) } - /// 按 Key 哈希路由到单分区(用于 Shuffle / KeyBy) pub async fn collect_keyed( &self, key_hash: u64, @@ -122,7 +114,6 @@ impl TaskContext { Ok(()) } - /// 广播控制信号(如 Watermark, Barrier:不申请内存船票,保证在拥堵时畅通无阻) pub async fn broadcast(&self, event: StreamEvent) -> anyhow::Result<()> { let tracked_event = TrackedEvent::control(event); for outbox in &self.outboxes { diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs index aec4b8fb..f004de58 100644 --- a/src/runtime/streaming/api/mod.rs +++ 
b/src/runtime/streaming/api/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 接口层:算子与源实现需遵循的 trait 与运行时上下文。 pub mod context; pub mod operator; diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs index 29d24f82..4683379b 100644 --- a/src/runtime/streaming/api/operator.rs +++ b/src/runtime/streaming/api/operator.rs @@ -22,13 +22,11 @@ use crate::sql::common::{CheckpointBarrier, Watermark}; // ConstructedOperator // --------------------------------------------------------------------------- -/// 工厂反射产出的具体算子实例 pub enum ConstructedOperator { Source(Box), Operator(Box), } -/// 多上游、被动驱动的消息算子。 #[async_trait] pub trait MessageOperator: Send + 'static { fn name(&self) -> &str; @@ -37,7 +35,6 @@ pub trait MessageOperator: Send + 'static { Ok(()) } - /// `input_idx`:多输入拓扑下第几条边(与 `SubtaskRunner` 的 inbox 下标一致;单输入恒为 0)。 async fn process_data( &mut self, input_idx: usize, @@ -57,7 +54,6 @@ pub trait MessageOperator: Send + 'static { ctx: &mut TaskContext, ) -> anyhow::Result<()>; - /// 全局 checkpoint 确认后由 `SubtaskRunner` 在 [`ControlCommand::Commit`] 上调用(如 Kafka EOS 二阶段提交)。 async fn commit_checkpoint( &mut self, _epoch: u32, @@ -66,12 +62,10 @@ pub trait MessageOperator: Send + 'static { Ok(()) } - /// 周期性时钟(如 Idle 检测);`None` 表示不注册 tick。 fn tick_interval(&self) -> Option { None } - /// 与 [`Self::tick_interval`] 配套,由 `SubtaskRunner` 按固定间隔调用。 async fn process_tick( &mut self, _tick_index: u64, diff --git a/src/runtime/streaming/api/source.rs b/src/runtime/streaming/api/source.rs index 1f79de38..f46f3de7 100644 --- a/src/runtime/streaming/api/source.rs +++ b/src/runtime/streaming/api/source.rs @@ -10,14 +10,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
源算子:由 [`crate::runtime::streaming::execution::SourceRunner`] 驱动 `fetch_next`,不得在内部死循环阻塞控制面。 use crate::runtime::streaming::api::context::TaskContext; use arrow_array::RecordBatch; use async_trait::async_trait; use crate::sql::common::{CheckpointBarrier, Watermark}; -/// Kafka 等外部源在 **无已存位点** 时的起始消费策略(与 `arroyo-connectors` 语义对齐)。 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum SourceOffset { Earliest, @@ -30,7 +28,6 @@ pub enum SourceOffset { pub enum SourceEvent { Data(RecordBatch), Watermark(Watermark), - /// 无数据可读:必须由 Runner 调度退避,禁止在 `fetch_next` 内长时间阻塞。 Idle, EndOfStream, } @@ -43,10 +40,8 @@ pub trait SourceOperator: Send + 'static { Ok(()) } - /// 核心拉取:无数据时必须返回 [`SourceEvent::Idle`],严禁内部阻塞控制面。 async fn fetch_next(&mut self, ctx: &mut TaskContext) -> anyhow::Result; - /// 独立于 `fetch_next` 的水位线脉搏(例如解决 Idle 时仍要推进水印)。 fn poll_watermark(&mut self) -> Option { None } diff --git a/src/runtime/streaming/error.rs b/src/runtime/streaming/error.rs index c8d1944a..178f5bbb 100644 --- a/src/runtime/streaming/error.rs +++ b/src/runtime/streaming/error.rs @@ -13,42 +13,33 @@ use std::fmt::Display; use thiserror::Error; -/// 流水线 / 子任务运行期间的错误定义。 #[derive(Debug, Error)] pub enum RunError { - /// 算子内部业务逻辑抛出的错误 #[error("Operator execution failed: {0:#}")] Operator(#[from] anyhow::Error), - /// 向下游 Task 发送数据/信号时通道阻塞或断开 #[error("Downstream send failed: {0}")] DownstreamSend(String), - /// 引擎内部状态机错误或拓扑规划错误(如:DAG 为空、在链条中间发生 Shuffle) #[error("Internal engine error: {0}")] Internal(String), - /// Checkpoint 状态持久化或恢复时发生的错误 #[error("State backend error: {0}")] State(String), - /// 底层网络或文件 I/O 错误 #[error("I/O error: {0}")] Io(#[from] std::io::Error), } impl RunError { - /// 快捷构造器:引擎内部错误(常用于防御性编程和边界校验) pub fn internal(msg: T) -> Self { Self::Internal(msg.to_string()) } - /// 快捷构造器:下游发送异常 pub fn downstream(msg: T) -> Self { Self::DownstreamSend(msg.to_string()) } - /// 快捷构造器:状态后端异常 pub fn state(msg: T) -> Self { Self::State(msg.to_string()) } diff --git 
a/src/runtime/streaming/execution/mod.rs b/src/runtime/streaming/execution/mod.rs index a4fb6d95..40beabe4 100644 --- a/src/runtime/streaming/execution/mod.rs +++ b/src/runtime/streaming/execution/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 执行层:Tokio Actor 运行容器。 pub mod runner; pub mod source; diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs index 994d8c04..d43f052d 100644 --- a/src/runtime/streaming/execution/runner.rs +++ b/src/runtime/streaming/execution/runner.rs @@ -32,7 +32,6 @@ use crate::runtime::streaming::execution::tracker::{ use crate::sql::common::{CheckpointBarrier, Watermark}; // ========================================== -// 第一部分:逻辑处理层 - 算子融合链 (Logical Driver) // ========================================== #[async_trait] @@ -62,7 +61,6 @@ impl ChainedDriver { Self { operator, next } } - /// 从后往前组装算子,构建责任链 pub fn build_chain(mut operators: Vec>) -> Option> { if operators.is_empty() { return None; @@ -227,7 +225,6 @@ impl OperatorDrive for ChainedDriver { } // ========================================== -// 第二部分:物理执行层 - 流水线 (Physical Driver) // ========================================== pub struct Pipeline { @@ -238,7 +235,6 @@ pub struct Pipeline { wm_tracker: WatermarkTracker, barrier_aligner: BarrierAligner, - /// Barrier 未对齐时从轮询池移除的输入流(背压) paused_streams: Vec>, } @@ -376,5 +372,4 @@ impl Pipeline { } } -/// 与执行引擎语义对齐的别名 pub type SubtaskRunner = Pipeline; diff --git a/src/runtime/streaming/execution/source.rs b/src/runtime/streaming/execution/source.rs index a9fbd561..a85b0839 100644 --- a/src/runtime/streaming/execution/source.rs +++ b/src/runtime/streaming/execution/source.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
源任务物理驱动:控制面优先、`fetch_next` 非阻塞契约、可选融合算子链下推。 use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::source::{SourceEvent, SourceOperator}; @@ -30,7 +29,6 @@ pub const WATERMARK_EMIT_INTERVAL: Duration = Duration::from_millis(200); pub struct SourceRunner { operator: Box, - /// 有链时数据与信号经链尾再 `collect` / `broadcast`;无链则直接走 `TaskContext`。 chain_head: Option>, ctx: TaskContext, control_rx: Receiver, diff --git a/src/runtime/streaming/execution/tracker/barrier_aligner.rs b/src/runtime/streaming/execution/tracker/barrier_aligner.rs index 34b5380a..b227e439 100644 --- a/src/runtime/streaming/execution/tracker/barrier_aligner.rs +++ b/src/runtime/streaming/execution/tracker/barrier_aligner.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Chandy–Lamport 风格屏障对齐(零内存缓冲:未对齐时从轮询池移除输入流,依赖底层背压)。 use std::collections::HashSet; @@ -18,9 +17,7 @@ use crate::sql::common::CheckpointBarrier; #[derive(Debug)] pub enum AlignmentStatus { - /// 未对齐:外层应将当前通道从 `StreamMap` 挂起(Pause)。 Pending, - /// 已对齐:外层触发快照并唤醒所有挂起通道(Resume)。 Complete, } diff --git a/src/runtime/streaming/execution/tracker/mod.rs b/src/runtime/streaming/execution/tracker/mod.rs index 81329c27..b00ee0a2 100644 --- a/src/runtime/streaming/execution/tracker/mod.rs +++ b/src/runtime/streaming/execution/tracker/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 协调层:屏障对齐与多路水位线追踪。 pub mod barrier_aligner; pub mod watermark_tracker; diff --git a/src/runtime/streaming/factory/connector/dispatchers.rs b/src/runtime/streaming/factory/connector/dispatchers.rs index dcdd1e32..cca85c1a 100644 --- a/src/runtime/streaming/factory/connector/dispatchers.rs +++ b/src/runtime/streaming/factory/connector/dispatchers.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
Source / Sink 连接器协议:按 [`ConnectorOp::connector`] 分发到具体实现。 use anyhow::{anyhow, Result}; use prost::Message; diff --git a/src/runtime/streaming/factory/connector/kafka.rs b/src/runtime/streaming/factory/connector/kafka.rs index ab72ea9e..7e548cec 100644 --- a/src/runtime/streaming/factory/connector/kafka.rs +++ b/src/runtime/streaming/factory/connector/kafka.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Kafka Source/Sink:从 [`ConnectorOp`] + [`OperatorConfig`] 构造物理算子(鉴权与 client 配置合并)。 use anyhow::{anyhow, bail, Context, Result}; use prost::Message; @@ -43,7 +42,6 @@ use crate::sql::common::{FsSchema, OperatorConfig}; const DEFAULT_SOURCE_BATCH_SIZE: usize = 1024; -/// 合并连接级鉴权、全局 `connection_properties` 与表级 `client_configs`(表级覆盖同名键)。 pub fn build_client_configs(config: &KafkaConfig, table: &KafkaTable) -> Result> { let mut client_configs = HashMap::new(); @@ -184,7 +182,6 @@ fn decode_operator_config(op: &ConnectorOp) -> Result { }) } -/// 由 [`ConnectorOp`] 构造 Kafka Source(`connector` 须为 `kafka`)。 pub struct KafkaSourceDispatcher; impl OperatorConstructor for KafkaSourceDispatcher { @@ -263,7 +260,6 @@ impl OperatorConstructor for KafkaSourceDispatcher { } } -/// 由 [`ConnectorOp`] 构造 Kafka Sink(`connector` 须为 `kafka`)。 pub struct KafkaSinkDispatcher; impl OperatorConstructor for KafkaSinkDispatcher { diff --git a/src/runtime/streaming/factory/connector/mod.rs b/src/runtime/streaming/factory/connector/mod.rs index 3b8af292..be63478d 100644 --- a/src/runtime/streaming/factory/connector/mod.rs +++ b/src/runtime/streaming/factory/connector/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
Source / Sink 连接器:`ConnectorOp` 分发与各连接器实现(如 Kafka)。 mod dispatchers; pub mod kafka; diff --git a/src/runtime/streaming/factory/global/mod.rs b/src/runtime/streaming/factory/global/mod.rs index 9434c157..0dc2130e 100644 --- a/src/runtime/streaming/factory/global/mod.rs +++ b/src/runtime/streaming/factory/global/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 全局运行时资源:共享 [`Registry`](Session + UDF 表),与连接器协议无关。 mod session_registry; diff --git a/src/runtime/streaming/factory/global/session_registry.rs b/src/runtime/streaming/factory/global/session_registry.rs index ef32c30e..4b7895a2 100644 --- a/src/runtime/streaming/factory/global/session_registry.rs +++ b/src/runtime/streaming/factory/global/session_registry.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 运行时 UDF / UDAF / UDWF 查询表(基于 DataFusion [`SessionContext`])。 use std::collections::HashSet; use std::sync::Arc; @@ -21,10 +20,7 @@ use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::planner::ExprPlanner; use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF}; -/// 为物理计划反序列化等路径提供 [`FunctionRegistry`] 实现。 /// -/// 由 [`crate::runtime::streaming::factory::OperatorFactory`] 持有 `Arc`, -/// 与各 [`crate::runtime::streaming::factory::OperatorConstructor`] 共享;须显式 [`Self::new`] 构造,无默认实例。 pub struct Registry { ctx: SessionContext, } diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs index 8a7e686a..024dfb14 100644 --- a/src/runtime/streaming/factory/mod.rs +++ b/src/runtime/streaming/factory/mod.rs @@ -10,8 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 流算子工厂:[`global`] 为共享注册表;[`connector`] 为 Source/Sink 协议与实现; -//! 
[`OperatorFactory`]、[`OperatorConstructor`] 在根模块,避免与 `connector` 循环依赖。 pub mod connector; pub mod global; @@ -33,7 +31,6 @@ pub use operator_factory::OperatorFactory; #[allow(unused_imports)] pub use operator_factory::PassthroughConstructor; -/// 注册 `ConnectorSource` / `ConnectorSink` 分发器(打破 `operator_factory` ↔ `connector` 依赖环)。 fn register_builtin_connectors(factory: &mut OperatorFactory) { factory.register( factory_operator_name::CONNECTOR_SOURCE, @@ -45,7 +42,6 @@ fn register_builtin_connectors(factory: &mut OperatorFactory) { ); } -/// 注册直连 Kafka 算子(名称见 [`crate::sql::common::constants::factory_operator_name`])。 fn register_kafka_connector_plugins(factory: &mut OperatorFactory) { factory.register( factory_operator_name::KAFKA_SOURCE, diff --git a/src/runtime/streaming/factory/operator_constructor.rs b/src/runtime/streaming/factory/operator_constructor.rs index b6b6203f..832fe734 100644 --- a/src/runtime/streaming/factory/operator_constructor.rs +++ b/src/runtime/streaming/factory/operator_constructor.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
算子构造协议:与具体连接器实现解耦,供 [`super::OperatorFactory`] 与 `connector` 共用。 use anyhow::Result; use std::sync::Arc; @@ -18,9 +17,7 @@ use std::sync::Arc; use crate::runtime::streaming::api::operator::ConstructedOperator; use crate::runtime::streaming::factory::global::Registry; -/// 算子构造器 trait:每个实现者负责从 protobuf 字节流反序列化配置并构造 [`ConstructedOperator`]。 /// -/// 外部插件可实现此 trait 并通过 [`crate::runtime::streaming::factory::OperatorFactory::register`] 注入。 pub trait OperatorConstructor: Send + Sync { fn with_config(&self, config: &[u8], registry: Arc) -> Result; } diff --git a/src/runtime/streaming/factory/operator_factory.rs b/src/runtime/streaming/factory/operator_factory.rs index dfc6bb87..a95c0241 100644 --- a/src/runtime/streaming/factory/operator_factory.rs +++ b/src/runtime/streaming/factory/operator_factory.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 全局算子工厂:内置窗口 / Join / KeyBy 等 Bridge。 use anyhow::{anyhow, Result}; use prost::Message; @@ -39,10 +38,7 @@ use protocol::grpc::api::{ WindowFunctionOperator as WindowFunctionProto, }; -/// 持有 `name → OperatorConstructor` 映射与共享 [`Registry`]。 /// -/// `JobManager` 在部署任务时调用 [`create_operator`],完成从字节流到运行时算子的 -/// 反射式实例化。 pub struct OperatorFactory { constructors: HashMap>, registry: Arc, @@ -62,7 +58,6 @@ impl OperatorFactory { self.constructors.insert(name.to_string(), constructor); } - /// 反射与实例化:从 TDD 的字节流中拉起运行时的业务算子 pub fn create_operator(&self, name: &str, payload: &[u8]) -> Result { let ctor = self .constructors @@ -78,18 +73,15 @@ impl OperatorFactory { ctor.with_config(payload, self.registry.clone()) } - /// 列出已注册的所有算子名称(调试用)。 pub fn registered_operators(&self) -> Vec<&str> { self.constructors.keys().map(|s| s.as_str()).collect() } fn register_builtins(&mut self) { - // ─── 窗口聚合 ─── self.register("TumblingWindowAggregate", Box::new(TumblingWindowBridge)); self.register("SlidingWindowAggregate", Box::new(SlidingWindowBridge)); 
self.register("SessionWindowAggregate", Box::new(SessionWindowBridge)); - // ─── 水位 ─── self.register("ExpressionWatermark", Box::new(WatermarkBridge)); // ─── SQL Window Function ─── @@ -100,13 +92,10 @@ impl OperatorFactory { self.register("InstantJoin", Box::new(InstantJoinBridge)); self.register("LookupJoin", Box::new(LookupJoinBridge)); - // ─── 增量聚合 ─── self.register("UpdatingAggregate", Box::new(IncrementalAggregateBridge)); - // ─── 物理网络路由 ─── self.register("KeyBy", Box::new(KeyByBridge)); - // ─── 透传类算子 ─── self.register("Projection", Box::new(PassthroughConstructor("Projection"))); self.register("ArrowValue", Box::new(PassthroughConstructor("ArrowValue"))); self.register("ArrowKey", Box::new(PassthroughConstructor("ArrowKey"))); diff --git a/src/runtime/streaming/format/config.rs b/src/runtime/streaming/format/config.rs index 235e1d82..15a58008 100644 --- a/src/runtime/streaming/format/config.rs +++ b/src/runtime/streaming/format/config.rs @@ -26,12 +26,9 @@ pub enum DecimalEncoding { Bytes, } -/// 数据容错策略 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum BadDataPolicy { - /// 遇到脏数据直接报错,导致算子 Panic 和重启 Fail, - /// 丢弃脏数据,并记录监控 Metrics Drop, } diff --git a/src/runtime/streaming/format/deserializer.rs b/src/runtime/streaming/format/deserializer.rs index 1c32d48a..3e9e6d66 100644 --- a/src/runtime/streaming/format/deserializer.rs +++ b/src/runtime/streaming/format/deserializer.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
数据反序列化器:将外界收到的字节流转化为结构化 [`RecordBatch`]。 use anyhow::{anyhow, Result}; use arrow_array::builder::StringBuilder; @@ -36,7 +35,6 @@ impl DataDeserializer { } } - /// 工业级反序列化:包含完整的脏数据容错兜底 pub fn deserialize_batch(&self, messages: &[&[u8]]) -> Result { match &self.format { Format::Json(_) => self.deserialize_json(messages), diff --git a/src/runtime/streaming/format/json_encoder.rs b/src/runtime/streaming/format/json_encoder.rs index 8d34e9ef..f834a192 100644 --- a/src/runtime/streaming/format/json_encoder.rs +++ b/src/runtime/streaming/format/json_encoder.rs @@ -10,9 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 极致优化的 Arrow JSON 编码器。 //! -//! 解决 Arrow 原生 JSON 导出时不兼容 Kafka / 时间戳 / Decimal 的痛点。 use arrow_array::{ Array, Decimal128Array, TimestampMicrosecondArray, @@ -44,7 +42,6 @@ impl EncoderFactory for CustomEncoderFactory { &self.timestamp_format, array.data_type(), ) { - // ── Timestamp → Unix 毫秒 ── (_, TimestampFormat::UnixMillis, DataType::Timestamp(TimeUnit::Nanosecond, _)) => { let arr = array .as_any() @@ -106,7 +103,6 @@ impl EncoderFactory for CustomEncoderFactory { Box::new(BinaryEncoder(arr)) } - // 其他类型:降级使用 Arrow 原生 encoder _ => return Ok(None), }; @@ -115,7 +111,6 @@ impl EncoderFactory for CustomEncoderFactory { } // --------------------------------------------------------------------------- -// UnixMillisEncoder — 各精度 Timestamp → i64 毫秒 // --------------------------------------------------------------------------- enum UnixMillisEncoder { @@ -138,7 +133,6 @@ impl Encoder for UnixMillisEncoder { } // --------------------------------------------------------------------------- -// DecimalEncoder — Decimal128 → JSON 字符串 / Base64 Bytes // --------------------------------------------------------------------------- enum DecimalEncoder { @@ -168,7 +162,6 @@ impl Encoder for DecimalEncoder { } // --------------------------------------------------------------------------- -// BinaryEncoder — 
Binary → Base64 字符串 // --------------------------------------------------------------------------- struct BinaryEncoder(arrow_array::BinaryArray); diff --git a/src/runtime/streaming/format/serializer.rs b/src/runtime/streaming/format/serializer.rs index 7b61d055..bb123499 100644 --- a/src/runtime/streaming/format/serializer.rs +++ b/src/runtime/streaming/format/serializer.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 数据序列化器:将内存 [`RecordBatch`] 转换为二进制消息流,供 Sink 连接器发送。 use anyhow::{anyhow, Result}; use arrow_array::{Array, RecordBatch, StructArray}; diff --git a/src/runtime/streaming/job/edge_manager.rs b/src/runtime/streaming/job/edge_manager.rs index 53f82cb9..b57b761f 100644 --- a/src/runtime/streaming/job/edge_manager.rs +++ b/src/runtime/streaming/job/edge_manager.rs @@ -18,7 +18,6 @@ use tokio::sync::mpsc; use crate::runtime::streaming::protocol::tracked::TrackedEvent; pub struct EdgeManager { - // PipelineID -> (输入 Receiver, 输出 Sender 列表) endpoints: HashMap>, Vec>)>, } diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs index e13279e1..20ecad9f 100644 --- a/src/runtime/streaming/job/job_manager.rs +++ b/src/runtime/streaming/job/job_manager.rs @@ -61,9 +61,7 @@ impl JobManager { .ok_or_else(|| anyhow!("JobManager not initialized. 
Call init() first.")) } - /// 核心主干:从逻辑计划点火物理流水线。 /// - /// `job_id` 由调用方指定(须全局唯一),用于线程命名、`PhysicalExecutionGraph` 与 [`Self::stop_job`] 等。 pub async fn submit_job(&self, job_id: String, program: FsProgram) -> anyhow::Result { let mut edge_manager = EdgeManager::build(&program.nodes, &program.edges); let mut pipelines = HashMap::new(); @@ -150,7 +148,6 @@ impl JobManager { } // ======================================================================== - // 内部私有方法 // ======================================================================== fn build_operator_chain( diff --git a/src/runtime/streaming/job/models.rs b/src/runtime/streaming/job/models.rs index 35b48da7..45ea3bb7 100644 --- a/src/runtime/streaming/job/models.rs +++ b/src/runtime/streaming/job/models.rs @@ -20,7 +20,6 @@ use tokio::sync::mpsc; use crate::runtime::streaming::protocol::control::ControlCommand; -/// 物理 Pipeline 的实时状态 #[derive(Debug, Clone, PartialEq)] pub enum PipelineStatus { Initializing, @@ -30,7 +29,6 @@ pub enum PipelineStatus { Stopping, } -/// 物理执行图中的一个执行单元 pub struct PhysicalPipeline { pub pipeline_id: u32, pub handle: Option>, @@ -38,7 +36,6 @@ pub struct PhysicalPipeline { pub control_tx: mpsc::Sender, } -/// 一个 SQL Job 的物理执行图 pub struct PhysicalExecutionGraph { pub job_id: String, pub program: FsProgram, diff --git a/src/runtime/streaming/memory/pool.rs b/src/runtime/streaming/memory/pool.rs index 54276088..4813a63e 100644 --- a/src/runtime/streaming/memory/pool.rs +++ b/src/runtime/streaming/memory/pool.rs @@ -18,7 +18,6 @@ use tracing::{debug, warn}; use super::ticket::MemoryTicket; -/// 工业级全局内存池 (Global Memory Pool) #[derive(Debug)] pub struct MemoryPool { max_bytes: usize, diff --git a/src/runtime/streaming/memory/ticket.rs b/src/runtime/streaming/memory/ticket.rs index 1c9d2798..cb105be0 100644 --- a/src/runtime/streaming/memory/ticket.rs +++ b/src/runtime/streaming/memory/ticket.rs @@ -14,9 +14,6 @@ use std::sync::Arc; use super::pool::MemoryPool; -/// 内存船票 (RAII Guard) -/// 不实现 
Clone:生命周期严格对应唯一的字节扣减。 -/// 跨多路广播时应包裹在 `Arc` 中。 #[derive(Debug)] pub struct MemoryTicket { bytes: usize, diff --git a/src/runtime/streaming/network/endpoint.rs b/src/runtime/streaming/network/endpoint.rs index a8525e1e..7448e9cd 100644 --- a/src/runtime/streaming/network/endpoint.rs +++ b/src/runtime/streaming/network/endpoint.rs @@ -19,7 +19,6 @@ use tokio_stream::Stream; use tracing::debug; // ======================================================================== -// 1. 网络桩 (Stub):为后续 gRPC/TCP 扩展预留孔位 // ======================================================================== #[derive(Clone)] @@ -34,16 +33,11 @@ impl RemoteSenderStub { } // ======================================================================== -// 2. 物理发送端点 (Physical Sender Endpoint) // ======================================================================== -/// 统一的物理发送端点。 -/// 算子无需知道目标是同机还是异机,只管调用 `send`。 #[derive(Clone)] pub enum PhysicalSender { - /// 本地线程间传输,携带内存船票,零开销 Local(mpsc::Sender), - /// 跨机网络传输,需要序列化,并在发送后丢弃本地船票 Remote(RemoteSenderStub), } @@ -65,7 +59,6 @@ impl PhysicalSender { } // ======================================================================== -// 3. 
物理接收端点 (Physical Receiver Endpoint) // ======================================================================== pub type BoxedEventStream = Pin + Send>>; diff --git a/src/runtime/streaming/network/environment.rs b/src/runtime/streaming/network/environment.rs index 07ea0cab..fe8544c5 100644 --- a/src/runtime/streaming/network/environment.rs +++ b/src/runtime/streaming/network/environment.rs @@ -16,7 +16,6 @@ use std::collections::HashMap; pub type VertexId = u32; pub type SubtaskIndex = u32; -/// 物理网络路由注册表 pub struct NetworkEnvironment { pub outboxes: HashMap<(VertexId, SubtaskIndex), Vec>, pub inboxes: HashMap<(VertexId, SubtaskIndex), Vec>, diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs index 5d174323..4b1af6b3 100644 --- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs +++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs @@ -41,7 +41,6 @@ use std::{collections::HashMap, mem, sync::Arc}; use tracing::{debug, warn}; use protocol::grpc::api::UpdatingAggregateOperator; // ========================================================================= -// 引入全新的 Actor 框架核心协议 (取代了老旧的 ArrowOperator 和 Collector) // ========================================================================= use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::operator::MessageOperator; @@ -175,7 +174,6 @@ struct Aggregator { } // ========================================================================= -// 核心算子结构体 // ========================================================================= pub struct IncrementalAggregatingFunc { @@ -185,21 +183,18 @@ pub struct IncrementalAggregatingFunc { accumulators: UpdatingCache>, updated_keys: HashMap>>, - // 【新增】:算子自身持有输入元数据,不再依赖外部动态传入 input_schema: Arc, has_routing_keys: bool, sliding_state_schema: Arc, batch_state_schema: Arc, schema_without_metadata: Arc, - /// 下游 changelog 批次 
schema(与 planner `final_schema` 一致)。 final_output_schema: Arc, ttl: Duration, key_converter: RowConverter, new_generation: u64, } -/// 全局聚合使用的空 key(单分区无 routing key)。 static GLOBAL_KEY: LazyLock>> = LazyLock::new(|| Arc::new(Vec::new())); impl IncrementalAggregatingFunc { @@ -389,7 +384,6 @@ impl IncrementalAggregatingFunc { } // ========================================================================= - // 状态读写逻辑 (Checkpointing & Restore) // ========================================================================= fn checkpoint_sliding(&mut self) -> DFResult>> { @@ -592,13 +586,11 @@ impl IncrementalAggregatingFunc { Ok(()) } - /// 核心逻辑:从内存中提取这段时间的所有变更,生成 Changelog(追加与撤回) fn generate_changelog(&mut self) -> Result> { let mut output_keys = Vec::with_capacity(self.updated_keys.len() * 2); let mut output_values = vec![Vec::with_capacity(self.updated_keys.len() * 2); self.aggregates.len()]; let mut is_retracts = Vec::with_capacity(self.updated_keys.len() * 2); - // 提取变更 let (updated_keys, updated_values): (Vec<_>, Vec<_>) = mem::take(&mut self.updated_keys).into_iter().unzip(); let mut deleted_keys = vec![]; @@ -606,7 +598,6 @@ impl IncrementalAggregatingFunc { let append = self.evaluate(&k.0)?; if let Some(v) = retract { - // 如果没有变化,直接跳过 if v.iter().zip(append.iter()).take(v.len() - 1).all(|(a, b)| a == b) { continue; } is_retracts.push(true); output_keys.push(k.clone()); @@ -663,7 +654,6 @@ fn set_retract_metadata(metadata: ArrayRef, is_retract: Arc) -> Ar } // ========================================================================= -// 实现全新的 Actor MessageOperator 接口 // ========================================================================= #[async_trait::async_trait] @@ -683,7 +673,6 @@ impl MessageOperator for IncrementalAggregatingFunc { batch: RecordBatch, _ctx: &mut TaskContext, ) -> Result> { - // 数据进入仅更新内存中的 HashMap,暂不发送数据 if self.has_routing_keys { self.keyed_aggregate(&batch)?; } else { @@ -698,9 +687,7 @@ impl MessageOperator for 
IncrementalAggregatingFunc { _watermark: Watermark, _ctx: &mut TaskContext, ) -> Result> { - // 如果是基于时间的 flush (可根据业务决定是否在水位线推进时 flush) if let Some(changelog_batch) = self.generate_changelog()? { - // Forward 表示按原路直连发送给下游 Ok(vec![StreamOutput::Forward(changelog_batch)]) } else { Ok(vec![]) @@ -747,7 +734,6 @@ impl MessageOperator for IncrementalAggregatingFunc { } // ========================================================================= -// 算子构造器保持对外 API 兼容 // ========================================================================= pub struct IncrementalAggregatingConstructor; diff --git a/src/runtime/streaming/operators/grouping/updating_cache.rs b/src/runtime/streaming/operators/grouping/updating_cache.rs index bdba9fa7..2172535b 100644 --- a/src/runtime/streaming/operators/grouping/updating_cache.rs +++ b/src/runtime/streaming/operators/grouping/updating_cache.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 按 key 的增量状态缓存:LRU + TTL(idle),供 [`super::incremental_aggregate`] 等使用。 use std::borrow::Borrow; use std::collections::HashMap; @@ -35,7 +34,6 @@ struct Node { next: Option, } -/// 基于数组槽位 + 双向链表(LRU)的 UpdatingCache,支持按代更新与 TTL 逐出。 pub struct UpdatingCache { map: HashMap, nodes: Vec>, diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs index 7fe32d6e..ef49c323 100644 --- a/src/runtime/streaming/operators/joins/join_instance.rs +++ b/src/runtime/streaming/operators/joins/join_instance.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
瞬时 JOIN:双通道喂入 DataFusion 物理计划,水位线推进时闭合实例并抽干结果(纯内存版)。 use anyhow::{anyhow, Result}; use arrow::compute::{max, min, partition, sort_to_indices, take}; @@ -55,7 +54,6 @@ impl JoinSide { } } -/// 瞬时 JOIN 执行实例:保存通道;窗口闭合时关闭通道并同步抽干 `SendableRecordBatchStream`。 struct JoinInstance { left_tx: UnboundedSender, right_tx: UnboundedSender, @@ -76,7 +74,6 @@ impl JoinInstance { } } - /// 关闭输入流,促使执行计划结束,并拉取全部 JOIN 结果。 async fn close_and_drain(self) -> Result> { drop(self.left_tx); drop(self.right_tx); @@ -264,8 +261,6 @@ impl MessageOperator for InstantJoinOperator { } } -/// 与 `OperatorConstructor` 类似的配置入口;返回 [`InstantJoinOperator`](实现 [`MessageOperator`]), -/// 而非 `ConstructedOperator`(后者仅包装 `ArrowOperator`)。 pub struct InstantJoinConstructor; impl InstantJoinConstructor { diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs index 9e6de6c9..91fd38a6 100644 --- a/src/runtime/streaming/operators/joins/join_with_expiration.rs +++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs @@ -10,8 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 带 TTL 的 Key-Time Join:纯内存状态版 + DataFusion 物理计划成对计算。 -//! 
完全移除了底层 TableManager 和持久化状态依赖。 use anyhow::{anyhow, Result}; use arrow::compute::concat_batches; @@ -43,7 +41,6 @@ enum JoinSide { } // ============================================================================ -// 纯内存状态缓冲区 (In-Memory TTL Buffer) // ============================================================================ struct StateBuffer { @@ -82,7 +79,6 @@ impl StateBuffer { } // ============================================================================ -// 算子主体 // ============================================================================ pub struct JoinWithExpirationOperator { @@ -229,7 +225,6 @@ impl MessageOperator for JoinWithExpirationOperator { } // ============================================================================ -// 构造器 // ============================================================================ pub struct JoinWithExpirationConstructor; diff --git a/src/runtime/streaming/operators/key_by.rs b/src/runtime/streaming/operators/key_by.rs index a432011d..8d0da441 100644 --- a/src/runtime/streaming/operators/key_by.rs +++ b/src/runtime/streaming/operators/key_by.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 物理网络路由算子:利用 DataFusion 物理表达式提取 Key,基于 Hash 排序执行零拷贝切片路由。 use anyhow::{anyhow, Result}; use arrow_array::{Array, RecordBatch, UInt64Array}; @@ -65,7 +64,6 @@ impl MessageOperator for KeyByOperator { return Ok(vec![]); } - // 1. 执行物理表达式,提取所有 Key 列 let mut key_columns = Vec::with_capacity(self.key_extractors.len()); for expr in &self.key_extractors { let column_array = expr @@ -76,18 +74,15 @@ impl MessageOperator for KeyByOperator { key_columns.push(column_array); } - // 2. 向量化计算 Hash 数组 let mut hash_buffer = vec![0u64; num_rows]; create_hashes(&key_columns, &self.random_state, &mut hash_buffer) .map_err(|e| anyhow!("Failed to compute hashes: {}", e))?; let hash_array = UInt64Array::from(hash_buffer); - // 3. 
基于 Hash 值排序,获取重排 Indices let sorted_indices = sort_to_indices(&hash_array, None, None) .map_err(|e| anyhow!("Failed to sort hashes: {}", e))?; - // 4. 对齐重排 Hash 数组和原始 Batch let sorted_hashes_ref = take(&hash_array, &sorted_indices, None)?; let sorted_hashes = sorted_hashes_ref .as_any() @@ -101,7 +96,6 @@ impl MessageOperator for KeyByOperator { .collect(); let sorted_batch = RecordBatch::try_new(batch.schema(), sorted_columns?)?; - // 5. 零拷贝微批切片 —— 按 Hash 值连续段切分并标记路由意图 let mut outputs = Vec::new(); let mut start_idx = 0; diff --git a/src/runtime/streaming/operators/key_operator.rs b/src/runtime/streaming/operators/key_operator.rs index 5dfd66f6..0202f924 100644 --- a/src/runtime/streaming/operators/key_operator.rs +++ b/src/runtime/streaming/operators/key_operator.rs @@ -10,12 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 物理网络路由算子:利用 DataFusion 物理表达式提取 Key,基于 Hash 排序执行零拷贝切片路由。 //! -//! 提供两种算子: -//! - [`KeyByOperator`]:纯 Key 提取 + Hash 路由,适用于简单的 GROUP BY / PARTITION BY。 -//! - [`KeyExecutionOperator`]:先执行完整物理计划,再按指定列 Hash 路由,适用于需要先做 -//! 计算(如聚合结果映射)再分区的场景。 use anyhow::{anyhow, Result}; use arrow_array::{Array, ArrayRef, RecordBatch, UInt64Array}; @@ -72,7 +67,6 @@ impl MessageOperator for KeyByOperator { return Ok(vec![]); } - // 1. 执行物理表达式,提取所有 Key 列 let mut key_columns = Vec::with_capacity(self.key_extractors.len()); for expr in &self.key_extractors { let column_array = expr @@ -83,18 +77,15 @@ impl MessageOperator for KeyByOperator { key_columns.push(column_array); } - // 2. 向量化计算 Hash 数组 let mut hash_buffer = vec![0u64; num_rows]; create_hashes(&key_columns, &self.random_state, &mut hash_buffer) .map_err(|e| anyhow!("Failed to compute hashes: {}", e))?; let hash_array = UInt64Array::from(hash_buffer); - // 3. 基于 Hash 值排序,获取重排 Indices let sorted_indices = sort_to_indices(&hash_array, None, None) .map_err(|e| anyhow!("Failed to sort hashes: {}", e))?; - // 4. 
对齐重排 Hash 数组和原始 Batch let sorted_hashes_ref = take(&hash_array, &sorted_indices, None)?; let sorted_hashes = sorted_hashes_ref .as_any() @@ -108,7 +99,6 @@ impl MessageOperator for KeyByOperator { .collect(); let sorted_batch = RecordBatch::try_new(batch.schema(), sorted_columns?)?; - // 5. 零拷贝微批切片 —— 按 Hash 值连续段切分并标记路由意图 let mut outputs = Vec::new(); let mut start_idx = 0; @@ -177,12 +167,8 @@ impl KeyByConstructor { } // =========================================================================== -// KeyExecutionOperator — 先执行物理计划,再按 Key 列 Hash 路由 // =========================================================================== -/// 键控路由执行算子:先驱动 DataFusion 物理计划完成计算(如聚合结果映射), -/// 再根据 `key_fields` 指定列计算 Hash 并以 [`StreamOutput::Keyed`] 输出, -/// 实现算子内部分区。 pub struct KeyExecutionOperator { name: String, executor: StatelessPhysicalExecutor, @@ -219,7 +205,6 @@ impl MessageOperator for KeyExecutionOperator { ) -> Result> { let mut outputs = Vec::new(); - // 1. 执行物理转换 let mut stream = self.executor.process_batch(batch).await?; while let Some(batch_result) = stream.next().await { @@ -229,7 +214,6 @@ impl MessageOperator for KeyExecutionOperator { continue; } - // 2. 提取 Key 列并计算 Hash let key_columns: Vec = self .key_fields .iter() @@ -241,7 +225,6 @@ impl MessageOperator for KeyExecutionOperator { .map_err(|e| anyhow!("hash compute: {e}"))?; let hash_array = UInt64Array::from(hash_buffer); - // 3. 基于 Hash 排序,获取重排 Indices let sorted_indices = sort_to_indices(&hash_array, None, None) .map_err(|e| anyhow!("sort hashes: {e}"))?; @@ -259,7 +242,6 @@ impl MessageOperator for KeyExecutionOperator { let sorted_batch = RecordBatch::try_new(out_batch.schema(), sorted_columns?)?; - // 4. 
零拷贝切片 —— 按 Hash 连续段分组,标记 Keyed 路由意图 let mut start_idx = 0; while start_idx < num_rows { let current_hash = sorted_hashes.value(start_idx); diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs index dc8b39b7..cb8412d8 100644 --- a/src/runtime/streaming/operators/mod.rs +++ b/src/runtime/streaming/operators/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 内置算子。 pub mod grouping; pub mod joins; @@ -44,7 +43,6 @@ use async_trait::async_trait; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{CheckpointBarrier, Watermark}; -/// 透传数据。 pub struct PassthroughOperator { name: String, } diff --git a/src/runtime/streaming/operators/projection.rs b/src/runtime/streaming/operators/projection.rs index 33fe0d51..3afb93ef 100644 --- a/src/runtime/streaming/operators/projection.rs +++ b/src/runtime/streaming/operators/projection.rs @@ -10,8 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 高性能投影算子:直接操作 Arrow Array 执行列映射与标量运算, -//! 避开 DataFusion 执行树开销,适用于 SELECT 字段筛选和简单标量计算。 use anyhow::Result; use arrow_array::RecordBatch; diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs index aecef032..9f82a4ce 100644 --- a/src/runtime/streaming/operators/sink/kafka/mod.rs +++ b/src/runtime/streaming/operators/sink/kafka/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
Kafka Sink:实现 [`crate::runtime::streaming::api::operator::MessageOperator`],支持 At-Least-Once 与 Exactly-Once(事务 + 二阶段提交)。 use anyhow::{anyhow, bail, Result}; use arrow_array::cast::AsArray; @@ -34,7 +33,6 @@ use crate::runtime::streaming::StreamOutput; use crate::sql::common::constants::factory_operator_name; use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark}; // ============================================================================ -// 1. 领域模型:一致性级别与事务状态机 // ============================================================================ #[derive(Debug, Clone)] @@ -50,7 +48,6 @@ struct TransactionalState { } // ============================================================================ -// 2. 核心算子外壳 // ============================================================================ pub struct KafkaSinkOperator { @@ -205,7 +202,6 @@ fn row_key_bytes(batch: &RecordBatch, row: usize, col: usize) -> Option> } // ============================================================================ -// 3. 实现 MessageOperator 协议 // ============================================================================ #[async_trait] diff --git a/src/runtime/streaming/operators/sink/mod.rs b/src/runtime/streaming/operators/sink/mod.rs index 93b3b0ee..8abd2985 100644 --- a/src/runtime/streaming/operators/sink/mod.rs +++ b/src/runtime/streaming/operators/sink/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 与外部系统对接的 Sink 实现(Kafka 等)。 pub mod kafka; diff --git a/src/runtime/streaming/operators/source/kafka/mod.rs b/src/runtime/streaming/operators/source/kafka/mod.rs index 59507c2e..d0de692a 100644 --- a/src/runtime/streaming/operators/source/kafka/mod.rs +++ b/src/runtime/streaming/operators/source/kafka/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
Kafka 源算子:实现 [`crate::runtime::streaming::api::source::SourceOperator`],由 [`crate::runtime::streaming::execution::SourceRunner`] 轮询 `fetch_next`。 use anyhow::{anyhow, Context as _, Result}; use arrow_array::RecordBatch; @@ -31,7 +30,6 @@ use crate::runtime::streaming::format::{BadDataPolicy, DataDeserializer, Format} use crate::sql::common::{CheckpointBarrier, MetadataField}; use crate::sql::common::fs_schema::FieldValueType; // ============================================================================ -// 1. 领域模型:Kafka 状态与配置 // ============================================================================ #[derive(Copy, Clone, Debug, Encode, Decode, PartialEq, PartialOrd)] @@ -40,7 +38,6 @@ pub struct KafkaState { offset: i64, } -/// 增量反序列化缓冲 trait:Source 逐条 `deserialize_slice`,攒满或超时后 `flush_buffer` 输出 [`RecordBatch`]。 pub trait BatchDeserializer: Send + 'static { fn deserialize_slice( &mut self, @@ -53,15 +50,12 @@ pub trait BatchDeserializer: Send + 'static { fn flush_buffer(&mut self) -> Result>; - /// 缓冲区是否无任何待反序列化数据。 fn is_empty(&self) -> bool; } // --------------------------------------------------------------------------- -// BufferedDeserializer — 基于 DataDeserializer 的默认 BatchDeserializer 实现 // --------------------------------------------------------------------------- -/// 将 [`DataDeserializer`] 包装为 [`BatchDeserializer`]:逐条缓存 payload,达到阈值后批量反序列化。 pub struct BufferedDeserializer { inner: DataDeserializer, buffer: Vec>, @@ -120,7 +114,6 @@ impl SourceOffset { } // ============================================================================ -// 2. 
核心算子外壳 // ============================================================================ const KAFKA_POLL_TIMEOUT: Duration = Duration::from_millis(100); @@ -144,7 +137,6 @@ pub struct KafkaSourceOperator { current_offsets: HashMap, is_empty_assignment: bool, - /// 上次成功 flush 出 batch 的时间,用于低流量时按逗留时间强制发车。 last_flush_time: Instant, } @@ -251,7 +243,6 @@ impl KafkaSourceOperator { } // ============================================================================ -// 3. 实现 SourceOperator 协议 // ============================================================================ #[async_trait] @@ -288,7 +279,6 @@ impl SourceOperator for KafkaSourceOperator { let offset = msg.offset(); let timestamp = msg.timestamp().to_millis().unwrap_or(0); - // 无论是否有 payload(含 Tombstone),都必须推进位点,否则会永久卡在墓碑消息上。 self.current_offsets.insert(partition, offset); if let Some(payload) = msg.payload() { @@ -345,7 +335,6 @@ impl SourceOperator for KafkaSourceOperator { Err(anyhow!("Kafka error: {}", e)) } Err(_) => { - // 超时内无新消息:若缓冲区仍有积压,强制 flush,避免低流量下数据长期滞留。 if !self.deserializer.is_empty() { if let Some(batch) = self.deserializer.flush_buffer()? { self.last_flush_time = Instant::now(); diff --git a/src/runtime/streaming/operators/source/mod.rs b/src/runtime/streaming/operators/source/mod.rs index 687e2289..76f3639a 100644 --- a/src/runtime/streaming/operators/source/mod.rs +++ b/src/runtime/streaming/operators/source/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
与外部系统对接的源实现(Kafka 等)。 pub mod kafka; diff --git a/src/runtime/streaming/operators/stateless_physical_executor.rs b/src/runtime/streaming/operators/stateless_physical_executor.rs index 188015e2..6c1e5c90 100644 --- a/src/runtime/streaming/operators/stateless_physical_executor.rs +++ b/src/runtime/streaming/operators/stateless_physical_executor.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 无状态物理计划执行器:将单批次写入 `SingleLockedBatch` 并让 DataFusion 计划消费。 use std::sync::{Arc, RwLock}; diff --git a/src/runtime/streaming/operators/value_execution.rs b/src/runtime/streaming/operators/value_execution.rs index c3b3d525..86596512 100644 --- a/src/runtime/streaming/operators/value_execution.rs +++ b/src/runtime/streaming/operators/value_execution.rs @@ -10,8 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 通用无状态执行算子:驱动 DataFusion 物理计划(Filter, Case When, Scalar UDF 等), -//! 不改变分区状态,适用于 Map / Filter 阶段。 use anyhow::Result; use arrow_array::RecordBatch; diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs index 63f5acec..b512f842 100644 --- a/src/runtime/streaming/operators/watermark/watermark_generator.rs +++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
表达式水位生成器:与 worker `arrow/watermark_generator` 对齐,通过 [`StreamOutput::Watermark`] 向下游广播。 use anyhow::{anyhow, Result}; use arrow::compute::kernels::aggregate; @@ -35,7 +34,6 @@ use protocol::grpc::api::ExpressionWatermarkConfig; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_millis, CheckpointBarrier, FsSchema, Watermark}; -/// 需持久化到 Checkpoint 的状态(与 worker `WatermarkGeneratorState` 语义一致)。 #[derive(Debug, Copy, Clone, Encode, Decode, PartialEq, Eq)] pub struct WatermarkGeneratorState { pub last_watermark_emitted_at: SystemTime, @@ -86,8 +84,6 @@ impl WatermarkGeneratorOperator { Some(from_nanos(max_ts as u128)) } - /// 水位线计算必须取评估后数组的 **Max**,不能取 Min:同一 Batch 内多行时, - /// Min 会低估“已见事件时间”的安全基线(例如 ts-5s 在两行上 min 会偏早)。 fn evaluate_watermark(&self, batch: &RecordBatch) -> Result { let watermark_array = self .expression @@ -137,14 +133,12 @@ impl MessageOperator for WatermarkGeneratorOperator { let new_watermark = self.evaluate_watermark(&batch)?; - // 死守单调递增底线,绝不倒流 self.state.max_watermark = self.state.max_watermark.max(new_watermark); let time_since_last_emit = max_batch_ts .duration_since(self.state.last_watermark_emitted_at) .unwrap_or(Duration::ZERO); - // 空闲唤醒或达到发射间隔则发射水印 if self.is_idle || time_since_last_emit > self.interval { debug!( "[{}] emitting expression watermark {}", @@ -181,7 +175,6 @@ impl MessageOperator for WatermarkGeneratorOperator { .last_event_wall .elapsed() .unwrap_or(Duration::ZERO); - // 系统时钟超时,发射 Idle 水印,避免下游一直等不到推进 if !self.is_idle && elapsed > idle_timeout { info!( "task [{}] entering Idle after {:?}", diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs index aaf65cf1..d7257223 100644 --- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs @@ -10,8 +10,6 @@ // See the License for the specific language governing 
permissions and // limitations under the License. -//! 会话窗口聚合:纯内存版,完全脱离持久化状态存储。 -//! 利用 BTreeMap 充当优先队列,数据天然在内存中进行 Gap 合并与触发。 use anyhow::{anyhow, bail, Context, Result}; use arrow::compute::{ @@ -47,7 +45,6 @@ use crate::sql::common::converter::Converter; use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec}; use crate::sql::schema::utils::window_arrow_struct; // ============================================================================ -// 领域模型与纯内存状态 // ============================================================================ struct SessionWindowConfig { @@ -179,7 +176,7 @@ struct SessionWindowResult { struct KeySessionState { config: Arc, active_session: Option, - buffered_batches: BTreeMap>, // 纯内存缓冲 + buffered_batches: BTreeMap>, } impl KeySessionState { @@ -335,7 +332,6 @@ fn build_session_output_schema( } // ============================================================================ -// 算子本体:负责处理输入数据与时间流,路由给具体的 KeySessionState // ============================================================================ pub struct SessionWindowOperator { @@ -662,7 +658,6 @@ impl MessageOperator for SessionWindowOperator { } // ============================================================================ -// 构造器 // ============================================================================ pub struct SessionAggregatingWindowConstructor; diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs index 6f0aa7f9..7bad21bc 100644 --- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs @@ -10,9 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 滑动窗口聚合:纯内存版。 -//! 完全依赖内部的 TieredRecordBatchHolder 和 ActiveBin 在内存中进行计算, -//! 
摆脱 TableManager 依赖,遇到 Barrier 自动透传。 use anyhow::{anyhow, bail, Result}; use arrow::compute::{partition, sort_to_indices, take}; @@ -45,8 +42,6 @@ use crate::runtime::streaming::StreamOutput; use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark}; use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec}; // ============================================================================ -// 纯内存状态:阶梯式时间面板 (Tiered panes) -// 这部分本身就是极佳的内存数据结构,原样保留! // ============================================================================ #[derive(Default, Debug)] @@ -217,7 +212,6 @@ impl TieredRecordBatchHolder { } // ============================================================================ -// Per-bin partial aggregation (纯内存缓冲区) // ============================================================================ struct ActiveBin { @@ -264,7 +258,6 @@ impl ActiveBin { } // ============================================================================ -// 算子主体 // ============================================================================ pub struct SlidingWindowOperator { @@ -473,7 +466,6 @@ impl MessageOperator for SlidingWindowOperator { } // ============================================================================ -// 构造器 // ============================================================================ pub struct SlidingAggregatingWindowConstructor; diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs index 30724f59..093823bb 100644 --- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
滚动(tumbling)窗口聚合:与 worker `arrow/tumbling_aggregating_window` 对齐,实现 [`MessageOperator`]。 use anyhow::{anyhow, Result}; use arrow::compute::{partition, sort_to_indices, take}; diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs index f6ae2a1a..d067eccf 100644 --- a/src/runtime/streaming/operators/windows/window_function.rs +++ b/src/runtime/streaming/operators/windows/window_function.rs @@ -10,9 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 窗口函数(按事件时间分桶的瞬时执行):纯内存版。 -//! 完全依赖内部的 ActiveWindowExec 通道在内存中缓冲数据, -//! 摆脱持久化状态存储的依赖,遇到 Barrier 自动透传。 use anyhow::{anyhow, Result}; use arrow::compute::{max, min}; @@ -41,7 +38,6 @@ use crate::sql::common::time_utils::print_time; use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec}; // ============================================================================ -// 纯内存执行缓冲区 // ============================================================================ struct ActiveWindowExec { @@ -77,7 +73,6 @@ impl ActiveWindowExec { } // ============================================================================ -// 算子主体 // ============================================================================ pub struct WindowFunctionOperator { @@ -199,7 +194,6 @@ impl MessageOperator for WindowFunctionOperator { let mut final_outputs = Vec::new(); - // 与 worker 一致:仅当桶时间戳 **严格小于** 当前事件时间水位时才结算(`watermark <= ts` 时保留)。 let mut expired_timestamps = Vec::new(); for &k in self.active_execs.keys() { if k < current_time { @@ -233,7 +227,6 @@ impl MessageOperator for WindowFunctionOperator { } // ============================================================================ -// 构造器 // ============================================================================ pub struct WindowFunctionConstructor; diff --git a/src/runtime/streaming/protocol/control.rs b/src/runtime/streaming/protocol/control.rs 
index d225e2e8..d337046e 100644 --- a/src/runtime/streaming/protocol/control.rs +++ b/src/runtime/streaming/protocol/control.rs @@ -10,14 +10,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 控制平面:与 [`super::event::StreamEvent`] 队列分离的高优先级指令。 use serde::{Deserialize, Serialize}; use std::time::Duration; use tokio::sync::mpsc::{self, Receiver, Sender}; use crate::sql::common::CheckpointBarrier; -/// 可序列化的 barrier 载荷(`CheckpointBarrier` 本身未实现 `serde`,供 RPC / 持久化使用)。 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct CheckpointBarrierWire { pub epoch: u32, @@ -55,7 +53,6 @@ impl From for CheckpointBarrier { } } -/// JobManager / 调度器下发的高优控制指令。 #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ControlCommand { Start, @@ -63,7 +60,6 @@ pub enum ControlCommand { DropState, Commit { epoch: u32 }, UpdateConfig { config_json: String }, - /// 通常由 [`crate::runtime::streaming::SourceRunner`] 接收,源头落盘后向下游注入 `Barrier`。 TriggerCheckpoint { barrier: CheckpointBarrierWire }, } diff --git a/src/runtime/streaming/protocol/event.rs b/src/runtime/streaming/protocol/event.rs index efd43952..b78b7fbc 100644 --- a/src/runtime/streaming/protocol/event.rs +++ b/src/runtime/streaming/protocol/event.rs @@ -13,7 +13,6 @@ use arrow_array::RecordBatch; use crate::sql::common::{CheckpointBarrier, Watermark}; -/// 核心数据面事件 #[derive(Debug, Clone)] pub enum StreamEvent { Data(RecordBatch), diff --git a/src/runtime/streaming/protocol/mod.rs b/src/runtime/streaming/protocol/mod.rs index f859df28..63f7f0bf 100644 --- a/src/runtime/streaming/protocol/mod.rs +++ b/src/runtime/streaming/protocol/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
协议层:数据事件、控制命令、水位线合并与比较语义。 pub mod control; pub mod event; diff --git a/src/runtime/streaming/protocol/stream_out.rs b/src/runtime/streaming/protocol/stream_out.rs index 0f6619f9..fc7b9bba 100644 --- a/src/runtime/streaming/protocol/stream_out.rs +++ b/src/runtime/streaming/protocol/stream_out.rs @@ -13,15 +13,10 @@ use arrow_array::RecordBatch; use crate::sql::common::Watermark; -/// 算子产出的数据及下游 **路由意图**(由 `SubtaskRunner` 选择 `collect` / `collect_keyed` / `broadcast` / 水位广播)。 #[derive(Debug, Clone)] pub enum StreamOutput { - /// 发往所有下游(与 `TaskContext::collect` 一致:当前实现为每条边各发一份 `Data`)。 Forward(RecordBatch), - /// 按 `key_hash % outboxes.len()` 发往单一分区(KeyBy / Shuffle)。 Keyed(u64, RecordBatch), - /// 广播同一份数据到所有下游边(如 broadcast join)。 Broadcast(RecordBatch), - /// 向所有下游广播水位线(如表达式水位生成器)。 Watermark(Watermark), } diff --git a/src/runtime/streaming/protocol/tracked.rs b/src/runtime/streaming/protocol/tracked.rs index 5034abd2..d4360627 100644 --- a/src/runtime/streaming/protocol/tracked.rs +++ b/src/runtime/streaming/protocol/tracked.rs @@ -15,11 +15,7 @@ use std::sync::Arc; use crate::runtime::streaming::memory::MemoryTicket; use crate::runtime::streaming::protocol::event::StreamEvent; -/// 在 Channel 中实际传输的事件,完美解决多路广播 (Broadcast) 的内存管理问题。 /// -/// `MemoryTicket` 包在 `Arc` 中:如果 Event 被发送给 N 个下游分区(Broadcast 路由), -/// 只需 Clone 此 `TrackedEvent`,底层数据共享一块内存,Arc 引用计数 +N。 -/// 只有当所有下游全部处理完并 Drop 后,Arc 归零,内存才被真正释放给 Pool。 #[derive(Debug, Clone)] pub struct TrackedEvent { pub event: StreamEvent, diff --git a/src/runtime/streaming/protocol/watermark.rs b/src/runtime/streaming/protocol/watermark.rs index 9c039aa5..f6e8388a 100644 --- a/src/runtime/streaming/protocol/watermark.rs +++ b/src/runtime/streaming/protocol/watermark.rs @@ -10,12 +10,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
水位线类型来自 `arroyo_types::Watermark`;此处提供 **多路对齐合并** 与 **单调推进** 判断。 use crate::sql::common::Watermark; -/// 多输入对齐:`Idle` 不参与事件时间取最小;若全部为 `Idle` 则输出 `Idle`。 -/// 任一路尚未有水位线时返回 `None`(木桶短板未齐)。 pub fn merge_watermarks(per_input: &[Option]) -> Option { if per_input.iter().any(|w| w.is_none()) { return None; @@ -46,7 +43,6 @@ pub fn merge_watermarks(per_input: &[Option]) -> Option { } } -/// `new` 相对 `previous` 是否为 **严格推进**;`previous == None` 时恒为真。 pub fn watermark_strictly_advances(new: Watermark, previous: Option) -> bool { match previous { None => true, diff --git a/src/runtime/util/mod.rs b/src/runtime/util/mod.rs index 3b4c7e60..0e3a3f7b 100644 --- a/src/runtime/util/mod.rs +++ b/src/runtime/util/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 运行时跨子系统复用的工具函数(物理计划 proto 解码等)。 mod physical_aggregate; diff --git a/src/runtime/util/physical_aggregate.rs b/src/runtime/util/physical_aggregate.rs index 83a6e3bd..33dd1e9f 100644 --- a/src/runtime/util/physical_aggregate.rs +++ b/src/runtime/util/physical_aggregate.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 从 DataFusion proto 物理表达式节点解码聚合(UDAF)表达式。 use std::sync::Arc; @@ -26,7 +25,6 @@ use datafusion_proto::protobuf::physical_aggregate_expr_node::AggregateFunction; use datafusion_proto::protobuf::physical_expr_node::ExprType; use datafusion_proto::protobuf::{PhysicalExprNode, proto_error}; -/// 从 `PhysicalExprNode` 解码 UDAF 聚合表达式(与 worker `arrow/mod` 历史路径语义一致)。 pub fn decode_aggregate( schema: &SchemaRef, name: &str, diff --git a/src/sql/common/constants.rs b/src/sql/common/constants.rs index cf2a39cc..8eb697e2 100644 --- a/src/sql/common/constants.rs +++ b/src/sql/common/constants.rs @@ -10,10 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
SQL / 流算子相关的**名称与标识符常量**(标量函数名、窗口 TVF、逻辑扩展节点名、CDC 字段、 -//! 运行时 blueprint 字符串、`OperatorName` 特性标签等);与 [`super::with_option_keys`](WITH 选项键)分工。 -// ── 内置标量 UDF(`register_all` / `ScalarUDFImpl::name`)────────────────────── pub mod scalar_fn { pub const GET_FIRST_JSON_OBJECT: &str = "get_first_json_object"; @@ -23,7 +20,6 @@ pub mod scalar_fn { pub const MULTI_HASH: &str = "multi_hash"; } -// ── 窗口 TVF(`hop` / `tumble` / `session` 等,与 DataFusion 解析一致)────────── pub mod window_fn { pub const HOP: &str = "hop"; @@ -31,16 +27,13 @@ pub mod window_fn { pub const SESSION: &str = "session"; } -// ── 流规划期占位标量 UDF(`StreamPlanningContextBuilder::with_streaming_extensions`)── pub mod planning_placeholder_udf { pub const UNNEST: &str = "unnest"; pub const ROW_TIME: &str = "row_time"; - /// `List` 内元素字段名,仅用于占位签名的 Arrow 形态 pub const LIST_ELEMENT_FIELD: &str = "field"; } -// ── `OperatorName` 在指标 / 特性集合中使用的 kebab-case 标签 ───────────────── pub mod operator_feature { pub const ASYNC_UDF: &str = "async-udf"; @@ -57,7 +50,6 @@ pub mod operator_feature { pub const CONNECTOR_SINK: &str = "connector-sink"; } -// ── 逻辑计划扩展节点的 `UserDefinedLogicalNodeCore::name` / 类型字符串 ──────── pub mod extension_node { pub const STREAM_WINDOW_AGGREGATE: &str = "StreamWindowAggregateNode"; @@ -78,19 +70,16 @@ pub mod extension_node { pub const PACK_DEBEZIUM_ENVELOPE: &str = "PackDebeziumEnvelopeNode"; } -// ── gRPC / proto 算子配置里的 `name` 字段(与 `OperatorName` 展示相关)────────── pub mod proto_operator_name { pub const TUMBLING_WINDOW: &str = "TumblingWindow"; pub const UPDATING_AGGREGATE: &str = "UpdatingAggregate"; pub const WINDOW_FUNCTION: &str = "WindowFunction"; - /// 滑动窗口 human-readable 描述片段(非固定 id) pub const SLIDING_WINDOW_LABEL: &str = "sliding window"; pub const INSTANT_WINDOW: &str = "InstantWindow"; pub const INSTANT_WINDOW_LABEL: &str = "instant window"; } -// ── 下发到运行时的 blueprint / 算子种类字符串 ────────────────────────────────── pub mod runtime_operator_kind { pub const STREAMING_JOIN: &str = 
"streaming_join"; @@ -98,7 +87,6 @@ pub mod runtime_operator_kind { pub const STREAMING_WINDOW_EVALUATOR: &str = "streaming_window_evaluator"; } -// ── Worker [`OperatorFactory`] 注册键(须与任务包 `operator_name`、`OperatorName::Display` 一致)── pub mod factory_operator_name { pub const CONNECTOR_SOURCE: &str = "ConnectorSource"; @@ -107,7 +95,6 @@ pub mod factory_operator_name { pub const KAFKA_SINK: &str = "KafkaSink"; } -// ── Debezium CDC 信封字段 ─────────────────────────────────────────────────── pub mod cdc { pub const BEFORE: &str = "before"; @@ -115,39 +102,29 @@ pub mod cdc { pub const OP: &str = "op"; } -// ── updating aggregate 状态元数据 struct 字段 ──────────────────────────────── pub mod updating_state_field { pub const IS_RETRACT: &str = "is_retract"; pub const ID: &str = "id"; } -// ── 计划里常用的列名 / 别名 ─────────────────────────────────────────────────── pub mod sql_field { - /// 异步 UDF 重写后的结果列名。 pub const ASYNC_RESULT: &str = "__async_result"; pub const DEFAULT_KEY_LABEL: &str = "key"; pub const DEFAULT_PROJECTION_LABEL: &str = "projection"; - /// `WATERMARK FOR … AS expr` 生成的计算列名(与 `TemporalPipelineConfig` 一致)。 pub const COMPUTED_WATERMARK: &str = "__watermark"; - /// 流表事件时间物理列名(与 DataFusion 计划注入列一致)。 pub const TIMESTAMP_FIELD: &str = "_timestamp"; - /// Changelog / updating 模式下的元数据列名。 pub const UPDATING_META_FIELD: &str = "_updating_meta"; } -// ── `SqlConfig` / `PlanningOptions` 默认值 ──────────────────────────────────── pub mod sql_planning_default { pub const DEFAULT_PARALLELISM: usize = 4; - /// [`PlanningOptions::default`] 的 TTL(秒):24h。 pub const PLANNING_TTL_SECS: u64 = 24 * 60 * 60; } -// ── `ConnectorOptions` / WITH 解析用到的字面量 ──────────────────────────────── -/// 单引号字符串形式的布尔取值(见 [`super::connector_options::ConnectorOptions::pull_opt_bool`])。 pub mod with_opt_bool_str { pub const TRUE: &str = "true"; pub const YES: &str = "yes"; @@ -155,7 +132,6 @@ pub mod with_opt_bool_str { pub const NO: &str = "no"; } -/// `INTERVAL '…'` / 间隔字符串解析中的单位 token(小写;解析前会对单位做 
`to_lowercase`)。 pub mod interval_duration_unit { pub const SECOND: &str = "second"; pub const SECONDS: &str = "seconds"; @@ -171,9 +147,7 @@ pub mod interval_duration_unit { pub const D: &str = "d"; } -// ── `format` / `framing.method` / `bad_data` 的 WITH 取值(见 `format_from_opts`)────── -/// `format = '…'` 的名称(小写;`Format::from_opts` 会对值做 `to_lowercase`)。 pub mod connection_format_value { pub const JSON: &str = "json"; pub const DEBEZIUM_JSON: &str = "debezium_json"; @@ -184,19 +158,16 @@ pub mod connection_format_value { pub const RAW_BYTES: &str = "raw_bytes"; } -/// `framing.method` 合法取值(与 `Framing::from_opts` 一致;当前不做大小写折叠)。 pub mod framing_method_value { pub const NEWLINE: &str = "newline"; pub const NEWLINE_DELIMITED: &str = "newline_delimited"; } -/// `bad_data = '…'`(小写;解析前 `to_lowercase`)。 pub mod bad_data_value { pub const FAIL: &str = "fail"; pub const DROP: &str = "drop"; } -// ── `formats.rs` 里枚举的 wire 名(与 serde `snake_case` / `TryFrom` / `FromStr` 一致)──── pub mod timestamp_format_value { pub const RFC3339_SNAKE: &str = "rfc3339"; @@ -225,7 +196,6 @@ pub mod parquet_compression_value { pub const LZ4_RAW: &str = "lz4_raw"; } -// ── `date_part` / `date_trunc` SQL 关键字(小写;解析前对输入做 `to_lowercase`)──────── pub mod date_part_keyword { pub const YEAR: &str = "year"; @@ -253,15 +223,12 @@ pub mod date_trunc_keyword { pub const SECOND: &str = "second"; } -// ── `logical_planner/mod.rs` 物理计划与 Debezium 流水线 ─────────────────────── -/// `FsMemExec` / codec 里表示 join 左右输入的 `table_name`。 pub mod mem_exec_join_side { pub const LEFT: &str = "left"; pub const RIGHT: &str = "right"; } -/// 自定义 `ExecutionPlan::name()`(与 DataFusion explain / 调试一致)。 pub mod physical_plan_node_name { pub const RW_LOCK_READER: &str = "rw_lock_reader"; pub const UNBOUNDED_READER: &str = "unbounded_reader"; @@ -271,18 +238,15 @@ pub mod physical_plan_node_name { pub const TO_DEBEZIUM_EXEC: &str = "to_debezium_exec"; } -/// 流式 `window(start, end)` 标量 UDF 的注册名。 pub mod window_function_udf { pub const 
NAME: &str = "window"; } -/// `window()` UDF 返回 struct 的字段名(与 `window_arrow_struct` 一致)。 pub mod window_interval_field { pub const START: &str = "start"; pub const END: &str = "end"; } -/// Debezium `op` 列中的单字母取值(unroll / pack 路径)。 pub mod debezium_op_short { pub const CREATE: &str = "c"; pub const READ: &str = "r"; @@ -290,7 +254,6 @@ pub mod debezium_op_short { pub const DELETE: &str = "d"; } -// ── 连接器类型短名(工厂注册等)────────────────────────────────────────────── pub mod connector_type { pub const KAFKA: &str = "kafka"; @@ -310,21 +273,17 @@ pub mod connector_type { pub const POSTGRES: &str = "postgres"; } -// ── 连接表 `WITH type = 'source'|'sink'|'lookup'`(`SourceTable::from_options` / `deduce_role`)── pub mod connection_table_role { pub const SOURCE: &str = "source"; pub const SINK: &str = "sink"; - /// 与虚拟 `lookup` 连接器短名相同(亦在 [`SUPPORTED_CONNECTOR_ADAPTERS`] 中)。 pub const LOOKUP: &str = "lookup"; } -/// [`crate::sql::schema::table_role::validate_adapter_availability`] 白名单(与 SQL `connector = '…'` 短名一致)。 pub const SUPPORTED_CONNECTOR_ADAPTERS: &[&str] = &[ connector_type::KAFKA, ]; -// ── Kafka 连接器 WITH 选项取值(`wire_kafka_operator_config`)──────────────── pub mod kafka_with_value { pub const SCAN_LATEST: &str = "latest"; diff --git a/src/sql/common/kafka_catalog.rs b/src/sql/common/kafka_catalog.rs index e54e6901..5d54b1b2 100644 --- a/src/sql/common/kafka_catalog.rs +++ b/src/sql/common/kafka_catalog.rs @@ -10,22 +10,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Kafka 表级与连接级配置(与 JSON Schema / Catalog 对齐)。 //! -//! 放在 [`crate::sql::common`] 而非 `runtime::streaming`,以便 **SQL 规划、Coordinator、连接配置存储** -//! 与 **运行时工厂**(如 `ConnectorSourceDispatcher`)共用同一套类型,避免循环依赖。 //! -//! 
与 [`crate::runtime::streaming::api::source::SourceOffset`] 语义相同但独立定义,运行时可用 `From`/`match` 做映射。 use serde::{Deserialize, Serialize}; use std::collections::HashMap; -// ── KafkaTable:单表 Source/Sink ───────────────────────────────────────── #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct KafkaTable { pub topic: String, - /// Source / Sink 判别及各自字段;与顶层 JSON 扁平字段共用 `type` 标签。 #[serde(flatten)] pub kind: TableType, #[serde(default)] @@ -34,7 +28,6 @@ pub struct KafkaTable { } impl KafkaTable { - /// Schema Registry subject;未配置时与常见约定一致:`{topic}-value`。 pub fn subject(&self) -> String { self.value_subject .clone() @@ -82,7 +75,6 @@ pub enum SinkCommitMode { ExactlyOnce, } -// ── KafkaConfig:集群 / 鉴权 / Schema Registry ─────────────────────────── #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "camelCase")] diff --git a/src/sql/common/operator_config.rs b/src/sql/common/operator_config.rs index b9e40391..ba61b36a 100644 --- a/src/sql/common/operator_config.rs +++ b/src/sql/common/operator_config.rs @@ -40,7 +40,6 @@ pub struct OperatorConfig { pub rate_limit: Option, #[serde(default)] pub metadata_fields: Vec, - /// Arrow 行 schema(Kafka Source/Sink 反序列化、序列化必需)。 #[serde(default)] pub input_schema: Option, } diff --git a/src/sql/common/with_option_keys.rs b/src/sql/common/with_option_keys.rs index e48d9b7a..a42f7405 100644 --- a/src/sql/common/with_option_keys.rs +++ b/src/sql/common/with_option_keys.rs @@ -10,14 +10,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
SQL `WITH` 子句中的选项名,以及部分连接器序列化 JSON 的字段名(单一来源)。 -// ── 通用 / 表级 ───────────────────────────────────────────────────────────── pub const CONNECTOR: &str = "connector"; pub const TYPE: &str = "type"; pub const FORMAT: &str = "format"; -/// 未指定 `format` 选项时的默认格式名(值,非键)。 pub const DEFAULT_FORMAT_VALUE: &str = "json"; pub const BAD_DATA: &str = "bad_data"; pub const PARTITION_BY: &str = "partition_by"; @@ -31,11 +28,9 @@ pub const IDLE_TIME: &str = "idle_time"; pub const LOOKUP_CACHE_MAX_BYTES: &str = "lookup.cache.max_bytes"; pub const LOOKUP_CACHE_TTL: &str = "lookup.cache.ttl"; -// ── 非 Kafka 连接器的 opaque JSON(`CONNECTOR` 与 WITH 选项同名)──────────── pub const CONNECTION_SCHEMA: &str = "connection_schema"; -// ── 后端参数序列化(如 lookup)────────────────────────────────────────────── pub const ADAPTER: &str = "adapter"; @@ -92,6 +87,5 @@ pub const PROTOBUF_LENGTH_DELIMITED: &str = "protobuf.length_delimited"; pub const FRAMING_METHOD: &str = "framing.method"; pub const FRAMING_MAX_LINE_LENGTH: &str = "framing.max_line_length"; -// ── 从字符串 map 推断编码(catalog 等)────────────────────────────────────── pub const FORMAT_DEBEZIUM_FLAG: &str = "format.debezium"; diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs index 6cb00914..d157234b 100644 --- a/src/sql/logical_node/logical/operator_name.rs +++ b/src/sql/logical_node/logical/operator_name.rs @@ -38,7 +38,6 @@ pub enum OperatorName { } impl OperatorName { - /// 特性 / 指标聚合使用的 kebab-case 标签(与 [`crate::sql::common::constants::operator_feature`] 一致)。 pub fn feature_tag(self) -> Option<&'static str> { match self { Self::ExpressionWatermark | Self::ArrowValue | Self::ArrowKey | Self::Projection => None, diff --git a/src/sql/physical/cdc/encode.rs b/src/sql/physical/cdc/encode.rs index b1a1cc2e..07495a38 100644 --- a/src/sql/physical/cdc/encode.rs +++ b/src/sql/physical/cdc/encode.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under 
the License. -//! 内部回撤流压回 Debezium `before` / `after` / `op` 信封。 use std::any::Any; use std::collections::HashMap; @@ -169,7 +168,6 @@ struct ToDebeziumStream { struct_projection: Vec, } -/// 按主键 id 归并一行内的 changelog,输出 before/after 行索引与 op 字母。 fn compact_changelog_by_id<'a>( num_rows: usize, is_retract: &'a BooleanArray, diff --git a/src/sql/physical/cdc/mod.rs b/src/sql/physical/cdc/mod.rs index 743ca966..9e32e67a 100644 --- a/src/sql/physical/cdc/mod.rs +++ b/src/sql/physical/cdc/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Debezium 与内部 changelog 行格式的互转。 mod encode; mod unroll; diff --git a/src/sql/physical/cdc/unroll.rs b/src/sql/physical/cdc/unroll.rs index 345d2642..f40beb06 100644 --- a/src/sql/physical/cdc/unroll.rs +++ b/src/sql/physical/cdc/unroll.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Debezium 信封展开为内部带 retract 语义的行流。 use std::any::Any; use std::pin::Pin; @@ -86,7 +85,6 @@ impl DebeziumUnrollingExec { }) } - /// 分布式反序列化路径:跳过 `try_new` 的 schema 校验(proto 已约定形态)。 pub(crate) fn from_decoded_parts( input: Arc, schema: SchemaRef, diff --git a/src/sql/physical/codec.rs b/src/sql/physical/codec.rs index e90e4b3a..1301ef09 100644 --- a/src/sql/physical/codec.rs +++ b/src/sql/physical/codec.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 分布式物理计划 proto 编解码(`FsExecNode`)。 use std::sync::Arc; diff --git a/src/sql/physical/meta.rs b/src/sql/physical/meta.rs index 5828593c..95dd8fd8 100644 --- a/src/sql/physical/meta.rs +++ b/src/sql/physical/meta.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
Changelog 元数据列:`is_retract`、行 `id`(FixedSizeBinary)。 use std::sync::{Arc, OnceLock}; diff --git a/src/sql/physical/mod.rs b/src/sql/physical/mod.rs index 1ba09eb0..ee63a2be 100644 --- a/src/sql/physical/mod.rs +++ b/src/sql/physical/mod.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 流式物理执行扩展:元数据列、UDF、内存/无界 Reader、CDC 与 proto 编解码。 mod cdc; mod codec; diff --git a/src/sql/physical/readers.rs b/src/sql/physical/readers.rs index 67a250fd..1c785464 100644 --- a/src/sql/physical/readers.rs +++ b/src/sql/physical/readers.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 无界/锁控 `RecordBatch` 数据源与规划期占位 `FsMemExec`。 use std::any::Any; use std::mem; diff --git a/src/sql/physical/udfs.rs b/src/sql/physical/udfs.rs index 267cb6e3..03895fda 100644 --- a/src/sql/physical/udfs.rs +++ b/src/sql/physical/udfs.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! 
流式 `window(start, end)` 标量 UDF。 use std::any::Any; use std::sync::Arc; From 561da59c1761d9ea4546ecbdc3c68b2a9fe85fc7 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 29 Mar 2026 18:13:31 +0800 Subject: [PATCH 29/44] update --- Cargo.lock | 1 - cli/cli/Cargo.toml | 1 - cli/cli/src/repl.rs | 52 ++++- conf/config.yaml | 7 + protocol/proto/storage.proto | 2 + src/config/global_config.rs | 2 + src/config/storage.rs | 24 +++ src/coordinator/analyze/analyzer.rs | 21 +- src/coordinator/dataset/mod.rs | 4 + .../dataset/show_catalog_tables_result.rs | 92 ++++++++ .../dataset/show_create_table_result.rs | 51 +++++ src/coordinator/execution/executor.rs | 53 ++++- src/coordinator/mod.rs | 3 +- src/coordinator/plan/logical_plan_visitor.rs | 25 ++- src/coordinator/plan/mod.rs | 4 + .../plan/show_catalog_tables_plan.rs | 28 +++ .../plan/show_create_table_plan.rs | 30 +++ src/coordinator/plan/visitor.rs | 16 +- src/coordinator/statement/mod.rs | 4 + .../statement/show_catalog_tables.rs | 33 +++ .../statement/show_create_table.rs | 35 +++ src/coordinator/statement/visitor.rs | 15 +- src/server/handler.rs | 7 +- src/sql/common/connector_options.rs | 27 ++- src/sql/frontend_sql_coverage_tests.rs | 16 ++ src/sql/parse.rs | 37 +++- src/sql/schema/catalog_ddl.rs | 199 ++++++++++++++++++ src/sql/schema/mod.rs | 2 + src/sql/schema/schema_provider.rs | 5 +- src/sql/schema/source_table.rs | 22 +- src/storage/stream_catalog/manager.rs | 93 +++++++- src/storage/stream_catalog/mod.rs | 2 + .../stream_catalog/rocksdb_meta_store.rs | 131 ++++++++++++ 33 files changed, 1001 insertions(+), 43 deletions(-) create mode 100644 src/coordinator/dataset/show_catalog_tables_result.rs create mode 100644 src/coordinator/dataset/show_create_table_result.rs create mode 100644 src/coordinator/plan/show_catalog_tables_plan.rs create mode 100644 src/coordinator/plan/show_create_table_plan.rs create mode 100644 src/coordinator/statement/show_catalog_tables.rs create mode 100644 
src/coordinator/statement/show_create_table.rs create mode 100644 src/sql/schema/catalog_ddl.rs create mode 100644 src/storage/stream_catalog/rocksdb_meta_store.rs diff --git a/Cargo.lock b/Cargo.lock index c6994ec0..9cdca7e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2324,7 +2324,6 @@ dependencies = [ "comfy-table", "protocol", "rustyline", - "thiserror 2.0.17", "tokio", "tonic", ] diff --git a/cli/cli/Cargo.toml b/cli/cli/Cargo.toml index e3c1c591..3c05d6b4 100644 --- a/cli/cli/Cargo.toml +++ b/cli/cli/Cargo.toml @@ -14,7 +14,6 @@ arrow-schema = "52" comfy-table = "7" protocol = { path = "../../protocol" } clap = { version = "4.5", features = ["derive"] } -thiserror = "2" tokio = { version = "1.0", features = ["full", "signal"] } tonic = { version = "0.12", features = ["default"] } rustyline = { version = "14.0", features = ["with-dirs"] } diff --git a/cli/cli/src/repl.rs b/cli/cli/src/repl.rs index 7f8087b3..8c3882b2 100644 --- a/cli/cli/src/repl.rs +++ b/cli/cli/src/repl.rs @@ -20,26 +20,62 @@ use comfy_table::{Attribute, Cell, Color, ContentArrangement, Table, TableCompon use protocol::cli::{function_stream_service_client::FunctionStreamServiceClient, SqlRequest}; use rustyline::error::ReadlineError; use rustyline::{Config, DefaultEditor, EditMode}; +use std::fmt; use std::io::{self, Cursor, Write}; use std::sync::Arc; use tokio::sync::Mutex; use tonic::Request; -#[derive(Debug, thiserror::Error)] +/// CLI errors. +/// +/// **Important:** [`tonic::Status`] must not be formatted with `{}` — its [`fmt::Display`] dumps +/// `details` / `metadata` (e.g. HTTP headers). Only [`tonic::Status::message`] is stored in +/// [`ReplError::Rpc`]. 
+#[derive(Debug)] pub enum ReplError { - #[error("RPC error: {0}")] - Rpc(Box), - #[error("Connection failed: {0}")] + Rpc(String), Connection(String), - #[error("Internal error: {0}")] Internal(String), - #[error("IO error: {0}")] - Io(#[from] io::Error), + Io(io::Error), +} + +impl fmt::Display for ReplError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ReplError::Rpc(s) => f.write_str(s), + ReplError::Connection(s) => f.write_str(s), + ReplError::Internal(s) => write!(f, "Internal error: {s}"), + ReplError::Io(e) => write!(f, "IO error: {e}"), + } + } +} + +impl std::error::Error for ReplError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + ReplError::Io(e) => Some(e), + _ => None, + } + } +} + +impl From for ReplError { + fn from(e: io::Error) -> Self { + ReplError::Io(e) + } } impl From for ReplError { fn from(s: tonic::Status) -> Self { - ReplError::Rpc(Box::new(s)) + let msg = s.message(); + if msg.is_empty() { + ReplError::Rpc(format!( + "gRPC {} (server returned no message)", + s.code() + )) + } else { + ReplError::Rpc(msg.to_string()) + } } } diff --git a/conf/config.yaml b/conf/config.yaml index 3f19493d..9d0f625e 100644 --- a/conf/config.yaml +++ b/conf/config.yaml @@ -117,3 +117,10 @@ task_storage: # Maximum bytes for level base in bytes (optional) max_bytes_for_level_base: 268435456 + +# Stream table catalog (SQL: CREATE TABLE connector sources, SHOW TABLES, SHOW CREATE TABLE). +# When persist is true (default), metadata is stored under RocksDB at db_path (default: data/stream_catalog) +# and reloaded after process restart. Set persist: false only for tests/ephemeral nodes. 
+stream_catalog: + persist: true + # db_path: data/stream_catalog diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto index cace3107..5ad09d38 100644 --- a/protocol/proto/storage.proto +++ b/protocol/proto/storage.proto @@ -27,6 +27,8 @@ message StreamSource { bytes arrow_schema_ipc = 1; optional string event_time_field = 2; optional string watermark_field = 3; + // Original CREATE TABLE ... WITH ('k'='v', ...) pairs (best-effort; keys sorted in DDL). + map with_options = 4; } message StreamSink { diff --git a/src/config/global_config.rs b/src/config/global_config.rs index 33676125..c76bf4b0 100644 --- a/src/config/global_config.rs +++ b/src/config/global_config.rs @@ -40,6 +40,8 @@ pub struct GlobalConfig { pub task_storage: crate::config::storage::TaskStorageConfig, #[serde(default)] pub streaming: StreamingConfig, + #[serde(default)] + pub stream_catalog: crate::config::storage::StreamCatalogConfig, } impl GlobalConfig { diff --git a/src/config/storage.rs b/src/config/storage.rs index e5186648..28396d7d 100644 --- a/src/config/storage.rs +++ b/src/config/storage.rs @@ -118,3 +118,27 @@ impl Default for TaskStorageConfig { } } } + +/// Stream table catalog (`CREATE TABLE` / `SHOW TABLES`) storage. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StreamCatalogConfig { + /// When `false`, the catalog is in-memory only and is **lost on process restart**. + #[serde(default = "default_stream_catalog_persist")] + pub persist: bool, + /// RocksDB directory for persisted catalog. Default: `{data_dir}/stream_catalog`. 
+ #[serde(default)] + pub db_path: Option, +} + +fn default_stream_catalog_persist() -> bool { + true +} + +impl Default for StreamCatalogConfig { + fn default() -> Self { + Self { + persist: default_stream_catalog_persist(), + db_path: None, + } + } +} diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs index 3889431e..dbac78cf 100644 --- a/src/coordinator/analyze/analyzer.rs +++ b/src/coordinator/analyze/analyzer.rs @@ -14,8 +14,9 @@ use super::Analysis; use crate::coordinator::execution_context::ExecutionContext; use crate::coordinator::statement::{ CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, - ShowFunctions, StartFunction, Statement, StatementVisitor, StatementVisitorContext, - StatementVisitorResult, StopFunction, StreamingTableStatement, + ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, Statement, + StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, + StreamingTableStatement, }; use std::fmt; @@ -109,6 +110,22 @@ impl StatementVisitor for Analyzer<'_> { StatementVisitorResult::Analyze(Box::new(stmt.clone())) } + fn visit_show_catalog_tables( + &self, + stmt: &ShowCatalogTables, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Analyze(Box::new(stmt.clone())) + } + + fn visit_show_create_table( + &self, + stmt: &ShowCreateTable, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Analyze(Box::new(stmt.clone())) + } + fn visit_create_python_function( &self, stmt: &CreatePythonFunction, diff --git a/src/coordinator/dataset/mod.rs b/src/coordinator/dataset/mod.rs index b72613da..f09c24ca 100644 --- a/src/coordinator/dataset/mod.rs +++ b/src/coordinator/dataset/mod.rs @@ -12,8 +12,12 @@ mod data_set; mod execute_result; +mod show_catalog_tables_result; +mod show_create_table_result; mod show_functions_result; pub use data_set::{DataSet, 
empty_record_batch}; pub use execute_result::ExecuteResult; +pub use show_catalog_tables_result::ShowCatalogTablesResult; +pub use show_create_table_result::ShowCreateTableResult; pub use show_functions_result::ShowFunctionsResult; diff --git a/src/coordinator/dataset/show_catalog_tables_result.rs b/src/coordinator/dataset/show_catalog_tables_result.rs new file mode 100644 index 00000000..77792517 --- /dev/null +++ b/src/coordinator/dataset/show_catalog_tables_result.rs @@ -0,0 +1,92 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use arrow_array::{Int32Array, StringArray}; +use arrow_schema::{DataType, Field, Schema}; + +use super::DataSet; +use crate::sql::schema::{schema_columns_one_line, stream_table_row_detail, StreamTable}; + +#[derive(Clone, Debug)] +pub struct ShowCatalogTablesResult { + names: Vec, + kinds: Vec, + column_counts: Vec, + schema_lines: Vec, + details: Vec, +} + +impl ShowCatalogTablesResult { + pub fn from_tables(tables: &[Arc]) -> Self { + let mut names = Vec::with_capacity(tables.len()); + let mut kinds = Vec::with_capacity(tables.len()); + let mut column_counts = Vec::with_capacity(tables.len()); + let mut schema_lines = Vec::with_capacity(tables.len()); + let mut details = Vec::with_capacity(tables.len()); + + for t in tables { + let schema = t.schema(); + let ncols = schema.fields().len() as i32; + names.push(t.name().to_string()); + kinds.push(match t.as_ref() { + StreamTable::Source { .. 
} => "SOURCE", + StreamTable::Sink { .. } => "SINK", + } + .to_string()); + column_counts.push(ncols); + schema_lines.push(schema_columns_one_line(&schema)); + details.push(stream_table_row_detail(t.as_ref())); + } + + Self { + names, + kinds, + column_counts, + schema_lines, + details, + } + } +} + +impl DataSet for ShowCatalogTablesResult { + fn to_record_batch(&self) -> arrow_array::RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("table_name", DataType::Utf8, false), + Field::new("kind", DataType::Utf8, false), + Field::new("column_count", DataType::Int32, false), + Field::new("schema_columns", DataType::Utf8, false), + Field::new("details", DataType::Utf8, false), + ])); + + arrow_array::RecordBatch::try_new( + schema, + vec![ + Arc::new(StringArray::from( + self.names.iter().map(|s| s.as_str()).collect::>(), + )), + Arc::new(StringArray::from( + self.kinds.iter().map(|s| s.as_str()).collect::>(), + )), + Arc::new(Int32Array::from(self.column_counts.clone())), + Arc::new(StringArray::from( + self.schema_lines.iter().map(|s| s.as_str()).collect::>(), + )), + Arc::new(StringArray::from( + self.details.iter().map(|s| s.as_str()).collect::>(), + )), + ], + ) + .unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(Arc::new(Schema::empty()))) + } +} diff --git a/src/coordinator/dataset/show_create_table_result.rs b/src/coordinator/dataset/show_create_table_result.rs new file mode 100644 index 00000000..47f49d59 --- /dev/null +++ b/src/coordinator/dataset/show_create_table_result.rs @@ -0,0 +1,51 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use arrow_array::StringArray; +use arrow_schema::{DataType, Field, Schema}; + +use super::DataSet; + +#[derive(Clone, Debug)] +pub struct ShowCreateTableResult { + table_name: String, + create_sql: String, +} + +impl ShowCreateTableResult { + pub fn new(table_name: String, create_sql: String) -> Self { + Self { + table_name, + create_sql, + } + } +} + +impl DataSet for ShowCreateTableResult { + fn to_record_batch(&self) -> arrow_array::RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("Table", DataType::Utf8, false), + Field::new("Create Table", DataType::Utf8, false), + ])); + + arrow_array::RecordBatch::try_new( + schema, + vec![ + Arc::new(StringArray::from(vec![self.table_name.as_str()])), + Arc::new(StringArray::from(vec![self.create_sql.as_str()])), + ], + ) + .unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(Arc::new(Schema::empty()))) + } +} diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index f9f26cd0..4a7fc273 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -16,17 +16,20 @@ use protocol::grpc::api::FsProgram; use thiserror::Error; use tracing::{debug, info}; -use crate::coordinator::dataset::{empty_record_batch, ExecuteResult, ShowFunctionsResult}; +use crate::coordinator::dataset::{ + empty_record_batch, ExecuteResult, ShowCatalogTablesResult, ShowCreateTableResult, + ShowFunctionsResult, +}; use crate::coordinator::plan::{ CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, CreateTablePlanBody, DropFunctionPlan, DropTablePlan, LookupTablePlan, PlanNode, PlanVisitor, PlanVisitorContext, - PlanVisitorResult, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, - StreamingTableConnectorPlan, + PlanVisitorResult, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan, + 
StartFunctionPlan, StopFunctionPlan, StreamingTable, StreamingTableConnectorPlan, }; use crate::coordinator::statement::{ConfigSource, FunctionSource}; use crate::runtime::streaming::job::JobManager; use crate::runtime::taskexecutor::TaskManager; -use crate::sql::schema::StreamTable; +use crate::sql::schema::{show_create_stream_table, StreamTable}; use crate::storage::stream_catalog::CatalogManager; #[derive(Error, Debug)] @@ -171,6 +174,44 @@ impl PlanVisitor for Executor { PlanVisitorResult::Execute(Ok(result)) } + fn visit_show_catalog_tables( + &self, + _plan: &ShowCatalogTablesPlan, + _context: &PlanVisitorContext, + ) -> PlanVisitorResult { + let tables = self.catalog_manager.list_stream_tables(); + let n = tables.len(); + let result = ExecuteResult::ok_with_data( + format!("{n} stream catalog table(s)"), + ShowCatalogTablesResult::from_tables(&tables), + ); + PlanVisitorResult::Execute(Ok(result)) + } + + fn visit_show_create_table( + &self, + plan: &ShowCreateTablePlan, + _context: &PlanVisitorContext, + ) -> PlanVisitorResult { + let execute = || -> Result { + let t = self + .catalog_manager + .get_stream_table(&plan.table_name) + .ok_or_else(|| { + ExecuteError::Validation(format!( + "Table '{}' not found in stream catalog", + plan.table_name + )) + })?; + let ddl = show_create_stream_table(t.as_ref()); + Ok(ExecuteResult::ok_with_data( + format!("SHOW CREATE TABLE {}", plan.table_name), + ShowCreateTableResult::new(plan.table_name.clone(), ddl), + )) + }; + PlanVisitorResult::Execute(execute()) + } + fn visit_create_python_function( &self, plan: &CreatePythonFunctionPlan, @@ -233,6 +274,7 @@ impl PlanVisitor for Executor { schema, event_time_field: source_table.event_time_field().map(str::to_string), watermark_field: source_table.watermark_field().map(str::to_string), + with_options: source_table.catalog_with_options().clone(), }; (table_name, *if_not_exists, table_instance) } @@ -254,7 +296,8 @@ impl PlanVisitor for Executor { .add_table(stream_table) 
.map_err(|e| { ExecuteError::Internal(format!( - "Failed to register connector source table '{table_name}': {e}" + "Failed to register connector source table '{}': {}", + table_name, e )) })?; diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs index a781f1e1..23cd2925 100644 --- a/src/coordinator/mod.rs +++ b/src/coordinator/mod.rs @@ -26,5 +26,6 @@ pub use runtime_context::CoordinatorRuntimeContext; pub use dataset::{DataSet, ShowFunctionsResult}; pub use statement::{ CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, - PythonModule, ShowFunctions, StartFunction, Statement, StopFunction, StreamingTableStatement, + PythonModule, ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, Statement, + StopFunction, StreamingTableStatement, }; diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index e2e457eb..e5b5e36a 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -27,12 +27,13 @@ use tracing::debug; use crate::coordinator::analyze::analysis::Analysis; use crate::coordinator::plan::{ CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan, - PlanNode, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, + PlanNode, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan, StartFunctionPlan, + StopFunctionPlan, StreamingTable, }; use crate::coordinator::statement::{ CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, - ShowFunctions, StartFunction, StatementVisitor, StatementVisitorContext, - StatementVisitorResult, StopFunction, StreamingTableStatement, + ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, StatementVisitor, + StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingTableStatement, }; use crate::coordinator::tool::ConnectorOptions; use crate::sql::analysis::{ @@ 
-352,6 +353,24 @@ impl StatementVisitor for LogicalPlanVisitor { StatementVisitorResult::Plan(Box::new(ShowFunctionsPlan::new())) } + fn visit_show_catalog_tables( + &self, + _stmt: &ShowCatalogTables, + _ctx: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Plan(Box::new(ShowCatalogTablesPlan::new())) + } + + fn visit_show_create_table( + &self, + stmt: &ShowCreateTable, + _ctx: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Plan(Box::new(ShowCreateTablePlan::new( + stmt.table_name.clone(), + ))) + } + fn visit_create_python_function( &self, stmt: &CreatePythonFunction, diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs index 2dbbab77..b04234d5 100644 --- a/src/coordinator/plan/mod.rs +++ b/src/coordinator/plan/mod.rs @@ -18,6 +18,8 @@ mod drop_table_plan; mod logical_plan_visitor; mod lookup_table_plan; mod optimizer; +mod show_catalog_tables_plan; +mod show_create_table_plan; mod show_functions_plan; mod start_function_plan; mod stop_function_plan; @@ -33,6 +35,8 @@ pub use drop_table_plan::DropTablePlan; pub use logical_plan_visitor::LogicalPlanVisitor; pub use lookup_table_plan::LookupTablePlan; pub use optimizer::LogicalPlanner; +pub use show_catalog_tables_plan::ShowCatalogTablesPlan; +pub use show_create_table_plan::ShowCreateTablePlan; pub use show_functions_plan::ShowFunctionsPlan; pub use start_function_plan::StartFunctionPlan; pub use stop_function_plan::StopFunctionPlan; diff --git a/src/coordinator/plan/show_catalog_tables_plan.rs b/src/coordinator/plan/show_catalog_tables_plan.rs new file mode 100644 index 00000000..420fdb40 --- /dev/null +++ b/src/coordinator/plan/show_catalog_tables_plan.rs @@ -0,0 +1,28 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; + +#[derive(Debug, Default)] +pub struct ShowCatalogTablesPlan; + +impl ShowCatalogTablesPlan { + pub fn new() -> Self { + Self + } +} + +impl PlanNode for ShowCatalogTablesPlan { + fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { + visitor.visit_show_catalog_tables(self, context) + } +} diff --git a/src/coordinator/plan/show_create_table_plan.rs b/src/coordinator/plan/show_create_table_plan.rs new file mode 100644 index 00000000..c5fe6376 --- /dev/null +++ b/src/coordinator/plan/show_create_table_plan.rs @@ -0,0 +1,30 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; + +#[derive(Debug, Clone)] +pub struct ShowCreateTablePlan { + pub table_name: String, +} + +impl ShowCreateTablePlan { + pub fn new(table_name: String) -> Self { + Self { table_name } + } +} + +impl PlanNode for ShowCreateTablePlan { + fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { + visitor.visit_show_create_table(self, context) + } +} diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs index e8efcf32..28f11f53 100644 --- a/src/coordinator/plan/visitor.rs +++ b/src/coordinator/plan/visitor.rs @@ -12,8 +12,8 @@ use super::{ CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan, - LookupTablePlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, - StreamingTableConnectorPlan, + LookupTablePlan, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan, + StartFunctionPlan, StopFunctionPlan, StreamingTable, StreamingTableConnectorPlan, }; /// Context passed to PlanVisitor methods @@ -80,6 +80,18 @@ pub trait PlanVisitor { context: &PlanVisitorContext, ) -> PlanVisitorResult; + fn visit_show_catalog_tables( + &self, + plan: &ShowCatalogTablesPlan, + context: &PlanVisitorContext, + ) -> PlanVisitorResult; + + fn visit_show_create_table( + &self, + plan: &ShowCreateTablePlan, + context: &PlanVisitorContext, + ) -> PlanVisitorResult; + fn visit_create_python_function( &self, plan: &CreatePythonFunctionPlan, diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs index 7b39787d..83f6ca0e 100644 --- a/src/coordinator/statement/mod.rs +++ b/src/coordinator/statement/mod.rs @@ -15,6 +15,8 @@ mod create_python_function; mod create_table; mod drop_function; mod drop_table; +mod show_catalog_tables; +mod show_create_table; mod show_functions; mod start_function; mod stop_function; @@ -26,6 +28,8 @@ pub use 
create_python_function::{CreatePythonFunction, PythonModule}; pub use create_table::CreateTable; pub use drop_function::DropFunction; pub use drop_table::DropTableStatement; +pub use show_catalog_tables::ShowCatalogTables; +pub use show_create_table::ShowCreateTable; pub use show_functions::ShowFunctions; pub use start_function::StartFunction; pub use stop_function::StopFunction; diff --git a/src/coordinator/statement/show_catalog_tables.rs b/src/coordinator/statement/show_catalog_tables.rs new file mode 100644 index 00000000..1f034562 --- /dev/null +++ b/src/coordinator/statement/show_catalog_tables.rs @@ -0,0 +1,33 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; + +/// `SHOW TABLES` over the stream catalog (connector sources + streaming sinks). 
+#[derive(Debug, Clone, Default)] +pub struct ShowCatalogTables; + +impl ShowCatalogTables { + pub fn new() -> Self { + Self + } +} + +impl Statement for ShowCatalogTables { + fn accept( + &self, + visitor: &dyn StatementVisitor, + context: &StatementVisitorContext, + ) -> StatementVisitorResult { + visitor.visit_show_catalog_tables(self, context) + } +} diff --git a/src/coordinator/statement/show_create_table.rs b/src/coordinator/statement/show_create_table.rs new file mode 100644 index 00000000..5b54a726 --- /dev/null +++ b/src/coordinator/statement/show_create_table.rs @@ -0,0 +1,35 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; + +/// `SHOW CREATE TABLE ` for a stream-catalog table. 
+#[derive(Debug, Clone)] +pub struct ShowCreateTable { + pub table_name: String, +} + +impl ShowCreateTable { + pub fn new(table_name: String) -> Self { + Self { table_name } + } +} + +impl Statement for ShowCreateTable { + fn accept( + &self, + visitor: &dyn StatementVisitor, + context: &StatementVisitorContext, + ) -> StatementVisitorResult { + visitor.visit_show_create_table(self, context) + } +} diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs index 641abf98..f24c85be 100644 --- a/src/coordinator/statement/visitor.rs +++ b/src/coordinator/statement/visitor.rs @@ -12,7 +12,8 @@ use super::{ CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, - ShowFunctions, StartFunction, StopFunction, StreamingTableStatement, + ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, StopFunction, + StreamingTableStatement, }; use crate::coordinator::plan::PlanNode; use crate::coordinator::statement::Statement; @@ -83,6 +84,18 @@ pub trait StatementVisitor { context: &StatementVisitorContext, ) -> StatementVisitorResult; + fn visit_show_catalog_tables( + &self, + stmt: &ShowCatalogTables, + context: &StatementVisitorContext, + ) -> StatementVisitorResult; + + fn visit_show_create_table( + &self, + stmt: &ShowCreateTable, + context: &StatementVisitorContext, + ) -> StatementVisitorResult; + fn visit_create_python_function( &self, stmt: &CreatePythonFunction, diff --git a/src/server/handler.rs b/src/server/handler.rs index 8ed484d6..2ef6b529 100644 --- a/src/server/handler.rs +++ b/src/server/handler.rs @@ -112,8 +112,9 @@ impl FunctionStreamService for FunctionStreamServiceImpl { let req = request.into_inner(); let statements = parse_sql(&req.sql).map_err(|e| { - warn!("SQL parse rejection: {}", e); - Status::invalid_argument("Provided SQL syntax is invalid") + let detail = e.to_string(); + warn!("SQL parse rejection: {}", detail); + Status::invalid_argument(detail) })?; if statements.is_empty() { 
@@ -229,7 +230,7 @@ impl FunctionStreamService for FunctionStreamServiceImpl { error!("show_functions execution failed: {}", result.message); return Ok(TonicResponse::new(ShowFunctionsResponse { status_code: StatusCode::InternalServerError as i32, - message: "Failed to retrieve function definitions".to_string(), + message: result.message, functions: vec![], })); } diff --git a/src/sql/common/connector_options.rs b/src/sql/common/connector_options.rs index bffa766a..98e3299e 100644 --- a/src/sql/common/connector_options.rs +++ b/src/sql/common/connector_options.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::num::{NonZero, NonZeroU64}; use std::str::FromStr; use std::time::Duration; @@ -31,6 +31,22 @@ pub struct ConnectorOptions { partitions: Vec, } +fn sql_expr_to_catalog_string(e: &Expr) -> String { + match e { + Expr::Value(ValueWithSpan { value, .. }) => match value { + SqlValue::SingleQuotedString(s) | SqlValue::DoubleQuotedString(s) => s.clone(), + SqlValue::NationalStringLiteral(s) => s.clone(), + SqlValue::HexStringLiteral(s) => s.clone(), + SqlValue::Number(n, _) => n.clone(), + SqlValue::Boolean(b) => b.to_string(), + SqlValue::Null => "NULL".to_string(), + other => other.to_string(), + }, + Expr::Identifier(ident) => ident.value.clone(), + other => other.to_string(), + } +} + impl ConnectorOptions { pub fn new(sql_opts: &[SqlOption], partition_by: &Option>) -> DFResult { let mut options = HashMap::new(); @@ -331,6 +347,15 @@ impl ConnectorOptions { } Ok(out) } + + /// Snapshot of all current `WITH` key/value pairs for catalog persistence (`SHOW CREATE TABLE`). + /// Call before any `pull_*` consumes options. 
+ pub fn snapshot_for_catalog(&self) -> BTreeMap { + self.options + .iter() + .map(|(k, v)| (k.clone(), sql_expr_to_catalog_string(v))) + .collect() + } } fn duration_from_sql_expr(expr: &Expr) -> Result { diff --git a/src/sql/frontend_sql_coverage_tests.rs b/src/sql/frontend_sql_coverage_tests.rs index cee4d82e..0a201f9e 100644 --- a/src/sql/frontend_sql_coverage_tests.rs +++ b/src/sql/frontend_sql_coverage_tests.rs @@ -359,6 +359,22 @@ fn compile_plan_show_functions() { .expect("ShowFunctions plan"); } +#[test] +fn compile_plan_show_tables() { + let stmts = parse_sql("SHOW TABLES").unwrap(); + Coordinator::new() + .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) + .expect("ShowCatalogTables plan"); +} + +#[test] +fn compile_plan_show_create_table() { + let stmts = parse_sql("SHOW CREATE TABLE my_table").unwrap(); + Coordinator::new() + .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) + .expect("ShowCreateTable plan"); +} + #[test] fn compile_plan_start_stop_drop_function() { for sql in [ diff --git a/src/sql/parse.rs b/src/sql/parse.rs index 1feff64a..33bb13ad 100644 --- a/src/sql/parse.rs +++ b/src/sql/parse.rs @@ -19,6 +19,8 @@ //! - **`CREATE TABLE ...`** other forms (including `CREATE TABLE ... AS SELECT` where DataFusion accepts it) //! - **`CREATE STREAMING TABLE ... WITH (...) AS SELECT ...`** (streaming sink DDL) //! - **`DROP TABLE`** / **`DROP TABLE IF EXISTS`** / **`DROP STREAMING TABLE`** (alias for `DROP TABLE` on the stream catalog) +//! - **`SHOW TABLES`** — list stream catalog tables (connector sources and streaming sinks) +//! - **`SHOW CREATE TABLE `** — best-effort DDL text (full `WITH` / `AS SELECT` may not be stored) //! //! **`INSERT` is not supported** here — use `CREATE TABLE ... AS SELECT` or //! `CREATE STREAMING TABLE ... AS SELECT` to define the query shape instead. 
@@ -29,13 +31,16 @@ use std::collections::HashMap; use datafusion::common::{Result, plan_err}; use datafusion::error::DataFusionError; -use datafusion::sql::sqlparser::ast::{ObjectType, SqlOption, Statement as DFStatement}; +use datafusion::sql::sqlparser::ast::{ + ObjectType, ShowCreateObject, SqlOption, Statement as DFStatement, +}; use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; use datafusion::sql::sqlparser::parser::Parser; use crate::coordinator::{ - CreateFunction, CreateTable, DropFunction, DropTableStatement, ShowFunctions, StartFunction, - Statement as CoordinatorStatement, StopFunction, StreamingTableStatement, + CreateFunction, CreateTable, DropFunction, DropTableStatement, ShowCatalogTables, + ShowCreateTable, ShowFunctions, StartFunction, Statement as CoordinatorStatement, StopFunction, + StreamingTableStatement, }; /// `DROP STREAMING TABLE t` is accepted as sugar for `DROP TABLE t` against the same catalog. @@ -89,6 +94,15 @@ fn classify_statement(stmt: DFStatement) -> Result Ok(Box::new(DropFunction::new(name))) } DFStatement::ShowFunctions { .. } => Ok(Box::new(ShowFunctions::new())), + DFStatement::ShowTables { .. } => Ok(Box::new(ShowCatalogTables::new())), + DFStatement::ShowCreate { obj_type, obj_name } => { + if obj_type != ShowCreateObject::Table { + return plan_err!( + "SHOW CREATE {obj_type} is not supported; use SHOW CREATE TABLE " + ); + } + Ok(Box::new(ShowCreateTable::new(obj_name.to_string()))) + }, s @ DFStatement::CreateTable(_) => Ok(Box::new(CreateTable::new(s))), s @ DFStatement::CreateStreamingTable { .. } => { Ok(Box::new(StreamingTableStatement::new(s))) @@ -116,10 +130,7 @@ fn classify_statement(stmt: DFStatement) -> Result "INSERT is not supported; only CREATE TABLE and CREATE STREAMING TABLE (with AS SELECT) \ are supported for defining table/query pipelines in this SQL frontend" ), - other => plan_err!( - "Unsupported SQL statement: {other}. 
\ - For tables/pipelines use CREATE TABLE or CREATE STREAMING TABLE ... AS SELECT; INSERT is not supported." - ), + other => plan_err!("Unsupported SQL statement: {other}"), } } @@ -190,6 +201,18 @@ mod tests { assert!(is_type(stmt.as_ref(), "ShowFunctions")); } + #[test] + fn test_parse_show_tables() { + let stmt = first_stmt("SHOW TABLES"); + assert!(is_type(stmt.as_ref(), "ShowCatalogTables")); + } + + #[test] + fn test_parse_show_create_table() { + let stmt = first_stmt("SHOW CREATE TABLE my_src"); + assert!(is_type(stmt.as_ref(), "ShowCreateTable")); + } + #[test] fn test_parse_create_table() { let stmt = first_stmt("CREATE TABLE foo (id INT, name VARCHAR)"); diff --git a/src/sql/schema/catalog_ddl.rs b/src/sql/schema/catalog_ddl.rs new file mode 100644 index 00000000..2eea78f9 --- /dev/null +++ b/src/sql/schema/catalog_ddl.rs @@ -0,0 +1,199 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Best-effort SQL text for catalog introspection (`SHOW CREATE TABLE`). 
+ +use std::collections::BTreeMap; + +use datafusion::arrow::datatypes::{DataType, TimeUnit}; + +use super::schema_provider::StreamTable; +use crate::sql::logical_node::logical::LogicalProgram; + +fn data_type_sql(dt: &DataType) -> String { + match dt { + DataType::Null => "NULL".to_string(), + DataType::Boolean => "BOOLEAN".to_string(), + DataType::Int8 => "TINYINT".to_string(), + DataType::Int16 => "SMALLINT".to_string(), + DataType::Int32 => "INT".to_string(), + DataType::Int64 => "BIGINT".to_string(), + DataType::UInt8 => "TINYINT UNSIGNED".to_string(), + DataType::UInt16 => "SMALLINT UNSIGNED".to_string(), + DataType::UInt32 => "INT UNSIGNED".to_string(), + DataType::UInt64 => "BIGINT UNSIGNED".to_string(), + DataType::Float16 => "FLOAT".to_string(), + DataType::Float32 => "REAL".to_string(), + DataType::Float64 => "DOUBLE".to_string(), + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => "VARCHAR".to_string(), + DataType::Binary | DataType::LargeBinary => "VARBINARY".to_string(), + DataType::Date32 => "DATE".to_string(), + DataType::Date64 => "DATE".to_string(), + DataType::Timestamp(unit, tz) => match (unit, tz) { + (TimeUnit::Second, None) => "TIMESTAMP(0)".to_string(), + (TimeUnit::Millisecond, None) => "TIMESTAMP(3)".to_string(), + (TimeUnit::Microsecond, None) => "TIMESTAMP(6)".to_string(), + (TimeUnit::Nanosecond, None) => "TIMESTAMP(9)".to_string(), + (_, Some(_)) => "TIMESTAMP WITH TIME ZONE".to_string(), + }, + DataType::Decimal128(p, s) => format!("DECIMAL({p},{s})"), + DataType::Decimal256(p, s) => format!("DECIMAL({p},{s})"), + _ => dt.to_string(), + } +} + +fn format_columns(schema: &datafusion::arrow::datatypes::Schema) -> Vec { + schema + .fields() + .iter() + .map(|f| { + let null = if f.is_nullable() { + "" + } else { + " NOT NULL" + }; + format!(" {} {}{}", f.name(), data_type_sql(f.data_type()), null) + }) + .collect() +} + +fn format_with_clause(opts: &BTreeMap) -> String { + if opts.is_empty() { + return "WITH ('connector' = 
'...', 'format' = '...');\n/* Original WITH options are not persisted in the stream catalog. */\n" + .to_string(); + } + let pairs: Vec = opts + .iter() + .map(|(k, v)| { + let k_esc = k.replace('\'', "''"); + let v_esc = v.replace('\'', "''"); + format!(" '{k_esc}' = '{v_esc}'") + }) + .collect(); + format!("WITH (\n{}\n);\n", pairs.join(",\n")) +} + +/// Single-line `col:TYPE` list for result grids. +pub fn schema_columns_one_line(schema: &datafusion::arrow::datatypes::Schema) -> String { + schema + .fields() + .iter() + .map(|f| format!("{}:{}", f.name(), data_type_sql(f.data_type()))) + .collect::>() + .join(", ") +} + +fn pipeline_summary_short(program: &LogicalProgram) -> String { + let mut parts: Vec = Vec::new(); + parts.push(format!("tasks={}", program.task_count())); + parts.push(format!("hash={}", program.get_hash())); + for nw in program.graph.node_weights() { + let chain = nw + .operator_chain + .operators + .iter() + .map(|o| format!("{}", o.operator_name)) + .collect::>() + .join("->"); + parts.push(format!("n{}:{}", nw.node_id, chain)); + } + parts.join(" | ") +} + +/// Extra fields for `SHOW TABLES` result grid (pipeline summary; no full Graphviz). +pub fn stream_table_row_detail(table: &StreamTable) -> String { + match table { + StreamTable::Source { + event_time_field, + watermark_field, + with_options, + .. + } => { + format!( + "event_time={:?}, watermark={:?}, with_options={}", + event_time_field, + watermark_field, + with_options.len() + ) + } + StreamTable::Sink { program, .. 
} => pipeline_summary_short(program), + } +} + +fn pipeline_text(program: &LogicalProgram) -> String { + let mut lines: Vec = Vec::new(); + lines.push(format!("tasks_total: {}", program.task_count())); + lines.push(format!("program_hash: {}", program.get_hash())); + for nw in program.graph.node_weights() { + let chain = nw + .operator_chain + .operators + .iter() + .map(|o| format!("{}[{}]", o.operator_name, o.operator_id)) + .collect::>() + .join(" -> "); + lines.push(format!( + "node {} (parallelism={}): {chain}", + nw.node_id, nw.parallelism + )); + } + let dot = program.dot(); + const MAX_DOT: usize = 12_000; + if dot.len() > MAX_DOT { + lines.push(format!( + "graphviz_dot_truncated:\n{}... [{} more bytes]", + &dot[..MAX_DOT], + dot.len() - MAX_DOT + )); + } else { + lines.push(format!("graphviz_dot:\n{dot}")); + } + lines.join("\n") +} + +/// Human-readable `SHOW CREATE TABLE` text (sink `AS SELECT` is not stored). +pub fn show_create_stream_table(table: &StreamTable) -> String { + match table { + StreamTable::Source { + name, + schema, + event_time_field, + watermark_field, + with_options, + } => { + let cols = format_columns(schema); + let mut ddl = format!("CREATE TABLE {name} (\n{}\n)", cols.join(",\n")); + if let Some(e) = event_time_field { + ddl.push_str(&format!("\n/* EVENT TIME COLUMN: {e} */\n")); + } + if let Some(w) = watermark_field { + ddl.push_str(&format!("/* WATERMARK: {w} */\n")); + } + ddl.push_str(&format_with_clause(with_options)); + ddl + } + StreamTable::Sink { name, program } => { + let schema = program + .egress_arrow_schema() + .unwrap_or_else(|| std::sync::Arc::new(datafusion::arrow::datatypes::Schema::empty())); + let cols = format_columns(&schema); + let mut ddl = format!( + "CREATE STREAMING TABLE {name}\nWITH ('connector' = '...') AS SELECT ...\n/* Sink WITH / AS SELECT text is not stored. 
Output schema:\n{}\n*/\n\n", + cols.join(",\n") + ); + ddl.push_str("-- Resolved logical pipeline:\n"); + ddl.push_str(&pipeline_text(program)); + ddl.push('\n'); + ddl + } + } +} diff --git a/src/sql/schema/mod.rs b/src/sql/schema/mod.rs index cac86d52..a4aa3747 100644 --- a/src/sql/schema/mod.rs +++ b/src/sql/schema/mod.rs @@ -10,6 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub mod catalog_ddl; pub mod column_descriptor; pub mod connection_type; pub mod source_table; @@ -22,6 +23,7 @@ pub mod table_role; pub mod temporal_pipeline_config; pub mod utils; +pub use catalog_ddl::{schema_columns_one_line, show_create_stream_table, stream_table_row_detail}; pub use column_descriptor::ColumnDescriptor; pub use connection_type::ConnectionType; pub use source_table::{SourceOperator, SourceTable}; diff --git a/src/sql/schema/schema_provider.rs b/src/sql/schema/schema_provider.rs index 5ecde68e..f93aead1 100644 --- a/src/sql/schema/schema_provider.rs +++ b/src/sql/schema/schema_provider.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::sync::Arc; use datafusion::arrow::datatypes::{self as datatypes, DataType, Field, Schema}; @@ -45,6 +45,8 @@ pub enum StreamTable { schema: Arc, event_time_field: Option, watermark_field: Option, + /// Persisted `WITH` options for `SHOW CREATE TABLE`. 
+ with_options: BTreeMap, }, Sink { name: String, @@ -202,6 +204,7 @@ impl StreamPlanningContext { schema, event_time_field, watermark_field, + with_options: BTreeMap::new(), }); } diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs index 63baca2a..9c975573 100644 --- a/src/sql/schema/source_table.rs +++ b/src/sql/schema/source_table.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::sync::Arc; use std::time::Duration; @@ -73,6 +73,8 @@ pub struct SourceTable { pub lookup_cache_max_bytes: Option, pub lookup_cache_ttl: Option, pub inferred_fields: Option>, + /// Original `WITH` options for catalog / `SHOW CREATE TABLE` (snapshot at DDL parse time). + pub catalog_with_options: BTreeMap, } multifield_partial_ord!( @@ -84,7 +86,8 @@ multifield_partial_ord!( opaque_config, description, key_constraints, - connection_format + connection_format, + catalog_with_options ); impl SourceTable { @@ -114,6 +117,7 @@ impl SourceTable { lookup_cache_max_bytes: None, lookup_cache_ttl: None, inferred_fields: None, + catalog_with_options: BTreeMap::new(), } } @@ -135,6 +139,11 @@ impl SourceTable { self.temporal_config.watermark_strategy_column.as_deref() } + #[inline] + pub fn catalog_with_options(&self) -> &BTreeMap { + &self.catalog_with_options + } + pub fn idle_time(&self) -> Option { self.temporal_config.liveness_timeout } @@ -150,6 +159,11 @@ impl SourceTable { ) -> Result { validate_adapter_availability(adapter)?; + let catalog_with_options: BTreeMap = options + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + let encoding = DataEncodingFormat::extract_from_map(options)?; let mut refined_columns = apply_adapter_specific_rules(adapter, raw_columns); @@ -179,6 +193,7 @@ impl SourceTable { lookup_cache_max_bytes: None, lookup_cache_ttl: None, inferred_fields: 
None, + catalog_with_options, }) } @@ -250,6 +265,8 @@ impl SourceTable { ) -> Result { let _ = connection_profile; + let catalog_with_options = options.snapshot_for_catalog(); + if let Some(c) = options.pull_opt_str(opt::CONNECTOR)? { if c != connector_name { return plan_err!( @@ -351,6 +368,7 @@ impl SourceTable { lookup_cache_max_bytes: None, lookup_cache_ttl: None, inferred_fields: None, + catalog_with_options, }; if let Some(event_time_field) = options.pull_opt_field(opt::EVENT_TIME_FIELD)? { diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs index 7e75f786..a9d68977 100644 --- a/src/storage/stream_catalog/manager.rs +++ b/src/storage/stream_catalog/manager.rs @@ -19,7 +19,7 @@ use datafusion::common::{internal_err, plan_err, Result as DFResult}; use parking_lot::RwLock; use prost::Message; use protocol::storage::{self as pb, table_definition}; -use tracing::warn; +use tracing::{info, warn}; use unicase::UniCase; use crate::sql::schema::{ObjectName, StreamPlanningContext, StreamTable}; @@ -144,17 +144,35 @@ impl CatalogManager { ctx } + /// All stream catalog entries (connector sources + streaming sinks), sorted by table name. + pub fn list_stream_tables(&self) -> Vec> { + let guard = self.cache.read(); + let mut out: Vec> = guard.streams.values().cloned().collect(); + out.sort_by(|a, b| a.name().cmp(b.name())); + out + } + + pub fn get_stream_table(&self, name: &str) -> Option> { + let key = UniCase::new(name.to_string()); + self.cache.read().streams.get(&key).cloned() + } + fn encode_table(&self, table: &StreamTable) -> DFResult { let table_type = match table { StreamTable::Source { schema, event_time_field, watermark_field, + with_options, .. 
} => table_definition::TableType::Source(pb::StreamSource { arrow_schema_ipc: CatalogCodec::encode_schema(schema)?, event_time_field: event_time_field.clone(), watermark_field: watermark_field.clone(), + with_options: with_options + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(), }), StreamTable::Sink { program, .. } => { let logical_program_bincode = CatalogCodec::encode_logical_program(program)?; @@ -189,6 +207,7 @@ impl CatalogManager { schema: CatalogCodec::decode_schema(&src.arrow_schema_ipc)?, event_time_field: src.event_time_field, watermark_field: src.watermark_field, + with_options: src.with_options.into_iter().collect(), }), table_definition::TableType::Sink(sink) => { if sink.logical_program_bincode.is_empty() { @@ -211,13 +230,45 @@ pub fn restore_global_catalog_from_store() { let Some(mgr) = CatalogManager::try_global() else { return; }; - if let Err(e) = mgr.restore_from_store() { - warn!("Stream catalog restore_from_store skipped or failed: {e:#}"); + match mgr.restore_from_store() { + Ok(()) => { + let n = mgr.list_stream_tables().len(); + info!(stream_tables = n, "Stream catalog loaded from durable store"); + } + Err(e) => warn!("Stream catalog restore_from_store failed: {e:#}"), } } -pub fn initialize_stream_catalog(_config: &crate::config::GlobalConfig) -> anyhow::Result<()> { - CatalogManager::init_global_in_memory().context("Stream catalog (CatalogManager) init failed") +pub fn initialize_stream_catalog(config: &crate::config::GlobalConfig) -> anyhow::Result<()> { + if !config.stream_catalog.persist { + return CatalogManager::init_global_in_memory() + .context("Stream catalog (CatalogManager) in-memory init failed"); + } + + let path = config + .stream_catalog + .db_path + .as_ref() + .map(|p| crate::config::resolve_path(p)) + .unwrap_or_else(|| crate::config::get_data_dir().join("stream_catalog")); + + std::fs::create_dir_all(&path).with_context(|| { + format!( + "Failed to create stream catalog directory {}", + path.display() + 
) + })?; + + let store = std::sync::Arc::new( + super::RocksDbMetaStore::open(&path).with_context(|| { + format!( + "Failed to open stream catalog RocksDB at {}", + path.display() + ) + })?, + ); + + CatalogManager::init_global(store).context("Stream catalog (CatalogManager) init failed") } pub fn planning_schema_provider() -> StreamPlanningContext { @@ -228,6 +279,7 @@ pub fn planning_schema_provider() -> StreamPlanningContext { #[cfg(test)] mod tests { + use std::collections::BTreeMap; use std::sync::Arc; use datafusion::arrow::datatypes::{DataType, Field, Schema}; @@ -252,6 +304,7 @@ mod tests { schema: Arc::clone(&schema), event_time_field: Some("ts".into()), watermark_field: None, + with_options: BTreeMap::new(), }; mgr.add_table(table).unwrap(); @@ -274,6 +327,35 @@ mod tests { } } + #[test] + fn add_table_roundtrip_with_options() { + let mgr = create_test_manager(); + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); + + let mut opts = BTreeMap::new(); + opts.insert("connector".to_string(), "kafka".to_string()); + opts.insert("topic".to_string(), "my-topic".to_string()); + + let table = StreamTable::Source { + name: "t_with".into(), + schema, + event_time_field: None, + watermark_field: None, + with_options: opts.clone(), + }; + + mgr.add_table(table).unwrap(); + + let ctx = mgr.acquire_planning_context(); + let got = ctx.get_stream_table("t_with").expect("table present"); + + if let StreamTable::Source { with_options, .. 
} = got.as_ref() { + assert_eq!(with_options, &opts); + } else { + panic!("expected Source"); + } + } + #[test] fn drop_table_if_exists() { let mgr = create_test_manager(); @@ -284,6 +366,7 @@ mod tests { schema, event_time_field: None, watermark_field: None, + with_options: BTreeMap::new(), }) .unwrap(); diff --git a/src/storage/stream_catalog/mod.rs b/src/storage/stream_catalog/mod.rs index f4f84469..fea2e39f 100644 --- a/src/storage/stream_catalog/mod.rs +++ b/src/storage/stream_catalog/mod.rs @@ -15,9 +15,11 @@ mod codec; mod manager; mod meta_store; +mod rocksdb_meta_store; pub use manager::{ CatalogManager, initialize_stream_catalog, planning_schema_provider, restore_global_catalog_from_store, }; pub use meta_store::{InMemoryMetaStore, MetaStore}; +pub use rocksdb_meta_store::RocksDbMetaStore; diff --git a/src/storage/stream_catalog/rocksdb_meta_store.rs b/src/storage/stream_catalog/rocksdb_meta_store.rs new file mode 100644 index 00000000..98a518a3 --- /dev/null +++ b/src/storage/stream_catalog/rocksdb_meta_store.rs @@ -0,0 +1,131 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! RocksDB-backed [`super::MetaStore`] for durable stream catalog rows. + +use std::path::Path; +use std::sync::Arc; + +use anyhow::Context; +use datafusion::common::Result; +use rocksdb::{DB, Direction, IteratorMode, Options}; + +use super::MetaStore; + +/// Single-node durable KV used by [`crate::storage::stream_catalog::CatalogManager`]. 
+pub struct RocksDbMetaStore { + db: Arc, +} + +impl RocksDbMetaStore { + pub fn open>(path: P) -> anyhow::Result { + let path = path.as_ref(); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).with_context(|| { + format!("stream catalog: create parent directory {parent:?}") + })?; + } + let mut opts = Options::default(); + opts.create_if_missing(true); + let db = DB::open(&opts, path).with_context(|| { + format!("stream catalog: open RocksDB at {}", path.display()) + })?; + Ok(Self { db: Arc::new(db) }) + } +} + +impl MetaStore for RocksDbMetaStore { + fn put(&self, key: &str, value: Vec) -> Result<()> { + self.db + .put(key.as_bytes(), value.as_slice()) + .map_err(|e| datafusion::common::DataFusionError::Execution(format!( + "stream catalog store put: {e}" + ))) + } + + fn get(&self, key: &str) -> Result>> { + self.db + .get(key.as_bytes()) + .map_err(|e| datafusion::common::DataFusionError::Execution(format!( + "stream catalog store get: {e}" + ))) + } + + fn delete(&self, key: &str) -> Result<()> { + self.db + .delete(key.as_bytes()) + .map_err(|e| datafusion::common::DataFusionError::Execution(format!( + "stream catalog store delete: {e}" + ))) + } + + fn scan_prefix(&self, prefix: &str) -> Result)>> { + let mut out = Vec::new(); + let iter = self + .db + .iterator(IteratorMode::From(prefix.as_bytes(), Direction::Forward)); + for item in iter { + let (k, v) = item.map_err(|e| { + datafusion::common::DataFusionError::Execution(format!( + "stream catalog store scan: {e}" + )) + })?; + let key = String::from_utf8(k.to_vec()).map_err(|e| { + datafusion::common::DataFusionError::Execution(format!( + "stream catalog store: invalid utf8 key: {e}" + )) + })?; + if !key.starts_with(prefix) { + break; + } + out.push((key, v.to_vec())); + } + Ok(out) + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use uuid::Uuid; + + use super::*; + + #[test] + fn put_get_scan_roundtrip() { + let dir: PathBuf = std::env::temp_dir().join(format!( 
+ "fs_stream_catalog_test_{}", + Uuid::new_v4() + )); + let _ = std::fs::remove_dir_all(&dir); + + let store = RocksDbMetaStore::open(&dir).expect("open"); + store.put("catalog:stream_table:a", vec![1, 2, 3]).unwrap(); + store.put("catalog:stream_table:b", vec![4]).unwrap(); + store.put("other:x", vec![9]).unwrap(); + + assert_eq!( + store.get("catalog:stream_table:a").unwrap(), + Some(vec![1, 2, 3]) + ); + + let prefixed = store.scan_prefix("catalog:stream_table:").unwrap(); + assert_eq!(prefixed.len(), 2); + assert!(prefixed.iter().any(|(k, _)| k.ends_with(":a"))); + assert!(prefixed.iter().any(|(k, _)| k.ends_with(":b"))); + + store.delete("catalog:stream_table:a").unwrap(); + assert!(store.get("catalog:stream_table:a").unwrap().is_none()); + + let _ = std::fs::remove_dir_all(&dir); + } +} From 728c750ae6946d42254a0b2bda791e49fe058f81 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Sun, 29 Mar 2026 22:52:03 +0800 Subject: [PATCH 30/44] update --- src/coordinator/execution/executor.rs | 24 +- src/coordinator/plan/logical_plan_visitor.rs | 9 +- src/runtime/streaming/factory/mod.rs | 2 +- .../streaming/factory/operator_factory.rs | 51 +++-- src/sql/analysis/mod.rs | 4 + src/sql/analysis/source_rewriter.rs | 209 ++++++++++++++++-- src/sql/logical_node/logical/operator_name.rs | 10 +- .../logical_planner/optimizers/chaining.rs | 195 +++++++++------- src/sql/physical/codec.rs | 13 +- src/sql/schema/column_descriptor.rs | 10 + src/sql/schema/source_table.rs | 35 ++- src/storage/stream_catalog/manager.rs | 10 +- 12 files changed, 444 insertions(+), 128 deletions(-) diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 4a7fc273..8329d498 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -82,6 +82,28 @@ impl Executor { } } + +fn strip_noisy_fields(value: &mut serde_json::Value) { + match value { + serde_json::Value::Object(map) => { + // Cover both the camelCase and snake_case key spellings and remove the operator config entry entirely + 
map.remove("operatorConfig"); + map.remove("operator_config"); + + // Keep recursing into the remaining child values + for (_, v) in map.iter_mut() { + strip_noisy_fields(v); + } + } + serde_json::Value::Array(arr) => { + for v in arr.iter_mut() { + strip_noisy_fields(v); + } + } + _ => {} + } +} + impl PlanVisitor for Executor { fn visit_create_function( &self, @@ -273,7 +295,7 @@ impl PlanVisitor for Executor { name: table_name.clone(), schema, event_time_field: source_table.event_time_field().map(str::to_string), - watermark_field: source_table.watermark_field().map(str::to_string), + watermark_field: source_table.stream_catalog_watermark_field(), with_options: source_table.catalog_with_options().clone(), }; (table_name, *if_not_exists, table_instance) diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index e5b5e36a..3bd117e2 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -104,6 +104,13 @@ impl LogicalPlanVisitor { let partition_keys = Self::extract_partitioning_keys(&mut sink_properties)?; + let sink_description = comment + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) + .unwrap_or_else(|| format!("sink `{}` ({connector_type})", sink_table_name)); + let mut query_logical_plan = rewrite_plan( produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?, &self.schema_provider, @@ -131,7 +138,7 @@ impl LogicalPlanVisitor { None, &self.schema_provider, Some(ConnectionType::Sink), - comment.clone().unwrap_or_default(), + sink_description, )?; sink_definition.partition_exprs = Arc::new(partition_keys); diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs index 024dfb14..f82cc657 100644 --- a/src/runtime/streaming/factory/mod.rs +++ b/src/runtime/streaming/factory/mod.rs @@ -56,4 +56,4 @@ fn register_kafka_connector_plugins(factory: &mut OperatorFactory) { 
factory_operator_name::KAFKA_SOURCE, 
factory_operator_name::KAFKA_SINK ); -} +} \ No newline at end of file diff --git a/src/runtime/streaming/factory/operator_factory.rs b/src/runtime/streaming/factory/operator_factory.rs index a95c0241..e1bdd635 100644 --- a/src/runtime/streaming/factory/operator_factory.rs +++ b/src/runtime/streaming/factory/operator_factory.rs @@ -38,6 +38,8 @@ use protocol::grpc::api::{ WindowFunctionOperator as WindowFunctionProto, }; +use crate::sql::logical_node::logical::OperatorName; + /// pub struct OperatorFactory { constructors: HashMap>, @@ -58,6 +60,10 @@ impl OperatorFactory { self.constructors.insert(name.to_string(), constructor); } + pub fn register_named(&mut self, name: OperatorName, constructor: Box) { + self.register(name.as_registry_key(), constructor); + } + pub fn create_operator(&self, name: &str, payload: &[u8]) -> Result { let ctor = self .constructors @@ -78,27 +84,36 @@ impl OperatorFactory { } fn register_builtins(&mut self) { - self.register("TumblingWindowAggregate", Box::new(TumblingWindowBridge)); - self.register("SlidingWindowAggregate", Box::new(SlidingWindowBridge)); - self.register("SessionWindowAggregate", Box::new(SessionWindowBridge)); + self.register_named(OperatorName::TumblingWindowAggregate, Box::new(TumblingWindowBridge)); + self.register_named(OperatorName::SlidingWindowAggregate, Box::new(SlidingWindowBridge)); + self.register_named(OperatorName::SessionWindowAggregate, Box::new(SessionWindowBridge)); - self.register("ExpressionWatermark", Box::new(WatermarkBridge)); + self.register_named(OperatorName::ExpressionWatermark, Box::new(WatermarkBridge)); // ─── SQL Window Function ─── - self.register("WindowFunction", Box::new(WindowFunctionBridge)); + self.register_named(OperatorName::WindowFunction, Box::new(WindowFunctionBridge)); // ─── Join ─── - self.register("Join", Box::new(JoinWithExpirationBridge)); - self.register("InstantJoin", Box::new(InstantJoinBridge)); - self.register("LookupJoin", Box::new(LookupJoinBridge)); - - 
self.register("UpdatingAggregate", Box::new(IncrementalAggregateBridge)); - - self.register("KeyBy", Box::new(KeyByBridge)); - - self.register("Projection", Box::new(PassthroughConstructor("Projection"))); - self.register("ArrowValue", Box::new(PassthroughConstructor("ArrowValue"))); - self.register("ArrowKey", Box::new(PassthroughConstructor("ArrowKey"))); + self.register_named(OperatorName::Join, Box::new(JoinWithExpirationBridge)); + self.register_named(OperatorName::InstantJoin, Box::new(InstantJoinBridge)); + self.register_named(OperatorName::LookupJoin, Box::new(LookupJoinBridge)); + + self.register_named(OperatorName::UpdatingAggregate, Box::new(IncrementalAggregateBridge)); + + self.register_named(OperatorName::KeyBy, Box::new(KeyByBridge)); + + self.register_named( + OperatorName::Projection, + Box::new(PassthroughConstructor(OperatorName::Projection)), + ); + self.register_named( + OperatorName::ArrowValue, + Box::new(PassthroughConstructor(OperatorName::ArrowValue)), + ); + self.register_named( + OperatorName::ArrowKey, + Box::new(PassthroughConstructor(OperatorName::ArrowKey)), + ); crate::runtime::streaming::factory::register_builtin_connectors(self); crate::runtime::streaming::factory::register_kafka_connector_plugins(self); @@ -202,12 +217,12 @@ impl OperatorConstructor for KeyByBridge { } } -pub struct PassthroughConstructor(pub &'static str); +pub struct PassthroughConstructor(pub OperatorName); impl OperatorConstructor for PassthroughConstructor { fn with_config(&self, _config: &[u8], _registry: Arc) -> Result { Ok(ConstructedOperator::Operator(Box::new( - PassthroughOperator::new(self.0), + PassthroughOperator::new(self.0.as_registry_key()), ))) } } diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs index d417ebd1..653cb601 100644 --- a/src/sql/analysis/mod.rs +++ b/src/sql/analysis/mod.rs @@ -207,6 +207,10 @@ pub fn rewrite_plan( ) -> Result { info!("Starting streaming plan rewrite pipeline"); + let Transformed { + data: plan, .. 
+ } = plan.rewrite_with_subqueries(&mut source_rewriter::SourceRewriter::new(schema_provider))?; + let mut rewriter = stream_rewriter::StreamRewriter::new(schema_provider); let Transformed { data: rewritten_plan, diff --git a/src/sql/analysis/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs index d642afd5..d96a47bf 100644 --- a/src/sql/analysis/source_rewriter.rs +++ b/src/sql/analysis/source_rewriter.rs @@ -10,6 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashSet; use std::sync::Arc; use std::time::Duration; @@ -24,6 +25,8 @@ use crate::sql::schema::source_table::SourceTable; use crate::sql::schema::ColumnDescriptor; use crate::sql::schema::table::Table; use crate::sql::schema::StreamSchemaProvider; +use crate::sql::schema::StreamTable; +use crate::sql::common::constants::sql_field; use crate::sql::extensions::remote_table::RemoteTableBoundaryNode; use crate::sql::extensions::watermark_node::EventTimeWatermarkNode; use crate::sql::types::TIMESTAMP_FIELD; @@ -33,7 +36,42 @@ pub struct SourceRewriter<'a> { pub(crate) schema_provider: &'a StreamSchemaProvider, } +impl<'a> SourceRewriter<'a> { + pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self { + Self { schema_provider } + } +} + impl SourceRewriter<'_> { + /// Output column names after stream-catalog source projection (physical fields plus optional + /// `_timestamp` alias when event time is renamed). 
+ fn stream_source_projected_column_names( + schema: &datafusion::arrow::datatypes::Schema, + event_time_field: Option<&str>, + ) -> HashSet { + let mut names: HashSet = + schema.fields().iter().map(|f| f.name().clone()).collect(); + if let Some(et) = event_time_field { + if et != TIMESTAMP_FIELD { + names.insert(TIMESTAMP_FIELD.to_string()); + } + } + names + } + + /// Resolves watermark column for [`StreamTable::Source`]: drop computed `__watermark` and any + /// name not present in the projected schema (defaults to `_timestamp` − delay). + fn stream_source_effective_watermark_field<'b>( + watermark_field: Option<&'b str>, + projected: &HashSet, + ) -> Option<&'b str> { + let w = watermark_field?; + if w == sql_field::COMPUTED_WATERMARK { + return None; + } + projected.contains(w).then_some(w) + } + fn projection_expr_for_column(col: &ColumnDescriptor, qualifier: &TableReference) -> Expr { if let Some(logic) = col.computation_logic() { logic @@ -117,6 +155,17 @@ impl SourceRewriter<'_> { expressions .push(expr.alias_qualified(Some(qualifier.clone()), TIMESTAMP_FIELD.to_string())); } else { + let has_ts = table + .schema_specs + .iter() + .any(|c| c.arrow_field().name() == TIMESTAMP_FIELD); + if !has_ts { + return plan_err!( + "Connector table '{}' has no `{}` column; declare WATERMARK FOR AS ... in CREATE TABLE", + table.table_identifier, + TIMESTAMP_FIELD + ); + } expressions.push(Expr::Column(Column::new( Some(qualifier.clone()), TIMESTAMP_FIELD, @@ -126,6 +175,132 @@ impl SourceRewriter<'_> { Ok(expressions) } + /// Stream catalog [`StreamTable::Source`] (Kafka/… registered via coordinator): inject `_timestamp` + /// from `event_time_field` when the physical schema uses another name (e.g. `impression_time`). + fn mutate_stream_catalog_source( + &self, + table_scan: &TableScan, + st: &StreamTable, + ) -> DFResult> { + let StreamTable::Source { + schema, + event_time_field, + watermark_field, + .. 
+ } = st + else { + return Ok(Transformed::no(LogicalPlan::TableScan(table_scan.clone()))); + }; + + let qualifier = table_scan.table_name.clone(); + + let mut expressions: Vec = schema + .fields() + .iter() + .map(|f| { + Expr::Column(Column { + relation: Some(qualifier.clone()), + name: f.name().to_string(), + spans: Default::default(), + }) + }) + .collect(); + + let has_physical_ts = schema.fields().iter().any(|f| f.name() == TIMESTAMP_FIELD); + + match event_time_field.as_deref() { + Some(et) if et != TIMESTAMP_FIELD => { + if !schema.fields().iter().any(|f| f.name().as_str() == et) { + return Err(DataFusionError::Plan(format!( + "Stream source `{}`: event_time_field `{et}` is not in the table schema", + table_scan.table_name.table() + ))); + } + expressions.push( + Expr::Column(Column { + relation: Some(qualifier.clone()), + name: et.to_string(), + spans: Default::default(), + }) + .alias_qualified(Some(qualifier.clone()), TIMESTAMP_FIELD.to_string()), + ); + } + None if !has_physical_ts => { + return plan_err!( + "Stream source `{}` has no `{}` column; declare WATERMARK FOR AS ... 
on CREATE TABLE, or add a `{}` column", + table_scan.table_name.table(), + TIMESTAMP_FIELD, + TIMESTAMP_FIELD + ); + } + _ => {} + } + + let source_input = LogicalPlan::TableScan(table_scan.clone()); + let projection = LogicalPlan::Projection(Projection::try_new( + expressions, + Arc::new(source_input), + )?); + + let schema_ref = projection.schema().clone(); + let remote = LogicalPlan::Extension(Extension { + node: Arc::new(RemoteTableBoundaryNode { + upstream_plan: projection, + table_identifier: table_scan.table_name.to_owned(), + resolved_schema: schema_ref, + requires_materialization: true, + }), + }); + + let projected = Self::stream_source_projected_column_names( + schema.as_ref(), + event_time_field.as_deref(), + ); + let wf = Self::stream_source_effective_watermark_field( + watermark_field.as_deref(), + &projected, + ); + let wm_expr = Self::watermark_expression_for_stream_source(wf, &qualifier)?; + + let watermark_node = EventTimeWatermarkNode::try_new( + remote, + table_scan.table_name.clone(), + wm_expr, + ) + .map_err(|err| { + DataFusionError::Internal(format!("failed to create watermark node: {err}")) + })?; + + Ok(Transformed::yes(LogicalPlan::Extension(Extension { + node: Arc::new(watermark_node), + }))) + } + + fn watermark_expression_for_stream_source( + watermark_field: Option<&str>, + qualifier: &TableReference, + ) -> DFResult { + match watermark_field { + Some(wf) => Ok(Expr::Column(Column { + relation: Some(qualifier.clone()), + name: wf.to_string(), + spans: Default::default(), + })), + None => Ok(Expr::BinaryExpr(BinaryExpr { + left: Box::new(Expr::Column(Column { + relation: Some(qualifier.clone()), + name: TIMESTAMP_FIELD.to_string(), + spans: Default::default(), + })), + op: logical_expr::Operator::Minus, + right: Box::new(Expr::Literal( + ScalarValue::DurationNanosecond(Some(Duration::from_secs(1).as_nanos() as i64)), + None, + )), + })), + } + } + fn projection(&self, table_scan: &TableScan, table: &SourceTable) -> DFResult { let 
qualifier = table_scan.table_name.clone(); @@ -224,21 +399,25 @@ impl TreeNodeRewriter for SourceRewriter<'_> { }; let table_name = table_scan.table_name.table(); - let table = self - .schema_provider - .get_catalog_table(table_name) - .ok_or_else(|| DataFusionError::Plan(format!("Table {table_name} not found")))?; - - match table { - Table::ConnectorTable(table) => self.mutate_connector_table(&table_scan, table), - Table::LookupTable(_table) => { - // TODO: implement LookupSource extension - plan_err!("Lookup tables are not yet supported") - } - Table::TableFromQuery { - name: _, - logical_plan, - } => self.mutate_table_from_query(&table_scan, logical_plan), + + if let Some(table) = self.schema_provider.get_catalog_table(table_name) { + return match table { + Table::ConnectorTable(table) => self.mutate_connector_table(&table_scan, table), + Table::LookupTable(_table) => { + // TODO: implement LookupSource extension + plan_err!("Lookup tables are not yet supported") + } + Table::TableFromQuery { + name: _, + logical_plan, + } => self.mutate_table_from_query(&table_scan, logical_plan), + }; } + + if let Some(st) = self.schema_provider.get_stream_table(table_name) { + return self.mutate_stream_catalog_source(&table_scan, st.as_ref()); + } + + Ok(Transformed::no(LogicalPlan::TableScan(table_scan.clone()))) } } diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs index d157234b..224562ea 100644 --- a/src/sql/logical_node/logical/operator_name.rs +++ b/src/sql/logical_node/logical/operator_name.rs @@ -13,11 +13,11 @@ use std::str::FromStr; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use strum::{Display, EnumString}; +use strum::{Display, EnumString, IntoStaticStr}; use crate::sql::common::constants::operator_feature; -#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)] +#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display, IntoStaticStr)] pub enum OperatorName { 
ExpressionWatermark, ArrowValue, @@ -38,6 +38,12 @@ pub enum OperatorName { } impl OperatorName { + /// Registry / worker lookup key; matches [`Display`] and protobuf operator names. + #[inline] + pub fn as_registry_key(self) -> &'static str { + self.into() + } + pub fn feature_tag(self) -> Option<&'static str> { match self { Self::ExpressionWatermark | Self::ArrowValue | Self::ArrowKey | Self::Projection => None, diff --git a/src/sql/logical_planner/optimizers/chaining.rs b/src/sql/logical_planner/optimizers/chaining.rs index 11c072d3..8c1534a6 100644 --- a/src/sql/logical_planner/optimizers/chaining.rs +++ b/src/sql/logical_planner/optimizers/chaining.rs @@ -10,121 +10,164 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::mem; + use petgraph::graph::{EdgeIndex, NodeIndex}; -use petgraph::visit::EdgeRef; -use petgraph::Direction::{Incoming, Outgoing}; +use petgraph::prelude::*; +use petgraph::visit::NodeRef; + use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer}; pub type NodeId = NodeIndex; pub type EdgeId = EdgeIndex; -pub struct ChainingOptimizer; +pub struct ChainingOptimizer {} -impl ChainingOptimizer { - fn find_fusion_candidate(plan: &LogicalGraph) -> Option<(NodeId, NodeId, EdgeId)> { - let node_ids: Vec = plan.node_indices().collect(); +fn remove_in_place(graph: &mut DiGraph, node: NodeIndex) { + let incoming = graph.edges_directed(node, Incoming).next().unwrap(); - for upstream_id in node_ids { - let upstream_node = plan.node_weight(upstream_id)?; + let parent = incoming.source().id(); + let incoming = incoming.id(); + graph.remove_edge(incoming); - if upstream_node.operator_chain.is_source() { - continue; - } + let outgoing: Vec<_> = graph + .edges_directed(node, Outgoing) + .map(|e| (e.id(), e.target().id())) + .collect(); + + for (edge, target) in outgoing { + let weight = graph.remove_edge(edge).unwrap(); + graph.add_edge(parent, target, weight); + } + + 
graph.remove_node(node); +} + +impl Optimizer for ChainingOptimizer { + fn optimize_once(&self, plan: &mut LogicalGraph) -> bool { + let node_indices: Vec = plan.node_indices().collect(); - let outgoing_edges: Vec<_> = plan.edges_directed(upstream_id, Outgoing).collect(); + for &node_idx in &node_indices { + let cur = plan.node_weight(node_idx).unwrap(); - if outgoing_edges.len() != 1 { + // sources can't be chained + if cur.operator_chain.is_source() { continue; } - let bridging_edge = &outgoing_edges[0]; + let mut successors = plan.edges_directed(node_idx, Outgoing).collect::>(); - if bridging_edge.weight().edge_type != LogicalEdgeType::Forward { + if successors.len() != 1 { continue; } - let downstream_id = bridging_edge.target(); - let downstream_node = plan.node_weight(downstream_id)?; + let edge = successors.remove(0); + let edge_type = edge.weight().edge_type; - if downstream_node.operator_chain.is_sink() { + if edge_type != LogicalEdgeType::Forward { continue; } - if upstream_node.parallelism != downstream_node.parallelism { + let successor_idx = edge.target(); + + let successor_node = plan.node_weight(successor_idx).unwrap(); + + // skip if parallelism doesn't match or successor is a sink + if cur.parallelism != successor_node.parallelism + || successor_node.operator_chain.is_sink() + { continue; } - let incoming_edges: Vec<_> = plan.edges_directed(downstream_id, Incoming).collect(); - if incoming_edges.len() != 1 { + // skip successors with multiple predecessors + if plan.edges_directed(successor_idx, Incoming).count() > 1 { continue; } - return Some((upstream_id, downstream_id, bridging_edge.id())); + // construct the new node + let mut new_cur = cur.clone(); + + new_cur.description = format!("{} -> {}", cur.description, successor_node.description); + + new_cur + .operator_chain + .operators + .extend(successor_node.operator_chain.operators.clone()); + + new_cur + .operator_chain + .edges + .push(edge.weight().schema.clone()); + + mem::swap(&mut new_cur, 
plan.node_weight_mut(node_idx).unwrap()); + + // remove the old successor + remove_in_place(plan, successor_idx); + return true; } - None + false } +} - fn apply_fusion( - plan: &mut LogicalGraph, - upstream_id: NodeId, - downstream_id: NodeId, - bridging_edge_id: EdgeId, - ) { - let bridging_edge = plan - .remove_edge(bridging_edge_id) - .expect("Graph Integrity Violation: Bridging edge missing"); - - let propagated_schema = bridging_edge.schema.clone(); - - let downstream_outgoing: Vec<_> = plan - .edges_directed(downstream_id, Outgoing) - .map(|e| (e.id(), e.target())) - .collect(); - - for (edge_id, target_id) in downstream_outgoing { - let edge_weight = plan - .remove_edge(edge_id) - .expect("Graph Integrity Violation: Outgoing edge missing"); - - plan.add_edge(upstream_id, target_id, edge_weight); - } +#[cfg(test)] +mod tests { + use std::sync::Arc; - let downstream_node = plan - .remove_node(downstream_id) - .expect("Graph Integrity Violation: Downstream node missing"); + use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; - let upstream_node = plan - .node_weight_mut(upstream_id) - .expect("Graph Integrity Violation: Upstream node missing"); + use crate::sql::common::FsSchema; + use crate::sql::logical_node::logical::{ + LogicalEdge, LogicalEdgeType, LogicalGraph, LogicalNode, Optimizer, OperatorName, + }; - upstream_node.description = format!( - "{} -> {}", - upstream_node.description, downstream_node.description - ); + use super::ChainingOptimizer; - upstream_node - .operator_chain - .operators - .extend(downstream_node.operator_chain.operators); + fn forward_edge() -> LogicalEdge { + let s = Arc::new(Schema::new(vec![Field::new( + "_timestamp", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )])); + LogicalEdge::new(LogicalEdgeType::Forward, FsSchema::new_unkeyed(s, 0)) + } - upstream_node - .operator_chain - .edges - .push(propagated_schema); + fn proj_node(id: u32, label: &str) -> LogicalNode { + LogicalNode::single( + 
id, + format!("op_{label}"), + OperatorName::Projection, + vec![], + label.to_string(), + 1, + ) } -} -impl Optimizer for ChainingOptimizer { - fn optimize_once(&self, plan: &mut LogicalGraph) -> bool { - if let Some((upstream_id, downstream_id, bridging_edge_id)) = - Self::find_fusion_candidate(plan) - { - Self::apply_fusion(plan, upstream_id, downstream_id, bridging_edge_id); - true - } else { - false - } + fn source_node() -> LogicalNode { + LogicalNode::single( + 0, + "src".into(), + OperatorName::ConnectorSource, + vec![], + "source".into(), + 1, + ) + } + + /// Regression: upstream at last `NodeIndex` + remove non-last downstream swaps indices. + #[test] + fn fusion_remaps_when_upstream_was_last_node_index() { + let mut g = LogicalGraph::new(); + let n0 = g.add_node(source_node()); + let n1 = g.add_node(proj_node(1, "downstream")); + let n2 = g.add_node(proj_node(2, "upstream_last_index")); + let e = forward_edge(); + g.add_edge(n0, n2, e.clone()); + g.add_edge(n2, n1, e); + + let changed = ChainingOptimizer {}.optimize_once(&mut g); + assert!(changed); + assert_eq!(g.node_count(), 2); } } diff --git a/src/sql/physical/codec.rs b/src/sql/physical/codec.rs index 1301ef09..c8349dc6 100644 --- a/src/sql/physical/codec.rs +++ b/src/sql/physical/codec.rs @@ -15,8 +15,9 @@ use std::sync::Arc; use datafusion::arrow::array::RecordBatch; use datafusion::arrow::datatypes::Schema; -use datafusion::common::{DataFusionError, Result, UnnestOptions}; +use datafusion::common::{DataFusionError, Result, UnnestOptions, not_impl_err}; use datafusion::execution::FunctionRegistry; +use datafusion::logical_expr::ScalarUDF; use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::unnest::{ListUnnest, UnnestExec}; use datafusion_proto::physical_plan::PhysicalExtensionCodec; @@ -28,7 +29,8 @@ use protocol::grpc::api::{ use tokio::sync::mpsc::UnboundedReceiver; use crate::sql::analysis::UNNESTED_COL; -use crate::sql::common::constants::mem_exec_join_side; +use 
crate::sql::common::constants::{mem_exec_join_side, window_function_udf}; +use crate::sql::physical::udfs::window; use crate::sql::physical::cdc::{DebeziumUnrollingExec, ToDebeziumExec}; use crate::sql::physical::readers::{ FsMemExec, RecordBatchVecReader, RwLockRecordBatchReader, UnboundedRecordBatchReader, @@ -138,6 +140,13 @@ impl PhysicalExtensionCodec for FsPhysicalExtensionCodec { ))) } } + + fn try_decode_udf(&self, name: &str, _buf: &[u8]) -> Result> { + if name == window_function_udf::NAME { + return Ok(window()); + } + not_impl_err!("PhysicalExtensionCodec is not provided for scalar function {name}") + } } impl FsPhysicalExtensionCodec { diff --git a/src/sql/schema/column_descriptor.rs b/src/sql/schema/column_descriptor.rs index 941a7500..533708cc 100644 --- a/src/sql/schema/column_descriptor.rs +++ b/src/sql/schema/column_descriptor.rs @@ -99,6 +99,16 @@ impl ColumnDescriptor { self.arrow_field().data_type() } + pub fn set_nullable(&mut self, nullable: bool) { + let f = match self { + Self::Physical(f) => f, + Self::SystemMeta { field, .. } => field, + Self::Computed { field, .. } => field, + }; + *f = Field::new(f.name(), f.data_type().clone(), nullable) + .with_metadata(f.metadata().clone()); + } + pub fn force_precision(&mut self, unit: TimeUnit) { match self { Self::Physical(f) => { diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs index 9c975573..d10e39dc 100644 --- a/src/sql/schema/source_table.rs +++ b/src/sql/schema/source_table.rs @@ -139,6 +139,17 @@ impl SourceTable { self.temporal_config.watermark_strategy_column.as_deref() } + /// Watermark column name safe to persist for [`StreamTable::Source`]. Omits the computed + /// [`sql_field::COMPUTED_WATERMARK`] column: stream catalog only stores Arrow physical fields, + /// so `__watermark` cannot be resolved when the table is planned from the catalog. 
+ pub fn stream_catalog_watermark_field(&self) -> Option { + self.temporal_config + .watermark_strategy_column + .as_deref() + .filter(|w| *w != sql_field::COMPUTED_WATERMARK) + .map(str::to_string) + } + #[inline] pub fn catalog_with_options(&self) -> &BTreeMap { &self.catalog_with_options @@ -382,10 +393,6 @@ impl SourceTable { } if let Some((time_field, watermark_expr)) = watermark { - let table_ref = TableReference::bare(table.table_identifier.as_str()); - let df_schema = - DFSchema::try_from_qualified_schema(table_ref, &table.produce_physical_schema())?; - let field = table .schema_specs .iter() @@ -404,6 +411,19 @@ impl SourceTable { ); } + // Watermark 引用的时间列语义上必须非空,强制设为 NOT NULL, + // 避免用户建表时遗漏 NOT NULL 导致后续表达式 nullable 校验失败。 + for col in table.schema_specs.iter_mut() { + if col.arrow_field().name().as_str() == time_field.as_str() { + col.set_nullable(false); + break; + } + } + + let table_ref = TableReference::bare(table.table_identifier.as_str()); + let df_schema = + DFSchema::try_from_qualified_schema(table_ref, &table.produce_physical_schema())?; + table.temporal_config.event_column = Some(time_field.clone()); if let Some(expr) = watermark_expr { @@ -412,17 +432,12 @@ impl SourceTable { DataFusionError::Plan(format!("could not plan watermark expression: {e}")) })?; - let (data_type, nullable) = logical_expr.data_type_and_nullable(&df_schema)?; + let (data_type, _nullable) = logical_expr.data_type_and_nullable(&df_schema)?; if !matches!(data_type, DataType::Timestamp(_, _)) { return plan_err!( "the type of the WATERMARK FOR expression must be TIMESTAMP, but was {data_type}" ); } - if nullable { - return plan_err!( - "the type of the WATERMARK FOR expression must be NOT NULL" - ); - } table.schema_specs.push(ColumnDescriptor::new_computed( Field::new( diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs index a9d68977..5f40240a 100644 --- a/src/storage/stream_catalog/manager.rs +++ 
b/src/storage/stream_catalog/manager.rs @@ -22,6 +22,7 @@ use protocol::storage::{self as pb, table_definition}; use tracing::{info, warn}; use unicase::UniCase; +use crate::sql::common::constants::sql_field; use crate::sql::schema::{ObjectName, StreamPlanningContext, StreamTable}; use super::codec::CatalogCodec; @@ -168,7 +169,10 @@ impl CatalogManager { } => table_definition::TableType::Source(pb::StreamSource { arrow_schema_ipc: CatalogCodec::encode_schema(schema)?, event_time_field: event_time_field.clone(), - watermark_field: watermark_field.clone(), + watermark_field: watermark_field + .as_ref() + .filter(|w| *w != sql_field::COMPUTED_WATERMARK) + .cloned(), with_options: with_options .iter() .map(|(k, v)| (k.clone(), v.clone())) @@ -206,7 +210,9 @@ impl CatalogManager { name: proto_def.table_name, schema: CatalogCodec::decode_schema(&src.arrow_schema_ipc)?, event_time_field: src.event_time_field, - watermark_field: src.watermark_field, + watermark_field: src + .watermark_field + .filter(|w| w != sql_field::COMPUTED_WATERMARK), with_options: src.with_options.into_iter().collect(), }), table_definition::TableType::Sink(sink) => { From b64e4ceda5b8e5a17563001c1c4b4bdc75f86077 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Mon, 30 Mar 2026 22:58:27 +0800 Subject: [PATCH 31/44] update --- src/coordinator/coordinator.rs | 363 +------- src/coordinator/plan/logical_plan_visitor.rs | 56 +- src/coordinator/runtime_context.rs | 44 +- src/runtime/streaming/factory/mod.rs | 2 - .../streaming/factory/operator_factory.rs | 110 ++- src/runtime/streaming/operators/mod.rs | 51 +- src/sql/analysis/source_rewriter.rs | 200 ++--- src/sql/extensions/key_calculation.rs | 2 +- src/sql/extensions/remote_table.rs | 2 +- src/sql/frontend_sql_coverage_tests.rs | 823 ------------------ src/sql/logical_node/logical/operator_name.rs | 7 +- src/sql/mod.rs | 2 - 12 files changed, 190 insertions(+), 1472 deletions(-) delete mode 100644 src/sql/frontend_sql_coverage_tests.rs diff --git 
a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs index f21b12ca..b86b1070 100644 --- a/src/coordinator/coordinator.rs +++ b/src/coordinator/coordinator.rs @@ -145,365 +145,4 @@ impl Coordinator { } } } -} - -// --------------------------------------------------------------------------- -// Test-only helpers (used by `create_streaming_table_coordinator_tests` below) -// --------------------------------------------------------------------------- - -#[cfg(test)] -use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; - -#[cfg(test)] -use crate::sql::common::TIMESTAMP_FIELD; -#[cfg(test)] -use crate::sql::parse::parse_sql; - -#[cfg(test)] -fn fake_stream_schema_provider() -> StreamSchemaProvider { - let mut provider = StreamSchemaProvider::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new( - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ), - ])); - provider.add_source_table( - "src".to_string(), - schema, - Some(TIMESTAMP_FIELD.to_string()), - None, - ); - provider -} - -#[cfg(test)] -fn fake_stream_schema_provider_with_v() -> StreamSchemaProvider { - let mut provider = StreamSchemaProvider::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new("v", DataType::Utf8, true), - Field::new( - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ), - ])); - provider.add_source_table( - "src".to_string(), - schema, - Some(TIMESTAMP_FIELD.to_string()), - None, - ); - provider -} - -#[cfg(test)] -fn fake_src_dim_provider() -> StreamSchemaProvider { - let mut provider = fake_stream_schema_provider_with_v(); - let dim = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new("name", DataType::Utf8, true), - Field::new("amt", DataType::Float64, true), - Field::new( - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ), - ])); - 
provider.add_source_table( - "dim".to_string(), - dim, - Some(TIMESTAMP_FIELD.to_string()), - None, - ); - provider -} - -#[cfg(test)] -fn assert_coordinator_streaming_build_ok( - sql: &str, - provider: StreamSchemaProvider, - expect_sink_substring: &str, - expect_connector_substring: &str, -) { - let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}")); - assert_eq!(stmts.len(), 1); - let plan = Coordinator::new() - .compile_plan(stmts[0].as_ref(), provider) - .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}")); - let rendered = format!("{plan:?}"); - assert!(rendered.contains("StreamingTable"), "{rendered}"); - assert!( - rendered.contains(expect_sink_substring), - "expected sink name fragment {expect_sink_substring:?} in:\n{rendered}" - ); - assert!( - rendered.contains(expect_connector_substring), - "expected connector fragment {expect_connector_substring:?} in:\n{rendered}" - ); -} - -#[cfg(test)] -mod create_streaming_table_coordinator_tests { - use super::{ - assert_coordinator_streaming_build_ok, fake_src_dim_provider, - fake_stream_schema_provider, fake_stream_schema_provider_with_v, - }; - use crate::sql::common::TIMESTAMP_FIELD; - - #[test] - fn coordinator_build_create_streaming_table_select_star_kafka() { - assert_coordinator_streaming_build_ok( - concat!( - "CREATE STREAMING TABLE my_sink ", - "WITH ('connector' = 'kafka') ", - "AS SELECT * FROM src", - ), - fake_stream_schema_provider(), - "my_sink", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_memory_connector() { - assert_coordinator_streaming_build_ok( - "CREATE STREAMING TABLE mem_out WITH ('connector'='memory') AS SELECT * FROM src", - fake_stream_schema_provider(), - "mem_out", - "memory", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_postgres_connector() { - assert_coordinator_streaming_build_ok( - "CREATE STREAMING TABLE pg_out WITH ('connector'='postgres') AS SELECT id FROM src", - fake_stream_schema_provider(), 
- "pg_out", - "postgres", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_partition_by_and_idle_time() { - assert_coordinator_streaming_build_ok( - concat!( - "CREATE STREAMING TABLE part_idle ", - "WITH ('connector'='kafka', 'partition_by'='id', 'idle_time'='30 seconds') ", - "AS SELECT * FROM src", - ), - fake_stream_schema_provider(), - "part_idle", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_project_timestamp_columns() { - let sql = format!( - "CREATE STREAMING TABLE ts_cols WITH ('connector'='kafka') AS SELECT id, {ts} FROM src", - ts = TIMESTAMP_FIELD - ); - assert_coordinator_streaming_build_ok( - &sql, - fake_stream_schema_provider(), - "ts_cols", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_where_filters() { - let p = fake_stream_schema_provider_with_v(); - for (label, body) in [ - ("eq", "SELECT * FROM src WHERE id = 1"), - ("range", "SELECT * FROM src WHERE id > 0 AND id < 100"), - ("in_list", "SELECT * FROM src WHERE id IN (1, 2, 3)"), - ("between", "SELECT * FROM src WHERE id BETWEEN 1 AND 10"), - ("like", "SELECT * FROM src WHERE v LIKE 'a%'"), - ("null", "SELECT * FROM src WHERE v IS NULL"), - ] { - let sql = format!( - "CREATE STREAMING TABLE sink_w_{label} WITH ('connector'='kafka') AS {body}" - ); - assert_coordinator_streaming_build_ok( - &sql, - p.clone(), - &format!("sink_w_{label}"), - "kafka", - ); - } - } - - #[test] - fn coordinator_build_create_streaming_table_case_coalesce_cast() { - let ts = TIMESTAMP_FIELD; - let sql = format!( - "CREATE STREAMING TABLE sink_expr WITH ('connector'='kafka') AS \ - SELECT CASE WHEN id < 0 THEN 0 ELSE id END AS c, COALESCE(v, 'x') AS v2, \ - CAST(id AS DOUBLE) AS id_f, {ts} FROM src" - ); - assert_coordinator_streaming_build_ok( - &sql, - fake_stream_schema_provider_with_v(), - "sink_expr", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_row_time_projection() { - let ts = TIMESTAMP_FIELD; - 
let sql = format!( - "CREATE STREAMING TABLE sink_rt WITH ('connector'='kafka') AS \ - SELECT row_time(), id, {ts} FROM src" - ); - assert_coordinator_streaming_build_ok( - &sql, - fake_stream_schema_provider(), - "sink_rt", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_scalar_funcs_projection() { - let ts = TIMESTAMP_FIELD; - let sql = format!( - "CREATE STREAMING TABLE sink_scalar WITH ('connector'='kafka') AS \ - SELECT ABS(id), UPPER(v), LOWER(v), BTRIM(v), CHARACTER_LENGTH(v), {ts} FROM src" - ); - assert_coordinator_streaming_build_ok( - &sql, - fake_stream_schema_provider_with_v(), - "sink_scalar", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_cte() { - let ts = TIMESTAMP_FIELD; - let sql = format!( - "CREATE STREAMING TABLE sink_cte WITH ('connector'='kafka') AS \ - WITH t AS (SELECT id, {ts} FROM src WHERE id > 0) SELECT * FROM t" - ); - assert_coordinator_streaming_build_ok( - &sql, - fake_stream_schema_provider(), - "sink_cte", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_cte_chain() { - let sql = "CREATE STREAMING TABLE sink_cte2 WITH ('connector'='kafka') AS \ - WITH a AS (SELECT id FROM src), b AS (SELECT id FROM a WHERE id > 1) SELECT * FROM b"; - assert_coordinator_streaming_build_ok( - sql, - fake_stream_schema_provider(), - "sink_cte2", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_sink_name_with_digits() { - assert_coordinator_streaming_build_ok( - "CREATE STREAMING TABLE out_sink_01 WITH ('connector'='kafka') AS SELECT * FROM src", - fake_stream_schema_provider(), - "out_sink_01", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_subquery_in_from() { - let ts = TIMESTAMP_FIELD; - let sql = format!( - "CREATE STREAMING TABLE sink_sq WITH ('connector'='kafka') AS \ - SELECT * FROM (SELECT id, {ts} FROM src WHERE id >= 0) AS x" - ); - assert_coordinator_streaming_build_ok( - &sql, - 
fake_stream_schema_provider(), - "sink_sq", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_nested_subqueries() { - let sql = "CREATE STREAMING TABLE sink_nest WITH ('connector'='kafka') AS \ - SELECT * FROM (SELECT * FROM (SELECT id FROM src) AS i2) AS i1"; - assert_coordinator_streaming_build_ok( - sql, - fake_stream_schema_provider(), - "sink_nest", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_union_all() { - let ts = TIMESTAMP_FIELD; - let sql = format!( - "CREATE STREAMING TABLE sink_union WITH ('connector'='kafka') AS \ - SELECT id, v, {ts} FROM src \ - UNION ALL \ - SELECT id, name AS v, {ts} FROM dim" - ); - assert_coordinator_streaming_build_ok( - &sql, - fake_src_dim_provider(), - "sink_union", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_nullif_regexp() { - let ts = TIMESTAMP_FIELD; - let sql = format!( - "CREATE STREAMING TABLE sink_re WITH ('connector'='kafka') AS \ - SELECT id, NULLIF(v, ''), REGEXP_LIKE(v, '^x'), {ts} FROM src" - ); - assert_coordinator_streaming_build_ok( - &sql, - fake_stream_schema_provider_with_v(), - "sink_re", - "kafka", - ); - } - - #[test] - fn coordinator_build_create_streaming_table_not_and_or_where() { - let p = fake_stream_schema_provider_with_v(); - assert_coordinator_streaming_build_ok( - "CREATE STREAMING TABLE sink_bool WITH ('connector'='kafka') AS \ - SELECT * FROM src WHERE NOT (id = 0) AND (v IS NOT NULL OR id > 0)", - p, - "sink_bool", - "kafka", - ); - } - - #[test] - fn coordinator_sql_create_streaming_table_compiles_full_pipeline() { - assert_coordinator_streaming_build_ok( - concat!( - "CREATE STREAMING TABLE my_sink ", - "WITH ('connector' = 'kafka') ", - "AS SELECT * FROM src", - ), - fake_stream_schema_provider(), - "my_sink", - "kafka", - ); - } -} +} \ No newline at end of file diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index 3bd117e2..b9cb4dfe 
100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -459,58 +459,4 @@ impl StatementVisitor for LogicalPlanVisitor { *if_exists, ))) } -} - -#[cfg(test)] -mod create_streaming_table_tests { - use std::sync::Arc; - - use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; - use datafusion::sql::sqlparser::ast::Statement as DFStatement; - use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; - use datafusion::sql::sqlparser::parser::Parser; - - use crate::sql::common::TIMESTAMP_FIELD; - use crate::sql::logical_planner::optimizers::produce_optimized_plan; - use crate::sql::rewrite_plan; - use crate::sql::schema::StreamSchemaProvider; - - fn schema_provider_with_src() -> StreamSchemaProvider { - let mut provider = StreamSchemaProvider::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new( - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ), - ])); - provider.add_source_table( - "src".to_string(), - schema, - Some(TIMESTAMP_FIELD.to_string()), - None, - ); - provider - } - - #[test] - fn create_streaming_table_query_plans_and_rewrites() { - let sql = - "CREATE STREAMING TABLE my_sink WITH ('connector' = 'kafka') AS SELECT * FROM src"; - let dialect = FunctionStreamDialect {}; - let ast = Parser::parse_sql(&dialect, sql).expect("parse CREATE STREAMING TABLE"); - let DFStatement::CreateStreamingTable { query, .. 
} = &ast[0] else { - panic!("expected CreateStreamingTable, got {:?}", ast[0]); - }; - let provider = schema_provider_with_src(); - let base = produce_optimized_plan(&DFStatement::Query(query.clone()), &provider) - .expect("produce optimized logical plan for sink query"); - let rewritten = rewrite_plan(base, &provider).expect("streaming rewrite_plan"); - let dot = format!("{}", rewritten.display_graphviz()); - assert!( - dot.contains("src") || dot.contains("Src"), - "rewritten plan should reference source; got subgraph:\n{dot}" - ); - } -} +} \ No newline at end of file diff --git a/src/coordinator/runtime_context.rs b/src/coordinator/runtime_context.rs index d0f80786..af9a9ddf 100644 --- a/src/coordinator/runtime_context.rs +++ b/src/coordinator/runtime_context.rs @@ -18,7 +18,11 @@ use anyhow::Result; use crate::runtime::streaming::job::JobManager; use crate::runtime::taskexecutor::TaskManager; -use crate::sql::schema::StreamSchemaProvider; +use crate::sql::schema::column_descriptor::ColumnDescriptor; +use crate::sql::schema::connection_type::ConnectionType; +use crate::sql::schema::source_table::SourceTable; +use crate::sql::schema::table::Table as CatalogTable; +use crate::sql::schema::{StreamSchemaProvider, StreamTable}; use crate::storage::stream_catalog::CatalogManager; /// Dependencies shared by analyze / plan / execute, analogous to installing globals in @@ -60,9 +64,41 @@ impl CoordinatorRuntimeContext { /// Schema provider for [`LogicalPlanVisitor`] / [`SqlToRel`]: override if set, else catalog snapshot. 
pub fn planning_schema_provider(&self) -> StreamSchemaProvider { - if let Some(ref p) = self.planning_schema_override { - return p.clone(); + let mut provider = self.catalog_manager.acquire_planning_context(); + + for (name, stream) in provider.tables.streams.clone() { + let StreamTable::Source { + name: source_name, + schema, + event_time_field, + watermark_field, + with_options, + } = stream.as_ref() + else { + continue; + }; + + let connector = with_options + .get("connector") + .cloned() + .unwrap_or_else(|| "stream_catalog".to_string()); + let mut source = SourceTable::new(source_name.clone(), connector, ConnectionType::Source); + source.schema_specs = schema + .fields() + .iter() + .map(|f| ColumnDescriptor::new_physical((**f).clone())) + .collect(); + source.inferred_fields = Some(schema.fields().iter().cloned().collect()); + source.temporal_config.event_column = event_time_field.clone(); + source.temporal_config.watermark_strategy_column = watermark_field.clone(); + source.catalog_with_options = with_options.clone(); + + provider + .tables + .catalogs + .insert(name, Arc::new(CatalogTable::ConnectorTable(source))); } - self.catalog_manager.acquire_planning_context() + + provider } } diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs index f82cc657..1315e4de 100644 --- a/src/runtime/streaming/factory/mod.rs +++ b/src/runtime/streaming/factory/mod.rs @@ -28,8 +28,6 @@ pub use connector::{ pub use global::Registry; pub use operator_constructor::OperatorConstructor; pub use operator_factory::OperatorFactory; -#[allow(unused_imports)] -pub use operator_factory::PassthroughConstructor; fn register_builtin_connectors(factory: &mut OperatorFactory) { factory.register( diff --git a/src/runtime/streaming/factory/operator_factory.rs b/src/runtime/streaming/factory/operator_factory.rs index e1bdd635..eb2afd9b 100644 --- a/src/runtime/streaming/factory/operator_factory.rs +++ b/src/runtime/streaming/factory/operator_factory.rs @@ 
-19,7 +19,6 @@ use std::sync::Arc; use super::operator_constructor::OperatorConstructor; use crate::runtime::streaming::api::operator::ConstructedOperator; use crate::runtime::streaming::factory::global::Registry; -use crate::runtime::streaming::operators::PassthroughOperator; use crate::runtime::streaming::operators::grouping::IncrementalAggregatingConstructor; use crate::runtime::streaming::operators::joins::{ InstantJoinConstructor, JoinWithExpirationConstructor, @@ -30,12 +29,18 @@ use crate::runtime::streaming::operators::windows::{ SessionAggregatingWindowConstructor, SlidingAggregatingWindowConstructor, TumblingAggregateWindowConstructor, WindowFunctionConstructor, }; - +use crate::runtime::streaming::operators::{ + ProjectionOperator, StatelessPhysicalExecutor, ValueExecutionOperator, +}; +use crate::sql::common::FsSchema; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::from_proto::parse_physical_expr; +use datafusion_proto::protobuf::PhysicalExprNode; use protocol::grpc::api::{ ExpressionWatermarkConfig, JoinOperator as JoinOperatorProto, - KeyPlanOperator as KeyByProto, SessionWindowAggregateOperator, SlidingWindowAggregateOperator, - TumblingWindowAggregateOperator, UpdatingAggregateOperator, - WindowFunctionOperator as WindowFunctionProto, + KeyPlanOperator as KeyByProto, ProjectionOperator as ProjectionOperatorProto, + SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator, + UpdatingAggregateOperator, ValuePlanOperator, WindowFunctionOperator as WindowFunctionProto, }; use crate::sql::logical_node::logical::OperatorName; @@ -102,18 +107,8 @@ impl OperatorFactory { self.register_named(OperatorName::KeyBy, Box::new(KeyByBridge)); - self.register_named( - OperatorName::Projection, - Box::new(PassthroughConstructor(OperatorName::Projection)), - ); - self.register_named( - OperatorName::ArrowValue, - 
Box::new(PassthroughConstructor(OperatorName::ArrowValue)), - ); - self.register_named( - OperatorName::ArrowKey, - Box::new(PassthroughConstructor(OperatorName::ArrowKey)), - ); + self.register_named(OperatorName::Projection, Box::new(ProjectionBridge)); + self.register_named(OperatorName::Value, Box::new(ValueBridge)); crate::runtime::streaming::factory::register_builtin_connectors(self); crate::runtime::streaming::factory::register_kafka_connector_plugins(self); @@ -217,12 +212,81 @@ impl OperatorConstructor for KeyByBridge { } } -pub struct PassthroughConstructor(pub OperatorName); +struct ProjectionBridge; +impl OperatorConstructor for ProjectionBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let proto = ProjectionOperatorProto::decode(config) + .map_err(|e| anyhow!("Decode ProjectionOperator failed: {e}"))?; + let op = ProjectionExecutionConstructor.with_config(proto, registry)?; + Ok(ConstructedOperator::Operator(Box::new(op))) + } +} -impl OperatorConstructor for PassthroughConstructor { - fn with_config(&self, _config: &[u8], _registry: Arc) -> Result { - Ok(ConstructedOperator::Operator(Box::new( - PassthroughOperator::new(self.0.as_registry_key()), - ))) +struct ValueBridge; +impl OperatorConstructor for ValueBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + let proto = ValuePlanOperator::decode(config) + .map_err(|e| anyhow!("Decode ValuePlanOperator failed: {e}"))?; + let op = ValueExecutionConstructor.with_config(proto, registry)?; + Ok(ConstructedOperator::Operator(Box::new(op))) } } + +struct ProjectionExecutionConstructor; +impl ProjectionExecutionConstructor { + fn with_config( + &self, + config: ProjectionOperatorProto, + registry: Arc, + ) -> Result { + let input_schema: FsSchema = config + .input_schema + .ok_or_else(|| anyhow!("missing projection input_schema"))? 
+ .try_into() + .map_err(|e| anyhow!("projection input_schema: {e}"))?; + let output_schema: FsSchema = config + .output_schema + .ok_or_else(|| anyhow!("missing projection output_schema"))? + .try_into() + .map_err(|e| anyhow!("projection output_schema: {e}"))?; + + let exprs = config + .exprs + .iter() + .map(|raw| { + let expr_node = PhysicalExprNode::decode(&mut raw.as_slice()) + .map_err(|e| anyhow!("decode projection expr: {e}"))?; + parse_physical_expr( + &expr_node, + registry.as_ref(), + &input_schema.schema, + &DefaultPhysicalExtensionCodec {}, + ) + .map_err(|e| anyhow!("parse projection expr: {e}")) + }) + .collect::>>()?; + + Ok(ProjectionOperator::new( + if config.name.is_empty() { + OperatorName::Projection.as_registry_key().to_string() + } else { + config.name + }, + Arc::new(output_schema), + exprs, + )) + } +} + +struct ValueExecutionConstructor; +impl ValueExecutionConstructor { + fn with_config( + &self, + config: ValuePlanOperator, + registry: Arc, + ) -> Result { + let executor = StatelessPhysicalExecutor::new(&config.physical_plan, registry.as_ref()) + .map_err(|e| anyhow!("build value execution plan '{}': {e}", config.name))?; + Ok(ValueExecutionOperator::new(config.name, executor)) + } +} \ No newline at end of file diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs index cb8412d8..b679f2bd 100644 --- a/src/runtime/streaming/operators/mod.rs +++ b/src/runtime/streaming/operators/mod.rs @@ -24,6 +24,8 @@ mod stateless_physical_executor; mod value_execution; pub use stateless_physical_executor::StatelessPhysicalExecutor; +pub use projection::ProjectionOperator; +pub use value_execution::ValueExecutionOperator; pub use grouping::{IncrementalAggregatingFunc, Key, UpdatingCache}; pub use joins::{InstantJoinOperator, JoinWithExpirationOperator}; @@ -35,52 +37,3 @@ pub use windows::{ SessionWindowOperator, SlidingWindowOperator, TumblingWindowOperator, WindowFunctionOperator, }; - -use 
crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; -use arrow_array::RecordBatch; -use async_trait::async_trait; -use crate::runtime::streaming::StreamOutput; -use crate::sql::common::{CheckpointBarrier, Watermark}; - -pub struct PassthroughOperator { - name: String, -} - -impl PassthroughOperator { - pub fn new(name: impl Into) -> Self { - Self { name: name.into() } - } -} - -#[async_trait] -impl MessageOperator for PassthroughOperator { - fn name(&self) -> &str { - &self.name - } - - async fn process_data( - &mut self, - _input_idx: usize, - batch: RecordBatch, - _ctx: &mut TaskContext, - ) -> anyhow::Result> { - Ok(vec![StreamOutput::Forward(batch)]) - } - - async fn process_watermark( - &mut self, - _watermark: Watermark, - _ctx: &mut TaskContext, - ) -> anyhow::Result> { - Ok(vec![]) - } - - async fn snapshot_state( - &mut self, - _barrier: CheckpointBarrier, - _ctx: &mut TaskContext, - ) -> anyhow::Result<()> { - Ok(()) - } -} diff --git a/src/sql/analysis/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs index d96a47bf..35804c02 100644 --- a/src/sql/analysis/source_rewriter.rs +++ b/src/sql/analysis/source_rewriter.rs @@ -27,11 +27,17 @@ use crate::sql::schema::table::Table; use crate::sql::schema::StreamSchemaProvider; use crate::sql::schema::StreamTable; use crate::sql::common::constants::sql_field; +use crate::sql::common::UPDATING_META_FIELD; +use crate::sql::extensions::debezium::UnrollDebeziumPayloadNode; use crate::sql::extensions::remote_table::RemoteTableBoundaryNode; +use crate::sql::extensions::table_source::StreamIngestionNode; use crate::sql::extensions::watermark_node::EventTimeWatermarkNode; use crate::sql::types::TIMESTAMP_FIELD; -/// Rewrites table scans into proper source nodes with projections and watermarks. 
+/// Rewrites table scans: projections are lifted out of scans into a dedicated projection node +/// (including virtual fields), using a connector table-source extension instead of a bare +/// `TableScan`, optionally with Debezium unrolling for updating sources, then remote boundary and +/// watermark. pub struct SourceRewriter<'a> { pub(crate) schema_provider: &'a StreamSchemaProvider, } @@ -172,144 +178,50 @@ impl SourceRewriter<'_> { ))); } + if table.is_updating() { + expressions.push(Expr::Column(Column::new( + Some(qualifier.clone()), + UPDATING_META_FIELD, + ))); + } + Ok(expressions) } - /// Stream catalog [`StreamTable::Source`] (Kafka/… registered via coordinator): inject `_timestamp` - /// from `event_time_field` when the physical schema uses another name (e.g. `impression_time`). - fn mutate_stream_catalog_source( - &self, - table_scan: &TableScan, - st: &StreamTable, - ) -> DFResult> { - let StreamTable::Source { - schema, - event_time_field, - watermark_field, - .. - } = st - else { - return Ok(Transformed::no(LogicalPlan::TableScan(table_scan.clone()))); - }; + /// Connector path: `StreamIngestionNode` (table source) → optional `UnrollDebeziumPayloadNode` + /// → `Projection`, mirroring Arroyo `TableSourceExtension` + Debezium unroll + projection. 
+ fn projection(&self, table_scan: &TableScan, table: &SourceTable) -> DFResult { let qualifier = table_scan.table_name.clone(); - let mut expressions: Vec = schema - .fields() - .iter() - .map(|f| { - Expr::Column(Column { - relation: Some(qualifier.clone()), - name: f.name().to_string(), - spans: Default::default(), - }) - }) - .collect(); + let table_source = LogicalPlan::Extension(Extension { + node: Arc::new(StreamIngestionNode::try_new( + qualifier.clone(), + table.clone(), + )?), + }); - let has_physical_ts = schema.fields().iter().any(|f| f.name() == TIMESTAMP_FIELD); - - match event_time_field.as_deref() { - Some(et) if et != TIMESTAMP_FIELD => { - if !schema.fields().iter().any(|f| f.name().as_str() == et) { - return Err(DataFusionError::Plan(format!( - "Stream source `{}`: event_time_field `{et}` is not in the table schema", - table_scan.table_name.table() - ))); - } - expressions.push( - Expr::Column(Column { - relation: Some(qualifier.clone()), - name: et.to_string(), - spans: Default::default(), - }) - .alias_qualified(Some(qualifier.clone()), TIMESTAMP_FIELD.to_string()), - ); - } - None if !has_physical_ts => { + let (projection_input, scan_projection) = if table.is_updating() { + if table.key_constraints.is_empty() { return plan_err!( - "Stream source `{}` has no `{}` column; declare WATERMARK FOR AS ... 
on CREATE TABLE, or add a `{}` column", - table_scan.table_name.table(), - TIMESTAMP_FIELD, - TIMESTAMP_FIELD + "Updating connector table `{}` requires at least one PRIMARY KEY for CDC unrolling", + table.table_identifier ); } - _ => {} - } - - let source_input = LogicalPlan::TableScan(table_scan.clone()); - let projection = LogicalPlan::Projection(Projection::try_new( - expressions, - Arc::new(source_input), - )?); - - let schema_ref = projection.schema().clone(); - let remote = LogicalPlan::Extension(Extension { - node: Arc::new(RemoteTableBoundaryNode { - upstream_plan: projection, - table_identifier: table_scan.table_name.to_owned(), - resolved_schema: schema_ref, - requires_materialization: true, - }), - }); - - let projected = Self::stream_source_projected_column_names( - schema.as_ref(), - event_time_field.as_deref(), - ); - let wf = Self::stream_source_effective_watermark_field( - watermark_field.as_deref(), - &projected, - ); - let wm_expr = Self::watermark_expression_for_stream_source(wf, &qualifier)?; - - let watermark_node = EventTimeWatermarkNode::try_new( - remote, - table_scan.table_name.clone(), - wm_expr, - ) - .map_err(|err| { - DataFusionError::Internal(format!("failed to create watermark node: {err}")) - })?; - - Ok(Transformed::yes(LogicalPlan::Extension(Extension { - node: Arc::new(watermark_node), - }))) - } - - fn watermark_expression_for_stream_source( - watermark_field: Option<&str>, - qualifier: &TableReference, - ) -> DFResult { - match watermark_field { - Some(wf) => Ok(Expr::Column(Column { - relation: Some(qualifier.clone()), - name: wf.to_string(), - spans: Default::default(), - })), - None => Ok(Expr::BinaryExpr(BinaryExpr { - left: Box::new(Expr::Column(Column { - relation: Some(qualifier.clone()), - name: TIMESTAMP_FIELD.to_string(), - spans: Default::default(), - })), - op: logical_expr::Operator::Minus, - right: Box::new(Expr::Literal( - ScalarValue::DurationNanosecond(Some(Duration::from_secs(1).as_nanos() as i64)), - None, - 
)), - })), - } - } - - fn projection(&self, table_scan: &TableScan, table: &SourceTable) -> DFResult { - let qualifier = table_scan.table_name.clone(); - - // TODO: replace with StreamIngestionNode when available - let source_input = LogicalPlan::TableScan(table_scan.clone()); + let unrolled = LogicalPlan::Extension(Extension { + node: Arc::new(UnrollDebeziumPayloadNode::try_new( + table_source, + Arc::new(table.key_constraints.clone()), + )?), + }); + (unrolled, None) + } else { + (table_source, table_scan.projection.clone()) + }; Ok(LogicalPlan::Projection(Projection::try_new( - Self::projection_expressions(table, &qualifier, &table_scan.projection)?, - Arc::new(source_input), + Self::projection_expressions(table, &qualifier, &scan_projection)?, + Arc::new(projection_input), )?)) } @@ -399,25 +311,21 @@ impl TreeNodeRewriter for SourceRewriter<'_> { }; let table_name = table_scan.table_name.table(); - - if let Some(table) = self.schema_provider.get_catalog_table(table_name) { - return match table { - Table::ConnectorTable(table) => self.mutate_connector_table(&table_scan, table), - Table::LookupTable(_table) => { - // TODO: implement LookupSource extension - plan_err!("Lookup tables are not yet supported") - } - Table::TableFromQuery { - name: _, - logical_plan, - } => self.mutate_table_from_query(&table_scan, logical_plan), - }; - } - - if let Some(st) = self.schema_provider.get_stream_table(table_name) { - return self.mutate_stream_catalog_source(&table_scan, st.as_ref()); + let table = self + .schema_provider + .get_catalog_table(table_name) + .ok_or_else(|| DataFusionError::Plan(format!("Table {table_name} not found")))?; + + match table { + Table::ConnectorTable(table) => self.mutate_connector_table(&table_scan, table), + Table::LookupTable(_table) => { + // TODO: implement LookupSource extension + plan_err!("Lookup tables are not yet supported") + } + Table::TableFromQuery { + name: _, + logical_plan, + } => self.mutate_table_from_query(&table_scan, 
logical_plan), } - - Ok(Transformed::no(LogicalPlan::TableScan(table_scan.clone()))) } } diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs index 1d271698..25206429 100644 --- a/src/sql/extensions/key_calculation.rs +++ b/src/sql/extensions/key_calculation.rs @@ -107,7 +107,7 @@ impl KeyExtractionNode { key_fields: indices.iter().map(|&idx| idx as u64).collect(), }; - (operator_config.encode_to_vec(), OperatorName::ArrowKey) + (operator_config.encode_to_vec(), OperatorName::KeyBy) } fn compile_expression_router( diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs index 7025e254..72b6150c 100644 --- a/src/sql/extensions/remote_table.rs +++ b/src/sql/extensions/remote_table.rs @@ -116,7 +116,7 @@ impl StreamingOperatorBlueprint for RemoteTableBoundaryNode { let logical_node = LogicalNode::single( node_index as u32, format!("value_{node_index}"), - OperatorName::ArrowValue, + OperatorName::Value, operator_payload, self.table_identifier.to_string(), 1, diff --git a/src/sql/frontend_sql_coverage_tests.rs b/src/sql/frontend_sql_coverage_tests.rs deleted file mode 100644 index 0a201f9e..00000000 --- a/src/sql/frontend_sql_coverage_tests.rs +++ /dev/null @@ -1,823 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! SQL parse and streaming-related tests. 
- -use std::sync::Arc; - -use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; -use datafusion::sql::sqlparser::ast::Statement as DFStatement; -use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; -use datafusion::sql::sqlparser::parser::Parser; - -use crate::coordinator::Coordinator; -use crate::sql::common::TIMESTAMP_FIELD; -use crate::sql::parse::parse_sql; -use crate::sql::rewrite_plan; -use crate::sql::logical_planner::optimizers::produce_optimized_plan; -use crate::sql::schema::StreamSchemaProvider; - -fn assert_parses_as(sql: &str, type_prefix: &str) { - let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse failed for {sql:?}: {e}")); - assert!(!stmts.is_empty(), "{sql}"); - let dbg = format!("{:?}", stmts[0]); - assert!( - dbg.starts_with(type_prefix), - "sql={sql:?} expected prefix {type_prefix}, got {dbg}" - ); -} - -fn assert_parse_fails(sql: &str) { - assert!( - parse_sql(sql).is_err(), - "expected parse/classify failure for {sql:?}" - ); -} - -fn fake_src_stream_provider() -> StreamSchemaProvider { - let mut provider = StreamSchemaProvider::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new("v", DataType::Utf8, true), - Field::new( - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ), - ])); - provider.add_source_table( - "src".to_string(), - schema, - Some(TIMESTAMP_FIELD.to_string()), - None, - ); - provider -} - -fn compile_first(coordinator: &Coordinator, sql: &str, provider: StreamSchemaProvider) { - let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}")); - coordinator - .compile_plan(stmts[0].as_ref(), provider) - .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}")); -} - -fn compile_first_streaming(sql: &str) { - compile_first( - &Coordinator::new(), - sql, - fake_src_stream_provider(), - ); -} - -fn fake_src_dim_stream_provider() -> StreamSchemaProvider { - let mut provider = 
fake_src_stream_provider(); - let dim_schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new("name", DataType::Utf8, true), - Field::new("amt", DataType::Float64, true), - Field::new( - TIMESTAMP_FIELD, - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - ), - ])); - provider.add_source_table( - "dim".to_string(), - dim_schema, - Some(TIMESTAMP_FIELD.to_string()), - None, - ); - provider -} - -fn compile_streaming_select_body(body: &str, provider: StreamSchemaProvider) { - let sql = format!( - "CREATE STREAMING TABLE sink_shape_cov WITH ('connector'='kafka') AS {body}" - ); - compile_first(&Coordinator::new(), &sql, provider); -} - -fn assert_streaming_select_logical_rewrites(body: &str, provider: &StreamSchemaProvider) { - let sql = format!( - "CREATE STREAMING TABLE sink_lr WITH ('connector'='kafka') AS {body}" - ); - let dialect = FunctionStreamDialect {}; - let stmts = Parser::parse_sql(&dialect, &sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}")); - let DFStatement::CreateStreamingTable { query, .. } = &stmts[0] else { - panic!("expected CreateStreamingTable, got {:?}", stmts[0]); - }; - let plan = produce_optimized_plan(&DFStatement::Query(query.clone()), provider) - .unwrap_or_else(|e| panic!("produce_optimized_plan {sql:?}: {e:#}")); - rewrite_plan(plan, provider).unwrap_or_else(|e| panic!("rewrite_plan {sql:?}: {e:#}")); -} - -fn assert_streaming_select_logical_rewrite_err_contains( - body: &str, - provider: &StreamSchemaProvider, - needle: &str, -) { - let sql = format!( - "CREATE STREAMING TABLE sink_lr WITH ('connector'='kafka') AS {body}" - ); - let dialect = FunctionStreamDialect {}; - let stmts = Parser::parse_sql(&dialect, &sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}")); - let DFStatement::CreateStreamingTable { query, .. 
} = &stmts[0] else { - panic!("expected CreateStreamingTable, got {:?}", stmts[0]); - }; - let plan = produce_optimized_plan(&DFStatement::Query(query.clone()), provider) - .unwrap_or_else(|e| panic!("produce_optimized_plan {sql:?}: {e:#}")); - let err = rewrite_plan(plan, provider).unwrap_err(); - let msg = err.to_string(); - assert!( - msg.contains(needle), - "expected '{needle}' in rewrite error, got: {msg}" - ); -} - -#[test] -fn parse_create_function_double_quoted_path_style() { - assert_parses_as( - r#"CREATE FUNCTION WITH ("function_path"='./a.wasm', "config_path"='./b.yml')"#, - "CreateFunction", - ); -} - -#[test] -fn parse_create_function_extra_numeric_and_bool_like_strings() { - assert_parses_as( - r#"CREATE FUNCTION WITH ( - 'function_path'='./f.wasm', - 'config_path'='./c.yml', - 'parallelism'='8', - 'dry_run'='false' - )"#, - "CreateFunction", - ); -} - -#[test] -fn parse_create_function_fails_without_function_path() { - let err = parse_sql("CREATE FUNCTION WITH ('config_path'='./only.yml')").unwrap_err(); - let s = err.to_string(); - assert!( - s.contains("function_path") || s.contains("CREATE FUNCTION"), - "{s}" - ); -} - -#[test] -fn parse_drop_function_quoted_name() { - assert_parses_as(r#"DROP FUNCTION "my-pipeline""#, "DropFunction"); -} - -#[test] -fn parse_start_stop_function_dotted_style_name() { - assert_parses_as("START FUNCTION job.v1.main", "StartFunction"); - assert_parses_as("STOP FUNCTION job.v1.main", "StopFunction"); -} - -#[test] -fn parse_show_functions_extra_whitespace() { - assert_parses_as(" SHOW FUNCTIONS ", "ShowFunctions"); -} - -#[test] -fn parse_create_table_multiple_columns_types() { - assert_parses_as( - "CREATE TABLE metrics (ts TIMESTAMP, name VARCHAR, val DOUBLE, ok BOOLEAN)", - "CreateTable", - ); -} - -#[test] -fn parse_create_table_with_not_null_and_precision() { - assert_parses_as( - "CREATE TABLE t (id BIGINT NOT NULL, code DECIMAL(10,2))", - "CreateTable", - ); -} - -#[test] -fn 
parse_create_table_if_not_exists_if_dialect_accepts() { - if let Ok(stmts) = parse_sql("CREATE TABLE IF NOT EXISTS guard (id INT)") { - assert!(format!("{:?}", stmts[0]).starts_with("CreateTable")); - } -} - -#[test] -fn parse_streaming_table_select_star() { - assert_parses_as( - "CREATE STREAMING TABLE s1 WITH ('connector'='kafka') AS SELECT * FROM src", - "StreamingTableStatement", - ); -} - -#[test] -fn parse_streaming_table_select_columns() { - assert_parses_as( - "CREATE STREAMING TABLE s2 WITH ('connector'='memory') AS SELECT id, v FROM src", - "StreamingTableStatement", - ); -} - -#[test] -fn parse_streaming_table_with_partition_by() { - let sql = format!( - "CREATE STREAMING TABLE s3 WITH ('connector' = 'kafka', 'partition_by' = 'id') AS SELECT id, {} FROM src", - TIMESTAMP_FIELD - ); - assert_parses_as(&sql, "StreamingTableStatement"); -} - -#[test] -fn parse_streaming_table_with_idle_time_option() { - assert_parses_as( - "CREATE STREAMING TABLE s4 WITH ('connector'='kafka', 'idle_time'='30s') AS SELECT * FROM src", - "StreamingTableStatement", - ); -} - -#[test] -fn parse_streaming_table_sink_name_snake_and_digits() { - assert_parses_as( - "CREATE STREAMING TABLE sink_01_out WITH ('connector'='memory') AS SELECT 1", - "StreamingTableStatement", - ); -} - -#[test] -fn parse_streaming_table_comment_before_as_if_supported() { - let sql = "CREATE STREAMING TABLE c1 WITH ('connector'='kafka') COMMENT 'out' AS SELECT * FROM src"; - if let Ok(stmts) = parse_sql(sql) { - assert!( - format!("{:?}", stmts[0]).starts_with("StreamingTableStatement"), - "{stmts:?}" - ); - } -} - -#[test] -fn parse_three_semicolon_separated_statements() { - let sql = concat!( - "CREATE FUNCTION WITH ('function_path'='./x.wasm'); ", - "CREATE TABLE meta (id INT); ", - "CREATE STREAMING TABLE out1 WITH ('connector'='kafka') AS SELECT 1", - ); - let stmts = parse_sql(sql).unwrap(); - assert_eq!(stmts.len(), 3); - assert!(format!("{:?}", stmts[0]).starts_with("CreateFunction")); - 
assert!(format!("{:?}", stmts[1]).starts_with("CreateTable")); - assert!(format!("{:?}", stmts[2]).starts_with("StreamingTableStatement")); -} - -#[test] -fn parse_rejects_insert_with_columns_list() { - assert_parse_fails("INSERT INTO t (a,b) VALUES (1,2)"); -} - -#[test] -fn parse_rejects_update_delete() { - assert_parse_fails("UPDATE src SET id = 1"); - assert_parse_fails("DELETE FROM src WHERE id = 0"); -} - -#[test] -fn parse_rejects_merge_explain() { - assert_parse_fails("EXPLAIN SELECT 1"); - assert_parse_fails("MERGE INTO t USING s ON true WHEN MATCHED THEN UPDATE SET x=1"); -} - -#[test] -fn parse_rejects_create_schema_database() { - assert_parse_fails("CREATE SCHEMA s"); - assert_parse_fails("CREATE DATABASE d"); -} - -#[test] -fn compile_streaming_select_star_from_src() { - compile_first_streaming(concat!( - "CREATE STREAMING TABLE kafka_all ", - "WITH ('connector'='kafka') ", - "AS SELECT * FROM src", - )); -} - -#[test] -fn compile_streaming_select_id_v_from_src() { - let sql = format!( - "CREATE STREAMING TABLE kafka_cols WITH ('connector'='kafka') AS SELECT id, v, {} FROM src", - TIMESTAMP_FIELD - ); - compile_first_streaming(&sql); -} - -#[test] -fn compile_streaming_memory_connector() { - compile_first_streaming( - "CREATE STREAMING TABLE mem_sink WITH ('connector'='memory') AS SELECT * FROM src", - ); -} - -#[test] -fn compile_streaming_with_partition_by_id() { - compile_first_streaming(concat!( - "CREATE STREAMING TABLE part_sink ", - "WITH ('connector'='kafka', 'partition_by'='id') ", - "AS SELECT * FROM src", - )); -} - -#[test] -fn compile_streaming_connector_postgres_string() { - compile_first_streaming( - "CREATE STREAMING TABLE pg_sink WITH ('connector'='postgres') AS SELECT id FROM src", - ); -} - -#[test] -#[should_panic(expected = "connector")] -fn compile_streaming_fails_without_connector() { - let sql = "CREATE STREAMING TABLE bad WITH ('partition_by'='id') AS SELECT * FROM src"; - let stmts = parse_sql(sql).unwrap(); - let _ = 
Coordinator::new().compile_plan(stmts[0].as_ref(), fake_src_stream_provider()); -} - -#[test] -fn compile_plan_show_functions() { - let stmts = parse_sql("SHOW FUNCTIONS").unwrap(); - Coordinator::new() - .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) - .expect("ShowFunctions plan"); -} - -#[test] -fn compile_plan_show_tables() { - let stmts = parse_sql("SHOW TABLES").unwrap(); - Coordinator::new() - .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) - .expect("ShowCatalogTables plan"); -} - -#[test] -fn compile_plan_show_create_table() { - let stmts = parse_sql("SHOW CREATE TABLE my_table").unwrap(); - Coordinator::new() - .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) - .expect("ShowCreateTable plan"); -} - -#[test] -fn compile_plan_start_stop_drop_function() { - for sql in [ - "START FUNCTION t1", - "STOP FUNCTION t1", - "DROP FUNCTION t1", - ] { - let stmts = parse_sql(sql).unwrap(); - Coordinator::new() - .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) - .unwrap_or_else(|e| panic!("{sql}: {e:#}")); - } -} - -#[test] -fn compile_plan_create_function() { - let sql = - "CREATE FUNCTION WITH ('function_path'='./x.wasm', 'config_path'='./c.yml')"; - let stmts = parse_sql(sql).unwrap(); - Coordinator::new() - .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) - .expect("CreateFunction plan"); -} - -#[test] -fn compile_plan_create_table_simple_ddl() { - let sql = "CREATE TABLE local_only (id INT, name VARCHAR)"; - let stmts = parse_sql(sql).unwrap(); - Coordinator::new() - .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new()) - .expect("CreateTable plan"); -} - -#[test] -fn streaming_where_eq_ne_and_or_not() { - let ts = TIMESTAMP_FIELD; - compile_streaming_select_body( - &format!("SELECT * FROM src WHERE id = 1 AND (v <> 'x' OR NOT (id < 0))"), - fake_src_stream_provider(), - ); - compile_streaming_select_body( - &format!("SELECT * FROM src WHERE id > 0 AND id <= 100 AND id >= 1"), - 
fake_src_stream_provider(), - ); - compile_streaming_select_body( - &format!("SELECT id, v, {ts} FROM src WHERE (id = 2 OR id = 3) AND v IS NOT NULL"), - fake_src_stream_provider(), - ); -} - -#[test] -fn streaming_where_in_between_like_null() { - let ts = TIMESTAMP_FIELD; - compile_streaming_select_body( - &format!("SELECT * FROM src WHERE id IN (1, 2, 3)"), - fake_src_stream_provider(), - ); - compile_streaming_select_body( - &format!("SELECT * FROM src WHERE id NOT IN (99, 100)"), - fake_src_stream_provider(), - ); - compile_streaming_select_body( - &format!("SELECT * FROM src WHERE id BETWEEN 1 AND 10"), - fake_src_stream_provider(), - ); - compile_streaming_select_body( - &format!("SELECT * FROM src WHERE v LIKE 'pre%'"), - fake_src_stream_provider(), - ); - compile_streaming_select_body( - &format!("SELECT * FROM src WHERE v IS NULL"), - fake_src_stream_provider(), - ); - compile_streaming_select_body( - &format!("SELECT id, v, {ts} FROM src WHERE v IS NOT NULL OR id = 0"), - fake_src_stream_provider(), - ); -} - -#[test] -fn streaming_where_scalar_subquery() { - let ts = TIMESTAMP_FIELD; - let p = fake_src_dim_stream_provider(); - assert_streaming_select_logical_rewrites( - &format!( - "SELECT src.id, src.v, src.{ts} FROM src \ - WHERE src.id = (SELECT MAX(dim.id) FROM dim)" - ), - &p, - ); -} - -#[test] -#[should_panic(expected = "window")] -fn streaming_where_in_subquery_currently_panics() { - let p = fake_src_dim_stream_provider(); - compile_streaming_select_body( - "SELECT * FROM src WHERE id IN (SELECT id FROM dim WHERE amt IS NOT NULL)", - p, - ); -} - -#[test] -#[should_panic(expected = "window")] -fn streaming_where_exists_correlated_currently_panics() { - let p = fake_src_dim_stream_provider(); - compile_streaming_select_body( - "SELECT * FROM src WHERE EXISTS (SELECT 1 FROM dim WHERE dim.id = src.id)", - p, - ); -} - -#[test] -fn streaming_select_case_coalesce_cast() { - let ts = TIMESTAMP_FIELD; - compile_streaming_select_body( - &format!( - 
"SELECT CASE WHEN id < 0 THEN 0 WHEN id > 1000 THEN 1000 ELSE id END AS c, v, {ts} FROM src" - ), - fake_src_stream_provider(), - ); - compile_streaming_select_body( - &format!("SELECT COALESCE(v, 'na') AS v2, id, {ts} FROM src"), - fake_src_stream_provider(), - ); - compile_streaming_select_body( - &format!("SELECT CAST(id AS DOUBLE) AS id_f, {ts} FROM src"), - fake_src_stream_provider(), - ); -} - -#[test] -fn streaming_select_row_time_distinct() { - let ts = TIMESTAMP_FIELD; - compile_streaming_select_body( - &format!("SELECT row_time(), id, v, {ts} FROM src"), - fake_src_stream_provider(), - ); - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites("SELECT DISTINCT id FROM src", &p); -} - -#[test] -fn streaming_from_subquery_nested() { - let ts = TIMESTAMP_FIELD; - compile_streaming_select_body( - &format!("SELECT * FROM (SELECT id, v, {ts} FROM src WHERE id > 0) AS t"), - fake_src_stream_provider(), - ); - compile_streaming_select_body( - &format!( - "SELECT * FROM (SELECT * FROM (SELECT id FROM src) AS i2) AS i1" - ), - fake_src_stream_provider(), - ); -} - -#[test] -fn streaming_with_cte_single_and_chain() { - let ts = TIMESTAMP_FIELD; - compile_streaming_select_body( - &format!( - "WITH a AS (SELECT id, v, {ts} FROM src WHERE id > 0) SELECT * FROM a" - ), - fake_src_stream_provider(), - ); - compile_streaming_select_body( - &format!( - "WITH a AS (SELECT id FROM src), b AS (SELECT id FROM a WHERE id > 1) SELECT * FROM b" - ), - fake_src_stream_provider(), - ); -} - -#[test] -fn streaming_group_by_updating_aggregate_bundle() { - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - "SELECT id, COUNT(*), SUM(id), AVG(id), MIN(v), MAX(v) FROM src GROUP BY id", - &p, - ); -} - -#[test] -fn streaming_group_by_count_distinct_and_stats() { - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - "SELECT id, COUNT(DISTINCT v), STDDEV_POP(id), VAR_POP(id) FROM src GROUP BY id", - &p, - 
); -} - -#[test] -fn streaming_group_by_having() { - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - "SELECT id, COUNT(*) AS c FROM src GROUP BY id HAVING COUNT(*) >= 0", - &p, - ); -} - -#[test] -fn streaming_group_by_tumble_window() { - let ts = TIMESTAMP_FIELD; - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - &format!( - "SELECT tumble(INTERVAL '1' MINUTE) AS w, id, COUNT(*) AS c, MAX({ts}) AS max_evt \ - FROM src GROUP BY tumble(INTERVAL '1' MINUTE), id" - ), - &p, - ); -} - -#[test] -fn streaming_group_by_hop_window() { - let ts = TIMESTAMP_FIELD; - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - &format!( - "SELECT hop(INTERVAL '1' MINUTE, INTERVAL '3' MINUTE) AS w, id, SUM(id), MAX({ts}) AS max_evt \ - FROM src GROUP BY hop(INTERVAL '1' MINUTE, INTERVAL '3' MINUTE), id" - ), - &p, - ); -} - -#[test] -fn streaming_window_row_number_over_tumble_aggregate() { - let ts = TIMESTAMP_FIELD; - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - &format!( - "SELECT ROW_NUMBER() OVER (PARTITION BY w ORDER BY max_evt) AS rn, id, w, max_evt \ - FROM ( \ - SELECT tumble(INTERVAL '1' MINUTE) AS w, id, MAX({ts}) AS max_evt \ - FROM src \ - GROUP BY tumble(INTERVAL '1' MINUTE), id \ - ) AS x" - ), - &p, - ); -} - -#[test] -fn streaming_inner_join_eq_and_compound_on() { - let ts = TIMESTAMP_FIELD; - let p = fake_src_dim_stream_provider(); - assert_streaming_select_logical_rewrites( - &format!( - "SELECT src.id, src.v, dim.name, src.{ts} \ - FROM src INNER JOIN dim ON src.id = dim.id" - ), - &p, - ); - assert_streaming_select_logical_rewrites( - &format!( - "SELECT src.id, dim.amt, src.{ts} \ - FROM src JOIN dim ON src.id = dim.id AND dim.amt > CAST(0 AS DOUBLE)" - ), - &p, - ); -} - -#[test] -#[ignore] -fn streaming_self_join_inner_ignored() { - let ts = TIMESTAMP_FIELD; - compile_streaming_select_body( - &format!( - "SELECT a.id, b.v, a.{ts} \ 
- FROM src AS a JOIN src AS b ON a.id = b.id AND a.v = b.v" - ), - fake_src_stream_provider(), - ); -} - -#[test] -fn streaming_join_subquery_branch() { - let ts = TIMESTAMP_FIELD; - let p = fake_src_dim_stream_provider(); - assert_streaming_select_logical_rewrites( - &format!( - "SELECT src.id, src.v, j.name, src.{ts} \ - FROM src JOIN (SELECT id, name FROM dim) AS j ON src.id = j.id" - ), - &p, - ); -} - -#[test] -fn streaming_union_all_compatible_schemas() { - let ts = TIMESTAMP_FIELD; - let p = fake_src_dim_stream_provider(); - compile_streaming_select_body( - &format!( - "SELECT id, v, {ts} FROM src \ - UNION ALL \ - SELECT id, name AS v, {ts} FROM dim" - ), - p, - ); -} - -#[test] -fn streaming_logical_group_by_two_keys_and_filter_agg() { - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - "SELECT id, v, COUNT(*) AS c FROM src GROUP BY id, v", - &p, - ); - assert_streaming_select_logical_rewrites( - "SELECT id, SUM(id) FILTER (WHERE v IS NOT NULL) AS s FROM src GROUP BY id", - &p, - ); -} - -#[test] -fn streaming_logical_more_builtin_aggregates() { - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - "SELECT id, STDDEV_POP(CAST(id AS DOUBLE)), COVAR_SAMP(CAST(id AS DOUBLE), CAST(id AS DOUBLE)), \ - COVAR_POP(CAST(id AS DOUBLE), CAST(id AS DOUBLE)) \ - FROM src GROUP BY id", - &p, - ); - assert_streaming_select_logical_rewrites( - "SELECT id, CORR(CAST(id AS DOUBLE), CAST(id AS DOUBLE)) FROM src GROUP BY id", - &p, - ); -} - -#[test] -fn streaming_logical_bit_and_bool_aggregates() { - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - "SELECT id, BIT_AND(id), BIT_OR(id), BIT_XOR(id) FROM src GROUP BY id", - &p, - ); - assert_streaming_select_logical_rewrites( - "SELECT id, BOOL_AND(id > 0), BOOL_OR(id < 100000) FROM src GROUP BY id", - &p, - ); -} - -#[test] -fn streaming_logical_array_agg_and_list_union() { - let p = fake_src_stream_provider(); - 
assert_streaming_select_logical_rewrites( - "SELECT id, ARRAY_AGG(v) FROM src GROUP BY id", - &p, - ); -} - -#[test] -fn streaming_logical_scalar_funcs_on_projection() { - let ts = TIMESTAMP_FIELD; - compile_streaming_select_body( - &format!( - "SELECT ABS(id), POWER(CAST(id AS DOUBLE), 2.0), UPPER(v), LOWER(v), BTRIM(v), \ - CHARACTER_LENGTH(v), CONCAT(v, '_x'), {ts} FROM src" - ), - fake_src_stream_provider(), - ); -} - -#[test] -fn streaming_logical_nullif_regexp() { - let ts = TIMESTAMP_FIELD; - compile_streaming_select_body( - &format!( - "SELECT id, NULLIF(v, ''), REGEXP_LIKE(v, '^a'), {ts} FROM src WHERE v IS NOT NULL OR id = 0" - ), - fake_src_stream_provider(), - ); -} - -#[test] -fn streaming_window_first_value_over_tumbled_subquery() { - let ts = TIMESTAMP_FIELD; - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - &format!( - "SELECT FIRST_VALUE(id) OVER (PARTITION BY w ORDER BY max_evt) AS fv, w, id \ - FROM ( \ - SELECT tumble(INTERVAL '1' MINUTE) AS w, id, MAX({ts}) AS max_evt \ - FROM src GROUP BY tumble(INTERVAL '1' MINUTE), id \ - ) AS x" - ), - &p, - ); -} - -#[test] -fn streaming_window_lag_over_tumbled_subquery() { - let ts = TIMESTAMP_FIELD; - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - &format!( - "SELECT LAG(id, 1) OVER (PARTITION BY w ORDER BY max_evt) AS prev_id, w, id \ - FROM ( \ - SELECT tumble(INTERVAL '2' MINUTE) AS w, id, MAX({ts}) AS max_evt \ - FROM src GROUP BY tumble(INTERVAL '2' MINUTE), id \ - ) AS x" - ), - &p, - ); -} - -#[test] -fn streaming_window_lead_over_tumbled_subquery() { - let ts = TIMESTAMP_FIELD; - let p = fake_src_stream_provider(); - assert_streaming_select_logical_rewrites( - &format!( - "SELECT LEAD(id, 1) OVER (PARTITION BY w ORDER BY max_evt) AS next_id, w \ - FROM ( \ - SELECT tumble(INTERVAL '2' MINUTE) AS w, id, MAX({ts}) AS max_evt \ - FROM src GROUP BY tumble(INTERVAL '2' MINUTE), id \ - ) AS x" - ), - &p, - ); -} - -#[test] -fn 
streaming_logical_full_outer_join_errors() { - let p = fake_src_dim_stream_provider(); - assert_streaming_select_logical_rewrite_err_contains( - "SELECT src.id, dim.name FROM src FULL OUTER JOIN dim ON src.id = dim.id", - &p, - "inner", - ); -} - -#[test] -#[should_panic(expected = "Non-inner")] -fn streaming_left_join_errors_without_window() { - let ts = TIMESTAMP_FIELD; - let sql = format!( - "CREATE STREAMING TABLE sink_left WITH ('connector'='kafka') AS \ - SELECT src.id, dim.name, src.{ts} FROM src LEFT JOIN dim ON src.id = dim.id" - ); - let stmts = parse_sql(&sql).unwrap(); - let _ = Coordinator::new().compile_plan(stmts[0].as_ref(), fake_src_dim_stream_provider()); -} diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs index 224562ea..79fe9a05 100644 --- a/src/sql/logical_node/logical/operator_name.rs +++ b/src/sql/logical_node/logical/operator_name.rs @@ -20,8 +20,8 @@ use crate::sql::common::constants::operator_feature; #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display, IntoStaticStr)] pub enum OperatorName { ExpressionWatermark, - ArrowValue, - ArrowKey, + Value, + KeyBy, Projection, AsyncUdf, Join, @@ -32,7 +32,6 @@ pub enum OperatorName { SlidingWindowAggregate, SessionWindowAggregate, UpdatingAggregate, - KeyBy, ConnectorSource, ConnectorSink, } @@ -46,7 +45,7 @@ impl OperatorName { pub fn feature_tag(self) -> Option<&'static str> { match self { - Self::ExpressionWatermark | Self::ArrowValue | Self::ArrowKey | Self::Projection => None, + Self::ExpressionWatermark | Self::Value | Self::KeyBy | Self::Projection => None, Self::AsyncUdf => Some(operator_feature::ASYNC_UDF), Self::Join => Some(operator_feature::JOIN_WITH_EXPIRATION), Self::InstantJoin => Some(operator_feature::WINDOWED_JOIN), diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 5cb53705..c13f1c4a 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -27,5 +27,3 @@ pub use schema::{StreamPlanningContext, StreamSchemaProvider}; 
pub use parse::parse_sql; pub use analysis::rewrite_plan; -#[cfg(test)] -mod frontend_sql_coverage_tests; From 53655b4f52d5ee8b1d7f8c2865844be0deb6a1ad Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Mon, 30 Mar 2026 23:17:45 +0800 Subject: [PATCH 32/44] update --- src/coordinator/execution/executor.rs | 1 + src/coordinator/runtime_context.rs | 12 ++++++------ src/sql/schema/catalog_ddl.rs | 11 +++++++++-- src/sql/schema/schema_provider.rs | 2 ++ src/storage/stream_catalog/manager.rs | 22 ++++++++++++++++++---- 5 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 8329d498..6f7c5afb 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -293,6 +293,7 @@ impl PlanVisitor for Executor { let schema = Arc::new(source_table.produce_physical_schema()); let table_instance = StreamTable::Source { name: table_name.clone(), + connector: source_table.connector().to_string(), schema, event_time_field: source_table.event_time_field().map(str::to_string), watermark_field: source_table.stream_catalog_watermark_field(), diff --git a/src/coordinator/runtime_context.rs b/src/coordinator/runtime_context.rs index af9a9ddf..91f4100c 100644 --- a/src/coordinator/runtime_context.rs +++ b/src/coordinator/runtime_context.rs @@ -69,6 +69,7 @@ impl CoordinatorRuntimeContext { for (name, stream) in provider.tables.streams.clone() { let StreamTable::Source { name: source_name, + connector, schema, event_time_field, watermark_field, @@ -77,12 +78,11 @@ impl CoordinatorRuntimeContext { else { continue; }; - - let connector = with_options - .get("connector") - .cloned() - .unwrap_or_else(|| "stream_catalog".to_string()); - let mut source = SourceTable::new(source_name.clone(), connector, ConnectionType::Source); + let mut source = SourceTable::new( + source_name.clone(), + connector.clone(), + ConnectionType::Source, + ); source.schema_specs = schema .fields() 
.iter() diff --git a/src/sql/schema/catalog_ddl.rs b/src/sql/schema/catalog_ddl.rs index 2eea78f9..0828a45d 100644 --- a/src/sql/schema/catalog_ddl.rs +++ b/src/sql/schema/catalog_ddl.rs @@ -113,13 +113,15 @@ fn pipeline_summary_short(program: &LogicalProgram) -> String { pub fn stream_table_row_detail(table: &StreamTable) -> String { match table { StreamTable::Source { + connector, event_time_field, watermark_field, with_options, .. } => { format!( - "event_time={:?}, watermark={:?}, with_options={}", + "connector={}, event_time={:?}, watermark={:?}, with_options={}", + connector, event_time_field, watermark_field, with_options.len() @@ -165,6 +167,7 @@ pub fn show_create_stream_table(table: &StreamTable) -> String { match table { StreamTable::Source { name, + connector, schema, event_time_field, watermark_field, @@ -178,7 +181,11 @@ pub fn show_create_stream_table(table: &StreamTable) -> String { if let Some(w) = watermark_field { ddl.push_str(&format!("/* WATERMARK: {w} */\n")); } - ddl.push_str(&format_with_clause(with_options)); + let mut merged_opts = with_options.clone(); + merged_opts + .entry("connector".to_string()) + .or_insert_with(|| connector.clone()); + ddl.push_str(&format_with_clause(&merged_opts)); ddl } StreamTable::Sink { name, program } => { diff --git a/src/sql/schema/schema_provider.rs b/src/sql/schema/schema_provider.rs index f93aead1..bbe03079 100644 --- a/src/sql/schema/schema_provider.rs +++ b/src/sql/schema/schema_provider.rs @@ -42,6 +42,7 @@ fn object_name(s: impl Into) -> ObjectName { pub enum StreamTable { Source { name: String, + connector: String, schema: Arc, event_time_field: Option, watermark_field: Option, @@ -201,6 +202,7 @@ impl StreamPlanningContext { ) { self.register_stream_table(StreamTable::Source { name, + connector: "stream_catalog".to_string(), schema, event_time_field, watermark_field, diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs index 5f40240a..fc7c5b2f 100644 --- 
a/src/storage/stream_catalog/manager.rs +++ b/src/storage/stream_catalog/manager.rs @@ -161,6 +161,7 @@ impl CatalogManager { fn encode_table(&self, table: &StreamTable) -> DFResult { let table_type = match table { StreamTable::Source { + connector, schema, event_time_field, watermark_field, @@ -173,10 +174,15 @@ impl CatalogManager { .as_ref() .filter(|w| *w != sql_field::COMPUTED_WATERMARK) .cloned(), - with_options: with_options - .iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect(), + with_options: { + let mut opts: std::collections::BTreeMap = with_options + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + opts.entry("connector".to_string()) + .or_insert_with(|| connector.clone()); + opts.into_iter().collect() + }, }), StreamTable::Sink { program, .. } => { let logical_program_bincode = CatalogCodec::encode_logical_program(program)?; @@ -208,6 +214,11 @@ impl CatalogManager { match table_type { table_definition::TableType::Source(src) => Ok(StreamTable::Source { name: proto_def.table_name, + connector: src + .with_options + .get("connector") + .cloned() + .unwrap_or_else(|| "stream_catalog".to_string()), schema: CatalogCodec::decode_schema(&src.arrow_schema_ipc)?, event_time_field: src.event_time_field, watermark_field: src @@ -307,6 +318,7 @@ mod tests { let table = StreamTable::Source { name: "t1".into(), + connector: "stream_catalog".into(), schema: Arc::clone(&schema), event_time_field: Some("ts".into()), watermark_field: None, @@ -344,6 +356,7 @@ mod tests { let table = StreamTable::Source { name: "t_with".into(), + connector: "kafka".into(), schema, event_time_field: None, watermark_field: None, @@ -369,6 +382,7 @@ mod tests { mgr.add_table(StreamTable::Source { name: "t_drop".into(), + connector: "stream_catalog".into(), schema, event_time_field: None, watermark_field: None, From 9bed7e730a430e2e8752216c6fe1a05021d4ed7b Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Mon, 30 Mar 2026 23:39:34 +0800 Subject: [PATCH 33/44] update 
--- .../streaming/factory/operator_factory.rs | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/runtime/streaming/factory/operator_factory.rs b/src/runtime/streaming/factory/operator_factory.rs index eb2afd9b..d11a1555 100644 --- a/src/runtime/streaming/factory/operator_factory.rs +++ b/src/runtime/streaming/factory/operator_factory.rs @@ -18,6 +18,9 @@ use std::sync::Arc; use super::operator_constructor::OperatorConstructor; use crate::runtime::streaming::api::operator::ConstructedOperator; +use crate::runtime::streaming::factory::connector::{ + ConnectorSinkDispatcher, ConnectorSourceDispatcher, +}; use crate::runtime::streaming::factory::global::Registry; use crate::runtime::streaming::operators::grouping::IncrementalAggregatingConstructor; use crate::runtime::streaming::operators::joins::{ @@ -109,6 +112,8 @@ impl OperatorFactory { self.register_named(OperatorName::Projection, Box::new(ProjectionBridge)); self.register_named(OperatorName::Value, Box::new(ValueBridge)); + self.register_named(OperatorName::ConnectorSource, Box::new(ConnectorSourceBridge)); + self.register_named(OperatorName::ConnectorSink, Box::new(ConnectorSinkBridge)); crate::runtime::streaming::factory::register_builtin_connectors(self); crate::runtime::streaming::factory::register_kafka_connector_plugins(self); @@ -232,6 +237,22 @@ impl OperatorConstructor for ValueBridge { } } +/// Generic connector source constructor: decodes `ConnectorOp` and dispatches by connector type. +struct ConnectorSourceBridge; +impl OperatorConstructor for ConnectorSourceBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + ConnectorSourceDispatcher.with_config(config, registry) + } +} + +/// Generic connector sink constructor: decodes `ConnectorOp` and dispatches by connector type. 
+struct ConnectorSinkBridge; +impl OperatorConstructor for ConnectorSinkBridge { + fn with_config(&self, config: &[u8], registry: Arc) -> Result { + ConnectorSinkDispatcher.with_config(config, registry) + } +} + struct ProjectionExecutionConstructor; impl ProjectionExecutionConstructor { fn with_config( From 174ebaa6c49bf12ca5f915df3c2d5434a7e2213b Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Tue, 31 Mar 2026 00:34:33 +0800 Subject: [PATCH 34/44] update --- protocol/proto/storage.proto | 20 +- .../dataset/show_catalog_tables_result.rs | 20 +- src/coordinator/execution/executor.rs | 42 +- src/coordinator/runtime_context.rs | 49 +- src/sql/schema/catalog_ddl.rs | 47 ++ src/sql/schema/mod.rs | 5 +- src/storage/stream_catalog/manager.rs | 465 ++++++++++-------- 7 files changed, 346 insertions(+), 302 deletions(-) diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto index 5ad09d38..6c645e75 100644 --- a/protocol/proto/storage.proto +++ b/protocol/proto/storage.proto @@ -10,30 +10,30 @@ syntax = "proto3"; package function_stream.storage; // ============================================================================= -// Stream catalog (coordinator stream tables: source / sink) +// Catalog table storage (coordinator SQL catalog) // ============================================================================= -// Top-level persisted record for one stream table. +// Top-level persisted record for one catalog table. message TableDefinition { string table_name = 1; int64 updated_at_millis = 2; oneof table_type { - StreamSource source = 3; - StreamSink sink = 4; + // Connector-backed ingestion/egress table definition. + CatalogSourceTable connector_table = 3; + // Connector-backed lookup table definition. + CatalogSourceTable lookup_table = 5; } } -message StreamSource { +// Shared connector-backed table payload for connector/lookup entries. 
+message CatalogSourceTable { bytes arrow_schema_ipc = 1; optional string event_time_field = 2; optional string watermark_field = 3; // Original CREATE TABLE ... WITH ('k'='v', ...) pairs (best-effort; keys sorted in DDL). map with_options = 4; -} - -message StreamSink { - bytes arrow_schema_ipc = 1; - bytes logical_program_bincode = 2; + // Canonical connector identifier (e.g. kafka, postgres-cdc). + string connector = 5; } // ============================================================================= diff --git a/src/coordinator/dataset/show_catalog_tables_result.rs b/src/coordinator/dataset/show_catalog_tables_result.rs index 77792517..74a8cd2d 100644 --- a/src/coordinator/dataset/show_catalog_tables_result.rs +++ b/src/coordinator/dataset/show_catalog_tables_result.rs @@ -14,9 +14,11 @@ use std::sync::Arc; use arrow_array::{Int32Array, StringArray}; use arrow_schema::{DataType, Field, Schema}; +use datafusion::arrow::datatypes::Schema as DfSchema; use super::DataSet; -use crate::sql::schema::{schema_columns_one_line, stream_table_row_detail, StreamTable}; +use crate::sql::schema::table::Table as CatalogTable; +use crate::sql::schema::{catalog_table_row_detail, schema_columns_one_line}; #[derive(Clone, Debug)] pub struct ShowCatalogTablesResult { @@ -28,7 +30,7 @@ pub struct ShowCatalogTablesResult { } impl ShowCatalogTablesResult { - pub fn from_tables(tables: &[Arc]) -> Self { + pub fn from_tables(tables: &[Arc]) -> Self { let mut names = Vec::with_capacity(tables.len()); let mut kinds = Vec::with_capacity(tables.len()); let mut column_counts = Vec::with_capacity(tables.len()); @@ -36,17 +38,23 @@ impl ShowCatalogTablesResult { let mut details = Vec::with_capacity(tables.len()); for t in tables { - let schema = t.schema(); + let schema = match t.as_ref() { + CatalogTable::ConnectorTable(source) | CatalogTable::LookupTable(source) => { + source.produce_physical_schema() + } + CatalogTable::TableFromQuery { .. 
} => DfSchema::new(t.get_fields()), + }; let ncols = schema.fields().len() as i32; names.push(t.name().to_string()); kinds.push(match t.as_ref() { - StreamTable::Source { .. } => "SOURCE", - StreamTable::Sink { .. } => "SINK", + CatalogTable::ConnectorTable(_) => "SOURCE", + CatalogTable::LookupTable(_) => "LOOKUP", + CatalogTable::TableFromQuery { .. } => "QUERY", } .to_string()); column_counts.push(ncols); schema_lines.push(schema_columns_one_line(&schema)); - details.push(stream_table_row_detail(t.as_ref())); + details.push(catalog_table_row_detail(t.as_ref())); } Self { diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 6f7c5afb..5372ed33 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -29,7 +29,8 @@ use crate::coordinator::plan::{ use crate::coordinator::statement::{ConfigSource, FunctionSource}; use crate::runtime::streaming::job::JobManager; use crate::runtime::taskexecutor::TaskManager; -use crate::sql::schema::{show_create_stream_table, StreamTable}; +use crate::sql::schema::table::Table as CatalogTable; +use crate::sql::schema::show_create_catalog_table; use crate::storage::stream_catalog::CatalogManager; #[derive(Error, Debug)] @@ -201,7 +202,10 @@ impl PlanVisitor for Executor { _plan: &ShowCatalogTablesPlan, _context: &PlanVisitorContext, ) -> PlanVisitorResult { - let tables = self.catalog_manager.list_stream_tables(); + let tables = match self.catalog_manager.list_catalog_tables() { + Ok(tables) => tables, + Err(e) => return PlanVisitorResult::Execute(Err(ExecuteError::Internal(e.to_string()))), + }; let n = tables.len(); let result = ExecuteResult::ok_with_data( format!("{n} stream catalog table(s)"), @@ -218,14 +222,15 @@ impl PlanVisitor for Executor { let execute = || -> Result { let t = self .catalog_manager - .get_stream_table(&plan.table_name) + .get_catalog_table(&plan.table_name) + .map_err(|e| ExecuteError::Internal(e.to_string()))? 
.ok_or_else(|| { ExecuteError::Validation(format!( "Table '{}' not found in stream catalog", plan.table_name )) })?; - let ddl = show_create_stream_table(t.as_ref()); + let ddl = show_create_catalog_table(t.as_ref()); Ok(ExecuteResult::ok_with_data( format!("SHOW CREATE TABLE {}", plan.table_name), ShowCreateTableResult::new(plan.table_name.clone(), ddl), @@ -284,21 +289,13 @@ impl PlanVisitor for Executor { _context: &PlanVisitorContext, ) -> PlanVisitorResult { let execute = || -> Result { - let (table_name, if_not_exists, stream_table) = match &plan.body { + let (table_name, if_not_exists, catalog_table) = match &plan.body { CreateTablePlanBody::ConnectorSource { source_table, if_not_exists, } => { let table_name = source_table.name().to_string(); - let schema = Arc::new(source_table.produce_physical_schema()); - let table_instance = StreamTable::Source { - name: table_name.clone(), - connector: source_table.connector().to_string(), - schema, - event_time_field: source_table.event_time_field().map(str::to_string), - watermark_field: source_table.stream_catalog_watermark_field(), - with_options: source_table.catalog_with_options().clone(), - }; + let table_instance = CatalogTable::ConnectorTable(source_table.clone()); (table_name, *if_not_exists, table_instance) } CreateTablePlanBody::DataFusion(_) => { @@ -309,14 +306,14 @@ impl PlanVisitor for Executor { } }; - if if_not_exists && self.catalog_manager.has_stream_table(&table_name) { + if if_not_exists && self.catalog_manager.has_catalog_table(&table_name) { return Ok(ExecuteResult::ok(format!( "Table '{table_name}' already exists (skipped)" ))); } self.catalog_manager - .add_table(stream_table) + .add_catalog_table(catalog_table) .map_err(|e| { ExecuteError::Internal(format!( "Failed to register connector source table '{}': {}", @@ -338,15 +335,6 @@ impl PlanVisitor for Executor { _context: &PlanVisitorContext, ) -> PlanVisitorResult { let execute = || -> Result { - let sink = StreamTable::Sink { - name: 
plan.name.clone(), - program: plan.program.clone(), - }; - - self.catalog_manager - .add_table(sink) - .map_err(|e| ExecuteError::Internal(e.to_string()))?; - let fs_program: FsProgram = plan.program.clone().into(); let job_manager: Arc = Arc::clone(&self.job_manager); @@ -359,7 +347,7 @@ impl PlanVisitor for Executor { info!( job_id = %job_id, table = %plan.name, - "Streaming table registered and job submitted" + "Streaming job submitted" ); Ok(ExecuteResult::ok_with_data( @@ -398,7 +386,7 @@ impl PlanVisitor for Executor { ) -> PlanVisitorResult { let execute = || -> Result { self.catalog_manager - .drop_table(&plan.table_name, plan.if_exists) + .drop_catalog_table(&plan.table_name, plan.if_exists) .map_err(|e| ExecuteError::Internal(e.to_string()))?; Ok(ExecuteResult::ok(format!( diff --git a/src/coordinator/runtime_context.rs b/src/coordinator/runtime_context.rs index 91f4100c..5d671b98 100644 --- a/src/coordinator/runtime_context.rs +++ b/src/coordinator/runtime_context.rs @@ -18,11 +18,7 @@ use anyhow::Result; use crate::runtime::streaming::job::JobManager; use crate::runtime::taskexecutor::TaskManager; -use crate::sql::schema::column_descriptor::ColumnDescriptor; -use crate::sql::schema::connection_type::ConnectionType; -use crate::sql::schema::source_table::SourceTable; -use crate::sql::schema::table::Table as CatalogTable; -use crate::sql::schema::{StreamSchemaProvider, StreamTable}; +use crate::sql::schema::StreamSchemaProvider; use crate::storage::stream_catalog::CatalogManager; /// Dependencies shared by analyze / plan / execute, analogous to installing globals in @@ -32,7 +28,6 @@ pub struct CoordinatorRuntimeContext { pub task_manager: Arc, pub catalog_manager: Arc, pub job_manager: Arc, - planning_schema_override: Option, } impl CoordinatorRuntimeContext { @@ -44,7 +39,6 @@ impl CoordinatorRuntimeContext { .map_err(|e| anyhow::anyhow!("Failed to get CatalogManager: {}", e))?, job_manager: JobManager::global() .map_err(|e| anyhow::anyhow!("Failed to 
get JobManager: {}", e))?, - planning_schema_override: None, }) } @@ -52,53 +46,16 @@ impl CoordinatorRuntimeContext { task_manager: Arc, catalog_manager: Arc, job_manager: Arc, - planning_schema_override: Option, ) -> Self { Self { task_manager, catalog_manager, job_manager, - planning_schema_override, } } - /// Schema provider for [`LogicalPlanVisitor`] / [`SqlToRel`]: override if set, else catalog snapshot. + /// Schema provider for [`LogicalPlanVisitor`] / [`SqlToRel`]. pub fn planning_schema_provider(&self) -> StreamSchemaProvider { - let mut provider = self.catalog_manager.acquire_planning_context(); - - for (name, stream) in provider.tables.streams.clone() { - let StreamTable::Source { - name: source_name, - connector, - schema, - event_time_field, - watermark_field, - with_options, - } = stream.as_ref() - else { - continue; - }; - let mut source = SourceTable::new( - source_name.clone(), - connector.clone(), - ConnectionType::Source, - ); - source.schema_specs = schema - .fields() - .iter() - .map(|f| ColumnDescriptor::new_physical((**f).clone())) - .collect(); - source.inferred_fields = Some(schema.fields().iter().cloned().collect()); - source.temporal_config.event_column = event_time_field.clone(); - source.temporal_config.watermark_strategy_column = watermark_field.clone(); - source.catalog_with_options = with_options.clone(); - - provider - .tables - .catalogs - .insert(name, Arc::new(CatalogTable::ConnectorTable(source))); - } - - provider + self.catalog_manager.acquire_planning_context() } } diff --git a/src/sql/schema/catalog_ddl.rs b/src/sql/schema/catalog_ddl.rs index 0828a45d..3729c99c 100644 --- a/src/sql/schema/catalog_ddl.rs +++ b/src/sql/schema/catalog_ddl.rs @@ -17,6 +17,7 @@ use std::collections::BTreeMap; use datafusion::arrow::datatypes::{DataType, TimeUnit}; use super::schema_provider::StreamTable; +use super::table::Table as CatalogTable; use crate::sql::logical_node::logical::LogicalProgram; fn data_type_sql(dt: &DataType) -> String { 
@@ -204,3 +205,49 @@ pub fn show_create_stream_table(table: &StreamTable) -> String { } } } + +/// Extra fields for `SHOW TABLES` result grid for persisted catalog rows. +pub fn catalog_table_row_detail(table: &CatalogTable) -> String { + match table { + CatalogTable::ConnectorTable(source) => format!( + "kind=connector, connector={}, event_time={:?}, watermark={:?}, with_options={}", + source.connector(), + source.event_time_field(), + source.temporal_config.watermark_strategy_column, + source.catalog_with_options().len() + ), + CatalogTable::LookupTable(source) => format!( + "kind=lookup, connector={}, event_time={:?}, watermark={:?}, with_options={}", + source.connector(), + source.event_time_field(), + source.temporal_config.watermark_strategy_column, + source.catalog_with_options().len() + ), + CatalogTable::TableFromQuery { .. } => "kind=query".to_string(), + } +} + +/// Human-readable `SHOW CREATE TABLE` text for persisted catalog rows. +pub fn show_create_catalog_table(table: &CatalogTable) -> String { + match table { + CatalogTable::ConnectorTable(source) | CatalogTable::LookupTable(source) => { + let schema = source.produce_physical_schema(); + let cols = format_columns(&schema); + let mut ddl = format!("CREATE TABLE {} (\n{}\n)", source.name(), cols.join(",\n")); + if let Some(e) = source.event_time_field() { + ddl.push_str(&format!("\n/* EVENT TIME COLUMN: {e} */\n")); + } + if let Some(w) = source.temporal_config.watermark_strategy_column.as_deref() { + ddl.push_str(&format!("/* WATERMARK: {w} */\n")); + } + let mut opts = source.catalog_with_options().clone(); + opts.entry("connector".to_string()) + .or_insert_with(|| source.connector().to_string()); + ddl.push_str(&format_with_clause(&opts)); + ddl + } + CatalogTable::TableFromQuery { name, .. 
} => { + format!("CREATE TABLE {name} AS SELECT ...;\n/* logical query text is not persisted */\n") + } + } +} diff --git a/src/sql/schema/mod.rs b/src/sql/schema/mod.rs index a4aa3747..b3ec5e09 100644 --- a/src/sql/schema/mod.rs +++ b/src/sql/schema/mod.rs @@ -23,7 +23,10 @@ pub mod table_role; pub mod temporal_pipeline_config; pub mod utils; -pub use catalog_ddl::{schema_columns_one_line, show_create_stream_table, stream_table_row_detail}; +pub use catalog_ddl::{ + catalog_table_row_detail, schema_columns_one_line, show_create_catalog_table, + show_create_stream_table, stream_table_row_detail, +}; pub use column_descriptor::ColumnDescriptor; pub use connection_type::ConnectionType; pub use source_table::{SourceOperator, SourceTable}; diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs index fc7c5b2f..fc6a16f8 100644 --- a/src/storage/stream_catalog/manager.rs +++ b/src/storage/stream_catalog/manager.rs @@ -10,44 +10,36 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::collections::HashMap; use std::sync::{Arc, OnceLock}; use anyhow::{anyhow, bail, Context}; -use datafusion::arrow::datatypes::Schema; use datafusion::common::{internal_err, plan_err, Result as DFResult}; -use parking_lot::RwLock; use prost::Message; use protocol::storage::{self as pb, table_definition}; use tracing::{info, warn}; use unicase::UniCase; use crate::sql::common::constants::sql_field; -use crate::sql::schema::{ObjectName, StreamPlanningContext, StreamTable}; +use crate::sql::schema::column_descriptor::ColumnDescriptor; +use crate::sql::schema::connection_type::ConnectionType; +use crate::sql::schema::source_table::SourceTable; +use crate::sql::schema::table::Table as CatalogTable; +use crate::sql::schema::{StreamPlanningContext, StreamTable}; use super::codec::CatalogCodec; use super::meta_store::MetaStore; const CATALOG_KEY_PREFIX: &str = "catalog:stream_table:"; -#[derive(Clone, Default, Debug)] -pub struct StreamTableCatalogCache { - pub streams: HashMap>, -} - pub struct CatalogManager { store: Arc, - cache: RwLock, } static GLOBAL_CATALOG: OnceLock> = OnceLock::new(); impl CatalogManager { pub fn new(store: Arc) -> Self { - Self { - store, - cache: RwLock::new(StreamTableCatalogCache::default()), - } + Self { store } } pub fn init_global_in_memory() -> anyhow::Result<()> { @@ -80,120 +72,185 @@ impl CatalogManager { format!("{CATALOG_KEY_PREFIX}{}", table_name.to_lowercase()) } - pub fn add_table(&self, table: StreamTable) -> DFResult<()> { - let proto_def = self.encode_table(&table)?; + pub fn add_catalog_table(&self, table: CatalogTable) -> DFResult<()> { + let proto_def = self.encode_catalog_table(&table)?; let payload = proto_def.encode_to_vec(); let key = Self::build_store_key(table.name()); self.store.put(&key, payload)?; - - let object_name = UniCase::new(table.name().to_string()); - self.cache.write().streams.insert(object_name, Arc::new(table)); - Ok(()) } - pub fn has_stream_table(&self, name: &str) -> bool { - let object_name = 
UniCase::new(name.to_string()); - self.cache.read().streams.contains_key(&object_name) + pub fn has_catalog_table(&self, name: &str) -> bool { + let key = Self::build_store_key(name); + self.store.get(&key).ok().flatten().is_some() } - pub fn drop_table(&self, table_name: &str, if_exists: bool) -> DFResult<()> { - let object_name = UniCase::new(table_name.to_string()); - - let exists = self.cache.read().streams.contains_key(&object_name); - + pub fn drop_catalog_table(&self, table_name: &str, if_exists: bool) -> DFResult<()> { + let key = Self::build_store_key(table_name); + let exists = self.store.get(&key)?.is_some(); if !exists { if if_exists { return Ok(()); } return plan_err!("Table '{table_name}' not found"); } - - let key = Self::build_store_key(table_name); self.store.delete(&key)?; - - self.cache.write().streams.remove(&object_name); - Ok(()) } pub fn restore_from_store(&self) -> DFResult<()> { - let records = self.store.scan_prefix(CATALOG_KEY_PREFIX)?; - let mut restored = StreamTableCatalogCache::default(); - - for (_key, payload) in records { - let proto_def = pb::TableDefinition::decode(payload.as_slice()).map_err(|e| { - datafusion::common::DataFusionError::Execution(format!( - "Failed to decode stream catalog protobuf: {e}" - )) - })?; - - let table = self.decode_table(proto_def)?; - let object_name = UniCase::new(table.name().to_string()); - restored.streams.insert(object_name, Arc::new(table)); - } - - *self.cache.write() = restored; - + // No-op by design: the catalog is read-through from storage. 
Ok(()) } pub fn acquire_planning_context(&self) -> StreamPlanningContext { let mut ctx = StreamPlanningContext::new(); - ctx.tables.streams = self.cache.read().streams.clone(); + let catalogs = self.load_catalog_tables_map().unwrap_or_default(); + ctx.tables.catalogs = catalogs.clone(); + + for (name, table) in catalogs { + let source = match table.as_ref() { + CatalogTable::ConnectorTable(s) | CatalogTable::LookupTable(s) => s, + CatalogTable::TableFromQuery { .. } => continue, + }; + + let schema = Arc::new(source.produce_physical_schema()); + ctx.tables.streams.insert( + name, + Arc::new(StreamTable::Source { + name: source.name().to_string(), + connector: source.connector().to_string(), + schema, + event_time_field: source.event_time_field().map(str::to_string), + watermark_field: source.stream_catalog_watermark_field(), + with_options: source.catalog_with_options().clone(), + }), + ); + } ctx } - /// All stream catalog entries (connector sources + streaming sinks), sorted by table name. - pub fn list_stream_tables(&self) -> Vec> { - let guard = self.cache.read(); - let mut out: Vec> = guard.streams.values().cloned().collect(); + /// All persisted catalog tables, sorted by table name. + pub fn list_catalog_tables(&self) -> DFResult>> { + let mut out: Vec> = + self.load_catalog_tables_map()?.into_values().collect(); out.sort_by(|a, b| a.name().cmp(b.name())); - out + Ok(out) } - pub fn get_stream_table(&self, name: &str) -> Option> { + pub fn get_catalog_table(&self, name: &str) -> DFResult>> { let key = UniCase::new(name.to_string()); - self.cache.read().streams.get(&key).cloned() + Ok(self.load_catalog_tables_map()?.get(&key).cloned()) } - fn encode_table(&self, table: &StreamTable) -> DFResult { - let table_type = match table { + pub fn add_table(&self, table: StreamTable) -> DFResult<()> { + match table { StreamTable::Source { + name, connector, schema, event_time_field, watermark_field, with_options, - .. 
- } => table_definition::TableType::Source(pb::StreamSource { - arrow_schema_ipc: CatalogCodec::encode_schema(schema)?, - event_time_field: event_time_field.clone(), - watermark_field: watermark_field - .as_ref() - .filter(|w| *w != sql_field::COMPUTED_WATERMARK) - .cloned(), - with_options: { - let mut opts: std::collections::BTreeMap = with_options - .iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect(); - opts.entry("connector".to_string()) - .or_insert_with(|| connector.clone()); - opts.into_iter().collect() - }, - }), - StreamTable::Sink { program, .. } => { - let logical_program_bincode = CatalogCodec::encode_logical_program(program)?; - let schema = program - .egress_arrow_schema() - .unwrap_or_else(|| Arc::new(Schema::empty())); - table_definition::TableType::Sink(pb::StreamSink { - arrow_schema_ipc: CatalogCodec::encode_schema(&schema)?, - logical_program_bincode, - }) + } => { + let mut source = SourceTable::new(name, connector, ConnectionType::Source); + source.schema_specs = schema + .fields() + .iter() + .map(|f| ColumnDescriptor::new_physical((**f).clone())) + .collect(); + source.inferred_fields = Some(schema.fields().iter().cloned().collect()); + source.temporal_config.event_column = event_time_field; + source.temporal_config.watermark_strategy_column = watermark_field; + source.catalog_with_options = with_options; + self.add_catalog_table(CatalogTable::ConnectorTable(source)) + } + StreamTable::Sink { name, .. 
} => plan_err!( + "Persisting streaming sink '{name}' in stream catalog is no longer supported" + ), + } + } + + pub fn has_stream_table(&self, name: &str) -> bool { + self.has_catalog_table(name) + } + + pub fn drop_table(&self, table_name: &str, if_exists: bool) -> DFResult<()> { + self.drop_catalog_table(table_name, if_exists) + } + + pub fn list_stream_tables(&self) -> Vec> { + self.list_catalog_tables() + .unwrap_or_default() + .into_iter() + .filter_map(|t| match t.as_ref() { + CatalogTable::ConnectorTable(s) | CatalogTable::LookupTable(s) => { + Some(Arc::new(StreamTable::Source { + name: s.name().to_string(), + connector: s.connector().to_string(), + schema: Arc::new(s.produce_physical_schema()), + event_time_field: s.event_time_field().map(str::to_string), + watermark_field: s.stream_catalog_watermark_field(), + with_options: s.catalog_with_options().clone(), + })) + } + CatalogTable::TableFromQuery { .. } => None, + }) + .collect() + } + + pub fn get_stream_table(&self, name: &str) -> Option> { + self.get_catalog_table(name) + .ok() + .flatten() + .and_then(|t| match t.as_ref() { + CatalogTable::ConnectorTable(s) | CatalogTable::LookupTable(s) => { + Some(Arc::new(StreamTable::Source { + name: s.name().to_string(), + connector: s.connector().to_string(), + schema: Arc::new(s.produce_physical_schema()), + event_time_field: s.event_time_field().map(str::to_string), + watermark_field: s.stream_catalog_watermark_field(), + with_options: s.catalog_with_options().clone(), + })) + } + CatalogTable::TableFromQuery { .. 
} => None, + }) + } + + fn encode_catalog_table(&self, table: &CatalogTable) -> DFResult { + let table_type = match table { + CatalogTable::ConnectorTable(source) | CatalogTable::LookupTable(source) => { + let mut opts = source.catalog_with_options().clone(); + opts.entry("connector".to_string()) + .or_insert_with(|| source.connector().to_string()); + if matches!(table, CatalogTable::LookupTable(_)) { + table_definition::TableType::LookupTable(pb::CatalogSourceTable { + arrow_schema_ipc: CatalogCodec::encode_schema(&Arc::new( + source.produce_physical_schema(), + ))?, + event_time_field: source.event_time_field().map(str::to_string), + watermark_field: source.stream_catalog_watermark_field(), + with_options: opts.into_iter().collect(), + connector: source.connector().to_string(), + }) + } else { + table_definition::TableType::ConnectorTable(pb::CatalogSourceTable { + arrow_schema_ipc: CatalogCodec::encode_schema(&Arc::new( + source.produce_physical_schema(), + ))?, + event_time_field: source.event_time_field().map(str::to_string), + watermark_field: source.stream_catalog_watermark_field(), + with_options: opts.into_iter().collect(), + connector: source.connector().to_string(), + }) + } } + CatalogTable::TableFromQuery { name, .. 
} => return plan_err!( + "Persisting query-defined table '{}' is not supported by stream catalog storage", + name + ), }; Ok(pb::TableDefinition { @@ -203,7 +260,43 @@ impl CatalogManager { }) } - fn decode_table(&self, proto_def: pb::TableDefinition) -> DFResult { + fn decode_catalog_source_table( + &self, + table_name: String, + source_row: pb::CatalogSourceTable, + as_lookup: bool, + ) -> DFResult { + let connector = if source_row.connector.is_empty() { + source_row + .with_options + .get("connector") + .cloned() + .unwrap_or_else(|| "stream_catalog".to_string()) + } else { + source_row.connector.clone() + }; + let mut source = SourceTable::new(table_name, connector, ConnectionType::Source); + let schema = CatalogCodec::decode_schema(&source_row.arrow_schema_ipc)?; + source.schema_specs = schema + .fields() + .iter() + .map(|f| ColumnDescriptor::new_physical((**f).clone())) + .collect(); + source.inferred_fields = Some(schema.fields().iter().cloned().collect()); + source.temporal_config.event_column = source_row.event_time_field; + source.temporal_config.watermark_strategy_column = source_row + .watermark_field + .filter(|w| w != sql_field::COMPUTED_WATERMARK); + source.catalog_with_options = source_row.with_options.into_iter().collect(); + + if as_lookup { + Ok(CatalogTable::LookupTable(source)) + } else { + Ok(CatalogTable::ConnectorTable(source)) + } + } + + fn decode_catalog_table(&self, proto_def: pb::TableDefinition) -> DFResult { let Some(table_type) = proto_def.table_type else { return internal_err!( "Corrupted catalog row: missing table_type for {}", @@ -212,34 +305,47 @@ impl CatalogManager { }; match table_type { - table_definition::TableType::Source(src) => Ok(StreamTable::Source { - name: proto_def.table_name, - connector: src - .with_options - .get("connector") - .cloned() - .unwrap_or_else(|| "stream_catalog".to_string()), - schema: CatalogCodec::decode_schema(&src.arrow_schema_ipc)?, - event_time_field: src.event_time_field, - watermark_field: 
src - .watermark_field - .filter(|w| w != sql_field::COMPUTED_WATERMARK), - with_options: src.with_options.into_iter().collect(), - }), - table_definition::TableType::Sink(sink) => { - if sink.logical_program_bincode.is_empty() { - return internal_err!( - "Corrupted catalog row: sink '{}' missing logical_program_bincode", - proto_def.table_name + table_definition::TableType::ConnectorTable(src) => { + self.decode_catalog_source_table(proto_def.table_name, src, false) + } + table_definition::TableType::LookupTable(src) => { + self.decode_catalog_source_table(proto_def.table_name, src, true) + } + } + } + + fn load_catalog_tables_map( + &self, + ) -> DFResult>> { + let mut out = std::collections::HashMap::new(); + let records = self.store.scan_prefix(CATALOG_KEY_PREFIX)?; + for (key, payload) in records { + let proto_def = match pb::TableDefinition::decode(payload.as_slice()) { + Ok(v) => v, + Err(e) => { + warn!( + catalog_key = %key, + error = %e, + "Skipping corrupted stream catalog row: protobuf decode failed" ); + continue; } - let program = CatalogCodec::decode_logical_program(&sink.logical_program_bincode)?; - Ok(StreamTable::Sink { - name: proto_def.table_name, - program, - }) - } + }; + let table = match self.decode_catalog_table(proto_def) { + Ok(v) => v, + Err(e) => { + warn!( + catalog_key = %key, + error = %e, + "Skipping unsupported/corrupted stream catalog row" + ); + continue; + } + }; + let object_name = UniCase::new(table.name().to_string()); + out.insert(object_name, Arc::new(table)); } + Ok(out) } } @@ -249,8 +355,8 @@ pub fn restore_global_catalog_from_store() { }; match mgr.restore_from_store() { Ok(()) => { - let n = mgr.list_stream_tables().len(); - info!(stream_tables = n, "Stream catalog loaded from durable store"); + let n = mgr.list_catalog_tables().map(|t| t.len()).unwrap_or(0); + info!(catalog_tables = n, "Catalog loaded from durable store"); } Err(e) => warn!("Stream catalog restore_from_store failed: {e:#}"), } @@ -296,13 +402,14 @@ 
pub fn planning_schema_provider() -> StreamPlanningContext { #[cfg(test)] mod tests { - use std::collections::BTreeMap; use std::sync::Arc; - use datafusion::arrow::datatypes::{DataType, Field, Schema}; + use datafusion::arrow::datatypes::{DataType, Field}; - use crate::sql::logical_node::logical::LogicalProgram; - use crate::sql::schema::StreamTable; + use crate::sql::schema::column_descriptor::ColumnDescriptor; + use crate::sql::schema::connection_type::ConnectionType; + use crate::sql::schema::source_table::SourceTable; + use crate::sql::schema::table::Table as CatalogTable; use crate::storage::stream_catalog::{InMemoryMetaStore, MetaStore}; use super::CatalogManager; @@ -314,107 +421,41 @@ mod tests { #[test] fn add_table_roundtrip_snapshot() { let mgr = create_test_manager(); - let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); - - let table = StreamTable::Source { - name: "t1".into(), - connector: "stream_catalog".into(), - schema: Arc::clone(&schema), - event_time_field: Some("ts".into()), - watermark_field: None, - with_options: BTreeMap::new(), - }; - - mgr.add_table(table).unwrap(); - - let ctx = mgr.acquire_planning_context(); - let got = ctx.get_stream_table("t1").expect("table present"); - + let mut source = SourceTable::new("t1", "kafka", ConnectionType::Source); + source.schema_specs = vec![ColumnDescriptor::new_physical(Field::new( + "a", + DataType::Int32, + false, + ))]; + source.temporal_config.event_column = Some("ts".into()); + let table = CatalogTable::ConnectorTable(source); + + mgr.add_catalog_table(table).unwrap(); + + let got = mgr + .get_catalog_table("t1") + .unwrap() + .expect("table present"); assert_eq!(got.name(), "t1"); - - if let StreamTable::Source { - event_time_field, - watermark_field, - .. 
- } = got.as_ref() - { - assert_eq!(event_time_field.as_deref(), Some("ts")); - assert!(watermark_field.is_none()); - } else { - panic!("expected Source"); - } - } - - #[test] - fn add_table_roundtrip_with_options() { - let mgr = create_test_manager(); - let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); - - let mut opts = BTreeMap::new(); - opts.insert("connector".to_string(), "kafka".to_string()); - opts.insert("topic".to_string(), "my-topic".to_string()); - - let table = StreamTable::Source { - name: "t_with".into(), - connector: "kafka".into(), - schema, - event_time_field: None, - watermark_field: None, - with_options: opts.clone(), - }; - - mgr.add_table(table).unwrap(); - - let ctx = mgr.acquire_planning_context(); - let got = ctx.get_stream_table("t_with").expect("table present"); - - if let StreamTable::Source { with_options, .. } = got.as_ref() { - assert_eq!(with_options, &opts); - } else { - panic!("expected Source"); - } } #[test] fn drop_table_if_exists() { let mgr = create_test_manager(); - let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); - - mgr.add_table(StreamTable::Source { - name: "t_drop".into(), - connector: "stream_catalog".into(), - schema, - event_time_field: None, - watermark_field: None, - with_options: BTreeMap::new(), - }) - .unwrap(); - - mgr.drop_table("t_drop", false).unwrap(); - assert!(!mgr.has_stream_table("t_drop")); - - mgr.drop_table("t_drop", true).unwrap(); - assert!(mgr.drop_table("nope", false).is_err()); - mgr.drop_table("nope", true).unwrap(); - } - - #[test] - fn restore_from_store_rebuilds_cache() { - let store: Arc = Arc::new(InMemoryMetaStore::new()); - - let mgr_a = CatalogManager::new(Arc::clone(&store)); - - mgr_a - .add_table(StreamTable::Sink { - name: "sink1".into(), - program: LogicalProgram::default(), - }) + let mut source = SourceTable::new("t_drop", "kafka", ConnectionType::Source); + source.schema_specs = 
vec![ColumnDescriptor::new_physical(Field::new( + "a", + DataType::Int32, + false, + ))]; + mgr.add_catalog_table(CatalogTable::ConnectorTable(source)) .unwrap(); - let mgr_b = CatalogManager::new(store); - mgr_b.restore_from_store().unwrap(); + mgr.drop_catalog_table("t_drop", false).unwrap(); + assert!(!mgr.has_catalog_table("t_drop")); - let ctx = mgr_b.acquire_planning_context(); - assert!(ctx.get_stream_table("sink1").is_some()); + mgr.drop_catalog_table("t_drop", true).unwrap(); + assert!(mgr.drop_catalog_table("nope", false).is_err()); + mgr.drop_catalog_table("nope", true).unwrap(); } } From 87f77222881b0bd533e9bbc0d2109ef5fc61b827 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Tue, 31 Mar 2026 23:58:48 +0800 Subject: [PATCH 35/44] update --- protocol/proto/fs_api.proto | 127 +++++- protocol/proto/storage.proto | 5 +- src/common/mod.rs | 2 +- src/runtime/streaming/api/mod.rs | 2 +- src/runtime/streaming/api/operator.rs | 4 +- src/runtime/streaming/driver.rs | 266 ----------- src/runtime/streaming/execution/runner.rs | 10 +- .../factory/connector/dispatchers.rs | 28 +- .../streaming/factory/connector/kafka.rs | 421 ++++++++---------- src/runtime/streaming/factory/mod.rs | 4 +- .../streaming/factory/operator_factory.rs | 72 +-- src/runtime/streaming/job/job_manager.rs | 134 +++++- src/runtime/streaming/lib.rs | 40 -- src/runtime/streaming/mod.rs | 2 +- .../grouping/incremental_aggregate.rs | 4 +- .../operators/joins/join_instance.rs | 4 +- .../operators/joins/join_with_expiration.rs | 4 +- src/runtime/streaming/operators/key_by.rs | 4 +- .../streaming/operators/key_operator.rs | 6 +- src/runtime/streaming/operators/mod.rs | 2 +- src/runtime/streaming/operators/projection.rs | 59 ++- .../streaming/operators/sink/kafka/mod.rs | 4 +- .../streaming/operators/value_execution.rs | 4 +- .../watermark/watermark_generator.rs | 4 +- .../windows/session_aggregating_window.rs | 4 +- .../windows/sliding_aggregating_window.rs | 4 +- 
.../windows/tumbling_aggregating_window.rs | 4 +- .../operators/windows/window_function.rs | 4 +- src/sql/common/connector_options.rs | 15 + src/sql/common/mod.rs | 2 +- src/sql/common/operator_config.rs | 33 -- src/sql/extensions/lookup.rs | 18 +- src/sql/schema/connector_config.rs | 82 ++++ src/sql/schema/kafka_operator_config.rs | 250 +++++++++++ src/sql/schema/mod.rs | 3 + src/sql/schema/source_table.rs | 282 ++---------- src/storage/stream_catalog/manager.rs | 68 ++- 37 files changed, 983 insertions(+), 998 deletions(-) delete mode 100644 src/runtime/streaming/driver.rs delete mode 100644 src/runtime/streaming/lib.rs create mode 100644 src/sql/schema/connector_config.rs create mode 100644 src/sql/schema/kafka_operator_config.rs diff --git a/protocol/proto/fs_api.proto b/protocol/proto/fs_api.proto index b178f6ea..1f578ffe 100644 --- a/protocol/proto/fs_api.proto +++ b/protocol/proto/fs_api.proto @@ -8,8 +8,131 @@ package fs_api; message ConnectorOp { string connector = 1; - string config = 2; - string description = 3; + reserved 2; // removed: map config_map + optional FsSchema fs_schema = 3; + string name = 4; + string description = 5; + + oneof config { + KafkaSourceConfig kafka_source = 6; + KafkaSinkConfig kafka_sink = 7; + GenericConnectorConfig generic = 8; + } +} + +// ─────────────────────── Kafka Connector Configs ─────────────────────── + +message KafkaSourceConfig { + string topic = 1; + string bootstrap_servers = 2; + optional string group_id = 3; + optional string group_id_prefix = 4; + KafkaOffsetMode offset_mode = 5; + KafkaReadMode read_mode = 6; + KafkaAuthConfig auth = 7; + map client_configs = 8; + FormatConfig format = 9; + BadDataPolicy bad_data_policy = 10; + uint32 rate_limit_msgs_per_sec = 11; + optional string value_subject = 12; +} + +message KafkaSinkConfig { + string topic = 1; + string bootstrap_servers = 2; + KafkaSinkCommitMode commit_mode = 3; + optional string key_field = 4; + optional string timestamp_field = 5; + 
KafkaAuthConfig auth = 6; + map client_configs = 7; + FormatConfig format = 8; + optional string value_subject = 9; +} + +// Fallback for non-Kafka connectors that are not yet strongly typed. +message GenericConnectorConfig { + map properties = 1; +} + +// ─────────────────────── Kafka Auth ─────────────────────── + +message KafkaAuthConfig { + oneof auth { + KafkaAuthNone none = 1; + KafkaAuthSasl sasl = 2; + KafkaAuthAwsMskIam aws_msk_iam = 3; + } +} + +message KafkaAuthNone {} + +message KafkaAuthSasl { + string protocol = 1; + string mechanism = 2; + string username = 3; + string password = 4; +} + +message KafkaAuthAwsMskIam { + string region = 1; +} + +// ─────────────────────── Format & Data-Quality ─────────────────────── + +message FormatConfig { + oneof format { + JsonFormatConfig json = 1; + RawStringFormatConfig raw_string = 2; + RawBytesFormatConfig raw_bytes = 3; + } +} + +message JsonFormatConfig { + TimestampFormatProto timestamp_format = 1; + DecimalEncodingProto decimal_encoding = 2; + bool include_schema = 3; + bool confluent_schema_registry = 4; + optional uint32 schema_id = 5; + bool debezium = 6; + bool unstructured = 7; +} + +message RawStringFormatConfig {} +message RawBytesFormatConfig {} + +// ─────────────────────── Kafka Enums ─────────────────────── + +enum TimestampFormatProto { + TIMESTAMP_RFC3339 = 0; + TIMESTAMP_UNIX_MILLIS = 1; +} + +enum DecimalEncodingProto { + DECIMAL_NUMBER = 0; + DECIMAL_STRING = 1; + DECIMAL_BYTES = 2; +} + +enum BadDataPolicy { + BAD_DATA_FAIL = 0; + BAD_DATA_DROP = 1; +} + +enum KafkaOffsetMode { + KAFKA_OFFSET_EARLIEST = 0; + KAFKA_OFFSET_LATEST = 1; + KAFKA_OFFSET_GROUP = 2; +} + +enum KafkaReadMode { + KAFKA_READ_DEFAULT = 0; + KAFKA_READ_COMMITTED = 1; + KAFKA_READ_UNCOMMITTED = 2; +} + +enum KafkaSinkCommitMode { + KAFKA_SINK_AT_LEAST_ONCE = 0; + KAFKA_SINK_EXACTLY_ONCE = 1; } message ValuePlanOperator { diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto index 6c645e75..9ab0995d 
100644 --- a/protocol/proto/storage.proto +++ b/protocol/proto/storage.proto @@ -30,10 +30,13 @@ message CatalogSourceTable { bytes arrow_schema_ipc = 1; optional string event_time_field = 2; optional string watermark_field = 3; - // Original CREATE TABLE ... WITH ('k'='v', ...) pairs (best-effort; keys sorted in DDL). + // Original CREATE TABLE ... WITH ('k'='v', ...) pairs — single source of truth. map with_options = 4; // Canonical connector identifier (e.g. kafka, postgres-cdc). string connector = 5; + reserved 6; // removed: string opaque_config (JSON blob no longer needed) + // Human-readable note from DDL (ConnectorOp.description). + string description = 7; } // ============================================================================= diff --git a/src/common/mod.rs b/src/common/mod.rs index e3c103a2..e0eb8d7a 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -48,7 +48,7 @@ pub use control::{ pub use fs_schema::{FsSchema, FsSchemaRef}; pub use errors::DataflowError; pub use formats::{BadData, Format, Framing, JsonFormat}; -pub use operator_config::{MetadataField, OperatorConfig, RateLimit}; +pub use operator_config::MetadataField; // ── Well-known column names ── pub const TIMESTAMP_FIELD: &str = "_timestamp"; diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs index f004de58..a525c883 100644 --- a/src/runtime/streaming/api/mod.rs +++ b/src/runtime/streaming/api/mod.rs @@ -16,5 +16,5 @@ pub mod operator; pub mod source; pub use context::TaskContext; -pub use operator::{ConstructedOperator, MessageOperator}; +pub use operator::{ConstructedOperator, Operator}; pub use source::{SourceEvent, SourceOffset, SourceOperator}; diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs index 4683379b..9acc6e06 100644 --- a/src/runtime/streaming/api/operator.rs +++ b/src/runtime/streaming/api/operator.rs @@ -24,11 +24,11 @@ use crate::sql::common::{CheckpointBarrier, Watermark}; pub enum 
ConstructedOperator { Source(Box), - Operator(Box), + Operator(Box), } #[async_trait] -pub trait MessageOperator: Send + 'static { +pub trait Operator: Send + 'static { fn name(&self) -> &str; async fn on_start(&mut self, _ctx: &mut TaskContext) -> anyhow::Result<()> { diff --git a/src/runtime/streaming/driver.rs b/src/runtime/streaming/driver.rs deleted file mode 100644 index 011e49ab..00000000 --- a/src/runtime/streaming/driver.rs +++ /dev/null @@ -1,266 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::future::pending; -use std::sync::Arc; - -use arrow_array::RecordBatch; -use tokio::sync::mpsc; - -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{MessageOperator, OperatorContext, StreamOperator}; -use crate::runtime::streaming::context::{ChainedOperatorContext, TerminalOutputContext}; -use crate::runtime::streaming::environment::TaskEnvironment; -use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; -use crate::runtime::streaming::protocol::event::StreamEvent; -use crate::runtime::streaming::protocol::stream_out::StreamOutput; -use crate::runtime::streaming::protocol::tracked::TrackedEvent; -use crate::sql::common::CheckpointBarrier; - -pub struct StreamTaskDriver { - head_op: Box, - head_ctx: Box, - inbox: Option>, - control_rx: mpsc::Receiver, -} - -impl StreamTaskDriver { - pub fn new( - task_id: u32, - mut operators: Vec>, - inbox: Option>, - outboxes: Vec>, - control_rx: mpsc::Receiver, - job_id: String, - ) -> Self { - let env = TaskEnvironment::new(job_id, task_id, 0, 1); - let mut current_op = operators.pop().expect("Operators pipeline cannot be empty"); - let mut current_ctx: Box = - Box::new(TerminalOutputContext::new(outboxes, env)); - - while let Some(prev_op) = operators.pop() { - let chained = ChainedOperatorContext::new(current_op, current_ctx); - current_op = prev_op; - current_ctx = Box::new(chained); - } - - Self { - head_op: current_op, - head_ctx: current_ctx, - inbox, - control_rx, - } - } - - pub async fn run(&mut self) -> anyhow::Result<()> { - self.head_op.open(self.head_ctx.env()).await?; - - 'main_loop: loop { - tokio::select! { - biased; - Some(cmd) = self.control_rx.recv() => { - if self.process_control_command(cmd).await? 
{ - break 'main_loop; - } - } - Some(tracked) = async { - if let Some(ref mut rx) = self.inbox { rx.recv().await } - else { pending().await } - } => { - self.pump_event(tracked.event).await?; - } - } - } - - self.head_op.close(self.head_ctx.env()).await?; - Ok(()) - } - - async fn process_control_command(&mut self, cmd: ControlCommand) -> anyhow::Result { - match cmd { - ControlCommand::TriggerCheckpoint { barrier } => { - let barrier: CheckpointBarrier = barrier.into(); - self.pump_event(StreamEvent::Barrier(barrier)).await?; - Ok(false) - } - ControlCommand::Commit { epoch } => { - self.head_op.commit_checkpoint(epoch, self.head_ctx.env()).await?; - self.head_ctx.commit_checkpoint(epoch).await?; - Ok(false) - } - ControlCommand::Stop { mode } if mode == StopMode::Immediate => Ok(true), - other_cmd => { - let stop_head = self - .head_op - .handle_control(other_cmd.clone(), self.head_ctx.env()) - .await?; - let stop_rest = self.head_ctx.handle_control(other_cmd).await?; - Ok(stop_head || stop_rest) - } - } - } - - async fn pump_event(&mut self, event: StreamEvent) -> anyhow::Result<()> { - match event { - StreamEvent::Data(batch) => self.head_op.process_data(batch, self.head_ctx.as_mut()).await, - StreamEvent::Watermark(wm) => { - self.head_op.process_watermark(wm, self.head_ctx.as_mut()).await - } - StreamEvent::Barrier(br) => { - self.head_op - .snapshot_state(br.clone(), self.head_ctx.as_mut()) - .await?; - self.head_ctx.broadcast(StreamEvent::Barrier(br)).await - } - StreamEvent::EndOfStream => { - self.head_op.close(self.head_ctx.env()).await?; - self.head_ctx.broadcast(StreamEvent::EndOfStream).await - } - } - } -} - -pub struct MessageOperatorAdapter { - inner: Box, -} - -impl MessageOperatorAdapter { - pub fn new(inner: Box) -> Self { - Self { inner } - } - - async fn emit_outputs( - ctx: &mut dyn OperatorContext, - outputs: Vec, - ) -> anyhow::Result<()> { - for out in outputs { - match out { - StreamOutput::Forward(b) | StreamOutput::Broadcast(b) | 
StreamOutput::Keyed(_, b) => { - ctx.collect(b).await?; - } - StreamOutput::Watermark(wm) => { - ctx.broadcast(StreamEvent::Watermark(wm)).await?; - } - } - } - Ok(()) - } -} - -#[async_trait::async_trait(?Send)] -impl StreamOperator for MessageOperatorAdapter { - async fn open(&mut self, env: &mut TaskEnvironment) -> anyhow::Result<()> { - let mut ctx = TaskContext::new( - env.job_id.clone(), - env.task_id, - env.subtask_index, - env.parallelism, - vec![], - env.memory_pool.clone(), - ); - self.inner.on_start(&mut ctx).await - } - - async fn close(&mut self, env: &mut TaskEnvironment) -> anyhow::Result<()> { - let mut ctx = TaskContext::new( - env.job_id.clone(), - env.task_id, - env.subtask_index, - env.parallelism, - vec![], - env.memory_pool.clone(), - ); - let _ = self.inner.on_close(&mut ctx).await?; - Ok(()) - } - - async fn process_data( - &mut self, - batch: RecordBatch, - ctx: &mut dyn OperatorContext, - ) -> anyhow::Result<()> { - let mut op_ctx = TaskContext::new( - ctx.env().job_id.clone(), - ctx.env().task_id, - ctx.env().subtask_index, - ctx.env().parallelism, - vec![], - ctx.env().memory_pool.clone(), - ); - let outs = self.inner.process_data(0, batch, &mut op_ctx).await?; - Self::emit_outputs(ctx, outs).await - } - - async fn process_watermark( - &mut self, - wm: crate::sql::common::Watermark, - ctx: &mut dyn OperatorContext, - ) -> anyhow::Result<()> { - let mut op_ctx = TaskContext::new( - ctx.env().job_id.clone(), - ctx.env().task_id, - ctx.env().subtask_index, - ctx.env().parallelism, - vec![], - ctx.env().memory_pool.clone(), - ); - let outs = self.inner.process_watermark(wm, &mut op_ctx).await?; - Self::emit_outputs(ctx, outs).await - } - - async fn snapshot_state( - &mut self, - barrier: CheckpointBarrier, - ctx: &mut dyn OperatorContext, - ) -> anyhow::Result<()> { - let mut op_ctx = TaskContext::new( - ctx.env().job_id.clone(), - ctx.env().task_id, - ctx.env().subtask_index, - ctx.env().parallelism, - vec![], - 
ctx.env().memory_pool.clone(), - ); - self.inner.snapshot_state(barrier, &mut op_ctx).await - } - - async fn commit_checkpoint( - &mut self, - epoch: u32, - env: &mut TaskEnvironment, - ) -> anyhow::Result<()> { - let mut ctx = TaskContext::new( - env.job_id.clone(), - env.task_id, - env.subtask_index, - env.parallelism, - vec![], - env.memory_pool.clone(), - ); - self.inner.commit_checkpoint(epoch, &mut ctx).await - } - - async fn handle_control( - &mut self, - cmd: ControlCommand, - _env: &mut TaskEnvironment, - ) -> anyhow::Result { - match cmd { - ControlCommand::Stop { mode } => Ok(mode == StopMode::Immediate), - ControlCommand::DropState - | ControlCommand::Start - | ControlCommand::UpdateConfig { .. } - | ControlCommand::TriggerCheckpoint { .. } - | ControlCommand::Commit { .. } => Ok(false), - } - } -} diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs index d43f052d..c4981d93 100644 --- a/src/runtime/streaming/execution/runner.rs +++ b/src/runtime/streaming/execution/runner.rs @@ -16,7 +16,7 @@ use tokio_stream::{StreamExt, StreamMap}; use tracing::{info, info_span, Instrument}; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use crate::runtime::streaming::error::RunError; use crate::runtime::streaming::network::endpoint::BoxedEventStream; use crate::runtime::streaming::protocol::{ @@ -52,16 +52,16 @@ pub trait OperatorDrive: Send { } pub struct ChainedDriver { - operator: Box, + operator: Box, next: Option>, } impl ChainedDriver { - pub fn new(operator: Box, next: Option>) -> Self { + pub fn new(operator: Box, next: Option>) -> Self { Self { operator, next } } - pub fn build_chain(mut operators: Vec>) -> Option> { + pub fn build_chain(mut operators: Vec>) -> Option> { if operators.is_empty() { return None; } @@ -240,7 +240,7 @@ pub struct Pipeline { impl Pipeline { pub fn new( - 
operators: Vec>, + operators: Vec>, ctx: TaskContext, inboxes: Vec, control_rx: Receiver, diff --git a/src/runtime/streaming/factory/connector/dispatchers.rs b/src/runtime/streaming/factory/connector/dispatchers.rs index cca85c1a..40e7242c 100644 --- a/src/runtime/streaming/factory/connector/dispatchers.rs +++ b/src/runtime/streaming/factory/connector/dispatchers.rs @@ -10,35 +10,21 @@ // See the License for the specific language governing permissions and // limitations under the License. - -use anyhow::{anyhow, Result}; -use prost::Message; use std::sync::Arc; -use protocol::grpc::api::ConnectorOp; +use anyhow::Result; use crate::runtime::streaming::api::operator::ConstructedOperator; use crate::runtime::streaming::factory::global::Registry; use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor; -use crate::sql::common::constants::connector_type; -use super::kafka::{KafkaSinkDispatcher, KafkaSourceDispatcher}; +use super::kafka::ConnectorDispatcher; pub struct ConnectorSourceDispatcher; impl OperatorConstructor for ConnectorSourceDispatcher { fn with_config(&self, config: &[u8], registry: Arc) -> Result { - let op = ConnectorOp::decode(config) - .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?; - - match op.connector.as_str() { - ct if ct == connector_type::KAFKA => KafkaSourceDispatcher.with_config(config, registry), - ct if ct == connector_type::REDIS => Err(anyhow!( - "ConnectorSource '{}' factory wiring not yet implemented", - op.connector - )), - other => Err(anyhow!("Unsupported source connector type: {}", other)), - } + ConnectorDispatcher.with_config(config, registry) } } @@ -46,12 +32,6 @@ pub struct ConnectorSinkDispatcher; impl OperatorConstructor for ConnectorSinkDispatcher { fn with_config(&self, config: &[u8], registry: Arc) -> Result { - let op = ConnectorOp::decode(config) - .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?; - - match op.connector.as_str() { - ct if ct == connector_type::KAFKA => 
KafkaSinkDispatcher.with_config(config, registry), - other => Err(anyhow!("Unsupported sink connector type: {}", other)), - } + ConnectorDispatcher.with_config(config, registry) } } diff --git a/src/runtime/streaming/factory/connector/kafka.rs b/src/runtime/streaming/factory/connector/kafka.rs index 7e548cec..a55ef477 100644 --- a/src/runtime/streaming/factory/connector/kafka.rs +++ b/src/runtime/streaming/factory/connector/kafka.rs @@ -10,316 +10,213 @@ // See the License for the specific language governing permissions and // limitations under the License. - -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::{bail, Context, Result}; use prost::Message; use std::collections::HashMap; use std::num::NonZeroU32; use std::sync::Arc; -use protocol::grpc::api::ConnectorOp; -use tracing::{info, warn}; +use protocol::grpc::api::connector_op::Config; +use protocol::grpc::api::{ + BadDataPolicy, ConnectorOp, DecimalEncodingProto, FormatConfig, + KafkaAuthConfig, KafkaOffsetMode, KafkaReadMode, KafkaSinkCommitMode, KafkaSinkConfig, + KafkaSourceConfig, TimestampFormatProto, +}; +use tracing::info; use crate::runtime::streaming::api::operator::ConstructedOperator; use crate::runtime::streaming::api::source::SourceOffset; use crate::runtime::streaming::factory::global::Registry; use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor; use crate::runtime::streaming::format::{ - BadDataPolicy, DataSerializer, DecimalEncoding as RtDecimalEncoding, Format as RuntimeFormat, - JsonFormat as RuntimeJsonFormat, TimestampFormat as RtTimestampFormat, + BadDataPolicy as RtBadDataPolicy, DataSerializer, DecimalEncoding as RtDecimalEncoding, + Format as RuntimeFormat, JsonFormat as RuntimeJsonFormat, + TimestampFormat as RtTimestampFormat, }; use crate::runtime::streaming::operators::sink::kafka::{ConsistencyMode, KafkaSinkOperator}; -use crate::runtime::streaming::operators::source::kafka::{BufferedDeserializer, KafkaSourceOperator}; -use 
crate::sql::common::constants::connector_type; -use crate::sql::common::formats::{ - BadData, DecimalEncoding as SqlDecimalEncoding, Format as SqlFormat, JsonFormat as SqlJsonFormat, - TimestampFormat as SqlTimestampFormat, +use crate::runtime::streaming::operators::source::kafka::{ + BufferedDeserializer, KafkaSourceOperator, }; -use crate::sql::common::kafka_catalog::{ - KafkaConfig, KafkaConfigAuthentication, KafkaTable, ReadMode, SinkCommitMode, TableType, -}; -use crate::sql::common::{FsSchema, OperatorConfig}; +use crate::sql::common::FsSchema; const DEFAULT_SOURCE_BATCH_SIZE: usize = 1024; -pub fn build_client_configs(config: &KafkaConfig, table: &KafkaTable) -> Result> { - let mut client_configs = HashMap::new(); - - match &config.authentication { - KafkaConfigAuthentication::None => {} - KafkaConfigAuthentication::Sasl { - protocol, - mechanism, - username, - password, - } => { - client_configs.insert("security.protocol".to_string(), protocol.clone()); - client_configs.insert("sasl.mechanism".to_string(), mechanism.clone()); - client_configs.insert("sasl.username".to_string(), username.clone()); - client_configs.insert("sasl.password".to_string(), password.clone()); +// ─────────────── Proto → Runtime type conversions ─────────────── + +fn proto_format_to_runtime(fmt: &Option) -> Result { + let cfg = fmt.as_ref().context("FormatConfig is required")?; + match &cfg.format { + Some(protocol::grpc::api::format_config::Format::Json(j)) => { + Ok(RuntimeFormat::Json(RuntimeJsonFormat { + timestamp_format: match j.timestamp_format() { + TimestampFormatProto::TimestampRfc3339 => RtTimestampFormat::RFC3339, + TimestampFormatProto::TimestampUnixMillis => RtTimestampFormat::UnixMillis, + }, + decimal_encoding: match j.decimal_encoding() { + DecimalEncodingProto::DecimalNumber => RtDecimalEncoding::Number, + DecimalEncodingProto::DecimalString => RtDecimalEncoding::String, + DecimalEncodingProto::DecimalBytes => RtDecimalEncoding::Bytes, + }, + include_schema: 
j.include_schema, + })) } - KafkaConfigAuthentication::AwsMskIam { region } => { - client_configs.insert("security.protocol".to_string(), "SASL_SSL".to_string()); - client_configs.insert("sasl.mechanism".to_string(), "OAUTHBEARER".to_string()); - client_configs.insert( - "sasl.oauthbearer.extensions".to_string(), - format!("logicalCluster=aws_msk;aws_region={region}"), - ); + Some(protocol::grpc::api::format_config::Format::RawString(_)) => { + Ok(RuntimeFormat::RawString) } - } - - for (k, v) in &config.connection_properties { - client_configs.insert(k.clone(), v.clone()); - } - - for (k, v) in &table.client_configs { - if client_configs.contains_key(k) { - warn!( - "Kafka config key '{}' is defined in both connection and table; using table value", - k - ); + Some(protocol::grpc::api::format_config::Format::RawBytes(_)) => { + Ok(RuntimeFormat::RawBytes) } - client_configs.insert(k.clone(), v.clone()); - } - - Ok(client_configs) -} - -fn bad_data_policy(b: Option) -> BadDataPolicy { - match b.unwrap_or_default() { - BadData::Fail {} => BadDataPolicy::Fail, - BadData::Drop {} => BadDataPolicy::Drop, + None => bail!("FormatConfig has no format variant set"), } } -fn sql_timestamp_format(t: SqlTimestampFormat) -> RtTimestampFormat { - match t { - SqlTimestampFormat::RFC3339 => RtTimestampFormat::RFC3339, - SqlTimestampFormat::UnixMillis => RtTimestampFormat::UnixMillis, +fn proto_bad_data_to_runtime(policy: i32) -> RtBadDataPolicy { + match BadDataPolicy::try_from(policy) { + Ok(BadDataPolicy::BadDataDrop) => RtBadDataPolicy::Drop, + _ => RtBadDataPolicy::Fail, } } -fn sql_decimal_encoding(d: SqlDecimalEncoding) -> RtDecimalEncoding { - match d { - SqlDecimalEncoding::Number => RtDecimalEncoding::Number, - SqlDecimalEncoding::String => RtDecimalEncoding::String, - SqlDecimalEncoding::Bytes => RtDecimalEncoding::Bytes, +fn proto_offset_to_runtime(mode: i32) -> SourceOffset { + match KafkaOffsetMode::try_from(mode) { + Ok(KafkaOffsetMode::KafkaOffsetLatest) => 
SourceOffset::Latest, + Ok(KafkaOffsetMode::KafkaOffsetEarliest) => SourceOffset::Earliest, + _ => SourceOffset::Group, } } -fn sql_json_format_to_runtime(j: &SqlJsonFormat) -> RuntimeJsonFormat { - RuntimeJsonFormat { - timestamp_format: sql_timestamp_format(j.timestamp_format), - decimal_encoding: sql_decimal_encoding(j.decimal_encoding), - include_schema: j.include_schema, +fn build_auth_client_configs(auth: &Option) -> HashMap { + let mut out = HashMap::new(); + let Some(auth) = auth else { return out }; + match &auth.auth { + Some(protocol::grpc::api::kafka_auth_config::Auth::Sasl(sasl)) => { + out.insert("security.protocol".to_string(), sasl.protocol.clone()); + out.insert("sasl.mechanism".to_string(), sasl.mechanism.clone()); + out.insert("sasl.username".to_string(), sasl.username.clone()); + out.insert("sasl.password".to_string(), sasl.password.clone()); + } + Some(protocol::grpc::api::kafka_auth_config::Auth::AwsMskIam(iam)) => { + out.insert("security.protocol".to_string(), "SASL_SSL".to_string()); + out.insert("sasl.mechanism".to_string(), "OAUTHBEARER".to_string()); + out.insert( + "sasl.oauthbearer.extensions".to_string(), + format!("logicalCluster=aws_msk;aws_region={}", iam.region), + ); + } + _ => {} } + out } -fn sql_format_to_runtime(f: SqlFormat) -> Result { - match f { - SqlFormat::Json(j) => Ok(RuntimeFormat::Json(sql_json_format_to_runtime(&j))), - SqlFormat::RawString(_) => Ok(RuntimeFormat::RawString), - SqlFormat::RawBytes(_) => Ok(RuntimeFormat::RawBytes), - other => bail!( - "Kafka connector: format '{}' is not supported for runtime deserializer/serializer yet", - other.name() - ), +fn merge_client_configs( + auth: &Option, + extra: &HashMap, +) -> HashMap { + let mut configs = build_auth_client_configs(auth); + for (k, v) in extra { + configs.insert(k.clone(), v.clone()); } + configs } -fn kafka_table_offset_to_runtime(o: crate::sql::common::KafkaTableSourceOffset) -> SourceOffset { - use crate::sql::common::KafkaTableSourceOffset as 
KOff; - match o { - KOff::Latest => SourceOffset::Latest, - KOff::Earliest => SourceOffset::Earliest, - KOff::Group => SourceOffset::Group, - } -} +// ─────────────── Unified Connector Dispatcher ─────────────── -fn non_zero_rate_per_second(op: &OperatorConfig) -> NonZeroU32 { - op.rate_limit - .as_ref() - .and_then(|r| NonZeroU32::new(r.messages_per_second.max(1))) - .unwrap_or_else(|| NonZeroU32::new(1_000_000).expect("nonzero")) -} +pub struct ConnectorDispatcher; -fn sink_fs_schema_adjusted( - fs: FsSchema, - key_field: &Option, - timestamp_field: &Option, -) -> Result { - if key_field.is_none() && timestamp_field.is_none() { - return Ok(fs); - } - let schema = fs.schema.clone(); - let ts = if let Some(name) = timestamp_field { - schema - .column_with_name(name) - .ok_or_else(|| anyhow!("timestamp column '{name}' not found in schema"))? - .0 - } else { - fs.timestamp_index - }; - let keys = fs.clone_storage_key_indices(); - let routing = if let Some(name) = key_field { - let k = schema - .column_with_name(name) - .ok_or_else(|| anyhow!("key column '{name}' not found in schema"))? 
- .0; - Some(vec![k]) - } else { - fs.clone_routing_key_indices() - }; - Ok(FsSchema::new(schema, ts, keys, routing)) -} - -fn decode_operator_config(op: &ConnectorOp) -> Result { - serde_json::from_str(&op.config).with_context(|| { - format!( - "Invalid OperatorConfig JSON for connector '{}'", - op.connector - ) - }) -} - -pub struct KafkaSourceDispatcher; - -impl OperatorConstructor for KafkaSourceDispatcher { +impl OperatorConstructor for ConnectorDispatcher { fn with_config(&self, payload: &[u8], _registry: Arc) -> Result { let op = ConnectorOp::decode(payload) - .context("Failed to decode ConnectorOp protobuf for Kafka Source")?; - - if op.connector != connector_type::KAFKA { - bail!( - "KafkaSourceDispatcher: expected connector 'kafka', got '{}'", - op.connector - ); + .context("Failed to decode ConnectorOp protobuf")?; + + let fs_schema = op + .fs_schema + .as_ref() + .map(|fs| FsSchema::try_from(fs.clone())) + .transpose() + .map_err(|e| anyhow::anyhow!("{e}"))?; + + match op.config { + Some(Config::KafkaSource(ref cfg)) => { + Self::build_kafka_source(&op.name, cfg, fs_schema) + } + Some(Config::KafkaSink(ref cfg)) => { + Self::build_kafka_sink(&op.name, cfg, fs_schema) + } + Some(Config::Generic(_)) => bail!( + "ConnectorOp '{}': GenericConnectorConfig dispatch not yet implemented", + op.name + ), + None => bail!("ConnectorOp '{}' has no configuration payload", op.name), } + } +} - let op_config = decode_operator_config(&op)?; - - let kafka_config: KafkaConfig = serde_json::from_value(op_config.connection.clone()) - .context("Failed to parse Kafka connection configuration")?; - - let kafka_table: KafkaTable = serde_json::from_value(op_config.table.clone()) - .context("Failed to parse Kafka table configuration")?; - - let TableType::Source { - offset, - read_mode, - group_id, - group_id_prefix, - } = &kafka_table.kind - else { - bail!( - "Expected Kafka Source, got Sink configuration for topic '{}'", - kafka_table.topic - ); - }; +impl ConnectorDispatcher 
{ + fn build_kafka_source( + _name: &str, + cfg: &KafkaSourceConfig, + fs_schema: Option, + ) -> Result { + info!(topic = %cfg.topic, "Constructing Kafka Source"); - info!("Constructing Kafka Source for topic: {}", kafka_table.topic); + let fs = fs_schema.context("fs_schema is required for Kafka Source")?; + let client_configs = merge_client_configs(&cfg.auth, &cfg.client_configs); - let mut client_configs = build_client_configs(&kafka_config, &kafka_table)?; - if let Some(ReadMode::ReadCommitted) = read_mode { - client_configs.insert("isolation.level".to_string(), "read_committed".to_string()); + let mut final_configs = client_configs; + if cfg.read_mode() == KafkaReadMode::KafkaReadCommitted { + final_configs.insert("isolation.level".to_string(), "read_committed".to_string()); } - let sql_format = op_config - .format - .clone() - .context("Format must be specified for Kafka Source")?; - let runtime_format = sql_format_to_runtime(sql_format)?; - let fs = op_config - .input_schema - .clone() - .context("input_schema is required for Kafka Source")?; - let bad = bad_data_policy(op_config.bad_data.clone()); + let runtime_format = proto_format_to_runtime(&cfg.format)?; + let bad_data = proto_bad_data_to_runtime(cfg.bad_data_policy); - let deserializer: std::boxed::Box< - dyn crate::runtime::streaming::operators::source::kafka::BatchDeserializer, - > = Box::new(BufferedDeserializer::new( + let deserializer = Box::new(BufferedDeserializer::new( runtime_format, fs.schema.clone(), - bad, + bad_data, DEFAULT_SOURCE_BATCH_SIZE, )); + let rate = NonZeroU32::new(cfg.rate_limit_msgs_per_sec.max(1)) + .unwrap_or_else(|| NonZeroU32::new(1_000_000).expect("nonzero")); + let source_op = KafkaSourceOperator::new( - kafka_table.topic.clone(), - kafka_config.bootstrap_servers.clone(), - group_id.clone(), - group_id_prefix.clone(), - kafka_table_offset_to_runtime(*offset), - client_configs, - non_zero_rate_per_second(&op_config), - op_config.metadata_fields, + cfg.topic.clone(), + 
cfg.bootstrap_servers.clone(), + cfg.group_id.clone(), + cfg.group_id_prefix.clone(), + proto_offset_to_runtime(cfg.offset_mode), + final_configs, + rate, + vec![], deserializer, ); Ok(ConstructedOperator::Source(Box::new(source_op))) } -} -pub struct KafkaSinkDispatcher; + fn build_kafka_sink( + _name: &str, + cfg: &KafkaSinkConfig, + fs_schema: Option, + ) -> Result { + info!(topic = %cfg.topic, "Constructing Kafka Sink"); -impl OperatorConstructor for KafkaSinkDispatcher { - fn with_config(&self, payload: &[u8], _registry: Arc) -> Result { - let op = ConnectorOp::decode(payload) - .context("Failed to decode ConnectorOp protobuf for Kafka Sink")?; + let fs_in = fs_schema.context("fs_schema is required for Kafka Sink")?; + let client_configs = merge_client_configs(&cfg.auth, &cfg.client_configs); - if op.connector != connector_type::KAFKA { - bail!( - "KafkaSinkDispatcher: expected connector 'kafka', got '{}'", - op.connector - ); - } - - let op_config = decode_operator_config(&op)?; - - let kafka_config: KafkaConfig = serde_json::from_value(op_config.connection.clone()) - .context("Failed to parse Kafka connection configuration")?; - - let kafka_table: KafkaTable = serde_json::from_value(op_config.table.clone()) - .context("Failed to parse Kafka table configuration")?; - - let TableType::Sink { - commit_mode, - key_field, - timestamp_field, - } = &kafka_table.kind - else { - bail!( - "Expected Kafka Sink, got Source configuration for topic '{}'", - kafka_table.topic - ); + let consistency = match cfg.commit_mode() { + KafkaSinkCommitMode::KafkaSinkExactlyOnce => ConsistencyMode::ExactlyOnce, + KafkaSinkCommitMode::KafkaSinkAtLeastOnce => ConsistencyMode::AtLeastOnce, }; - info!("Constructing Kafka Sink for topic: {}", kafka_table.topic); - - let client_configs = build_client_configs(&kafka_config, &kafka_table)?; - - let consistency = match commit_mode { - SinkCommitMode::ExactlyOnce => ConsistencyMode::ExactlyOnce, - SinkCommitMode::AtLeastOnce => 
ConsistencyMode::AtLeastOnce, - }; - - let sql_format = op_config - .format - .clone() - .context("Format must be specified for Kafka Sink")?; - let runtime_format = sql_format_to_runtime(sql_format)?; - - let fs_in = op_config - .input_schema - .clone() - .context("input_schema is required for Kafka Sink")?; - let fs = sink_fs_schema_adjusted(fs_in, key_field, timestamp_field)?; - + let runtime_format = proto_format_to_runtime(&cfg.format)?; + let fs = sink_fs_schema_adjusted(fs_in, &cfg.key_field, &cfg.timestamp_field)?; let serializer = DataSerializer::new(runtime_format, fs.schema.clone()); let sink_op = KafkaSinkOperator::new( - kafka_table.topic.clone(), - kafka_config.bootstrap_servers.clone(), + cfg.topic.clone(), + cfg.bootstrap_servers.clone(), consistency, client_configs, fs, @@ -329,3 +226,37 @@ impl OperatorConstructor for KafkaSinkDispatcher { Ok(ConstructedOperator::Operator(Box::new(sink_op))) } } + +fn sink_fs_schema_adjusted( + fs: FsSchema, + key_field: &Option, + timestamp_field: &Option, +) -> Result { + if key_field.is_none() && timestamp_field.is_none() { + return Ok(fs); + } + let schema = fs.schema.clone(); + let ts = if let Some(name) = timestamp_field { + schema + .column_with_name(name) + .ok_or_else(|| anyhow::anyhow!("timestamp column '{name}' not found in schema"))? + .0 + } else { + fs.timestamp_index + }; + let keys = fs.clone_storage_key_indices(); + let routing = if let Some(name) = key_field { + let k = schema + .column_with_name(name) + .ok_or_else(|| anyhow::anyhow!("key column '{name}' not found in schema"))? + .0; + Some(vec![k]) + } else { + fs.clone_routing_key_indices() + }; + Ok(FsSchema::new(schema, ts, keys, routing)) +} + +// Legacy dispatcher aliases kept for backward compatibility with factory registration. 
+pub type KafkaSourceDispatcher = ConnectorDispatcher; +pub type KafkaSinkDispatcher = ConnectorDispatcher; diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs index 1315e4de..442c9bf9 100644 --- a/src/runtime/streaming/factory/mod.rs +++ b/src/runtime/streaming/factory/mod.rs @@ -43,11 +43,11 @@ fn register_builtin_connectors(factory: &mut OperatorFactory) { fn register_kafka_connector_plugins(factory: &mut OperatorFactory) { factory.register( factory_operator_name::KAFKA_SOURCE, - Box::new(connector::KafkaSourceDispatcher), + Box::new(connector::kafka::ConnectorDispatcher), ); factory.register( factory_operator_name::KAFKA_SINK, - Box::new(connector::KafkaSinkDispatcher), + Box::new(connector::kafka::ConnectorDispatcher), ); info!( "Registered Kafka connector plugins ({}, {})", diff --git a/src/runtime/streaming/factory/operator_factory.rs b/src/runtime/streaming/factory/operator_factory.rs index d11a1555..5a2dc26f 100644 --- a/src/runtime/streaming/factory/operator_factory.rs +++ b/src/runtime/streaming/factory/operator_factory.rs @@ -15,7 +15,7 @@ use anyhow::{anyhow, Result}; use prost::Message; use std::collections::HashMap; use std::sync::Arc; - +use protocol::grpc::api::ProjectionOperator as ProjectionOperatorProto; use super::operator_constructor::OperatorConstructor; use crate::runtime::streaming::api::operator::ConstructedOperator; use crate::runtime::streaming::factory::connector::{ @@ -32,16 +32,10 @@ use crate::runtime::streaming::operators::windows::{ SessionAggregatingWindowConstructor, SlidingAggregatingWindowConstructor, TumblingAggregateWindowConstructor, WindowFunctionConstructor, }; -use crate::runtime::streaming::operators::{ - ProjectionOperator, StatelessPhysicalExecutor, ValueExecutionOperator, -}; -use crate::sql::common::FsSchema; -use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; -use datafusion_proto::physical_plan::from_proto::parse_physical_expr; -use 
datafusion_proto::protobuf::PhysicalExprNode; +use crate::runtime::streaming::operators::{ProjectionOperator, StatelessPhysicalExecutor, ValueExecutionOperator}; use protocol::grpc::api::{ ExpressionWatermarkConfig, JoinOperator as JoinOperatorProto, - KeyPlanOperator as KeyByProto, ProjectionOperator as ProjectionOperatorProto, + KeyPlanOperator as KeyByProto, SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator, UpdatingAggregateOperator, ValuePlanOperator, WindowFunctionOperator as WindowFunctionProto, }; @@ -110,12 +104,11 @@ impl OperatorFactory { self.register_named(OperatorName::KeyBy, Box::new(KeyByBridge)); - self.register_named(OperatorName::Projection, Box::new(ProjectionBridge)); + self.register_named(OperatorName::Projection, Box::new(ProjectionConstructor)); self.register_named(OperatorName::Value, Box::new(ValueBridge)); self.register_named(OperatorName::ConnectorSource, Box::new(ConnectorSourceBridge)); self.register_named(OperatorName::ConnectorSink, Box::new(ConnectorSinkBridge)); - crate::runtime::streaming::factory::register_builtin_connectors(self); crate::runtime::streaming::factory::register_kafka_connector_plugins(self); } } @@ -217,12 +210,12 @@ impl OperatorConstructor for KeyByBridge { } } -struct ProjectionBridge; -impl OperatorConstructor for ProjectionBridge { - fn with_config(&self, config: &[u8], registry: Arc) -> Result { - let proto = ProjectionOperatorProto::decode(config) - .map_err(|e| anyhow!("Decode ProjectionOperator failed: {e}"))?; - let op = ProjectionExecutionConstructor.with_config(proto, registry)?; +pub struct ProjectionConstructor; + +impl OperatorConstructor for ProjectionConstructor { + fn with_config(&self, payload: &[u8], registry: Arc) -> Result { + let proto = ProjectionOperatorProto::decode(payload)?; + let op = ProjectionOperator::from_proto(proto, registry)?; Ok(ConstructedOperator::Operator(Box::new(op))) } } @@ -253,51 +246,6 @@ impl OperatorConstructor for 
ConnectorSinkBridge { } } -struct ProjectionExecutionConstructor; -impl ProjectionExecutionConstructor { - fn with_config( - &self, - config: ProjectionOperatorProto, - registry: Arc, - ) -> Result { - let input_schema: FsSchema = config - .input_schema - .ok_or_else(|| anyhow!("missing projection input_schema"))? - .try_into() - .map_err(|e| anyhow!("projection input_schema: {e}"))?; - let output_schema: FsSchema = config - .output_schema - .ok_or_else(|| anyhow!("missing projection output_schema"))? - .try_into() - .map_err(|e| anyhow!("projection output_schema: {e}"))?; - - let exprs = config - .exprs - .iter() - .map(|raw| { - let expr_node = PhysicalExprNode::decode(&mut raw.as_slice()) - .map_err(|e| anyhow!("decode projection expr: {e}"))?; - parse_physical_expr( - &expr_node, - registry.as_ref(), - &input_schema.schema, - &DefaultPhysicalExtensionCodec {}, - ) - .map_err(|e| anyhow!("parse projection expr: {e}")) - }) - .collect::>>()?; - - Ok(ProjectionOperator::new( - if config.name.is_empty() { - OperatorName::Projection.as_registry_key().to_string() - } else { - config.name - }, - Arc::new(output_schema), - exprs, - )) - } -} struct ValueExecutionConstructor; impl ValueExecutionConstructor { diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs index 20ecad9f..88df6457 100644 --- a/src/runtime/streaming/job/job_manager.rs +++ b/src/runtime/streaming/job/job_manager.rs @@ -21,8 +21,10 @@ use tracing::{error, info, warn}; use protocol::grpc::api::{ChainedOperator, FsProgram}; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{ConstructedOperator, MessageOperator}; -use crate::runtime::streaming::execution::runner::Pipeline; +use crate::runtime::streaming::api::operator::{ConstructedOperator, Operator}; +use crate::runtime::streaming::api::source::SourceOperator; +use crate::runtime::streaming::execution::runner::{ChainedDriver, Pipeline}; +use 
crate::runtime::streaming::execution::source::SourceRunner; use crate::runtime::streaming::factory::OperatorFactory; use crate::runtime::streaming::job::edge_manager::EdgeManager; use crate::runtime::streaming::job::models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus}; @@ -38,6 +40,11 @@ pub struct JobManager { memory_pool: Arc, } +struct PreparedChain { + source: Option>, + operators: Vec>, +} + impl JobManager { pub fn new(operator_factory: Arc, max_memory_bytes: usize) -> Self { Self { @@ -76,20 +83,44 @@ impl JobManager { .map(|rx| Box::pin(ReceiverStream::new(rx)) as _) .collect(); - let operators = self.build_operator_chain(&node.operators)?; + let chain = self.build_operator_chain(&node.operators)?; + if chain.source.is_none() && physical_inboxes.is_empty() { + anyhow::bail!( + "Topology Error: pipeline '{}' contains no source operator and has no upstream inputs.", + pipeline_id + ); + } + if chain.source.is_some() && !physical_inboxes.is_empty() { + anyhow::bail!( + "Topology Error: source pipeline '{}' should not have upstream inputs.", + pipeline_id + ); + } let (control_tx, control_rx) = mpsc::channel(64); let status = Arc::new(RwLock::new(PipelineStatus::Initializing)); - let handle = self.spawn_pipeline_thread( - job_id.clone(), - pipeline_id, - operators, - physical_inboxes, - physical_outboxes, - control_rx, - Arc::clone(&status), - )?; + let handle = if let Some(source) = chain.source { + self.spawn_source_pipeline_thread( + job_id.clone(), + pipeline_id, + source, + chain.operators, + physical_outboxes, + control_rx, + Arc::clone(&status), + )? + } else { + self.spawn_pipeline_thread( + job_id.clone(), + pipeline_id, + chain.operators, + physical_inboxes, + physical_outboxes, + control_rx, + Arc::clone(&status), + )? 
+ }; pipelines.insert( pipeline_id, @@ -153,7 +184,8 @@ impl JobManager { fn build_operator_chain( &self, operator_configs: &[ChainedOperator], - ) -> anyhow::Result>> { + ) -> anyhow::Result { + let mut source: Option> = None; let mut chain = Vec::with_capacity(operator_configs.len()); for op_config in operator_configs { @@ -162,22 +194,33 @@ impl JobManager { match constructed { ConstructedOperator::Operator(msg_op) => chain.push(msg_op), - ConstructedOperator::Source(_) => { - anyhow::bail!( - "Topology Error: Source operator '{}' cannot be scheduled inside a MessageOperator physical chain.", - op_config.operator_name - ); + ConstructedOperator::Source(src_op) => { + if source.is_some() { + anyhow::bail!( + "Topology Error: Multiple source operators detected in one physical chain." + ); + } + if !chain.is_empty() { + anyhow::bail!( + "Topology Error: Source operator '{}' cannot be scheduled inside a MessageOperator physical chain.", + op_config.operator_name + ); + } + source = Some(src_op); } } } - Ok(chain) + Ok(PreparedChain { + source, + operators: chain, + }) } fn spawn_pipeline_thread( &self, job_id: String, pipeline_id: u32, - operators: Vec>, + operators: Vec>, inboxes: Vec, outboxes: Vec, control_rx: mpsc::Receiver, @@ -221,6 +264,57 @@ impl JobManager { Ok(handle) } + fn spawn_source_pipeline_thread( + &self, + job_id: String, + pipeline_id: u32, + source: Box, + operators: Vec>, + outboxes: Vec, + control_rx: mpsc::Receiver, + status: Arc>, + ) -> anyhow::Result> { + let memory_pool = Arc::clone(&self.memory_pool); + let thread_name = format!("Task-{job_id}-{pipeline_id}"); + + let handle = std::thread::Builder::new() + .name(thread_name) + .spawn(move || { + *status.write().unwrap() = PipelineStatus::Running; + + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("Failed to build current-thread Tokio runtime for source pipeline"); + + let job_id_inner = job_id.clone(); + let execution_result = 
std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + rt.block_on(async move { + let ctx = TaskContext::new( + job_id_inner, + pipeline_id, + 0, + 1, + outboxes, + memory_pool, + ); + + let chain_head = ChainedDriver::build_chain(operators); + let runner = SourceRunner::new(source, chain_head, ctx, control_rx); + + runner + .run() + .await + .map_err(|e| anyhow::anyhow!("Source pipeline execution failed: {e}")) + }) + })); + + Self::handle_pipeline_exit(&job_id, pipeline_id, execution_result, &status); + })?; + + Ok(handle) + } + fn handle_pipeline_exit( job_id: &str, pipeline_id: u32, diff --git a/src/runtime/streaming/lib.rs b/src/runtime/streaming/lib.rs deleted file mode 100644 index 4dd6316b..00000000 --- a/src/runtime/streaming/lib.rs +++ /dev/null @@ -1,40 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Streaming actor runtime (vendored from Arroyo `arroyo-actor-runtime`). 
- -pub mod api; -pub mod error; -pub mod execution; -pub mod factory; -pub mod job; -pub mod memory; -pub mod network; -pub mod operators; -pub mod protocol; - -pub use api::{ - ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext, -}; -pub use error::RunError; -pub use execution::{ - OperatorDrive, SourceRunner, SubtaskRunner, SOURCE_IDLE_SLEEP, WATERMARK_EMIT_INTERVAL, -}; -pub use factory::{OperatorConstructor, OperatorFactory}; -pub use job::{JobManager, PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus}; -pub use memory::{MemoryPool, MemoryTicket}; -pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub}; -pub use protocol::{ - CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput, - control_channel, merge_watermarks, watermark_strictly_advances, -}; diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs index 1da5f952..6fd45abd 100644 --- a/src/runtime/streaming/mod.rs +++ b/src/runtime/streaming/mod.rs @@ -25,7 +25,7 @@ pub mod operators; pub mod protocol; pub use api::{ - ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext, + ConstructedOperator, Operator, SourceEvent, SourceOffset, SourceOperator, TaskContext, }; pub use error::RunError; pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner}; diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs index 4b1af6b3..16d92fd1 100644 --- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs +++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs @@ -43,7 +43,7 @@ use protocol::grpc::api::UpdatingAggregateOperator; // ========================================================================= // ========================================================================= use crate::runtime::streaming::api::context::TaskContext; 
-use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use crate::runtime::streaming::factory::Registry; use crate::runtime::util::decode_aggregate; use crate::runtime::streaming::operators::{Key, UpdatingCache}; @@ -657,7 +657,7 @@ fn set_retract_metadata(metadata: ArrayRef, is_retract: Arc) -> Ar // ========================================================================= #[async_trait::async_trait] -impl MessageOperator for IncrementalAggregatingFunc { +impl Operator for IncrementalAggregatingFunc { fn name(&self) -> &str { "UpdatingAggregatingFunc" } diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs index ef49c323..18ed3599 100644 --- a/src/runtime/streaming/operators/joins/join_instance.rs +++ b/src/runtime/streaming/operators/joins/join_instance.rs @@ -29,7 +29,7 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use crate::runtime::streaming::factory::Registry; use async_trait::async_trait; use protocol::grpc::api::JoinOperator; @@ -197,7 +197,7 @@ impl InstantJoinOperator { } #[async_trait] -impl MessageOperator for InstantJoinOperator { +impl Operator for InstantJoinOperator { fn name(&self) -> &str { "InstantJoin" } diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs index 91fd38a6..212cfaad 100644 --- a/src/runtime/streaming/operators/joins/join_with_expiration.rs +++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs @@ -26,7 +26,7 @@ use std::time::{Duration, SystemTime}; use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; -use 
crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use crate::runtime::streaming::factory::Registry; use async_trait::async_trait; use protocol::grpc::api::JoinOperator; @@ -180,7 +180,7 @@ impl JoinWithExpirationOperator { } #[async_trait] -impl MessageOperator for JoinWithExpirationOperator { +impl Operator for JoinWithExpirationOperator { fn name(&self) -> &str { "JoinWithExpiration" } diff --git a/src/runtime/streaming/operators/key_by.rs b/src/runtime/streaming/operators/key_by.rs index 8d0da441..edafc063 100644 --- a/src/runtime/streaming/operators/key_by.rs +++ b/src/runtime/streaming/operators/key_by.rs @@ -21,7 +21,7 @@ use datafusion_common::hash_utils::create_hashes; use std::sync::Arc; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{CheckpointBarrier, Watermark}; @@ -44,7 +44,7 @@ impl KeyByOperator { } #[async_trait] -impl MessageOperator for KeyByOperator { +impl Operator for KeyByOperator { fn name(&self) -> &str { &self.name } diff --git a/src/runtime/streaming/operators/key_operator.rs b/src/runtime/streaming/operators/key_operator.rs index 0202f924..4a3942e0 100644 --- a/src/runtime/streaming/operators/key_operator.rs +++ b/src/runtime/streaming/operators/key_operator.rs @@ -23,7 +23,7 @@ use futures::StreamExt; use std::sync::Arc; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use crate::runtime::streaming::operators::StatelessPhysicalExecutor; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{CheckpointBarrier, Watermark}; @@ -47,7 +47,7 @@ impl KeyByOperator { } #[async_trait] -impl MessageOperator for KeyByOperator { +impl Operator 
for KeyByOperator { fn name(&self) -> &str { &self.name } @@ -192,7 +192,7 @@ impl KeyExecutionOperator { } #[async_trait] -impl MessageOperator for KeyExecutionOperator { +impl Operator for KeyExecutionOperator { fn name(&self) -> &str { &self.name } diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs index b679f2bd..18a98830 100644 --- a/src/runtime/streaming/operators/mod.rs +++ b/src/runtime/streaming/operators/mod.rs @@ -19,7 +19,7 @@ pub mod source; pub mod watermark; pub mod windows; mod key_operator; -mod projection; +pub mod projection; mod stateless_physical_executor; mod value_execution; diff --git a/src/runtime/streaming/operators/projection.rs b/src/runtime/streaming/operators/projection.rs index 3afb93ef..d2f54b8c 100644 --- a/src/runtime/streaming/operators/projection.rs +++ b/src/runtime/streaming/operators/projection.rs @@ -10,17 +10,24 @@ // See the License for the specific language governing permissions and // limitations under the License. 
- -use anyhow::Result; +use anyhow::{anyhow, Context, Result}; use arrow_array::RecordBatch; use async_trait::async_trait; use datafusion::physical_expr::PhysicalExpr; +use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; +use datafusion_proto::physical_plan::from_proto::parse_physical_expr; +use datafusion_proto::protobuf::PhysicalExprNode; +use prost::Message; use std::sync::Arc; +use protocol::grpc::api::ProjectionOperator as ProjectionOperatorProto; + use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::{ConstructedOperator, Operator}; +use crate::runtime::streaming::factory::global::Registry; use crate::runtime::streaming::StreamOutput; -use crate::sql::common::{CheckpointBarrier, FsSchemaRef, Watermark}; +use crate::sql::common::{CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; +use crate::sql::logical_node::logical::OperatorName; pub struct ProjectionOperator { name: String, @@ -40,10 +47,52 @@ impl ProjectionOperator { exprs, } } + + pub fn from_proto( + config: ProjectionOperatorProto, + registry: Arc, + ) -> Result { + let input_schema: FsSchema = config + .input_schema + .ok_or_else(|| anyhow!("missing projection input_schema"))? + .try_into() + .map_err(|e| anyhow!("projection input_schema: {e}"))?; + + let output_schema: FsSchema = config + .output_schema + .ok_or_else(|| anyhow!("missing projection output_schema"))? 
+ .try_into() + .map_err(|e| anyhow!("projection output_schema: {e}"))?; + + let exprs = config + .exprs + .iter() + .map(|raw| { + let expr_node = PhysicalExprNode::decode(&mut raw.as_slice()) + .map_err(|e| anyhow!("decode projection expr: {e}"))?; + parse_physical_expr( + &expr_node, + registry.as_ref(), + &input_schema.schema, + &DefaultPhysicalExtensionCodec {}, + ) + .map_err(|e| anyhow!("parse projection expr: {e}")) + }) + .collect::>>()?; + + let name = if config.name.is_empty() { + OperatorName::Projection.as_registry_key().to_string() + } else { + config.name + }; + + Ok(Self::new(name, Arc::new(output_schema), exprs)) + + } } #[async_trait] -impl MessageOperator for ProjectionOperator { +impl Operator for ProjectionOperator { fn name(&self) -> &str { &self.name } diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs index 9f82a4ce..4b6d48cb 100644 --- a/src/runtime/streaming/operators/sink/kafka/mod.rs +++ b/src/runtime/streaming/operators/sink/kafka/mod.rs @@ -27,7 +27,7 @@ use tokio::time::sleep; use tracing::{info, warn}; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use crate::runtime::streaming::format::DataSerializer; use crate::runtime::streaming::StreamOutput; use crate::sql::common::constants::factory_operator_name; @@ -205,7 +205,7 @@ fn row_key_bytes(batch: &RecordBatch, row: usize, col: usize) -> Option> // ============================================================================ #[async_trait] -impl MessageOperator for KafkaSinkOperator { +impl Operator for KafkaSinkOperator { fn name(&self) -> &str { factory_operator_name::KAFKA_SINK } diff --git a/src/runtime/streaming/operators/value_execution.rs b/src/runtime/streaming/operators/value_execution.rs index 86596512..effdf5f6 100644 --- a/src/runtime/streaming/operators/value_execution.rs +++ 
b/src/runtime/streaming/operators/value_execution.rs @@ -17,7 +17,7 @@ use async_trait::async_trait; use futures::StreamExt; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use crate::runtime::streaming::operators::StatelessPhysicalExecutor; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{CheckpointBarrier, Watermark}; @@ -34,7 +34,7 @@ impl ValueExecutionOperator { } #[async_trait] -impl MessageOperator for ValueExecutionOperator { +impl Operator for ValueExecutionOperator { fn name(&self) -> &str { &self.name } diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs index b512f842..0fee4a38 100644 --- a/src/runtime/streaming/operators/watermark/watermark_generator.rs +++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs @@ -27,7 +27,7 @@ use std::time::{Duration, SystemTime}; use tracing::{debug, info}; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use crate::runtime::streaming::factory::Registry; use async_trait::async_trait; use protocol::grpc::api::ExpressionWatermarkConfig; @@ -103,7 +103,7 @@ impl WatermarkGeneratorOperator { } #[async_trait] -impl MessageOperator for WatermarkGeneratorOperator { +impl Operator for WatermarkGeneratorOperator { fn name(&self) -> &str { "ExpressionWatermarkGenerator" } diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs index d7257223..93376c4c 100644 --- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs @@ -35,7 +35,7 @@ use 
std::time::{Duration, SystemTime}; use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use async_trait::async_trait; use crate::runtime::streaming::factory::Registry; use protocol::grpc::api::SessionWindowAggregateOperator; @@ -603,7 +603,7 @@ impl SessionWindowOperator { } #[async_trait] -impl MessageOperator for SessionWindowOperator { +impl Operator for SessionWindowOperator { fn name(&self) -> &str { "SessionWindow" } diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs index 7bad21bc..19a539f3 100644 --- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs @@ -34,7 +34,7 @@ use std::time::{Duration, SystemTime}; use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use async_trait::async_trait; use crate::runtime::streaming::factory::Registry; use protocol::grpc::api::SlidingWindowAggregateOperator; @@ -317,7 +317,7 @@ impl SlidingWindowOperator { } #[async_trait] -impl MessageOperator for SlidingWindowOperator { +impl Operator for SlidingWindowOperator { fn name(&self) -> &str { "SlidingWindow" } diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs index 093823bb..c0342d66 100644 --- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs +++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs @@ -36,7 +36,7 @@ use 
tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use async_trait::async_trait; use crate::runtime::streaming::factory::Registry; use protocol::grpc::api::TumblingWindowAggregateOperator; @@ -145,7 +145,7 @@ impl TumblingWindowOperator { } #[async_trait] -impl MessageOperator for TumblingWindowOperator { +impl Operator for TumblingWindowOperator { fn name(&self) -> &str { "TumblingWindow" } diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs index d067eccf..4ab68cfd 100644 --- a/src/runtime/streaming/operators/windows/window_function.rs +++ b/src/runtime/streaming/operators/windows/window_function.rs @@ -29,7 +29,7 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use tracing::warn; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::MessageOperator; +use crate::runtime::streaming::api::operator::Operator; use crate::runtime::streaming::factory::Registry; use async_trait::async_trait; use crate::runtime::streaming::StreamOutput; @@ -153,7 +153,7 @@ impl WindowFunctionOperator { } #[async_trait] -impl MessageOperator for WindowFunctionOperator { +impl Operator for WindowFunctionOperator { fn name(&self) -> &str { "WindowFunction" } diff --git a/src/sql/common/connector_options.rs b/src/sql/common/connector_options.rs index 98e3299e..6f82782e 100644 --- a/src/sql/common/connector_options.rs +++ b/src/sql/common/connector_options.rs @@ -48,6 +48,21 @@ fn sql_expr_to_catalog_string(e: &Expr) -> String { } impl ConnectorOptions { + /// Build options from persisted catalog string maps (same semantics as SQL `WITH` literals). 
+ pub fn from_flat_string_map(map: HashMap) -> DFResult { + let mut options = HashMap::with_capacity(map.len()); + for (k, v) in map { + options.insert( + k, + Expr::Value(SqlValue::SingleQuotedString(v).with_empty_span()), + ); + } + Ok(Self { + options, + partitions: Vec::new(), + }) + } + pub fn new(sql_opts: &[SqlOption], partition_by: &Option>) -> DFResult { let mut options = HashMap::new(); diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs index 3302eb10..fa37a9fd 100644 --- a/src/sql/common/mod.rs +++ b/src/sql/common/mod.rs @@ -58,7 +58,7 @@ pub use kafka_catalog::{ }; pub use errors::{DataflowError, DataflowResult}; pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat}; -pub use operator_config::{MetadataField, OperatorConfig, RateLimit}; +pub use operator_config::MetadataField; // ── Well-known column names ── pub use constants::sql_field::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; diff --git a/src/sql/common/operator_config.rs b/src/sql/common/operator_config.rs index ba61b36a..b5360cd7 100644 --- a/src/sql/common/operator_config.rs +++ b/src/sql/common/operator_config.rs @@ -1,45 +1,12 @@ // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
use serde::{Deserialize, Serialize}; -use serde_json::Value; - -use super::formats::{BadData, Format, Framing}; -use super::fs_schema::FsSchema; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RateLimit { - pub messages_per_second: u32, -} #[derive(Debug, Clone, Serialize, Deserialize)] pub struct MetadataField { pub field_name: String, pub key: String, - /// JSON-encoded Arrow DataType string, e.g. `"Utf8"`, `"Int64"`. #[serde(default)] pub data_type: Option, } - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct OperatorConfig { - pub connection: Value, - pub table: Value, - pub format: Option, - pub bad_data: Option, - pub framing: Option, - pub rate_limit: Option, - #[serde(default)] - pub metadata_fields: Vec, - #[serde(default)] - pub input_schema: Option, -} diff --git a/src/sql/extensions/lookup.rs b/src/sql/extensions/lookup.rs index 684a8f97..8371efce 100644 --- a/src/sql/extensions/lookup.rs +++ b/src/sql/extensions/lookup.rs @@ -10,6 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::collections::HashMap; use std::fmt::Formatter; use std::sync::Arc; @@ -21,7 +22,7 @@ use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; use prost::Message; use protocol::grpc::api; -use protocol::grpc::api::{ConnectorOp, LookupJoinCondition, LookupJoinOperator}; +use protocol::grpc::api::{ConnectorOp, GenericConnectorConfig, LookupJoinCondition, LookupJoinOperator}; use crate::multifield_partial_ord; use crate::sql::common::constants::extension_node; @@ -156,13 +157,24 @@ impl StreamReferenceJoinNode { let lookup_fs_schema = FsSchema::from_schema_unkeyed(add_timestamp_field_arrow(dictionary_physical_schema))?; + let properties: HashMap = self + .external_dictionary + .catalog_with_options + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + Ok(LookupJoinOperator { input_schema: Some(internal_input_schema.into()), - lookup_schema: Some(lookup_fs_schema.into()), + lookup_schema: Some(lookup_fs_schema.clone().into()), connector: Some(ConnectorOp { connector: self.external_dictionary.adapter_type.clone(), - config: self.external_dictionary.opaque_config.clone(), + fs_schema: Some(lookup_fs_schema.into()), + name: self.external_dictionary.table_identifier.clone(), description: self.external_dictionary.description.clone(), + config: Some(protocol::grpc::api::connector_op::Config::Generic( + GenericConnectorConfig { properties }, + )), }), key_exprs: self.compile_join_conditions(planner)?, join_type: self.map_api_join_type()?, diff --git a/src/sql/schema/connector_config.rs b/src/sql/schema/connector_config.rs new file mode 100644 index 00000000..f47e05d9 --- /dev/null +++ b/src/sql/schema/connector_config.rs @@ -0,0 +1,82 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// Strongly-typed in-memory connector configuration for the SQL catalog layer. +// Maps 1:1 to the `ConnectorOp.oneof config` proto variants. 
+ +use std::collections::HashMap; + +use protocol::grpc::api::{ + connector_op, GenericConnectorConfig, KafkaSinkConfig, KafkaSourceConfig, +}; + +/// Strongly-typed connector configuration stored in [`super::SourceTable`]. +/// +/// Each variant corresponds directly to a proto `ConnectorOp.oneof config` branch. +/// Adding a new connector (e.g. MySQL CDC) means adding a variant here and a proto message — +/// the Rust compiler will then guide you to every call-site that needs updating. +#[derive(Debug, Clone)] +pub enum ConnectorConfig { + KafkaSource(KafkaSourceConfig), + KafkaSink(KafkaSinkConfig), + /// Fallback for connectors not yet strongly typed (e.g. future Redis, JDBC). + Generic(HashMap), +} + +impl ConnectorConfig { + /// Convert to the proto `ConnectorOp.oneof config` representation — zero JSON involved. + pub fn to_proto_config(&self) -> connector_op::Config { + match self { + ConnectorConfig::KafkaSource(cfg) => { + connector_op::Config::KafkaSource(cfg.clone()) + } + ConnectorConfig::KafkaSink(cfg) => { + connector_op::Config::KafkaSink(cfg.clone()) + } + ConnectorConfig::Generic(props) => { + connector_op::Config::Generic(GenericConnectorConfig { + properties: props.clone(), + }) + } + } + } +} + +// Proto-generated types do not derive Eq/Hash/PartialEq since they contain f32/f64 +// in the general case. For our subset (Kafka configs) all fields are integers, strings, +// and maps — logically hashable. We impl the traits via serialized proto bytes so the +// SourceTable derive chain stays intact. 
+ +impl PartialEq for ConnectorConfig { + fn eq(&self, other: &Self) -> bool { + use prost::Message; + match (self, other) { + (ConnectorConfig::KafkaSource(a), ConnectorConfig::KafkaSource(b)) => { + a.encode_to_vec() == b.encode_to_vec() + } + (ConnectorConfig::KafkaSink(a), ConnectorConfig::KafkaSink(b)) => { + a.encode_to_vec() == b.encode_to_vec() + } + (ConnectorConfig::Generic(a), ConnectorConfig::Generic(b)) => a == b, + _ => false, + } + } +} + +impl Eq for ConnectorConfig {} + +impl std::hash::Hash for ConnectorConfig { + fn hash(&self, state: &mut H) { + use prost::Message; + std::mem::discriminant(self).hash(state); + match self { + ConnectorConfig::KafkaSource(cfg) => cfg.encode_to_vec().hash(state), + ConnectorConfig::KafkaSink(cfg) => cfg.encode_to_vec().hash(state), + ConnectorConfig::Generic(m) => { + let mut pairs: Vec<_> = m.iter().collect(); + pairs.sort_by_key(|(k, _)| (*k).clone()); + pairs.hash(state); + } + } + } +} diff --git a/src/sql/schema/kafka_operator_config.rs b/src/sql/schema/kafka_operator_config.rs new file mode 100644 index 00000000..4dd70906 --- /dev/null +++ b/src/sql/schema/kafka_operator_config.rs @@ -0,0 +1,250 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// Builds strongly-typed proto Kafka configs from SQL DDL WITH options. 
+ +use std::collections::HashMap; + +use datafusion::arrow::datatypes::Schema; +use datafusion::common::{Result as DFResult, plan_datafusion_err, plan_err}; + +use protocol::grpc::api::connector_op::Config as ProtoConfig; +use protocol::grpc::api::{ + BadDataPolicy, DecimalEncodingProto, FormatConfig, JsonFormatConfig, KafkaAuthConfig, + KafkaAuthNone, KafkaOffsetMode, KafkaReadMode, KafkaSinkCommitMode, KafkaSinkConfig, + KafkaSourceConfig, RawBytesFormatConfig, RawStringFormatConfig, TimestampFormatProto, +}; + +use crate::sql::common::constants::{connection_table_role, kafka_with_value}; +use crate::sql::common::connector_options::ConnectorOptions; +use crate::sql::common::formats::{ + BadData, DecimalEncoding as SqlDecimalEncoding, Format as SqlFormat, + TimestampFormat as SqlTimestampFormat, +}; +use crate::sql::common::with_option_keys as opt; +use crate::sql::schema::table_role::TableRole; + +fn sql_format_to_proto(fmt: &SqlFormat) -> DFResult { + match fmt { + SqlFormat::Json(j) => Ok(FormatConfig { + format: Some(protocol::grpc::api::format_config::Format::Json( + JsonFormatConfig { + timestamp_format: match j.timestamp_format { + SqlTimestampFormat::RFC3339 => TimestampFormatProto::TimestampRfc3339 as i32, + SqlTimestampFormat::UnixMillis => { + TimestampFormatProto::TimestampUnixMillis as i32 + } + }, + decimal_encoding: match j.decimal_encoding { + SqlDecimalEncoding::Number => DecimalEncodingProto::DecimalNumber as i32, + SqlDecimalEncoding::String => DecimalEncodingProto::DecimalString as i32, + SqlDecimalEncoding::Bytes => DecimalEncodingProto::DecimalBytes as i32, + }, + include_schema: j.include_schema, + confluent_schema_registry: j.confluent_schema_registry, + schema_id: j.schema_id, + debezium: j.debezium, + unstructured: j.unstructured, + }, + )), + }), + SqlFormat::RawString(_) => Ok(FormatConfig { + format: Some(protocol::grpc::api::format_config::Format::RawString( + RawStringFormatConfig {}, + )), + }), + SqlFormat::RawBytes(_) => 
Ok(FormatConfig { + format: Some(protocol::grpc::api::format_config::Format::RawBytes( + RawBytesFormatConfig {}, + )), + }), + other => plan_err!( + "Kafka connector: format '{}' is not supported yet", + other.name() + ), + } +} + +fn sql_bad_data_to_proto(bad: &BadData) -> i32 { + match bad { + BadData::Fail {} => BadDataPolicy::BadDataFail as i32, + BadData::Drop {} => BadDataPolicy::BadDataDrop as i32, + } +} + +/// Build Kafka proto config from a flat string map (catalog rebuild path). +pub fn build_kafka_proto_config_from_string_map( + map: HashMap, + _physical_schema: &Schema, +) -> DFResult { + let mut options = ConnectorOptions::from_flat_string_map(map)?; + let format = crate::sql::common::formats::Format::from_opts(&mut options) + .map_err(|e| datafusion::error::DataFusionError::Plan(format!("invalid format: {e}")))?; + let bad_data = BadData::from_opts(&mut options) + .map_err(|e| datafusion::error::DataFusionError::Plan(format!("Invalid bad_data: '{e}'")))?; + let _framing = crate::sql::common::formats::Framing::from_opts(&mut options) + .map_err(|e| datafusion::error::DataFusionError::Plan(format!("invalid framing: '{e}'")))?; + + let role = match options.pull_opt_str(opt::TYPE)?.as_deref() { + None | Some(connection_table_role::SOURCE) => TableRole::Ingestion, + Some(connection_table_role::SINK) => TableRole::Egress, + Some(connection_table_role::LOOKUP) => TableRole::Reference, + Some(other) => { + return plan_err!("invalid connection type '{other}' in WITH options"); + } + }; + + build_kafka_proto_config(&mut options, role, &format, bad_data) +} + +/// Core builder shared by SQL DDL and catalog reload paths. +pub fn build_kafka_proto_config( + options: &mut ConnectorOptions, + role: TableRole, + format: &Option, + bad_data: BadData, +) -> DFResult { + let bootstrap_servers = match options.pull_opt_str(opt::KAFKA_BOOTSTRAP_SERVERS)? { + Some(s) => s, + None => options + .pull_opt_str(opt::KAFKA_BOOTSTRAP_SERVERS_LEGACY)? 
+ .ok_or_else(|| { + plan_datafusion_err!( + "Kafka connector requires 'bootstrap.servers' in the WITH clause" + ) + })?, + }; + + let topic = options + .pull_opt_str(opt::KAFKA_TOPIC)? + .ok_or_else(|| plan_datafusion_err!("Kafka connector requires 'topic' in the WITH clause"))?; + + let sql_format = format.clone().ok_or_else(|| { + plan_datafusion_err!( + "Kafka connector requires 'format' in the WITH clause (e.g. format = 'json')" + ) + })?; + let proto_format = sql_format_to_proto(&sql_format)?; + + let rate_limit = options + .pull_opt_u64(opt::KAFKA_RATE_LIMIT_MESSAGES_PER_SECOND)? + .map(|v| v.clamp(1, u32::MAX as u64) as u32) + .unwrap_or(0); + + let value_subject = options.pull_opt_str(opt::KAFKA_VALUE_SUBJECT)?; + + let auth = Some(KafkaAuthConfig { + auth: Some(protocol::grpc::api::kafka_auth_config::Auth::None( + KafkaAuthNone {}, + )), + }); + + let _ = options.pull_opt_str(opt::TYPE)?; + let _ = options.pull_opt_str(opt::CONNECTOR)?; + + match role { + TableRole::Ingestion => { + let offset_mode = match options.pull_opt_str(opt::KAFKA_SCAN_STARTUP_MODE)?.as_deref() { + Some(s) if s == kafka_with_value::SCAN_LATEST => { + KafkaOffsetMode::KafkaOffsetLatest as i32 + } + Some(s) if s == kafka_with_value::SCAN_EARLIEST => { + KafkaOffsetMode::KafkaOffsetEarliest as i32 + } + Some(s) + if s == kafka_with_value::SCAN_GROUP_OFFSETS + || s == kafka_with_value::SCAN_GROUP => + { + KafkaOffsetMode::KafkaOffsetGroup as i32 + } + None => KafkaOffsetMode::KafkaOffsetGroup as i32, + Some(other) => { + return plan_err!( + "invalid scan.startup.mode '{other}'; expected latest, earliest, or group-offsets" + ); + } + }; + + let read_mode = match options.pull_opt_str(opt::KAFKA_ISOLATION_LEVEL)?.as_deref() { + Some(s) if s == kafka_with_value::ISOLATION_READ_COMMITTED => { + KafkaReadMode::KafkaReadCommitted as i32 + } + Some(s) if s == kafka_with_value::ISOLATION_READ_UNCOMMITTED => { + KafkaReadMode::KafkaReadUncommitted as i32 + } + None => 
KafkaReadMode::KafkaReadDefault as i32, + Some(other) => { + return plan_err!("invalid isolation.level '{other}'"); + } + }; + + let group_id = match options.pull_opt_str(opt::KAFKA_GROUP_ID)? { + Some(s) => Some(s), + None => options.pull_opt_str(opt::KAFKA_GROUP_ID_LEGACY)?, + }; + let group_id_prefix = options.pull_opt_str(opt::KAFKA_GROUP_ID_PREFIX)?; + + let client_configs = options.drain_remaining_string_values()?; + + Ok(ProtoConfig::KafkaSource(KafkaSourceConfig { + topic, + bootstrap_servers, + group_id, + group_id_prefix, + offset_mode, + read_mode, + auth, + client_configs, + format: Some(proto_format), + bad_data_policy: sql_bad_data_to_proto(&bad_data), + rate_limit_msgs_per_sec: rate_limit, + value_subject, + })) + } + TableRole::Egress => { + let commit_mode = match options.pull_opt_str(opt::KAFKA_SINK_COMMIT_MODE)?.as_deref() { + Some(s) + if s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_HYPHEN + || s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_UNDERSCORE => + { + KafkaSinkCommitMode::KafkaSinkExactlyOnce as i32 + } + None => KafkaSinkCommitMode::KafkaSinkAtLeastOnce as i32, + Some(s) + if s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_HYPHEN + || s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_UNDERSCORE => + { + KafkaSinkCommitMode::KafkaSinkAtLeastOnce as i32 + } + Some(other) => { + return plan_err!("invalid sink.commit.mode '{other}'"); + } + }; + let key_field = match options.pull_opt_str(opt::KAFKA_SINK_KEY_FIELD)? { + Some(s) => Some(s), + None => options.pull_opt_str(opt::KAFKA_KEY_FIELD_LEGACY)?, + }; + let timestamp_field = match options.pull_opt_str(opt::KAFKA_SINK_TIMESTAMP_FIELD)? 
{ + Some(s) => Some(s), + None => options.pull_opt_str(opt::KAFKA_TIMESTAMP_FIELD_LEGACY)?, + }; + + let client_configs = options.drain_remaining_string_values()?; + + Ok(ProtoConfig::KafkaSink(KafkaSinkConfig { + topic, + bootstrap_servers, + commit_mode, + key_field, + timestamp_field, + auth, + client_configs, + format: Some(proto_format), + value_subject, + })) + } + TableRole::Reference => { + plan_err!("Kafka connector cannot be used as a lookup table in this path") + } + } +} diff --git a/src/sql/schema/mod.rs b/src/sql/schema/mod.rs index b3ec5e09..b052aa68 100644 --- a/src/sql/schema/mod.rs +++ b/src/sql/schema/mod.rs @@ -13,6 +13,8 @@ pub mod catalog_ddl; pub mod column_descriptor; pub mod connection_type; +pub mod connector_config; +pub mod kafka_operator_config; pub mod source_table; pub mod data_encoding_format; pub mod schema_context; @@ -29,6 +31,7 @@ pub use catalog_ddl::{ }; pub use column_descriptor::ColumnDescriptor; pub use connection_type::ConnectionType; +pub use connector_config::ConnectorConfig; pub use source_table::{SourceOperator, SourceTable}; /// Back-compat alias for [`SourceTable`]. diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs index d10e39dc..fe4411dd 100644 --- a/src/sql/schema/source_table.rs +++ b/src/sql/schema/source_table.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap}; use std::sync::Arc; use std::time::Duration; @@ -26,6 +26,7 @@ use protocol::grpc::api::ConnectorOp; use tracing::warn; use super::column_descriptor::ColumnDescriptor; +use super::connector_config::ConnectorConfig; use super::data_encoding_format::DataEncodingFormat; use super::schema_context::SchemaContext; use super::table_execution_unit::{EngineDescriptor, SyncMode, TableExecutionUnit}; @@ -36,19 +37,16 @@ use super::table_role::{ use super::temporal_pipeline_config::{resolve_temporal_logic, TemporalPipelineConfig, TemporalSpec}; use super::StreamSchemaProvider; use crate::multifield_partial_ord; -use crate::sql::api::{ConnectionProfile, ConnectionSchema, SourceField}; +use crate::sql::api::ConnectionProfile; use crate::sql::common::constants::{ - connection_table_role, connector_type, kafka_with_value, sql_field, + connection_table_role, connector_type, sql_field, }; use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::kafka_catalog::{ - KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode, - SinkCommitMode, TableType as KafkaTableType, -}; use crate::sql::common::with_option_keys as opt; use crate::sql::common::{ - BadData, Format, Framing, FsSchema, JsonCompression, JsonFormat, OperatorConfig, RateLimit, + BadData, Format, Framing, FsSchema, JsonCompression, JsonFormat, }; +use crate::sql::schema::kafka_operator_config::build_kafka_proto_config; use crate::sql::schema::ConnectionType; use crate::sql::schema::table::SqlSource; use crate::sql::types::ProcessingMode; @@ -61,8 +59,8 @@ pub struct SourceTable { pub table_identifier: String, pub role: TableRole, pub schema_specs: Vec, - /// Serialized runtime payload (e.g. JSON: connector + `connection_schema` + options). - pub opaque_config: String, + /// Strongly-typed connector runtime configuration — replaces the legacy `opaque_config: String`. 
+ pub connector_config: ConnectorConfig, pub temporal_config: TemporalPipelineConfig, pub key_constraints: Vec, pub payload_format: Option, @@ -73,7 +71,7 @@ pub struct SourceTable { pub lookup_cache_max_bytes: Option, pub lookup_cache_ttl: Option, pub inferred_fields: Option>, - /// Original `WITH` options for catalog / `SHOW CREATE TABLE` (snapshot at DDL parse time). + /// Original `WITH` options for catalog persistence / `SHOW CREATE TABLE`. pub catalog_with_options: BTreeMap, } @@ -83,7 +81,6 @@ multifield_partial_ord!( adapter_type, table_identifier, role, - opaque_config, description, key_constraints, connection_format, @@ -107,7 +104,7 @@ impl SourceTable { table_identifier: table_identifier.into(), role: connection_type.into(), schema_specs: Vec::new(), - opaque_config: String::new(), + connector_config: ConnectorConfig::Generic(HashMap::new()), temporal_config: TemporalPipelineConfig::default(), key_constraints: Vec::new(), payload_format: None, @@ -181,7 +178,7 @@ impl SourceTable { refined_columns = encoding.apply_envelope(refined_columns)?; let temporal_settings = resolve_temporal_logic(&refined_columns, time_meta)?; - let finalized_config = serialize_backend_params(adapter, options)?; + let _finalized_config = serialize_backend_params(adapter, options)?; let role = deduce_role(options)?; if role == TableRole::Ingestion && encoding.supports_delta_updates() && pk_list.is_empty() { @@ -194,7 +191,7 @@ impl SourceTable { table_identifier: id.to_string(), role, schema_specs: refined_columns, - opaque_config: finalized_config, + connector_config: ConnectorConfig::Generic(catalog_with_options.clone().into_iter().collect()), temporal_config: temporal_settings, key_constraints: pk_list, payload_format: Some(encoding), @@ -242,7 +239,7 @@ impl SourceTable { label: self.table_identifier.clone(), engine_meta: EngineDescriptor { engine_type: self.adapter_type.clone(), - raw_payload: self.opaque_config.clone(), + raw_payload: String::new(), }, sync_mode: mode, 
temporal_offset: self.temporal_config.clone(), @@ -288,7 +285,6 @@ impl SourceTable { validate_adapter_availability(connector_name)?; - let inferred_empty = fields.is_empty(); let mut columns = fields; columns = apply_adapter_specific_rules(connector_name, columns); @@ -302,7 +298,7 @@ impl SourceTable { return plan_err!("'json.compression' is only supported for the filesystem connector"); } - let framing = Framing::from_opts(options) + let _framing = Framing::from_opts(options) .map_err(|e| DataFusionError::Plan(format!("invalid framing: '{e}'")))?; if temporary @@ -318,38 +314,9 @@ impl SourceTable { let encoding = payload_format.unwrap_or(DataEncodingFormat::Raw); columns = encoding.apply_envelope(columns)?; - let schema_fields: Vec = columns - .iter() - .filter(|c| !c.is_computed()) - .map(|c| { - let mut sf: SourceField = c.arrow_field().clone().try_into().map_err(|_| { - DataFusionError::Plan(format!( - "field '{}' has a type '{:?}' that cannot be used in a connection table", - c.arrow_field().name(), - c.arrow_field().data_type() - )) - })?; - if let Some(key) = c.system_meta_key() { - sf.metadata_key = Some(key.to_string()); - } - Ok(sf) - }) - .collect::>()?; - let bad_data = BadData::from_opts(options) .map_err(|e| DataFusionError::Plan(format!("Invalid bad_data: '{e}'")))?; - let connection_schema = ConnectionSchema::try_new( - format.clone(), - Some(bad_data.clone()), - framing.clone(), - schema_fields, - None, - Some(inferred_empty), - primary_keys.iter().cloned().collect::>(), - ) - .map_err(|e| DataFusionError::Plan(format!("could not create connection schema: {e}")))?; - let role = if let Some(t) = connection_type_override { t.into() } else { @@ -369,7 +336,7 @@ impl SourceTable { table_identifier: table_identifier.to_string(), role, schema_specs: columns, - opaque_config: String::new(), + connector_config: ConnectorConfig::Generic(HashMap::new()), temporal_config: TemporalPipelineConfig::default(), key_constraints: Vec::new(), payload_format, @@ 
-466,36 +433,21 @@ impl SourceTable { table.lookup_cache_ttl = options.pull_opt_duration(opt::LOOKUP_CACHE_TTL)?; if connector_name.eq_ignore_ascii_case(connector_type::KAFKA) { - let physical = table.produce_physical_schema(); - let op_cfg = wire_kafka_operator_config( - options, - role, - &physical, - &format, - bad_data, - framing, - )?; - table.opaque_config = serde_json::to_string(&op_cfg).map_err(|e| { - DataFusionError::Plan(format!("failed to serialize Kafka OperatorConfig: {e}")) - })?; + let proto_cfg = build_kafka_proto_config(options, role, &format, bad_data)?; + table.connector_config = match proto_cfg { + protocol::grpc::api::connector_op::Config::KafkaSource(cfg) => { + ConnectorConfig::KafkaSource(cfg) + } + protocol::grpc::api::connector_op::Config::KafkaSink(cfg) => { + ConnectorConfig::KafkaSink(cfg) + } + protocol::grpc::api::connector_op::Config::Generic(g) => { + ConnectorConfig::Generic(g.properties) + } + }; } else { let extra_opts = options.drain_remaining_string_values()?; - let mut map = serde_json::Map::new(); - map.insert( - opt::CONNECTOR.to_string(), - serde_json::Value::String(connector_name.to_string()), - ); - let schema_val = serde_json::to_value(&connection_schema).map_err(|e| { - DataFusionError::Plan(format!("failed to serialize connection schema: {e}")) - })?; - map.insert(opt::CONNECTION_SCHEMA.to_string(), schema_val); - for (k, v) in extra_opts { - map.insert(k, serde_json::Value::String(v)); - } - let config_root = serde_json::Value::Object(map); - table.opaque_config = serde_json::to_string(&config_root).map_err(|e| { - DataFusionError::Plan(format!("failed to serialize connector config: {e}")) - })?; + table.connector_config = ConnectorConfig::Generic(extra_opts); } if role == TableRole::Ingestion && encoding.supports_delta_updates() && primary_keys.is_empty() @@ -519,11 +471,25 @@ impl SourceTable { || self.payload_format == Some(DataEncodingFormat::DebeziumJson) } + /// Build strongly-typed `ConnectorOp` protobuf for 
runtime operator construction. + /// + /// Directly maps the in-memory [`ConnectorConfig`] to the proto `oneof config` — zero JSON, + /// zero re-parsing. pub fn connector_op(&self) -> ConnectorOp { + let physical = self.produce_physical_schema(); + let fields: Vec = physical + .fields() + .iter() + .map(|f| f.as_ref().clone()) + .collect(); + let fs_schema = FsSchema::from_fields(fields); + ConnectorOp { connector: self.adapter_type.clone(), - config: self.opaque_config.clone(), + fs_schema: Some(fs_schema.into()), + name: self.table_identifier.clone(), description: self.description.clone(), + config: Some(self.connector_config.to_proto_config()), } } @@ -607,168 +573,6 @@ impl SourceTable { } } -/// Kafka: runtime [`KafkaSourceDispatcher`] / [`KafkaSinkDispatcher`] expect [`OperatorConfig`] JSON, -/// not the legacy `{ connector, connection_schema, ... }` blob used by other adapters. -fn wire_kafka_operator_config( - options: &mut ConnectorOptions, - role: TableRole, - physical_schema: &Schema, - format: &Option, - bad_data: BadData, - framing: Option, -) -> Result { - let bootstrap_servers = match options.pull_opt_str(opt::KAFKA_BOOTSTRAP_SERVERS)? { - Some(s) => s, - None => options - .pull_opt_str(opt::KAFKA_BOOTSTRAP_SERVERS_LEGACY)? - .ok_or_else(|| { - plan_datafusion_err!( - "Kafka connector requires 'bootstrap.servers' in the WITH clause" - ) - })?, - }; - - let topic = options - .pull_opt_str(opt::KAFKA_TOPIC)? - .ok_or_else(|| plan_datafusion_err!("Kafka connector requires 'topic' in the WITH clause"))?; - - let sql_format = format.clone().ok_or_else(|| { - plan_datafusion_err!( - "Kafka connector requires 'format' in the WITH clause (e.g. format = 'json')" - ) - })?; - - let rate_limit = options - .pull_opt_u64(opt::KAFKA_RATE_LIMIT_MESSAGES_PER_SECOND)? 
- .map(|v| RateLimit { - messages_per_second: v.clamp(1, u32::MAX as u64) as u32, - }); - - let value_subject = options.pull_opt_str(opt::KAFKA_VALUE_SUBJECT)?; - - let kind = match role { - TableRole::Ingestion => { - let offset = match options.pull_opt_str(opt::KAFKA_SCAN_STARTUP_MODE)?.as_deref() { - Some(s) if s == kafka_with_value::SCAN_LATEST => KafkaTableSourceOffset::Latest, - Some(s) if s == kafka_with_value::SCAN_EARLIEST => KafkaTableSourceOffset::Earliest, - Some(s) - if s == kafka_with_value::SCAN_GROUP_OFFSETS - || s == kafka_with_value::SCAN_GROUP => - { - KafkaTableSourceOffset::Group - } - None => KafkaTableSourceOffset::Group, - Some(other) => { - return plan_err!( - "invalid scan.startup.mode '{other}'; expected latest, earliest, or group-offsets" - ); - } - }; - let read_mode = match options.pull_opt_str(opt::KAFKA_ISOLATION_LEVEL)?.as_deref() { - Some(s) if s == kafka_with_value::ISOLATION_READ_COMMITTED => { - Some(ReadMode::ReadCommitted) - } - Some(s) if s == kafka_with_value::ISOLATION_READ_UNCOMMITTED => { - Some(ReadMode::ReadUncommitted) - } - None => None, - Some(other) => { - return plan_err!("invalid isolation.level '{other}'"); - } - }; - let group_id = match options.pull_opt_str(opt::KAFKA_GROUP_ID)? 
{ - Some(s) => Some(s), - None => options.pull_opt_str(opt::KAFKA_GROUP_ID_LEGACY)?, - }; - let group_id_prefix = options.pull_opt_str(opt::KAFKA_GROUP_ID_PREFIX)?; - KafkaTableType::Source { - offset, - read_mode, - group_id, - group_id_prefix, - } - } - TableRole::Egress => { - let commit_mode = match options.pull_opt_str(opt::KAFKA_SINK_COMMIT_MODE)?.as_deref() { - Some(s) - if s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_HYPHEN - || s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_UNDERSCORE => - { - SinkCommitMode::ExactlyOnce - } - None => SinkCommitMode::AtLeastOnce, - Some(s) - if s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_HYPHEN - || s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_UNDERSCORE => - { - SinkCommitMode::AtLeastOnce - } - Some(other) => { - return plan_err!("invalid sink.commit.mode '{other}'"); - } - }; - let key_field = match options.pull_opt_str(opt::KAFKA_SINK_KEY_FIELD)? { - Some(s) => Some(s), - None => options.pull_opt_str(opt::KAFKA_KEY_FIELD_LEGACY)?, - }; - let timestamp_field = match options.pull_opt_str(opt::KAFKA_SINK_TIMESTAMP_FIELD)? { - Some(s) => Some(s), - None => options.pull_opt_str(opt::KAFKA_TIMESTAMP_FIELD_LEGACY)?, - }; - KafkaTableType::Sink { - commit_mode, - key_field, - timestamp_field, - } - } - TableRole::Reference => { - return plan_err!("Kafka connector cannot be used as a lookup table in this path"); - } - }; - - // Role already decided; keep these out of librdkafka `connection_properties`. 
- let _ = options.pull_opt_str(opt::TYPE)?; - let _ = options.pull_opt_str(opt::CONNECTOR)?; - - let connection_properties = options.drain_remaining_string_values()?; - - let kafka_connection = KafkaConfig { - bootstrap_servers, - authentication: KafkaConfigAuthentication::None, - schema_registry_enum: None, - connection_properties, - }; - - let kafka_table = KafkaTable { - topic, - kind, - client_configs: HashMap::new(), - value_subject, - }; - - let fields: Vec = physical_schema - .fields() - .iter() - .map(|f| f.as_ref().clone()) - .collect(); - let input_schema = FsSchema::from_fields(fields); - - Ok(OperatorConfig { - connection: serde_json::to_value(&kafka_connection).map_err(|e| { - DataFusionError::Plan(format!("Kafka connection serialization failed: {e}")) - })?, - table: serde_json::to_value(&kafka_table).map_err(|e| { - DataFusionError::Plan(format!("Kafka table serialization failed: {e}")) - })?, - format: Some(sql_format), - bad_data: Some(bad_data), - framing, - rate_limit, - metadata_fields: vec![], - input_schema: Some(input_schema), - }) -} - /// Plan a SQL scalar expression against a table-qualified schema (e.g. watermark `AS` clause). 
fn plan_generating_expr( ast: &ast::Expr, diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs index fc6a16f8..086b206b 100644 --- a/src/storage/stream_catalog/manager.rs +++ b/src/storage/stream_catalog/manager.rs @@ -225,26 +225,20 @@ impl CatalogManager { let mut opts = source.catalog_with_options().clone(); opts.entry("connector".to_string()) .or_insert_with(|| source.connector().to_string()); + let catalog_row = pb::CatalogSourceTable { + arrow_schema_ipc: CatalogCodec::encode_schema(&Arc::new( + source.produce_physical_schema(), + ))?, + event_time_field: source.event_time_field().map(str::to_string), + watermark_field: source.stream_catalog_watermark_field(), + with_options: opts.into_iter().collect(), + connector: source.connector().to_string(), + description: source.description.clone(), + }; if matches!(table, CatalogTable::LookupTable(_)) { - table_definition::TableType::LookupTable(pb::CatalogSourceTable { - arrow_schema_ipc: CatalogCodec::encode_schema(&Arc::new( - source.produce_physical_schema(), - ))?, - event_time_field: source.event_time_field().map(str::to_string), - watermark_field: source.stream_catalog_watermark_field(), - with_options: opts.into_iter().collect(), - connector: source.connector().to_string(), - }) + table_definition::TableType::LookupTable(catalog_row) } else { - table_definition::TableType::ConnectorTable(pb::CatalogSourceTable { - arrow_schema_ipc: CatalogCodec::encode_schema(&Arc::new( - source.produce_physical_schema(), - ))?, - event_time_field: source.event_time_field().map(str::to_string), - watermark_field: source.stream_catalog_watermark_field(), - with_options: opts.into_iter().collect(), - connector: source.connector().to_string(), - }) + table_definition::TableType::ConnectorTable(catalog_row) } } CatalogTable::TableFromQuery { name, .. 
} => return plan_err!( @@ -275,7 +269,15 @@ impl CatalogManager { } else { source_row.connector.clone() }; - let mut source = SourceTable::new(table_name, connector, ConnectionType::Source); + let mut source = SourceTable::new( + table_name, + connector, + if as_lookup { + ConnectionType::Lookup + } else { + ConnectionType::Source + }, + ); let schema = CatalogCodec::decode_schema(&source_row.arrow_schema_ipc)?; source.schema_specs = schema .fields() @@ -288,6 +290,34 @@ impl CatalogManager { .watermark_field .filter(|w| w != sql_field::COMPUTED_WATERMARK); source.catalog_with_options = source_row.with_options.into_iter().collect(); + source.description = source_row.description; + + // Rebuild strongly-typed ConnectorConfig from persisted WITH options. + if source.connector().eq_ignore_ascii_case("kafka") { + use crate::sql::schema::kafka_operator_config::build_kafka_proto_config_from_string_map; + use crate::sql::schema::ConnectorConfig; + let opts_map: std::collections::HashMap = + source.catalog_with_options.iter().map(|(k, v)| (k.clone(), v.clone())).collect(); + let physical = source.produce_physical_schema(); + if let Ok(proto_cfg) = build_kafka_proto_config_from_string_map(opts_map, &physical) { + source.connector_config = match proto_cfg { + protocol::grpc::api::connector_op::Config::KafkaSource(cfg) => { + ConnectorConfig::KafkaSource(cfg) + } + protocol::grpc::api::connector_op::Config::KafkaSink(cfg) => { + ConnectorConfig::KafkaSink(cfg) + } + protocol::grpc::api::connector_op::Config::Generic(g) => { + ConnectorConfig::Generic(g.properties) + } + }; + } + } else { + use crate::sql::schema::ConnectorConfig; + source.connector_config = ConnectorConfig::Generic( + source.catalog_with_options.iter().map(|(k, v)| (k.clone(), v.clone())).collect(), + ); + } if as_lookup { Ok(CatalogTable::LookupTable(source)) From 249bd5dce4d9c2553b906595f40e811e7808d8e3 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 1 Apr 2026 00:21:02 +0800 Subject: [PATCH 36/44] 
update --- src/coordinator/mod.rs | 1 - src/runtime/streaming/api/mod.rs | 3 - src/runtime/streaming/execution/mod.rs | 2 - .../streaming/execution/tracker/mod.rs | 2 - src/runtime/streaming/factory/mod.rs | 1 - src/runtime/streaming/format/mod.rs | 1 - src/runtime/streaming/job/mod.rs | 1 - src/runtime/streaming/mod.rs | 13 +- src/runtime/streaming/network/mod.rs | 2 - .../grouping/incremental_aggregate.rs | 122 +----------------- .../streaming/operators/grouping/mod.rs | 2 +- src/runtime/streaming/operators/joins/mod.rs | 4 +- src/runtime/streaming/operators/mod.rs | 11 +- src/runtime/streaming/operators/projection.rs | 4 +- src/runtime/streaming/operators/sink/mod.rs | 1 - src/runtime/streaming/operators/source/mod.rs | 1 - .../streaming/operators/watermark/mod.rs | 2 +- .../streaming/operators/windows/mod.rs | 8 +- src/runtime/streaming/protocol/mod.rs | 6 - src/server/mod.rs | 2 +- src/sql/analysis/join_rewriter.rs | 2 +- src/sql/analysis/mod.rs | 24 +--- src/sql/analysis/sink_input_rewriter.rs | 2 +- src/sql/analysis/source_rewriter.rs | 1 - src/sql/analysis/window_function_rewriter.rs | 2 +- src/sql/api/mod.rs | 9 +- src/sql/common/mod.rs | 22 +--- src/sql/logical_node/logical/mod.rs | 2 - src/sql/logical_node/logical/operator_name.rs | 2 +- src/sql/mod.rs | 2 - src/sql/physical/mod.rs | 2 +- src/sql/schema/mod.rs | 14 +- src/sql/types/mod.rs | 4 +- src/storage/stream_catalog/mod.rs | 2 +- 34 files changed, 35 insertions(+), 244 deletions(-) diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs index 23cd2925..ca384a90 100644 --- a/src/coordinator/mod.rs +++ b/src/coordinator/mod.rs @@ -22,7 +22,6 @@ mod statement; mod tool; pub use coordinator::Coordinator; -pub use runtime_context::CoordinatorRuntimeContext; pub use dataset::{DataSet, ShowFunctionsResult}; pub use statement::{ CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs index 
a525c883..e78ba371 100644 --- a/src/runtime/streaming/api/mod.rs +++ b/src/runtime/streaming/api/mod.rs @@ -15,6 +15,3 @@ pub mod context; pub mod operator; pub mod source; -pub use context::TaskContext; -pub use operator::{ConstructedOperator, Operator}; -pub use source::{SourceEvent, SourceOffset, SourceOperator}; diff --git a/src/runtime/streaming/execution/mod.rs b/src/runtime/streaming/execution/mod.rs index 40beabe4..1a8401ef 100644 --- a/src/runtime/streaming/execution/mod.rs +++ b/src/runtime/streaming/execution/mod.rs @@ -15,5 +15,3 @@ pub mod runner; pub mod source; pub mod tracker; -pub use runner::{OperatorDrive, SubtaskRunner}; -pub use source::{SourceRunner, SOURCE_IDLE_SLEEP, WATERMARK_EMIT_INTERVAL}; diff --git a/src/runtime/streaming/execution/tracker/mod.rs b/src/runtime/streaming/execution/tracker/mod.rs index b00ee0a2..3206f352 100644 --- a/src/runtime/streaming/execution/tracker/mod.rs +++ b/src/runtime/streaming/execution/tracker/mod.rs @@ -14,5 +14,3 @@ pub mod barrier_aligner; pub mod watermark_tracker; -pub use barrier_aligner::{AlignmentStatus, BarrierAligner}; -pub use watermark_tracker::WatermarkTracker; diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs index 442c9bf9..f02ec955 100644 --- a/src/runtime/streaming/factory/mod.rs +++ b/src/runtime/streaming/factory/mod.rs @@ -26,7 +26,6 @@ pub use connector::{ ConnectorSinkDispatcher, ConnectorSourceDispatcher, KafkaSinkDispatcher, KafkaSourceDispatcher, }; pub use global::Registry; -pub use operator_constructor::OperatorConstructor; pub use operator_factory::OperatorFactory; fn register_builtin_connectors(factory: &mut OperatorFactory) { diff --git a/src/runtime/streaming/format/mod.rs b/src/runtime/streaming/format/mod.rs index b27935ba..d5e63a9d 100644 --- a/src/runtime/streaming/format/mod.rs +++ b/src/runtime/streaming/format/mod.rs @@ -17,5 +17,4 @@ pub mod serializer; pub use config::{BadDataPolicy, DecimalEncoding, Format, JsonFormat, 
TimestampFormat}; pub use deserializer::DataDeserializer; -pub use json_encoder::CustomEncoderFactory; pub use serializer::DataSerializer; diff --git a/src/runtime/streaming/job/mod.rs b/src/runtime/streaming/job/mod.rs index 72f98d69..5b2bbd8c 100644 --- a/src/runtime/streaming/job/mod.rs +++ b/src/runtime/streaming/job/mod.rs @@ -15,4 +15,3 @@ pub mod job_manager; pub mod models; pub use job_manager::JobManager; -pub use models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus}; diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs index 6fd45abd..7e0ba57a 100644 --- a/src/runtime/streaming/mod.rs +++ b/src/runtime/streaming/mod.rs @@ -24,15 +24,4 @@ pub mod network; pub mod operators; pub mod protocol; -pub use api::{ - ConstructedOperator, Operator, SourceEvent, SourceOffset, SourceOperator, TaskContext, -}; -pub use error::RunError; -pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner}; -pub use factory::{OperatorConstructor, OperatorFactory}; -pub use memory::{MemoryPool, MemoryTicket}; -pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub}; -pub use protocol::{ - CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput, - control_channel, merge_watermarks, watermark_strictly_advances, -}; +pub use protocol::StreamOutput; diff --git a/src/runtime/streaming/network/mod.rs b/src/runtime/streaming/network/mod.rs index 4b120781..16100133 100644 --- a/src/runtime/streaming/network/mod.rs +++ b/src/runtime/streaming/network/mod.rs @@ -13,5 +13,3 @@ pub mod endpoint; pub mod environment; -pub use endpoint::{BoxedEventStream, PhysicalSender, RemoteSenderStub}; -pub use environment::NetworkEnvironment; diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs index 16d92fd1..f895c173 100644 --- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs +++ 
b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use anyhow::{anyhow, bail, Result}; +use anyhow::{bail, Result}; use arrow::compute::max_array; use arrow::row::{RowConverter, SortField}; use arrow_array::builder::{ @@ -19,7 +19,7 @@ use arrow_array::builder::{ use arrow_array::cast::AsArray; use arrow_array::types::UInt64Type; use arrow_array::{ - Array, ArrayRef, BinaryArray, BooleanArray, RecordBatch, StructArray, UInt32Array, UInt64Array, + Array, ArrayRef, BooleanArray, RecordBatch, StructArray, }; use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit}; use datafusion::common::{Result as DFResult, ScalarValue}; @@ -31,7 +31,6 @@ use datafusion_proto::physical_plan::from_proto::parse_physical_expr; use datafusion_proto::protobuf::PhysicalExprNode; use datafusion_proto::protobuf::PhysicalPlanNode; use datafusion_proto::protobuf::physical_plan_node::PhysicalPlanType; -use futures::StreamExt; use itertools::Itertools; use prost::Message; use std::collections::HashSet; @@ -482,95 +481,6 @@ impl IncrementalAggregatingFunc { } async fn initialize(&mut self, _ctx: &mut TaskContext) -> Result<()> { - // let table = tm - // .get_uncached_key_value_view("a") - // .await - // .map_err(|e| anyhow!("state table a: {e}"))?; - // let mut stream = Box::pin(table.get_all()); - // let key_converter = RowConverter::new(self.sliding_state_schema.sort_fields(false))?; - // - // while let Some(batch) = stream.next().await { - // let batch = batch?; - // if batch.num_rows() == 0 { continue; } - // - // let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect(); - // let aggregate_states = self.aggregates.iter().map(|agg| { - // agg.state_cols.iter().map(|idx| batch.column(*idx).clone()).collect_vec() - // }).collect_vec(); - // let generations = 
batch.columns().last().unwrap().as_primitive::(); - // let now = Instant::now(); - // - // if key_cols.is_empty() { - // self.restore_sliding( - // GLOBAL_KEY.as_ref().as_slice(), - // now, - // 0, - // &aggregate_states, - // generations.value(0), - // )?; - // } else { - // let key_rows = key_converter.convert_columns(&key_cols)?; - // for (i, row) in key_rows.iter().enumerate() { - // if generations.is_null(i) { - // bail!("generation is null at row {i}"); - // } - // let generation = generations.value(i); - // self.restore_sliding( - // row.as_ref(), - // now, - // i, - // &aggregate_states, - // generation, - // )?; - // } - // } - // } - // drop(stream); - - // - // if self.aggregates.iter().any(|agg| agg.accumulator_type == AccumulatorType::Batch) { - // let table = tm - // .get_uncached_key_value_view("b") - // .await - // .map_err(|e| anyhow!("state table b: {e}"))?; - // let mut stream = Box::pin(table.get_all()); - // while let Some(batch) = stream.next().await { - // let batch = batch?; - // if batch.num_rows() == 0 { continue; } - // - // let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect(); - // let count_column = batch.column(self.batch_state_schema.schema.index_of("count").unwrap()).as_any().downcast_ref::().unwrap(); - // let accumulator_column = batch.column(self.batch_state_schema.schema.index_of("accumulator").unwrap()).as_any().downcast_ref::().unwrap(); - // let args_row_column = batch.column(self.batch_state_schema.schema.index_of("args_row").unwrap()).as_any().downcast_ref::().unwrap(); - // let generations = batch.columns().last().unwrap().as_primitive::(); - // - // let key_rows = if key_cols.is_empty() { - // vec![GLOBAL_KEY.as_ref().clone()] - // } else { - // self.key_converter - // .convert_columns(&key_cols)? 
- // .iter() - // .map(|k| k.as_ref().to_vec()) - // .collect() - // }; - // - // for (i, row) in key_rows.iter().enumerate() { - // let Some(accumulators) = self.accumulators.get_mut(row.as_ref()) else { continue; }; - // let count = count_column.value(i); - // let accumulator_idx = accumulator_column.value(i) as usize; - // let args_row = args_row_column.value(i); - // let generation = generations.value(i); - // - // let IncrementalState::Batch { data, .. } = &mut accumulators[accumulator_idx] else { bail!("expected batch accumulator"); }; - // - // if let Some(existing) = data.get_mut(args_row) { - // if existing.generation < generation { existing.count = count; existing.generation = generation; } - // } else { - // data.insert(Key(Arc::new(args_row.to_vec())), BatchData { count, generation }); - // } - // } - // } - // } let mut deleted_keys = vec![]; for (k, v) in self.accumulators.iter_mut() { @@ -697,34 +607,8 @@ impl Operator for IncrementalAggregatingFunc { async fn snapshot_state( &mut self, _barrier: CheckpointBarrier, - ctx: &mut TaskContext, + _ctx: &mut TaskContext, ) -> Result<()> { - // let mut tm = ctx.table_manager_guard().await?; - // - // if let Some(sliding) = self.checkpoint_sliding()? { - // let table = tm - // .get_uncached_key_value_view("a") - // .await - // .map_err(|e| anyhow!("state table a: {e}"))?; - // table - // .insert_batch(sliding) - // .await - // .map_err(|e| anyhow!("insert_batch a: {e}"))?; - // } - // - // if let Some(batch) = self.checkpoint_batch()? 
{ - // let table = tm - // .get_uncached_key_value_view("b") - // .await - // .map_err(|e| anyhow!("state table b: {e}"))?; - // table - // .insert_batch(batch) - // .await - // .map_err(|e| anyhow!("insert_batch b: {e}"))?; - // } - // - // - // self.updated_keys.clear(); Ok(()) } diff --git a/src/runtime/streaming/operators/grouping/mod.rs b/src/runtime/streaming/operators/grouping/mod.rs index ef672351..2a17a49d 100644 --- a/src/runtime/streaming/operators/grouping/mod.rs +++ b/src/runtime/streaming/operators/grouping/mod.rs @@ -13,5 +13,5 @@ pub mod incremental_aggregate; pub mod updating_cache; -pub use incremental_aggregate::{IncrementalAggregatingConstructor, IncrementalAggregatingFunc}; +pub use incremental_aggregate::IncrementalAggregatingConstructor; pub use updating_cache::{Key, UpdatingCache}; diff --git a/src/runtime/streaming/operators/joins/mod.rs b/src/runtime/streaming/operators/joins/mod.rs index bc81f328..1cc83d36 100644 --- a/src/runtime/streaming/operators/joins/mod.rs +++ b/src/runtime/streaming/operators/joins/mod.rs @@ -13,5 +13,5 @@ pub mod join_instance; pub mod join_with_expiration; -pub use join_instance::{InstantJoinConstructor, InstantJoinOperator}; -pub use join_with_expiration::{JoinWithExpirationConstructor, JoinWithExpirationOperator}; +pub use join_instance::InstantJoinConstructor; +pub use join_with_expiration::JoinWithExpirationConstructor; diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs index 18a98830..ffe1c101 100644 --- a/src/runtime/streaming/operators/mod.rs +++ b/src/runtime/streaming/operators/mod.rs @@ -27,13 +27,4 @@ pub use stateless_physical_executor::StatelessPhysicalExecutor; pub use projection::ProjectionOperator; pub use value_execution::ValueExecutionOperator; -pub use grouping::{IncrementalAggregatingFunc, Key, UpdatingCache}; -pub use joins::{InstantJoinOperator, JoinWithExpirationOperator}; -pub use key_by::KeyByOperator; -pub use sink::{ConsistencyMode, 
KafkaSinkOperator}; -pub use source::{BatchDeserializer, BufferedDeserializer, KafkaSourceOperator, KafkaState}; -pub use watermark::{WatermarkGeneratorOperator, WatermarkGeneratorState}; -pub use windows::{ - SessionWindowOperator, SlidingWindowOperator, TumblingWindowOperator, - WindowFunctionOperator, -}; +pub use grouping::{Key, UpdatingCache}; diff --git a/src/runtime/streaming/operators/projection.rs b/src/runtime/streaming/operators/projection.rs index d2f54b8c..0136e18e 100644 --- a/src/runtime/streaming/operators/projection.rs +++ b/src/runtime/streaming/operators/projection.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, Result}; use arrow_array::RecordBatch; use async_trait::async_trait; use datafusion::physical_expr::PhysicalExpr; @@ -23,7 +23,7 @@ use std::sync::Arc; use protocol::grpc::api::ProjectionOperator as ProjectionOperatorProto; use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{ConstructedOperator, Operator}; +use crate::runtime::streaming::api::operator::Operator; use crate::runtime::streaming::factory::global::Registry; use crate::runtime::streaming::StreamOutput; use crate::sql::common::{CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; diff --git a/src/runtime/streaming/operators/sink/mod.rs b/src/runtime/streaming/operators/sink/mod.rs index 8abd2985..aa340614 100644 --- a/src/runtime/streaming/operators/sink/mod.rs +++ b/src/runtime/streaming/operators/sink/mod.rs @@ -13,4 +13,3 @@ pub mod kafka; -pub use kafka::{ConsistencyMode, KafkaSinkOperator}; diff --git a/src/runtime/streaming/operators/source/mod.rs b/src/runtime/streaming/operators/source/mod.rs index 76f3639a..aa340614 100644 --- a/src/runtime/streaming/operators/source/mod.rs +++ b/src/runtime/streaming/operators/source/mod.rs @@ -13,4 +13,3 @@ pub mod kafka; -pub use 
kafka::{BatchDeserializer, BufferedDeserializer, KafkaSourceOperator, KafkaState}; diff --git a/src/runtime/streaming/operators/watermark/mod.rs b/src/runtime/streaming/operators/watermark/mod.rs index 4486a0fd..3a0a1099 100644 --- a/src/runtime/streaming/operators/watermark/mod.rs +++ b/src/runtime/streaming/operators/watermark/mod.rs @@ -12,4 +12,4 @@ pub mod watermark_generator; -pub use watermark_generator::{WatermarkGeneratorConstructor, WatermarkGeneratorOperator, WatermarkGeneratorState}; +pub use watermark_generator::WatermarkGeneratorConstructor; diff --git a/src/runtime/streaming/operators/windows/mod.rs b/src/runtime/streaming/operators/windows/mod.rs index 02c9eccb..f1915f0d 100644 --- a/src/runtime/streaming/operators/windows/mod.rs +++ b/src/runtime/streaming/operators/windows/mod.rs @@ -15,7 +15,7 @@ pub mod sliding_aggregating_window; pub mod tumbling_aggregating_window; pub mod window_function; -pub use session_aggregating_window::{SessionAggregatingWindowConstructor, SessionWindowOperator}; -pub use sliding_aggregating_window::{SlidingAggregatingWindowConstructor, SlidingWindowOperator}; -pub use tumbling_aggregating_window::{TumblingAggregateWindowConstructor, TumblingWindowOperator}; -pub use window_function::{WindowFunctionConstructor, WindowFunctionOperator}; +pub use session_aggregating_window::SessionAggregatingWindowConstructor; +pub use sliding_aggregating_window::SlidingAggregatingWindowConstructor; +pub use tumbling_aggregating_window::TumblingAggregateWindowConstructor; +pub use window_function::WindowFunctionConstructor; diff --git a/src/runtime/streaming/protocol/mod.rs b/src/runtime/streaming/protocol/mod.rs index 63f7f0bf..fb20c59e 100644 --- a/src/runtime/streaming/protocol/mod.rs +++ b/src/runtime/streaming/protocol/mod.rs @@ -17,10 +17,4 @@ pub mod stream_out; pub mod tracked; pub mod watermark; -pub use control::{ - control_channel, CheckpointBarrierWire, ControlCommand, StopMode, -}; -pub use event::StreamEvent; pub use 
stream_out::StreamOutput; -pub use tracked::TrackedEvent; -pub use watermark::{merge_watermarks, watermark_strictly_advances}; diff --git a/src/server/mod.rs b/src/server/mod.rs index 7795f29b..cb7a4a85 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -17,5 +17,5 @@ mod initializer; mod service; pub use handler::FunctionStreamServiceImpl; -pub use initializer::{bootstrap_system, build_core_registry}; +pub use initializer::bootstrap_system; pub use service::start_server_with_shutdown; diff --git a/src/sql/analysis/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs index 4421aa99..058a5bd8 100644 --- a/src/sql/analysis/join_rewriter.rs +++ b/src/sql/analysis/join_rewriter.rs @@ -19,7 +19,7 @@ use crate::sql::common::constants::mem_exec_join_side; use crate::sql::common::TIMESTAMP_FIELD; use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{ - Column, DataFusionError, JoinConstraint, JoinType, Result, ScalarValue, Spans, TableReference, + JoinConstraint, JoinType, Result, ScalarValue, TableReference, not_impl_err, plan_err, }; use datafusion::logical_expr::{ diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs index 653cb601..cd26a4e6 100644 --- a/src/sql/analysis/mod.rs +++ b/src/sql/analysis/mod.rs @@ -26,41 +26,27 @@ pub mod source_rewriter; pub mod time_window; pub mod unnest_rewriter; -pub use async_udf_rewriter::{AsyncOptions, AsyncUdfRewriter}; +pub use async_udf_rewriter::AsyncOptions; pub use sink_input_rewriter::SinkInputRewriter; -pub use source_metadata_visitor::SourceMetadataVisitor; -pub use source_rewriter::SourceRewriter; -pub use time_window::{TimeWindowNullCheckRemover, TimeWindowUdfChecker, is_time_window}; -pub use unnest_rewriter::{UNNESTED_COL, UnnestRewriter}; +pub use time_window::{TimeWindowNullCheckRemover, TimeWindowUdfChecker}; +pub use unnest_rewriter::UNNESTED_COL; -pub use crate::sql::schema::schema_provider::{ - LogicalBatchInput, StreamSchemaProvider, StreamTable, -}; +pub use 
crate::sql::schema::schema_provider::StreamSchemaProvider; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::sync::Arc; use datafusion::common::tree_node::{Transformed, TreeNode}; use datafusion::common::{Result, plan_err}; use datafusion::error::DataFusionError; -use datafusion::execution::SessionStateBuilder; use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion::prelude::SessionConfig; -use datafusion::sql::TableReference; -use datafusion::sql::sqlparser::ast::{OneOrManyWithParens, Statement}; -use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; -use datafusion::sql::sqlparser::parser::Parser; use tracing::{debug, info, instrument}; -use crate::sql::logical_planner::optimizers::ChainingOptimizer; -use crate::sql::schema::table::Table as CatalogTable; -use crate::sql::functions::{is_json_union, serialize_outgoing_json}; use crate::sql::extensions::key_calculation::{KeyExtractionNode, KeyExtractionStrategy}; use crate::sql::extensions::projection::StreamProjectionNode; use crate::sql::extensions::sink::StreamEgressNode; use crate::sql::extensions::StreamingOperatorBlueprint; use crate::sql::logical_planner::planner::NamedNode; -use crate::sql::types::SqlConfig; fn duration_from_sql_expr( expr: &datafusion::sql::sqlparser::ast::Expr, diff --git a/src/sql/analysis/sink_input_rewriter.rs b/src/sql/analysis/sink_input_rewriter.rs index 6b8b2de1..ad36046f 100644 --- a/src/sql/analysis/sink_input_rewriter.rs +++ b/src/sql/analysis/sink_input_rewriter.rs @@ -28,7 +28,7 @@ pub struct SinkInputRewriter<'a> { } impl<'a> SinkInputRewriter<'a> { - pub fn new(sink_inputs: &'a mut SinkInputs) -> Self { + pub(crate) fn new(sink_inputs: &'a mut SinkInputs) -> Self { Self { sink_inputs, was_removed: false, diff --git a/src/sql/analysis/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs index 35804c02..39df350e 100644 --- a/src/sql/analysis/source_rewriter.rs +++ 
b/src/sql/analysis/source_rewriter.rs @@ -25,7 +25,6 @@ use crate::sql::schema::source_table::SourceTable; use crate::sql::schema::ColumnDescriptor; use crate::sql::schema::table::Table; use crate::sql::schema::StreamSchemaProvider; -use crate::sql::schema::StreamTable; use crate::sql::common::constants::sql_field; use crate::sql::common::UPDATING_META_FIELD; use crate::sql::extensions::debezium::UnrollDebeziumPayloadNode; diff --git a/src/sql/analysis/window_function_rewriter.rs b/src/sql/analysis/window_function_rewriter.rs index 7b94b841..63c502bf 100644 --- a/src/sql/analysis/window_function_rewriter.rs +++ b/src/sql/analysis/window_function_rewriter.rs @@ -11,7 +11,7 @@ // limitations under the License. use datafusion::common::tree_node::Transformed; -use datafusion::common::{Column, Result as DFResult, plan_err, tree_node::TreeNodeRewriter}; +use datafusion::common::{Result as DFResult, plan_err, tree_node::TreeNodeRewriter}; use datafusion::logical_expr::{ self, Expr, Extension, LogicalPlan, Projection, Sort, Window, expr::WindowFunction, expr::WindowFunctionParams, diff --git a/src/sql/api/mod.rs b/src/sql/api/mod.rs index 3969296a..cdc119b7 100644 --- a/src/sql/api/mod.rs +++ b/src/sql/api/mod.rs @@ -25,14 +25,7 @@ pub mod var_str; use serde::{Deserialize, Serialize}; -pub use checkpoints::*; -pub use connections::{ - ConnectionProfile, ConnectionSchema, ConnectionType, Connector, FieldType, SchemaDefinition, - SourceField, -}; -pub use metrics::*; -pub use pipelines::*; -pub use udfs::*; +pub use connections::ConnectionProfile; #[derive(Serialize, Deserialize, Clone, Debug)] #[serde(rename_all = "camelCase")] diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs index fa37a9fd..03e9b4a6 100644 --- a/src/sql/common/mod.rs +++ b/src/sql/common/mod.rs @@ -36,27 +36,13 @@ pub mod worker; pub mod converter; // ── Re-exports from existing modules ── -pub use arrow_ext::{DisplayAsSql, FsExtensionType, GetArrowSchema, GetArrowType}; -pub use date::{DatePart, 
DateTruncPrecision}; -pub use debezium::{Debezium, DebeziumOp, UpdatingData}; -pub use hash::{range_for_server, server_for_hash, HASH_SEEDS}; -pub use message::{ArrowMessage, CheckpointBarrier, SignalMessage, Watermark}; -pub use task_info::{ChainInfo, TaskInfo}; -pub use time_utils::{from_micros, from_millis, from_nanos, to_micros, to_millis, to_nanos}; -pub use worker::{MachineId, WorkerId}; +pub use arrow_ext::FsExtensionType; +pub use message::{CheckpointBarrier, Watermark}; +pub use time_utils::{from_nanos, to_micros, to_millis, to_nanos}; // ── Re-exports from new modules ── -pub use control::{ - CheckpointCompleted, CheckpointEvent, CompactionResult, ControlMessage, ControlResp, - ErrorDomain, RetryHint, StopMode, TableConfig, TaskCheckpointEventType, TaskError, -}; pub use fs_schema::{FsSchema, FsSchemaRef}; -pub use connector_options::{ConnectorOptions, FromOpts}; -pub use kafka_catalog::{ - KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode, - SchemaRegistryConfig, SinkCommitMode, TableType, -}; -pub use errors::{DataflowError, DataflowResult}; +pub use connector_options::ConnectorOptions; pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat}; pub use operator_config::MetadataField; diff --git a/src/sql/logical_node/logical/mod.rs b/src/sql/logical_node/logical/mod.rs index ab318804..d2e9a327 100644 --- a/src/sql/logical_node/logical/mod.rs +++ b/src/sql/logical_node/logical/mod.rs @@ -26,7 +26,5 @@ pub use logical_edge::{LogicalEdge, LogicalEdgeType}; pub use logical_graph::{LogicalGraph, Optimizer}; pub use logical_node::LogicalNode; pub use logical_program::LogicalProgram; -pub use operator_chain::OperatorChain; pub use operator_name::OperatorName; pub use program_config::ProgramConfig; -pub use python_udf_config::PythonUdfConfig; diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs index 79fe9a05..57f53f90 100644 --- 
a/src/sql/logical_node/logical/operator_name.rs +++ b/src/sql/logical_node/logical/operator_name.rs @@ -45,7 +45,7 @@ impl OperatorName { pub fn feature_tag(self) -> Option<&'static str> { match self { - Self::ExpressionWatermark | Self::Value | Self::KeyBy | Self::Projection => None, + Self::ExpressionWatermark | Self::Value | Self::Projection => None, Self::AsyncUdf => Some(operator_feature::ASYNC_UDF), Self::Join => Some(operator_feature::JOIN_WITH_EXPIRATION), Self::InstantJoin => Some(operator_feature::WINDOWED_JOIN), diff --git a/src/sql/mod.rs b/src/sql/mod.rs index c13f1c4a..dc98a4de 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -23,7 +23,5 @@ pub mod analysis; pub(crate) mod extensions; pub mod types; -pub use schema::{StreamPlanningContext, StreamSchemaProvider}; -pub use parse::parse_sql; pub use analysis::rewrite_plan; diff --git a/src/sql/physical/mod.rs b/src/sql/physical/mod.rs index ee63a2be..7cbb3231 100644 --- a/src/sql/physical/mod.rs +++ b/src/sql/physical/mod.rs @@ -21,4 +21,4 @@ pub use cdc::{DebeziumUnrollingExec, ToDebeziumExec}; pub use codec::{DecodingContext, FsPhysicalExtensionCodec}; pub use meta::{updating_meta_field, updating_meta_fields}; pub use readers::FsMemExec; -pub use udfs::{WindowFunctionUdf, window}; +pub use udfs::window; diff --git a/src/sql/schema/mod.rs b/src/sql/schema/mod.rs index b052aa68..f3bf1946 100644 --- a/src/sql/schema/mod.rs +++ b/src/sql/schema/mod.rs @@ -27,25 +27,15 @@ pub mod utils; pub use catalog_ddl::{ catalog_table_row_detail, schema_columns_one_line, show_create_catalog_table, - show_create_stream_table, stream_table_row_detail, }; pub use column_descriptor::ColumnDescriptor; pub use connection_type::ConnectionType; pub use connector_config::ConnectorConfig; -pub use source_table::{SourceOperator, SourceTable}; +pub use source_table::SourceTable; /// Back-compat alias for [`SourceTable`]. 
pub type ConnectorTable = SourceTable; -pub use data_encoding_format::DataEncodingFormat; -pub use schema_context::{DfSchemaContext, SchemaContext}; pub use schema_provider::{ - FunctionCatalog, LogicalBatchInput, ObjectName, StreamPlanningContext, - StreamPlanningContextBuilder, StreamSchemaProvider, StreamTable, TableCatalog, + ObjectName, StreamPlanningContext, StreamSchemaProvider, StreamTable, }; pub use table::Table; -pub use table_execution_unit::{EngineDescriptor, SyncMode, TableExecutionUnit}; -pub use table_role::{ - apply_adapter_specific_rules, deduce_role, serialize_backend_params, validate_adapter_availability, - TableRole, -}; -pub use temporal_pipeline_config::{resolve_temporal_logic, TemporalPipelineConfig, TemporalSpec}; diff --git a/src/sql/types/mod.rs b/src/sql/types/mod.rs index 41753e38..4c99d08f 100644 --- a/src/sql/types/mod.rs +++ b/src/sql/types/mod.rs @@ -20,14 +20,12 @@ use std::time::Duration; use crate::sql::common::constants::sql_planning_default; -pub use data_type::convert_data_type; pub use df_field::{ DFField, fields_with_qualifiers, schema_from_df_fields, schema_from_df_fields_with_metadata, }; pub(crate) use placeholder_udf::PlaceholderUdf; -pub use stream_schema::StreamSchema; pub(crate) use window::WindowBehavior; -pub use window::{WindowType, find_window, get_duration}; +pub use window::{WindowType, find_window}; pub use crate::sql::common::constants::sql_field::TIMESTAMP_FIELD; diff --git a/src/storage/stream_catalog/mod.rs b/src/storage/stream_catalog/mod.rs index fea2e39f..1b893cea 100644 --- a/src/storage/stream_catalog/mod.rs +++ b/src/storage/stream_catalog/mod.rs @@ -18,7 +18,7 @@ mod meta_store; mod rocksdb_meta_store; pub use manager::{ - CatalogManager, initialize_stream_catalog, planning_schema_provider, + CatalogManager, initialize_stream_catalog, restore_global_catalog_from_store, }; pub use meta_store::{InMemoryMetaStore, MetaStore}; From 52610ec34d4328704453f695a8e29e3eab92a949 Mon Sep 17 00:00:00 2001 From: 
luoluoyuyu Date: Wed, 1 Apr 2026 20:30:11 +0800 Subject: [PATCH 37/44] update --- src/coordinator/analyze/analyzer.rs | 33 ++++- src/coordinator/dataset/mod.rs | 4 + .../show_create_streaming_table_result.rs | 64 +++++++++ .../dataset/show_streaming_tables_result.rs | 75 ++++++++++ src/coordinator/execution/executor.rs | 134 +++++++++++++++++- src/coordinator/mod.rs | 7 +- .../plan/drop_streaming_table_plan.rs | 34 +++++ src/coordinator/plan/logical_plan_visitor.rs | 44 +++++- src/coordinator/plan/mod.rs | 6 + .../plan/show_create_streaming_table_plan.rs | 30 ++++ .../plan/show_streaming_tables_plan.rs | 28 ++++ src/coordinator/plan/visitor.rs | 26 +++- .../statement/drop_streaming_table.rs | 40 ++++++ src/coordinator/statement/mod.rs | 10 ++ .../statement/show_create_streaming_table.rs | 36 +++++ .../statement/show_streaming_tables.rs | 33 +++++ src/coordinator/statement/visitor.rs | 25 +++- src/runtime/streaming/job/job_manager.rs | 133 ++++++++++++++++- src/runtime/streaming/job/mod.rs | 2 +- src/sql/parse.rs | 88 ++++++++++-- 20 files changed, 813 insertions(+), 39 deletions(-) create mode 100644 src/coordinator/dataset/show_create_streaming_table_result.rs create mode 100644 src/coordinator/dataset/show_streaming_tables_result.rs create mode 100644 src/coordinator/plan/drop_streaming_table_plan.rs create mode 100644 src/coordinator/plan/show_create_streaming_table_plan.rs create mode 100644 src/coordinator/plan/show_streaming_tables_plan.rs create mode 100644 src/coordinator/statement/drop_streaming_table.rs create mode 100644 src/coordinator/statement/show_create_streaming_table.rs create mode 100644 src/coordinator/statement/show_streaming_tables.rs diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs index dbac78cf..878a9481 100644 --- a/src/coordinator/analyze/analyzer.rs +++ b/src/coordinator/analyze/analyzer.rs @@ -13,10 +13,11 @@ use super::Analysis; use crate::coordinator::execution_context::ExecutionContext; use 
crate::coordinator::statement::{ - CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, - ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, Statement, - StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, - StreamingTableStatement, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, + DropStreamingTableStatement, DropTableStatement, ShowCatalogTables, + ShowCreateStreamingTable, ShowCreateTable, ShowFunctions, ShowStreamingTables, + StartFunction, Statement, StatementVisitor, StatementVisitorContext, + StatementVisitorResult, StopFunction, StreamingTableStatement, }; use std::fmt; @@ -159,4 +160,28 @@ impl StatementVisitor for Analyzer<'_> { ) -> StatementVisitorResult { StatementVisitorResult::Analyze(Box::new(DropTableStatement::new(stmt.statement.clone()))) } + + fn visit_show_streaming_tables( + &self, + stmt: &ShowStreamingTables, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Analyze(Box::new(stmt.clone())) + } + + fn visit_show_create_streaming_table( + &self, + stmt: &ShowCreateStreamingTable, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Analyze(Box::new(stmt.clone())) + } + + fn visit_drop_streaming_table( + &self, + stmt: &DropStreamingTableStatement, + _context: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Analyze(Box::new(stmt.clone())) + } } diff --git a/src/coordinator/dataset/mod.rs b/src/coordinator/dataset/mod.rs index f09c24ca..bbcac6f0 100644 --- a/src/coordinator/dataset/mod.rs +++ b/src/coordinator/dataset/mod.rs @@ -13,11 +13,15 @@ mod data_set; mod execute_result; mod show_catalog_tables_result; +mod show_create_streaming_table_result; mod show_create_table_result; mod show_functions_result; +mod show_streaming_tables_result; pub use data_set::{DataSet, empty_record_batch}; pub use execute_result::ExecuteResult; pub 
use show_catalog_tables_result::ShowCatalogTablesResult; +pub use show_create_streaming_table_result::ShowCreateStreamingTableResult; pub use show_create_table_result::ShowCreateTableResult; pub use show_functions_result::ShowFunctionsResult; +pub use show_streaming_tables_result::ShowStreamingTablesResult; diff --git a/src/coordinator/dataset/show_create_streaming_table_result.rs b/src/coordinator/dataset/show_create_streaming_table_result.rs new file mode 100644 index 00000000..2b9e0d0a --- /dev/null +++ b/src/coordinator/dataset/show_create_streaming_table_result.rs @@ -0,0 +1,64 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; + +use arrow_array::StringArray; +use arrow_schema::{DataType, Field, Schema}; + +use super::DataSet; + +#[derive(Clone, Debug)] +pub struct ShowCreateStreamingTableResult { + table_name: String, + status: String, + pipeline_detail: String, + program_json: String, +} + +impl ShowCreateStreamingTableResult { + pub fn new( + table_name: String, + status: String, + pipeline_detail: String, + program_json: String, + ) -> Self { + Self { + table_name, + status, + pipeline_detail, + program_json, + } + } +} + +impl DataSet for ShowCreateStreamingTableResult { + fn to_record_batch(&self) -> arrow_array::RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("Streaming Table", DataType::Utf8, false), + Field::new("Status", DataType::Utf8, false), + Field::new("Pipelines", DataType::Utf8, false), + Field::new("Program", DataType::Utf8, false), + ])); + + arrow_array::RecordBatch::try_new( + schema, + vec![ + Arc::new(StringArray::from(vec![self.table_name.as_str()])), + Arc::new(StringArray::from(vec![self.status.as_str()])), + Arc::new(StringArray::from(vec![self.pipeline_detail.as_str()])), + Arc::new(StringArray::from(vec![self.program_json.as_str()])), + ], + ) + .unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(Arc::new(Schema::empty()))) + } +} diff --git a/src/coordinator/dataset/show_streaming_tables_result.rs b/src/coordinator/dataset/show_streaming_tables_result.rs new file mode 100644 index 00000000..a992d1b9 --- /dev/null +++ b/src/coordinator/dataset/show_streaming_tables_result.rs @@ -0,0 +1,75 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use arrow_array::{Int32Array, StringArray}; +use arrow_schema::{DataType, Field, Schema}; + +use super::DataSet; +use crate::runtime::streaming::job::StreamingJobSummary; + +#[derive(Clone, Debug)] +pub struct ShowStreamingTablesResult { + jobs: Vec, +} + +impl ShowStreamingTablesResult { + pub fn new(jobs: Vec) -> Self { + Self { jobs } + } +} + +impl DataSet for ShowStreamingTablesResult { + fn to_record_batch(&self) -> arrow_array::RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("job_id", DataType::Utf8, false), + Field::new("status", DataType::Utf8, false), + Field::new("pipeline_count", DataType::Int32, false), + Field::new("uptime", DataType::Utf8, false), + ])); + + let job_ids: Vec<&str> = self.jobs.iter().map(|j| j.job_id.as_str()).collect(); + let statuses: Vec<&str> = self.jobs.iter().map(|j| j.status.as_str()).collect(); + let pipeline_counts: Vec = self.jobs.iter().map(|j| j.pipeline_count).collect(); + let uptimes: Vec = self.jobs.iter().map(|j| format_duration(j.uptime_secs)).collect(); + let uptime_refs: Vec<&str> = uptimes.iter().map(|s| s.as_str()).collect(); + + arrow_array::RecordBatch::try_new( + schema, + vec![ + Arc::new(StringArray::from(job_ids)), + Arc::new(StringArray::from(statuses)), + Arc::new(Int32Array::from(pipeline_counts)), + Arc::new(StringArray::from(uptime_refs)), + ], + ) + .unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(Arc::new(Schema::empty()))) + } +} + +fn format_duration(total_secs: u64) -> String { + let days = total_secs / 86400; + let hours = 
(total_secs % 86400) / 3600; + let mins = (total_secs % 3600) / 60; + let secs = total_secs % 60; + + if days > 0 { + format!("{days}d {hours}h {mins}m {secs}s") + } else if hours > 0 { + format!("{hours}h {mins}m {secs}s") + } else if mins > 0 { + format!("{mins}m {secs}s") + } else { + format!("{secs}s") + } +} diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 5372ed33..f52504e0 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -17,17 +17,21 @@ use thiserror::Error; use tracing::{debug, info}; use crate::coordinator::dataset::{ - empty_record_batch, ExecuteResult, ShowCatalogTablesResult, ShowCreateTableResult, - ShowFunctionsResult, + empty_record_batch, ExecuteResult, ShowCatalogTablesResult, + ShowCreateStreamingTableResult, ShowCreateTableResult, ShowFunctionsResult, + ShowStreamingTablesResult, }; use crate::coordinator::plan::{ CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, CreateTablePlanBody, - DropFunctionPlan, DropTablePlan, LookupTablePlan, PlanNode, PlanVisitor, PlanVisitorContext, - PlanVisitorResult, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan, - StartFunctionPlan, StopFunctionPlan, StreamingTable, StreamingTableConnectorPlan, + DropFunctionPlan, DropStreamingTablePlan, DropTablePlan, LookupTablePlan, PlanNode, + PlanVisitor, PlanVisitorContext, PlanVisitorResult, ShowCatalogTablesPlan, + ShowCreateStreamingTablePlan, ShowCreateTablePlan, ShowFunctionsPlan, + ShowStreamingTablesPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, + StreamingTableConnectorPlan, }; use crate::coordinator::statement::{ConfigSource, FunctionSource}; use crate::runtime::streaming::job::JobManager; +use crate::runtime::streaming::protocol::control::StopMode; use crate::runtime::taskexecutor::TaskManager; use crate::sql::schema::table::Table as CatalogTable; use crate::sql::schema::show_create_catalog_table; @@ -397,4 +401,124 @@ impl 
PlanVisitor for Executor { PlanVisitorResult::Execute(execute()) } + + fn visit_show_streaming_tables( + &self, + _plan: &ShowStreamingTablesPlan, + _context: &PlanVisitorContext, + ) -> PlanVisitorResult { + let execute = || -> Result { + let jobs = self.job_manager.list_jobs(); + let n = jobs.len(); + Ok(ExecuteResult::ok_with_data( + format!("{n} streaming table(s)"), + ShowStreamingTablesResult::new(jobs), + )) + }; + PlanVisitorResult::Execute(execute()) + } + + fn visit_show_create_streaming_table( + &self, + plan: &ShowCreateStreamingTablePlan, + _context: &PlanVisitorContext, + ) -> PlanVisitorResult { + let execute = || -> Result { + let detail = self + .job_manager + .get_job_detail(&plan.table_name) + .ok_or_else(|| { + ExecuteError::Validation(format!( + "Streaming table '{}' not found in active jobs", + plan.table_name + )) + })?; + + let pipeline_lines: Vec = detail + .pipelines + .iter() + .map(|p| format!(" pipeline[{}]: {}", p.pipeline_id, p.status)) + .collect(); + let pipeline_detail = if pipeline_lines.is_empty() { + "(no pipelines)".to_string() + } else { + pipeline_lines.join("\n") + }; + + let mut program_json = serde_json::Value::String(detail.program_json.clone()); + if let Ok(parsed) = serde_json::from_str::(&detail.program_json) { + let mut cleaned = parsed; + strip_noisy_fields(&mut cleaned); + program_json = cleaned; + } + let program_display = + serde_json::to_string_pretty(&program_json).unwrap_or(detail.program_json); + + Ok(ExecuteResult::ok_with_data( + format!("SHOW CREATE STREAMING TABLE {}", plan.table_name), + ShowCreateStreamingTableResult::new( + plan.table_name.clone(), + detail.status, + pipeline_detail, + program_display, + ), + )) + }; + PlanVisitorResult::Execute(execute()) + } + + fn visit_drop_streaming_table( + &self, + plan: &DropStreamingTablePlan, + _context: &PlanVisitorContext, + ) -> PlanVisitorResult { + let execute = || -> Result { + let job_exists = self.job_manager.has_job(&plan.table_name); + + if 
!job_exists && !plan.if_exists { + return Err(ExecuteError::Validation(format!( + "Streaming table '{}' not found in active jobs", + plan.table_name + ))); + } + + if job_exists { + let job_manager = Arc::clone(&self.job_manager); + let table_name = plan.table_name.clone(); + tokio::task::block_in_place(|| { + tokio::runtime::Handle::current() + .block_on(job_manager.remove_job(&table_name, StopMode::Graceful)) + }) + .map_err(|e| { + ExecuteError::Internal(format!( + "Failed to stop streaming job '{}': {}", + plan.table_name, e + )) + })?; + + info!( + table = %plan.table_name, + "Streaming job stopped and removed" + ); + } + + let _ = self + .catalog_manager + .drop_catalog_table(&plan.table_name, true); + + if job_exists { + Ok(ExecuteResult::ok(format!( + "Dropped streaming table '{}'", + plan.table_name + ))) + } else { + Ok(ExecuteResult::ok(format!( + "Streaming table '{}' does not exist (skipped)", + plan.table_name + ))) + } + }; + + PlanVisitorResult::Execute(execute()) + } } diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs index ca384a90..922b793f 100644 --- a/src/coordinator/mod.rs +++ b/src/coordinator/mod.rs @@ -24,7 +24,8 @@ mod tool; pub use coordinator::Coordinator; pub use dataset::{DataSet, ShowFunctionsResult}; pub use statement::{ - CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, - PythonModule, ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, Statement, - StopFunction, StreamingTableStatement, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, + DropStreamingTableStatement, DropTableStatement, PythonModule, ShowCatalogTables, + ShowCreateStreamingTable, ShowCreateTable, ShowFunctions, ShowStreamingTables, + StartFunction, Statement, StopFunction, StreamingTableStatement, }; diff --git a/src/coordinator/plan/drop_streaming_table_plan.rs b/src/coordinator/plan/drop_streaming_table_plan.rs new file mode 100644 index 00000000..d06dc836 --- /dev/null +++ 
b/src/coordinator/plan/drop_streaming_table_plan.rs @@ -0,0 +1,34 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; + +#[derive(Debug, Clone)] +pub struct DropStreamingTablePlan { + pub table_name: String, + pub if_exists: bool, +} + +impl DropStreamingTablePlan { + pub fn new(table_name: String, if_exists: bool) -> Self { + Self { + table_name, + if_exists, + } + } +} + +impl PlanNode for DropStreamingTablePlan { + fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { + visitor.visit_drop_streaming_table(self, context) + } +} diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs index b9cb4dfe..77fa9eb4 100644 --- a/src/coordinator/plan/logical_plan_visitor.rs +++ b/src/coordinator/plan/logical_plan_visitor.rs @@ -26,14 +26,17 @@ use tracing::debug; use crate::coordinator::analyze::analysis::Analysis; use crate::coordinator::plan::{ - CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan, - PlanNode, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan, StartFunctionPlan, - StopFunctionPlan, StreamingTable, + CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, + DropStreamingTablePlan, DropTablePlan, PlanNode, ShowCatalogTablesPlan, + ShowCreateStreamingTablePlan, ShowCreateTablePlan, ShowFunctionsPlan, + 
ShowStreamingTablesPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, }; use crate::coordinator::statement::{ - CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, - ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, StatementVisitor, - StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingTableStatement, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, + DropStreamingTableStatement, DropTableStatement, ShowCatalogTables, + ShowCreateStreamingTable, ShowCreateTable, ShowFunctions, ShowStreamingTables, + StartFunction, StatementVisitor, StatementVisitorContext, StatementVisitorResult, + StopFunction, StreamingTableStatement, }; use crate::coordinator::tool::ConnectorOptions; use crate::sql::analysis::{ @@ -459,4 +462,33 @@ impl StatementVisitor for LogicalPlanVisitor { *if_exists, ))) } + + fn visit_show_streaming_tables( + &self, + _stmt: &ShowStreamingTables, + _ctx: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Plan(Box::new(ShowStreamingTablesPlan::new())) + } + + fn visit_show_create_streaming_table( + &self, + stmt: &ShowCreateStreamingTable, + _ctx: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Plan(Box::new(ShowCreateStreamingTablePlan::new( + stmt.table_name.clone(), + ))) + } + + fn visit_drop_streaming_table( + &self, + stmt: &DropStreamingTableStatement, + _ctx: &StatementVisitorContext, + ) -> StatementVisitorResult { + StatementVisitorResult::Plan(Box::new(DropStreamingTablePlan::new( + stmt.table_name.clone(), + stmt.if_exists, + ))) + } } \ No newline at end of file diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs index b04234d5..8166d444 100644 --- a/src/coordinator/plan/mod.rs +++ b/src/coordinator/plan/mod.rs @@ -14,13 +14,16 @@ mod create_function_plan; mod create_python_function_plan; mod create_table_plan; mod drop_function_plan; +mod 
drop_streaming_table_plan; mod drop_table_plan; mod logical_plan_visitor; mod lookup_table_plan; mod optimizer; mod show_catalog_tables_plan; +mod show_create_streaming_table_plan; mod show_create_table_plan; mod show_functions_plan; +mod show_streaming_tables_plan; mod start_function_plan; mod stop_function_plan; mod streaming_table_connector_plan; @@ -31,13 +34,16 @@ pub use create_function_plan::CreateFunctionPlan; pub use create_python_function_plan::CreatePythonFunctionPlan; pub use create_table_plan::{CreateTablePlan, CreateTablePlanBody}; pub use drop_function_plan::DropFunctionPlan; +pub use drop_streaming_table_plan::DropStreamingTablePlan; pub use drop_table_plan::DropTablePlan; pub use logical_plan_visitor::LogicalPlanVisitor; pub use lookup_table_plan::LookupTablePlan; pub use optimizer::LogicalPlanner; pub use show_catalog_tables_plan::ShowCatalogTablesPlan; +pub use show_create_streaming_table_plan::ShowCreateStreamingTablePlan; pub use show_create_table_plan::ShowCreateTablePlan; pub use show_functions_plan::ShowFunctionsPlan; +pub use show_streaming_tables_plan::ShowStreamingTablesPlan; pub use start_function_plan::StartFunctionPlan; pub use stop_function_plan::StopFunctionPlan; pub use streaming_table_connector_plan::StreamingTableConnectorPlan; diff --git a/src/coordinator/plan/show_create_streaming_table_plan.rs b/src/coordinator/plan/show_create_streaming_table_plan.rs new file mode 100644 index 00000000..8d63c0d5 --- /dev/null +++ b/src/coordinator/plan/show_create_streaming_table_plan.rs @@ -0,0 +1,30 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; + +#[derive(Debug, Clone)] +pub struct ShowCreateStreamingTablePlan { + pub table_name: String, +} + +impl ShowCreateStreamingTablePlan { + pub fn new(table_name: String) -> Self { + Self { table_name } + } +} + +impl PlanNode for ShowCreateStreamingTablePlan { + fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { + visitor.visit_show_create_streaming_table(self, context) + } +} diff --git a/src/coordinator/plan/show_streaming_tables_plan.rs b/src/coordinator/plan/show_streaming_tables_plan.rs new file mode 100644 index 00000000..08410115 --- /dev/null +++ b/src/coordinator/plan/show_streaming_tables_plan.rs @@ -0,0 +1,28 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult}; + +#[derive(Debug, Default)] +pub struct ShowStreamingTablesPlan; + +impl ShowStreamingTablesPlan { + pub fn new() -> Self { + Self + } +} + +impl PlanNode for ShowStreamingTablesPlan { + fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult { + visitor.visit_show_streaming_tables(self, context) + } +} diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs index 28f11f53..bba44a1f 100644 --- a/src/coordinator/plan/visitor.rs +++ b/src/coordinator/plan/visitor.rs @@ -11,9 +11,11 @@ // limitations under the License. use super::{ - CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan, - LookupTablePlan, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan, - StartFunctionPlan, StopFunctionPlan, StreamingTable, StreamingTableConnectorPlan, + CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, + DropStreamingTablePlan, DropTablePlan, LookupTablePlan, ShowCatalogTablesPlan, + ShowCreateStreamingTablePlan, ShowCreateTablePlan, ShowFunctionsPlan, + ShowStreamingTablesPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable, + StreamingTableConnectorPlan, }; /// Context passed to PlanVisitor methods @@ -127,4 +129,22 @@ pub trait PlanVisitor { plan: &DropTablePlan, context: &PlanVisitorContext, ) -> PlanVisitorResult; + + fn visit_show_streaming_tables( + &self, + plan: &ShowStreamingTablesPlan, + context: &PlanVisitorContext, + ) -> PlanVisitorResult; + + fn visit_show_create_streaming_table( + &self, + plan: &ShowCreateStreamingTablePlan, + context: &PlanVisitorContext, + ) -> PlanVisitorResult; + + fn visit_drop_streaming_table( + &self, + plan: &DropStreamingTablePlan, + context: &PlanVisitorContext, + ) -> PlanVisitorResult; } diff --git a/src/coordinator/statement/drop_streaming_table.rs b/src/coordinator/statement/drop_streaming_table.rs new file 
mode 100644 index 00000000..309abd97 --- /dev/null +++ b/src/coordinator/statement/drop_streaming_table.rs @@ -0,0 +1,40 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; + +/// `DROP STREAMING TABLE [IF EXISTS] ` — stops and removes the streaming +/// job from `JobManager`, then drops the corresponding catalog entry if present. +#[derive(Debug, Clone)] +pub struct DropStreamingTableStatement { + pub table_name: String, + pub if_exists: bool, +} + +impl DropStreamingTableStatement { + pub fn new(table_name: String, if_exists: bool) -> Self { + Self { + table_name, + if_exists, + } + } +} + +impl Statement for DropStreamingTableStatement { + fn accept( + &self, + visitor: &dyn StatementVisitor, + context: &StatementVisitorContext, + ) -> StatementVisitorResult { + visitor.visit_drop_streaming_table(self, context) + } +} diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs index 83f6ca0e..80d9c320 100644 --- a/src/coordinator/statement/mod.rs +++ b/src/coordinator/statement/mod.rs @@ -14,10 +14,13 @@ mod create_function; mod create_python_function; mod create_table; mod drop_function; +mod drop_streaming_table; mod drop_table; mod show_catalog_tables; +mod show_create_streaming_table; mod show_create_table; mod show_functions; +mod show_streaming_tables; mod start_function; mod stop_function; mod streaming_table; @@ -27,10 +30,13 @@ pub use 
create_function::{ConfigSource, CreateFunction, FunctionSource}; pub use create_python_function::{CreatePythonFunction, PythonModule}; pub use create_table::CreateTable; pub use drop_function::DropFunction; +pub use drop_streaming_table::DropStreamingTableStatement; pub use drop_table::DropTableStatement; pub use show_catalog_tables::ShowCatalogTables; +pub use show_create_streaming_table::ShowCreateStreamingTable; pub use show_create_table::ShowCreateTable; pub use show_functions::ShowFunctions; +pub use show_streaming_tables::ShowStreamingTables; pub use start_function::StartFunction; pub use stop_function::StopFunction; pub use streaming_table::StreamingTableStatement; @@ -56,4 +62,8 @@ pub trait Statement: fmt::Debug + Send + Sync { fn as_streaming_table_statement(&self) -> Option<&StreamingTableStatement> { None } + + fn as_drop_streaming_table_statement(&self) -> Option<&DropStreamingTableStatement> { + None + } } diff --git a/src/coordinator/statement/show_create_streaming_table.rs b/src/coordinator/statement/show_create_streaming_table.rs new file mode 100644 index 00000000..73f16870 --- /dev/null +++ b/src/coordinator/statement/show_create_streaming_table.rs @@ -0,0 +1,36 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; + +/// `SHOW CREATE STREAMING TABLE ` — displays the pipeline topology and +/// runtime metadata for the named streaming job. 
+#[derive(Debug, Clone)] +pub struct ShowCreateStreamingTable { + pub table_name: String, +} + +impl ShowCreateStreamingTable { + pub fn new(table_name: String) -> Self { + Self { table_name } + } +} + +impl Statement for ShowCreateStreamingTable { + fn accept( + &self, + visitor: &dyn StatementVisitor, + context: &StatementVisitorContext, + ) -> StatementVisitorResult { + visitor.visit_show_create_streaming_table(self, context) + } +} diff --git a/src/coordinator/statement/show_streaming_tables.rs b/src/coordinator/statement/show_streaming_tables.rs new file mode 100644 index 00000000..cedf3610 --- /dev/null +++ b/src/coordinator/statement/show_streaming_tables.rs @@ -0,0 +1,33 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult}; + +/// `SHOW STREAMING TABLES` — lists all active streaming jobs managed by `JobManager`. 
+#[derive(Debug, Clone, Default)] +pub struct ShowStreamingTables; + +impl ShowStreamingTables { + pub fn new() -> Self { + Self + } +} + +impl Statement for ShowStreamingTables { + fn accept( + &self, + visitor: &dyn StatementVisitor, + context: &StatementVisitorContext, + ) -> StatementVisitorResult { + visitor.visit_show_streaming_tables(self, context) + } +} diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs index f24c85be..c3cf153a 100644 --- a/src/coordinator/statement/visitor.rs +++ b/src/coordinator/statement/visitor.rs @@ -11,9 +11,10 @@ // limitations under the License. use super::{ - CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement, - ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, StopFunction, - StreamingTableStatement, + CreateFunction, CreatePythonFunction, CreateTable, DropFunction, + DropStreamingTableStatement, DropTableStatement, ShowCatalogTables, + ShowCreateStreamingTable, ShowCreateTable, ShowFunctions, ShowStreamingTables, + StartFunction, StopFunction, StreamingTableStatement, }; use crate::coordinator::plan::PlanNode; use crate::coordinator::statement::Statement; @@ -119,4 +120,22 @@ pub trait StatementVisitor { stmt: &DropTableStatement, context: &StatementVisitorContext, ) -> StatementVisitorResult; + + fn visit_show_streaming_tables( + &self, + stmt: &ShowStreamingTables, + context: &StatementVisitorContext, + ) -> StatementVisitorResult; + + fn visit_show_create_streaming_table( + &self, + stmt: &ShowCreateStreamingTable, + context: &StatementVisitorContext, + ) -> StatementVisitorResult; + + fn visit_drop_streaming_table( + &self, + stmt: &DropStreamingTableStatement, + context: &StatementVisitorContext, + ) -> StatementVisitorResult; } diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs index 88df6457..5508ab70 100644 --- a/src/runtime/streaming/job/job_manager.rs +++ 
b/src/runtime/streaming/job/job_manager.rs @@ -32,6 +32,30 @@ use crate::runtime::streaming::memory::MemoryPool; use crate::runtime::streaming::network::endpoint::{BoxedEventStream, PhysicalSender}; use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode}; +#[derive(Debug, Clone)] +pub struct StreamingJobSummary { + pub job_id: String, + pub status: String, + pub pipeline_count: i32, + pub uptime_secs: u64, +} + +#[derive(Debug, Clone)] +pub struct PipelineDetail { + pub pipeline_id: u32, + pub status: String, +} + +#[derive(Debug, Clone)] +pub struct StreamingJobDetail { + pub job_id: String, + pub status: String, + pub pipeline_count: i32, + pub uptime_secs: u64, + pub pipelines: Vec, + pub program_json: String, +} + static GLOBAL_JOB_MANAGER: OnceLock> = OnceLock::new(); pub struct JobManager { @@ -178,7 +202,114 @@ impl JobManager { ) } - // ======================================================================== + pub fn list_jobs(&self) -> Vec { + let jobs_guard = self.active_jobs.read().unwrap(); + jobs_guard + .values() + .map(|graph| { + let pipeline_count = graph.pipelines.len() as i32; + let uptime_secs = graph.start_time.elapsed().as_secs(); + let status = Self::aggregate_pipeline_status(&graph.pipelines); + StreamingJobSummary { + job_id: graph.job_id.clone(), + status, + pipeline_count, + uptime_secs, + } + }) + .collect() + } + + pub fn get_job_detail(&self, job_id: &str) -> Option { + let jobs_guard = self.active_jobs.read().unwrap(); + let graph = jobs_guard.get(job_id)?; + + let uptime_secs = graph.start_time.elapsed().as_secs(); + let overall_status = Self::aggregate_pipeline_status(&graph.pipelines); + + let pipeline_details: Vec = graph + .pipelines + .iter() + .map(|(id, pipeline)| { + let status = pipeline.status.read().unwrap().clone(); + PipelineDetail { + pipeline_id: *id, + status: format!("{status:?}"), + } + }) + .collect(); + + let program_json = serde_json::to_string_pretty(&graph.program).unwrap_or_else(|e| { + 
format!("{{\"error\": \"Failed to serialize program: {e}\"}}") + }); + + Some(StreamingJobDetail { + job_id: graph.job_id.clone(), + status: overall_status, + pipeline_count: graph.pipelines.len() as i32, + uptime_secs, + pipelines: pipeline_details, + program_json, + }) + } + + pub fn has_job(&self, job_id: &str) -> bool { + self.active_jobs.read().unwrap().contains_key(job_id) + } + + pub async fn remove_job(&self, job_id: &str, mode: StopMode) -> anyhow::Result<()> { + { + let jobs_guard = self.active_jobs.read().unwrap(); + if !jobs_guard.contains_key(job_id) { + anyhow::bail!("Job not found: {job_id}"); + } + let graph = &jobs_guard[job_id]; + let control_senders: Vec<_> = + graph.pipelines.values().map(|p| p.control_tx.clone()).collect(); + + drop(jobs_guard); + + for tx in control_senders { + let _ = tx.send(ControlCommand::Stop { mode: mode.clone() }).await; + } + } + + self.active_jobs.write().unwrap().remove(job_id); + info!(job_id = %job_id, "Job stopped and removed."); + Ok(()) + } + + fn aggregate_pipeline_status( + pipelines: &HashMap, + ) -> String { + let mut running = 0u32; + let mut failed = 0u32; + let mut finished = 0u32; + let mut initializing = 0u32; + + for pipeline in pipelines.values() { + match &*pipeline.status.read().unwrap() { + PipelineStatus::Running => running += 1, + PipelineStatus::Failed { .. 
} => failed += 1, + PipelineStatus::Finished => finished += 1, + PipelineStatus::Initializing => initializing += 1, + PipelineStatus::Stopping => {} + } + } + + if failed > 0 { + "DEGRADED".to_string() + } else if running > 0 && running == pipelines.len() as u32 { + "RUNNING".to_string() + } else if finished == pipelines.len() as u32 { + "FINISHED".to_string() + } else if initializing > 0 { + "INITIALIZING".to_string() + } else { + "PARTIAL".to_string() + } + } + // ======================================================================== fn build_operator_chain( diff --git a/src/runtime/streaming/job/mod.rs b/src/runtime/streaming/job/mod.rs index 5b2bbd8c..02e0343c 100644 --- a/src/runtime/streaming/job/mod.rs +++ b/src/runtime/streaming/job/mod.rs @@ -14,4 +14,4 @@ pub mod edge_manager; pub mod job_manager; pub mod models; -pub use job_manager::JobManager; +pub use job_manager::{JobManager, StreamingJobSummary}; diff --git a/src/sql/parse.rs b/src/sql/parse.rs index 33bb13ad..5fd4a59f 100644 --- a/src/sql/parse.rs +++ b/src/sql/parse.rs @@ -38,24 +38,65 @@ use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; use datafusion::sql::sqlparser::parser::Parser; use crate::coordinator::{ - CreateFunction, CreateTable, DropFunction, DropTableStatement, ShowCatalogTables, - ShowCreateTable, ShowFunctions, StartFunction, Statement as CoordinatorStatement, StopFunction, + CreateFunction, CreateTable, DropFunction, DropStreamingTableStatement, DropTableStatement, + ShowCatalogTables, ShowCreateStreamingTable, ShowCreateTable, ShowFunctions, + ShowStreamingTables, StartFunction, Statement as CoordinatorStatement, StopFunction, StreamingTableStatement, }; -/// `DROP STREAMING TABLE t` is accepted as sugar for `DROP TABLE t` against the same catalog. 
-fn rewrite_drop_streaming_table(sql: &str) -> String { - let trimmed = sql.trim_start(); - let tokens: Vec<&str> = trimmed.split_whitespace().collect(); +/// Streaming-specific SQL that the sqlparser dialect does not natively handle. +/// +/// Returns `Some(statement)` if the SQL was intercepted, `None` otherwise so +/// the caller falls through to the normal sqlparser pipeline. +fn try_parse_streaming_statement(sql: &str) -> Option> { + let tokens: Vec<&str> = sql.split_whitespace().collect(); + if tokens.is_empty() { + return None; + } + + // SHOW STREAMING TABLES + if tokens.len() == 3 + && tokens[0].eq_ignore_ascii_case("show") + && tokens[1].eq_ignore_ascii_case("streaming") + && tokens[2].eq_ignore_ascii_case("tables") + { + return Some(Box::new(ShowStreamingTables::new())); + } + + // SHOW CREATE STREAMING TABLE + if tokens.len() == 5 + && tokens[0].eq_ignore_ascii_case("show") + && tokens[1].eq_ignore_ascii_case("create") + && tokens[2].eq_ignore_ascii_case("streaming") + && tokens[3].eq_ignore_ascii_case("table") + { + let name = tokens[4].trim_end_matches(';').to_string(); + return Some(Box::new(ShowCreateStreamingTable::new(name))); + } + + // DROP STREAMING TABLE [IF EXISTS] if tokens.len() >= 4 && tokens[0].eq_ignore_ascii_case("drop") && tokens[1].eq_ignore_ascii_case("streaming") && tokens[2].eq_ignore_ascii_case("table") { - let rest = tokens[3..].join(" "); - return format!("DROP TABLE {rest}"); + let (if_exists, name_idx) = if tokens.len() >= 6 + && tokens[3].eq_ignore_ascii_case("if") + && tokens[4].eq_ignore_ascii_case("exists") + { + (true, 5) + } else { + (false, 3) + }; + + if name_idx >= tokens.len() { + return None; + } + let name = tokens[name_idx].trim_end_matches(';').to_string(); + return Some(Box::new(DropStreamingTableStatement::new(name, if_exists))); } - sql.to_string() + + None } pub fn parse_sql(query: &str) -> Result>> { @@ -64,9 +105,12 @@ pub fn parse_sql(query: &str) -> Result>> { return plan_err!("Query is empty"); } + if 
let Some(stmt) = try_parse_streaming_statement(trimmed) { + return Ok(vec![stmt]); + } + let dialect = FunctionStreamDialect {}; - let to_parse = rewrite_drop_streaming_table(trimmed); - let statements = Parser::parse_sql(&dialect, &to_parse) + let statements = Parser::parse_sql(&dialect, trimmed) .map_err(|e| DataFusionError::Plan(format!("SQL parse error: {e}")))?; if statements.is_empty() { @@ -242,9 +286,27 @@ mod tests { } #[test] - fn test_parse_drop_streaming_table_rewritten() { + fn test_parse_drop_streaming_table() { let stmt = first_stmt("DROP STREAMING TABLE my_sink"); - assert!(is_type(stmt.as_ref(), "DropTableStatement")); + assert!(is_type(stmt.as_ref(), "DropStreamingTableStatement")); + } + + #[test] + fn test_parse_drop_streaming_table_if_exists() { + let stmt = first_stmt("DROP STREAMING TABLE IF EXISTS my_sink"); + assert!(is_type(stmt.as_ref(), "DropStreamingTableStatement")); + } + + #[test] + fn test_parse_show_streaming_tables() { + let stmt = first_stmt("SHOW STREAMING TABLES"); + assert!(is_type(stmt.as_ref(), "ShowStreamingTables")); + } + + #[test] + fn test_parse_show_create_streaming_table() { + let stmt = first_stmt("SHOW CREATE STREAMING TABLE my_sink"); + assert!(is_type(stmt.as_ref(), "ShowCreateStreamingTable")); } /// `CREATE STREAMING TABLE` is the sink DDL supported by FunctionStream (not `CREATE STREAM TABLE`). 
From d1bf1c70568ebd602881d7956c0bc718cda04b7f Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 1 Apr 2026 21:01:09 +0800 Subject: [PATCH 38/44] update --- .../show_create_streaming_table_result.rs | 10 +- src/coordinator/execution/executor.rs | 11 +- src/runtime/streaming/job/job_manager.rs | 10 +- src/sql/common/mod.rs | 2 + src/sql/common/topology.rs | 280 ++++++++++++++++++ 5 files changed, 293 insertions(+), 20 deletions(-) create mode 100644 src/sql/common/topology.rs diff --git a/src/coordinator/dataset/show_create_streaming_table_result.rs b/src/coordinator/dataset/show_create_streaming_table_result.rs index 2b9e0d0a..28f0069e 100644 --- a/src/coordinator/dataset/show_create_streaming_table_result.rs +++ b/src/coordinator/dataset/show_create_streaming_table_result.rs @@ -22,7 +22,7 @@ pub struct ShowCreateStreamingTableResult { table_name: String, status: String, pipeline_detail: String, - program_json: String, + topology: String, } impl ShowCreateStreamingTableResult { @@ -30,13 +30,13 @@ impl ShowCreateStreamingTableResult { table_name: String, status: String, pipeline_detail: String, - program_json: String, + topology: String, ) -> Self { Self { table_name, status, pipeline_detail, - program_json, + topology, } } } @@ -47,7 +47,7 @@ impl DataSet for ShowCreateStreamingTableResult { Field::new("Streaming Table", DataType::Utf8, false), Field::new("Status", DataType::Utf8, false), Field::new("Pipelines", DataType::Utf8, false), - Field::new("Program", DataType::Utf8, false), + Field::new("Topology", DataType::Utf8, false), ])); arrow_array::RecordBatch::try_new( @@ -56,7 +56,7 @@ impl DataSet for ShowCreateStreamingTableResult { Arc::new(StringArray::from(vec![self.table_name.as_str()])), Arc::new(StringArray::from(vec![self.status.as_str()])), Arc::new(StringArray::from(vec![self.pipeline_detail.as_str()])), - Arc::new(StringArray::from(vec![self.program_json.as_str()])), + Arc::new(StringArray::from(vec![self.topology.as_str()])), ], ) 
.unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(Arc::new(Schema::empty()))) diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index f52504e0..9907dbe8 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -445,22 +445,13 @@ impl PlanVisitor for Executor { pipeline_lines.join("\n") }; - let mut program_json = serde_json::Value::String(detail.program_json.clone()); - if let Ok(parsed) = serde_json::from_str::(&detail.program_json) { - let mut cleaned = parsed; - strip_noisy_fields(&mut cleaned); - program_json = cleaned; - } - let program_display = - serde_json::to_string_pretty(&program_json).unwrap_or(detail.program_json); - Ok(ExecuteResult::ok_with_data( format!("SHOW CREATE STREAMING TABLE {}", plan.table_name), ShowCreateStreamingTableResult::new( plan.table_name.clone(), detail.status, pipeline_detail, - program_display, + detail.topology, ), )) }; diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs index 5508ab70..3a400b54 100644 --- a/src/runtime/streaming/job/job_manager.rs +++ b/src/runtime/streaming/job/job_manager.rs @@ -20,6 +20,8 @@ use tracing::{error, info, warn}; use protocol::grpc::api::{ChainedOperator, FsProgram}; +use crate::sql::common::render_program_topology; + use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::operator::{ConstructedOperator, Operator}; use crate::runtime::streaming::api::source::SourceOperator; @@ -53,7 +55,7 @@ pub struct StreamingJobDetail { pub pipeline_count: i32, pub uptime_secs: u64, pub pipelines: Vec, - pub program_json: String, + pub topology: String, } static GLOBAL_JOB_MANAGER: OnceLock> = OnceLock::new(); @@ -239,9 +241,7 @@ impl JobManager { }) .collect(); - let program_json = serde_json::to_string_pretty(&graph.program).unwrap_or_else(|e| { - format!("{{\"error\": \"Failed to serialize program: {e}\"}}") - }); + let topology = 
render_program_topology(&graph.program); Some(StreamingJobDetail { job_id: graph.job_id.clone(), @@ -249,7 +249,7 @@ impl JobManager { pipeline_count: graph.pipelines.len() as i32, uptime_secs, pipelines: pipeline_details, - program_json, + topology, }) } diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs index 03e9b4a6..4c0cc6d3 100644 --- a/src/sql/common/mod.rs +++ b/src/sql/common/mod.rs @@ -34,6 +34,7 @@ pub mod task_info; pub mod time_utils; pub mod worker; pub mod converter; +pub mod topology; // ── Re-exports from existing modules ── pub use arrow_ext::FsExtensionType; @@ -48,6 +49,7 @@ pub use operator_config::MetadataField; // ── Well-known column names ── pub use constants::sql_field::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; +pub use topology::render_program_topology; // ── Environment variables ── pub const JOB_ID_ENV: &str = "JOB_ID"; diff --git a/src/sql/common/topology.rs b/src/sql/common/topology.rs new file mode 100644 index 00000000..bc71d57f --- /dev/null +++ b/src/sql/common/topology.rs @@ -0,0 +1,280 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! EXPLAIN-like DAG text renderer for [`FsProgram`]. +//! +//! Renders a streaming pipeline topology as a human-readable ASCII graph using +//! Kahn's topological sort. Handles linear chains, fan-out, and fan-in (JOIN). 
+ +use std::collections::{BTreeMap, VecDeque}; +use std::fmt::Write; + +use protocol::grpc::api::FsProgram; + +fn edge_type_label(edge_type: i32) -> &'static str { + match edge_type { + 1 => "Forward", + 2 => "Shuffle", + 3 => "LeftJoin", + 4 => "RightJoin", + _ => "Unknown", + } +} + +/// Render an [`FsProgram`] as an EXPLAIN-style topology string. +pub fn render_program_topology(program: &FsProgram) -> String { + if program.nodes.is_empty() { + return "(empty topology)".to_string(); + } + + struct EdgeInfo { target: i32, edge_type: i32 } + struct InputInfo { source: i32, edge_type: i32 } + + let node_map: BTreeMap = + program.nodes.iter().map(|n| (n.node_index, n)).collect(); + + let mut downstream: BTreeMap> = BTreeMap::new(); + let mut upstream: BTreeMap> = BTreeMap::new(); + let mut in_degree: BTreeMap = BTreeMap::new(); + + for idx in node_map.keys() { + in_degree.entry(*idx).or_insert(0); + } + for edge in &program.edges { + downstream.entry(edge.source).or_default().push(EdgeInfo { + target: edge.target, + edge_type: edge.edge_type, + }); + upstream.entry(edge.target).or_default().push(InputInfo { + source: edge.source, + edge_type: edge.edge_type, + }); + *in_degree.entry(edge.target).or_insert(0) += 1; + } + + // Kahn's topological sort + let mut queue: VecDeque = in_degree + .iter() + .filter(|(_, deg)| **deg == 0) + .map(|(idx, _)| *idx) + .collect(); + let mut topo_order: Vec = Vec::with_capacity(node_map.len()); + let mut remaining = in_degree.clone(); + while let Some(idx) = queue.pop_front() { + topo_order.push(idx); + if let Some(edges) = downstream.get(&idx) { + for e in edges { + if let Some(deg) = remaining.get_mut(&e.target) { + *deg -= 1; + if *deg == 0 { + queue.push_back(e.target); + } + } + } + } + } + for idx in node_map.keys() { + if !topo_order.contains(idx) { + topo_order.push(*idx); + } + } + + let is_source = |idx: &i32| upstream.get(idx).map_or(true, |v| v.is_empty()); + let is_sink = |idx: &i32| downstream.get(idx).map_or(true, |v| 
v.is_empty()); + + let mut out = String::new(); + let _ = writeln!( + out, + "Pipeline Topology ({} nodes, {} edges)", + program.nodes.len(), + program.edges.len(), + ); + let _ = writeln!(out, "{}", "=".repeat(50)); + + for (pos, &node_idx) in topo_order.iter().enumerate() { + let Some(node) = node_map.get(&node_idx) else { + continue; + }; + + let op_chain: String = node + .operators + .iter() + .map(|op| op.operator_name.as_str()) + .collect::>() + .join(" -> "); + + let role = if is_source(&node_idx) { + "Source" + } else if is_sink(&node_idx) { + "Sink" + } else { + "Operator" + }; + + let _ = writeln!(out); + let _ = writeln!( + out, + "[{role}] Node {node_idx} parallelism = {}", + node.parallelism, + ); + let _ = writeln!(out, " operators: {op_chain}"); + + if !node.description.is_empty() { + let _ = writeln!(out, " desc: {}", node.description); + } + + if let Some(inputs) = upstream.get(&node_idx) { + if inputs.len() == 1 { + let i = &inputs[0]; + let _ = writeln!( + out, + " input: <-- [{}] Node {}", + edge_type_label(i.edge_type), + i.source, + ); + } else if inputs.len() > 1 { + let _ = writeln!(out, " inputs:"); + for i in inputs { + let _ = writeln!( + out, + " <-- [{}] Node {}", + edge_type_label(i.edge_type), + i.source, + ); + } + } + } + + if let Some(outputs) = downstream.get(&node_idx) { + if outputs.len() == 1 { + let e = &outputs[0]; + let _ = writeln!( + out, + " output: --> [{}] Node {}", + edge_type_label(e.edge_type), + e.target, + ); + } else if outputs.len() > 1 { + let _ = writeln!(out, " outputs:"); + for e in outputs { + let _ = writeln!( + out, + " --> [{}] Node {}", + edge_type_label(e.edge_type), + e.target, + ); + } + } + } + + if pos < topo_order.len() - 1 { + let single_out = downstream.get(&node_idx).map_or(false, |v| v.len() == 1); + let next_idx = topo_order.get(pos + 1).copied(); + let is_direct = single_out + && next_idx.map_or(false, |n| { + downstream.get(&node_idx).map_or(false, |v| v[0].target == n) + }); + let 
next_single_in = next_idx + .and_then(|n| upstream.get(&n)) + .map_or(false, |v| v.len() == 1); + + if is_direct && next_single_in { + let etype = downstream.get(&node_idx).unwrap()[0].edge_type; + let _ = writeln!(out, " |"); + let _ = writeln!(out, " | {}", edge_type_label(etype)); + let _ = writeln!(out, " v"); + } + } + } + + out.trim_end().to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + use protocol::grpc::api::{ChainedOperator, FsEdge, FsNode, FsProgram}; + + fn make_node(node_index: i32, operators: Vec<(&str, &str)>, desc: &str, parallelism: u32) -> FsNode { + FsNode { + node_index, + node_id: node_index as u32, + parallelism, + description: desc.to_string(), + operators: operators + .into_iter() + .map(|(id, name)| ChainedOperator { + operator_id: id.to_string(), + operator_name: name.to_string(), + operator_config: Vec::new(), + }) + .collect(), + edges: Vec::new(), + } + } + + fn make_edge(source: i32, target: i32, edge_type: i32) -> FsEdge { + FsEdge { source, target, schema: None, edge_type } + } + + #[test] + fn empty_program_renders_placeholder() { + let program = FsProgram { nodes: vec![], edges: vec![], program_config: None }; + assert_eq!(render_program_topology(&program), "(empty topology)"); + } + + #[test] + fn linear_pipeline_renders_correctly() { + let program = FsProgram { + nodes: vec![ + make_node(0, vec![("src_0", "ConnectorSource")], "", 1), + make_node(1, vec![("val_1", "Value"), ("wm_2", "ExpressionWatermark")], "source -> watermark", 1), + make_node(2, vec![("sink_3", "ConnectorSink")], "sink (kafka)", 1), + ], + edges: vec![ + make_edge(0, 1, 1), + make_edge(1, 2, 1), + ], + program_config: None, + }; + let result = render_program_topology(&program); + assert!(result.contains("[Source] Node 0")); + assert!(result.contains("[Operator] Node 1")); + assert!(result.contains("[Sink] Node 2")); + assert!(result.contains("ConnectorSource")); + assert!(result.contains("Value -> ExpressionWatermark")); + 
assert!(result.contains("Forward")); + } + + #[test] + fn join_topology_shows_multiple_inputs() { + let program = FsProgram { + nodes: vec![ + make_node(0, vec![("src_a", "ConnectorSource")], "source A", 1), + make_node(1, vec![("src_b", "ConnectorSource")], "source B", 1), + make_node(2, vec![("join_0", "WindowJoin")], "join node", 2), + make_node(3, vec![("sink_0", "ConnectorSink")], "sink", 1), + ], + edges: vec![ + make_edge(0, 2, 3), // LeftJoin + make_edge(1, 2, 4), // RightJoin + make_edge(2, 3, 1), // Forward + ], + program_config: None, + }; + let result = render_program_topology(&program); + assert!(result.contains("inputs:")); + assert!(result.contains("LeftJoin")); + assert!(result.contains("RightJoin")); + assert!(result.contains("[Operator] Node 2")); + } +} From 871a1cf215270a49a420417718ec72359f5db8c2 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 1 Apr 2026 21:06:28 +0800 Subject: [PATCH 39/44] update --- .../show_create_streaming_table_result.rs | 13 +++++++---- src/coordinator/execution/executor.rs | 23 +------------------ src/runtime/streaming/job/job_manager.rs | 8 ++----- 3 files changed, 12 insertions(+), 32 deletions(-) diff --git a/src/coordinator/dataset/show_create_streaming_table_result.rs b/src/coordinator/dataset/show_create_streaming_table_result.rs index 28f0069e..ed3ec600 100644 --- a/src/coordinator/dataset/show_create_streaming_table_result.rs +++ b/src/coordinator/dataset/show_create_streaming_table_result.rs @@ -14,6 +14,9 @@ use std::sync::Arc; use arrow_array::StringArray; use arrow_schema::{DataType, Field, Schema}; +use protocol::grpc::api::FsProgram; + +use crate::sql::common::render_program_topology; use super::DataSet; @@ -22,7 +25,7 @@ pub struct ShowCreateStreamingTableResult { table_name: String, status: String, pipeline_detail: String, - topology: String, + program: FsProgram, } impl ShowCreateStreamingTableResult { @@ -30,19 +33,21 @@ impl ShowCreateStreamingTableResult { table_name: String, status: String, 
pipeline_detail: String, - topology: String, + program: FsProgram, ) -> Self { Self { table_name, status, pipeline_detail, - topology, + program, } } } impl DataSet for ShowCreateStreamingTableResult { fn to_record_batch(&self) -> arrow_array::RecordBatch { + let topology = render_program_topology(&self.program); + let schema = Arc::new(Schema::new(vec![ Field::new("Streaming Table", DataType::Utf8, false), Field::new("Status", DataType::Utf8, false), @@ -56,7 +61,7 @@ impl DataSet for ShowCreateStreamingTableResult { Arc::new(StringArray::from(vec![self.table_name.as_str()])), Arc::new(StringArray::from(vec![self.status.as_str()])), Arc::new(StringArray::from(vec![self.pipeline_detail.as_str()])), - Arc::new(StringArray::from(vec![self.topology.as_str()])), + Arc::new(StringArray::from(vec![topology.as_str()])), ], ) .unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(Arc::new(Schema::empty()))) diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index 9907dbe8..dcfbcb83 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -88,27 +88,6 @@ impl Executor { } -fn strip_noisy_fields(value: &mut serde_json::Value) { - match value { - serde_json::Value::Object(map) => { - // 兼容 camelCase 和 snake_case,直接把配置项连根拔起 - map.remove("operatorConfig"); - map.remove("operator_config"); - - // 继续向子节点递归 - for (_, v) in map.iter_mut() { - strip_noisy_fields(v); - } - } - serde_json::Value::Array(arr) => { - for v in arr.iter_mut() { - strip_noisy_fields(v); - } - } - _ => {} - } -} - impl PlanVisitor for Executor { fn visit_create_function( &self, @@ -451,7 +430,7 @@ impl PlanVisitor for Executor { plan.table_name.clone(), detail.status, pipeline_detail, - detail.topology, + detail.program, ), )) }; diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs index 3a400b54..19a8a26e 100644 --- a/src/runtime/streaming/job/job_manager.rs +++ 
b/src/runtime/streaming/job/job_manager.rs @@ -20,8 +20,6 @@ use tracing::{error, info, warn}; use protocol::grpc::api::{ChainedOperator, FsProgram}; -use crate::sql::common::render_program_topology; - use crate::runtime::streaming::api::context::TaskContext; use crate::runtime::streaming::api::operator::{ConstructedOperator, Operator}; use crate::runtime::streaming::api::source::SourceOperator; @@ -55,7 +53,7 @@ pub struct StreamingJobDetail { pub pipeline_count: i32, pub uptime_secs: u64, pub pipelines: Vec, - pub topology: String, + pub program: FsProgram, } static GLOBAL_JOB_MANAGER: OnceLock> = OnceLock::new(); @@ -241,15 +239,13 @@ impl JobManager { }) .collect(); - let topology = render_program_topology(&graph.program); - Some(StreamingJobDetail { job_id: graph.job_id.clone(), status: overall_status, pipeline_count: graph.pipelines.len() as i32, uptime_secs, pipelines: pipeline_details, - topology, + program: graph.program.clone(), }) } From ff5ec464becaa851754dd5df1a5681a221760592 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 1 Apr 2026 21:19:07 +0800 Subject: [PATCH 40/44] update --- src/sql/analysis/source_rewriter.rs | 31 ----------------------------- 1 file changed, 31 deletions(-) diff --git a/src/sql/analysis/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs index 39df350e..0ade3ea1 100644 --- a/src/sql/analysis/source_rewriter.rs +++ b/src/sql/analysis/source_rewriter.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::collections::HashSet; use std::sync::Arc; use std::time::Duration; @@ -25,7 +24,6 @@ use crate::sql::schema::source_table::SourceTable; use crate::sql::schema::ColumnDescriptor; use crate::sql::schema::table::Table; use crate::sql::schema::StreamSchemaProvider; -use crate::sql::common::constants::sql_field; use crate::sql::common::UPDATING_META_FIELD; use crate::sql::extensions::debezium::UnrollDebeziumPayloadNode; use crate::sql::extensions::remote_table::RemoteTableBoundaryNode; @@ -48,35 +46,6 @@ impl<'a> SourceRewriter<'a> { } impl SourceRewriter<'_> { - /// Output column names after stream-catalog source projection (physical fields plus optional - /// `_timestamp` alias when event time is renamed). - fn stream_source_projected_column_names( - schema: &datafusion::arrow::datatypes::Schema, - event_time_field: Option<&str>, - ) -> HashSet { - let mut names: HashSet = - schema.fields().iter().map(|f| f.name().clone()).collect(); - if let Some(et) = event_time_field { - if et != TIMESTAMP_FIELD { - names.insert(TIMESTAMP_FIELD.to_string()); - } - } - names - } - - /// Resolves watermark column for [`StreamTable::Source`]: drop computed `__watermark` and any - /// name not present in the projected schema (defaults to `_timestamp` − delay). 
- fn stream_source_effective_watermark_field<'b>( - watermark_field: Option<&'b str>, - projected: &HashSet, - ) -> Option<&'b str> { - let w = watermark_field?; - if w == sql_field::COMPUTED_WATERMARK { - return None; - } - projected.contains(w).then_some(w) - } - fn projection_expr_for_column(col: &ColumnDescriptor, qualifier: &TableReference) -> Expr { if let Some(logic) = col.computation_logic() { logic From d4387f9bbdf44170cd1082f143ad47b7c6cc8e6f Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 1 Apr 2026 21:29:04 +0800 Subject: [PATCH 41/44] update --- src/sql/common/hash.rs | 100 ------------------------------------ src/sql/common/mod.rs | 3 -- src/sql/common/task_info.rs | 92 --------------------------------- src/sql/common/worker.rs | 26 ---------- 4 files changed, 221 deletions(-) delete mode 100644 src/sql/common/hash.rs delete mode 100644 src/sql/common/task_info.rs delete mode 100644 src/sql/common/worker.rs diff --git a/src/sql/common/hash.rs b/src/sql/common/hash.rs deleted file mode 100644 index 6dce5b9a..00000000 --- a/src/sql/common/hash.rs +++ /dev/null @@ -1,100 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::ops::RangeInclusive; - -/// Randomly generated seeds for consistent hashing. Changing these breaks existing state. 
-pub const HASH_SEEDS: [u64; 4] = [ - 5093852630788334730, - 1843948808084437226, - 8049205638242432149, - 17942305062735447798, -]; - -/// Returns the server index (0-based) responsible for the given hash value -/// when distributing across `n` servers. -pub fn server_for_hash(x: u64, n: usize) -> usize { - if n == 1 { - 0 - } else { - let range_size = (u64::MAX / (n as u64)) + 1; - (x / range_size) as usize - } -} - -/// Returns the key range assigned to server `i` out of `n` total servers. -pub fn range_for_server(i: usize, n: usize) -> RangeInclusive { - if n == 1 { - return 0..=u64::MAX; - } - let range_size = (u64::MAX / (n as u64)) + 1; - let start = range_size * (i as u64); - let end = if i + 1 == n { - u64::MAX - } else { - start + range_size - 1 - }; - start..=end -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_range_for_server() { - let n = 6; - - for i in 0..(n - 1) { - let range1 = range_for_server(i, n); - let range2 = range_for_server(i + 1, n); - - assert_eq!(*range1.end() + 1, *range2.start(), "Ranges not adjacent"); - assert_eq!( - i, - server_for_hash(*range1.start(), n), - "start not assigned to range" - ); - assert_eq!( - i, - server_for_hash(*range1.end(), n), - "end not assigned to range" - ); - } - - let last_range = range_for_server(n - 1, n); - assert_eq!( - *last_range.end(), - u64::MAX, - "Last range does not contain u64::MAX" - ); - assert_eq!( - n - 1, - server_for_hash(u64::MAX, n), - "u64::MAX not in last range" - ); - } - - #[test] - fn test_server_for_hash() { - let n = 2; - let x = u64::MAX; - - let server_index = server_for_hash(x, n); - let server_range = range_for_server(server_index, n); - - assert!( - server_range.contains(&x), - "u64::MAX is not in the correct range" - ); - } -} diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs index 4c0cc6d3..e042aea6 100644 --- a/src/sql/common/mod.rs +++ b/src/sql/common/mod.rs @@ -26,13 +26,10 @@ pub mod fs_schema; pub mod errors; pub mod format_from_opts; 
pub mod formats; -pub mod hash; pub mod kafka_catalog; pub mod message; pub mod operator_config; -pub mod task_info; pub mod time_utils; -pub mod worker; pub mod converter; pub mod topology; diff --git a/src/sql/common/task_info.rs b/src/sql/common/task_info.rs deleted file mode 100644 index 479ab082..00000000 --- a/src/sql/common/task_info.rs +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use bincode::{Decode, Encode}; -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::fmt::{Display, Formatter}; -use std::ops::RangeInclusive; - -#[derive(Eq, PartialEq, Hash, Debug, Clone, Encode, Decode, Serialize, Deserialize)] -pub struct TaskInfo { - pub job_id: String, - pub node_id: u32, - pub operator_name: String, - pub operator_id: String, - pub task_index: u32, - pub parallelism: u32, - pub key_range: RangeInclusive, -} - -impl Display for TaskInfo { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!( - f, - "Task_{}-{}/{}", - self.operator_id, self.task_index, self.parallelism - ) - } -} - -impl TaskInfo { - pub fn for_test(job_id: &str, operator_id: &str) -> Self { - Self { - job_id: job_id.to_string(), - node_id: 1, - operator_name: "op".to_string(), - operator_id: operator_id.to_string(), - task_index: 0, - parallelism: 1, - key_range: 0..=u64::MAX, - } - } -} - -pub fn get_test_task_info() -> TaskInfo { - TaskInfo { - job_id: "instance-1".to_string(), - node_id: 1, - operator_name: 
"test-operator".to_string(), - operator_id: "test-operator-1".to_string(), - task_index: 0, - parallelism: 1, - key_range: 0..=u64::MAX, - } -} - -#[derive(Eq, PartialEq, Hash, Debug, Clone, Encode, Decode, Serialize, Deserialize)] -pub struct ChainInfo { - pub job_id: String, - pub node_id: u32, - pub description: String, - pub task_index: u32, -} - -impl Display for ChainInfo { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!( - f, - "TaskChain{}-{} ({})", - self.node_id, self.task_index, self.description - ) - } -} - -impl ChainInfo { - pub fn metric_label_map(&self) -> HashMap { - let mut labels = HashMap::new(); - labels.insert("node_id".to_string(), self.node_id.to_string()); - labels.insert("subtask_idx".to_string(), self.task_index.to_string()); - labels.insert("node_description".to_string(), self.description.to_string()); - labels - } -} diff --git a/src/sql/common/worker.rs b/src/sql/common/worker.rs deleted file mode 100644 index 48c218fb..00000000 --- a/src/sql/common/worker.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::fmt::{Display, Formatter}; -use std::sync::Arc; - -#[derive(Debug, Hash, Eq, PartialEq, Copy, Clone)] -pub struct WorkerId(pub u64); - -#[derive(Debug, Hash, Eq, PartialEq, Clone)] -pub struct MachineId(pub Arc); - -impl Display for MachineId { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} From c842c0bee0df4339aaa6495b9f8b89085ab846f1 Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 1 Apr 2026 22:31:37 +0800 Subject: [PATCH 42/44] update --- README-zh.md | 21 +-- README.md | 21 +-- docs/connectors-and-formats-zh.md | 197 ++++++++++++++++++++++ docs/connectors-and-formats.md | 197 ++++++++++++++++++++++ docs/sql-cli-guide-zh.md | 66 +++++++- docs/sql-cli-guide.md | 66 +++++++- docs/streaming-sql-guide-zh.md | 261 ++++++++++++++++++++++++++++++ docs/streaming-sql-guide.md | 260 +++++++++++++++++++++++++++++ 8 files changed, 1067 insertions(+), 22 deletions(-) create mode 100644 docs/connectors-and-formats-zh.md create mode 100644 docs/connectors-and-formats.md create mode 100644 docs/streaming-sql-guide-zh.md create mode 100644 docs/streaming-sql-guide.md diff --git a/README-zh.md b/README-zh.md index b1d68eac..a15bfcc5 100644 --- a/README-zh.md +++ b/README-zh.md @@ -23,7 +23,7 @@ [中文](README-zh.md) | [English](README.md) -**Function Stream** 是一个基于 Rust 构建的高性能、事件驱动的流处理框架。它提供了一个模块化的运行时,用于编排编译为 **WebAssembly (WASM)** 的 Serverless 风格处理函数,支持使用 **Go、Python 和 Rust** 编写函数。 +**Function Stream** 是一个基于 Rust 构建的高性能、事件驱动的流处理框架。它提供了一个模块化的运行时,用于编排编译为 **WebAssembly (WASM)** 的 Serverless 风格处理函数,支持使用 **Go、Python 和 Rust** 编写函数。同时内置 **Streaming SQL** 引擎,可通过纯声明式 SQL 构建实时数据管道 — 包括时间窗口聚合、多流关联和持续 ETL。 ## 目录 @@ -46,6 +46,7 @@ ## 核心特性 +- **Streaming SQL 引擎**:使用纯 SQL 构建实时管道 — 注册数据源(`CREATE TABLE`)、启动持续计算(`CREATE STREAMING TABLE ... 
AS SELECT`)、管理生命周期(`SHOW` / `DROP`)。支持滚动窗口、滑动窗口、窗口关联等丰富语义。 - **事件驱动的 WASM 运行时**:以接近原生的性能和沙箱隔离的方式执行多语言函数(Go、Python、Rust)。 - **持久化状态管理**:内置支持基于 RocksDB 的状态存储,用于有状态流处理。 - **SQL 驱动的 CLI**:使用类 SQL 命令进行作业管理和流检测的交互式 REPL。 @@ -200,14 +201,16 @@ function-stream-/ ## 文档 -| 文档 | 描述 | -|------------------------------------------------------|---------------| -| [服务端配置与运维指南](docs/server-configuration-zh.md) | 服务端配置与运维操作 | -| [Function 任务配置规范](docs/function-configuration-zh.md) | 任务定义规范 | -| [SQL CLI 交互式管理指南](docs/sql-cli-guide-zh.md) | 交互式管理指南 | -| [Function 管理与开发指南](docs/function-development-zh.md) | 管理与开发指南 | -| [Go SDK 开发与交互指南](docs/Go-SDK/go-sdk-guide-zh.md) | Go SDK 指南 | -| [Python SDK 开发与交互指南](docs/Python-SDK/python-sdk-guide-zh.md) | Python SDK 指南 | +| 文档 | 描述 | +|------------------------------------------------------------------------|--------------------------| +| [Streaming SQL 使用指南](docs/streaming-sql-guide-zh.md) | 声明式 SQL 实时流处理指南 | +| [连接器、格式与类型参考](docs/connectors-and-formats-zh.md) | 支持的 Source/Sink、格式与数据类型 | +| [服务端配置与运维指南](docs/server-configuration-zh.md) | 服务端配置与运维操作 | +| [Function 任务配置规范](docs/function-configuration-zh.md) | 任务定义规范 | +| [SQL CLI 交互式管理指南](docs/sql-cli-guide-zh.md) | 交互式管理指南 | +| [Function 管理与开发指南](docs/function-development-zh.md) | 管理与开发指南 | +| [Go SDK 开发与交互指南](docs/Go-SDK/go-sdk-guide-zh.md) | Go SDK 指南 | +| [Python SDK 开发与交互指南](docs/Python-SDK/python-sdk-guide-zh.md) | Python SDK 指南 | ## 配置 diff --git a/README.md b/README.md index 51a69de1..f74bee33 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ [中文](README-zh.md) | [English](README.md) -**Function Stream** is a high-performance, event-driven stream processing framework built in Rust. It provides a modular runtime to orchestrate serverless-style processing functions compiled to **WebAssembly (WASM)**, supporting functions written in **Go, Python, and Rust**. +**Function Stream** is a high-performance, event-driven stream processing framework built in Rust. 
It provides a modular runtime to orchestrate serverless-style processing functions compiled to **WebAssembly (WASM)**, supporting functions written in **Go, Python, and Rust**. It also features a **Streaming SQL** engine that lets you build real-time data pipelines — including time-windowed aggregations, multi-stream joins, and continuous ETL — using pure declarative SQL. ## Table of Contents @@ -46,6 +46,7 @@ ## Key Features +* **Streaming SQL Engine**: Build real-time pipelines with pure SQL — register sources (`CREATE TABLE`), launch continuous computations (`CREATE STREAMING TABLE ... AS SELECT`), and manage lifecycle (`SHOW` / `DROP`). Supports tumbling windows, hopping windows, window joins, and more. * **Event-Driven WASM Runtime**: Executes polyglot functions (Go, Python, Rust) with near-native performance and sandboxed isolation. * **Durable State Management**: Built-in support for RocksDB-backed state stores for stateful stream processing. * **SQL-Powered CLI**: Interactive REPL for job management and stream inspection using SQL-like commands. 
@@ -199,14 +200,16 @@ We provide a robust shell script to manage the server process, capable of handli ## Documentation -| Document | Description | -|----------------------------------------------------------|-----------------------------------| -| [Server Configuration](docs/server-configuration.md) | Server Configuration & Operations | -| [Function Configuration](docs/function-configuration.md) | Task Definition Specification | -| [SQL CLI Guide](docs/sql-cli-guide.md) | Interactive Management Guide | -| [Function Development](docs/function-development.md) | Management & Development Guide | -| [Go SDK Guide](docs/Go-SDK/go-sdk-guide.md) | Go SDK Guide | -| [Python SDK Guide](docs/Python-SDK/python-sdk-guide.md) | Python SDK Guide | +| Document | Description | +|----------------------------------------------------------------|-------------------------------------------------| +| [Streaming SQL Guide](docs/streaming-sql-guide.md) | Declarative SQL for Real-Time Stream Processing | +| [Connectors, Formats & Types](docs/connectors-and-formats.md) | Supported Sources, Sinks, Formats & Data Types | +| [Server Configuration](docs/server-configuration.md) | Server Configuration & Operations | +| [Function Configuration](docs/function-configuration.md) | Task Definition Specification | +| [SQL CLI Guide](docs/sql-cli-guide.md) | Interactive Management Guide | +| [Function Development](docs/function-development.md) | Management & Development Guide | +| [Go SDK Guide](docs/Go-SDK/go-sdk-guide.md) | Go SDK Guide | +| [Python SDK Guide](docs/Python-SDK/python-sdk-guide.md) | Python SDK Guide | ## Configuration diff --git a/docs/connectors-and-formats-zh.md b/docs/connectors-and-formats-zh.md new file mode 100644 index 00000000..8f25a7dc --- /dev/null +++ b/docs/connectors-and-formats-zh.md @@ -0,0 +1,197 @@ + + +# 连接器、数据格式与 SQL 类型参考 + +[中文](connectors-and-formats-zh.md) | [English](connectors-and-formats.md) + +本文档是 Function Stream Streaming SQL 引擎所支持的连接器(Source / 
Sink)、序列化格式以及 SQL 数据类型的权威参考。 + +--- + +## 目录 + +- [1. 连接器 (Connector)](#1-连接器-connector) + - [1.1 Kafka Source(数据源)](#11-kafka-source数据源) + - [1.2 Kafka Sink(数据汇)](#12-kafka-sink数据汇) +- [2. 数据格式 (Format)](#2-数据格式-format) +- [3. SQL 数据类型](#3-sql-数据类型) +- [4. 完整示例](#4-完整示例) + +--- + +## 1. 连接器 (Connector) + +当前 Function Stream 支持 **Kafka** 作为生产可用的连接器,同时可作为数据源(Source)和数据汇(Sink)。 + +### 1.1 Kafka Source(数据源) + +Kafka Source 从一个或多个 Kafka Topic 分区读取消息。在 `CREATE TABLE` 中使用以注册输入流。 + +**必填属性:** + +| 属性 | 说明 | 示例 | +|------|------|------| +| `connector` | 必须为 `kafka`。 | `'kafka'` | +| `topic` | 要消费的 Kafka Topic。 | `'raw_events'` | +| `format` | 消息的序列化格式。 | `'json'` | +| `bootstrap.servers` | Kafka Broker 地址列表,逗号分隔。 | `'broker1:9092,broker2:9092'` | + +**示例:** + +```sql +CREATE TABLE page_views ( + user_id VARCHAR, + page_url VARCHAR, + view_time TIMESTAMP NOT NULL, + WATERMARK FOR view_time AS view_time - INTERVAL '3' SECOND +) WITH ( + 'connector' = 'kafka', + 'topic' = 'page_views', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +); +``` + +### 1.2 Kafka Sink(数据汇) + +Kafka Sink 将计算结果写入 Kafka Topic。在 `CREATE STREAMING TABLE` 的 `WITH` 子句中配置。 + +**必填属性:** + +| 属性 | 说明 | 示例 | +|------|------|------| +| `connector` | 必须为 `kafka`。 | `'kafka'` | +| `topic` | 要写入的 Kafka Topic。 | `'sink_results'` | +| `format` | 输出消息的序列化格式。 | `'json'` | +| `bootstrap.servers` | Kafka Broker 地址列表。 | `'broker1:9092'` | + +**示例:** + +```sql +CREATE STREAMING TABLE enriched_clicks WITH ( + 'connector' = 'kafka', + 'topic' = 'enriched_clicks', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT click_id, user_id, click_time +FROM ad_clicks; +``` + +--- + +## 2. 数据格式 (Format) + +当前唯一支持的序列化格式是 **JSON**。每条 Kafka 消息应为一个自描述的 JSON 对象,其字段直接映射到 `CREATE TABLE` 中定义的列。 + +在 `WITH` 子句中设置 `'format' = 'json'`(省略时也默认为 JSON)。 + +--- + +## 3. 
SQL 数据类型 + +以下是 `CREATE TABLE` 列定义中支持的 SQL 数据类型: + +### 数值类型 + +| SQL 类型 | 别名 | Arrow 类型 | 说明 | +|----------|------|-----------|------| +| `BOOLEAN` | `BOOL` | Boolean | 布尔值。 | +| `TINYINT` | — | Int8 | 8 位有符号整数。 | +| `SMALLINT` | `INT2` | Int16 | 16 位有符号整数。 | +| `INT` | `INTEGER`、`INT4` | Int32 | 32 位有符号整数。 | +| `BIGINT` | `INT8` | Int64 | 64 位有符号整数。 | +| `TINYINT UNSIGNED` | — | UInt8 | 8 位无符号整数。 | +| `SMALLINT UNSIGNED` | `INT2 UNSIGNED` | UInt16 | 16 位无符号整数。 | +| `INT UNSIGNED` | `INT4 UNSIGNED` | UInt32 | 32 位无符号整数。 | +| `BIGINT UNSIGNED` | `INT8 UNSIGNED` | UInt64 | 64 位无符号整数。 | +| `FLOAT` | `REAL`、`FLOAT4` | Float32 | 32 位 IEEE 754 浮点数。 | +| `DOUBLE` | `DOUBLE PRECISION`、`FLOAT8` | Float64 | 64 位 IEEE 754 浮点数。 | +| `DECIMAL(p, s)` | `NUMERIC(p, s)` | Decimal128 | 定点小数。精度 1–38,标度 <= 精度。 | + +### 字符串与二进制类型 + +| SQL 类型 | 别名 | Arrow 类型 | 说明 | +|----------|------|-----------|------| +| `VARCHAR` | `TEXT`、`STRING`、`CHAR` | Utf8 | 可变长度 UTF-8 字符串。 | +| `BYTEA` | — | Binary | 可变长度字节数组。 | +| `JSON` | — | Utf8(JSON 扩展) | 带有 FunctionStream 扩展元数据的 JSON 类型字符串。 | + +### 日期与时间类型 + +| SQL 类型 | Arrow 类型 | 说明 | +|----------|-----------|------| +| `TIMESTAMP` | Timestamp(Nanosecond) | 不含时区的日期时间(纳秒精度)。 | +| `TIMESTAMP(0)` | Timestamp(Second) | 秒精度。 | +| `TIMESTAMP(3)` | Timestamp(Millisecond) | 毫秒精度。 | +| `TIMESTAMP(6)` | Timestamp(Microsecond) | 微秒精度。 | +| `TIMESTAMP(9)` | Timestamp(Nanosecond) | 纳秒精度(与 `TIMESTAMP` 相同)。 | +| `DATE` | Date32 | 日历日期(年、月、日)。 | +| `DATETIME` | Timestamp(Nanosecond) | `TIMESTAMP` 的别名。 | +| `TIME` | Time64(Nanosecond) | 不含时区的时刻。 | +| `INTERVAL` | Interval(MonthDayNano) | 时间间隔 / 持续时间。 | + +### 复合类型 + +| SQL 类型 | Arrow 类型 | 说明 | +|----------|-----------|------| +| `STRUCT` | Struct | 命名组合字段。 | +| `ARRAY` | List | 相同类型元素的有序列表。也支持 `element_type[]` 语法。 | + +--- + +## 4. 
完整示例 + +以下是一个结合 Kafka Source、Kafka Sink、JSON 格式和多种 SQL 数据类型的完整示例: + +```sql +-- Source:从 Kafka 读取用户活动事件 +CREATE TABLE user_activity ( + event_id VARCHAR, + user_id BIGINT, + action VARCHAR, + amount DECIMAL(10, 2), + tags ARRAY<VARCHAR>, + event_time TIMESTAMP NOT NULL, + WATERMARK FOR event_time AS event_time - INTERVAL '5' SECOND +) WITH ( + 'connector' = 'kafka', + 'topic' = 'user_activity', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +); + +-- Sink:1 分钟滚动窗口聚合 +CREATE STREAMING TABLE activity_stats_1m WITH ( + 'connector' = 'kafka', + 'topic' = 'activity_stats_1m', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + TUMBLE(INTERVAL '1' MINUTE) AS time_window, + action, + COUNT(*) AS event_count, + SUM(amount) AS total_amount +FROM user_activity +GROUP BY 1, action; +``` diff --git a/docs/connectors-and-formats.md b/docs/connectors-and-formats.md new file mode 100644 index 00000000..46d0d964 --- /dev/null +++ b/docs/connectors-and-formats.md @@ -0,0 +1,197 @@ + + +# Connectors, Formats & Data Types + +[中文](connectors-and-formats-zh.md) | [English](connectors-and-formats.md) + +This document is the authoritative reference for connectors (sources & sinks), serialization formats, and SQL data types supported by Function Stream's Streaming SQL engine. + +--- + +## Table of Contents + +- [1. Connectors](#1-connectors) + - [1.1 Kafka (Source)](#11-kafka-source) + - [1.2 Kafka (Sink)](#12-kafka-sink) +- [2. Data Format](#2-data-format) +- [3. SQL Data Types](#3-sql-data-types) +- [4. Full Example](#4-full-example) + +--- + +## 1. Connectors + +Currently Function Stream supports **Kafka** as the production-ready connector for both source (ingestion) and sink (egress). + +### 1.1 Kafka (Source) + +A Kafka source reads records from one or more Kafka topic partitions. Use it in `CREATE TABLE` to register an input stream. 
+ +**Required Properties:** + +| Property | Description | Example | +|----------|-------------|---------| +| `connector` | Must be `kafka`. | `'kafka'` | +| `topic` | Kafka topic to consume from. | `'raw_events'` | +| `format` | Serialization format of messages. | `'json'` | +| `bootstrap.servers` | Comma-separated list of Kafka broker addresses. | `'broker1:9092,broker2:9092'` | + +**Example:** + +```sql +CREATE TABLE page_views ( + user_id VARCHAR, + page_url VARCHAR, + view_time TIMESTAMP NOT NULL, + WATERMARK FOR view_time AS view_time - INTERVAL '3' SECOND +) WITH ( + 'connector' = 'kafka', + 'topic' = 'page_views', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +); +``` + +### 1.2 Kafka (Sink) + +A Kafka sink writes records into a Kafka topic. It is configured in the `WITH` clause of a `CREATE STREAMING TABLE` statement. + +**Required Properties:** + +| Property | Description | Example | +|----------|-------------|---------| +| `connector` | Must be `kafka`. | `'kafka'` | +| `topic` | Kafka topic to write to. | `'sink_results'` | +| `format` | Serialization format of output messages. | `'json'` | +| `bootstrap.servers` | Comma-separated Kafka broker addresses. | `'broker1:9092'` | + +**Example:** + +```sql +CREATE STREAMING TABLE enriched_clicks WITH ( + 'connector' = 'kafka', + 'topic' = 'enriched_clicks', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT click_id, user_id, click_time +FROM ad_clicks; +``` + +--- + +## 2. Data Format + +Currently the only supported serialization format is **JSON**. Each Kafka message is expected to be a self-describing JSON object whose fields map directly to the columns defined in `CREATE TABLE`. + +Set `'format' = 'json'` in the `WITH` clause (this is also the default when omitted). + +--- + +## 3. 
SQL Data Types + +The following SQL data types are supported in `CREATE TABLE` column definitions: + +### Numeric Types + +| SQL Type | Aliases | Arrow Type | Description | +|----------|---------|------------|-------------| +| `BOOLEAN` | `BOOL` | Boolean | True / false. | +| `TINYINT` | — | Int8 | 8-bit signed integer. | +| `SMALLINT` | `INT2` | Int16 | 16-bit signed integer. | +| `INT` | `INTEGER`, `INT4` | Int32 | 32-bit signed integer. | +| `BIGINT` | `INT8` | Int64 | 64-bit signed integer. | +| `TINYINT UNSIGNED` | — | UInt8 | 8-bit unsigned integer. | +| `SMALLINT UNSIGNED` | `INT2 UNSIGNED` | UInt16 | 16-bit unsigned integer. | +| `INT UNSIGNED` | `INT4 UNSIGNED` | UInt32 | 32-bit unsigned integer. | +| `BIGINT UNSIGNED` | `INT8 UNSIGNED` | UInt64 | 64-bit unsigned integer. | +| `FLOAT` | `REAL`, `FLOAT4` | Float32 | 32-bit IEEE 754 floating point. | +| `DOUBLE` | `DOUBLE PRECISION`, `FLOAT8` | Float64 | 64-bit IEEE 754 floating point. | +| `DECIMAL(p, s)` | `NUMERIC(p, s)` | Decimal128 | Fixed-point decimal. Precision 1–38, scale <= precision. | + +### String & Binary Types + +| SQL Type | Aliases | Arrow Type | Description | +|----------|---------|------------|-------------| +| `VARCHAR` | `TEXT`, `STRING`, `CHAR` | Utf8 | Variable-length UTF-8 string. | +| `BYTEA` | — | Binary | Variable-length byte array. | +| `JSON` | — | Utf8 (JSON extension) | JSON-typed string with FunctionStream extension metadata. | + +### Date & Time Types + +| SQL Type | Arrow Type | Description | +|----------|------------|-------------| +| `TIMESTAMP` | Timestamp(Nanosecond) | Date and time without timezone (nanosecond precision). | +| `TIMESTAMP(0)` | Timestamp(Second) | Second precision. | +| `TIMESTAMP(3)` | Timestamp(Millisecond) | Millisecond precision. | +| `TIMESTAMP(6)` | Timestamp(Microsecond) | Microsecond precision. | +| `TIMESTAMP(9)` | Timestamp(Nanosecond) | Nanosecond precision (same as `TIMESTAMP`). | +| `DATE` | Date32 | Calendar date (year, month, day). 
| +| `DATETIME` | Timestamp(Nanosecond) | Alias for `TIMESTAMP`. | +| `TIME` | Time64(Nanosecond) | Time of day without timezone. | +| `INTERVAL` | Interval(MonthDayNano) | Time duration / interval. | + +### Composite Types + +| SQL Type | Arrow Type | Description | +|----------|------------|-------------| +| `STRUCT` | Struct | Named composite fields. | +| `ARRAY<element_type>` | List | Ordered list of elements of the same type. Also supports `element_type[]` syntax. | + +--- + +## 4. Full Example + +Below is a complete example combining a Kafka source, a Kafka sink, JSON format, and various SQL data types: + +```sql +-- Source: user activity events from Kafka +CREATE TABLE user_activity ( + event_id VARCHAR, + user_id BIGINT, + action VARCHAR, + amount DECIMAL(10, 2), + tags ARRAY<VARCHAR>, + event_time TIMESTAMP NOT NULL, + WATERMARK FOR event_time AS event_time - INTERVAL '5' SECOND +) WITH ( + 'connector' = 'kafka', + 'topic' = 'user_activity', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +); + +-- Sink: 1-minute tumbling window aggregation +CREATE STREAMING TABLE activity_stats_1m WITH ( + 'connector' = 'kafka', + 'topic' = 'activity_stats_1m', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + TUMBLE(INTERVAL '1' MINUTE) AS time_window, + action, + COUNT(*) AS event_count, + SUM(amount) AS total_amount +FROM user_activity +GROUP BY 1, action; +``` diff --git a/docs/sql-cli-guide-zh.md b/docs/sql-cli-guide-zh.md index 8352dea1..bff05932 100644 --- a/docs/sql-cli-guide-zh.md +++ b/docs/sql-cli-guide-zh.md @@ -129,7 +129,69 @@ DROP FUNCTION go_processor_demo; --- -## 三、REPL 内建辅助指令 +## 三、Streaming SQL:TABLE 与 STREAMING TABLE + +除了 Function 管理之外,CLI 还支持一整套 **Streaming SQL** 命令,用于声明数据源和构建实时管道。完整示例请参阅 [Streaming SQL 使用指南](streaming-sql-guide-zh.md)。 + +### 3.1 注册数据源:CREATE TABLE + +声明外部数据源(如 Kafka),包含 Schema、事件时间和水位线策略。此操作仅创建**静态目录条目**,不消耗计算资源。 + +```sql +CREATE TABLE ad_impressions ( + impression_id VARCHAR, + ad_id BIGINT, + campaign_id 
BIGINT, + user_id VARCHAR, + impression_time TIMESTAMP NOT NULL, + WATERMARK FOR impression_time AS impression_time - INTERVAL '2' SECOND +) WITH ( + 'connector' = 'kafka', + 'topic' = 'raw_ad_impressions', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +); +``` + +### 3.2 创建流计算管道:CREATE STREAMING TABLE + +使用 CTAS 语法启动持续运行的分布式计算管道。结果以纯追加模式写入目标连接器。 + +```sql +CREATE STREAMING TABLE metric_tumble_impressions_1m WITH ( + 'connector' = 'kafka', + 'topic' = 'sink_impressions_1m', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + TUMBLE(INTERVAL '1' MINUTE) AS time_window, + campaign_id, + COUNT(*) AS total_impressions +FROM ad_impressions +GROUP BY 1, campaign_id; +``` + +### 3.3 查看与监控 + +| 命令 | 说明 | +|------|------| +| `SHOW TABLES` | 列出所有已注册的数据源表。 | +| `SHOW CREATE TABLE <table_name>` | 显示某张表的建表 DDL。 | +| `SHOW STREAMING TABLES` | 列出所有正在运行的流计算管道及其状态。 | +| `SHOW CREATE STREAMING TABLE <table_name>` | 查看某条管道的物理执行拓扑图(ASCII 格式)。 | + +### 3.4 销毁流计算管道:DROP STREAMING TABLE + +停止并释放某条流计算管道的所有资源: + +```sql +DROP STREAMING TABLE metric_tumble_impressions_1m; +``` + +--- + +## 四、REPL 内建辅助指令 在 `function-stream>` 提示符下,支持以下便捷指令: @@ -141,7 +203,7 @@ DROP FUNCTION go_processor_demo; --- -## 四、技术约束与注意事项 +## 五、技术约束与注意事项 - **路径隔离**:SQL CLI 本身不负责上传文件。function_path 指向的文件必须预先存在于**服务端机器**的磁盘上。若需远程上传打包,请使用 Python SDK。 - **Python 函数限制**:由于 Python 函数涉及动态依赖分析与代码打包,目前**不支持**通过 SQL CLI 创建,仅能通过 CLI 进行 START / STOP / SHOW 等生命周期管理。 diff --git a/docs/sql-cli-guide.md b/docs/sql-cli-guide.md index be42a37e..a7f36a88 100644 --- a/docs/sql-cli-guide.md +++ b/docs/sql-cli-guide.md @@ -129,7 +129,69 @@ DROP FUNCTION go_processor_demo; --- -## 3. REPL Built-in Auxiliary Commands +## 3. Streaming SQL: TABLE & STREAMING TABLE + +In addition to Function management, the CLI supports a full set of **Streaming SQL** commands for declaring data sources and building real-time pipelines. For a comprehensive guide with examples, see [Streaming SQL Guide](streaming-sql-guide.md). 
+ +### 3.1 Register Data Source: CREATE TABLE + +Declare an external data source (e.g. Kafka) with schema, event time, and watermark strategy. This creates a **static catalog entry** that consumes no compute resources. + +```sql +CREATE TABLE ad_impressions ( + impression_id VARCHAR, + ad_id BIGINT, + campaign_id BIGINT, + user_id VARCHAR, + impression_time TIMESTAMP NOT NULL, + WATERMARK FOR impression_time AS impression_time - INTERVAL '2' SECOND +) WITH ( + 'connector' = 'kafka', + 'topic' = 'raw_ad_impressions', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +); +``` + +### 3.2 Create Streaming Pipeline: CREATE STREAMING TABLE + +Launch a continuous, distributed compute pipeline using CTAS syntax. Results are written to the target connector in append-only mode. + +```sql +CREATE STREAMING TABLE metric_tumble_impressions_1m WITH ( + 'connector' = 'kafka', + 'topic' = 'sink_impressions_1m', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + TUMBLE(INTERVAL '1' MINUTE) AS time_window, + campaign_id, + COUNT(*) AS total_impressions +FROM ad_impressions +GROUP BY 1, campaign_id; +``` + +### 3.3 Inspect & Monitor + +| Command | Description | +|---------|-------------| +| `SHOW TABLES` | List all registered source tables. | +| `SHOW CREATE TABLE <table_name>` | Display the DDL of a registered table. | +| `SHOW STREAMING TABLES` | List all running streaming pipelines with status. | +| `SHOW CREATE STREAMING TABLE <table_name>` | Inspect the physical execution graph (ASCII topology). | + +### 3.4 Destroy Streaming Pipeline: DROP STREAMING TABLE + +Stop and release all resources for a streaming pipeline: + +```sql +DROP STREAMING TABLE metric_tumble_impressions_1m; +``` + +--- + +## 4. REPL Built-in Auxiliary Commands At the `function-stream>` prompt, the following convenient commands are supported: @@ -141,7 +203,7 @@ At the `function-stream>` prompt, the following convenient commands are supporte --- -## 4. Technical Constraints and Notes +## 5. 
Technical Constraints and Notes - **Path Isolation**: The SQL CLI itself is not responsible for uploading files. The file pointed to by function_path must pre-exist on the **Server machine's** disk. If remote upload packaging is required, please use the Python SDK. - **Python Function Limitations**: Since Python functions involve dynamic dependency analysis and code packaging, they are currently **not supported** for creation via SQL CLI; only lifecycle management such as START / STOP / SHOW via CLI is supported. diff --git a/docs/streaming-sql-guide-zh.md b/docs/streaming-sql-guide-zh.md new file mode 100644 index 00000000..ee18ab2a --- /dev/null +++ b/docs/streaming-sql-guide-zh.md @@ -0,0 +1,261 @@ + + +# Streaming SQL 使用指南 + +[中文](streaming-sql-guide-zh.md) | [English](streaming-sql-guide.md) + +Function Stream 提供了声明式 SQL 接口来构建实时流处理管道。通过 Streaming SQL,您可以轻松应对无界数据流(Unbounded Data)的摄取、时间窗口聚合、流式关联以及任务生命周期管理 — 无需编写任何命令式代码。 + +--- + +## 目录 + +- [核心概念](#核心概念) +- [第一部分:注册数据源 (TABLE)](#第一部分注册数据源-table) +- [第二部分:构建实时 Pipeline (STREAMING TABLE)](#第二部分构建实时-pipeline-streaming-table) + - [滚动窗口 (Tumbling Window)](#场景-1滚动窗口-tumbling-window) + - [滑动窗口 (Hopping Window)](#场景-2滑动窗口-hopping-window) + - [窗口双流关联 (Window Join)](#场景-3窗口双流关联-window-join) +- [第三部分:生命周期与流任务管理](#第三部分生命周期与流任务管理) + - [数据源管理](#1-数据源与元数据管理) + - [Pipeline 监控](#2-实时-pipeline-监控与排障) + - [停止与释放](#3-安全停止与释放资源) +- [SQL 语法速查表](#sql-语法速查表) + +--- + +## 核心概念 + +| 概念 | SQL 关键字 | 说明 | +|------|-----------|------| +| **TABLE** | `CREATE TABLE` | 系统目录(Catalog)中的静态逻辑定义。只记录外部数据源的连接信息、格式和 Schema,不消耗任何计算资源。 | +| **STREAMING TABLE** | `CREATE STREAMING TABLE ... 
AS SELECT` | 持续运行的物理数据管道。引擎会在后台拉起真实的分布式计算任务,并将结果以纯追加(Append-only)方式持续写入外部系统。 | +| **事件时间 (Event Time)** | `WATERMARK FOR <列>` | 引擎内部用于推进时间进度的时间戳列。 | +| **水位线 (Watermark)** | `AS <列> - INTERVAL ...` | 对迟到乱序数据的容忍度。超过水位线的事件将被丢弃。 | + +> 支持的连接器、数据格式和 SQL 数据类型的完整参考,请参阅 [连接器、格式与类型参考](connectors-and-formats-zh.md)。 + +--- + +## 第一部分:注册数据源 (TABLE) + +`TABLE` 是系统目录(Catalog)中的静态逻辑定义。它只记录外部数据源(如 Kafka)的连接信息、格式和 Schema,**不消耗任何计算资源**。 + +在流计算中,我们必须为输入流指定**事件时间(Event Time)**和**水位线(Watermark)**,以此作为引擎内部推进时间、触发计算的唯一依据。 + +### 示例:注册广告曝光流与点击流 + +```sql +-- 1. 注册广告曝光流 +CREATE TABLE ad_impressions ( + impression_id VARCHAR, + ad_id BIGINT, + campaign_id BIGINT, + user_id VARCHAR, + impression_time TIMESTAMP NOT NULL, + -- 核心:将 impression_time 设为事件时间,并容忍最多 2 秒的数据迟到乱序 + WATERMARK FOR impression_time AS impression_time - INTERVAL '2' SECOND +) WITH ( + 'connector' = 'kafka', + 'topic' = 'raw_ad_impressions', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +); + +-- 2. 注册广告点击流 +CREATE TABLE ad_clicks ( + click_id VARCHAR, + impression_id VARCHAR, + ad_id BIGINT, + click_time TIMESTAMP NOT NULL, + WATERMARK FOR click_time AS click_time - INTERVAL '5' SECOND +) WITH ( + 'connector' = 'kafka', + 'topic' = 'raw_ad_clicks', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +); +``` + +**关键要素:** + +- `WATERMARK FOR <列> AS <列> - INTERVAL '<N>' SECOND`:声明事件时间列以及允许的最大乱序延迟。 +- `WITH (...)`:连接器属性 — 类型、Topic、格式、Broker 地址。 + +--- + +## 第二部分:构建实时 Pipeline (STREAMING TABLE) + +`STREAMING TABLE` 是持续运行的物理数据管道。使用 `CREATE STREAMING TABLE ... 
AS SELECT`(CTAS)语法,引擎会在后台拉起真实的分布式计算任务,并将结果以**纯追加(Append-only)**的方式持续写入外部系统。 + +### 场景 1:滚动窗口 (Tumbling Window) + +将时间切分为互不重叠的固定窗口。 + +```sql +-- 需求:每 1 分钟统计一次各广告计划的曝光总量 +CREATE STREAMING TABLE metric_tumble_impressions_1m WITH ( + 'connector' = 'kafka', + 'topic' = 'sink_impressions_1m', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + TUMBLE(INTERVAL '1' MINUTE) AS time_window, + campaign_id, + COUNT(*) AS total_impressions +FROM ad_impressions +GROUP BY + 1, -- 指代 SELECT 中的第一个字段 (time_window) + campaign_id; +``` + +### 场景 2:滑动窗口 (Hopping Window) + +窗口之间存在重叠,用于平滑趋势监控。 + +```sql +-- 需求:统计过去 10 分钟内各广告的独立访客数(UV),每 1 分钟刷新一次 +CREATE STREAMING TABLE metric_hop_uv_10m WITH ( + 'connector' = 'kafka', + 'topic' = 'sink_uv_10m_step_1m', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + HOP(INTERVAL '1' MINUTE, INTERVAL '10' MINUTE) AS time_window, + ad_id, + COUNT(DISTINCT user_id) AS unique_users +FROM ad_impressions +GROUP BY + 1, + ad_id; +``` + +### 场景 3:窗口双流关联 (Window Join) + +将两条流在完全相同的时间窗口内进行等值关联。因为状态限定在窗口内,水位线越过窗口后状态会自动清理,绝不发生内存泄漏(OOM)。 + +```sql +-- 需求:精确计算 5 分钟级别的点击率 (CTR) +CREATE STREAMING TABLE metric_window_join_ctr_5m WITH ( + 'connector' = 'kafka', + 'topic' = 'sink_ctr_5m', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + imp.time_window, + imp.ad_id, + imp.impressions, + COALESCE(clk.clicks, 0) AS clicks +FROM ( + SELECT TUMBLE(INTERVAL '5' MINUTE) AS time_window, ad_id, COUNT(*) AS impressions + FROM ad_impressions + GROUP BY 1, ad_id +) imp +LEFT JOIN ( + SELECT TUMBLE(INTERVAL '5' MINUTE) AS time_window, ad_id, COUNT(*) AS clicks + FROM ad_clicks + GROUP BY 1, ad_id +) clk +ON imp.time_window = clk.time_window AND imp.ad_id = clk.ad_id; +``` + +> **要求:**关联条件**必须**包含相同的时间窗口列,以确保状态有界。 + +--- + +## 第三部分:生命周期与流任务管理 + +Function Stream 提供了一套完整的运维指令,帮助您管理元数据目录、排查物理执行图以及销毁流计算任务。 + +### 1. 
数据源与元数据管理 + +**查看所有已注册的数据源表:** + +```sql +SHOW TABLES; +``` + +列出当前 Catalog 中的所有静态表定义及其对应的 Event Time 与 Watermark 策略。 + +**查看原始建表语句(DDL):** + +```sql +SHOW CREATE TABLE ad_clicks; +``` + +用于导出或排查某张表的底层连接参数(如 Kafka Topic、Format 等)。 + +### 2. 实时 Pipeline 监控与排障 + +**查看当前运行的计算流:** + +```sql +SHOW STREAMING TABLES; +``` + +输出字段说明: + +| 字段 | 说明 | +|------|------| +| `job_id` | 计算流的名称(如 `metric_tumble_impressions_1m`)。 | +| `status` | 当前生命周期状态(如 `RUNNING`、`FAILED`)。 | +| `pipeline_count` | 该任务在底层被拆分成的并行算子链数量。 | +| `uptime` | 任务已持续运行的时长。 | + +**洞察物理执行拓扑 (Execution Graph):** + +```sql +SHOW CREATE STREAMING TABLE metric_tumble_impressions_1m; +``` + +这是 Function Stream 极其强大的排障指令。它会以 ASCII 格式打印出一条 SQL 是如何在底层被转化为真实分布式计算图的: + +- `[Source]` — 从连接器读取数据。 +- `[Operator] ExpressionWatermark` — 注入水位线。 +- `[Shuffle]` — 重分布网络数据。 +- `[Operator] TumblingWindowAggregate` — 执行真正的窗口聚合。 +- `[Sink] ConnectorSink` — 将结果发往目标连接器(如 Kafka)。 + +### 3. 安全停止与释放资源 + +当某个实时大屏活动结束,或者您需要更新计算逻辑时,必须显式销毁旧的流任务: + +```sql +DROP STREAMING TABLE metric_tumble_impressions_1m; +``` + +--- + +## SQL 语法速查表 + +| 语句 | 说明 | +|------|------| +| `CREATE TABLE ... WITH (...)` | 注册外部数据源,声明 Schema、事件时间和水位线。 | +| `CREATE STREAMING TABLE ... WITH (...) AS SELECT ...` | 创建并启动持续运行的流计算管道。 | +| `SHOW TABLES` | 列出所有已注册的数据源表。 | +| `SHOW CREATE TABLE <table_name>` | 显示某张表的建表 DDL。 | +| `SHOW STREAMING TABLES` | 列出所有正在运行的流计算管道及其状态。 | +| `SHOW CREATE STREAMING TABLE <table_name>` | 查看某条管道的物理执行拓扑图。 | +| `DROP STREAMING TABLE <table_name>` | 销毁流计算管道并释放所有资源。 | diff --git a/docs/streaming-sql-guide.md b/docs/streaming-sql-guide.md new file mode 100644 index 00000000..8250e794 --- /dev/null +++ b/docs/streaming-sql-guide.md @@ -0,0 +1,260 @@ + + +# Streaming SQL Guide + +[中文](streaming-sql-guide-zh.md) | [English](streaming-sql-guide.md) + +Function Stream provides a declarative SQL interface for building real-time stream processing pipelines. 
With Streaming SQL you can ingest unbounded data streams, perform time-windowed aggregations, join multiple streams, and manage pipeline lifecycles — all without writing imperative code. + +--- + +## Table of Contents + +- [Core Concepts](#core-concepts) +- [Part 1: Registering Data Sources (TABLE)](#part-1-registering-data-sources-table) +- [Part 2: Building Real-Time Pipelines (STREAMING TABLE)](#part-2-building-real-time-pipelines-streaming-table) + - [Tumbling Window](#scenario-1-tumbling-window) + - [Hopping Window](#scenario-2-hopping-window) + - [Window Join](#scenario-3-window-join) +- [Part 3: Lifecycle & Pipeline Management](#part-3-lifecycle--pipeline-management) + - [Data Source Management](#1-data-source--metadata-management) + - [Pipeline Monitoring](#2-real-time-pipeline-monitoring--troubleshooting) + - [Stopping & Cleanup](#3-safe-shutdown--resource-release) +- [SQL Reference Summary](#sql-reference-summary) + +--- + +## Core Concepts + +| Concept | SQL Keyword | Description | +|---------|-------------|-------------| +| **TABLE** | `CREATE TABLE` | A static logical definition in the catalog. Records external source connection info, format, and schema. Consumes no compute resources. | +| **STREAMING TABLE** | `CREATE STREAMING TABLE ... AS SELECT` | A physically running data pipeline. The engine allocates distributed compute tasks and continuously writes results to external systems in append-only mode. | +| **Event Time** | `WATERMARK FOR <column>` | The timestamp column used by the engine to track the progression of time within a stream. | +| **Watermark** | `AS <column> - INTERVAL ...` | A tolerance for late-arriving, out-of-order data. Events arriving after the watermark are dropped. | + +> For the full reference on supported connectors, data formats, and SQL data types, see [Connectors, Formats & Data Types](connectors-and-formats.md). + +--- + +## Part 1: Registering Data Sources (TABLE) + +A `TABLE` is a static logical definition in the system catalog. 
It only records the connection information (e.g. Kafka broker, topic), data format, and schema of an external data source. **It does not consume any compute resources.** + +In stream processing, you must specify an **Event Time** column and a **Watermark** strategy for each input stream. The engine uses these as the sole basis for advancing time and triggering computations. + +### Example: Register an Ad-Impressions Stream and a Clicks Stream + +```sql +-- 1. Register the ad-impressions stream +CREATE TABLE ad_impressions ( + impression_id VARCHAR, + ad_id BIGINT, + campaign_id BIGINT, + user_id VARCHAR, + impression_time TIMESTAMP NOT NULL, + WATERMARK FOR impression_time AS impression_time - INTERVAL '2' SECOND +) WITH ( + 'connector' = 'kafka', + 'topic' = 'raw_ad_impressions', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +); + +-- 2. Register the ad-clicks stream +CREATE TABLE ad_clicks ( + click_id VARCHAR, + impression_id VARCHAR, + ad_id BIGINT, + click_time TIMESTAMP NOT NULL, + WATERMARK FOR click_time AS click_time - INTERVAL '5' SECOND +) WITH ( + 'connector' = 'kafka', + 'topic' = 'raw_ad_clicks', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +); +``` + +**Key elements:** + +- `WATERMARK FOR <column> AS <column> - INTERVAL '<N>' SECOND`: declares the event-time column and the maximum tolerated out-of-order delay. +- `WITH (...)`: connector properties — type, topic, format, and broker address. + +--- + +## Part 2: Building Real-Time Pipelines (STREAMING TABLE) + +A `STREAMING TABLE` is a continuously running physical data pipeline. Using the `CREATE STREAMING TABLE ... AS SELECT` (CTAS) syntax, the engine launches real distributed compute tasks in the background and continuously writes results to an external system in **append-only** mode. + +### Scenario 1: Tumbling Window + +Divides time into fixed, non-overlapping windows. 
+ +```sql +-- Count total impressions per campaign every 1 minute +CREATE STREAMING TABLE metric_tumble_impressions_1m WITH ( + 'connector' = 'kafka', + 'topic' = 'sink_impressions_1m', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + TUMBLE(INTERVAL '1' MINUTE) AS time_window, + campaign_id, + COUNT(*) AS total_impressions +FROM ad_impressions +GROUP BY + 1, + campaign_id; +``` + +### Scenario 2: Hopping Window + +Windows overlap, useful for smoothed trend monitoring. + +```sql +-- Count distinct visitors (UV) per ad over the last 10 minutes, refreshed every 1 minute +CREATE STREAMING TABLE metric_hop_uv_10m WITH ( + 'connector' = 'kafka', + 'topic' = 'sink_uv_10m_step_1m', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + HOP(INTERVAL '1' MINUTE, INTERVAL '10' MINUTE) AS time_window, + ad_id, + COUNT(DISTINCT user_id) AS unique_users +FROM ad_impressions +GROUP BY + 1, + ad_id; +``` + +### Scenario 3: Window Join + +Join two streams within exactly the same time window. Because state is bounded by the window, memory is automatically reclaimed once the watermark advances past the window boundary — eliminating the risk of OOM. + +```sql +-- Calculate 5-minute click-through rate (CTR) +CREATE STREAMING TABLE metric_window_join_ctr_5m WITH ( + 'connector' = 'kafka', + 'topic' = 'sink_ctr_5m', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + imp.time_window, + imp.ad_id, + imp.impressions, + COALESCE(clk.clicks, 0) AS clicks +FROM ( + SELECT TUMBLE(INTERVAL '5' MINUTE) AS time_window, ad_id, COUNT(*) AS impressions + FROM ad_impressions + GROUP BY 1, ad_id +) imp +LEFT JOIN ( + SELECT TUMBLE(INTERVAL '5' MINUTE) AS time_window, ad_id, COUNT(*) AS clicks + FROM ad_clicks + GROUP BY 1, ad_id +) clk +ON imp.time_window = clk.time_window AND imp.ad_id = clk.ad_id; +``` + +> **Requirement:** The join condition **must** include the same time-window column to ensure bounded state. 
+ +--- + +## Part 3: Lifecycle & Pipeline Management + +Function Stream provides a complete set of operational commands for managing the metadata catalog, inspecting physical execution graphs, and destroying streaming pipelines. + +### 1. Data Source & Metadata Management + +**List all registered source tables:** + +```sql +SHOW TABLES; +``` + +Lists all static table definitions in the current catalog along with their Event Time and Watermark strategies. + +**Show the original DDL of a table:** + +```sql +SHOW CREATE TABLE ad_clicks; +``` + +Useful for exporting or auditing the underlying connection parameters (Kafka topic, format, etc.). + +### 2. Real-Time Pipeline Monitoring & Troubleshooting + +**List all running streaming pipelines:** + +```sql +SHOW STREAMING TABLES; +``` + +Output columns: + +| Column | Description | +|--------|-------------| +| `job_id` | Pipeline name (e.g. `metric_tumble_impressions_1m`). | +| `status` | Lifecycle state (`RUNNING`, `FAILED`, etc.). | +| `pipeline_count` | Number of parallel operator chains the engine split the job into. | +| `uptime` | How long the pipeline has been running. | + +**Inspect the physical execution topology:** + +```sql +SHOW CREATE STREAMING TABLE metric_tumble_impressions_1m; +``` + +This prints an ASCII representation of how the SQL was translated into a distributed execution graph: + +- `[Source]` — reads from the connector. +- `[Operator] ExpressionWatermark` — injects watermarks. +- `[Shuffle]` — redistributes data across the network. +- `[Operator] TumblingWindowAggregate` — performs the actual windowed aggregation. +- `[Sink] ConnectorSink` — writes results to the target connector (e.g. Kafka). + +### 3. 
Safe Shutdown & Resource Release + +When a campaign ends or you need to update the pipeline logic, explicitly destroy the old streaming pipeline: + +```sql +DROP STREAMING TABLE metric_tumble_impressions_1m; +``` + +--- + +## SQL Reference Summary + +| Statement | Description | +|-----------|-------------| +| `CREATE TABLE ... WITH (...)` | Register an external data source with schema, event time, and watermark. | +| `CREATE STREAMING TABLE ... WITH (...) AS SELECT ...` | Create and launch a continuous streaming pipeline. | +| `SHOW TABLES` | List all registered source tables. | +| `SHOW CREATE TABLE <table_name>` | Display the DDL of a registered table. | +| `SHOW STREAMING TABLES` | List all running streaming pipelines with status. | +| `SHOW CREATE STREAMING TABLE <table_name>` | Inspect the physical execution graph of a pipeline. | +| `DROP STREAMING TABLE <table_name>` | Destroy a streaming pipeline and release all resources. | From 29d7a4b8c53b8714000f670730876e6a4578a61f Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 1 Apr 2026 22:40:55 +0800 Subject: [PATCH 43/44] update --- docs/streaming-sql-guide-zh.md | 27 +++++++++++++++++++++++++-- docs/streaming-sql-guide.md | 27 +++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/docs/streaming-sql-guide-zh.md b/docs/streaming-sql-guide-zh.md index ee18ab2a..98842614 100644 --- a/docs/streaming-sql-guide-zh.md +++ b/docs/streaming-sql-guide-zh.md @@ -34,7 +34,8 @@ Function Stream 提供了声明式 SQL 接口来构建实时流处理管道。 - [第二部分:构建实时 Pipeline (STREAMING TABLE)](#第二部分构建实时-pipeline-streaming-table) - [滚动窗口 (Tumbling Window)](#场景-1滚动窗口-tumbling-window) - [滑动窗口 (Hopping Window)](#场景-2滑动窗口-hopping-window) - - [窗口双流关联 (Window Join)](#场景-3窗口双流关联-window-join) + - [会话窗口 (Session Window)](#场景-3会话窗口-session-window) + - [窗口双流关联 (Window Join)](#场景-4窗口双流关联-window-join) - [第三部分:生命周期与流任务管理](#第三部分生命周期与流任务管理) - [数据源管理](#1-数据源与元数据管理) - [Pipeline 监控](#2-实时-pipeline-监控与排障) @@ -151,7 +152,29 @@ GROUP BY ad_id; ``` -### 场景 3:窗口双流关联 (Window Join) 
+### 场景 3:会话窗口 (Session Window) + +会话窗口根据指定的不活跃间隔(Gap)对事件进行分组。如果在 Gap 时间内没有新事件到达,窗口关闭并输出结果。会话窗口非常适合用户行为会话分析。 + +```sql +-- 需求:按用户检测广告曝光会话,30 秒无活动则会话结束 +CREATE STREAMING TABLE metric_session_impressions WITH ( + 'connector' = 'kafka', + 'topic' = 'sink_session_impressions', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + SESSION(INTERVAL '30' SECOND) AS time_window, + user_id, + COUNT(*) AS impressions_in_session +FROM ad_impressions +GROUP BY + 1, + user_id; +``` + +### 场景 4:窗口双流关联 (Window Join) 将两条流在完全相同的时间窗口内进行等值关联。因为状态限定在窗口内,水位线越过窗口后状态会自动清理,绝不发生内存泄漏(OOM)。 diff --git a/docs/streaming-sql-guide.md b/docs/streaming-sql-guide.md index 8250e794..cafaf887 100644 --- a/docs/streaming-sql-guide.md +++ b/docs/streaming-sql-guide.md @@ -34,7 +34,8 @@ Function Stream provides a declarative SQL interface for building real-time stre - [Part 2: Building Real-Time Pipelines (STREAMING TABLE)](#part-2-building-real-time-pipelines-streaming-table) - [Tumbling Window](#scenario-1-tumbling-window) - [Hopping Window](#scenario-2-hopping-window) - - [Window Join](#scenario-3-window-join) + - [Session Window](#scenario-3-session-window) + - [Window Join](#scenario-4-window-join) - [Part 3: Lifecycle & Pipeline Management](#part-3-lifecycle--pipeline-management) - [Data Source Management](#1-data-source--metadata-management) - [Pipeline Monitoring](#2-real-time-pipeline-monitoring--troubleshooting) @@ -150,7 +151,29 @@ GROUP BY ad_id; ``` -### Scenario 3: Window Join +### Scenario 3: Session Window + +A session window groups events that arrive within a specified gap of inactivity. If no new event arrives within the gap duration, the window closes and emits results. Session windows are ideal for user-session analysis. 
+ +```sql +-- Detect ad-impression sessions per user; a session ends after 30 seconds of inactivity +CREATE STREAMING TABLE metric_session_impressions WITH ( + 'connector' = 'kafka', + 'topic' = 'sink_session_impressions', + 'format' = 'json', + 'bootstrap.servers' = 'localhost:9092' +) AS +SELECT + SESSION(INTERVAL '30' SECOND) AS time_window, + user_id, + COUNT(*) AS impressions_in_session +FROM ad_impressions +GROUP BY + 1, + user_id; +``` + +### Scenario 4: Window Join Join two streams within exactly the same time window. Because state is bounded by the window, memory is automatically reclaimed once the watermark advances past the window boundary — eliminating the risk of OOM. From 7b1f95960b7a74537e5dfc7f93037381805bba4d Mon Sep 17 00:00:00 2001 From: luoluoyuyu Date: Wed, 1 Apr 2026 23:28:52 +0800 Subject: [PATCH 44/44] update --- protocol/proto/storage.proto | 15 +++ src/coordinator/execution/executor.rs | 28 +++++- src/server/initializer.rs | 1 + src/storage/stream_catalog/manager.rs | 135 +++++++++++++++++++++++++- src/storage/stream_catalog/mod.rs | 1 + 5 files changed, 176 insertions(+), 4 deletions(-) diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto index 9ab0995d..f107d472 100644 --- a/protocol/proto/storage.proto +++ b/protocol/proto/storage.proto @@ -39,6 +39,21 @@ message CatalogSourceTable { string description = 7; } +// ============================================================================= +// Streaming table storage (CREATE STREAMING TABLE persistence) +// ============================================================================= + +// Persisted record for one streaming table (CREATE STREAMING TABLE). +// On restart, the engine re-submits each record to JobManager to resume the pipeline. +message StreamingTableDefinition { + string table_name = 1; + int64 created_at_millis = 2; + // Serialized function_stream.api.FsProgram — the full execution graph. 
+ // Stored as opaque bytes to avoid coupling storage schema with runtime API protos. + bytes fs_program_bytes = 3; + string comment = 4; +} + // ============================================================================= // Task storage (RocksDB metadata + module payload) // ============================================================================= diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs index dcfbcb83..c24a4cda 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/execution/executor.rs @@ -14,7 +14,7 @@ use std::sync::Arc; use protocol::grpc::api::FsProgram; use thiserror::Error; -use tracing::{debug, info}; +use tracing::{debug, info, warn}; use crate::coordinator::dataset::{ empty_record_batch, ExecuteResult, ShowCatalogTablesResult, @@ -323,14 +323,28 @@ impl PlanVisitor for Executor { let job_id = plan.name.clone(); let job_id = tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on(job_manager.submit_job(job_id, fs_program)) + tokio::runtime::Handle::current() + .block_on(job_manager.submit_job(job_id, fs_program.clone())) }) .map_err(|e| ExecuteError::Internal(format!("Failed to submit streaming job: {e}")))?; + self.catalog_manager + .persist_streaming_job( + &plan.name, + &fs_program, + plan.comment.as_deref().unwrap_or(""), + ) + .map_err(|e| { + ExecuteError::Internal(format!( + "Streaming job '{}' submitted but persistence failed: {e}", + plan.name + )) + })?; + info!( job_id = %job_id, table = %plan.name, - "Streaming job submitted" + "Streaming job submitted and persisted" ); Ok(ExecuteResult::ok_with_data( @@ -472,6 +486,14 @@ impl PlanVisitor for Executor { ); } + if let Err(e) = self.catalog_manager.remove_streaming_job(&plan.table_name) { + warn!( + table = %plan.table_name, + error = %e, + "Failed to remove streaming job persisted definition (non-fatal)" + ); + } + let _ = self .catalog_manager .drop_catalog_table(&plan.table_name, true); diff --git 
a/src/server/initializer.rs b/src/server/initializer.rs index a73ec14a..70c19685 100644 --- a/src/server/initializer.rs +++ b/src/server/initializer.rs @@ -114,6 +114,7 @@ pub fn bootstrap_system(config: &GlobalConfig) -> Result<()> { registry.initialize_all(config)?; crate::storage::stream_catalog::restore_global_catalog_from_store(); + crate::storage::stream_catalog::restore_streaming_jobs_from_store(); info!("System bootstrap finished. Node is ready to accept traffic."); Ok(()) diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs index 086b206b..a0eb9b49 100644 --- a/src/storage/stream_catalog/manager.rs +++ b/src/storage/stream_catalog/manager.rs @@ -15,6 +15,7 @@ use std::sync::{Arc, OnceLock}; use anyhow::{anyhow, bail, Context}; use datafusion::common::{internal_err, plan_err, Result as DFResult}; use prost::Message; +use protocol::grpc::api::FsProgram; use protocol::storage::{self as pb, table_definition}; use tracing::{info, warn}; use unicase::UniCase; @@ -30,6 +31,7 @@ use super::codec::CatalogCodec; use super::meta_store::MetaStore; const CATALOG_KEY_PREFIX: &str = "catalog:stream_table:"; +const STREAMING_JOB_KEY_PREFIX: &str = "streaming_job:"; pub struct CatalogManager { store: Arc, @@ -72,6 +74,79 @@ impl CatalogManager { format!("{CATALOG_KEY_PREFIX}{}", table_name.to_lowercase()) } + #[inline] + fn build_streaming_job_key(table_name: &str) -> String { + format!("{STREAMING_JOB_KEY_PREFIX}{}", table_name.to_lowercase()) + } + + // ======================================================================== + // Streaming job persistence (CREATE STREAMING TABLE / DROP STREAMING TABLE) + // ======================================================================== + + pub fn persist_streaming_job( + &self, + table_name: &str, + fs_program: &FsProgram, + comment: &str, + ) -> DFResult<()> { + let program_bytes = fs_program.encode_to_vec(); + let def = pb::StreamingTableDefinition { + table_name: table_name.to_string(), 
+ created_at_millis: chrono::Utc::now().timestamp_millis(), + fs_program_bytes: program_bytes, + comment: comment.to_string(), + }; + let payload = def.encode_to_vec(); + let key = Self::build_streaming_job_key(table_name); + self.store.put(&key, payload)?; + info!(table = %table_name, "Streaming job definition persisted"); + Ok(()) + } + + pub fn remove_streaming_job(&self, table_name: &str) -> DFResult<()> { + let key = Self::build_streaming_job_key(table_name); + self.store.delete(&key)?; + info!(table = %table_name, "Streaming job definition removed from store"); + Ok(()) + } + + pub fn load_streaming_job_definitions( + &self, + ) -> DFResult> { + let records = self.store.scan_prefix(STREAMING_JOB_KEY_PREFIX)?; + let mut out = Vec::with_capacity(records.len()); + for (key, payload) in records { + let def = match pb::StreamingTableDefinition::decode(payload.as_slice()) { + Ok(v) => v, + Err(e) => { + warn!( + key = %key, + error = %e, + "Skipping corrupted streaming job record" + ); + continue; + } + }; + let program = match FsProgram::decode(def.fs_program_bytes.as_slice()) { + Ok(v) => v, + Err(e) => { + warn!( + table = %def.table_name, + error = %e, + "Skipping streaming job with corrupted FsProgram" + ); + continue; + } + }; + out.push((def.table_name, program)); + } + Ok(out) + } + + // ======================================================================== + // Catalog table persistence (CREATE TABLE / DROP TABLE) + // ======================================================================== + pub fn add_catalog_table(&self, table: CatalogTable) -> DFResult<()> { let proto_def = self.encode_catalog_table(&table)?; let payload = proto_def.encode_to_vec(); @@ -392,6 +467,64 @@ pub fn restore_global_catalog_from_store() { } } +pub fn restore_streaming_jobs_from_store() { + use crate::runtime::streaming::job::JobManager; + + let Some(catalog) = CatalogManager::try_global() else { + warn!("CatalogManager not available; skipping streaming job restore"); + 
return; + }; + let job_manager = match JobManager::global() { + Ok(jm) => jm, + Err(e) => { + warn!(error = %e, "JobManager not available; skipping streaming job restore"); + return; + } + }; + + let definitions = match catalog.load_streaming_job_definitions() { + Ok(defs) => defs, + Err(e) => { + warn!(error = %e, "Failed to load streaming job definitions from store"); + return; + } + }; + + if definitions.is_empty() { + info!("No persisted streaming jobs to restore"); + return; + } + + let total = definitions.len(); + info!(count = total, "Restoring persisted streaming jobs"); + + let rt = tokio::runtime::Handle::current(); + let mut restored = 0usize; + let mut failed = 0usize; + + for (table_name, fs_program) in definitions { + let jm = job_manager.clone(); + let name = table_name.clone(); + match rt.block_on(jm.submit_job(name.clone(), fs_program)) { + Ok(job_id) => { + info!(table = %table_name, job_id = %job_id, "Streaming job restored"); + restored += 1; + } + Err(e) => { + warn!(table = %table_name, error = %e, "Failed to restore streaming job"); + failed += 1; + } + } + } + + info!( + restored = restored, + failed = failed, + total = total, + "Streaming job restore complete" + ); +} + pub fn initialize_stream_catalog(config: &crate::config::GlobalConfig) -> anyhow::Result<()> { if !config.stream_catalog.persist { return CatalogManager::init_global_in_memory() @@ -440,7 +573,7 @@ mod tests { use crate::sql::schema::connection_type::ConnectionType; use crate::sql::schema::source_table::SourceTable; use crate::sql::schema::table::Table as CatalogTable; - use crate::storage::stream_catalog::{InMemoryMetaStore, MetaStore}; + use crate::storage::stream_catalog::InMemoryMetaStore; use super::CatalogManager; diff --git a/src/storage/stream_catalog/mod.rs b/src/storage/stream_catalog/mod.rs index 1b893cea..b99f3080 100644 --- a/src/storage/stream_catalog/mod.rs +++ b/src/storage/stream_catalog/mod.rs @@ -20,6 +20,7 @@ mod rocksdb_meta_store; pub use manager::{ 
CatalogManager, initialize_stream_catalog, restore_global_catalog_from_store, + restore_streaming_jobs_from_store, }; pub use meta_store::{InMemoryMetaStore, MetaStore}; pub use rocksdb_meta_store::RocksDbMetaStore;